I've tried to implement the K-means algorithm in C# but somehow the output of it is a black (small) image. I wrote the following code:
public static Color[,] Kmeans(int number_clusters, Vector[,] input, int iterations)
{
int length = input.GetLength(0);
int height = input.GetLength(1);
Random randomizer = new Random();
//Inits centroides
List<Vector> centroides = new List<Vector>(number_clusters);
List<List<Vector>> clusters = new List<List<Vector>>(number_clusters);
for(int i = 0; i < number_clusters; i++)
{
Vector centroid = input[randomizer.Next(0, length), randomizer.Next(0, height)];
List<Vector> cluster = new List<Vector>();
cluster.Add(centroid);
clusters.Add(cluster);
centroides.Add(centroid);
}
int count = 0;
while (count < iterations) {
for (int x = 0; x < input.GetLength(0); x++)
{
for (int y = 0; y < input.GetLength(1); y++)
{
Vector currentVector = input[x, y];
double min_distance = Double.PositiveInfinity;
Vector closest_centroid = centroides[0];
//Finds closest centroid
foreach (Vector centroid in centroides)
{
double distance = currentVector.Distance(centroid);
if (Math.Pow(distance, 2) < min_distance)
{
min_distance = Math.Pow(distance, 2);
closest_centroid = centroid;
}
}
//Find cluster with that centroid in it
List<Vector> to_assign = clusters[0];
foreach (List<Vector> cluster_list in clusters)
{
if (cluster_list[0].r == closest_centroid.r && cluster_list[0].g == closest_centroid.g && cluster_list[0].b == closest_centroid.b)
{
to_assign = cluster_list;
break;
}
}
to_assign.Add(currentVector);
int current_length = to_assign.Count();
Vector current_centroid = to_assign[0];
Vector new_centroid = new Vector(0, 0, 0);
foreach (Vector vector in to_assign)
{
new_centroid.Sum(vector);
}
new_centroid = new Vector(new_centroid.r / current_length, new_centroid.g / current_length, new_centroid.b / current_length);
to_assign.RemoveAt(0);
to_assign.Insert(0, new_centroid);
for (int i = 0; i < centroides.Count(); i++)
{
if (centroides[i].r == current_centroid.r && centroides[i].g == current_centroid.g && current_centroid.b == centroides[i].b)
{
centroides[i] = new_centroid;
break;
}
}
}
}
count++;
}
foreach(List<Vector> cluster_list in clusters)
{
Vector current_centroid = cluster_list[0];
for(int i = 1; i < cluster_list.Count(); i++)
{
cluster_list[i].r = current_centroid.r;
cluster_list[i].g = current_centroid.g;
cluster_list[i].b = current_centroid.b;
}
}
Color[,] output = new Color[length, height];
foreach (List<Vector> cluster_list in clusters)
{
for (int i = 0; i < cluster_list.Count(); i++)
{
Vector current_vector = cluster_list[i];
output[current_vector.x, current_vector.y] = Color.FromArgb((int) current_vector.r, (int) current_vector.g, (int) current_vector.b);
}
}
return output;
}
The Vector is the following class:
public class Vector
{
public double r, g, b;
public int x, y;
public Vector(double r, double g, double b)
{
this.r = r;
this.g = g;
this.b = b;
}
public void SetCoordinates(int x, int y)
{
this.x = x;
this.y = y;
}
public double Distance(Vector v2)
{
double r_power = Math.Pow((r - v2.r), 2);
double g_power = Math.Pow((g - v2.g), 2);
double b_power = Math.Pow((b - v2.b), 2);
double distance = Math.Sqrt((r_power + g_power + b_power));
return distance;
}
public Vector Product(int scalar)
{
return new Vector(r * scalar, g * scalar, b * scalar);
}
public double Length()
{
return Math.Sqrt((Math.Pow(r, 2) + Math.Pow(g, 2) + Math.Pow(b, 2)));
}
public Vector Sum(Vector v2)
{
double new_r = r + v2.r;
double new_g = g + v2.g;
double new_b = b + v2.b;
return new Vector(new_r, new_g, new_b);
}
}
I tried implementing the code by the following description of the algorithm:
Assign the object to the clusters: For each object v in the test set do the following steps: 1 Compute the square distance between v and each centroid k of each cluster ( d 2 ( v , k )). 2 Assign the object v to the cluster with the nearest centroid. Update the centroids: For each cluster k compute their average vector. The average vector is computed analogously to the average of a number: we sum all the vectors in the cluster (with the vector sum) and we divide (scalar product with the inverse) by the number of vectors in the cluster. Repeat the two steps above
Could anyone help me out fixing this algorithm
I found the solution:
foreach (Vector vector in to_assign)
{
new_centroid.Sum(vector);
}
Should be changed to:
foreach (Vector vector in to_assign)
{
new_centroid = new_centroid.Sum(vector);
}
This because the Sum method returns a new Vector, which now isn't stored
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With