Skip to main content
Engineering LibreTexts

6: Examples

  • Page ID
    100791
  • \( \newcommand{\vecs}[1]{\overset { \scriptstyle \rightharpoonup} {\mathbf{#1}} } \)

    \( \newcommand{\vecd}[1]{\overset{-\!-\!\rightharpoonup}{\vphantom{a}\smash {#1}}} \)

    \( \newcommand{\id}{\mathrm{id}}\) \( \newcommand{\Span}{\mathrm{span}}\)

    ( \newcommand{\kernel}{\mathrm{null}\,}\) \( \newcommand{\range}{\mathrm{range}\,}\)

    \( \newcommand{\RealPart}{\mathrm{Re}}\) \( \newcommand{\ImaginaryPart}{\mathrm{Im}}\)

    \( \newcommand{\Argument}{\mathrm{Arg}}\) \( \newcommand{\norm}[1]{\| #1 \|}\)

    \( \newcommand{\inner}[2]{\langle #1, #2 \rangle}\)

    \( \newcommand{\Span}{\mathrm{span}}\)

    \( \newcommand{\id}{\mathrm{id}}\)

    \( \newcommand{\Span}{\mathrm{span}}\)

    \( \newcommand{\kernel}{\mathrm{null}\,}\)

    \( \newcommand{\range}{\mathrm{range}\,}\)

    \( \newcommand{\RealPart}{\mathrm{Re}}\)

    \( \newcommand{\ImaginaryPart}{\mathrm{Im}}\)

    \( \newcommand{\Argument}{\mathrm{Arg}}\)

    \( \newcommand{\norm}[1]{\| #1 \|}\)

    \( \newcommand{\inner}[2]{\langle #1, #2 \rangle}\)

    \( \newcommand{\Span}{\mathrm{span}}\) \( \newcommand{\AA}{\unicode[.8,0]{x212B}}\)

    \( \newcommand{\vectorA}[1]{\vec{#1}}      % arrow\)

    \( \newcommand{\vectorAt}[1]{\vec{\text{#1}}}      % arrow\)

    \( \newcommand{\vectorB}[1]{\overset { \scriptstyle \rightharpoonup} {\mathbf{#1}} } \)

    \( \newcommand{\vectorC}[1]{\textbf{#1}} \)

    \( \newcommand{\vectorD}[1]{\overrightarrow{#1}} \)

    \( \newcommand{\vectorDt}[1]{\overrightarrow{\text{#1}}} \)

    \( \newcommand{\vectE}[1]{\overset{-\!-\!\rightharpoonup}{\vphantom{a}\smash{\mathbf {#1}}}} \)

    \( \newcommand{\vecs}[1]{\overset { \scriptstyle \rightharpoonup} {\mathbf{#1}} } \)

    \( \newcommand{\vecd}[1]{\overset{-\!-\!\rightharpoonup}{\vphantom{a}\smash {#1}}} \)

    < C Sharp Programming

    Wikipedia has related information at K-means++

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Drawing;
    using System.Security.Cryptography;
    
    class KMeansPP
    {
        //Output object
        public class PointClusters
        {
            private Dictionary<Point, List<Point>> _pc = new Dictionary<Point, List<Point>>();
    
            public Dictionary<Point, List<Point>> PC
            {
                get { return _pc; } set { _pc = value; }
            }
        }
    
        //Intermediate calculation object
        public struct PointDetails
        {
            private Point _seedpoint;
            private double[] _Weights;
            private double _Sum;
            private double _minD;
    
            public Point SeedPoint
            {
                get { return _seedpoint; } set { _seedpoint = value; }
            }
    
            public double[] Weights
            {
                get { return _Weights; } set { _Weights = value; }
            }
    
            public double Sum
            {
                get { return _Sum; } set { _Sum = value; }
            }
    
            public double MinD
            {
                get { return _minD; } set { _minD = value; }
            }
        }
    
    
        /// <summary>
        /// Basic (non kd-tree) implementation of kmeans++ algorithm. 
        /// cf. http://en.Wikipedia.org/wiki/K-means%2B%2B
        /// Excellent for financial diversification cf. 
        /// Clustering Techniques for Financial Diversification, March 2009
        /// cf http://www.cse.ohio-state.edu/~johansek/clustering.pdf 
        /// Zach Howard & Keith Johansen
        /// Note1: If unsure what value of k to use, try: k ~ (n/2)^0.5
        /// cf. http://en.Wikipedia.org/wiki/Determining_the_number_of_clusters_in_a_data_set
        /// </summary>
        /// <param name="allPoints">All points in ensemble</param>
        /// <param name="k">Number of clusters</param>
        /// <returns></returns>
        public PointClusters GetKMeansPP(List<Point> allPoints, int k)
        {
            //1. Preprocess KMeans (obtain optimized seed points)
            List<Point> seedPoints = GetSeedPoints(allPoints, k);
    
            //2. Regular KMeans algorithm
            PointClusters resultado = GetKMeans(allPoints, seedPoints, k);
    
            return resultado;
        }
    
        //Bog standard k-means.
        private PointClusters GetKMeans(List<Point> allPoints, List<Point> seedPoints, int k)
        {
            PointClusters cluster = new PointClusters();
            double[] Distances = new double[k];
            double minD = double.MaxValue;
            List<Point> sameDPoint = new List<Point>();
            bool exit = true;
    
            //Cycle thru all points in ensemble and assign to nearest centre 
            foreach (Point p in allPoints)
            {
                foreach (Point sPoint in seedPoints)
                {
                    double dist = GetEuclideanD(p, sPoint);
                    if (dist < minD)
                    {
                        sameDPoint.Clear();
                        minD = dist;
                        sameDPoint.Add(sPoint);
                    }
                    if (dist == minD)
                    {
                        if (!sameDPoint.Contains(sPoint))
                            sameDPoint.Add(sPoint);
                    }
                }
    
                //Extract nearest central point. 
                Point keyPoint;
                if (sameDPoint.Count > 1)
                {
                    int index = GetRandNumCrypto(0, sameDPoint.Count);
                    keyPoint = sameDPoint[index];
                }
                else
                    keyPoint = sameDPoint[0];
    
                //Assign ensemble point to correct central point cluster
                if (!cluster.PC.ContainsKey(keyPoint))  //New
                {
                    List<Point> newCluster = new List<Point>();
                    newCluster.Add(p);
                    cluster.PC.Add(keyPoint, newCluster);
                }
                else
                {   //Existing cluster centre   
                    cluster.PC[keyPoint].Add(p);
                }
    
                //Reset
                sameDPoint.Clear();
                minD = double.MaxValue;
            }
    
            //Bulletproof check - it it come out of the wash incorrect then re-seed.
            if (cluster.PC.Count != k)
            {
                cluster.PC.Clear();
                seedPoints = GetSeedPoints(allPoints, k);
            }
    
            List<Point> newSeeds = GetCentroid(cluster);
    
            //Determine exit
            foreach (Point newSeed in newSeeds)
            {
                if (!cluster.PC.ContainsKey(newSeed))
                    exit = false;
            }
    
            if (exit)
                return cluster;
            else
                return GetKMeans(allPoints, newSeeds, k);
        }
    
        /// <summary>
        /// Get the centroid of a set of points
        /// cf. http://en.Wikipedia.org/wiki/Centroid
        /// Consider also: Metoid cf. http://en.Wikipedia.org/wiki/Medoids
        /// </summary>
        /// <param name="pcs"></param>
        /// <returns></returns>
        private List<Point> GetCentroid(PointClusters pcs)
        {
            List<Point> newSeeds = new List<Point>(pcs.PC.Count);
            Point newSeed;
            int sumX = 0; int sumY = 0;
    
            foreach (List<Point> cluster in pcs.PC.Values)
            {
                foreach (Point p in cluster)
                {
                    sumX += p.X;
                    sumY += p.Y;
                }
    
                newSeed = new Point(sumX / cluster.Count, sumY / cluster.Count);
                newSeeds.Add(newSeed);
                sumX = sumY = 0;
            }
    
            return newSeeds;
        }
    
    
        private List<Point> GetSeedPoints(List<Point> allPoints, int k)
        {
            List<Point> seedPoints = new List<Point>(k);
            PointDetails pd;
            List<PointDetails> pds = new List<PointDetails>();
            int index = 0;
    
            //1. Choose 1 random point as first seed
            int firstIndex = GetRandNorm(0, allPoints.Count);
            Point FirstPoint = allPoints[firstIndex];
            seedPoints.Add(FirstPoint);
    
            for (int i = 0; i < k - 1; i++)
            {
                if (seedPoints.Count >= 2)
                {
                    //Get point with min distance
                    PointDetails minpd = GetMinDPD(pds);
                    index = GetWeightedProbDist(minpd.Weights, minpd.Sum);
                    Point SubsequentPoint = allPoints[index];
                    seedPoints.Add(SubsequentPoint);
    
                    pd = new PointDetails();
                    pd = GetAllDetails(allPoints, SubsequentPoint, pd);
                    pds.Add(pd);
                }
                else
                {
                    pd = new PointDetails();
                    pd = GetAllDetails(allPoints, FirstPoint, pd);
                    pds.Add(pd);
                    index = GetWeightedProbDist(pd.Weights, pd.Sum);
                    Point SecondPoint = allPoints[index];
                    seedPoints.Add(SecondPoint);
    
                    pd = new PointDetails();
                    pd = GetAllDetails(allPoints, SecondPoint, pd);
                    pds.Add(pd);
                }
            }
    
            return seedPoints;
        }
    
        /// <summary>
        /// Very simple weighted probability distribution. NB: No ranking involved.
        /// Returns a random index proportional to to D(x)^2
        /// </summary>
        /// <param name="w">Weights</param>
        /// <param name="s">Sum total of weights</param>
        /// <returns>Index</returns>
        private int GetWeightedProbDist(double[] w, double s)
        {
            double p = GetRandNumCrypto();
            double q = 0d;
            int i = -1;
    
            while (q < p)
            {
                i++;
                q += (w[i] / s);
            }
            return i;
        }
    
        //Gets a pseudo random number (of normal quality) in range: [0, 1)
        private double GetRandNorm()
        {
            Random seed = new Random();
            return seed.NextDouble();
        }
    
        //Gets a pseudo random number (of normal quality) in range: [min, max)
        private int GetRandNorm(int min, int max)
        {
            Random seed = new Random();
            return seed.Next(min, max);
        }
    
        //Pseudorandom number (of crypto strength) in range: [min,max) 
        private int GetRandNumCrypto(int min, int max)
        {
            byte[] salt = new byte[8];
            RNGCryptoServiceProvider rng = new RNGCryptoServiceProvider();
            rng.GetBytes(salt);
            return (int)((double)BitConverter.ToUInt64(salt, 0) / UInt64.MaxValue * (max - min)) + min;
        }
    
        //Pseudorandom number (of crypto strength) in range: [0.0,1.0) 
        private double GetRandNumCrypto()
        {
            byte[] salt = new byte[8];
            RNGCryptoServiceProvider rng = new RNGCryptoServiceProvider();
            rng.GetBytes(salt);
            return (double)BitConverter.ToUInt64(salt, 0) / UInt64.MaxValue;
        }
    
    
        //Gets the weight, sum, & min distance. Loop consolidation essentially.
        private PointDetails GetAllDetails(List<Point> allPoints, Point seedPoint, PointDetails pd)
        {
            double[] Weights = new double[allPoints.Count];
            double minD = double.MaxValue;
            double Sum = 0d;
            int i = 0;
    
            foreach (Point p in allPoints)
            {
                if (p == seedPoint) //Delta is 0
                    continue;
    
                Weights[i] = GetEuclideanD(p, seedPoint);
                Sum += Weights[i];
                if (Weights[i] < minD)
                    minD = Weights[i];
                i++;
            }
    
            pd.SeedPoint = seedPoint;
            pd.Weights = Weights;
            pd.Sum = Sum;
            pd.MinD = minD;
    
            return pd;
        }
    
        /// <summary>
        /// Simple Euclidean distance
        /// cf. http://en.Wikipedia.org/wiki/Euclidean_distance
        /// Consider also: Manhattan, Chebyshev & Minkowski distances
        /// </summary>
        /// <param name="P1"></param>
        /// <param name="P2"></param>
        /// <returns></returns>
        private double GetEuclideanD(Point P1, Point P2)
        {
            double dx = (P1.X - P2.X);
            double dy = (P1.Y - P2.Y);
            return ((dx * dx) + (dy * dy));
        }
    
        //Gets min distance from set of PointDistance objects. If similar then chooses random item.
        private PointDetails GetMinDPD(List<PointDetails> pds)
        {
            double minValue = double.MaxValue;
            List<PointDetails> sameDistValues = new List<PointDetails>();
    
            foreach (PointDetails pd in pds)
            {
                if (pd.MinD < minValue)
                {
                    sameDistValues.Clear();
                    minValue = pd.MinD;
                    sameDistValues.Add(pd);
                }
                if (pd.MinD == minValue)
                {
                    if (!sameDistValues.Contains(pd))
                        sameDistValues.Add(pd);
                }
            }
    
            if (sameDistValues.Count > 1)
                return sameDistValues[GetRandNumCrypto(0, sameDistValues.Count)];
            else
                return sameDistValues[0];
        }
    }

    This page titled 6: Examples is shared under a CC BY-SA 4.0 license and was authored, remixed, and/or curated by Wikibooks via source content that was edited to the style and standards of the LibreTexts platform; a detailed edit history is available upon request.

    • Was this article helpful?