######################################################
# Script to create dendrograms and clusters in R
#
# Reads point coordinates from a CSV file, runs a k-means cluster analysis on
# the standardized coordinates, plots the clusters with their centers, and
# then draws an "elbow" plot of the within-groups sum of squares against the
# number of clusters.

# Read the data into a data frame
DataFile <- 'C:/Temp/599_LabClusters/GEO499_580_cluster_analysis_data_20130225_9PM.csv'
TheData <- read.csv(DataFile)

# Fail early if the coordinate columns used below are not in the file;
# otherwise `$` would quietly return NULL and produce an empty matrix.
stopifnot(all(c("Hometown_Long", "Hometown_Lat") %in% names(TheData)))

# This is new, Doug: cluster on the hometown coordinates.
# To cluster on the current residence instead, use this line in place of the
# hometown one:
# XY <- c(TheData$Current_Residence_Long, TheData$Current_Residence_Lat)
XY <- c(TheData$Hometown_Long, TheData$Hometown_Lat)

# Two columns (longitude, latitude). The row count is taken from the data
# rather than hard-coded, so the script works for any number of records.
Matrix <- matrix(XY, ncol = 2)

######################################################
# Run this section to see the points and the center of the clusters

# Number of clusters used for the cluster plot
NumClusters <- 4

# Prepare Data
Matrix <- na.omit(Matrix) # listwise deletion of missing values
Matrix <- scale(Matrix)   # standardize variables

# K-Means Cluster Analysis: matrix of x and y values, number of clusters
ClusterInfo <- kmeans(Matrix, NumClusters)

# Plot the result, using the cluster number to colorize the points
plot(Matrix, col = ClusterInfo$cluster)

# Add the centers of the clusters
points(ClusterInfo$centers, col = seq_len(NumClusters), pch = 8, cex = 2)

######################################################
# Run from here to get the elbow plot (within-groups sum of squares against
# the number of clusters).
# This is stochastic, so run it repeatedly to see the variation.

# Try up to 15 clusters, but stay below the number of distinct points:
# kmeans() cannot fit more centers than there are distinct observations.
MaxClusters <- max(1, min(15, nrow(unique(Matrix)) - 1))

# Find the variance of each column
TheVariance <- apply(Matrix, 2, var)

# Preallocate, then store the sum of squares for 1 cluster
WithinClusterSumOfSquares <- numeric(MaxClusters)
WithinClusterSumOfSquares[1] <- (nrow(Matrix) - 1) * sum(TheVariance)

# Find the sum of squares for 2 to MaxClusters clusters.
# seq_len(...)[-1] is empty when MaxClusters is 1, so the loop is skipped.
for (i in seq_len(MaxClusters)[-1]) {
  ClusterInfo <- kmeans(Matrix, centers = i)
  WithinClusterSumOfSquares[i] <- ClusterInfo$tot.withinss
}

# Plot the result
plot(
  seq_len(MaxClusters),
  WithinClusterSumOfSquares,
  type = "b",
  xlab = "Number of Clusters",
  ylab = "Within groups sum of squares"
)