This file displays several visualizations of the principal components clustering of the library branches.

# Attach knitr so that read_chunk() is available.
library(knitr)
# NOTE(review): hard-coded absolute path to the viz sources; this only
# resolves on a machine with the same directory layout.
setwd("/home/rburke/oboc/src/rcr-analysis/src/viz/")
# Read the code chunks of cluster-viz.R; the relative file name is resolved
# against the working directory set on the previous line.
read_chunk("cluster-viz.R")

Project constants

# Load project constants
# NOTE(review): hard-coded absolute path; common.R is looked up relative to
# the directory set here.
setwd("/home/rburke/oboc/src/rcr-analysis/src/")
# RCR_DATA and DATA_VERSION are used further down but never assigned in this
# file, so they are presumably defined by common.R -- TODO confirm.
source("common.R")

Load libraries

### Load packages
library(ggplot2)
library(GGally)
## Warning: replacing previous import by 'utils::capture.output' when loading
## 'GGally'
## Warning: replacing previous import by 'utils::head' when loading 'GGally'
## Warning: replacing previous import by 'utils::installed.packages' when
## loading 'GGally'
## Warning: replacing previous import by 'utils::str' when loading 'GGally'
library(RColorBrewer)
library(plyr)
library(reshape2)
library(maptools)
## Loading required package: sp
## Checking rgeos availability: TRUE
library(plotrix)
library(classInt)
library(maps)
## 
## Attaching package: 'maps'
## The following object is masked from 'package:plyr':
## 
##     ozone
library(ggmap)
library(rgdal)
## rgdal: version: 1.1-10, (SVN revision 622)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 1.11.3, released 2015/09/16
##  Path to GDAL shared files: /usr/share/gdal/1.11
##  Loaded PROJ.4 runtime: Rel. 4.9.2, 08 September 2015, [PJ_VERSION: 492]
##  Path to PROJ.4 shared files: (autodetected)
##  Linking to sp version: 1.2-3
library(mapproj)
library(RMySQL)
## Loading required package: DBI

Loading the principal components data

# Build the path of the versioned principal-components CSV under RCR_DATA
# (demographic data) and load it into a data frame.
path.demo <- paste0(
  RCR_DATA, "branch/branch-data-prcomp-", DATA_VERSION, ".csv"
)
demo <- read.csv(file = path.demo)

Load the cluster table

# Build the path of the versioned cluster table under RCR_DATA and load it.
path.cluster <- paste0(
  RCR_DATA, "branch/branch-cluster-", DATA_VERSION, ".csv"
)
cluster <- read.csv(file = path.cluster)
# Treat the cluster id as a categorical label rather than a number, so it can
# drive discrete color/fill scales in the plots.
cluster[["Cluster"]] <- as.factor(cluster[["Cluster"]])

Load the library locations for the map

# Database connection
# `params` is not defined in this file; presumably the parameters of the
# enclosing R Markdown document -- TODO confirm.
con <- dbConnect(MySQL(),
                 user=params$username, password=params$password,
                 dbname="oboc", host="localhost")
# Code and coordinates of every branch that has a latitude recorded
location_query <- paste("select code_branch, lat_branch, long_branch ",
  " from CPL_branch where lat_branch is not NULL")
rs1 <- dbSendQuery(con, location_query)
## Warning in .local(conn, statement, ...): Decimal MySQL column 1 imported as
## numeric
## Warning in .local(conn, statement, ...): Decimal MySQL column 2 imported as
## numeric
# n = -1 fetches all remaining rows of the result set
branchpt <- dbFetch(rs1, n = -1)
# Release the result set and close the connection now that the rows are in
# memory; nothing below talks to the database again. invisible() keeps the
# TRUE return values out of the rendered output.
invisible(dbClearResult(rs1))
invisible(dbDisconnect(con))
# Rename the SQL columns to the names used by the join and the map below
colnames(branchpt) <- c("Code", "Latitude", "Longitude")

Load the shape files for the map

# Path of the Chicago community areas shapefile under RCR_DATA
path.map <- paste0(RCR_DATA, "aux/CommAreas.shp")

# Read the "CommAreas" layer of that shapefile through OGR
comm <- readOGR(dsn = path.map, layer = "CommAreas")
## OGR data source with driver: ESRI Shapefile 
## Source: "/home/rburke/oboc/src/rcr-analysis/data/aux/CommAreas.shp", layer: "CommAreas"
## with 77 features
## It has 9 fields

Associate clusters with locations

# Left join on Code: every branch location is kept and gets the columns of
# its matching row in the cluster table.
branches <- join(x = branchpt, y = cluster, by = "Code", type = "left")

Convert shape files

# Reproject the community areas to WGS84 longitude/latitude, then flatten the
# spatial object into a data frame for ggplot, using the COMMUNITY field as
# the region identifier.
comm.proj <- fortify(
  spTransform(comm, CRS("+proj=longlat +datum=WGS84")),
  region = "COMMUNITY"
)

Branch map, colored by cluster

# Base layer: community areas drawn as polygons outlined in white
pl <- ggplot() +
    geom_polygon(data=comm.proj, aes(x=long, y=lat, group=group),
                 color="white", show.legend=FALSE)
# Branches drawn as filled circles (shape 21) whose fill encodes the cluster.
# The outline color and the point size are constants, so they are set as
# layer parameters instead of being placed inside aes(): inside aes() they
# are treated as data and run through the color/size scales, which yields a
# default palette color rather than black and a rescaled size rather than 5.
pl <- pl +
    geom_point(data=branches,
               aes(x=Longitude, y=Latitude, fill=Cluster),
               color="black", size=5, shape=21)
pl <- pl + scale_fill_brewer("Clusters", palette="Dark2",
                                breaks=seq(1,6),
                                labels=seq(1,6))
# Draw the legend keys at the same size as the points. The color and size
# legends no longer need to be suppressed because neither is mapped anymore.
pl <- pl + guides(fill=guide_legend(override.aes=list(size=5)))
# Map projection, with all theme elements removed except the legend
pl <- pl + coord_map() + theme_nothing(legend=TRUE)
## Warning: `panel.margin` is deprecated. Please use `panel.spacing` property
## instead
print(pl)

Data frame for multi-dimensional scaling

# Branch codes to leave out of the multi-dimensional scaling
code.drop <- c("H0", "S1", "W1")
demo.noRG <- demo[!(demo$Code %in% code.drop), ]
# Join the *filtered* rows with their clusters. The original joined the
# unfiltered `demo`, which left demo.noRG unused and kept the dropped codes
# in the scaling.
# NOTE(review): assumes excluding these codes is the intent -- confirm.
demo.clust <- join(demo.noRG, cluster, by="Code", type="left")
# First eight principal components, PC1..PC8
branchPC8 <- demo.clust[, paste0("PC", 1:8)]

Compute distances

# Pairwise Euclidean distances between the rows of the component table
d <- dist(branchPC8, method = "euclidean")
# Classical multi-dimensional scaling down to two dimensions; eig = TRUE
# makes cmdscale return a list (with $points) instead of a bare matrix.
fit <- cmdscale(d, k = 2, eig = TRUE)

Plot 2-D projection

# One row per scaled point: its two MDS coordinates, its branch code and its
# cluster as a categorical label.
mds <- data.frame(
  Coord1 = fit$points[, 1],
  Coord2 = fit$points[, 2],
  Code = demo.clust$Code,
  Cluster = as.factor(demo.clust$Cluster)
)

# Scatter of branch codes drawn as bold text, colored by cluster.
p <- ggplot(
  data = mds,
  aes(x = Coord1, y = Coord2, color = Cluster, label = Code)
) +
  geom_text(fontface = "bold") +
  scale_color_brewer("Clusters", palette = "Dark2",
                     breaks = 1:5, labels = 1:5) +
  # Enlarge the legend keys; no size legend is shown.
  guides(color = guide_legend(override.aes = list(size = 5)), size = "none")
print(p)