This file displays a strip plot visualization combining circulation and cluster information.

# knitr provides read_chunk(), which is called below.
library(knitr)
# Change the working directory to the viz source folder so that the relative
# file name passed to read_chunk() resolves there.
setwd("/home/rburke/oboc/src/rcr-analysis/src/viz")
# Read the external script circ-strip.R via knitr.
# NOTE(review): presumably this makes its labelled chunks available to this
# document -- confirm against circ-strip.R.
read_chunk("circ-strip.R")

Project constants

# Load project constants
# Change the working directory to the project's src/ folder so that the
# relative path "common.R" resolves there. The working directory stays changed
# for all code that runs after this point.
setwd("/home/rburke/oboc/src/rcr-analysis/src/")
# NOTE(review): common.R presumably defines RCR_DATA and DATA_VERSION, which
# are used further down in this file -- confirm.
source("common.R")

Load libraries

### Load packages
library(caret)
library(stats)
library(gplots)
library(ggplot2)
library(GGally)
library(RColorBrewer)
library(plyr)
library(dplyr)
library(RMySQL)

Load the cluster table

# Read the cluster table
# The file name is assembled from the RCR data directory prefix and the data
# version: <RCR_DATA>branch/branch-cluster-<DATA_VERSION>.csv
path.cluster <- paste0(RCR_DATA, "branch/branch-cluster-", DATA_VERSION, ".csv")
cluster <- read.csv(file = path.cluster)
# Store the Cluster column as a factor rather than as read from the CSV.
cluster[["Cluster"]] <- as.factor(cluster[["Cluster"]])

Load the total circulation table

# Database connection
# Connects to the local MySQL database "oboc" with the credentials supplied
# through the document parameters (params$username / params$password).
con <- dbConnect(MySQL(),
                 user=params$username, password=params$password,
                 dbname="oboc", host="localhost")
# Circulation query
# Counts item ids per (abbrev_season, code_branch) pair, restricted to rows
# with 0 <= day_season < 365.
# Note that the normalized table has only the selected books and only the checkouts.
circ_query <- paste0("select count(T.id_item), T.abbrev_season, T.code_branch ",
                     "from V_norm_trans2 T ",
                     "where day_season >= 0 and day_season < 365 ",
                     "group by T.abbrev_season, T.code_branch")
# Run the query and always release the connection afterwards -- even when the
# query fails -- so the database connection is not leaked.
circ <- tryCatch(dbGetQuery(con, circ_query),
                 finally = dbDisconnect(con))
# Rename the result columns in query order: the count, the season
# abbreviation, and the branch code.
colnames(circ) <- c("Freq", "Book", "Code")

Join circulation and cluster tables

# Left-join the cluster table onto the circulation counts by the "Code"
# column, keeping every circulation row.
circ.clust <- plyr::join(x = circ, y = cluster, by = "Code", type = "left")
# Drop zeros: keep only the rows whose Freq is positive.
circ.clust <- circ.clust[circ.clust[["Freq"]] > 0, ]

Strip plot of checkouts vs. book

# Strip plot: Book on the x axis and Freq on a log10 y axis. Each point is
# drawn as its Code text label, coloured by Cluster and jittered horizontally
# (width 0.3).
p <- ggplot(data = circ.clust,
            mapping = aes(x = Book, y = Freq, label = Code, color = Cluster)) +
  geom_text(position = position_jitter(width = 0.3)) +
  scale_y_log10()
print(p)