Processes the holdings files to produce the final dataset


Load the project constants

# Load project constants
# Holdings

Load the libraries


# --- LOAD_TRANS ----
# Loading function
# book: Book identifier; matched against the Book column of the BOOK_PATHS
#       data frame (see params.R). The second column of the matching row
#       supplies the path fragment for that book.
# Returns the book's pipe-delimited holdings file as a data frame.
load_holdings <- function(book) {
  # Path fragment for this book (second column of BOOK_PATHS)
  file <- BOOK_PATHS[BOOK_PATHS$Book == book, 2]
  if (length(file) != 1) {
    # Fail early rather than handing read.csv an empty or ambiguous path
    stop(paste("Unknown book: ", book, sep=""))
  }
  path <- paste(RCR_DATA, file, "holdings.txt", sep="")
  df <- read.csv(path, sep="|", stringsAsFactors = FALSE)
  return (df)
}

Defining functions for managing the data

Load the holdings data

Create a canonical data frame for holdings data. This is necessary because the transaction files do not use the same fields.

# Canonicalize holdings
# Keep only Branch info.
# Note that the files are not all saved with the same naming convention:
# books "AM" and "TC" store the branch code in column KBRA, while
# "GB", "BT", "WS" and "KC" store it in column BRANCH.
# book: Book identifier (one of the codes listed above)
# df:   Raw holdings data frame for that book
# Returns a data frame with two character columns, Book and Code.
# Stops with an error for an unknown book or a missing branch column.
canonicalize_holdings <- function(book, df) {
  if (book %in% c("AM", "TC")) {
    field <- "KBRA"
  } else if (book %in% c("GB", "BT", "WS", "KC")) {
    field <- "BRANCH"
  } else {
    stop(paste("Unknown book: ", book, sep=""))
  }
  if (!(field %in% names(df))) {
    stop(paste("Missing column ", field, " for book: ", book, sep=""))
  }
  # rep() keeps Book the same length as Code so a zero-row input still works
  df.canon <- data.frame(Book=rep(book, nrow(df)), Code=df[[field]],
                         stringsAsFactors=FALSE)
  return (df.canon)
}

Filter the branches using the BRANCHES_IGNORABLE data

# Branch filter
# Fix the branch data for the North Austin problem: rows whose Code is NA
# are relabelled "NoA".
# NOTE(review): presumably the North Austin code is the literal string "NA",
# which read.csv turns into a missing value by default -- confirm.
# Remove the branches that we're not using (those in BRANCHES_IGNORABLE).
# df: Canonical holdings data frame with a Code column
# Returns the data frame with NA codes relabelled and ignorable rows dropped.
branch_filter <- function (df) {
  missing.code <- is.na(df$Code)
  if (any(missing.code)) {
    df$Code[missing.code] <- "NoA"
  }
  df <- df[!(df$Code %in% BRANCHES_IGNORABLE),]
  return (df)
}

The main function

Loads and processes the holdings data.

# Processes all the holdings into a single data frame
# For every book in the first column of BOOK_PATHS: load the raw holdings,
# reduce them to the canonical Book/Code form, then apply the branch filter.
# Returns one combined data frame with Book and Code as factors.
event_holdings <- function() {
  hold.files <- list()
  for (book in as.character(BOOK_PATHS[,1])) {
    df <- load_holdings(book)
    df.canon <- canonicalize_holdings(book, df)
    df.branch <- branch_filter(df.canon)
    hold.files[[book]] <- df.branch
  }
  # Assemble all the listed data frames into one.
  holdsdf <- ldply(hold.files, .id="Book")
  # Convert the Book and Code data to factors
  holdsdf$Book <- factor(holdsdf$Book)
  holdsdf$Code <- factor(holdsdf$Code)
  return (holdsdf)
}

Creating the total holdings per branch table

Get all the holdings.

# Build the combined holdings data frame via event_holdings()
holdsdf <- event_holdings()

Make table and convert back to data frame

# Count holdings for every Book x Code combination
holds.tbl <- xtabs(~ Book + Code, data=holdsdf)
# as.data.frame() on a table gives one row per Book/Code pair, count in "Freq"
holds.tbl.df <- as.data.frame(holds.tbl)
# Give the count column a descriptive name
holds.tbl.df <- rename(holds.tbl.df, replace=c("Freq"="Holds"))

Save the data frame

# Output file: <RCR_DATA>holdings-<DATA_VERSION>.csv
path.out <- paste0(RCR_DATA, "holdings-", DATA_VERSION, ".csv")
# Write the per-branch table without the row-name column
write.csv(x = holds.tbl.df, file = path.out, row.names = FALSE)