1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
|
## Using NCES data to analyze education funding for Woonsocket over 15 years.
## Initialize required packages
require(plyr)
require(reshape2)
require(ggplot2)
require(scales)
## Best to ignore this function-- it's mostly magic to me too. Essentially,
## multiplot takes in a bunch of plots and then puts them into one image,
## arranging them into a number of columns equal to the parameter cols.
## Credit to:
## http://wiki.stdout.org/rcookbook/Graphs/Multiple%20graphs%20on%20one%20page%20(ggplot2)/
## Draw several plots on one page, laid out in a grid that is filled row by
## row.
##
## Args:
##   ...:      Plot objects whose print method accepts a `vp` (viewport)
##             argument, e.g. ggplot objects.
##   plotlist: Optional list of further plots, appended after those in `...`.
##   cols:     Number of columns in the layout (defaults to 1).
##
## Returns NULL invisibly; the function is called for its side effect of
## drawing on the current graphics device. With no plots at all it returns
## without touching the device.
multiplot <- function(..., plotlist = NULL, cols = 1) {
  stopifnot(is.numeric(cols), length(cols) == 1, cols >= 1)

  # Make a single list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  num_plots <- length(plots)
  if (num_plots == 0) {
    # Nothing to draw; avoid opening a blank page or indexing an empty list.
    return(invisible(NULL))
  }

  # Number of rows needed, calculated from the number of columns
  plot_rows <- ceiling(num_plots / cols)

  # Set up the page. grid functions are namespace-qualified so the function
  # does not attach the grid package as a side effect.
  grid::grid.newpage()
  grid::pushViewport(
    grid::viewport(layout = grid::grid.layout(plot_rows, cols))
  )

  # Viewport addressing a single cell of the layout
  cell <- function(row, col) {
    grid::viewport(layout.pos.row = row, layout.pos.col = col)
  }

  # Print each plot into its cell; seq_along() is safe for any list length.
  for (i in seq_along(plots)) {
    cur_row <- ceiling(i / cols)
    cur_col <- (i - 1) %% cols + 1
    print(plots[[i]], vp = cell(cur_row, cur_col))
  }
  invisible(NULL)
}
## Load data from the modified CSV. I made the following changes from the NCES
## downloaded file: 1) I removed all of the description header so that row one
## of the CSV is the attribute names; 2) I pasted the transposed state values
## to the final observation so that I have a state observation row analogous to
## the other LEA rows.
## Read the hand-edited NCES export; its first row holds the attribute names.
raw_data <- read.csv(file = 'rawdata.csv')
## Give the first column a short, predictable name for the lookups below.
colnames(raw_data)[1] <- 'distname'
## Creating Time Series Data for each community of interest.
## I'm going to use a custom function to automate the steps required to create
## district level data in a time series.
## Build a long-format per pupil revenue time series for one district.
##
## Args:
##   name: Value of the `distname` column identifying the district row.
##         Matching is case sensitive.
##   data: Wide data frame with one row per district. Defaults to the global
##         `raw_data`. Column positions are fixed: 2:16 are the divisors
##         (presumably yearly enrollment counts -- TODO confirm against the
##         CSV), 17:31 the local revenue and 32:46 the state revenue.
##
## Returns a data frame with 30 rows and the columns `year` (1995-2009),
## `variable` ('local' or 'state') and `value` (revenue divided by the
## matching column of 2:16).
##
## Stops with an error unless exactly one row of `data` matches `name`.
create_ts <- function(name, data = raw_data) {
  # Look the district up once. which() drops NA comparisons, the same way
  # subset() would.
  district <- data[which(data[['distname']] == name), , drop = FALSE]
  if (nrow(district) != 1L) {
    stop(sprintf("expected exactly one row with distname '%s', found %d",
                 name, nrow(district)),
         call. = FALSE)
  }

  # A few things to note: First, t() is the transpose function and helps to
  # make the "wide" data (lots of columns) "long" (lots of rows). Second, R
  # performs many common mathematical operations element-wise, which is very
  # convenient for data analysts: dividing two equally shaped data frames
  # divides element by element, e.g. column 17 is divided by column 2 to
  # provide the first element of the result. This makes calculating per
  # pupil amounts very convenient.
  pupils <- district[, 2:16]
  local <- t(district[, 17:31] / pupils)
  # Performing the same operation for state per pupil amounts.
  state <- t(district[, 32:46] / pupils)

  # Putting state and local data together and getting rid of the nasty
  # attribute names from NCES by just naming the rows with a sequence
  # of integers.
  results <- data.frame(local, state, row.names = seq_len(15))
  names(results) <- c('local', 'state')
  # Generating the year attribute
  results[['year']] <- seq(1995, 2009, 1)

  # Make the data as long as possible: each row has an ID (year), a label
  # saying whether the amount is local or state, and one value (the dollars).
  melt(results, id.vars = 'year')
}
## Create the Woonsocket data-- note that R is case sensitive so I must use all
## capitals to match the NCES convention.
## Build every series in one pass, keyed by the object name used later on.
## The values are the upper-case district names exactly as NCES spells them.
district_series <- lapply(
  c(woonsocket = 'WOONSOCKET',
    pawtucket = 'PAWTUCKET',
    providence = 'PROVIDENCE',
    westwarwick = 'WEST WARWICK',
    state = 'STATE'),
  create_ts
)
woonsocket <- district_series[['woonsocket']]
pawtucket <- district_series[['pawtucket']]
providence <- district_series[['providence']]
westwarwick <- district_series[['westwarwick']]
state <- district_series[['state']]
## Developing a plot of JUST local revenues for the selected communities
## First I create a percentage change data frame. I think that looking at
## percent change over time is generally more fair. While the nominal dollar
## changes are revealing, my analysis is drawing attention to the trend rather
## than the initial values.
## First, I pull out just the local dollars.
## Express one revenue stream of a district as the change relative to the
## first row of that stream.
##
## Args:
##   series: Long data frame with the columns year, variable and value, in
##           that order, as returned by create_ts().
##   kind:   Which stream to keep: 'local' or 'state'.
##   label:  Display name stored in the disname column.
##
## Returns a data frame with the columns year, disname and value, where value
## is (value / first value) - 1, i.e. 0 in the first row.
percent_change <- function(series, kind, label) {
  # First, pull out just the requested dollars.
  rows <- series[series[['variable']] == kind, , drop = FALSE]
  # Now modify the value to be divided by the starting value - 100%
  rows[['value']] <- (rows[['value']] / rows[['value']][1]) - 1
  # A little renaming for the combining step later: the variable column is
  # reused to hold the district's display name.
  names(rows) <- c('year', 'disname', 'value')
  rows[['disname']] <- label
  rows
}

## Local per pupil revenue for all the districts of interest.
perwoonlocal <- percent_change(woonsocket, 'local', 'Woonsocket')
perpawlocal <- percent_change(pawtucket, 'local', 'Pawtucket')
perprolocal <- percent_change(providence, 'local', 'Providence')
perwwlocal <- percent_change(westwarwick, 'local', 'West Warwick')
perrilocal <- percent_change(state, 'local', 'State Average')

## The same process can be used for state data
perwoonstate <- percent_change(woonsocket, 'state', 'Woonsocket')
perpawstate <- percent_change(pawtucket, 'state', 'Pawtucket')
perprostate <- percent_change(providence, 'state', 'Providence')
perwwstate <- percent_change(westwarwick, 'state', 'West Warwick')
perristate <- percent_change(state, 'state', 'State Average')

## Pull together the data sets for the overall picture.
localfunding <- rbind(perwoonlocal, perpawlocal, perprolocal, perwwlocal,
                      perrilocal)
statefunding <- rbind(perwoonstate, perpawstate, perprostate, perwwstate,
                      perristate)
## A little ggplot2 line plot magic...
## Line plot of percent change per district with value labels on selected
## years.
##
## Args:
##   funding:     Data frame with the columns year, value and disname.
##   label_years: Years whose points get a text label.
##   title:       Plot title.
##
## Returns the ggplot object.
##
## The theme is built with labs()/theme()/element_*(); the older
## opts()/theme_text()/theme_blank() calls are defunct in current ggplot2.
percent_change_plot <- function(funding, label_years, title) {
  labelled <- funding[funding[['year']] %in% label_years, , drop = FALSE]
  ggplot(funding, aes(year, value, color = disname)) +
    geom_line() +
    geom_text(data = labelled,
              mapping = aes(year, value,
                            label = paste0(100 * round(value, 3), '%')),
              vjust = -.4) +
    scale_y_continuous('Percent Change from FY1995', labels = percent) +
    scale_x_continuous('Year') +
    labs(title = title) +
    # Numeric legend.position places the legend inside the panel; the newest
    # ggplot2 releases accept it but warn in favour of legend.position.inside.
    theme(plot.title = element_text(size = 16, face = 'bold'),
          legend.title = element_blank(),
          legend.position = c(.08, .82))
}

localperplot <- percent_change_plot(
  localfunding,
  label_years = 2009,
  title = 'Percent Change in Local Per Pupil Revenue, FY1995- FY2009'
)
stateperplot <- percent_change_plot(
  statefunding,
  label_years = c(2008, 2009),
  title = 'Percent Change in State Per Pupil Revenue, FY1995- FY2009'
)
ggsave('localperplot.png', localperplot, width = 10, height = 8, units = 'in',
       dpi = 72)
ggsave('stateperplot.png', stateperplot, width = 10, height = 8, units = 'in',
       dpi = 72)
## Proportion of Aid
## Compute the local share of non-federal per pupil revenue for each year.
##
## Args:
##   data: Long data frame with the columns year, variable ('local' or
##         'state') and value, as returned by create_ts().
##
## Returns a data frame with one row per year and the columns year, local,
## state, total (local + state) and perlocal (local / total).
proportion <- function(data) {
  # This reshapes the data so that there is a year, local, and state column.
  # The mean function has no practical effect as long as the data is unique
  # by year and variable; it is kept so duplicated rows are still averaged.
  prop <- dcast(data, year ~ variable, fun.aggregate = mean,
                value.var = 'value')
  # Adding local and state gives the total non-federal dollars. The columns
  # are addressed by name so the result does not depend on column order.
  prop[['total']] <- prop[['local']] + prop[['state']]
  prop[['perlocal']] <- prop[['local']] / prop[['total']]
  prop
}
## Prepare new data frames for proportion graphs
## Tag a district's proportion table with its display name. The name is
## prepended as a one-element entry and recycled down the rows when the
## combined list is turned back into a data frame.
label_proportion <- function(label, series) {
  as.data.frame(c(disname = label, proportion(series)))
}

propwoon <- label_proportion('Woonsocket', woonsocket)
proppaw <- label_proportion('Pawtucket', pawtucket)
propprov <- label_proportion('Providence', providence)
propww <- label_proportion('West Warwick', westwarwick)
propri <- label_proportion('State Average', state)

## The per-district frames are kept as separate objects instead of being
## built inside rbind(): the code reads more clearly and the individual
## frames may be useful for further analysis. For exploratory,
## non-production code on data this small, a few extra objects cost nothing
## in performance and are easier to maintain.
locprop <- rbind(propwoon, proppaw, propprov, propww, propri)
## Some ggplot2 magic time!
## Line plot of the local share of non-federal per pupil revenue, with value
## labels on the first year and the last two years.
##
## perlocal is a share (local / total), not a change relative to FY1995, so
## the axis label and title describe it as a proportion. The theme is built
## with labs()/theme()/element_*(); the older opts()/theme_text()/
## theme_blank() calls are defunct in current ggplot2.
localpropplot <- ggplot(locprop, aes(year, perlocal, color = disname)) +
  geom_line() +
  geom_text(data = subset(locprop, year %in% c(1995, 2008, 2009)),
            mapping = aes(year, perlocal,
                          label = paste0(100 * round(perlocal, 3), '%')),
            vjust = -.4) +
  scale_y_continuous('Local Share of Non-Federal Per Pupil Revenue',
                     labels = percent) +
  scale_x_continuous('Year') +
  labs(title = 'Local Proportion of Per Pupil Revenue\n Excluding Federal Funding, FY1995-FY2009') +
  # Numeric legend.position places the legend inside the panel; the newest
  # ggplot2 releases accept it but warn in favour of legend.position.inside.
  theme(plot.title = element_text(size = 16, face = 'bold'),
        legend.title = element_blank(),
        legend.position = c(.9, .65))
ggsave('localpropplot.png', localpropplot, width = 10, height = 8,
       units = 'in', dpi = 72)
|