> ## Reading the classroom data from a remote web site (Exercises 1) > classroom <- read.csv("http://www-personal.umich.edu/~bwest/classroom.csv") > str(classroom) 'data.frame': 1190 obs. of 12 variables: $ sex : int 1 0 1 0 0 1 0 0 1 0 ... $ minority: int 1 1 1 1 1 1 1 1 1 1 ... $ mathkind: int 448 460 511 449 425 450 452 443 422 480 ... $ mathgain: int 32 109 56 83 53 65 51 66 88 -7 ... $ ses : num 0.46 -0.27 -0.03 -0.38 -0.03 0.76 -0.03 0.2 0.64 0.13 ... $ yearstea: num 1 1 1 2 2 2 2 2 2 2 ... $ mathknow: num NA NA NA -0.11 -0.11 -0.11 -0.11 -0.11 -0.11 -0.11 ... $ housepov: num 0.082 0.082 0.082 0.082 0.082 0.082 0.082 0.082 0.082 0.082 ... $ mathprep: num 2 2 2 3.25 3.25 3.25 3.25 3.25 3.25 3.25 ... $ classid : int 160 160 160 217 217 217 217 217 217 217 ... $ schoolid: int 1 1 1 1 1 1 1 1 1 1 ... $ childid : int 1 2 3 4 5 6 7 8 9 10 ... > summary(classroom) sex minority mathkind mathgain Min. :0.0000 Min. :0.0000 Min. :290.0 Min. :-110.00 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:439.2 1st Qu.: 35.00 Median :1.0000 Median :1.0000 Median :466.0 Median : 56.00 Mean :0.5059 Mean :0.6773 Mean :466.7 Mean : 57.57 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:495.0 3rd Qu.: 77.00 Max. :1.0000 Max. :1.0000 Max. :629.0 Max. : 253.00 ses yearstea mathknow housepov Min. :-1.61000 Min. : 0.00 Min. : -2.5000 Min. :0.0120 1st Qu.:-0.49000 1st Qu.: 4.00 1st Qu.: -0.7200 1st Qu.:0.0850 Median :-0.03000 Median :10.00 Median : -0.1300 Median :0.1270 Mean :-0.01298 Mean :12.21 Mean : 0.0312 Mean :0.1782 3rd Qu.: 0.39750 3rd Qu.:20.00 3rd Qu.: 0.8500 3rd Qu.:0.2550 Max. : 3.21000 Max. :40.00 Max. : 2.6100 Max. :0.5640 NA's :109.0000 mathprep classid schoolid childid Min. :1.000 Min. : 1.0 Min. : 1.00 Min. : 1.0 1st Qu.:2.000 1st Qu.: 80.0 1st Qu.: 26.00 1st Qu.: 298.2 Median :2.300 Median :157.0 Median : 54.00 Median : 595.5 Mean :2.612 Mean :157.5 Mean : 52.94 Mean : 595.5 3rd Qu.:3.000 3rd Qu.:238.8 3rd Qu.: 79.00 3rd Qu.: 892.8 Max. :6.000 Max. :312.0 Max. :107.00 Max. :1190.0 > ## Running some of the code from the "Plotting densities" example > xmax <- qexp(0.995, rate=0.2) > (xmax <- qexp(0.995, rate=0.2)) # adding redundant parens causes assigned value to print [1] 26.49159 > xvals <- seq(0, xmax, length=100) > str(xvals) num [1:100] 0 0.268 0.535 0.803 1.07 ... > head(xvals, 10) [1] 0.0000000 0.2675918 0.5351836 0.8027754 1.0703671 1.3379589 1.6055507 [8] 1.8731425 2.1407343 2.4083261 > head(diff(xvals)) # should be equally spaced [1] 0.2675918 0.2675918 0.2675918 0.2675918 0.2675918 0.2675918 > unique(diff(xvals)) # nearly but not exactly equally spaced [1] 0.2675918 0.2675918 0.2675918 0.2675918 0.2675918 0.2675918 0.2675918 [8] 0.2675918 > library(ggplot2) # ggplot2 requires other packages Loading required package: reshape Loading required package: plyr Loading required package: grid Loading required package: proto > sessionInfo() # check what system, version, packages etc. used R version 2.11.1 (2010-05-31) i486-pc-linux-gnu locale: [1] LC_CTYPE=en_US.utf8 LC_NUMERIC=C [3] LC_TIME=en_US.utf8 LC_COLLATE=en_US.utf8 [5] LC_MONETARY=C LC_MESSAGES=en_US.utf8 [7] LC_PAPER=en_US.utf8 LC_NAME=C [9] LC_ADDRESS=C LC_TELEPHONE=C [11] LC_MEASUREMENT=en_US.utf8 LC_IDENTIFICATION=C attached base packages: [1] grid stats graphics grDevices utils datasets methods [8] base other attached packages: [1] ggplot2_0.8.8 proto_0.3-8 reshape_0.8.3 plyr_1.1 loaded via a namespace (and not attached): [1] tools_2.11.1 > upgrade.packages() # got the name of update.packages() wrong Error: could not find function "upgrade.packages" > ?install.packages # check other docs for "See also" > ## base plot of exponential density with rate=0.2 > qplot(xvals, dexp(xvals, rate=0.2), geom="line", ylab="f(x)", xlab="x") > ## add a horizontal line (notice change of prompt for continuation of expression) > qplot(xvals, dexp(xvals, rate=0.2), geom="line", ylab="f(x)", + xlab="x") + geom_hline(yintercept=0) > ## alternative approach. Assign a base plot to a name then add to it > (p <- qplot(xvals, dexp(xvals, rate=0.2), geom="line", ylab="f(x)", xlab="x")) > p + geom_hline(yintercept=0) > ## add a single point just to the left of zero > xvals <- c(-1e-5, xvals) > ## determine the maximum endpoint for all rates > xmax <- max(qexp(0.995, rate=c(0.2, 0.3, 0.4, 0.5))) > ## Check that the vectorization works as expected > qexp(0.995, rate=c(0.2, 0.3, 0.4, 0.5)) [1] 26.49159 17.66106 13.24579 10.59663 > ## Create and melt the data frame > df <- data.frame(xvals, + dexp(xvals,rate=0.2), + dexp(xvals,rate=0.3), + dexp(xvals,rate=0.4), + dexp(xvals,rate=0.5)) > names(df) <- c("x", "0.2", "0.3", "0.4", "0.5") > str(df) 'data.frame': 101 obs. of 5 variables: $ x : num -0.00001 0 0.26759 0.53518 0.80278 ... $ 0.2: num 0 0.2 0.19 0.18 0.17 ... $ 0.3: num 0 0.3 0.277 0.256 0.236 ... $ 0.4: num 0 0.4 0.359 0.323 0.29 ... $ 0.5: num 0 0.5 0.437 0.383 0.335 ... > str(fr <- melt(df, id="x")) 'data.frame': 404 obs. of 3 variables: $ x : num -0.00001 0 0.26759 0.53518 0.80278 ... $ variable: Factor w/ 4 levels "0.2","0.3","0.4",..: 1 1 1 1 1 1 1 1 1 1 ... $ value : num 0 0.2 0.19 0.18 0.17 ... > ## Check some values around the transition from rate=0.2 to 0.3 > fr[95:105,] x variable value 95 24.8860361 0.2 0.001378657 96 25.1536279 0.2 0.001306814 97 25.4212197 0.2 0.001238714 98 25.6888115 0.2 0.001174162 99 25.9564033 0.2 0.001112975 100 26.2239950 0.2 0.001054976 101 26.4915868 0.2 0.001000000 102 -0.0000100 0.3 0.000000000 103 0.0000000 0.3 0.300000000 104 0.2675918 0.3 0.276858055 105 0.5351836 0.3 0.255501276 > ## plot with different colors > qplot(x, value, data=fr, geom="line", ylab="f(x)", + xlab="x", color=variable) + geom_hline(yintercept=0) > ## tried to get different line styles but misremembered the name > qplot(x, value, data=fr, geom="line", ylab="f(x)", + xlab="x", linestyle=variable) + geom_hline(yintercept=0) Error in eval(expr, envir, enclos) : object 'variable' not found > ## Error message was misleading - there is a 'variable' name > names(fr) [1] "x" "variable" "value" > ## See if the correct name is in the docs for qplot > ?qplot > ## Not easily seen there but I remembered the argument name is linetype > qplot(x, value, data=fr, geom="line", ylab="f(x)", + xlab="x", linetype=variable) + geom_hline(yintercept=0)