##########################################################################
## Maya Sen
## Gov 2000
## Section 1
## Sept 10, 2009
## Credit to Jenn Larson, Matt Blackwell and other Gov 2000 TFs for this code.
##########################################################################

## about homeworks.
##########################################################################

## 1. please make it so that we can run your entire R file from beginning
##    to end without errors.
## 2. please make it so that R prints out your
##    answers so we don't have to go around fishing for them in your code.

## example
x <- 14 + 7
x

## 3. if your R file crashes, does not print out answers, or gives answers
##    that contradict your write up, that's bad!
## 4. assume that we have the data saved in our working directory.
##    don't change the working directory in the R file you submit
##    (because we don't have the same folders you do!)

##########################################################################
## Review of last time.
##########################################################################

## Do you have R loaded? if not, go to
## http://www.r-project.org/

## Now...
## R is a basic calculator:
2 + 2
8^2
(100 + 5) / 5
log(45)
exp(.69)

## R can store objects and you can recall them later
p <- .05
p

## a vector (c = "concatenate")
## note: `t` is also the name of R's built-in transpose function; R still
## finds the function when you call t(...), but a different name is safer.
t <- c(1, 0, 0, 0, 1, 1, 0)
t

z <- (100 + 5) / 5
z

## R has many useful preprogrammed functions that manipulate objects
## Suppose you have a complicated vector:
a <- c(1, 1, 1, 3, 4, 5, 6, 6, 6, 6, 6, 6, 10, 11, 12, 12, 12,
       5, 6, 2, 5, 7, 3, 5, 9, 0, 1, 3, 5, 7, 0, 12, 12, 23, 34, 21, 234)

summary(a)
sum(a)
mean(a)
median(a)
length(a) ## length is the number of elements in the vector
sort(a)
?sort ## confused? just ask for help!
sort(a, decreasing = TRUE)
var(a)
sd(a)
max(a)
min(a)

## You can also subset vectors pretty easily
a[5]
a[length(a)] ## the last element
a[4:6]

## ...onto new material

########################################################################
## Matrices
########################################################################

## A matrix is the simplest kind of two-dimensional structure.
## We'll use the "matrix" function
## to see the arguments matrix() takes, use args(matrix)
## or ?matrix

## Let's make some matrices:
## (by default the data fill the matrix column by column;
##  byrow = TRUE fills it row by row instead)
matrix(data = 1:12, nrow = 3, ncol = 4)
matrix(data = 1:12, nrow = 2, ncol = 6)
matrix(data = 1:12, nrow = 2, ncol = 6, byrow = TRUE)

## You can also make an empty matrix to fill in later;
## this is useful for writing functions later on in the term
holder <- matrix(data = NA, nrow = 2, ncol = 3)
holder

## and give the columns and rows names:
colnames(holder) <- c("left", "middle", "right")
rownames(holder) <- c("up", "down")
holder

## You can also create a matrix by combining vectors together
my.vec <- c(4:8)
my.vec2 <- c(5:9)
my.vec3 <- c(1:5)

cbind(my.vec, my.vec2, my.vec3) ## column binding
rbind(my.vec, my.vec2, my.vec3) ## row binding

## Let's store the last matrix
example <- rbind(my.vec, my.vec2, my.vec3)

## We can extract particular elements of a matrix just like
## we could from a vector, though this time we have to specify
## two dimensions, the row and the column
example[1, 1]
example[c(1, 2), 1]
example[, 1] ## just the column
example[1, ] ## just the row

##########################################################################
## logical statements (or logical operators)
##########################################################################

x <- 5
x == 5 ## here we're asking R, "does x equal 5?"

## a few more to get some intuition:
TRUE == TRUE
TRUE == FALSE
5 == 5.0
"hello" == "hello!"

## Let's move on to vectors.
x <- c(1, 2, 3, 3, 3, 4, 5, 6, 7, 8, 9)

## using the same "==" operator as before
## (the comparison is done element by element, so the result is a
##  logical vector with the same length as x)
x == 1
x == 2
x == 3

## now, try "!="
x != 1 ## here, we are asking R, "does x NOT equal 1"
x != 2
x != 3

## and "<=" (less than or equal to)
## and ">=" (greater than or equal to)
x <= 4 ## is x less than or equal to 4?
x >= 4 ## is x greater than or equal to 4?

## to make it slightly clearer, we'll print them together, using
## the data.frame command, which pulls collected vectors
## into one data.frame
## note: a data frame is a matrix-like structure whose columns may be
## of differing types (characters, numbers, etc.)
data.frame(x, x == 1)
data.frame(x, x == 3)
data.frame(x, x == 2)

## We can also use these logical statements to pull out
## parts of the vector that we want.
x[x == 1]

## These become hugely important in what are called "if" statements
x <- 5
if (x == 5) {
  print("gov2000 rocks!")
} else {
  print("this isn't giving me the right answer!")
}

x <- 6
if (x == 5) {
  print("gov2000 rocks!")
} else {
  print("this isn't giving me the right answer!")
}

## and this leads us into...

#################################################################
## Writing Functions
#################################################################

## You can write your own functions in R using the "function" command
## Functions can be really complicated or really simple!

## This function will take three numbers as arguments; it will add
## the first two and divide the sum by the third
##   x, y: the two numbers that are added together
##   z:    the number the sum is divided by
##   returns (x + y) / z
my.function <- function(x, y, z) {
  out <- (x + y) / z
  return(out)
}

## Now we call our function with
my.function(x = 5, y = 10, z = 3) ## arguments matched by name
my.function(5, 10, 3)             ## arguments matched by position

## Now let's see a function that returns the smallest element in a vector
## using the R commands we've seen so far
##   vec: the vector to search
##   returns the first element of the vector once it is sorted in
##   increasing order
small <- function(vec) {
  sorted <- sort(vec)
  out <- sorted[1]
  return(out)
}

## Let's test this
small(a)

## how do we write a function to return a vector's maximum?
## Same idea as the smallest-element function, but sorting in decreasing
## order so that the largest element comes first.
##   vec: the vector to search
##   returns the first element of the vector once it is sorted in
##   decreasing order
large <- function(vec) {
  sorted <- sort(vec, decreasing = TRUE)
  out <- sorted[1]
  return(out)
}

large(a)

#################################################################
## A real data example
#################################################################

ls()
## this command cleans everything in your R workspace
## (that includes the objects and functions created earlier in this script)
rm(list = ls())

load("cambridge.RData") ## from section 1 on the course webpage
ls()

class(loans) ## what kind of data this is
## (Note: a dataframe is slightly different than a matrix, since
## it allows you to also include character strings.)

head(loans)    ## show the top of the data
nrow(loans)    ## get the number of rows of loans
ncol(loans)    ## get the number of columns of loans
names(loans)   ## get the variable names of loans
summary(loans) ## summarize the variables

## loans is a dataframe, which is how R stores datasets.
## we can access the individual variables by using the dollar
## sign:
loans$income
loans$amount

## but we can still use our old tricks--
## Remember how to grab certain rows:
loans[c(105, 216, 307, 415, 430), ]

## or a certain individual observation
loans[452, ]

## And certain columns:
loans[, c(1, 2, 3)]
loans[, c("hisp", "income")]

## we can also use many functions to analyze the data:
summary(loans$amount)
max(loans$amount)
min(loans$amount)

## Now, we probably will want to subset the dataset by values of
## certain variables. Perhaps we would like two datasets: one
## for males and one for females.
men <- loans[loans$sex == "Male", ]
women <- loans[loans$sex == "Female", ] ## or loans$sex != "Male"

mean(men$income)
mean(women$income)

mean(men$rate)
mean(women$rate)

## or one for blacks (race = 3) and non-blacks (race != 3)
blacks <- loans[loans$race == 3, ]
nonblacks <- loans[loans$race != 3, ]

mean(blacks$income)
mean(nonblacks$income)

mean(blacks$rate)
mean(nonblacks$rate)

## we can also construct complex logical statements with
## "and" (&) and "or" (|)
## (use the single & and | here: they compare element by element,
##  which is what we need to pick out rows)

## rich men
loans[(loans$income >= 150) & (loans$sex == "Male"), ]

## rich men who are black
loans[(loans$income >= 150) & (loans$sex == "Male") & (loans$race == 3), ]

## QUESTION: Can we write code to compare the interest
## rates for blacks versus non black in the
## rich men demographic?

## yes!
## blacks
mean(loans$rate[(loans$income >= 150) & (loans$sex == "Male") &
                  (loans$race == 3)])
## non-blacks
mean(loans$rate[(loans$income >= 150) & (loans$sex == "Male") &
                  (loans$race != 3)])

## =======> what conclusions can you draw?

#################################################################
## creating and saving figures
#################################################################

## R makes creating figures pretty easy;
## the standard subsetting rules apply

hist(loans$rate) ## basic

## dressing it up:
hist(loans$rate,
     col = "gold",
     xlim = c(3, 10),
     xlab = "interest rate",
     ylab = "# applicants",
     main = "histogram, cambridge mortgage rates 2006")
## (see http://research.stowers-institute.org/efg/R/Color/Chart/ for more colors!)
## and then to add a vertical line at the mean:
abline(v = mean(loans$rate), col = "blue")

## To save the file:
## everything drawn between pdf() and dev.off() goes into the file
## instead of the plotting window
pdf(file = "Histogram1.pdf", width = 5, height = 5,
    family = "Helvetica", pointsize = 10)
hist(loans$rate,
     col = "gold",
     xlim = c(3, 10),
     xlab = "interest rate",
     ylab = "# applicants",
     main = "histogram, cambridge mortgage rates 2006")
dev.off()

## We can also do different kinds of figures:
plot(density(loans$rate),
     xlab = "interest rate",
     ylab = "density",
     main = "density plot, cambridge mortgage rates 2006")

## we'll use these more extensively later on in the course!

#################################################################
## creating tables -- if we have time!
#################################################################

## The two main functions for creating Latex tables in R are xtable()
## and latex(). Neither function is in the base package, so first you need to
## install one of the relevant libraries.

## To use xtable(), install the package (only if it is not already
## installed, so re-running this file does not download it again)
if (!requireNamespace("xtable", quietly = TRUE)) {
  install.packages("xtable")
}
library(xtable)

## One of the biggest sources of error comes from installing a package
## but then forgetting to load the library.
## After you've installed a package, it will be on your computer
## and next time you can skip the install step and simply load the library

## The easiest way to use a function like xtable() is to collect
## everything you want in your table into a matrix.
## matrix(), as.matrix(), cbind() and rbind() are useful here.

## Let's make an example matrix:
example <- matrix(data = 1:12, nrow = 4)
example

## I can add row and column names to my matrix with
rownames(example) <- c("row 1", "the second row", "numero tres", "Mr. 4")
colnames(example) <- c("a variable", "some means", "Something Else")

## Now we can generate the code to write the matrix as a latex table
xtable(example)
## Voila

## As you may have expected, we can play with the formatting
## (digits and align each have one entry for the row names plus one
##  entry per column, hence four values for our three-column matrix)
xtable(example, digits = c(2, 2, 2, 2))
xtable(example, digits = c(2, 2, 2, 2), caption = "Conclusive Evidence")
xtable(example, digits = c(2, 2, 2, 2), caption = "Conclusive Evidence",
       align = c("l", "c", "c", "c"))
xtable(example, digits = c(2, 2, 2, 2), caption = "Conclusive Evidence",
       align = c("l||", "c", "c", "c"))

## xtable() produces straightforward Latex tables. If you are interested
## in a more complicated table that has more formatting options,
## check out latex() in the Design library
## NOTE(review): "Design" may no longer be available from CRAN (it appears
## to have been superseded by "rms", with latex() provided by "Hmisc") --
## confirm before relying on this step.
if (!requireNamespace("Design", quietly = TRUE)) {
  install.packages("Design")
}
library(Design)

## latex() produces output in a separate file unless you
## specify file = "" (which I recommend)
latex(example, file = "")