# This is a lab to introduce functionality of R.
# My main objective is to show how to do many of the things researchers
# need to do. This includes: loading data, saving data, transforming data,
# plotting data, and data analysis.

###############################
# I: LOADING YOUR DATA
###############################

# Download the CSV file I created by going to streviscerto.com and then
# navigate to "Intro to R".
# Save the CSV file in your R working directory (WD). Set up WD through:
#   Session -> Set Working Directory
#   -or-
#   Edit -> Settings
# Or you can use this code.
# Warning: this can be different/difficult in Windows.
# NOTE: the path below is specific to one machine; replace it with the
# folder where you saved the CSV file.
setwd("/Users/trevis/Dropbox (Personal)/~Research/r data")

# This will tell you what your working directory is.
getwd()

# With that in mind, let's make the CSV dataset an R dataset with 'read.csv'.
# This turns the CSV file into what R calls a "data frame," which is a
# rectangle of columns and rows.
# This is what most of us would just call a "dataset".
# In R, <- is equal to =
# We are naming the data frame/set "rmdata".
rmdata <- read.csv("introtor.csv")

# Let's just look at the data in the window
# (e.g., run View(rmdata) interactively).
# First let's make the dataset smaller using the subset command.
rmdata2 <- subset(rmdata, groupid < 101)

# Now let's see how you would do the opposite.
# Now we are going to write (i.e., save) rather than read a CSV file.
write.csv(rmdata2, "rmlabsmall.csv", row.names = FALSE)

###############################
# II: TRANSFORMING YOUR DATA
###############################

# We need to tell R which dataset should include this new variable.
# The format here is datasetname$variablename
rmdata$happy00 <- rmdata$happy * 100

# If I haven't said it already: LOOK AT THE DATASETS!!!!

# Second way to create a new variable, which requires a "package".
# -mutate- allows you to generate new variables much like Stata's -gen-.
# KP I figured out to include this in a new dataset. w/out this, mutate
# doesn't 'save' the new vars.
# Only took me an hour to figure this out!
# (Note: we're not using these variables. This is just to show you.)
# (Note2: 'mutate' is from dplyr package)

# First find and install dplyr. This is one of the most popular packages in R.
# It is part of what is called "tidyverse". Tidyverse is a collection of
# popular packages that are all designed to work together naturally.
# Let's install it, which will also install dplyr
# (one-time step: install.packages("tidyverse")).
# library(tidyverse)
library(dplyr)

# This is the mutate command in dplyr, which allows us to create many
# variables simultaneously.
# Notice how the new dataset has the same number of obs but more variables.
rmdatatest <- mutate(
  rmdata,
  jobstress00 = jobstress * 100,
  jobsat00 = jobsat * 100,
  logjobstress = log(5 + jobstress)
)

# Rename a variable (new name on the left, old name on the right).
rmdatatest <- rename(rmdatatest, jobstress2 = jobstress00)

# Now let's just keep numerical variables for summary stats.
x <- subset(rmdata, select = c(happy, jobsat, jobstress))

###############################
# III: SUMMARIZING YOUR DATA
###############################

summary(x)

# I like the psych package for summary stats.
# psych is "A general purpose toolbox developed originally for personality,
# psychometric theory and experimental psychology".
library(psych)
describe(x)

# We can examine correlations.
cor(x)

# If you have missing data, you will need this option.
# Otherwise, your matrix will include NAs.
cor(x, use = "complete.obs")

# We can round it to two decimals.
round(cor(x, use = "complete.obs"), 2)

###############################
# IV: VISUALIZING YOUR DATA
###############################

# Here is what I love about R. So many cool packages. Here is one to help
# you visualize your data.
library(PerformanceAnalytics)
chart.Correlation(x, histogram = TRUE, pch = 19)

# Now let's spend a little time on data visualization using GGPLOT2.
# First histograms and density plots.
# First, let's look at "hist" in base R.
hist(rmdata$happy)

library(ggplot2)
ggplot(rmdata, aes(x = happy)) +
  geom_histogram()
ggplot(rmdata, aes(x = happy)) +
  geom_histogram(bins = 50)
ggplot(rmdata, aes(x = happy)) +
  geom_density()
ggplot(rmdata, aes(x = happy)) +
  geom_density() +
  geom_density(color = "darkblue", fill = "lightblue")
ggplot(rmdata, aes(x = happy)) +
  geom_density() +
  geom_density(color = "green", fill = "magenta")

# You can also make these their own objects.
a <- ggplot(rmdata, aes(x = happy)) +
  geom_density() +
  geom_density(color = "darkblue", fill = "lightblue")
b <- ggplot(rmdata, aes(x = happy)) +
  geom_density() +
  geom_density(color = "green", fill = "magenta")

# But maybe we want to make them side by side.
library(gridExtra)
grid.arrange(a, b)
grid.arrange(a, b, nrow = 1)

# Basic scatter plots.
ggplot(rmdata, aes(x = happy, y = jobsat)) +
  geom_point()

# We can also change other features of the plot.
ggplot(rmdata, aes(x = happy, y = jobsat)) +
  geom_point(size = 3, shape = 13, color = "purple")
# For shape/number codes: https://r-graphics.org/recipe-scatter-shapes

#####################################################
# V: CREATING GROUP-LEVEL VARIABLES AND MERGING DATA
#####################################################

## What about creating group variables?
## IMPORTANT: need dplyr package to run the following.
## This is analogous to proc means in SAS.
## Create firm-level variables to merge back in
## by group in a dataset named 'sums'.
## NOTE: "%>%" is known as a "pipe" and can be translated as meaning "then".
## Just use this code and substitute your dataset and variables.
sums <- rmdata %>%
  select(groupid, jobsat, jobstress) %>%
  group_by(groupid) %>%
  summarize(
    avgjobsat = mean(jobsat),
    avgjobstress = mean(jobstress),
    minjobsat = min(jobsat),
    maxjobsat = max(jobsat),
    sdjobstress = sd(jobstress),
    count1 = n()
  )
head(sums)

# Let's merge them back into the original dataset.
# Merge two data frames by one variable (groupid).
# So easy! No sorting required!
# The all.x is for a left merge keeping all in rmdata.
# (Note: 'a' and 'b' below replace the plot objects created in section IV.)
a <- merge(rmdata, sums, by = "groupid", all.x = TRUE)
head(a)
summary(a)

# This is merging using dplyr's 'left_join'.
b <- left_join(rmdata, sums, by = "groupid")

#####################################################
# VI: OLS REGRESSION MODELS
#####################################################

# Regression model.
ols1 <- lm(happy ~ jobsat + jobstress, data = rmdata)
summary(ols1)

# This is a pretty cool way to look at plots for OLS.
plot(ols1)

# Let's make it look prettier using stargazer package.
# type = "text" shows a plain-text table; 'out' also saves the table to a file.
library(stargazer)
stargazer(
  ols1,
  type = "text",
  out = "regression.html",
  title = "My First Model"
)

# Use ASQ's format (note: same output file name as the call above).
stargazer(
  ols1,
  type = "text",
  out = "regression.html",
  title = "My First ASQ Model",
  style = "asq"
)

#####################################################
# VII: MULTILEVEL/MIXED MODELS
#####################################################

library(lme4) # for mixed models and VarCorr function
# sjstats was loaded in the original lab "for icc function"; the ICC calls
# below use performance::icc() instead.
library(sjstats)

# Unconditional means model (random intercept for groupid, no predictors).
mun <- lmer(happy ~ (1 | groupid), data = rmdata)
summary(mun)

# Here is another way to get ICCs using performance package. The pkg::fun()
# form is another way to call a function from a package without library().
performance::icc(mun)

# Let's add some IVs.
m1 <- lmer(happy ~ jobsat + jobstress + (1 | groupid), data = rmdata)
summary(m1)
performance::icc(m1)