# Correlation and Simple Regression -- Generic RScript
#
# Template script: replace `mydata`, `var1`, `var2`, `var3`, `xvar`, `yvar`,
# and `thirdvar` with your own data frame and column names.

# 8.1 Packages Needed
## Install any package in `libs` that is missing, then attach all of them.
libs <- c(
  "tidyverse", "ggplot2", "GGally", "plotly", "ggstatsplot", "car", "rgl",
  "sjPlot"
)
for (pkg in libs) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  # library() stops with an error if the package still cannot be loaded,
  # instead of silently returning FALSE the way require() does.
  library(pkg, character.only = TRUE)
}

# 8.2 Basic correlation command
## cor() needs `use = "complete.obs"` to skip pairs that contain an NA.
cor(mydata$var1, mydata$var2, use = "complete.obs")
## cor.test() has no `use` argument; it drops incomplete pairs on its own.
cor.test(mydata$var1, mydata$var2)

# 8.3 Correlation Matrix
## If you want the actual p-values, include p.numeric = TRUE as an argument
## in the command.
sjPlot::tab_corr(
  mydata[, c("var1", "var2", "var3")],
  na.deletion = "listwise",
  corr.method = "pearson",
  title = "Title of Table",
  show.p = TRUE,
  digits = 2,
  triangle = "lower",
  file = "filename.htm"
)

# 8.4 Graphing Options
## 8.4.1 Scatterplot and fitted line
## Points only.
ggplot(data = mydata, aes(x = xvar, y = yvar)) +
  geom_point()

## Fitted regression line only.
ggplot(data = mydata, aes(x = xvar, y = yvar)) +
  geom_smooth(method = "lm")

## Points colored by a third variable, with one overall fitted line
## (`group = 1` keeps geom_smooth() from fitting one line per color).
ggplot(data = mydata, aes(x = xvar, y = yvar, color = thirdvar)) +
  geom_point(alpha = .3, size = 3) +
  geom_smooth(method = "lm", aes(group = 1), color = "black") +
  labs(title = "Title of Graph", x = "X-axis label", y = "Y-axis label")

## 8.4.2 Correlogram for Correlation Matrix
## If you want listwise deletion to be used, you will first have to create a
## subset data set and drop those cases that have NA in any of the variables.
mydata2 <- mydata[complete.cases(mydata[c("var1", "var2", "var3")]), ]
ggstatsplot::ggcorrmat(
  data = mydata2,
  cor.vars = c(var1, var2, var3),
  cor.vars.names = c("var1 name", "var2 name", "var3 name"),
  title = "Title of Graph",
  matrix.type = "lower"
)

## 8.4.3 Scatterplot Matrix and Correlation Matrix
## Can use either GGally's ggpairs() function (which can accommodate a
## categorical variable) or sjPlot's tab_corr() function.
## If using a categorical variable, be sure to declare it as a factor variable
## if you haven't already done so.
## Declare the categorical variable as a factor before plotting with it.
mydata$catvar <- factor(mydata$catvar)

## Scatterplot matrix of numeric variables; `title` is a ggpairs() argument.
GGally::ggpairs(
  data = subset(mydata, select = c(var1, var2, var3, var4)),
  title = "Graph Title"
)

## Scatterplot matrix colored by the factor; aesthetics go in `mapping`.
GGally::ggpairs(
  data = subset(mydata, select = c(var1, var2, var3, catvar)),
  mapping = ggplot2::aes(group = 1, color = catvar, alpha = .5)
)

## Correlation table: output to .htm works best; saves to working directory.
## If you want the actual p-values, include p.numeric = TRUE as an argument.
sjPlot::tab_corr(
  mydata[, c("var1", "var2", "var3")],
  na.deletion = "listwise",
  corr.method = "pearson",
  title = "Title of Table",
  show.p = TRUE,
  digits = 2,
  triangle = "lower",
  file = "filename.htm"
)

## 8.4.4 3D Scatterplot
## The car and rgl packages make possible an interactive 3d scatterplot. You
## can also create separate planes based on a factor variable (catvar).
## See the scatter3d help page (?car::scatter3d). I do find that I often
## switch the xvar and zvar variables to correspond with how I think the 3d
## scatterplot should look. Note! To get this to display more intuitively,
## your "z-axis label" should be attached to xlab and your "x-axis label"
## should be the label for your zlab.
car::scatter3d(
  yvar ~ zvar + xvar,
  data = mydata,
  xlab = "z-axis label",
  ylab = "y-axis label",
  zlab = "x-axis label"
)

## Same plot with a separate regression plane for each level of catvar.
car::scatter3d(
  yvar ~ zvar + xvar | catvar,
  data = mydata,
  xlab = "z-axis label",
  ylab = "y-axis label",
  zlab = "x-axis label"
)

## 8.4.5 Interactive Scatterplot (using plotly package)
## Can either create an object first with ggplot and then use plotly's
## ggplotly function, or can use the plot_ly function directly.
object <- ggplot2::ggplot(
  data = mydata,
  ggplot2::aes(x = xvar, y = yvar, color = thirdvar)
) +
  ggplot2::geom_point(alpha = .3, size = 3) +
  ggplot2::geom_smooth(
    method = "lm",
    ggplot2::aes(group = 1),
    color = "black"
  ) +
  ggplot2::labs(
    title = "Title of Graph",
    x = "X-axis label",
    y = "Y-axis label"
  )
plotly::ggplotly(object)

## Or can use plot_ly directly:
plotly::plot_ly(data = mydata, x = ~xvar, y = ~yvar, type = "scatter")