########################################################################### # PUBLG100: Introduction to Quantitative Methods # # Week 7 Seminar: Interactions # # # Change your working directory setwd("N:/PUBLG100") # Check your working directory getwd() ## ----message=FALSE------------------------------------------------------- library(foreign) library(Zelig) library(texreg) library(dplyr) rm(list=ls()) ## ------------------------------------------------------------------------ # load quality of government institute 2015 dataset world.data <- read.dta("http://uclspp.github.io/PUBLG100/data/QoG2012.dta") # look at variable names names(world.data) # look at first 5 observations world.data[1:5,] # descriptive summary statistics summary(world.data[, c("undp_hdi", "wbgi_cce", "former_col", "wdi_gdpc")]) # frequency table for categorical variable table(world.data$former_col) ## ------------------------------------------------------------------------ # we remove NA's world.data <- na.omit(world.data) # run the multiple regression m1 <- lm(undp_hdi ~ wbgi_cce + former_col, data = world.data) # regression table screenreg(m1) ## ------------------------------------------------------------------------ # look at cofficients of m1 m1$coefficients ## ------------------------------------------------------------------------ # scatter plot plot(undp_hdi ~ wbgi_cce, data = world.data, xlab = "control of corruption", ylab = "quality of life", main = "effect of a dummy") # the regression line when former_col = 0 abline(coef = c(m1$coefficients[1], m1$coefficients[2]), lty = "solid") # the regression line when former_col = 1 abline(coef = c(m1$coefficients[1] + m1$coefficients[3], m1$coefficients[2]), lty = "dashed") # add a legend to the plot legend("bottomright", c("former_col = 0", "former_col = 1"), lty = c("solid", "dashed")) ## ------------------------------------------------------------------------ # first model z.out1 <- zelig(undp_hdi ~ wbgi_cce + former_col, model = "ls", data = world.data, cite = FALSE) # second model z.out2 <- zelig(undp_hdi ~ wbgi_cce + former_col + wbgi_cce:former_col, model = "ls", data = world.data, cite = FALSE) # F-test (see seminar 5) anova(z.out1$result, z.out2$result) # regression table screenreg(list(z.out1, z.out2)) ## ------------------------------------------------------------------------ # intercepts and slopes of control of corruption intercepts <- c(z.out2$res$coeff[1], z.out2$res$coeff[1] + z.out2$res$coeff[3]) slopes <- c(z.out2$res$coeff[2], z.out2$res$coeff[2] + z.out2$res$coeff[4]) # rbind combines separate vectors or matrices or data frames row-wise cond.efcts <- rbind(intercepts,slopes) # colnames lets you define names of columns of a matrix like object colnames(cond.efcts) <- c("never colony", "former colony") # lets examine the effects # the function round lets us round to 2 digits round(cond.efcts, digits = 2) ## ------------------------------------------------------------------------ # set covariates for countries that weren't colonised x.out1 <- setx(z.out2, former_col = 0, wbgi_cce = -3:2) # set covariates for colonised countries x.out2 <- setx(z.out2, former_col = 1, wbgi_cce = -3:2) # simulate s.out <- sim(z.out2, x = x.out1, x1 = x.out2) # plot results # you will run into problems here if you forgot to do na.omit(world.data) plot(s.out, xlab = "Control of Corruption", ylab = "predicted value of Human Development Index", main = "Effect of Control of Corruption by Colonial Past" ) # add text to Zelig plot where x and y are plot coordinates text( x = -1, y = .8, labels = "no former colonies") text( x = -1, y = .4, labels = "former colonies") ## ------------------------------------------------------------------------ # we check min and max of variables of interest to draw plot limits (xlim & ylim) summary(world.data[, c("undp_hdi", "wdi_gdpc") ]) # plot of relationship b/w income & the human development index plot( undp_hdi ~ wdi_gdpc, data = world.data, xlim = c( xmin = 0, xmax = 65000), ylim = c( ymin = 0, ymax = 1), frame = FALSE, xlab = "World Bank GDP/captia", ylab = "Human Development Index", main = "Relationship b/w Income and Quality of Life") # add the regression line using zelig() within abline() abline(zelig(undp_hdi ~ wdi_gdpc, data = world.data, model = "ls", cite = FALSE)) # lowess line lines(lowess(world.data$wdi_gdpc, world.data$undp_hdi), col="red") ## ------------------------------------------------------------------------ # we include a quadradic term for income z.out <- zelig(undp_hdi ~ wdi_gdpc + I(wdi_gdpc^2), data = world.data, model = "ls", cite = FALSE) # regression output summary(z.out) # do we need the quadratic term or is the relationship in fact linear? # use an F-Test to check z.out.base <- zelig(undp_hdi ~ wdi_gdpc, data = world.data, model = "ls", cite = FALSE) anova(z.out.base$res, z.out$res) # setting covariates; GDP/captia is a sequence from 0 to 45000 by steps of 1000 x.out <- setx(z.out, wdi_gdpc = seq(0, 45000, 1000)) # simulate using our model and our covariates s.out <- sim(z.out, x = x.out) # plot the results plot(s.out) ## ------------------------------------------------------------------------ # we order by ex-colony and then hdi head(arrange(world.data, former_col, undp_hdi)) # note: to change the data set you would have to assign it: # world.data <- arrange(world.data, former_col, undp_hdi) # the default order is ascending, for descending use: head(arrange(world.data, desc(former_col, undp_hdi)))