########################################################################### # PUBLG100: Introduction to Quantitative Methods # # Week 7 Solutions: Interactions # # ## ----message=FALSE------------------------------------------------------- library(foreign) # to load .dta files library(texreg) # to print nice regression tables library(Zelig) # to model & illustrate uncertainty ## ------------------------------------------------------------------------ # clear workspace rm(list = ls()) # load data set from seminar page data.set <- read.dta("http://uclspp.github.io/PUBLG100/data/QoG2012.dta") # exercise subset exercise.subset <- data.set[, c("undp_hdi", "wdi_gdpc")] # removing NA's on our two variables of interest exercise.subset <- na.omit(exercise.subset) # running regression with squared GDP/captia exercise1.reg <- zelig(undp_hdi ~ wdi_gdpc + I(wdi_gdpc^2), model = "ls", data = exercise.subset, cite = FALSE) screenreg(exercise1.reg) ## ------------------------------------------------------------------------ ## effect of an increase from $5,000 to $15,000 # set.seed() makes your results replicable, it does not # matter which values you put inside set.seed(123) # setting covariates set.gdp5000 <- setx(exercise1.reg, wdi_gdpc = 5000) set.gdp15000 <- setx(exercise1.reg, wdi_gdpc = 15000) # simulating sim.5000.to.15000 <- sim(exercise1.reg, x = set.gdp5000, x1 = set.gdp15000) # checking for the first differences summary(sim.5000.to.15000) ## effect of an increase from 25,000 to 35,000 # setting covariates set.gdp25000 <- setx(exercise1.reg, wdi_gdpc = 25000) set.gdp35000 <- setx(exercise1.reg, wdi_gdpc = 35000) # simulating sim.25000.to.35000 <- sim(exercise1.reg, x = set.gdp25000, x1 = set.gdp35000) # checking for the first differences summary(sim.25000.to.35000) # checking summary statistics of wealth summary(exercise.subset$wdi_gdpc) # checking 25th percentile and 75th percentile quantile(exercise.subset$wdi_gdpc, probs = c(0.25, 0.75)) # looking at the distribution of wealth hist(exercise.subset$wdi_gdpc) # checking summary statistic of the dependent variable summary(exercise.subset$undp_hdi) diff(range(exercise.subset$undp_hdi)) # the effect of increasing from $5000 to $15000 summary(sim.5000.to.15000) # calculating the effect in percentage terms (compared to the range of DV) 0.182 / diff(range(exercise.subset$undp_hdi)) # the effect of increasing from $25000 to $35000 summary(sim.25000.to.35000) # calculating the effect in percentage (compared to range of DV) 0.041 / diff(range(exercise.subset$undp_hdi)) ## ------------------------------------------------------------------------ # clear workspace rm(list = ls()) # load data set data.set <- read.dta("http://uclspp.github.io/PUBLG100/data/QoG2012.dta") # subset for analysis exercise2.subset <- data.set[, c("wbgi_pse", "h_j", "former_col")] # remove missings from subset exercise2.subset <- na.omit(exercise2.subset) # estimate model without interaction base.model <- zelig(wbgi_pse ~ h_j + former_col, model = "ls", data = exercise2.subset, cite = FALSE) # estimate model including the interaction interaction.model <- zelig(wbgi_pse ~ h_j + former_col + h_j*former_col, model = "ls", data = exercise2.subset, cite = FALSE) # comparing the models screenreg(list(base.model, interaction.model)) # F-test anova(base.model$result, interaction.model$result) ## ------------------------------------------------------------------------ # summary statistics of the dependent variable summary(exercise2.subset$wbgi_pse) # range of the dependnet variable diff(range(exercise2.subset$wbgi_pse)) # percentage change in pol. stability of having indep. judiciary if NOT colonised 0.24 / diff(range(exercise2.subset$wbgi_pse)) # percentage change in pol. stability of having indep. judiciary if colonised 0.10 / diff(range(exercise2.subset$wbgi_pse)) ## ------------------------------------------------------------------------ # clear workspace rm(list = ls()) # load data set data.set <- read.dta("http://uclspp.github.io/PUBLG100/data/QoG2012.dta") # subset for analysis exercise3.subset <- data.set[, c("undp_hdi", "h_j", "wbgi_cce")] # remove missings exercise3.subset <- na.omit(exercise3.subset) # estimate model without interaction base.model <- zelig(undp_hdi ~ wbgi_cce + h_j, model = "ls", data = exercise3.subset, cite = FALSE) # estimate interaction model interaction.model <- zelig(undp_hdi ~ wbgi_cce + h_j + wbgi_cce:h_j, model = "ls", data = exercise3.subset, cite = FALSE) # output table screenreg(list(base.model, interaction.model)) # F-Test anova(base.model$result, interaction.model$result) ## ------------------------------------------------------------------------ # illustration of results # range of control of corruption summary(exercise3.subset$wbgi_cce) # setting the two scenarios no.indep.jud.setx <- setx(interaction.model, h_j = 0, wbgi_cce = seq(-1.7, 2.5, 0.1) ) an.indep.jud.setx <- setx(interaction.model, h_j = 1, wbgi_cce = seq(-1.7, 2.5, 0.1) ) # simulation simulation.result <- sim(interaction.model, x = no.indep.jud.setx, x1 = an.indep.jud.setx) # plot results plot(simulation.result, main = "Effect of Control of Corruption Conditional on Indep. Judiciary", xlab = "Control of Corruption", ylab = "Quality of Life") ## ------------------------------------------------------------------------ # clear workspace rm(list = ls()) # load data set from your local folder where you saved the data set california.schools <- read.dta("caschool.dta") # check variable names names(california.schools) # summary stats of variables of interest summary(california.schools[, c("testscr", "avginc")]) # checking for missings in data set summary(california.schools) # scatter plot plot( testscr ~ avginc, data = california.schools, main = "Relationship b/w Income and Test Scores", xlab = "Average District Income in $1000's", ylab = "Average Test Score of Reading and Math") # regression exluding squared income base.reg <- zelig( testscr ~ avginc, model = "ls", data = california.schools, cite = FALSE) # regression including squared income square.reg <- zelig( testscr ~ avginc + I(avginc^2), model = "ls", data = california.schools, cite = FALSE) # regression tables screenreg(list(base.reg, square.reg)) # F-test anova(base.reg$result, square.reg$result) ## ------------------------------------------------------------------------ # descriptive summary statistics of district income summary(california.schools$avginc) # what is the income in the 10th percentile? low1 <- quantile(california.schools$avginc, 0.10) low1 # what is the income in the 15th percentile low2 <- quantile(california.schools$avginc, 0.15) low2 # how big is difference in terms of money low2 - low1 # illustrating effect of a move from the 10th to the 15th percentile x.out.low1 <- setx(square.reg, avginc = low1) x.out.low2 <- setx(square.reg, avginc = low2) # simulation s.out.low <- sim(square.reg, x = x.out.low1, x1 = x.out.low2) # check first difference summary(s.out.low) # dependent variable summary summary(california.schools$testscr) # range of the depdendent variable diff(range(california.schools$testscr)) # percent change of DV (percent change based on range of DV as observed in the sample) caused by IV 2.6 / diff(range(california.schools$testscr)) ## ------------------------------------------------------------------------ # what is the avg. income at the 75th percentile? high1 <- quantile(california.schools$avginc, 0.75) high1 # we add the same amount of wealth as we did for the poor districts high2 <- high1 + (low2 - low1) high2 # illustrating effect of adding $840 avg. district income to a district at 75th percentile x.out.high1 <- setx(square.reg, avginc = high1) x.out.high2 <- setx(square.reg, avginc = high2) # simulation s.out.low <- sim(square.reg, x = x.out.high1, x1 = x.out.high2) # check first difference summary(s.out.low) # percent change of DV (percent change based on range of DV as observed in the sample) caused by IV 2 / diff(range(california.schools$testscr))