########################################################################### # PUBLG100: Introduction to Quantitative Methods # # Week 5 Seminar: Multiple linear regression models # # # Change your working directory setwd("N:/PUBLG100") # Check your working directory getwd() ## ----message=FALSE------------------------------------------------------- library(foreign) # to load foreign file formats library(Zelig) # to predict from regression output and present uncertainty library(texreg) # to plot regression output to screen or file library(dplyr) # to manipulate data (load this library last!) rm(list = ls()) ## ------------------------------------------------------------------------ # load data set in Stata format world.data <- read.dta("http://uclspp.github.io/PUBLG100/data/qog_std_cs_jan15.dta") # check the dimensions of the data set dim(world.data) ## ------------------------------------------------------------------------ # select 5 variables from world.data and rename them world.data <- select(world.data, country = cname, pol.stability = wbgi_pse, latitude = lp_lat_abst, globalization = dr_ig, qual.inst = ti_cpi) ## ------------------------------------------------------------------------ # summarize the data set summary(world.data) ## ------------------------------------------------------------------------ # drop missing values world.data <- na.omit(world.data) ## ------------------------------------------------------------------------ # transform latitude variable world.data$latitude <- world.data$latitude * 90 ## ------------------------------------------------------------------------ # plot political instability against distance to equator plot(x = world.data$latitude, y = world.data$pol.stability, # x and y variables frame = FALSE, # no frame around the plot pch = 20, # point type main = "Relation b/w Distance to the Equator and Political Stability", # title xlab = "distance to equator in degrees", # x axis title ylab = "political stability" # y axis title ) # bivariate OLS model m1 <- lm(pol.stability ~ latitude, data = world.data ) # plot line of best fit abline(m1) # regression output screenreg(m1) ## ------------------------------------------------------------------------ # how many countries have political stability value lower than -0.58 length( which( world.data$pol.stability < -0.58) ) ## ------------------------------------------------------------------------ # the distance between the minimum and maximum of the dependent variable diff(range(world.data$pol.stability)) ## ------------------------------------------------------------------------ # percentage change in the dependent variable # of an increase in 10 degrees of the independent variable 0.2 / diff(range(world.data$pol.stability)) ## ------------------------------------------------------------------------ # countries with lowest 5 percent of quality of institutions filter(world.data, qual.inst <= quantile(qual.inst, 0.05)) ## ------------------------------------------------------------------------ # which values do the BRIC countries have filter(world.data, country %in% c("Brazil", "Russia", "India", "China")) ## ------------------------------------------------------------------------ # model with more explanatory variables m2 <- lm(pol.stability ~ latitude + globalization + qual.inst, data = world.data) # print model output to screen screenreg( list(m1, m2)) ## ------------------------------------------------------------------------ # joint significance (F-statistic) anova(m1, m2) ## ----echo = FALSE-------------------------------------------------------- # PLEASE IGNORE THIS CODE. IT'S ONLY HERE FOR GENERATING THE COURSE WEBSITE zelig_stat <- function(obj, stat, index = 1) { round(summary(s.out)$stats[stat][[1]][index], digits = 3) } set.seed(123) ## ------------------------------------------------------------------------ # multivariate regression model z.out <- zelig( pol.stability ~ latitude + globalization + qual.inst, model = "ls", data = world.data, cite = FALSE) # setting covariates x.out <- setx(z.out, qual.inst = seq(0, 10, 1)) # simulation s.out <- sim(z.out, x = x.out) # plot results plot(s.out, qi = "pv", xlab = "Quality of Institutions") ## ----fig.width=13, fig.height=11----------------------------------------- # 25th percentile of quality of institutions x.out.low <- setx(z.out, qual.inst = quantile(world.data$qual.inst, 0.25)) # 75th percentile of quality of institutions x.out.high <- setx(z.out, qual.inst = quantile(world.data$qual.inst, 0.75)) # simulation s.out <- sim(z.out, x = x.out.low, x1 = x.out.high) # summary output summary(s.out) # plot results plot(s.out)