###########################################################################
# PUBLG100: Introduction to Quantitative Methods
#
# Week 7 Solutions: Interactions
#
#

## ----message=FALSE-------------------------------------------------------
library(foreign)
library(texreg) 
library(Zelig)
library(dplyr)

## ------------------------------------------------------------------------
rm(list = ls())

## ------------------------------------------------------------------------
qog_data <- read.dta("QoG2012.dta")

qog_data <- rename(qog_data, 
                   hdi = undp_hdi, 
                   gdp = wdi_gdpc)

## ------------------------------------------------------------------------
z.out <- zelig(hdi ~ gdp + I(gdp^2), model = "ls", data = qog_data)

summary(z.out)

## ------------------------------------------------------------------------
x.low <- setx(z.out, gdp = 5000)
x.high <- setx(z.out, gdp = 15000)

s.out <- sim(z.out, x = x.low, x1 = x.high)

summary(s.out)

## ----echo = FALSE--------------------------------------------------------
# IGNORE THESE FUNCTIONS. They are used for generating course website
get_first_diff <- function(x) {
  mean(x$sim.out$x1$fd[[1]])
}

get_first_diff_str <- function(x) {
  sprintf("`%.2f`", get_first_diff(x))
}

get_first_diff_ci <- function(x) {
  quantile(x$sim.out$x1$fd[[1]], c(0.025, 0.975))
}

get_first_diff_ci_str <- function(x, sep = "and") {
  ci <- get_first_diff_ci(x)
  
  sprintf("`%.2f` %s `%.2f`", ci[[1]], sep, ci[[2]])
}

## ------------------------------------------------------------------------
summary(qog_data$hdi)

## ----echo = FALSE--------------------------------------------------------
# IGNORE THESE FUNCTIONS. They are used for generating course website
show_range <- function(x) {
  low <- min(x, na.rm = TRUE)
  high <- max(x, na.rm = TRUE)

  if (low < 0) {
    sprintf("`%.2f - (%.2f) = %.2f`", high, low, high - low)
  } else {
    sprintf("`%.2f - %.2f = %.2f`", high, low, high - low)
  }
}

show_sum <- function(x) {
  if (x[2] < 0) {
    sprintf("`%.2f + (%.2f) = %.2f`", x[1], x[2], x[1] + x[2])
  } else {
    sprintf("`%.2f + %.2f = %.2f`", x[1], x[2], x[1] + x[2])
  }
}

show_percent_increase <- function(x, y) {
  low <- min(x, na.rm = TRUE)
  high <- max(x, na.rm = TRUE)
  pct <- y / (high - low)
  
  sprintf("`%.2f / %.2f = %0.2f` or `%.0f%%`", y, high - low, pct, pct*100)
}

show_first_diff_percent <- function(x, s) {
  show_percent_increase(x, get_first_diff(s))
}

## ------------------------------------------------------------------------
0.182 / diff(range(qog_data$hdi, na.rm = TRUE))

## ------------------------------------------------------------------------
x.low <- setx(z.out, gdp = 25000)
x.high <- setx(z.out, gdp = 35000)

s.out <- sim(z.out, x = x.low, x1 = x.high)

summary(s.out)

## ------------------------------------------------------------------------
qog_data <- rename(qog_data, pol_stability = wbgi_pse, indep_judiciary = h_j)

## ------------------------------------------------------------------------
table(qog_data$indep_judiciary)

## ------------------------------------------------------------------------
qog_data$indep_judiciary[qog_data$indep_judiciary == -5] = 0

## ------------------------------------------------------------------------
base.model <- lm(pol_stability ~ indep_judiciary + former_col, 
                 data = qog_data)

interaction.model <- lm(pol_stability ~ indep_judiciary + former_col + 
                          indep_judiciary:former_col, 
                        data = qog_data)

screenreg(list(base.model, interaction.model))

## ------------------------------------------------------------------------
anova(base.model, interaction.model)

## ----echo = FALSE--------------------------------------------------------
show_coef <- function(x, i) {
  sprintf("`%.2f`",  coefficients(x)[i])
}

show_coef_diff <- function(x, i, j) {
  cx <- coefficients(x)
  show_sum(c(cx[i], cx[j]))
}

## ------------------------------------------------------------------------
summary(qog_data$pol_stability)

## ------------------------------------------------------------------------
diff(range(qog_data$pol_stability))

## ------------------------------------------------------------------------
1.45 / diff(range(qog_data$pol_stability))

## ------------------------------------------------------------------------
0.61 / diff(range(qog_data$pol_stability))

## ------------------------------------------------------------------------
qog_data <- rename(qog_data, corruption_control = wbgi_cce)

base.model <- lm(hdi ~ corruption_control + indep_judiciary, data = qog_data)

interaction.model <- lm(hdi ~ corruption_control * indep_judiciary, data = qog_data)

screenreg(list(base.model, interaction.model))

## ------------------------------------------------------------------------
anova(base.model, interaction.model)

## ----eval----------------------------------------------------------------
z.out <- zelig(hdi ~ corruption_control * indep_judiciary, 
               data = qog_data, model = "ls")

summary(qog_data$corruption_control)

x.out1 <- setx(z.out, indep_judiciary = 0, corruption_control = seq(-1.7, 2.5, 0.1) )
x.out2 <- setx(z.out, indep_judiciary = 1, corruption_control = seq(-1.7, 2.5, 0.1) )

s.out <- sim(z.out, x.out1, x.out2)

# plot results
ci.plot(s.out,
        ci = 95,
        main = "Effect of Control of Corruption Conditional on Indep. Judiciary",
        xlab = "Control of Corruption",
        ylab = "Quality of Life")

## ----eval = FALSE--------------------------------------------------------
## rm(list = ls())

## ------------------------------------------------------------------------
ca_schools <- read.dta("caschool.dta")

# scatter plot
plot(testscr ~ avginc, data = ca_schools,
     main = "Relationship b/w Income and Test Scores",
     xlab = "Average District Income in $1000's",
     ylab = "Average Test Score of Reading and Math")

## ------------------------------------------------------------------------
model1 <- lm(testscr ~ avginc, data = ca_schools)
model2 <- lm(testscr ~ avginc + I(avginc^2), data = ca_schools)

screenreg(list(model1, model2))

## ------------------------------------------------------------------------
anova(model1, model2)

## ------------------------------------------------------------------------
z.out <- zelig(testscr ~ avginc + I(avginc^2), data = ca_schools, model = "ls")

x.low <- setx(z.out, avginc = quantile(ca_schools$avginc, 0.10))
x.high <- setx(z.out, avginc = quantile(ca_schools$avginc, 0.15))

s.out <- sim(z.out, x.low, x.high)
summary(s.out)

## ------------------------------------------------------------------------
quantile(ca_schools$avginc, c(0.10, 0.15))

## ------------------------------------------------------------------------
x.low <- setx(z.out, avginc = quantile(ca_schools$avginc, 0.75))
x.high <- setx(z.out, avginc = quantile(ca_schools$avginc, 0.75) + 0.84)

s.out <- sim(z.out, x.low, x.high)

summary(s.out)