###########################################################################
# PUBLG100: Introduction to Quantitative Methods
#
# Week 6 Solutions: Assumptions and Violations of Assumptions
#
#

## ------------------------------------------------------------------------
# Start from an empty workspace: remove every object that ls() lists in the
# current environment. Packages that are already attached stay attached.
rm(list = ls())

## ----message = FALSE-----------------------------------------------------
library(texreg)
library(lmtest)
library(sandwich)
library(dplyr)

## ------------------------------------------------------------------------
# Read the WDI_2012 and polity4_2012 files and combine them. merge() with
# its default settings keeps only rows whose iso3n value occurs in both.
wdi <- read.csv(
  file = "https://uclspp.github.io/PUBLG100/data/WDI_2012.csv"
)
polity <- read.csv(
  file = "https://uclspp.github.io/PUBLG100/data/polity4_2012.csv"
)

merged_data <- merge(x = wdi, y = polity, by = "iso3n")

## ------------------------------------------------------------------------
# Keep only the rows that have no missing value in any of the five
# candidate predictors used by the models below.
merged_data <- merged_data[
  complete.cases(
    merged_data[, c(
      "UnemploymentRate", "Inflation", "MilitaryExpenditure",
      "HealthExpenditure", "MobileSubscribers"
    )]
  ),
]

## ------------------------------------------------------------------------
# Bivariate models: regress polity2 on each candidate predictor separately,
# every model fitted to merged_data.
model_1 <- lm(polity2 ~ UnemploymentRate, data = merged_data)
model_2 <- lm(polity2 ~ Inflation, data = merged_data)
model_3 <- lm(polity2 ~ MilitaryExpenditure, data = merged_data)
model_4 <- lm(polity2 ~ HealthExpenditure, data = merged_data)
model_5 <- lm(polity2 ~ MobileSubscribers, data = merged_data)

## ------------------------------------------------------------------------
# Show the five bivariate models side by side, to three decimal places.
screenreg(
  list(model_1, model_2, model_3, model_4, model_5),
  digits = 3,
  custom.model.names = c(
    "Model 1", "Model 2", "Model 3", "Model 4", "Model 5"
  )
)

## ------------------------------------------------------------------------
# Two-predictor models: keep MilitaryExpenditure (the predictor of model_3)
# and add each of the remaining candidates in turn.
model_6 <- lm(
  polity2 ~ MilitaryExpenditure + UnemploymentRate,
  data = merged_data
)
model_7 <- lm(
  polity2 ~ MilitaryExpenditure + Inflation,
  data = merged_data
)
model_8 <- lm(
  polity2 ~ MilitaryExpenditure + HealthExpenditure,
  data = merged_data
)
model_9 <- lm(
  polity2 ~ MilitaryExpenditure + MobileSubscribers,
  data = merged_data
)

# Compare the one-predictor baseline (model_3) with the four extensions.
screenreg(
  list(model_3, model_6, model_7, model_8, model_9),
  digits = 3,
  custom.model.names = c(
    "Model 3", "Model 6", "Model 7", "Model 8", "Model 9"
  )
)

## ------------------------------------------------------------------------
# Three-predictor models: keep the two predictors of model_8
# (MilitaryExpenditure and HealthExpenditure) and add each remaining
# candidate in turn.
model_10 <- lm(
  polity2 ~ MilitaryExpenditure + HealthExpenditure + UnemploymentRate,
  data = merged_data
)
model_11 <- lm(
  polity2 ~ MilitaryExpenditure + HealthExpenditure + Inflation,
  data = merged_data
)
model_12 <- lm(
  polity2 ~ MilitaryExpenditure + HealthExpenditure + MobileSubscribers,
  data = merged_data
)

# Compare the two-predictor baseline (model_8) with the three extensions.
screenreg(
  list(model_8, model_10, model_11, model_12),
  digits = 3,
  custom.model.names = c("Model 8", "Model 10", "Model 11", "Model 12")
)

## ------------------------------------------------------------------------
# Four-predictor models: keep the three predictors of model_12 and add each
# of the two candidates that are still unused.
model_13 <- lm(
  polity2 ~ MilitaryExpenditure + HealthExpenditure + MobileSubscribers +
    UnemploymentRate,
  data = merged_data
)
model_14 <- lm(
  polity2 ~ MilitaryExpenditure + HealthExpenditure + MobileSubscribers +
    Inflation,
  data = merged_data
)

# Compare the three-predictor baseline (model_12) with the two extensions.
screenreg(
  list(model_12, model_13, model_14),
  digits = 3,
  custom.model.names = c("Model 12", "Model 13", "Model 14")
)

## ------------------------------------------------------------------------
# Breusch-Pagan test (lmtest) for heteroskedasticity in model_12.
bptest(model_12)

## ------------------------------------------------------------------------
# Re-test the coefficients of model_12 using the heteroskedasticity-
# consistent covariance matrix from sandwich::vcovHC(). The argument is
# spelled `vcov.` in full, as coeftest() declares it, rather than relying on
# partial matching of `vcov`.
corrected_errors <- coeftest(model_12, vcov. = vcovHC(model_12))

# Print model_12 again, with the standard errors and p-values replaced by
# the corrected ones computed above.
screenreg(
  model_12,
  digits = 3,
  custom.model.names = c("Model 12"),
  override.se = corrected_errors[, "Std. Error"],
  override.pval = corrected_errors[, "Pr(>|t|)"]
)

## ------------------------------------------------------------------------
# Remove every object that ls() lists in the current environment, so the
# models of the previous exercise cannot be confused with the ones fitted
# below (the names model_1, model_2, ... are reused). Attached packages
# stay attached.
rm(list = ls())

## ------------------------------------------------------------------------
# Load the MA_Schools dataset.
ma_schools <- read.csv("https://uclspp.github.io/PUBLG100/data/MA_Schools.csv")

## ------------------------------------------------------------------------
# Summarise the dependent variable (score8) and the four explanatory
# variables used by the models below. Each call is a separate top-level
# statement so that its result is auto-printed.
summary(ma_schools$score8)
summary(ma_schools$income)
summary(ma_schools$lunch)
summary(ma_schools$english)
summary(ma_schools$stratio)

## ------------------------------------------------------------------------
# Full specification: score8 explained by all four candidate predictors.
model_1 <- lm(
  formula = score8 ~ stratio + english + lunch + income,
  data = ma_schools
)
summary(model_1)

## ------------------------------------------------------------------------
# Refit the full model four times, leaving out one predictor each time.
# update() re-evaluates the lm() call of model_1 with the edited formula;
# the trailing comment lists the predictors that remain.
model_2 <- update(model_1, . ~ . - income)   # stratio + english + lunch
model_3 <- update(model_1, . ~ . - lunch)    # stratio + english + income
model_4 <- update(model_1, . ~ . - english)  # stratio + lunch + income
model_5 <- update(model_1, . ~ . - stratio)  # english + lunch + income

## ------------------------------------------------------------------------
# Show the full model next to the four reduced models.
screenreg(
  list(model_1, model_2, model_3, model_4, model_5),
  digits = 3,
  custom.model.names = c(
    "Model 1", "Model 2", "Model 3", "Model 4", "Model 5"
  )
)

## ------------------------------------------------------------------------
# Starting from model_4 (stratio + lunch + income), drop one predictor at a
# time. update() refits the lm() call of model_4 with the edited formula;
# the trailing comment lists the predictors that remain.
model_6 <- update(model_4, . ~ . - income)   # stratio + lunch
model_7 <- update(model_4, . ~ . - lunch)    # stratio + income
model_8 <- update(model_4, . ~ . - stratio)  # lunch + income

# Show model_4 next to its three reduced versions.
screenreg(
  list(model_4, model_6, model_7, model_8),
  digits = 3,
  custom.model.names = c("Model 4", "Model 6", "Model 7", "Model 8")
)

## ------------------------------------------------------------------------
# Starting from model_8 (lunch + income), drop one predictor at a time to
# obtain the two single-predictor models.
model_9 <- update(model_8, . ~ . - income)  # lunch only
model_10 <- update(model_8, . ~ . - lunch)  # income only

# Show model_8 next to the two single-predictor models.
screenreg(
  list(model_8, model_9, model_10),
  digits = 3,
  custom.model.names = c("Model 8", "Model 9", "Model 10")
)

## ------------------------------------------------------------------------
# Breusch-Pagan test (lmtest) for heteroskedasticity in model_8.
bptest(model_8)

## ------------------------------------------------------------------------
# Remove every object that ls() lists in the current environment before the
# next exercise, which reuses the names model_1, model_2, ... Attached
# packages stay attached.
rm(list = ls())

## ------------------------------------------------------------------------
# Load the CA_Schools dataset.
ca_schools <- read.csv("https://uclspp.github.io/PUBLG100/data/CA_Schools.csv")

## ------------------------------------------------------------------------
# Keep only the rows whose grades value is "KK-08" ...
ca_schools <- filter(ca_schools, grades == "KK-08")

# ... then derive the student-teacher ratio and the mean of the reading and
# maths scores as new columns.
ca_schools <- mutate(
  ca_schools,
  stratio = students / teachers,
  avg_score = (read + math) / 2
)

## ------------------------------------------------------------------------
# Summarise the dependent variable (avg_score) and the four explanatory
# variables used by the models below. Each call is a separate top-level
# statement so that its result is auto-printed.
summary(ca_schools$avg_score)
summary(ca_schools$income)
summary(ca_schools$lunch)
summary(ca_schools$english)
summary(ca_schools$stratio)

## ------------------------------------------------------------------------
# Full specification: avg_score explained by all four candidate predictors.
model_1 <- lm(
  formula = avg_score ~ stratio + english + lunch + income,
  data = ca_schools
)

# Refit the full model four times, leaving out one predictor each time.
# update() re-evaluates the lm() call of model_1 with the edited formula;
# the trailing comment lists the predictors that remain.
model_2 <- update(model_1, . ~ . - income)   # stratio + english + lunch
model_3 <- update(model_1, . ~ . - lunch)    # stratio + english + income
model_4 <- update(model_1, . ~ . - english)  # stratio + lunch + income
model_5 <- update(model_1, . ~ . - stratio)  # english + lunch + income

## ------------------------------------------------------------------------
# Show the full model next to the four reduced models.
screenreg(
  list(model_1, model_2, model_3, model_4, model_5),
  digits = 3,
  custom.model.names = c(
    "Model 1", "Model 2", "Model 3", "Model 4", "Model 5"
  )
)