###########################################################################
# PUBLG100: Introduction to Quantitative Methods
#
# Week 6 Seminar: Assumptions and Violations of Assumptions
#
#

# Set your working directory
#
# CAUTION: Make sure the directory you specify here matches the working directory on your computer.
#          We're using N:/PUBLG100 for illustration purposes only, and it will only work if you're
#          using a UCL desktop. If you're using your own laptop, replace N:/PUBLG100 with the
#          appropriate directory (or folder).
# Relative file names passed to read.csv() further down are resolved against
# this directory. setwd() stops with an error if the directory does not exist.
setwd("N:/PUBLG100")


# Verify that your working directory is set correctly
getwd()


## ------------------------------------------------------------------------
# clear environment
# Removes every object that ls() lists in the current environment, so the
# analysis below starts from an empty workspace. Be aware that this also
# discards any objects you created before running the script.
rm(list = ls())

## ----eval = FALSE--------------------------------------------------------
## install.packages("lmtest")
## install.packages("sandwich")

## ----message = FALSE-----------------------------------------------------
library(lmtest)
library(sandwich)
library(texreg)
library(dplyr)

## ------------------------------------------------------------------------
# Load the shark attack data from the working directory and preview it
shark_attacks <- read.csv(file = "shark_attacks.csv")

head(shark_attacks)

## ------------------------------------------------------------------------
# Bivariate regression: shark attacks explained by ice cream sales only
model1 <- lm(SharkAttacks ~ IceCreamSales, data = shark_attacks)
screenreg(model1)

## ------------------------------------------------------------------------
# Pairwise correlations between the outcome, the candidate confounder
# (Temperature) and the original predictor
shark_attacks %>%
  select(SharkAttacks, Temperature, IceCreamSales) %>%
  cor()

## ------------------------------------------------------------------------
# Same regression, now also including Temperature as a predictor
model2 <- lm(SharkAttacks ~ IceCreamSales + Temperature, data = shark_attacks)

## ------------------------------------------------------------------------
# Show both models side by side to compare the IceCreamSales coefficient
screenreg(list(model1, model2))

## ------------------------------------------------------------------------
# Load the second data set from the working directory.
# NOTE(review): the file name suggests the 2013 Current Population Survey --
# confirm against the course materials.
cps <- read.csv("cps2013.csv")

## ------------------------------------------------------------------------
# Preview the first rows
head(cps)

## ------------------------------------------------------------------------
# Scatter plot of income (y-axis) against education (x-axis)
plot(income ~ education, data = cps)

## ------------------------------------------------------------------------
# Simple linear regression of income on education.
# Note: this reassigns model1, replacing the shark attack model fitted earlier.
model1 <- lm(income ~ education, data = cps)
summary(model1)

## ------------------------------------------------------------------------
# Redraw the scatter plot and overlay the fitted regression line in red
plot(income ~ education, data = cps)
abline(model1, col = "red")

## ------------------------------------------------------------------------
# Residuals against fitted values: if the error variance is constant, the
# vertical spread of the points should look similar across fitted values.
plot(fitted(model1), residuals(model1), main = "Residuals vs. Fitted")

## ------------------------------------------------------------------------
# Breusch-Pagan test (from lmtest) for heteroskedasticity in model1;
# the null hypothesis is constant error variance.
bptest(model1)

## ------------------------------------------------------------------------
# Heteroskedasticity-consistent (robust) standard errors for model1.
# vcovHC() (sandwich) estimates a robust covariance matrix and coeftest()
# (lmtest) re-runs the coefficient tests with it. The result is computed once
# and reused for both tables below instead of being recomputed for each call.
corrected_errors <- coeftest(model1, vcov = vcovHC(model1))

# Coefficient table built directly from the robust test results
screenreg(corrected_errors)

## ------------------------------------------------------------------------
# Regular model table for model1 with the robust standard errors and p-values
# swapped in. Columns are picked by name rather than by position so it is
# explicit which statistics are being overridden.
screenreg(model1,
          override.se = corrected_errors[, "Std. Error"],
          override.pval = corrected_errors[, "Pr(>|t|)"])

## ------------------------------------------------------------------------
# Split the sample by gender. which() drops rows where the comparison is NA,
# so only rows that positively match are kept.
cps_men <- cps[which(cps$gender == "Male"), ]
cps_women <- cps[which(cps$gender == "Female"), ]

## ------------------------------------------------------------------------
# Base R: sort each subset by income (highest first) and show the top five rows
cps_men[order(cps_men$income, decreasing = TRUE), ] %>%
  head(n = 5)
cps_women[order(cps_women$income, decreasing = TRUE), ] %>%
  head(n = 5)

## ------------------------------------------------------------------------
# dplyr: the five highest incomes within each gender in a single pipeline,
# listed by gender and then by income in descending order
group_by(cps, gender) %>%
  top_n(n = 5, wt = income) %>%
  arrange(gender, desc(income))