###########################################################################
# PUBLG100: Introduction to Quantitative Methods
#
# Week 6 Seminar: Assumptions and Violations of Assumptions
#
#

# Change your working directory (the path is machine-specific; adjust it to
# the folder you use for this course)
setwd("N:/PUBLG100")

# Check your working directory
getwd()


## ------------------------------------------------------------------------
# clear environment
rm(list = ls())

## ------------------------------------------------------------------------
# Install lmtest and sandwich only when they are not already available, so
# that re-running the script does not download them again every time.
required_packages <- c("lmtest", "sandwich")
is_installed <- vapply(required_packages, requireNamespace, logical(1),
                       quietly = TRUE)
if (!all(is_installed)) {
  install.packages(required_packages[!is_installed])
}

## ----message = FALSE-----------------------------------------------------
library(lmtest)
library(sandwich)
library(texreg)
library(dplyr)

## ------------------------------------------------------------------------
# Read cps2013.csv from the course website into a data frame.
income_data <- read.csv(
  file = "http://uclspp.github.io/PUBLG100/data/cps2013.csv"
)

## ------------------------------------------------------------------------
# Inspect the structure of the data and its first six rows.
str(object = income_data)
head(x = income_data, n = 6L)

## ------------------------------------------------------------------------
# Scatterplot of hourly earnings (y-axis) against years of education (x-axis).
plot(hourly_earnings ~ years_of_education, data = income_data)

## ------------------------------------------------------------------------
# Fit a linear regression of hourly earnings on years of education and print
# the model summary.
model1 <- lm(hourly_earnings ~ years_of_education, data = income_data)
summary(model1)

## ------------------------------------------------------------------------
# Redraw the scatterplot and overlay the fitted regression line in red.
plot(hourly_earnings ~ years_of_education, data = income_data)
abline(model1, col = "red")

## ------------------------------------------------------------------------
# Plot the residuals of model1 against its fitted values, to see whether the
# spread of the residuals changes with the size of the fitted value.
plot(fitted(model1), residuals(model1), main = "Residuals vs. Fitted")

## ------------------------------------------------------------------------
# Breusch-Pagan test for heteroskedasticity in model1.
bptest(model1)

## ------------------------------------------------------------------------
# Re-test the coefficients of model1 using the covariance matrix returned by
# vcovHC(). The result is computed once, printed, and then reused below
# rather than being recomputed for each use. The argument is spelled `vcov.`
# in full so the call does not rely on partial argument matching.
corrected_errors <- coeftest(model1, vcov. = vcovHC(model1))
corrected_errors

## ------------------------------------------------------------------------
# Print the regression table for model1 with the standard errors and p-values
# replaced by the corrected ones computed above.
screenreg(model1,
          override.se = corrected_errors[, "Std. Error"],
          override.pval = corrected_errors[, "Pr(>|t|)"])

## ------------------------------------------------------------------------
# Read the shark attacks data from the course website and show the first rows.
shark_attacks <- read.csv("http://uclspp.github.io/PUBLG100/data/shark_attacks.csv")

head(shark_attacks)

## ------------------------------------------------------------------------
# Regress shark attacks on ice cream sales only.
# NOTE: model1 is reassigned here and no longer refers to the earnings model
# fitted earlier in this script.
model1 <- lm(SharkAttacks ~ IceCreamSales, data = shark_attacks)
screenreg(model1)

## ------------------------------------------------------------------------
# Pairwise correlations between shark attacks, temperature and ice cream sales.
cor(shark_attacks[c("SharkAttacks", "Temperature", "IceCreamSales")])

## ------------------------------------------------------------------------
# Add temperature to the regression as a second explanatory variable.
model2 <- lm(SharkAttacks ~ IceCreamSales + Temperature, data = shark_attacks)

## ------------------------------------------------------------------------
# Show both models side by side to compare the IceCreamSales coefficient
# with and without Temperature in the model.
screenreg(list(model1, model2))

## ------------------------------------------------------------------------
# Split the income data into one data frame per gender. which() discards rows
# where the comparison is NA, and drop = FALSE keeps the result a data frame.
subset_men <- income_data[
  which(income_data[["gender"]] == "Male"), , drop = FALSE
]
subset_women <- income_data[
  which(income_data[["gender"]] == "Female"), , drop = FALSE
]

## ------------------------------------------------------------------------
# Rank earnings within each subset: negating the values gives rank 1 to the
# highest earner, and tied values share the smallest rank of the tie.
subset_men[["rank"]] <- rank(-subset_men[["hourly_earnings"]],
                             ties.method = "min")
subset_women[["rank"]] <- rank(-subset_women[["hourly_earnings"]],
                               ties.method = "min")

## ------------------------------------------------------------------------
# Stack the two ranked subsets back into a single data frame, men first.
ranked_data <- do.call(rbind, list(subset_men, subset_women))

## ------------------------------------------------------------------------
# Same within-gender ranking done with dplyr; this overwrites the ranked_data
# built with rbind() above. min_rank(desc(...)) gives rank 1 to the highest
# hourly earnings within each gender. ungroup() removes the grouping once the
# ranks are computed, so later operations on ranked_data are not grouped.
ranked_data <- income_data %>%
  group_by(gender) %>%
  mutate(group_rank = min_rank(desc(hourly_earnings))) %>%
  ungroup()

## ------------------------------------------------------------------------
# Top-ranked (highest earning) men.
ranked_data %>%
  filter(gender == "Male") %>%
  arrange(group_rank) %>%
  head()

# Top-ranked (highest earning) women.
ranked_data %>%
  filter(gender == "Female") %>%
  arrange(group_rank) %>%
  head()