###########################################################################
# PUBLG100: Introduction to Quantitative Methods
#
# Week 6 Seminar: Assumptions and Violations of Assumptions
#
# The script walks through three topics:
#   1. Heteroskedasticity: diagnosing it and reporting robust standard
#      errors for a regression of hourly earnings on years of education.
#   2. Omitted variable bias: shark attacks vs. ice cream sales, with and
#      without controlling for temperature.
#   3. Ranking observations within groups, first with base R and then
#      with dplyr.
###########################################################################

# Setup ----

# Change your working directory. The seminar drive only exists on the lab
# machines, so only switch to it when it is actually there; otherwise the
# script would stop here with an error.
if (dir.exists("N:/PUBLG100")) {
  setwd("N:/PUBLG100")
}

# Check your working directory
getwd()

# Clear environment.
# NOTE(review): this wipes every object in the global environment. It is kept
# because it is part of the seminar workflow, but restarting the R session is
# the safer way to get a clean slate.
rm(list = ls())

# Install only the packages that are missing, instead of re-installing on
# every run. Wrapped in local() so the helper variables do not end up in the
# global environment.
local({
  needed <- c("lmtest", "sandwich", "texreg", "dplyr")
  is_installed <- vapply(
    needed,
    requireNamespace,
    logical(1),
    quietly = TRUE
  )
  if (!all(is_installed)) {
    install.packages(needed[!is_installed])
  }
})

library(lmtest)
library(sandwich)
library(texreg)
library(dplyr)

# Heteroskedasticity ----

income_data <- read.csv("http://uclspp.github.io/PUBLG100/data/cps2013.csv")

# Inspect the structure and the first rows of the data
str(income_data)
head(income_data)

# Scatter plot of earnings against education
plot(hourly_earnings ~ years_of_education, data = income_data)

# Simple linear regression of earnings on education
model1 <- lm(hourly_earnings ~ years_of_education, data = income_data)
summary(model1)

# Same scatter plot with the fitted regression line on top
plot(hourly_earnings ~ years_of_education, data = income_data)
abline(model1, col = "red")

# Residuals against fitted values
plot(fitted(model1), residuals(model1), main = "Residuals vs. Fitted")

# Breusch-Pagan test from lmtest
bptest(model1)

# Coefficient test using the vcovHC() covariance matrix from sandwich.
# Computed once and reused below.
corrected_errors <- coeftest(model1, vcov = vcovHC(model1))
corrected_errors

# Regression table with the corrected standard errors and p-values swapped in.
# NOTE(review): newer texreg releases appear to call the second argument
# `override.pvalues`; `override.pval` is kept as in the original -- confirm
# against the installed texreg version.
screenreg(
  model1,
  override.se = corrected_errors[, "Std. Error"],
  override.pval = corrected_errors[, "Pr(>|t|)"]
)

# Omitted variable bias ----

shark_attacks <- read.csv(
  "http://uclspp.github.io/PUBLG100/data/shark_attacks.csv"
)
head(shark_attacks)

# Note: `model1` is reused here and from now on refers to the shark attack
# model, not the earnings model above.
model1 <- lm(SharkAttacks ~ IceCreamSales, data = shark_attacks)
screenreg(model1)

# Pairwise correlations between the three variables
cor(shark_attacks[c("SharkAttacks", "Temperature", "IceCreamSales")])

# Add temperature as a second predictor
model2 <- lm(SharkAttacks ~ IceCreamSales + Temperature, data = shark_attacks)

# Compare the two models side by side
screenreg(list(model1, model2))

# Ranking within groups ----

# Base R approach: split by gender, rank within each subset, then stack.
subset_men <- subset(income_data, gender == "Male")
subset_women <- subset(income_data, gender == "Female")

# Negating earnings makes rank 1 the highest earner; ties share the
# smallest rank.
subset_men$rank <- rank(-subset_men$hourly_earnings, ties.method = "min")
subset_women$rank <- rank(-subset_women$hourly_earnings, ties.method = "min")

ranked_data <- rbind(subset_men, subset_women)

# dplyr approach: the same ranking in a single pipeline. This overwrites the
# base R result above; the rank column is called `group_rank` here.
ranked_data <- income_data %>%
  group_by(gender) %>%
  mutate(group_rank = min_rank(desc(hourly_earnings))) %>%
  ungroup()  # drop the grouping so later steps work on a plain tibble

# First rows for men, ordered by rank
ranked_data %>%
  filter(gender == "Male") %>%
  arrange(group_rank) %>%
  head()

# First rows for women, ordered by rank
ranked_data %>%
  filter(gender == "Female") %>%
  arrange(group_rank) %>%
  head()