########################################################################### # PUBLG100: Introduction to Quantitative Methods # # Week 9 Seminar: Binary models: Logit # # # Set your working directory # # CAUTION: Make sure the directory you specify here matches the working directory on your computer. # We're using N:/PUBLG100 only for illustration purposes and it would only work if you're # using a UCL dekstop. If you're using your own laptop, then replace N:/PUBLG100 with the # appropriate directory (or folder) setwd("N:/PUBLG100") # Verify that your working directory is set correctly getwd() ## ----message = FALSE----------------------------------------------------- library(foreign) library(Zelig) library(texreg) library(lmtest) library(dplyr) ## ------------------------------------------------------------------------ rm(list = ls()) ## ------------------------------------------------------------------------ bes <- read.dta("bes.dta") ## ----echo = FALSE-------------------------------------------------------- source("../common/knitr_hooks.R") ## ------------------------------------------------------------------------ bes$Gender <- factor(bes$Gender, levels = c(0, 1), labels = c("Female", "Male")) ## ------------------------------------------------------------------------ head(bes) ## ------------------------------------------------------------------------ bes <- filter(bes, !is.na(Turnout), !is.na(Income), !is.na(polinfoindex), !is.na(Gender), !is.na(edu15), !is.na(edu17), !is.na(edu18), !is.na(edu19plus), !is.na(in_school), !is.na(in_uni)) ## ------------------------------------------------------------------------ model1 <- glm(Turnout ~ Income + polinfoindex + Gender + edu15 + edu17 + edu18 + edu19plus + in_school + in_uni, family = binomial(link = "logit"), data = bes) screenreg(model1) ## ------------------------------------------------------------------------ table(bes$Turnout) ## ----echo = FALSE-------------------------------------------------------- # THIS CODE IS HERE FOR DISPLAYING THE CONTENT ON THE COURSE WEBSITE. YOU DO NOT NEED TO WORRY ABOUT WHAT IT DOES mean_round <- function(x, pct = 1) { sprintf("`%.2f%s`", mean(x)*pct, if_else(pct == 100, "%", "")) } ## ------------------------------------------------------------------------ predicted_probs <- predict(model1, type = "response") ## ------------------------------------------------------------------------ expected <- as.numeric(predicted_probs > 0.5) ## ------------------------------------------------------------------------ observed <- bes$Turnout outcome <- table(observed,expected) outcome ## ------------------------------------------------------------------------ (outcome[1,1] + outcome[2,2]) / sum(outcome) ## ------------------------------------------------------------------------ mean(bes$Turnout) ## ------------------------------------------------------------------------ model2 <- glm(Turnout ~ Income + polinfoindex + Influence + Gender + Age + edu15 + edu17 + edu18 + edu19plus + in_school + in_uni, family = binomial(link = "logit"), data = bes) screenreg(list(model1, model2)) ## ------------------------------------------------------------------------ lrtest(model1, model2) ## ------------------------------------------------------------------------ AIC(model1, model2) BIC(model1, model2) ## ------------------------------------------------------------------------ z.out <- zelig(Turnout ~ Income + polinfoindex + Influence + Gender + Age + edu15 + edu17 + edu18 + edu19plus + in_school + in_uni, model = "logit", data = bes) ## ----echo = FALSE-------------------------------------------------------- set.seed(666) ## ------------------------------------------------------------------------ x.male <- setx(z.out, Gender = "Male", edu18 = 1, Age = mean(bes$Age), Income = mean(bes$Income), polinfoindex = mean(bes$polinfoindex), Influence = mean(bes$Influence), edu15 = 0, edu17 = 0, edu19plus = 0, in_school = 0, in_uni = 0) x.female <- setx(z.out, Gender = "Female", edu18 = 1, Age = mean(bes$Age), Income = mean(bes$Income), polinfoindex = mean(bes$polinfoindex), Influence = mean(bes$Influence), edu15 = 0, edu17 = 0, edu19plus = 0, in_school = 0, in_uni = 0) s.out <- sim(z.out, x.male, x1 = x.female) summary(s.out) ## ----echo = FALSE-------------------------------------------------------- # IGNORE THESE FUNCTIONS. They are used for generating course website get_sim_out <- function(x, i, j) { sprintf("`%d%%`", round(mean(x$sim.out[[i]][[j]][[1]])*100)) } get_margin_of_error <- function(x, i, j, s = "") { qi <- x$sim.out[[i]][[j]][[1]][,1] quantiles <- quantile(qi, c(0.025, 0.975)) sprintf("`%d%s`", round(((quantiles[2] - quantiles[1])/2)*100), s) } get_ci_low <- function(x, i, j) { qi <- x$sim.out[[i]][[j]][[1]][,1] quantiles <- quantile(qi, c(0.025, 0.975)) sprintf("`%d`", round(min(quantiles)*100)) } get_ci_high <- function(x, i, j) { qi <- x$sim.out[[i]][[j]][[1]][,1] quantiles <- quantile(qi, c(0.025, 0.975)) sprintf("`%d`", round(max(quantiles)*100)) } ## ------------------------------------------------------------------------ range(bes$Income) ## ----echo = FALSE-------------------------------------------------------- set.seed(666) ## ------------------------------------------------------------------------ x.male <- setx(z.out, Gender = "Male", edu18 = 1, Age = mean(bes$Age), Income = seq(1, 13), polinfoindex = mean(bes$polinfoindex), Influence = mean(bes$Influence), edu15 = 0, edu17 = 0, edu19plus = 0, in_school = 0, in_uni = 0) x.female <- setx(z.out, Gender = "Female", edu18 = 1, Age = mean(bes$Age), Income = seq(1, 13), polinfoindex = mean(bes$polinfoindex), Influence = mean(bes$Influence), edu15 = 0, edu17 = 0, edu19plus = 0, in_school = 0, in_uni = 0) s.out <- sim(z.out, x.male, x1 = x.female) ## ------------------------------------------------------------------------ ci.plot(s.out, ci = 95, xlab = "Income", ylab = "Predicted Probability of Voting", main = "Effect of Income by Gender") # add labels manually # NOTE: the x and y values below are just coordinates on the plot, you can change them based on # where you want the labels placed text(x = 3, y = .85, labels = "Women") text(x = 6, y = .75, labels = "Men") ## ------------------------------------------------------------------------ bes %>% group_by(Gender) %>% summarise(avg_turnout = mean(Turnout), sd_turnout = sd(Turnout))