###########################################################################
# PUBLG100: Introduction to Quantitative Methods
#
# Week 5 Solutions: Multiple linear regression models
#
#

## ----message=FALSE-------------------------------------------------------
library(Zelig) 
library(texreg)

## ----eval=FALSE----------------------------------------------------------
## # clear workspace
## rm(list = ls())

## ------------------------------------------------------------------------
# Load the corruption data set from a CSV file in the working directory.
# stringsAsFactors is set explicitly because the read.csv() default changed
# from TRUE to FALSE in R 4.0.0. The later steps of this script refer to
# `region` as a factor variable, so string columns are converted to factors
# regardless of the R version in use.
corruption_data <- read.csv("corruption.csv", stringsAsFactors = TRUE)

## ------------------------------------------------------------------------
# Descriptive overview of every column in the data set
summary(corruption_data)

## ------------------------------------------------------------------------
# Linear regression of gdp on ti.cpi and region
model1 <- lm(
  gdp ~ ti.cpi + region,
  data = corruption_data
)

# Save the regression table as HTML in the file "model1.doc"
htmlreg(
  model1,
  file = "model1.doc"
)

# Show the same regression table as plain text in the console
screenreg(model1)

## ------------------------------------------------------------------------
# Restricted model: same outcome and ti.cpi predictor, but without region
model2 <- lm(gdp ~ ti.cpi, data = corruption_data)

# F-test comparing the two nested models. The restricted model (model2) is
# listed first so that the Df and Sum of Sq columns are reported as positive
# numbers for the terms added by model1; the F statistic and its p-value are
# the same in either order.
anova(model2, model1)

## ------------------------------------------------------------------------
# estimate the linear model including regions (least squares via Zelig)
z.out <- zelig(
  gdp ~ ti.cpi + region,
  data = corruption_data,
  model = "ls"
)

# inspect the distribution of ti.cpi to see which values it takes
summary(corruption_data$ti.cpi)

# set ti.cpi to each integer from 1 to 10; the remaining covariates are
# left for setx to fill in
x.out <- setx(z.out, ti.cpi = 1:10)

# simulate quantities of interest at those covariate values
s.out <- sim(z.out, x = x.out)

# plot the simulated values across the ti.cpi range with a 95% interval
ci.plot(s.out, ci = 95)

## ----fig.width=13, fig.height=11-----------------------------------------
# fit the least-squares model of gdp on ti.cpi and region
z.out <- zelig(
  gdp ~ ti.cpi + region,
  data = corruption_data,
  model = "ls"
)

# frequency table of region, showing the labels that can be passed to setx
table(corruption_data$region)

# one covariate profile per region of interest
x.out.europe <- setx(z.out, region = "Europe")
x.out.americas <- setx(z.out, region = "Americas")

# simulate with Europe as the x profile and Americas as the x1 profile
s.out <- sim(z.out, x = x.out.europe, x1 = x.out.americas)

# the summary reports the first difference between the two profiles
summary(s.out)