###########################################################################
# PUBLG100: Introduction to Quantitative Methods
#
# Week 1 Seminar: Introduction to Quantitative Analysis
#
#

# Change your working directory
# (edit the path below so that it points to your own course folder)
seminar.dir <- "N:/PUBLG100"
if (dir.exists(seminar.dir)) {
  setwd(seminar.dir)
} else {
  # setwd() raises an error for a missing folder, which would stop a sourced
  # run of this script. The steps visible below load their data from a URL,
  # so we warn and carry on from the current directory instead.
  warning("Folder '", seminar.dir, "' not found; ",
          "working directory left unchanged.", call. = FALSE)
}

# Check your working directory
getwd()


## ------------------------------------------------------------------------
# Two single-value variables: one holding a number, one holding text
a <- 5
# typeof() reports the storage type; a plain number is stored as "double"
typeof(a)
a
b <- "Yay stats class"
# text is stored as type "character" (R's name for a string)
typeof(b)
b

## ------------------------------------------------------------------------
# Combine six numbers into one vector with c()
my.vector <- c(10, -7, 99, 34, 0, -5)
my.vector
# number of elements in the vector
length(my.vector)

# Subsetting with square brackets
# a positive index picks that element: here the 1st
my.vector[1]
# a negative index drops that element: everything except the 1st
my.vector[-1]
# a range of positions: the 2nd through the 4th
my.vector[2:4]
# several specific positions: the 2nd and the 5th
my.vector[c(2, 5)]
# the position equal to the vector's length is the last element
my.vector[length(my.vector)]

# Remove a single object, 'a', from the workspace
rm(list = "a")
# Remove every object that ls() lists, i.e. empty the workspace
rm(list = ls())

# Create two matrices from the same six numbers
# (type help("matrix") into the console, press ENTER, and read the
# Description, Usage and Arguments sections)

# 3 rows x 2 columns, values filled in row by row
my.matrix1 <- matrix(
  data = c(1, 2, 30, 40, 500, 600),
  nrow = 3,
  ncol = 2,
  byrow = TRUE
)
# 2 rows x 3 columns, values filled in column by column
my.matrix2 <- matrix(
  data = c(1, 2, 30, 40, 500, 600),
  nrow = 2,
  ncol = 3,
  byrow = FALSE
)
# How are the matrices different?
my.matrix1
my.matrix2

# Subsetting a matrix uses [row, column]
# element in row 1 and column 2
my.matrix1[1, 2]
# element in row 2 and column 1
my.matrix1[2, 1]
# leaving the row index empty keeps all rows: the 1st column only
my.matrix1[, 1]
# leaving the column index empty keeps all columns: rows 1 to 2
my.matrix1[1:2, ]
# rows 1 and 3
my.matrix1[c(1, 3), ]


## ------------------------------------------------------------------------
# let's look at countries after WW2
# were western countries more democratic than the rest?
# (we proxy the "West" by NATO members)

# load the Polity IV dataset (downloaded from the web, so this step needs
# an internet connection)
my.data <- read.csv("http://uclspp.github.io/PUBLG100/data/polity.csv")

# View(my.data) # opens a window with the data set
dim(my.data) # returns number of rows and columns
my.data[1:10, ] # look at the top 10 rows of the data set
names(my.data) # the variable names in the data set
# we saw the variable name country but we only saw Afghanistan in the
# first 10 rows. Are there other countries in the data set?
# Since R 4.0.0 read.csv() keeps text columns as character vectors instead
# of factors, and levels() of a character vector is NULL. Converting with
# factor() first lists the distinct country names on any R version.
levels(factor(my.data$country))

# we drop all observations which are not from 1946
my.data <- my.data[my.data$year == 1946, ]
my.data[1:10, ]
summary(my.data$polity2) # descriptive statistics of polity variable

# Were western countries more democratic than the others in 1946?
# Cross-tabulate NATO membership (rows) against polity score (columns)
table(my.data$nato, my.data$polity2)

# Summary statistics of the polity score, separately by NATO membership
# countries coded nato == 0 (not in NATO)
with(my.data, summary(polity2[nato == 0]))
# countries coded nato == 1 (NATO member)
with(my.data, summary(polity2[nato == 1]))

## illustration: one box per NATO-membership group
boxplot(
  polity2 ~ as.factor(nato),
  data = my.data,
  frame = FALSE,
  main = "Polity IV Scores of NATO founders vs others in 1946",
  xlab = "NATO member",
  ylab = "Polity Score"
)

# An example of the kind of plot R can draw: a map
library(maps)
library(mapdata)

# Draw the listed regions from the high-resolution world database,
# cropped to the given longitude (xlim) and latitude (ylim) window
map(
  database = "worldHires",
  regions = c("UK", "Ireland", "Isle of Man", "Isle of Wight"),
  xlim = c(-11, 3),
  ylim = c(49, 60.9)
)

# where are we?
# Google Maps gives the position of SPP as
# latitude 51.525051, longitude -0.130186
# note: x is the longitude and y the latitude, so longitude goes first
points(x = -0.130186, y = 51.525051, col = 2, pch = 20, cex = 1.8)