# --- Input tables --------------------------------------------------------------
# Each CSV is read with read.csv()'s default settings; the relative paths are
# resolved against the current working directory.
# Columns the rest of this script reads from each table:
#   player_data     : playerID, birthCountry
#   pitching_data   : yearID, ERA, G
#   salary_data     : yearID, playerID, salary
#   inflation_index : first column (the year) and inflation2015
player_data     <- read.csv(file = "Master.csv")
pitching_data   <- read.csv(file = "Pitching.csv")
salary_data     <- read.csv(file = "Salaries.csv")
inflation_index <- read.csv(file = "inflation.csv")
# --- ERA distribution by season ------------------------------------------------
# yearID is turned into a factor in place so geom_boxplot() draws one box per
# season. Code that needs numeric years afterwards converts it back with
# as.numeric(as.character(...)).
pitching_data$yearID <- as.factor(pitching_data$yearID)
ggplot(pitching_data) +
  geom_boxplot(aes(x = yearID, y = ERA))
# Console output recorded from an earlier run of the plot above:
## Warning: Removed 90 rows containing non-finite values (stat_boxplot).

# Per-season ERA summary: quartiles, median and extremes. Missing ERA values
# are dropped inside every statistic. TRUE is spelled out because T is an
# ordinary variable that can be reassigned, which would silently change the
# result.
summary_pitching <- summarize(
  group_by(pitching_data, yearID),
  Q1 = quantile(ERA, 0.25, na.rm = TRUE),
  median = median(ERA, na.rm = TRUE),
  Q3 = quantile(ERA, 0.75, na.rm = TRUE),
  min = min(ERA, na.rm = TRUE),
  max = max(ERA, na.rm = TRUE)
)

# factor -> character -> numeric recovers the actual year values rather than
# the factor's internal level codes, so the line plot gets a continuous x axis.
summary_pitching$yearID <- as.numeric(as.character(summary_pitching$yearID))
ggplot(summary_pitching) +
  geom_line(aes(x = yearID, y = median))
# --- Share of low- and high-ERA pitchers per season ----------------------------
# Keep only rows with at least 10 games (G >= 10).
players_atleast10 <- filter(pitching_data, G >= 10)

# low_prop / high_prop: per-season fraction of the remaining rows with
# ERA <= 3 and ERA >= 6 respectively.
# na.rm must be an argument of mean(), not of summarize(): passed to
# summarize() it only creates a constant column called `na.rm`, and every
# season containing a missing ERA ends up with NA proportions.
players_atleast10 <- summarize(
  group_by(players_atleast10, yearID),
  low_prop = mean(ERA <= 3, na.rm = TRUE),
  high_prop = mean(ERA >= 6, na.rm = TRUE)
)

# Convert yearID to its numeric year values (going through character so that a
# factor yields its labels, not its level codes) for a continuous x axis.
players_atleast10$yearID <- as.numeric(as.character(players_atleast10$yearID))

# The constant strings mapped to `color` become the legend entries that
# scale_color_manual() then assigns colours to.
ggplot(players_atleast10) +
  geom_line(aes(x = yearID, y = low_prop, color = "3 or under")) +
  geom_line(aes(x = yearID, y = high_prop, color = "6 or higher")) +
  scale_color_manual(
    values = c("3 or under" = "darkblue", "6 or higher" = "red"),
    name = "ERA"
  ) +
  labs(
    x = "year",
    y = "proportion",
    title = "proportion of pitchers (pitching at least 10 games) With low and High ERs by year"
  )
# --- Salary quartiles by season and birthplace ---------------------------------
# Coerce the join key to character in both tables, and the salary year to
# numeric (via character, so a factor yields its labels rather than its codes).
player_data$playerID <- as.character(player_data$playerID)
salary_data$playerID <- as.character(salary_data$playerID)
salary_data$yearID <- as.numeric(as.character(salary_data$yearID))

# BC: one row per salary record whose player has a non-NA birthCountry, tagged
# with a two-level `country` label.
# NOTE(review): only NA birthCountry is excluded; blank strings, if the CSV
# contains any, would be labelled "Born outside USA" - confirm against the data.
BC <- mutate(
  inner_join(
    salary_data,
    filter(player_data, !is.na(birthCountry)),
    by = "playerID"
  ),
  country = ifelse(birthCountry == "USA", "Born in USA", "Born outside USA")
)

# SSC: salary quartiles, median and extremes for every (yearID, country) pair.
SSC <- summarize(
  group_by(BC, yearID, country),
  Q1 = quantile(salary, 0.25),
  Q3 = quantile(salary, 0.75),
  median = median(salary),
  min = min(salary),
  max = max(salary)
)
# --- Inflation-adjusted salary bands -------------------------------------------
# Rename the first column of the inflation table to yearID so it can be used as
# the join key against the salary summary.
names(inflation_index)[1] <- "yearID"
SC <- left_join(SSC, inflation_index, by = "yearID")

# Force the multiplier for 2015 rows to 1, i.e. 2015 salaries are left as-is.
# NOTE(review): presumably the inflation table has no 2015 row because 2015 is
# the reference year, leaving NA after the left join - confirm.
SC[SC$yearID == 2015, "inflation2015"] <- 1

# Scale every summary statistic by the per-year multiplier.
SC <- mutate(
  SC,
  median_inf_adj = median * inflation2015,
  Q1_inf_adj = Q1 * inflation2015,
  Q3_inf_adj = Q3 * inflation2015,
  min_inf_adj = min * inflation2015,
  max_inf_adj = max * inflation2015
)

# Ribbon = inter-quartile range, line = median, one of each per country group.
# `alpha` was previously misspelt, so ggplot2 ignored it and drew opaque
# ribbons that hid each other; with alpha = 0.4 both bands stay visible where
# they overlap.
ggplot(SC) +
  geom_ribbon(
    aes(x = yearID, ymin = Q1_inf_adj, ymax = Q3_inf_adj, fill = country),
    alpha = 0.4
  ) +
  geom_line(aes(x = yearID, y = median_inf_adj, color = country), size = 1.2) +
  scale_y_continuous(labels = scales::dollar) +
  labs(
    y = "Annual Salary (adjusted for inflation)",
    x = "year",
    title = "Salaries of Middle 50% of Earners in Major League Baseball"
  ) +
  scale_color_discrete(name = "Median Salary") +
  scale_fill_discrete(name = "middle 50% earners") +
  theme_minimal()