titanic

Published by onesixx

https://www.kaggle.com/c/titanic

https://www.mirror.co.uk/news/uk-news/iceberg-sunk-titanic-100000-years-7506651

https://www.kaggle.com/erikbruin/titanic-2nd-degree-families-and-majority-voting

‘PassengerId’, ‘Survived’, ‘Pclass’, ‘Name’, ‘Sex’, ‘Age’, ‘Fare’, ‘Embarked’

library(Hmisc)
library(knitr)
library(ggplot2)
library(dplyr)
library(caret)
library(randomForest)
library(ROCR)
library(cowplot)

# Load the Titanic training and test sets from ./input.
# Text columns are kept as character vectors, and both the literal "NA" and
# empty fields are read as missing values.
# FALSE is spelled out because the F alias is an ordinary, reassignable variable.
train <- read.csv("./input/train.csv", stringsAsFactors = FALSE, na.strings = c("NA", ""))
test  <- read.csv("./input/test.csv",  stringsAsFactors = FALSE, na.strings = c("NA", ""))

# Print a compact overview (columns, types, leading values) of each data set.
str(train)
str(test)

# Set Survived to NA on every test row, then stack the training and test rows
# into one data frame; rows with a non-missing Survived are the training rows.
test[["Survived"]] <- NA
all <- rbind(train, test)

# Check missing data: count the NA values in every column once, then show only
# the columns that have at least one missing value.
# vapply() guarantees an integer vector regardless of the input shape.
na_counts <- vapply(all, function(column) sum(is.na(column)), integer(1))
na_counts[na_counts > 0]

# Recode the categorical columns: Survived and Sex become unordered factors,
# Pclass becomes an ordered factor.
all[["Survived"]] <- factor(all[["Survived"]])
all[["Sex"]]      <- factor(all[["Sex"]])
all[["Pclass"]]   <- factor(all[["Pclass"]], ordered = TRUE)

# Bar chart of the Survived counts, restricted to rows where Survived is known.
# Each bar is labelled with its count; after_stat(count) replaces the
# deprecated ..count.. notation.
all[!is.na(all$Survived), ] %>%
    ggplot(aes(x = Survived, fill = Survived)) +
        geom_bar(stat = "count") +
        geom_label(stat = "count", aes(label = after_stat(count))) +
        labs(x = "How many people died and survived on the Titanic?")
    
# p1: number of passengers of each sex across the combined data, with the
# count printed on each bar. after_stat(count) replaces the deprecated
# ..count.. notation.
p1 <- all %>%
        ggplot(aes(x = Sex, fill = Sex)) +
        geom_bar(stat = "count", position = "dodge") +
        geom_label(stat = "count", aes(label = after_stat(count))) +
        labs(x = "All data")
    
# p2: counts per sex split by Survived, using only rows where Survived is known.
# The bars are dodged side by side, so the labels must be dodged by the same
# width (0.9, the default bar width); otherwise both labels of a sex are drawn
# at the centre of the category instead of on top of their own bar.
# after_stat(count) replaces the deprecated ..count.. notation.
p2 <- all[!is.na(all$Survived), ] %>%
        ggplot(aes(x = Sex, fill = Survived)) +
        geom_bar(stat = "count", position = "dodge") +
        geom_label(stat = "count", aes(label = after_stat(count)),
                   position = position_dodge(width = 0.9)) +
        labs(x = "Training data only")

# Show both sex charts side by side.
plot_grid(p1, p2)


# cowplot demo: stacked bar chart of the diamonds data with a "DRAFT!" watermark.
# The plot is wrapped in ggdraw() so draw_label() works in canvas coordinates
# (0-1) and its default position (0.5, 0.5) is the centre of the figure. Added
# directly to the ggplot, the label would be placed in data coordinates, i.e.
# in the lower-left corner of the panel.
diamonds_plot <- ggplot(diamonds, aes(clarity, fill = cut)) + geom_bar() +
    theme(axis.text.x = element_text(angle = 70, vjust = 0.5))
ggdraw(diamonds_plot) +
    draw_label("DRAFT!", angle = 45, size = 80, alpha = .2)
Categories: Kaggle

onesixx

Blog Owner

Subscribe
Notify of
guest

0 Comments
Inline Feedbacks
View all comments
0
Would love your thoughts, please comment.x
()
x