Titanic competition using svm

# Titanic competition script using the svm method
library(e1071)
library(rpart)
set.seed(415)
train <- read.csv("../input/train.csv")
test <- read.csv("../input/test.csv")
feature_eng <- function(train_df, test_df) {
  # Combining the train and test sets for purpose engineering
  test_df$Survived <- NA
  combi <- rbind(train_df, test_df)
  #Features engineering
  combi$Name <- as.character(combi$Name)
  # The number of titles are reduced to reduce the noise in the data
  combi$Title <- sapply(combi$Name, FUN=function(x) {strsplit(x, split='[,.]')[[1]][2]})
  combi$Title <- sub(' ', '', combi$Title)
  #table(combi$Title)
  combi$Title[combi$Title %in% c('Mme', 'Mlle')] <- 'Mlle'
  combi$Title[combi$Title %in% c('Capt', 'Don', 'Major', 'Sir')] <- 'Sir'
  combi$Title[combi$Title %in% c('Dona', 'Lady', 'the Countess', 'Jonkheer')] <- 'Lady'
  combi$Title <- factor(combi$Title)
  # Reuniting the families together
  combi$FamilySize <- combi$SibSp + combi$Parch + 1
  combi$Surname <- sapply(combi$Name, FUN=function(x) {strsplit(x, split='[,.]')[[1]][1]})
  combi$FamilyID <- paste(as.character(combi$FamilySize), combi$Surname, sep="")
  combi$FamilyID[combi$FamilySize <= 2] <- 'Small'
  #table(combi$FamilyID)
  combi$FamilyID <- factor(combi$FamilyID)
  #…


Link to Full Article: Titanic competition using svm

Pin It on Pinterest

Share This