projectz

# Random-forest classifier for the `Survived` column of a passenger data set.
#
# Reads ../input/train.csv and ../input/test.csv, fits a 100-tree random
# forest on seven predictor columns, and writes the predictions for the test
# rows to result.csv (columns: PassengerId, Survived).
#
# This script has been released under the Apache 2.0 open source license.

library(randomForest)
library(ggplot2)

# Fix the RNG seed so the fitted forest is reproducible between runs.
set.seed(1)

my_train <- read.csv("../input/train.csv", stringsAsFactors = FALSE)
my_test <- read.csv("../input/test.csv", stringsAsFactors = FALSE)

#' Build the predictor data frame used for both fitting and prediction.
#'
#' Selects the columns Pclass, Age, Sex, Parch, SibSp, Fare and Embarked,
#' fills in missing values, and converts Sex and Embarked to factors.
#'
#' @param data A data frame containing at least the seven columns above.
#' @return A data frame with exactly those seven columns, in that order.
get_features <- function(data) {
  features <- c("Pclass", "Age", "Sex", "Parch", "SibSp", "Fare", "Embarked")
  char <- data[, features]

  # Missing ages are replaced by the sentinel -1 rather than an estimate.
  char$Age[is.na(char$Age)] <- -1

  # Missing fares get the median fare of the data frame passed in, so the
  # training and test sets are each imputed with their own median.
  char$Fare[is.na(char$Fare)] <- median(char$Fare, na.rm = TRUE)

  # Blank embarkation codes are replaced by "S". `%in%` never yields NA, so
  # rows whose Embarked is NA are left untouched, exactly as with `== ""`.
  char$Embarked[char$Embarked %in% ""] <- "S"

  char$Sex <- as.factor(char$Sex)
  char$Embarked <- as.factor(char$Embarked)

  char
}

# Fit the classifier; the response is converted to a factor so that
# randomForest performs classification rather than regression.
rf <- randomForest(
  get_features(my_train),
  as.factor(my_train$Survived),
  ntree = 100,
  importance = TRUE
)

# Predict on the test set and write the PassengerId/Survived pairs.
result <- data.frame(PassengerId = my_test$PassengerId)
result$Survived <- predict(rf, get_features(my_test))
write.csv(result, file = "result.csv", row.names = FALSE)


Link to Full Article: projectz

Pin It on Pinterest

Share This