failedPCA
# failedPCA: Titanic survival model fitted with stepwise logistic regression.
# An earlier PCA-based variant is kept further down, commented out.
#
# NOTE(review): this script was recovered from a web page that collapsed it
# onto a single line, replaced ASCII quotes with typographic ones, and dropped
# two spans of text that started at a "<" and ended at a ">". Quotes and line
# breaks are restored here; each lost span is marked where it occurs and the
# code that stands in for it is a best-effort placeholder to be confirmed.
library(stringr)

train <- read.csv("../input/train.csv", header = TRUE, as.is = FALSE,
                  na.strings = "")
test <- read.csv("../input/test.csv", header = TRUE, as.is = FALSE,
                 na.strings = "")

# Combine test and train. The test set gets an all-NA Survived column so that
# both data frames have the same columns.
combine <- rbind(train, cbind(test, Survived = rep(NA, nrow(test))))

# Cabin factor: 1 when a cabin is recorded, 0 when it is missing.
Cabin <- factor(combine$Cabin)
combine$cabin.factor <- ifelse(is.na(combine$Cabin), 0, 1)

# Pclass
combine$Pclass.factor <- factor(combine$Pclass)

# Name / sex: the title is the text after the first comma of Name, up to the
# first period.
extract_title <- function(name) {
  after_comma <- str_split(name, fixed(","))[[1]][2]
  # fixed(".") is required: a bare "." is a regular expression that matches
  # every character, which would reduce every title to an empty string.
  before_period <- str_split(after_comma, fixed("."))[[1]][1]
  str_trim(before_period, side = "both")
}
combine$title <- vapply(as.character(combine$Name), extract_title,
                        character(1), USE.NAMES = FALSE)

# Fold the rare titles into the three common ones.
combine$title[combine$title %in% c("Master", "Capt", "Col", "Don", "Jonkheer",
                                   "Major", "Rev", "Sir")] <- "Mr"
combine$title[combine$title %in% c("Dona", "Lady", "Mme",
                                   "the Countess")] <- "Mrs"
combine$title[combine$title %in% c("Mlle", "Ms")] <- "Miss"

# Age impute: fill missing ages with the predictions of a linear model.
lm.age <- lm(Age ~ SibSp + Parch + Pclass + title, data = combine)
predict.age <- predict(lm.age, newdata = combine)
age_missing <- is.na(combine$Age)
combine$Age[age_missing] <- predict.age[age_missing]

# SibSp Parch: family size including the passenger.
combine$family <- combine$SibSp + combine$Parch + 1

# FareLess100
# NOTE(review): the recovered text breaks off inside this statement and
# resumes at the step() call; verbatim it reads
#   combine$fareLess100=ifelse(combine$Fare2 glm.titanic=step(glm.titanic)
# The "< 100" threshold below is inferred from the column name only, and the
# re-split and model fit that follow are placeholders for the lost code --
# confirm all of them against the original script.
combine$fareLess100 <- ifelse(combine$Fare < 100, 1, 0)

# Placeholder: split the engineered data back into its train and test rows.
train_rows <- seq_len(nrow(train))
train <- combine[train_rows, ]
test <- combine[-train_rows, ]

# Placeholder: the original predictor set is unknown. The binomial/logit
# family matches the commented-out PCA model below.
glm.titanic <- glm(
  Survived ~ Pclass.factor + title + Age + family + cabin.factor + fareLess100,
  data = train,
  family = binomial(link = logit)
)
glm.titanic <- step(glm.titanic)

# PCA decomposition
##pr.titanic=princomp(a,cor=TRUE)
##pre=predict(pr.titanic)
##d=pre[,1:3]
##glm.pr.titanic=glm(train.Survived~.,data=data.frame(d,train$Survived),family=binomial(link=logit))
##beta=coef(glm.pr.titanic)
##load=loadings(pr.titanic)
##x.bar=pr.titanic$center
##x.sd=pr.titanic$scale
##coef0.5,1.0,0.0)
# NOTE(review): the last commented line above is truncated in the recovered
# text; its tail "0.5,1.0,0.0)" looks like the end of the statement that
# thresholded predicted probabilities. The assignment below is a placeholder
# for that statement -- confirm against the original script.
test$Survived <- ifelse(
  predict(glm.titanic, newdata = test, type = "response") > 0.5, 1.0, 0.0
)

# Write the submission file.
file <- cbind(test$PassengerId, test$Survived)
colnames(file) <- c("PassengerId", "Survived")
write.csv(file, "./Submission.csv", row.names = FALSE)

# This script has been released under the Apache 2.0 open source license.
Link to Full Article: failedPCA