import numpy as np
import pandas as pd
from sklearn import tree
from sklearn.metrics import accuracy_score
from sklearn import svm

# Load the Titanic train/test CSVs. Age is parsed as float64, so passengers
# with no recorded age come through as real NaN values.
train = pd.read_csv("../input/train.csv", dtype={"Age": np.float64})
test = pd.read_csv("../input/test.csv", dtype={"Age": np.float64})

# Encode the sex labels as integers (male -> 1, female -> 0) so the column can
# be used as a numeric feature.
train = train.replace(["male", "female"], [1, 0])
# Missing ages are float NaN, not the string "NaN", so a string-based replace()
# never matches them; fillna() is required. Without it the NaNs reach the
# SVC fitted further down, which rejects NaN input.
train["Age"] = train["Age"].fillna(0)

# Apply exactly the same preprocessing to the test split.
test = test.replace(["male", "female"], [1, 0])
test["Age"] = test["Age"].fillna(0)

# Show a preview and a statistical summary of the training data on standard
# output. The headers previously began with a literal "nn" (newline escapes
# that had lost their backslashes) and were not followed by the data they
# announce.
print("\n\nTop of the training data:")
print(train.head())

print("\n\nSummary statistics of training data")
print(train.describe())

# Write a copy of the training frame (as it stands at this point) to the
# working directory, without the index column.
train.to_csv('copy_of_the_training_data.csv', index=False)

# Columns used as model inputs.
chosen_features = ["Pclass", "Sex", "Age"]

train_features = train[chosen_features]
train_labels = train["Survived"]
test_features = test[chosen_features]

# Decision tree; min_samples_leaf=50 requires at least 50 training samples in
# every leaf. Kept under its own names so its predictions are not silently
# overwritten by the next model.
tree_clf = tree.DecisionTreeClassifier(min_samples_leaf=50)
tree_clf.fit(train_features, train_labels)
tree_pred = tree_clf.predict(test_features)

# Support vector classifier with a linear kernel.
svm_clf = svm.SVC(kernel="linear")
svm_clf.fit(train_features, train_labels)
svm_pred = svm_clf.predict(test_features)

# The SVM was the last model assigned to `clf`/`pred` originally, so it stays
# the one exposed under those names for the code that follows.
clf = svm_clf
pred = svm_pred

# Build the submission table: one row per test passenger with its predicted
# label. `sub` was never created before being indexed, which raised NameError;
# construct it explicitly (dict order fixes the column order).
sub = pd.DataFrame({"PassengerId": test["PassengerId"], "Survived": pred})
sub.to_csv("sub.csv", index=False)


# URL: Titanic
#
# Source: Google Alert for ML
#
# Pin It on Pinterest
#
# Share This