Using Decision Trees to Predict Titanic Survivors

import numpy as np
import pandas as pd
from sklearn.feature_extraction import DictVectorizer
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier

try:
    # Current home of train_test_split (scikit-learn >= 0.18).
    from sklearn.model_selection import train_test_split
except ImportError:
    # Legacy location; this module was removed in scikit-learn 0.20.
    from sklearn.cross_validation import train_test_split

# Load the Titanic passenger table from the remote CSV file.
tantic = pd.read_csv("http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/titanic.txt")
# print(tantic.head())

# Features: age, sex and passenger class; target: the 'survived' column.
# Take an explicit copy so the imputation below writes to an independent
# frame instead of to a slice of `tantic`.
X = tantic[['age', 'sex', 'pclass']].copy()
Y = tantic['survived']
# print(X.describe())

# Replace missing ages with the mean age. The result is assigned back rather
# than using a chained `inplace=True` call, which pandas warns about and which
# has no effect on `X` when copy-on-write is enabled.
# NOTE(review): the mean is computed over all rows before the split, as in the
# original script; compute it on the training rows only if leakage matters.
X['age'] = X['age'].fillna(X['age'].mean())

# Hold out 25% of the rows for evaluation; the fixed seed makes the split
# reproducible. `X_text` / `Y_text` hold the test split.
X_train, X_text, Y_train, Y_text = train_test_split(
    X, Y, test_size=0.25, random_state=33
)

# Turn each row into a feature dict and vectorize it: DictVectorizer one-hot
# encodes string-valued features and passes numeric ones through unchanged.
# The vocabulary is learned on the training rows only and reused for the
# test rows. `orient='records'` must be spelled out in full; pandas no longer
# accepts the abbreviation 'record'.
vec = DictVectorizer(sparse=False)
X_train = vec.fit_transform(X_train.to_dict(orient='records'))
X_text = vec.transform(X_text.to_dict(orient='records'))

# Fit a decision tree with default settings ("jueceshu" is pinyin for
# "decision tree") and predict on the held-out rows.
jueceshu = DecisionTreeClassifier()
jueceshu.fit(X_train, Y_train)
Y_predict = jueceshu.predict(X_text)

# Per-class precision/recall/F1, followed by the mean accuracy on the test set.
# Assumes 'survived' is coded 0 = died, 1 = survived, matching the order of
# target_names -- TODO confirm against the dataset.
print(classification_report(Y_text, Y_predict, target_names=['died', 'survived']))
print(jueceshu.score(X_text, Y_text))

 

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=324871543&siteId=291194637