作为新手入门，泰坦尼克是必过的坎。
我用了 Keras，但准确率不高，只有 0.78。
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from keras.models import Sequential
from keras.layers import Dense, Activation,Dropout
from keras.optimizers import RMSprop
from keras.utils import np_utils
def nomalizerandStandard(data):
    """Standardize *data*, then min-max rescale the standardized result.

    A new ``StandardScaler`` and a new ``MinMaxScaler`` (both with default
    settings) are fitted on the given data on every call, so the scaling
    parameters always come from the very data being transformed.

    Args:
        data: Numeric, array-like feature table accepted by scikit-learn.

    Returns:
        The output of ``MinMaxScaler.fit_transform`` applied to the
        standardized data.
    """
    standardized = StandardScaler().fit_transform(data)
    return MinMaxScaler().fit_transform(standardized)
def loadData(train_path=r"F:\kaggle_file\Titanic\train.csv",
             test_path=r"F:\kaggle_file\Titanic\test.csv"):
    """Read the training and test CSV files into DataFrames.

    The locations used to be hard-coded; they are now parameters whose
    defaults are the original paths, so ``loadData()`` behaves as before
    while other machines can pass their own locations.

    Args:
        train_path: Path of the training CSV.
        test_path: Path of the test CSV.

    Returns:
        A ``(train, test)`` tuple of ``pandas.DataFrame`` objects.
    """
    train = pd.read_csv(train_path)
    test = pd.read_csv(test_path)
    return train, test
def dividedata(data):
    """Separate the ``Survived`` label column from the remaining columns.

    Args:
        data: DataFrame that contains a ``Survived`` column.

    Returns:
        A ``(features, labels)`` tuple: *features* is a new frame without
        ``Survived``; *labels* is the ``Survived`` column itself.
    """
    label_column = 'Survived'
    labels = data[label_column]
    features = data.drop([label_column], axis=1)
    return features, labels
def FamilyandName(data):
    """Add a ``family`` column and replace ``Name`` with a title code, in place.

    ``family`` is ``SibSp + Parch``. ``Name`` is encoded with the same
    precedence as the original statement order:

    * ``0`` - every row whose ``Sex`` is ``'male'``;
    * ``1`` - remaining rows whose name contains Dona / Miss / Ms / Mlle;
    * ``2`` - remaining rows whose name contains Countess / Mme / Mrs / Dr.

    Rows matching none of the above keep their original name.

    Must be called while ``Sex`` still holds the raw ``'male'``/``'female'``
    strings.

    Args:
        data: DataFrame with ``SibSp``, ``Parch``, ``Sex`` and ``Name``
            columns. It is modified in place; nothing is returned.
    """
    data['family'] = data['SibSp'] + data['Parch']

    # Build every mask from the untouched name strings *before* writing any
    # code, so later matches never run against already-encoded values.
    raw_names = data['Name'].astype(str)
    is_male = data['Sex'] == 'male'
    # All masks come from `data` itself; the old code read the global `train`
    # for the 'Dr' test, which broke the encoding of any other frame.
    group_one = raw_names.str.contains('Dona|Miss|Ms|Mlle')
    group_two = raw_names.str.contains('Countess|Mme|Mrs|Dr')

    # Work on an object-dtype copy and assign the column back in one step,
    # avoiding the removed `.ix` indexer and chained assignment.
    encoded = data['Name'].astype(object)
    encoded[is_male] = 0
    encoded[~is_male & group_one] = 1
    encoded[~is_male & ~group_one & group_two] = 2
    data['Name'] = encoded
def preAgeandSex(data):
    """Encode ``Sex`` numerically and fill missing ``Age`` values, in place.

    ``'male'`` becomes ``1``; every value that is not ``1`` afterwards
    becomes ``0``. Missing ages are replaced by the median of the ``Age``
    column of the same frame.

    Args:
        data: DataFrame with ``Sex`` and ``Age`` columns. It is modified in
            place; nothing is returned.
    """
    male_rows = data['Sex'].eq('male')
    data.loc[male_rows, 'Sex'] = 1
    # Evaluated after the write above: anything that is not 1 by now is 0.
    not_one = data['Sex'].ne(1)
    data.loc[not_one, 'Sex'] = 0

    median_age = data['Age'].median()
    data['Age'] = data['Age'].fillna(median_age)
def fillEmbarkedNan(data):
    """Fill missing ``Embarked`` values with ``"S"`` and encode the port, in place.

    Encoding: ``"S"`` -> 0, ``"C"`` -> 1, ``"Q"`` -> 2. Any other value is
    left unchanged, as in the original statement-by-statement version.

    Args:
        data: DataFrame with an ``Embarked`` column. It is modified in
            place; nothing is returned.
    """
    port_codes = (("S", 0), ("C", 1), ("Q", 2))

    filled = data['Embarked'].fillna("S")
    # Encode on an object-dtype copy and assign the whole column back once:
    # the old `data['Embarked'].ix[mask] = code` relied on the removed `.ix`
    # indexer and on chained assignment, which may never reach `data`.
    encoded = filled.astype(object)
    for port, code in port_codes:
        encoded[filled == port] = code
    data['Embarked'] = encoded
def dropCabinAndOthers(data):
    """Return a copy of *data* without the columns the model does not use.

    Removes ``Cabin``, ``Ticket`` and ``PassengerId``. The input frame is
    not modified; a ``KeyError`` is raised if a column is absent.

    Args:
        data: DataFrame containing the three columns listed above.

    Returns:
        A new DataFrame without those columns.
    """
    unused_columns = ['Cabin', 'Ticket', 'PassengerId']
    return data.drop(unused_columns, axis=1)
def network():
    """Build and compile the fully connected classifier.

    Layout: 9 input features -> Dense(10, relu) -> Dropout(0.2)
    -> Dense(7, relu) -> Dropout(0.3) -> Dense(4, relu) -> Dropout(0.2)
    -> Dense(2, sigmoid). Compiled with RMSprop and binary cross-entropy,
    reporting accuracy.

    Returns:
        The compiled ``Sequential`` model.
    """
    layers = [
        # The input width is fixed at 9 features.
        Dense(10, input_shape=(9,), activation='relu'),
        Dropout(0.2),
        Dense(7, activation='relu'),
        Dropout(0.3),
        Dense(4, activation='relu'),
        Dropout(0.2),
        # Two output units: one per class.
        Dense(2, activation='sigmoid'),
    ]
    classifier = Sequential()
    for layer in layers:
        classifier.add(layer)

    optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
    classifier.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier
# End-to-end run: load the data, preprocess both frames, train the network
# and write the predictions for the test set.
train, test = loadData()

# PassengerId is dropped during preprocessing, but it is needed to label the
# predictions in the result file, so keep a copy first.
passenger_ids = test['PassengerId'].copy()

## for train
FamilyandName(train)
preAgeandSex(train)
fillEmbarkedNan(train)
train = dropCabinAndOthers(train)

# for test data
FamilyandName(test)
preAgeandSex(test)
fillEmbarkedNan(test)
test = dropCabinAndOthers(test)
# Fill missing fares with the median *fare* (the old code used the median age).
test['Fare'] = test['Fare'].fillna(test['Fare'].median())

## for fitting
traindata, traintarget = dividedata(train)
traindata = nomalizerandStandard(traindata)
traintarget = np.array(traintarget)
# One-hot encode the labels to match the two-unit output layer.
traintarget = np_utils.to_categorical(traintarget, num_classes=2)

## for predict
# NOTE(review): the scalers are re-fitted on the test set here, so test
# features are not scaled with the training-set parameters - consider fitting
# once on the training data and reusing that transform.
test = nomalizerandStandard(test)
model = network()
# `epochs` is the Keras 2 name of the deprecated `nb_epoch` argument.
model.fit(traindata, traintarget, epochs=1000, batch_size=50)
predict = model.predict(test)
# Pick the class with the larger output as the predicted label.
predict = predict.argmax(axis=1)
# Write PassengerId/Survived pairs without the DataFrame index; the old file
# held only a bare index and an unnamed prediction column.
predict = pd.DataFrame({
    'PassengerId': passenger_ids.values,
    'Survived': predict,
})
predict.to_csv('F:\\kaggle_file\\Titanic\\result.csv', index=False)