关于泰坦尼克号之灾

泰坦尼克号之灾


使人觉得遥远的不是时间长,而是两三件不可挽回的事。——博尔赫斯

  # Exploratory overview of the Titanic training set: loads the CSV and draws five summary plots on one 2x3 grid.
  1. import pandas as pd # data analysis

  2. import numpy as np # scientific computing

  3. from pandas import Series,DataFrame

  4. data_train = pd.read_csv("/Titanic_data/Train.csv")

  5. import matplotlib.pyplot as plt


  6. fig = plt.figure()

  7. fig.set(alpha=0.2)  # set the figure's alpha (transparency) parameter

  8. plt.subplot2grid((2,3),(0,0))             # place several small plots inside one large figure; this is the cell at row 0, col 0 of a 2x3 grid

  9. data_train.Survived.value_counts().plot(kind='bar')# bar chart of how often each Survived value occurs

  10. plt.title(u"获救情况 (1为获救)") # title (the text states that 1 means survived)

  11. plt.ylabel(u"人数")


  12. plt.subplot2grid((2,3),(0,1))

  13. data_train.Pclass.value_counts().plot(kind="bar")

  14. plt.ylabel(u"人数")

  15. plt.title(u"乘客等级分布")


  16. plt.subplot2grid((2,3),(0,2))

  17. plt.scatter(data_train.Survived, data_train.Age)

  18. plt.ylabel(u"年龄")                         # set the y-axis label

  19. plt.grid(b=True, which='major', axis='y')  # NOTE(review): the `b` keyword was deprecated in Matplotlib 3.5 in favour of `visible`; newer releases may reject it -- verify against the installed version

  20. plt.title(u"按年龄看获救分布 (1为获救)")


  21. plt.subplot2grid((2,3),(1,0), colspan=2)

  22. data_train.Age[data_train.Pclass == 1].plot(kind='kde')

  23. data_train.Age[data_train.Pclass == 2].plot(kind='kde')

  24. data_train.Age[data_train.Pclass == 3].plot(kind='kde')

  25. plt.xlabel(u"年龄")# label the x-axis

  26. plt.ylabel(u"密度")

  27. plt.title(u"各等级的乘客年龄分布")

  28. plt.legend((u'头等舱', u'2等舱',u'3等舱'),loc='best') # legend entries, listed in the order the three KDE curves were drawn above


  29. plt.subplot2grid((2,3),(1,2))

  30. data_train.Embarked.value_counts().plot(kind='bar')

  31. plt.title(u"各登船口岸上船人数")

  32. plt.ylabel(u"人数")

  33. plt.show()

关于泰坦尼克号之灾

关于泰坦尼克号之灾

  1. # Next, look at survival by sex within each cabin-class group (four side-by-side bar charts)

    fig=plt.figure()

  2. fig.set(alpha=0.65) # set the figure transparency; the exact value is not important

  3. plt.title(u"根据舱等级和性别的获救情况")  # NOTE(review): this runs before any of the subplots below are added -- check where the title actually ends up


  4. ax1=fig.add_subplot(141)

  5. data_train.Survived[data_train.Sex == 'female'][data_train.Pclass != 3].value_counts().plot(kind='bar', label="female highclass", color='#FA2479')  # women whose Pclass is not 3 ("high class" per the label)

  6. ax1.set_xticklabels([u"获救", u"未获救"], rotation=0)  # NOTE(review): tick labels are hard-coded and this panel lists them in the opposite order to the other three; bar order follows value_counts(), so verify each label matches its bar

  7. ax1.legend([u"女性/高级舱"], loc='best')


  8. ax2=fig.add_subplot(142, sharey=ax1)  # share the y-axis with the first panel

  9. data_train.Survived[data_train.Sex == 'female'][data_train.Pclass == 3].value_counts().plot(kind='bar', label='female, low class', color='pink')  # women with Pclass == 3

  10. ax2.set_xticklabels([u"未获救", u"获救"], rotation=0)

  11. plt.legend([u"女性/低级舱"], loc='best')


  12. ax3=fig.add_subplot(143, sharey=ax1)

  13. data_train.Survived[data_train.Sex == 'male'][data_train.Pclass != 3].value_counts().plot(kind='bar', label='male, high class',color='lightblue')  # men whose Pclass is not 3

  14. ax3.set_xticklabels([u"未获救", u"获救"], rotation=0)

  15. plt.legend([u"男性/高级舱"], loc='best')


  16. ax4=fig.add_subplot(144, sharey=ax1)

  17. data_train.Survived[data_train.Sex == 'male'][data_train.Pclass == 3].value_counts().plot(kind='bar', label='male low class', color='steelblue')  # men with Pclass == 3

  18. ax4.set_xticklabels([u"未获救", u"获救"], rotation=0)

  19. plt.legend([u"男性/低级舱"], loc='best')

关于泰坦尼克号之灾

关于泰坦尼克号之灾

  1. # Survival outcome by passenger class, drawn as a stacked bar chart

  2. fig = plt.figure()  # NOTE(review): df.plot() below is called without ax=, so it may draw on a figure of its own and leave this one empty -- confirm

  3. fig.set(alpha=0.2)  # set the figure's alpha (transparency) parameter

  4. Survived_0 = data_train.Pclass[data_train.Survived == 0].value_counts()  # per-class counts for rows with Survived == 0

  5. Survived_1 = data_train.Pclass[data_train.Survived == 1].value_counts()  # per-class counts for rows with Survived == 1

  6. df=pd.DataFrame({u'获救':Survived_1, u'未获救':Survived_0})  # one column per outcome, indexed by Pclass value

  7. df.plot(kind='bar', stacked=True)

  8. plt.title(u"各乘客等级的获救情况")

  9. plt.xlabel(u"乘客等级")

  10. plt.ylabel(u"人数")

  11. plt.show()

关于泰坦尼克号之灾

关于泰坦尼克号之灾

  1. # Survival outcome by sex, drawn as a stacked bar chart

  2. fig = plt.figure()  # NOTE(review): df.plot() below is called without ax=, so it may draw on a figure of its own and leave this one empty -- confirm

  3. fig.set(alpha=0.2)  # set the figure's alpha (transparency) parameter


  4. Survived_m = data_train.Survived[data_train.Sex == 'male'].value_counts()  # counts of each Survived value among male rows

  5. Survived_f = data_train.Survived[data_train.Sex == 'female'].value_counts()  # counts of each Survived value among female rows

  6. df=pd.DataFrame({u'男性':Survived_m, u'女性':Survived_f})  # one column per sex, indexed by Survived value

  7. df.plot(kind='bar', stacked=True)

  8. plt.title(u"按性别看获救情况")

  9. plt.xlabel(u"性别")  # NOTE(review): the bar categories come from the index (Survived values) while this label says "sex" -- verify the intended axis

  10. plt.ylabel(u"人数")

  11. plt.show()

关于泰坦尼克号之灾

关于泰坦尼克号之灾

多年了,你一直在我的伤口中幽居,我放下过天地,却从未放下过你,我生命中的千山万水,任你一一告别。——仓央嘉措