Python的pandas库的用法(五)处理丢失数据
import pandas as pd
import numpy as np
# Build a 6x4 demo frame holding 0..23, indexed by six consecutive days
# starting at 2019-01-01.
dates = pd.date_range('20190101', periods=6)
df = pd.DataFrame(
    np.arange(24).reshape((6, 4)),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)
# NaN is a float and cannot be stored in an integer column. Cast the two
# columns that will receive NaN to float explicitly instead of relying on the
# implicit upcast during assignment, which recent pandas releases deprecate.
df = df.astype({'B': float, 'C': float})
# Punch two holes into the data: row 0 / column 'B' and row 1 / column 'C'.
df.iloc[0, 1] = np.nan
df.iloc[1, 2] = np.nan
print(df)
选择丢掉数据缺失的某行或某列
# Drop along the row axis: any row containing at least one NaN is removed.
print(
    df.dropna(axis='index', how='any')
)
# Drop along the column axis: any column containing at least one NaN is
# removed. `how` accepts 'any' or 'all'; 'any' is the default.
print(
    df.dropna(axis='columns', how='any')
)
# With how='all' a row is removed only when every value in it is NaN.
print(
    df.dropna(axis='index', how='all')
)
填充缺失数据
# Show the frame as it is, then a copy in which every NaN is replaced by 0.
# fillna returns a new frame here; df itself is left unchanged.
print(df)
print(df.fillna(0))
检查是否缺失数据
# Boolean mask with the same shape as df: True where a value is missing,
# False everywhere else. isna is the same method as isnull.
print(df.isna())
如果DataFrame里数据很多怎么判断是否缺值?
# For a large frame, reduce the boolean mask to a single answer: prints True
# when the frame contains at least one missing value, False otherwise.
# Reducing the underlying NumPy array gives one scalar regardless of how
# pandas dispatches np.any, and the result is already a boolean, so no
# comparison against True is needed.
print(df.isnull().values.any())