# 用matplotlib画图(1) (Plotting with matplotlib, part 1)

import json
import pandas as pd
import numpy as np
import  matplotlib.pyplot as plt
from pandas import DataFrame,Series
path='/Users/zhushuqiang/python/download/pydata-book/datasets/bitly_usagov/example.txt'

# Parse the file line by line; each line is decoded as one JSON document.
# The context manager closes the file handle once every line has been read,
# the encoding is stated explicitly instead of relying on the platform
# default, and the loop variable no longer shadows the builtin `str`.
with open(path, encoding='utf-8') as source:
    records = [json.loads(line) for line in source]

# Show the keys of the first record.
print(records[0].keys())

# Test whether a given key exists; for a dict both forms are membership
# tests on its keys.
print('g' in records[0].keys())
print('g' in records[0])

# Collect the time zone of every record that actually has a 'tz' key.
time_zones = [record['tz'] for record in records if 'tz' in record]


#统计个数
def get_count(sequence):
    """Count how many times each element occurs in ``sequence``.

    Args:
        sequence: Iterable of hashable elements.

    Returns:
        A plain ``dict`` mapping each distinct element to its number of
        occurrences, with keys in order of first appearance.
    """
    # Imported locally so the function does not depend on the file's
    # top-level import block.
    from collections import Counter

    # iter() makes Counter tally the elements one by one even when a mapping
    # is passed; Counter would otherwise read a mapping's values as counts.
    return dict(Counter(iter(sequence)))

# Tally how often each collected time zone occurs.
count_dict = get_count(time_zones)

#top-n

def top_n(count_dict, n=10):
    """Return the ``n`` most frequent entries of a count mapping.

    Args:
        count_dict: Mapping of key (here, a time zone) to its count.
        n: Maximum number of entries to return. Defaults to 10.

    Returns:
        A list of ``(count, key)`` tuples sorted in ascending order, so the
        most frequent entry comes last. The list holds fewer than ``n``
        tuples when the mapping is smaller, and is empty when ``n <= 0``.
    """
    # Guard first: a slice of [-0:] is [0:], which would return every entry
    # instead of none.
    if n <= 0:
        return []

    # Tuples compare by count first, then by key, which breaks ties.
    ranked = sorted((count, time_zone) for time_zone, count in count_dict.items())
    return ranked[-n:]



# Print up to ten of the most frequent time zones as (count, time_zone)
# pairs, least frequent first.
print(top_n(count_dict))

# Repeat the analysis with pandas, building a DataFrame from the records.
fd = DataFrame(records)

# First ten values of the 'tz' column.
print(fd['tz'].head(10))

print(fd['tz'].value_counts())

# Give rows whose 'tz' value is missing an explicit label.
clean_tz = fd['tz'].fillna('Missing')

# Boolean-mask assignment: relabel every empty-string time zone at once,
# without looping over the elements.
clean_tz[clean_tz == ''] = 'Unknown'

# Count the occurrences of each time zone label.
tz_count = clean_tz.value_counts()

# Horizontal bar chart of the first ten entries of the counts.
tz_count.head(10).plot(kind='barh', rot=0)

plt.show()



# 用matplotlib画图(1) (Plotting with matplotlib, part 1)