Python 微信朋友数据分析
1、 功能
- 微信朋友性别分布
- 微信朋友省份分布
- 微信朋友北京各区分布
- 微信朋友个性签名词云
2、 源码:
import itchat
import pandas as pd
import numpy as np
from pyecharts import Pie, Map, Style, Page, Bar
import os
from PIL import Image
import jieba
from wordcloud import WordCloud, ImageColorGenerator
import matplotlib.pyplot as plt
def get_attr(friends, key):
    """Collect the value stored under ``key`` for every friend record.

    Each record is read with ``.get(key)``, so a record that lacks the key
    contributes ``None`` to the result. The returned list has one entry per
    element of ``friends``, in the same order.
    """
    return [member.get(key) for member in friends]
def get_friends():
    """Log in to WeChat via itchat and gather per-friend attributes.

    Returns a dict whose keys are ``province``, ``city``, ``nickname``,
    ``sex``, ``signature`` and ``remarkname``; each value is a list with one
    entry per friend record returned by ``itchat.get_friends()``.
    """
    itchat.auto_login(hotReload=True)
    contacts = itchat.get_friends()

    # (output column, field name inside the itchat friend record)
    columns = (
        ("province", "Province"),
        ("city", "City"),
        ("nickname", "NickName"),
        ("sex", "Sex"),
        ("signature", "Signature"),
        ("remarkname", "RemarkName"),
    )
    return {column: get_attr(contacts, field) for column, field in columns}
def sex_stats(users):
    """Count friends per sex code and relabel the codes for display.

    Args:
        users: mapping with a ``'sex'`` entry holding one sex code per
            friend (the codes handled here are 0, 1 and 2).

    Returns:
        A ``(labels, counts)`` pair of dict views. Codes 0, 1 and 2 are
        renamed to '不告诉你', '帅哥' and '美女' respectively and always
        appear, in that order, after any other code that was present.
    """
    df = pd.DataFrame(users)
    counts = df.groupby('sex', as_index=True)['sex'].count()
    data = dict(zip(list(counts.index), list(counts)))

    # A code that no friend uses is absent from the groupby result; default
    # its count to 0 instead of letting ``pop`` raise KeyError.
    for code, label in ((0, '不告诉你'), (1, '帅哥'), (2, '美女')):
        data[label] = data.pop(code, 0)
    return data.keys(), data.values()
def create_sex_charts(users, output_file):
    """Render a pie chart of the friends' sex distribution.

    Args:
        users: friend attribute mapping, passed straight to ``sex_stats``.
        output_file: path of the HTML file the page is rendered to.
    """
    attr, value = sex_stats(users)

    chart = Pie('微信性别')
    # sex_stats returns dict views; hand the chart plain lists instead.
    chart.add('', list(attr), list(value), center=[50, 50],
              radius=[30, 70], is_label_show=True,
              legend_orient='horizontal', legend_pos='center',
              legend_top='bottom', is_area_show=True)

    page = Page()
    page.add(chart)
    page.render(output_file)
def prov_stats(users):
    """Count friends per province, ordered by ascending count.

    Returns a ``(names, counts)`` pair of lists; an empty province name is
    reported as '未知'.
    """
    frame = pd.DataFrame(users)
    per_province = (
        frame.groupby('province', as_index=True)['province']
        .count()
        .sort_values()
    )
    names = [name if name != '' else '未知' for name in per_province.index]
    return names, list(per_province)
def create_prov_charts(users, output_file):
    """Render a China map and a bar chart of friends per province.

    Args:
        users: friend attribute mapping, passed straight to ``prov_stats``.
        output_file: path of the HTML file the page is rendered to.
    """
    attr, value = prov_stats(users)

    style = Style(width=1100, height=600)
    style_middle = Style(width=900, height=500)
    page = Page()

    # ``init_style`` holds chart constructor keyword arguments (pyecharts
    # 0.5.x). It must be unpacked with ``**``: passed positionally it is
    # taken as the second positional parameter (the subtitle) and the
    # configured width/height never reach the chart.
    chart = Map('中国地图', **style.init_style)
    chart.add('', attr, value, is_label_show=True, is_visualmap=True,
              visual_text_color='#000')
    page.add(chart)

    chart = Bar('柱状图', **style_middle.init_style)
    chart.add('', attr, value, is_stack=True, is_convert=True,
              label_pos='inside', is_legend_show=True, is_label_show=True)
    page.add(chart)

    page.render(output_file)
def city_stats(users):
    """Count friends per city among those whose province is '北京'.

    Returns a ``(names, counts)`` pair of lists ordered by ascending count.
    Each non-empty city name gets a '区' suffix; an empty name is reported
    as '未知'.
    """
    frame = pd.DataFrame(users)
    beijing = frame[frame['province'] == '北京']
    per_city = beijing.groupby('city', as_index=True)['city'].count().sort_values()

    names = []
    for name in per_city.index:
        if name != '':
            names.append('%s区' % name)
        else:
            names.append('未知')
    return names, list(per_city)
def create_city_charts(users, output_file):
    """Render a Beijing map and a bar chart of friends per Beijing city.

    Args:
        users: friend attribute mapping, passed straight to ``city_stats``.
        output_file: path of the HTML file the page is rendered to.
    """
    attr, value = city_stats(users)

    style = Style(width=1100, height=600)
    style_middle = Style(width=900, height=500)
    page = Page()

    # ``init_style`` holds chart constructor keyword arguments (pyecharts
    # 0.5.x). It must be unpacked with ``**``: passed positionally it is
    # taken as the second positional parameter (the subtitle) and the
    # configured width/height never reach the chart.
    chart = Map('北京', **style.init_style)
    chart.add('', attr, value, maptype='北京', is_label_show=True,
              is_visualmap=True, visual_text_color='#000')
    page.add(chart)

    chart = Bar('柱状图', **style_middle.init_style)
    chart.add('', attr, value, is_stack=True, is_convert=True,
              label_pos='inside', is_label_show=True)
    page.add(chart)

    page.render(output_file)
def jieba_cut(users):
    """Tokenize all friends' signatures with jieba in full mode.

    Args:
        users: mapping with a ``'signature'`` entry holding one signature
            per friend.

    Returns:
        Whatever ``jieba.cut(..., cut_all=True)`` returns for the combined
        signature text.
    """
    # Skip missing/empty signatures: str.join raises TypeError on None.
    signatures = [text for text in users['signature'] if text]
    # Separate signatures with a space so the tokenizer cannot build a word
    # out of the end of one signature and the start of the next.
    words = ' '.join(signatures)
    return jieba.cut(words, cut_all=True)
def create_wc(words_list):
    """Build a word cloud from ``words_list`` and display it.

    The mask image ``wechat.png`` and the font ``STXINGKA.TTF`` are both
    looked up in the current working directory.

    Args:
        words_list: iterable of word strings; they are joined with spaces
            before being handed to ``WordCloud.generate``.
    """
    base_dir = os.getcwd()
    res_png = os.path.join(base_dir, 'wechat.png')
    print(res_png)

    words = ' '.join(words_list)

    # Close the image file as soon as its pixels are copied into the array.
    with Image.open(res_png) as mask_image:
        back_pic = np.array(mask_image)

    # Markup/emoji residue tokens that should not appear in the cloud.
    stopwords = {'class', 'span', 'emoji', 'emoji1f388', 'emoji1f604'}

    wc = WordCloud(
        background_color="white",
        margin=0,
        # os.path.join supplies the separator that plain string
        # concatenation with os.getcwd() leaves out.
        font_path=os.path.join(base_dir, 'STXINGKA.TTF'),
        mask=back_pic,
        max_font_size=70,
        stopwords=stopwords,
    ).generate(words)

    plt.imshow(wc)
    plt.axis('off')
    plt.show()
if __name__ == '__main__':
    # Script entry point: fetch the friend data, then produce each report
    # in turn. The HTML reports are written to the current working directory.
    users = get_friends()

    # os.path.join inserts the path separator; os.getcwd() has no trailing
    # separator, so plain concatenation would produce a mangled file name
    # located in the parent directory.
    out_dir = os.getcwd()
    sex_chart_file = os.path.join(out_dir, 'sex_chart.html')
    prov_chart_file = os.path.join(out_dir, 'prov_chart.html')
    city_chart_file = os.path.join(out_dir, 'city_chart.html')

    create_sex_charts(users, sex_chart_file)
    print('sex charts created')
    create_prov_charts(users, prov_chart_file)
    print('prov charts created')
    create_city_charts(users, city_chart_file)
    print('city charts created')

    word_list = jieba_cut(users)
    create_wc(word_list)
    print('word cloud created')
3、效果