Python项目之企业审批流绩效分析应用1
结合词频统计的功能,联想到可以应用于企业审批流程回退意见的词频分析,是企业流程绩效分析的扩展之一。
技术路线:jieba分词,wordcloud绘制特定形状词云
#bpmRejectAnalyzeV1.py
import jieba
import jieba.posseg as pseg
from os import path
from scipy.misc import imread
from wordcloud import WordCloud
import matplotlib.pyplot as plt
def getTxt(txt):
    """Read a UTF-8 text file and return its lines.

    Args:
        txt: Path of the text file to read.

    Returns:
        A list with one string per line of the file. Line endings are kept,
        exactly as ``readlines()`` yields them.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # 'utf-8-sig' reads plain UTF-8 unchanged and additionally drops a leading
    # byte-order mark. With plain 'utf-8' a BOM would survive as '\ufeff'
    # glued to the first word of the first line.
    with open(txt, 'r', encoding='utf-8-sig') as f:
        return f.readlines()
def segmentWords(txtlist, stopwords_path='stopwords.txt'):
    """Segment each text line and keep the nouns that are not stop words.

    Args:
        txtlist: Iterable of text lines to segment.
        stopwords_path: Path of a UTF-8 file holding one stop word per line.
            Defaults to ``'stopwords.txt'``, resolved against the current
            working directory.

    Returns:
        A flat list of the kept words, in the order they occur in the input.
        Duplicates are kept so that the caller can count frequencies.

    Raises:
        OSError: If the stop-word file cannot be opened.
    """
    # The context manager closes the file handle; 'utf-8-sig' also accepts a
    # BOM-prefixed file, where the first stop word would otherwise carry a
    # leading '\ufeff' and never match.
    with open(stopwords_path, encoding='utf-8-sig') as f:
        stop_words = {line.strip() for line in f}

    kept_words = []
    for subject in txtlist:
        # Blank lines carry no words; skip them without calling the tagger.
        if not subject.strip():
            continue
        for word, flag in pseg.cut(subject):
            # Exact match on the tag 'n' (jieba's tag for common nouns):
            # other tags, including the more specific noun tags that merely
            # start with 'n', are dropped.
            if flag == 'n' and word not in stop_words:
                kept_words.append(word)
    return kept_words
def drawPlant(newslist):
    """Render the words as a mask-shaped word cloud, save it and show it.

    The mask image ``mickey.png`` is read from the directory that contains
    this file. The cloud is written to ``wordcloud.jpg`` (relative to the
    current working directory) and then displayed in a matplotlib window.

    Args:
        newslist: Iterable of words; they are joined with single spaces and
            handed to ``WordCloud.generate``, so repeated words weigh more.

    Returns:
        None.
    """
    base_dir = path.dirname(__file__)

    # scipy.misc.imread was removed in SciPy 1.2, so the mask is loaded with
    # matplotlib, which this function already depends on. On such SciPy
    # versions the file-level `from scipy.misc import imread` is no longer
    # needed by this function and has to be dropped for the module to import.
    mask_image = plt.imread(path.join(base_dir, "mickey.png"))
    if mask_image.dtype.kind == 'f':
        # plt.imread returns PNG data as floats in [0, 1], while WordCloud
        # expects an 8-bit mask in which the value 255 marks the masked-out
        # (white) area. Scale back to 0..255.
        mask_image = (mask_image * 255).round().astype('uint8')

    content = ' '.join(newslist)
    cloud = WordCloud(
        font_path='simhei.ttf',
        background_color="white",
        mask=mask_image,
        max_words=40,
    ).generate(content)

    # Display the generated image; the file is saved before the blocking
    # plt.show() call so it exists even if the window is never closed.
    plt.imshow(cloud)
    plt.axis("off")
    cloud.to_file('wordcloud.jpg')
    plt.show()
def countWords(newslist, top_n=100):
    """Print the most frequent words, one ``word:count`` pair per line.

    Args:
        newslist: Iterable of words (duplicates expected).
        top_n: Maximum number of entries to print; expected to be
            non-negative. Defaults to 100.

    Returns:
        None. The ranking is written to standard output, highest count
        first; words with equal counts appear in first-seen order.
    """
    frequencies = {}
    for word in newslist:
        frequencies[word] = frequencies.get(word, 0) + 1

    # sorted() is stable even with reverse=True, so ties keep the insertion
    # order of the dict, i.e. the order in which the words were first seen.
    ranked = sorted(frequencies.items(), key=lambda item: item[1], reverse=True)

    # Slice instead of indexing 0..top_n-1: an input with fewer than top_n
    # distinct words prints what is available rather than raising IndexError.
    for word, count in ranked[:top_n]:
        print("{}:{}".format(word, count))
def main():
    """Run the analysis: load the text, segment it, print and draw the result.

    Reads ``bpmreject.txt`` (relative to the current working directory),
    segments it into words, prints the word-frequency ranking and finally
    renders the word cloud.
    """
    txtlist = getTxt('bpmreject.txt')
    wordlist = segmentWords(txtlist)
    countWords(wordlist)
    drawPlant(wordlist)


# Run the pipeline only when the file is executed as a script, so that
# importing the module does not trigger file reads and plotting.
if __name__ == '__main__':
    main()