# 阶段作业1:完整的中英文词频统计 (Stage assignment 1: complete Chinese and English word-frequency statistics)

# Sample English song lyrics kept inline as a multi-line string literal.
# NOTE(review): the name `str` rebinds Python's built-in `str` type for the
# rest of the module, and the code visible below never reads this variable
# (it loads its text from a file instead). Consider renaming it; confirm
# first that nothing outside this view depends on the name.
str='''If I should stay I would only be in your way

So I'll go But I know

I'll think of your every step of the way

And I will always love you Will always love you

You my darling you Bitter-sweet memories

That is all I'm taking with me

So goodbye Please don't cry We both know I'm not

What you need And I will always love you

I will always love you

I hope life treats you kind

And I hope you have all you dreamed of

And I wish you joy and happiness

But above all this Ii wish your love

And I will always love you

I will always love you I will always love you

I will always love you I will always love you

I will always love you Darling I love you

I'll always love you'''


# English word-frequency count: read the lyrics file, normalise the text,
# count the words and print the 20 most frequent ones.
from collections import Counter

# Characters that are turned into spaces before the text is split into words.
# The apostrophe is included, so "I'll" becomes the two tokens "i" and "ll".
PUNCTUATION = ",.;:'`"

# Function words (articles, conjunctions, ...) that carry no meaning of their
# own and are left out of the statistics.
STOP_WORDS = frozenset({'a', 'the', 'and', 'if', 'do', 'of'})


def split_words(text, punctuation=PUNCTUATION):
    """Return the lower-cased words of *text* as a list.

    Every character listed in *punctuation* is replaced by a space, then the
    text is split on runs of whitespace.
    """
    text = text.lower()
    for mark in punctuation:
        # Strings are immutable: replace() returns a new string, so the
        # result has to be assigned back or the substitution is lost.
        text = text.replace(mark, ' ')
    return text.split()


def count_words(words, stop_words=STOP_WORDS):
    """Return ``(word, count)`` pairs sorted by descending count.

    Words contained in *stop_words* are ignored. Words with equal counts keep
    the order in which they first appear in *words*, so the result is
    deterministic. An empty input yields an empty list.
    """
    counts = Counter(word for word in words if word not in stop_words)
    return counts.most_common()


if __name__ == "__main__":
    # Read the text file; the context manager closes it even on error.
    with open('英文歌词.txt', 'r', encoding='utf-8') as f:
        Go = f.read()
    print(Go)

    # Pre-process and split into words.
    firelist = split_words(Go)
    print(firelist)

    # Count every distinct non-stop word, most frequent first.
    wordlist = count_words(firelist)
    firedict = dict(wordlist)
    print(len(firedict), firedict)
    print(wordlist)

    # Print the top 20; slicing copes with texts that have fewer than
    # 20 distinct words instead of raising IndexError.
    for entry in wordlist[:20]:
        print(entry)

# 阶段作业1:完整的中英文词频统计 (Stage assignment 1: complete Chinese and English word-frequency statistics)

 

 

 

# Chinese text frequency count: read the novel, count its characters and
# print the 20 most frequent ones.
from collections import Counter


def char_frequencies(text):
    """Return ``(character, count)`` pairs sorted by descending count.

    Only alphanumeric characters are counted (CJK ideographs qualify), so
    whitespace, line breaks and punctuation do not show up in the ranking.
    Characters with equal counts are ordered by ascending code point.

    NOTE(review): this counts single characters, not words. Real Chinese
    word statistics need a word-segmentation step, which this file does not
    have available.
    """
    counts = Counter(ch for ch in text if ch.isalnum())
    return sorted(counts.items(), key=lambda pair: (-pair[1], pair[0]))


if __name__ == "__main__":
    # Read the text file; the context manager closes it even on error.
    with open('百万英镑.txt', 'r', encoding='utf-8') as asd:
        strasd = asd.read()
    print(strasd)

    # Frequency ranking, most frequent first.
    wcList = char_frequencies(strasd)

    # Distinct characters and their counts.
    strDict = dict(wcList)
    strGoSet = set(strDict)
    print(len(strGoSet), strGoSet)
    print(len(strDict), strDict)

    # Alphabetical (code point) listing, then the ranking by frequency.
    print(sorted(strDict.items()))
    print(wcList)

    # Print the top 20; slicing copes with texts that have fewer than
    # 20 distinct characters instead of raising IndexError.
    for entry in wcList[:20]:
        print(entry)

# 阶段作业1:完整的中英文词频统计 (Stage assignment 1: complete Chinese and English word-frequency statistics)