用 Java 实现文本词频统计，并把结果输出到指定的文件中
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.SortedMap;
import java.util.TreeMap;
import javax.jws.Oneway;
/**
 * Counts how often each word occurs in a text file and writes one {@code word: count} line per
 * distinct word to an output file, most frequent word first.
 *
 * <p>A word is a maximal run of ASCII letters; every other character is a separator. Counting is
 * case-sensitive, so {@code The} and {@code the} are different words.
 */
public class WordCount {

    /** Input file read when no path is given on the command line. */
    private static final String DEFAULT_INPUT = "C:/work/word.txt";

    /** Output file written when no path is given on the command line. */
    private static final String DEFAULT_OUTPUT = "2017113782.txt";

    /** Regex matching any single character that is not an ASCII letter. */
    private static final String NON_LETTER = "[^a-zA-Z]";

    /**
     * Reads the input file, counts its words and writes the frequency table to the output file.
     *
     * @param args optional: {@code args[0]} is the input path and {@code args[1]} the output
     *     path; whichever is missing falls back to the built-in default
     * @throws IOException if the input cannot be read or the output cannot be written
     */
    public static void main(String[] args) throws IOException {
        String inputPath = argOrDefault(args, 0, DEFAULT_INPUT);
        String outputPath = argOrDefault(args, 1, DEFAULT_OUTPUT);

        // Count first: the output file is only created/truncated once the input was read.
        Map<String, Integer> counts = countWords(inputPath);

        // Write straight to the file stream instead of redirecting the global System.out.
        try (PrintStream out = new PrintStream(new FileOutputStream(outputPath))) {
            printByFrequency(counts, out);
            // PrintStream swallows I/O errors; surface them instead of silently truncating.
            if (out.checkError()) {
                throw new IOException("Failed to write word counts to " + outputPath);
            }
        }
    }

    /**
     * Prints the entries of {@code oldmap} to {@code System.out}, one {@code key: value} line
     * each, ordered by value from highest to lowest. Entries with equal values keep the
     * iteration order of {@code oldmap}.
     *
     * @param oldmap word-to-count mapping to print; it is not modified
     */
    public static void SortMap(Map<String, Integer> oldmap) {
        printByFrequency(oldmap, System.out);
    }

    /** Returns {@code args[index]} if present, otherwise {@code fallback}. */
    private static String argOrDefault(String[] args, int index, String fallback) {
        return (args != null && args.length > index) ? args[index] : fallback;
    }

    /** Reads {@code path} line by line and tallies every run of ASCII letters. */
    private static Map<String, Integer> countWords(String path) throws IOException {
        // FileReader decodes the file with the JVM's default charset.
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            // TreeMap keeps the words sorted, which later decides the order of equal counts.
            Map<String, Integer> counts = new TreeMap<String, Integer>();
            String line;
            while ((line = reader.readLine()) != null) {
                for (String word : line.split(NON_LETTER)) {
                    // Adjacent separators produce empty tokens; skip them.
                    if (word.isEmpty()) {
                        continue;
                    }
                    Integer seen = counts.get(word);
                    counts.put(word, seen == null ? 1 : seen + 1);
                }
            }
            return counts;
        }
    }

    /** Writes {@code counts} to {@code out} as {@code key: value} lines, highest value first. */
    private static void printByFrequency(Map<String, Integer> counts, PrintStream out) {
        List<Map.Entry<String, Integer>> entries =
                new ArrayList<Map.Entry<String, Integer>>(counts.entrySet());
        // Collections.sort is stable, so equal counts stay in the map's iteration order.
        Collections.sort(entries, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
                // Descending; Integer.compare cannot overflow the way subtraction can.
                return Integer.compare(o2.getValue(), o1.getValue());
            }
        });
        for (Map.Entry<String, Integer> entry : entries) {
            out.println(entry.getKey() + ": " + entry.getValue());
        }
    }
}
the: 12
China: 9
Boeing: 6
Eastern: 6
Airlines: 5
MAX: 5
to: 5
and: 4
for: 4
has: 4
in: 4
The: 3
compensation: 3
it: 3
on: 3
with: 3
Paper: 2
a: 2
been: 2
by: 2
from: 2
had: 2
issue: 2
more: 2
of: 2
two: 2
According: 1
Air: 1
At: 1
Bloomberg: 1
Co: 1
Corp: 1
Fourteen: 1
Ltd: 1
March: 1
Southern: 1
Tuesday: 1
Wednesday: 1
about: 1
adding: 1
after: 1
airline: 1
an: 1
any: 1
are: 1
as: 1
ask: 1
asks: 1
caused: 1
cited: 1
claims: 1
clients: 1
communication: 1
communications: 1
companies: 1
company: 1
confirmed: 1
controllable: 1
crashes: 1
deadly: 1
delivery: 1
disruption: 1
effect: 1
first: 1
grounded: 1
grounding: 1
have: 1
impact: 1
industry: 1
its: 1
jets: 1
knowledge: 1
last: 1
late: 1
limited: 1
lives: 1
lodged: 1
losses: 1
makes: 1
matter: 1
months: 1
move: 1
news: 1
not: 1
officially: 1
outlet: 1
over: 1
people: 1
preliminary: 1
present: 1
previously: 1
quoted: 1
refused: 1
report: 1
representative: 1
s: 1
saying: 1
seek: 1
since: 1
six: 1
stay: 1
stoppages: 1
stressed: 1
suffered: 1
suspensions: 1
talk: 1
talks: 1
than: 1
that: 1
took: 1
under: 1
verified: 1
which: 1
whole: 1
will: 1
worldwide: 1
yet: 1