import java.io.BufferedReader;import java.io.File;import java.io.FileReader;import java.util.ArrayList;import java.util.Iterator;import java.util.List;import java.util.Set;import java.util.TreeSet; public class TestTxt { File file; String c
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

/**
 * Counts how often each word occurs in a text file.
 *
 * <p>Typical use: set {@link #content} (for example from {@link #txtToString(File)}),
 * call {@link #splitWord()} and then {@link #countWordFreq()}; afterwards
 * {@link #words} and {@link #wordFreqs} hold the result as two parallel arrays.
 */
public class TestTxt {

    /** File read by {@link #main(String[])} when no path is given on the command line. */
    private static final String DEFAULT_PATH = "D:/test1.txt";

    /** Every recognised punctuation mark is replaced by this character before splitting. */
    private static final char SPACE = ' ';

    /** The file the text is read from. */
    File file;

    /** The text being analysed; {@link #splitWord()} overwrites it with the punctuation-free form. */
    String content;

    /** Every token of {@link #content} in order of appearance, duplicates included. */
    String[] rawWords;

    /** The distinct words of {@link #rawWords}, in natural (sorted) string order. */
    String[] words;

    /** Occurrence counts; {@code wordFreqs[i]} belongs to {@code words[i]}. */
    int[] wordFreqs;

    /**
     * Reads a text file into a single string, terminating every line with {@code '\n'}.
     *
     * <p>The file is decoded with the platform default charset (that is what
     * {@link FileReader} uses). Reading is best-effort: if an exception occurs, its
     * stack trace is printed and whatever was read up to that point is returned.
     *
     * @param file the file to read
     * @return the file content, or an empty string if nothing could be read
     */
    public static String txtToString(File file) {
        StringBuilder result = new StringBuilder();
        BufferedReader br = null;
        try {
            br = new BufferedReader(new FileReader(file));
            String line;
            while ((line = br.readLine()) != null) {
                result.append(line).append('\n');
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close in finally so the reader is released even when readLine() fails.
            if (br != null) {
                try {
                    br.close();
                } catch (Exception ignored) {
                    // Nothing useful can be done if closing fails; the data is already read.
                }
            }
        }
        return result.toString();
    }

    /**
     * Splits {@link #content} into words and stores them in {@link #rawWords}.
     *
     * <p>The characters {@code ' , . ( ) -} are replaced by spaces first (the replaced
     * text is written back to {@link #content}), then the text is split on runs of
     * whitespace. Surrounding whitespace is trimmed before splitting so that no empty
     * token is produced; {@code null} or blank content yields an empty array.
     */
    public void splitWord() {
        if (content == null) {
            rawWords = new String[0];
            return;
        }
        content = content.replace('\'', SPACE).replace(',', SPACE).replace('.', SPACE)
                .replace('(', SPACE).replace(')', SPACE).replace('-', SPACE);
        // Without the trim, text starting with a delimiter would yield a leading "" token.
        String trimmed = content.trim();
        rawWords = trimmed.length() == 0 ? new String[0] : trimmed.split("\\s+");
    }

    /**
     * Counts the occurrences of every distinct entry of {@link #rawWords}.
     *
     * <p>Prints the sorted set of distinct words to standard output, then fills
     * {@link #words} (sorted) and {@link #wordFreqs} (same order). A {@code null}
     * {@link #rawWords} is treated as "no words".
     */
    public void countWordFreq() {
        // A sorted map gives the distinct words in order and counts them in a single pass.
        Map<String, Integer> counts = new TreeMap<String, Integer>();
        if (rawWords != null) {
            for (String word : rawWords) {
                Integer seen = counts.get(word);
                counts.put(word, seen == null ? 1 : seen + 1);
            }
        }
        System.out.println(counts.keySet());

        words = new String[counts.size()];
        wordFreqs = new int[counts.size()];
        int i = 0;
        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
            words[i] = entry.getKey();
            wordFreqs[i] = entry.getValue();
            i++;
        }
    }

    /**
     * Prints {@code word:count} for every distinct word of a text file.
     *
     * @param args optional; {@code args[0]} is the file to analyse, defaulting to
     *             {@code D:/test1.txt} when absent
     */
    public static void main(String[] args) {
        TestTxt t = new TestTxt();
        t.file = new File(args.length > 0 ? args[0] : DEFAULT_PATH);
        t.content = txtToString(t.file);
        t.splitWord();
        t.countWordFreq();
        for (int i = 0; i < t.wordFreqs.length; i++) {
            System.out.println(t.words[i] + ":" + t.wordFreqs[i]);
        }
    }
}