import java.io.BufferedReader;import java.io.File;import java.io.FileReader;import java.util.ArrayList;import java.util.Iterator;import java.util.List;import java.util.Set;import java.util.TreeSet; public class TestTxt { File file; String c
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
/**
 * Counts how often each word occurs in a piece of text, typically loaded from a file.
 *
 * <p>Usage: assign {@code content} (for example from {@link #txtToString(File)}), call
 * {@link #splitWord()} and then {@link #countWordFreq()}. Afterwards {@code words} and
 * {@code wordFreqs} are parallel arrays ordered by word.
 */
public class TestTxt {
    /** File the text is read from (set by {@code main}). */
    File file;
    /** Text being analysed; {@link #splitWord()} overwrites it with the punctuation-stripped form. */
    String content;
    /** Every token of the text in order of appearance, duplicates included. */
    String[] rawWords;
    /** Distinct words in natural (case-sensitive) {@code String} order. */
    String[] words;
    /** Occurrence counts; {@code wordFreqs[i]} is the count of {@code words[i]}. */
    int[] wordFreqs;

    /**
     * Reads a whole text file into one string.
     *
     * <p>Each line is terminated with a single line-feed character, whatever line separator the
     * file itself uses. Reading is best-effort: if the file cannot be opened or read, the stack
     * trace is printed and the text read so far (possibly the empty string) is returned. The file
     * is decoded with {@link FileReader}'s default charset.
     *
     * @param file file to read
     * @return the text of the file with line-feed line endings; never {@code null}
     */
    public static String txtToString(File file) {
        StringBuilder text = new StringBuilder();
        try {
            BufferedReader reader = new BufferedReader(new FileReader(file));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    text.append(line).append('\n');
                }
            } finally {
                // Close even when readLine() fails so the file handle is not leaked.
                reader.close();
            }
        } catch (Exception e) {
            // Deliberately best-effort: report the problem and return what was read.
            e.printStackTrace();
        }
        return text.toString();
    }

    /**
     * Splits {@code content} into tokens and stores them in {@code rawWords}.
     *
     * <p>The characters {@code ' , . ( ) -} are replaced by spaces (the replaced text is written
     * back to {@code content}); everything separated by whitespace then counts as one word.
     * Null, empty or whitespace-only content produces an empty {@code rawWords} array.
     */
    public void splitWord() {
        if (content == null) {
            rawWords = new String[0];
            return;
        }
        final char SPACE = ' ';
        content = content.replace('\'', SPACE).replace(',', SPACE).replace('.', SPACE)
                .replace('(', SPACE).replace(')', SPACE).replace('-', SPACE);
        // Trim first: split() would otherwise emit a leading "" token for text that starts with
        // whitespace or punctuation, and "".split(...) returns [""] rather than an empty array.
        String trimmed = content.trim();
        rawWords = trimmed.length() == 0 ? new String[0] : trimmed.split("\\s+");
    }

    /**
     * Counts the occurrences of every distinct token in {@code rawWords}.
     *
     * <p>Fills {@code words} with the distinct tokens in sorted order and {@code wordFreqs} with
     * the matching counts, and prints the sorted set of distinct tokens to standard output.
     * A null {@code rawWords} is treated as "no words".
     */
    public void countWordFreq() {
        String[] tokens = rawWords == null ? new String[0] : rawWords;

        // A sorted map gives the distinct words in order and their counts in a single pass.
        Map<String, Integer> counts = new TreeMap<String, Integer>();
        for (String token : tokens) {
            Integer seen = counts.get(token);
            counts.put(token, seen == null ? 1 : seen + 1);
        }
        System.out.println(counts.keySet());

        words = new String[counts.size()];
        wordFreqs = new int[counts.size()];
        int index = 0;
        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
            words[index] = entry.getKey();
            wordFreqs[index] = entry.getValue();
            index++;
        }
    }

    /**
     * Prints the word frequencies of a text file, one {@code word:count} pair per line.
     *
     * @param args optional; {@code args[0]} is the path of the file to analyse, defaulting to
     *     {@code D:/test1.txt} when absent
     */
    public static void main(String[] args) {
        String path = args.length > 0 ? args[0] : "D:/test1.txt";
        TestTxt t = new TestTxt();
        t.file = new File(path);
        t.content = txtToString(t.file);
        t.splitWord();
        t.countWordFreq();
        for (int i = 0; i < t.wordFreqs.length; i++) {
            System.out.println(t.words[i] + ":" + t.wordFreqs[i]);
        }
    }
}
