当前位置 : 主页 > 编程语言 > c++ >

文件编码的识别-GBK or UTF-8

来源:互联网 收集:自由互联 发布时间:2021-07-03
摘要:输入文件(File)或文件路径(String),输出该文件的编码格式(GBK 或 UTF-8)。完整的 Java 源码见下文。
输入文件或文件路径,输出编码格式
package net.vicp.fyhui.van.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;


/**
 * Heuristic detector that guesses whether a text file is encoded as GBK or UTF-8.
 *
 * <p>The file is decoded twice, once with each charset, and the lengths of the
 * two decoded texts are compared: the decoding that yields fewer characters
 * is reported as the file's encoding.
 */
public class CodeType {

	/** Charset name used for the GBK decoding pass and returned for GBK files. */
	private static final String GBK = "GBK";

	/** Charset name used for the UTF-8 decoding pass and returned for UTF-8 files. */
	private static final String UTF8 = "UTF-8";

	/** Kept for backward compatibility; the class only offers static helpers. */
	public CodeType() {
	}

	/**
	 * Guesses the encoding of a text file.
	 *
	 * <p>The result is reliable when the file contains Chinese text. A file whose
	 * two decodings have the same length (for example pure ASCII) is reported as
	 * {@code "UTF-8"}.
	 *
	 * @param object the file to inspect, either a {@link File} or a {@link String} path
	 * @return {@code "GBK"} or {@code "UTF-8"}
	 * @throws FileNotFoundException if the file cannot be opened for reading
	 * @throws IOException if reading the file fails
	 * @throws NullPointerException if {@code object} is {@code null}
	 * @throws IllegalArgumentException if {@code object} is neither a {@code File}
	 *         nor a {@code String}
	 */
	public static String whichEncoding(Object object) throws IOException, FileNotFoundException {
		if (object == null) {
			throw new NullPointerException("object must not be null");
		}
		if (!(object instanceof File) && !(object instanceof String)) {
			throw new IllegalArgumentException(
					"object must be a File or a String path, got " + object.getClass().getName());
		}

		StringBuilder gbkText = new StringBuilder();
		StringBuilder utf8Text = new StringBuilder();
		// try-with-resources closes both readers (and the underlying file streams)
		// on every exit path, including exceptions thrown while reading.
		try (BufferedReader gbkReader = openReader(object, GBK);
				BufferedReader utf8Reader = openReader(object, UTF8)) {
			// Index up to which both texts are already known to be identical.
			// Starts at 3 because the first three characters are never compared.
			int verified = 3;
			String gbkLine;
			String utf8Line;
			// Read both decodings line by line, in lock-step.
			while ((gbkLine = gbkReader.readLine()) != null
					&& (utf8Line = utf8Reader.readLine()) != null) {
				gbkText.append(gbkLine);
				utf8Text.append(utf8Line);
				int length = gbkText.length();
				if (length == utf8Text.length() && length > 2) {
					// Same length but different content: stop reading here. The
					// lengths are equal, so the result below will be UTF-8.
					if (!sameChars(gbkText, utf8Text, verified, length)) {
						break;
					}
					// Both buffers are append-only, so the compared prefix stays
					// equal and never needs to be re-checked.
					verified = length;
				}
			}
		}
		// Ties (e.g. ASCII-only content) resolve to UTF-8.
		return gbkText.length() < utf8Text.length() ? GBK : UTF8;
	}

	/** Opens {@code source} (a File or a String path) as a buffered reader using the given charset. */
	private static BufferedReader openReader(Object source, String charsetName) throws IOException {
		FileInputStream in = (source instanceof File)
				? new FileInputStream((File) source)
				: new FileInputStream((String) source);
		try {
			return new BufferedReader(new InputStreamReader(in, charsetName));
		} catch (IOException e) {
			// Do not leak the file handle if the charset is not supported.
			in.close();
			throw e;
		}
	}

	/** Returns whether {@code left} and {@code right} hold the same characters in {@code [from, to)}. */
	private static boolean sameChars(CharSequence left, CharSequence right, int from, int to) {
		for (int i = from; i < to; i++) {
			if (left.charAt(i) != right.charAt(i)) {
				return false;
			}
		}
		return true;
	}
}
网友评论