当前位置 : 主页 > 编程语言 > c++ >

文件转码 GBK UTF-8互转,添删BOM,Easy CHM合成前转码

来源:互联网 收集:自由互联 发布时间:2021-06-30
CharsetUnits.java package linwancheng.charset;import java.io.File;import java.io.FileFilter;import java.io.FileInputStream;import java.io.FileOutputStream;import java.io.BufferedReader;import java.io.BufferedWriter;import java.io.InputStreamReader;…
CharsetUnits.java
package linwancheng.charset;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.ArrayList;

/**
 * File transcoding utility: converts text files from one character set to another, can add or
 * remove the UTF-8 byte order mark (BOM), and can insert an HTML4 charset declaration for old
 * CHM readers.
 *
 * @author linWanCheng
 * @version 2.0
 */
public class CharsetUnits {
    /**
     * The byte order mark as it appears in decoded text (code point U+FEFF). The bytes
     * EF BB BF decode to this single character when a file is read as UTF-8, so it is spelled as
     * a character literal instead of being decoded with the platform default charset.
     */
    final static String BOM = "\uFEFF";

    /*
     * NOTE(review): the two string literals used by the HTML4-compatibility step (the line prefix
     * that is searched for, and the tag that is written) contained HTML markup and were missing
     * from the copy of this source that was available. H4_ANCHOR and the tag built in
     * html4CharsetTag() are reconstructions based on the surrounding comment ("add an HTML4 tag
     * for old CHM readers") -- confirm them against the original before relying on this step.
     */
    private static final String H4_ANCHOR = "<head>";

    /**
     * Converts every file under {@code srcfile} into {@code descfile}, removing any BOM and
     * without adding the HTML4 tag.
     *
     * @param filter     filter applied to directory listings; may be {@code null} to accept all
     * @param inCharset  charset name used to decode the source files
     * @param outCharset charset name used to encode the converted files
     */
    public static void src2desc(FileFilter filter, String inCharset, String outCharset) {
        src2desc(filter, "srcfile", "descfile", inCharset, outCharset, false, -1);
    }

    /**
     * Converts every file under {@code srcfile} into {@code descfile}.
     *
     * @param filter       filter applied to directory listings; may be {@code null}
     * @param inCharset    charset name used to decode the source files
     * @param outCharset   charset name used to encode the converted files
     * @param addH4charset whether to insert the HTML4 charset tag
     * @param addBOM       positive: add a BOM, negative: remove it, zero: leave unchanged
     */
    public static void src2desc(FileFilter filter, String inCharset, String outCharset, boolean addH4charset,
            int addBOM) {
        src2desc(filter, "srcfile", "descfile", inCharset, outCharset, addH4charset, addBOM);
    }

    /**
     * Converts a whole directory tree, mirroring its layout under {@code descPath}.
     *
     * @param filter       filter applied to directory listings, may be {@code null}, or e.g.
     *                     {@code new NameEnd("htm", "html")}
     * @param srcPath      root of the files to convert
     * @param descPath     root directory the converted files are written to
     * @param inCharset    charset name used to decode the source files
     * @param outCharset   charset name used to encode the converted files
     * @param addH4charset whether to insert the HTML4 charset tag
     * @param addBOM       positive: add a BOM, negative: remove it, zero: leave unchanged
     * @throws RuntimeException if a file cannot be read or written
     */
    public static void src2desc(FileFilter filter, String srcPath, String descPath, String inCharset,
            String outCharset, boolean addH4charset, int addBOM) {
        ArrayList<File> fileList = new ArrayList<File>();
        File root = new File(srcPath);
        listDeep(fileList, filter, false, true, root);

        // Every collected path starts with the root's path, so cutting that prefix yields the
        // location relative to the root on any platform and for roots with several components.
        int rootLength = root.getPath().length();
        for (int i = 0; i < fileList.size(); i++) {
            String inPath = fileList.get(i).getPath();
            System.out.println((i + 1) + "/" + fileList.size()); // show progress
            File outFile = new File(descPath, inPath.substring(rootLength));
            File outDir = outFile.getParentFile();
            if (outDir != null) {
                outDir.mkdirs();
            }
            ToUTF8(inPath, outFile.getPath(), inCharset, outCharset, addH4charset, addBOM);
        }
    }

    /**
     * Converts a single file line by line. Each line is written followed by the platform line
     * separator. An empty input produces an empty output, even when a BOM is requested. An
     * existing output file is overwritten.
     *
     * @param inPath       file to read
     * @param outPath      file to write (replaced if it exists)
     * @param inCharset    charset name used to decode {@code inPath}
     * @param outCharset   charset name used to encode {@code outPath}
     * @param addH4charset whether to insert the HTML4 charset tag
     * @param addBOM       positive: add a BOM if missing, negative: remove it if present,
     *                     zero: leave the first line unchanged
     * @throws RuntimeException wrapping the {@link IOException} if reading or writing fails or a
     *                          charset name is not supported
     */
    public static void ToUTF8(String inPath, String outPath, String inCharset, String outCharset,
            boolean addH4charset, int addBOM) {
        try {
            FileInputStream rawIn = new FileInputStream(inPath);
            try {
                BufferedReader in = new BufferedReader(new InputStreamReader(rawIn, inCharset));
                // Always truncate: opening in append mode would duplicate the content whenever
                // the output file already exists.
                FileOutputStream rawOut = new FileOutputStream(outPath);
                try {
                    BufferedWriter out = new BufferedWriter(new OutputStreamWriter(rawOut, outCharset));
                    boolean firstLine = true;
                    String readLine;
                    while ((readLine = in.readLine()) != null) {
                        if (firstLine) {
                            // A BOM can only sit at the very start of the file.
                            readLine = adjustBom(readLine, addBOM);
                            firstLine = false;
                        }
                        out.write(readLine);
                        out.newLine();
                        // Add the HTML4 tag for compatibility with old CHM readers.
                        if (addH4charset && readLine.startsWith(H4_ANCHOR)) {
                            out.write(html4CharsetTag(outCharset));
                            out.newLine();
                        }
                    }
                    // Closing the writer flushes the buffer and the encoder.
                    out.close();
                } finally {
                    rawOut.close();
                }
            } finally {
                rawIn.close();
            }
        } catch (IOException e) {
            throw new RuntimeException("Failed to convert " + inPath + " to " + outPath, e);
        }
    }

    /** Returns the first line with the BOM added, removed or untouched, as selected by addBOM. */
    private static String adjustBom(String firstLine, int addBOM) {
        if (addBOM == 0) {
            return firstLine;
        }
        boolean hasBom = firstLine.startsWith(BOM);
        if (addBOM > 0) {
            return hasBom ? firstLine : BOM + firstLine;
        }
        return hasBom ? firstLine.substring(BOM.length()) : firstLine;
    }

    /** Builds the HTML4 charset declaration (reconstructed, see the review note on H4_ANCHOR). */
    private static String html4CharsetTag(String charset) {
        return "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=" + charset + "\">";
    }

    /**
     * Tells whether a file starts with the UTF-8 BOM bytes EF BB BF.
     *
     * @param path file to inspect
     * @return {@code true} if the first three bytes are the UTF-8 BOM; {@code false} otherwise,
     *         including when the file is shorter than three bytes
     * @throws RuntimeException wrapping the {@link IOException} if the file cannot be read
     */
    public static boolean hasBOM(String path) {
        try {
            FileInputStream in = new FileInputStream(path);
            try {
                byte[] head = new byte[3];
                int filled = 0;
                // A single read may return fewer bytes than requested, so read until full or EOF.
                while (filled < head.length) {
                    int count = in.read(head, filled, head.length - filled);
                    if (count < 0) {
                        break;
                    }
                    filled += count;
                }
                return filled == head.length && head[0] == -17 && head[1] == -69 && head[2] == -65;
            } finally {
                in.close();
            }
        } catch (IOException e) {
            throw new RuntimeException("Failed to read " + path, e);
        }
    }

    /**
     * Recursively collects directories and/or files into a list. The filter is applied when
     * listing the children of a directory, not to the entries passed in {@code files}.
     *
     * @param fileList list the matches are appended to
     * @param filter   filter passed to {@link File#listFiles(FileFilter)}; may be {@code null}
     * @param dir      whether directories are added to the list
     * @param file     whether non-directories are added to the list
     * @param files    entries to start from; {@code null} is treated as empty
     */
    public static void listDeep(ArrayList<File> fileList, FileFilter filter, boolean dir, boolean file,
            File... files) {
        // File.listFiles returns null when a directory cannot be listed.
        if (files == null) {
            return;
        }
        for (File f : files) {
            if (f.isDirectory()) {
                if (dir) {
                    fileList.add(f);
                }
                listDeep(fileList, filter, dir, file, f.listFiles(filter));
            } else if (file) {
                fileList.add(f);
            }
        }
    }
}

/**
 * File-extension filter that ignores case. Directories are always accepted.
 *
 * @author linWanCheng
 */
class NameEnd implements FileFilter {
    String[] nameEnds;

    /**
     * @param nameEnds accepted extensions, without the leading dot
     */
    public NameEnd(final String... nameEnds) {
        this.nameEnds = nameEnds;
    }

    @Override
    public boolean accept(final File pathname) {
        if (pathname.isDirectory()) {
            return true;
        }
        String name = pathname.getName();
        int index = name.lastIndexOf(".") + 1;
        // A name without a dot has the empty extension.
        String en = index == 0 ? "" : name.substring(index);
        for (String e : nameEnds) {
            if (en.equalsIgnoreCase(e)) {
                return true;
            }
        }
        return false;
    }
}
   
  
 
CharsetTest.java
package linwancheng.charset;

import org.junit.Test;

/**
 * Entry points for running the transcoding tool.
 * Requires the JUnit unit-testing library.
 * Under MyEclipse, click a method name to run it.
 */
public class CharsetTest {

    private static final String UTF_8 = "UTF-8";
    private static final String GBK = "GBK";

    /** BOM mode: strip an existing BOM. */
    private static final int REMOVE_BOM = -1;
    /** BOM mode: add a BOM. */
    private static final int ADD_BOM = +1;

    /**
     * For moving files from MyEclipse to IDEA: UTF-8 in, UTF-8 out, BOM removed.
     */
    @Test
    public void DelBOM() {
        CharsetUnits.src2desc(null, UTF_8, UTF_8, false, REMOVE_BOM);
    }

    /**
     * GBK in, UTF-8 out, BOM removed.
     */
    @Test
    public void GBKToUTF8() {
        CharsetUnits.src2desc(null, GBK, UTF_8, false, REMOVE_BOM);
    }

    /**
     * Preparation for building with EasyCHM: converts only htm/html files from GBK to UTF-8,
     * with the HTML4 tag option switched on and a BOM added.
     */
    @Test
    public void ForEasyCHM() {
        NameEnd htmlOnly = new NameEnd("htm", "html");
        CharsetUnits.src2desc(htmlOnly, GBK, UTF_8, true, ADD_BOM);
    }

    /**
     * UTF-8 in, GBK out, BOM removed.
     */
    @Test
    public void UTF8ToGBK() {
        CharsetUnits.src2desc(null, UTF_8, GBK, false, REMOVE_BOM);
    }
}
网友评论