当前位置 : 主页 > 编程语言 > java >

Java 将富文本字符串(HTML)转为纯文本(Text)

来源:互联网 收集:自由互联 发布时间:2021-06-28
gistfile1.txt public class HtmlToText extends HTMLEditorKit.ParserCallback { private static HtmlToText html2Text = new HtmlToText(); StringBuffer stringBuffer; private HtmlToText() { } public void parse(String str) throws IOException { Inpu
gistfile1.txt
/**
 * Extracts the plain text from an HTML (rich-text) string using the Swing HTML parser.
 *
 * <p>The class is a {@link HTMLEditorKit.ParserCallback}: the parser reports every run of
 * character data to {@link #handleText(char[], int)}, which appends it to an internal buffer.
 * Tags themselves are never appended, so the buffer ends up holding only the text content.
 *
 * <p>The usual entry point is the static {@link #getContent(String)} helper. It uses a fresh
 * instance for every call, so no buffer is shared between calls made through that helper.
 */
public class HtmlToText extends HTMLEditorKit.ParserCallback {

    /** Text collected by the most recent {@link #parse(String)} call; never {@code null}. */
    StringBuffer stringBuffer = new StringBuffer();

    private HtmlToText() {
    }

    /**
     * Parses the given HTML and collects its text content, replacing any previously
     * collected text.
     *
     * @param str the HTML to parse; {@code null} or empty input yields empty text
     * @throws IOException if the underlying parser reports an I/O error
     */
    public void parse(String str) throws IOException {
        // Start from a clean buffer so that results of earlier parses never leak through.
        stringBuffer = new StringBuffer();
        if (str == null || str.isEmpty()) {
            return;
        }
        // Read the characters directly from the string. Going through getBytes() and an
        // InputStreamReader would round-trip via the platform default charset and corrupt
        // every character that charset cannot represent.
        try (Reader in = new java.io.StringReader(str)) {
            // The third argument is true so that charset directives in the markup are ignored.
            new ParserDelegator().parse(in, this, true);
        }
    }

    /**
     * Parser callback: appends one run of character data to the collected text.
     *
     * @param text the characters reported by the parser
     * @param pos  the position reported by the parser (unused)
     */
    @Override
    public void handleText(char[] text, int pos) {
        stringBuffer.append(text);
    }

    /**
     * Returns the text collected by the most recent {@link #parse(String)} call.
     *
     * @return the collected text, or an empty string if nothing has been parsed yet
     */
    public String getText() {
        return stringBuffer == null ? "" : stringBuffer.toString();
    }

    /**
     * Converts an HTML string to its text content.
     *
     * @param str the HTML to convert; may be {@code null}
     * @return the extracted text; an empty string for {@code null} or empty input. If the
     *         parser fails with an I/O error, the text collected up to that point is returned.
     */
    public static String getContent(String str) {
        // A new converter per call: the buffer is per-instance state, and sharing one
        // static instance would let concurrent callers overwrite each other's result.
        HtmlToText converter = new HtmlToText();
        try {
            converter.parse(str);
        } catch (IOException e) {
            // Best effort, as before: report the failure and fall through to return
            // whatever text was collected before the error.
            System.err.println("HtmlToText: failed to parse HTML: " + e);
        }
        return converter.getText();
    }

    public static void main(String[] args) {
        String text = HtmlToText.getContent("你的富文本字符串");
        System.out.println(text);
    }
}
网友评论