gistfile1.txt package com.dream.common;import java.awt.image.BufferedImage;import java.awt.image.RenderedImage;import java.io.File;import java.io.FileOutputStream;import java.io.InputStream;import java.util.Iterator;import java.util.List;im
package com.dream.common; import java.awt.image.BufferedImage; import java.awt.image.RenderedImage; import java.io.File; import java.io.FileOutputStream; import java.io.InputStream; import java.util.Iterator; import java.util.List; import javax.imageio.ImageIO; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; import org.apache.pdfbox.text.PDFTextStripper; /** * 读取PDF中的内容 * @author zlj * */ public class PDFReader { public static String readText(String path){ File pdfFile = new File(path); PDDocument document = null; try { // 方式一: /** InputStream input = null; input = new FileInputStream( pdfFile ); //加载 pdf 文档 PDFParser parser = new PDFParser(new RandomAccessBuffer(input)); parser.parse(); document = parser.getPDDocument(); **/ // 方式二: document=PDDocument.load(pdfFile); // 获取页码 int pages = document.getNumberOfPages(); // 读文本内容 PDFTextStripper stripper=new PDFTextStripper(); // 设置按顺序输出 stripper.setSortByPosition(true); stripper.setStartPage(1); stripper.setEndPage(pages); String content = stripper.getText(document); return content; } catch(Exception e) { System.out.println(e); return null; } } public static void readImage(String pdfPath,String pdfpath_out){ //带解析PDF File pdfFile = new File(pdfPath); //空白pdf File pdfFile_out = new File(pdfpath_out); PDDocument document = null; PDDocument document_out = null; try { document = PDDocument.load(pdfFile); document_out = PDDocument.load(pdfFile_out); } catch (Exception e) { e.printStackTrace(); } int page_size = document.getNumberOfPages(); System.out.println("getAllPages============"+page_size); int j = 0; for(int i=0;i