gistfile1.txt package com.dream.common;import java.awt.image.BufferedImage;import java.awt.image.RenderedImage;import java.io.File;import java.io.FileOutputStream;import java.io.InputStream;import java.util.Iterator;import java.util.List;im
package com.dream.common;
import java.awt.image.BufferedImage;
import java.awt.image.RenderedImage;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.util.Iterator;
import java.util.List;
import javax.imageio.ImageIO;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.text.PDFTextStripper;
/**
* 读取PDF中的内容
* @author zlj
*
*/
public class PDFReader {
/**
 * Extracts the text of every page of the PDF file at the given path.
 *
 * <p>The document is opened with {@code PDDocument.load(File)} and is always
 * closed before this method returns, whether extraction succeeds or fails.
 *
 * @param path file-system path of the PDF to read
 * @return the extracted text with position sorting enabled, or {@code null}
 *         if the file could not be loaded, parsed, or closed (the exception
 *         is printed to standard output)
 */
public static String readText(String path){
    File pdfFile = new File(path);
    // try-with-resources guarantees the document (and the file handle it
    // holds) is released on both the success and the failure path.
    try (PDDocument document = PDDocument.load(pdfFile))
    {
        PDFTextStripper stripper = new PDFTextStripper();
        // Emit text sorted by its position on the page rather than in
        // content-stream order.
        stripper.setSortByPosition(true);
        // Cover the whole document: pages are 1-based and the end page is inclusive.
        stripper.setStartPage(1);
        stripper.setEndPage(document.getNumberOfPages());
        return stripper.getText(document);
    }
    catch(Exception e)
    {
        // Best-effort contract kept from the original: report and signal failure with null.
        System.out.println(e);
        return null;
    }
}
public static void readImage(String pdfPath,String pdfpath_out){
//带解析PDF
File pdfFile = new File(pdfPath);
//空白pdf
File pdfFile_out = new File(pdfpath_out);
PDDocument document = null;
PDDocument document_out = null;
try {
document = PDDocument.load(pdfFile);
document_out = PDDocument.load(pdfFile_out);
} catch (Exception e) {
e.printStackTrace();
}
int page_size = document.getNumberOfPages();
System.out.println("getAllPages============"+page_size);
int j = 0;
for(int i=0;i
