当前位置 : 主页 > 编程语言 > java >

读取PDF，提取图片

来源:互联网 收集:自由互联 发布时间:2021-06-28
gistfile1.txt package com.dream.common;import java.awt.image.BufferedImage;import java.awt.image.RenderedImage;import java.io.File;import java.io.FileOutputStream;import java.io.InputStream;import java.util.Iterator;import java.util.List;im
gistfile1.txt
package com.dream.common;

import java.awt.image.BufferedImage;
import java.awt.image.RenderedImage;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.util.Iterator;
import java.util.List;

import javax.imageio.ImageIO;

import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.text.PDFTextStripper;

/**
 * 读取PDF中的内容
 * @author zlj
 *
 */
public class PDFReader {
	
	/**
	 * Extracts the text of every page of a PDF file.
	 *
	 * <p>The document is always closed before this method returns, whether
	 * extraction succeeds or fails.
	 *
	 * @param path file system path of the PDF to read
	 * @return the extracted text of pages 1 through the last page, or
	 *         {@code null} if the document cannot be loaded or the text cannot
	 *         be extracted (the exception is printed to standard output)
	 */
	public static String readText(String path){
		File pdfFile = new File(path);
		// try-with-resources guarantees the PDDocument is closed on every path;
		// previously it was never closed, leaking the underlying file handle.
		try (PDDocument document = PDDocument.load(pdfFile)) {
			PDFTextStripper stripper = new PDFTextStripper();
			// Sort the text by its position on the page before output.
			stripper.setSortByPosition(true);
			// Cover the whole document: first page through the last page.
			stripper.setStartPage(1);
			stripper.setEndPage(document.getNumberOfPages());
			return stripper.getText(document);
		} catch (Exception e) {
			// Best-effort contract kept for existing callers: report and return null.
			System.out.println(e);
			return null;
		}
	}

	public static void readImage(String pdfPath,String pdfpath_out){
		//带解析PDF
		File pdfFile = new File(pdfPath);
		//空白pdf
		File pdfFile_out = new File(pdfpath_out);
		
		PDDocument document = null;
		PDDocument document_out = null;
		try {
			document = PDDocument.load(pdfFile);
			document_out = PDDocument.load(pdfFile_out);
		} catch (Exception e) {
			e.printStackTrace();
		}
		
		int page_size = document.getNumberOfPages();
		System.out.println("getAllPages============"+page_size);
		
		int j = 0;
		for(int i=0;i
网友评论