当前位置 : 主页 > 网络编程 > JavaScript >

POI对excel中xls文件读较大数据的两种方法数据处理的方法和速度对比

来源:互联网 收集:自由互联 发布时间:2021-06-28
可读 package excel.POI;import org.apache.poi.hssf.eventusermodel.HSSFListener;import org.apache.poi.hssf.eventusermodel.*;import org.apache.poi.hssf.record.*;import org.apache.poi.poifs.filesystem.POIFSFileSystem;import java.io.FileInputS
可读
package excel.POI;

import org.apache.poi.hssf.eventusermodel.HSSFListener;


import org.apache.poi.hssf.eventusermodel.*;
import org.apache.poi.hssf.record.*;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;


/**
 * 只可读类型的XlsMethod,比可读写快(比例:20/13
 */
public class XlsReadOnlyMethod implements HSSFListener {

    private SSTRecord sstRec = null;

    private List
 
   iterList = new LinkedList
  
   (); private String[] tmpStr_ = null; private int colLastRealIndex = 0; //总列 private int rowLastRealIndex = 0; //总行 private int thisRow = 0; // 当前行指针 private boolean helpThisRow = true; private boolean shouldInit = true; private static int sheetAt = 0; // 传入的sheet,不用static会丢失值 private int count = 0; // 用来计数是第几个sheet private boolean isActiveSheet = false; // 用来标记是第几个sheet private static FormatTrackingHSSFListener formatListener; public static void main(String[] args) { try { Long start = System.currentTimeMillis(); String filePath = "F:\\temp\\bigexcel\\x1.xls"; XlsReadOnlyMethod xlsReadOnlyMethod = new XlsReadOnlyMethod(); Iterator iter = xlsReadOnlyMethod.RowIterator(filePath, 0); Long end = System.currentTimeMillis(); System.out.println("spending time = " + (end - start)); } catch (IOException e) { e.printStackTrace(); } } /** * sheet想调哪张调哪张。操作流程: * 1. 构造HSSFRequest对象 * 2. 使用输入流让事件工厂运作,直接监听到读取数据的事件,在监听processRecord中拿数据。 */ public Iterator
   
     RowIterator(String filePath,int sheetAt) throws IOException { this.sheetAt = sheetAt; FileInputStream fin = new FileInputStream(filePath); InputStream inputStream = new POIFSFileSystem(fin).createDocumentInputStream("Workbook"); HSSFRequest hssfRequest = new HSSFRequest(); // hssfRequest.addListenerForAllRecords(new XlsReadOnlyMethod(this.getSource())); // add listener HSSFEventFactory hssfEventFactory = new HSSFEventFactory(); // 使用formatListener可以分别获得date和double MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this); formatListener = new FormatTrackingHSSFListener(listener); hssfRequest.addListenerForAllRecords(formatListener); hssfEventFactory.processEvents(hssfRequest, inputStream); if(null != tmpStr_) if(0 != tmpStr_.length) { iterList.add(tmpStr_); } fin.close(); inputStream.close(); return iterList.iterator(); } /** * 实现HSSFListener的方法,【监听器】获取xls的内容 * eg : http://poi.apache.org/spreadsheet/how-to.html * */ /** * 1. 初始化 * 获取一行所有列 * 设置【当前行指针】 * 初始化tmpStr_ * 设置初始化在每行只走一次 === 3/2 * 2. 获取值 * 把值丢到tmpStr_里(空的就不丢 * 3. 换行 * tmpStr_加到iterList里去 * 如果【当前行指针】跳到下一行,我就换行,并初始化1/3 * 4. 循环 * 到最后一行的时候跳出来,我再执行一边添加。 * */ public void processRecord(Record record) { switch (record.getSid()) { // the BOFRecord can represent either the beginning of a sheet or the workbook case BOFRecord.sid: BOFRecord bof = (BOFRecord) record; if (bof.getType() == bof.TYPE_WORKSHEET) { //遇到sheet if(count++ == sheetAt) { isActiveSheet = true; } else { isActiveSheet = false; } } break; case RowRecord.sid: // 用来初始化当前行有几个cell if(isActiveSheet) { if(null != tmpStr_) { iterList.add(tmpStr_); } RowRecord rowrec = (RowRecord) record; // 可以获得行列的最大值。 如果有错就给后面的数字加大。 colLastRealIndex = rowrec.getLastCol() - rowrec.getFirstCol() + 5; rowLastRealIndex = rowrec.getRowNumber(); if(helpThisRow) { //初始化当前行指针 thisRow = rowrec.getRowNumber(); shouldInit = true; helpThisRow = false; } // System.out.println("我要判定 " + rowrec.getRowNumber() + " 行有" + (colLastRealIndex)+ "个cell了"); } break; /** * 不管是哪条路,只要第一次跑到 就开始 * */ case NumberRecord.sid: // Date int double if(isActiveSheet) { NumberRecord numrec = (NumberRecord) record; //1.如果是下一行,2.如果是第一次运行,我就初始化 checkNextRowAndInit(numrec); tmpStr_[numrec.getColumn()] = formatListener.formatNumberDateCell(numrec); // System.out.println("Cell found with value " + formatListener.formatNumberDateCell(numrec) // + " at row " + numrec.getRow() + " and column " + numrec.getColumn()); } break; case FormulaRecord.sid: // formula if(isActiveSheet) { FormulaRecord forRec = (FormulaRecord) record; checkNextRowAndInit(forRec); tmpStr_[forRec.getColumn()] = String.valueOf(forRec.getValue()); // System.out.println("Fomula found with value " + forRec.getValue() + " at row " + forRec.getRow() + " and column " + forRec.getColumn()); } break; // SSTRecords store a array of unique strings used in Excel. case SSTRecord.sid: sstRec = (SSTRecord) record; // to get SSTRecord break; case LabelSSTRecord.sid: if(isActiveSheet) { LabelSSTRecord lrec = (LabelSSTRecord) record; checkNextRowAndInit(lrec); tmpStr_[lrec.getColumn()] = String.valueOf(sstRec.getString(lrec.getSSTIndex())); // System.out.println("String cell found with value " // + sstRec.getString(lrec.getSSTIndex()) + " at row " + lrec.getRow() // + " and column " + lrec.getColumn()); } break; } } private void checkNextRowAndInit(CellRecord record) { if(thisRow != record.getRow()) { // 已经到下一行了 thisRow = record.getRow(); iterList.add(tmpStr_); // 添加tmpStr_ shouldInit = true; } if(shouldInit) { tmpStr_ = new String[colLastRealIndex]; shouldInit = false; } } }
   
  
 
可写
package excel.POI;

import org.apache.commons.lang.time.DateFormatUtils;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.ss.usermodel.*;

import java.io.File;
import java.io.FileInputStream;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;


/**
 * 可读可写的Xls/Xlsx
 * */
public class XlsReadWriteMethod {

    private String filePath;

    XlsReadWriteMethod(String filePath) {
        this.filePath = filePath;
    }

    public static void main(String[] args) {
        try {
            Long start = System.currentTimeMillis();
            String filePath = "F:\\temp\\bigexcel\\x1.xls";
            XlsReadWriteMethod xlsReadWriteMethod = new XlsReadWriteMethod(filePath);
            Iterator iter = xlsReadWriteMethod.RowIterator(0);
            Long end = System.currentTimeMillis();
            System.out.println("get data time = " + (end - start));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }


    /**
     * 迭代FileBean的每一行
     * */
    public Iterator
 
   RowIterator(int sheetAt) throws Exception {
        Workbook workbook = WorkbookFactory.create(new FileInputStream(new File(filePath)));
        Sheet sheet = workbook.getSheetAt(sheetAt); //get index of sheet

        List
  
    list = new LinkedList
   
    (); for (int i = 0; i < sheet.getLastRowNum(); i++) { Row row = sheet.getRow(i); if(null == row) continue; // 这一行是空行:没东西 int firstCellNum = row.getFirstCellNum(); int lastCellNum = row.getLastCellNum(); String[] oneRowValue = new String[lastCellNum - firstCellNum]; int countNoneCol = 0; for (int j = firstCellNum; j < lastCellNum; j++) { Cell cell = row.getCell(j); int thisCol = j - firstCellNum; if(null == cell) { oneRowValue[thisCol] = ""; continue; } switch (cell.getCellType()) // Cell.getCellType() has not deprecated in API { // Because of Cell.getCellType().getCode() has deprecated And do not provide a replacement way So just use direct ways case -1 : // _NONE oneRowValue[thisCol] = ""; break; case 0 : // NUMERIC Date short dataFormat = cell.getCellStyle().getDataFormat(); // 31/176 : yyyy年m月d日(自定义型和日期型) , 58 : m月d日 , 32 : h时mm分 // 有时候176也是数字型,那这样会报错,干脆直接抓个异常。总之抓一堆dateFormat来兼容就好了。 if(HSSFDateUtil.isCellDateFormatted(cell) || HSSFDateUtil.isCellDateFormatted(cell) || 31 == dataFormat || 58 == dataFormat || 32 == dataFormat || 176 == dataFormat) { try { oneRowValue[thisCol] = DateFormatUtils.format(cell.getDateCellValue(), "yyyy-MM-dd hh:MM:ss"); }catch ( Exception e) { try { cell.setCellType(CellType.STRING); }catch (Exception ex) { //出问题就不转 } oneRowValue[thisCol] = String.valueOf(cell.getStringCellValue()); } } else { try { cell.setCellType(CellType.STRING); }catch (Exception ex) { //出问题就不转 } oneRowValue[thisCol] = String.valueOf(cell.getStringCellValue()); } break; case 1 : // STRING oneRowValue[thisCol] = cell.getStringCellValue(); break; case 2 : // FORMULA try { oneRowValue[thisCol] = String.valueOf(cell.getNumericCellValue()); } catch (IllegalStateException e) { try { oneRowValue[thisCol] = String.valueOf(cell.getRichStringCellValue()); } catch (Exception ex){ oneRowValue[thisCol] = ""; } } break; case 3 : // BLANK oneRowValue[thisCol] = ""; break; case 4 : // BOOLEAN oneRowValue[thisCol] = String.valueOf(cell.getBooleanCellValue()); break; case 5 : // ERROR oneRowValue[thisCol] = String.valueOf(cell.getErrorCellValue()); break; default : oneRowValue[thisCol] = ""; break; } // end switch if("".equals(oneRowValue[thisCol])) countNoneCol++; } // end for if(countNoneCol == (lastCellNum - firstCellNum)) continue;//这一行是空行,里面的东西都无效 list.add(oneRowValue); } // end for 2 return list.iterator(); } }
   
  
 
又读又写
package excel.POI;

import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

import java.io.InputStream;
import java.util.Iterator;

/**
 * 获取xlsx的读取方法
 * */
public class XlsxReadOnlyLoaderTest {

    public void processOneSheet(String filename) throws Exception {
        OPCPackage pkg = OPCPackage.open(filename);
        XSSFReader r = new XSSFReader( pkg );
        SharedStringsTable sst = r.getSharedStringsTable();

        XMLReader parser = fetchSheetParser(sst);

        // To look up the Sheet Name / Sheet Order / rID,
        //  you need to process the core Workbook stream.
        // Normally it's of the form rId# or rSheet#
        InputStream sheet2 = r.getSheet("rId1");
        InputSource sheetSource = new InputSource(sheet2);
        parser.parse(sheetSource);
        sheet2.close();
    }

    public void processAllSheets(String filename) throws Exception {
        OPCPackage pkg = OPCPackage.open(filename);
        XSSFReader r = new XSSFReader( pkg );
        SharedStringsTable sst = r.getSharedStringsTable();

        XMLReader parser = fetchSheetParser(sst);

        Iterator
 
   sheets = r.getSheetsData();
        while(sheets.hasNext()) {
            System.out.println("Processing new sheet:\n");
            InputStream sheet = sheets.next();
            InputSource sheetSource = new InputSource(sheet);
            parser.parse(sheetSource);

            sheet.close();
            System.out.println("");
        }
    }

    public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
        XMLReader parser =
                XMLReaderFactory.createXMLReader(
                        "com.sun.org.apache.xerces.internal.parsers.SAXParser"
                );
        ContentHandler handler = new SheetHandler(sst);
        parser.setContentHandler(handler);
        return parser;
    }

    /**
     * See org.xml.sax.helpers.DefaultHandler javadocs
     */
    private static class SheetHandler extends DefaultHandler {
        private SharedStringsTable sst;
        private String lastContents;
        private boolean nextIsString;

        private SheetHandler(SharedStringsTable sst) {
            this.sst = sst;
        }

        public void startElement(String uri, String localName, String name,
                                 Attributes attributes) throws SAXException {
            // c => cell
            if(name.equals("c")) {
                // Print the cell reference
                System.out.print(attributes.getValue("r") + " - ");
                // Figure out if the value is an index in the SST
                String cellType = attributes.getValue("t");
                if(cellType != null && cellType.equals("s")) {
                    nextIsString = true;
                } else {
                    nextIsString = false;
                }
            }
            // Clear contents cache
            lastContents = "";
        }

        public void endElement(String uri, String localName, String name)
                throws SAXException {
            // Process the last contents as required.
            // Do now, as characters() may be called more than once
            if(nextIsString) {
                int idx = Integer.parseInt(lastContents);
                lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
                nextIsString = false;
            }

            // v => contents of a cell
            // Output after we've seen the string contents
            if(name.equals("v")) {
                System.out.println(lastContents);
            }
        }

        public void characters(char[] ch, int start, int length)
                throws SAXException {
            lastContents += new String(ch, start, length);
        }
    }

    public static void main(String[] args) throws Exception {
        XlsxReadOnlyLoaderTest example = new XlsxReadOnlyLoaderTest();
        example.processOneSheet("F:\\temp\\excel\\2.xlsx");
//		example.processAllSheets("F:\\temp\\excel\\2.xlsx");
    }

}
 
maven_test-master.zip maven_test-master.zip
上一篇:自适应rem
下一篇:map & reduce with Promise
网友评论