可读 package excel.POI;import org.apache.poi.hssf.eventusermodel.HSSFListener;import org.apache.poi.hssf.eventusermodel.*;import org.apache.poi.hssf.record.*;import org.apache.poi.poifs.filesystem.POIFSFileSystem;import java.io.FileInputS
package excel.POI; import org.apache.poi.hssf.eventusermodel.HSSFListener; import org.apache.poi.hssf.eventusermodel.*; import org.apache.poi.hssf.record.*; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.util.Iterator; import java.util.LinkedList; import java.util.List; /** * 只可读类型的XlsMethod,比可读写快(比例:20/13 */ public class XlsReadOnlyMethod implements HSSFListener { private SSTRecord sstRec = null; private List可写iterList = new LinkedList (); private String[] tmpStr_ = null; private int colLastRealIndex = 0; //总列 private int rowLastRealIndex = 0; //总行 private int thisRow = 0; // 当前行指针 private boolean helpThisRow = true; private boolean shouldInit = true; private static int sheetAt = 0; // 传入的sheet,不用static会丢失值 private int count = 0; // 用来计数是第几个sheet private boolean isActiveSheet = false; // 用来标记是第几个sheet private static FormatTrackingHSSFListener formatListener; public static void main(String[] args) { try { Long start = System.currentTimeMillis(); String filePath = "F:\\temp\\bigexcel\\x1.xls"; XlsReadOnlyMethod xlsReadOnlyMethod = new XlsReadOnlyMethod(); Iterator iter = xlsReadOnlyMethod.RowIterator(filePath, 0); Long end = System.currentTimeMillis(); System.out.println("spending time = " + (end - start)); } catch (IOException e) { e.printStackTrace(); } } /** * sheet想调哪张调哪张。操作流程: * 1. 构造HSSFRequest对象 * 2. 使用输入流让事件工厂运作,直接监听到读取数据的事件,在监听processRecord中拿数据。 */ public Iterator RowIterator(String filePath,int sheetAt) throws IOException { this.sheetAt = sheetAt; FileInputStream fin = new FileInputStream(filePath); InputStream inputStream = new POIFSFileSystem(fin).createDocumentInputStream("Workbook"); HSSFRequest hssfRequest = new HSSFRequest(); // hssfRequest.addListenerForAllRecords(new XlsReadOnlyMethod(this.getSource())); // add listener HSSFEventFactory hssfEventFactory = new HSSFEventFactory(); // 使用formatListener可以分别获得date和double MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this); formatListener = new FormatTrackingHSSFListener(listener); hssfRequest.addListenerForAllRecords(formatListener); hssfEventFactory.processEvents(hssfRequest, inputStream); if(null != tmpStr_) if(0 != tmpStr_.length) { iterList.add(tmpStr_); } fin.close(); inputStream.close(); return iterList.iterator(); } /** * 实现HSSFListener的方法,【监听器】获取xls的内容 * eg : http://poi.apache.org/spreadsheet/how-to.html * */ /** * 1. 初始化 * 获取一行所有列 * 设置【当前行指针】 * 初始化tmpStr_ * 设置初始化在每行只走一次 === 3/2 * 2. 获取值 * 把值丢到tmpStr_里(空的就不丢 * 3. 换行 * tmpStr_加到iterList里去 * 如果【当前行指针】跳到下一行,我就换行,并初始化1/3 * 4. 循环 * 到最后一行的时候跳出来,我再执行一边添加。 * */ public void processRecord(Record record) { switch (record.getSid()) { // the BOFRecord can represent either the beginning of a sheet or the workbook case BOFRecord.sid: BOFRecord bof = (BOFRecord) record; if (bof.getType() == bof.TYPE_WORKSHEET) { //遇到sheet if(count++ == sheetAt) { isActiveSheet = true; } else { isActiveSheet = false; } } break; case RowRecord.sid: // 用来初始化当前行有几个cell if(isActiveSheet) { if(null != tmpStr_) { iterList.add(tmpStr_); } RowRecord rowrec = (RowRecord) record; // 可以获得行列的最大值。 如果有错就给后面的数字加大。 colLastRealIndex = rowrec.getLastCol() - rowrec.getFirstCol() + 5; rowLastRealIndex = rowrec.getRowNumber(); if(helpThisRow) { //初始化当前行指针 thisRow = rowrec.getRowNumber(); shouldInit = true; helpThisRow = false; } // System.out.println("我要判定 " + rowrec.getRowNumber() + " 行有" + (colLastRealIndex)+ "个cell了"); } break; /** * 不管是哪条路,只要第一次跑到 就开始 * */ case NumberRecord.sid: // Date int double if(isActiveSheet) { NumberRecord numrec = (NumberRecord) record; //1.如果是下一行,2.如果是第一次运行,我就初始化 checkNextRowAndInit(numrec); tmpStr_[numrec.getColumn()] = formatListener.formatNumberDateCell(numrec); // System.out.println("Cell found with value " + formatListener.formatNumberDateCell(numrec) // + " at row " + numrec.getRow() + " and column " + numrec.getColumn()); } break; case FormulaRecord.sid: // formula if(isActiveSheet) { FormulaRecord forRec = (FormulaRecord) record; checkNextRowAndInit(forRec); tmpStr_[forRec.getColumn()] = String.valueOf(forRec.getValue()); // System.out.println("Fomula found with value " + forRec.getValue() + " at row " + forRec.getRow() + " and column " + forRec.getColumn()); } break; // SSTRecords store a array of unique strings used in Excel. case SSTRecord.sid: sstRec = (SSTRecord) record; // to get SSTRecord break; case LabelSSTRecord.sid: if(isActiveSheet) { LabelSSTRecord lrec = (LabelSSTRecord) record; checkNextRowAndInit(lrec); tmpStr_[lrec.getColumn()] = String.valueOf(sstRec.getString(lrec.getSSTIndex())); // System.out.println("String cell found with value " // + sstRec.getString(lrec.getSSTIndex()) + " at row " + lrec.getRow() // + " and column " + lrec.getColumn()); } break; } } private void checkNextRowAndInit(CellRecord record) { if(thisRow != record.getRow()) { // 已经到下一行了 thisRow = record.getRow(); iterList.add(tmpStr_); // 添加tmpStr_ shouldInit = true; } if(shouldInit) { tmpStr_ = new String[colLastRealIndex]; shouldInit = false; } } }
package excel.POI; import org.apache.commons.lang.time.DateFormatUtils; import org.apache.poi.hssf.usermodel.HSSFDateUtil; import org.apache.poi.ss.usermodel.*; import java.io.File; import java.io.FileInputStream; import java.util.Iterator; import java.util.LinkedList; import java.util.List; /** * 可读可写的Xls/Xlsx * */ public class XlsReadWriteMethod { private String filePath; XlsReadWriteMethod(String filePath) { this.filePath = filePath; } public static void main(String[] args) { try { Long start = System.currentTimeMillis(); String filePath = "F:\\temp\\bigexcel\\x1.xls"; XlsReadWriteMethod xlsReadWriteMethod = new XlsReadWriteMethod(filePath); Iterator iter = xlsReadWriteMethod.RowIterator(0); Long end = System.currentTimeMillis(); System.out.println("get data time = " + (end - start)); } catch (Exception e) { e.printStackTrace(); } } /** * 迭代FileBean的每一行 * */ public Iterator又读又写RowIterator(int sheetAt) throws Exception { Workbook workbook = WorkbookFactory.create(new FileInputStream(new File(filePath))); Sheet sheet = workbook.getSheetAt(sheetAt); //get index of sheet List list = new LinkedList (); for (int i = 0; i < sheet.getLastRowNum(); i++) { Row row = sheet.getRow(i); if(null == row) continue; // 这一行是空行:没东西 int firstCellNum = row.getFirstCellNum(); int lastCellNum = row.getLastCellNum(); String[] oneRowValue = new String[lastCellNum - firstCellNum]; int countNoneCol = 0; for (int j = firstCellNum; j < lastCellNum; j++) { Cell cell = row.getCell(j); int thisCol = j - firstCellNum; if(null == cell) { oneRowValue[thisCol] = ""; continue; } switch (cell.getCellType()) // Cell.getCellType() has not deprecated in API { // Because of Cell.getCellType().getCode() has deprecated And do not provide a replacement way So just use direct ways case -1 : // _NONE oneRowValue[thisCol] = ""; break; case 0 : // NUMERIC Date short dataFormat = cell.getCellStyle().getDataFormat(); // 31/176 : yyyy年m月d日(自定义型和日期型) , 58 : m月d日 , 32 : h时mm分 // 有时候176也是数字型,那这样会报错,干脆直接抓个异常。总之抓一堆dateFormat来兼容就好了。 if(HSSFDateUtil.isCellDateFormatted(cell) || HSSFDateUtil.isCellDateFormatted(cell) || 31 == dataFormat || 58 == dataFormat || 32 == dataFormat || 176 == dataFormat) { try { oneRowValue[thisCol] = DateFormatUtils.format(cell.getDateCellValue(), "yyyy-MM-dd hh:MM:ss"); }catch ( Exception e) { try { cell.setCellType(CellType.STRING); }catch (Exception ex) { //出问题就不转 } oneRowValue[thisCol] = String.valueOf(cell.getStringCellValue()); } } else { try { cell.setCellType(CellType.STRING); }catch (Exception ex) { //出问题就不转 } oneRowValue[thisCol] = String.valueOf(cell.getStringCellValue()); } break; case 1 : // STRING oneRowValue[thisCol] = cell.getStringCellValue(); break; case 2 : // FORMULA try { oneRowValue[thisCol] = String.valueOf(cell.getNumericCellValue()); } catch (IllegalStateException e) { try { oneRowValue[thisCol] = String.valueOf(cell.getRichStringCellValue()); } catch (Exception ex){ oneRowValue[thisCol] = ""; } } break; case 3 : // BLANK oneRowValue[thisCol] = ""; break; case 4 : // BOOLEAN oneRowValue[thisCol] = String.valueOf(cell.getBooleanCellValue()); break; case 5 : // ERROR oneRowValue[thisCol] = String.valueOf(cell.getErrorCellValue()); break; default : oneRowValue[thisCol] = ""; break; } // end switch if("".equals(oneRowValue[thisCol])) countNoneCol++; } // end for if(countNoneCol == (lastCellNum - firstCellNum)) continue;//这一行是空行,里面的东西都无效 list.add(oneRowValue); } // end for 2 return list.iterator(); } }
package excel.POI; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.apache.poi.xssf.model.SharedStringsTable; import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.xml.sax.*; import org.xml.sax.helpers.DefaultHandler; import org.xml.sax.helpers.XMLReaderFactory; import java.io.InputStream; import java.util.Iterator; /** * 获取xlsx的读取方法 * */ public class XlsxReadOnlyLoaderTest { public void processOneSheet(String filename) throws Exception { OPCPackage pkg = OPCPackage.open(filename); XSSFReader r = new XSSFReader( pkg ); SharedStringsTable sst = r.getSharedStringsTable(); XMLReader parser = fetchSheetParser(sst); // To look up the Sheet Name / Sheet Order / rID, // you need to process the core Workbook stream. // Normally it's of the form rId# or rSheet# InputStream sheet2 = r.getSheet("rId1"); InputSource sheetSource = new InputSource(sheet2); parser.parse(sheetSource); sheet2.close(); } public void processAllSheets(String filename) throws Exception { OPCPackage pkg = OPCPackage.open(filename); XSSFReader r = new XSSFReader( pkg ); SharedStringsTable sst = r.getSharedStringsTable(); XMLReader parser = fetchSheetParser(sst); Iteratormaven_test-master.zip maven_test-master.zipsheets = r.getSheetsData(); while(sheets.hasNext()) { System.out.println("Processing new sheet:\n"); InputStream sheet = sheets.next(); InputSource sheetSource = new InputSource(sheet); parser.parse(sheetSource); sheet.close(); System.out.println(""); } } public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException { XMLReader parser = XMLReaderFactory.createXMLReader( "com.sun.org.apache.xerces.internal.parsers.SAXParser" ); ContentHandler handler = new SheetHandler(sst); parser.setContentHandler(handler); return parser; } /** * See org.xml.sax.helpers.DefaultHandler javadocs */ private static class SheetHandler extends DefaultHandler { private SharedStringsTable sst; private String lastContents; private boolean nextIsString; private SheetHandler(SharedStringsTable sst) { this.sst = sst; } public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException { // c => cell if(name.equals("c")) { // Print the cell reference System.out.print(attributes.getValue("r") + " - "); // Figure out if the value is an index in the SST String cellType = attributes.getValue("t"); if(cellType != null && cellType.equals("s")) { nextIsString = true; } else { nextIsString = false; } } // Clear contents cache lastContents = ""; } public void endElement(String uri, String localName, String name) throws SAXException { // Process the last contents as required. // Do now, as characters() may be called more than once if(nextIsString) { int idx = Integer.parseInt(lastContents); lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString(); nextIsString = false; } // v => contents of a cell // Output after we've seen the string contents if(name.equals("v")) { System.out.println(lastContents); } } public void characters(char[] ch, int start, int length) throws SAXException { lastContents += new String(ch, start, length); } } public static void main(String[] args) throws Exception { XlsxReadOnlyLoaderTest example = new XlsxReadOnlyLoaderTest(); example.processOneSheet("F:\\temp\\excel\\2.xlsx"); // example.processAllSheets("F:\\temp\\excel\\2.xlsx"); } }