当前位置 : 主页 > 编程语言 > java >

POI解析excel,支持合并单元格和图片

来源:互联网 收集:自由互联 发布时间:2022-09-02
一、准备一个自定义的类 @Data @AllArgsConstructor public class ExcelMerge { private int startRow; private int endRow; private int startCol; private int endCol; public boolean isRange(int firstRow, int firstCol) { return firstRow =

一、准备一个自定义的类

@Data
@AllArgsConstructor
public class ExcelMerge {

private int startRow;

private int endRow;

private int startCol;

private int endCol;

public boolean isRange(int firstRow, int firstCol) {
return firstRow >= startRow && firstRow <= endRow && firstCol >= startCol && firstCol <= endCol;
}
}


二、编写工具类

@Slf4j
public class ParseExcelUtil {

/** * 解析excel文档 * ps. * List<List<Map<String, Object>>>: 最外层的List是excel中的每一个sheet,第二层List每一个sheet中的行数据,从startTitle + 1 开始, * 第三层 Map 结构为表头中第一个单元格对应的值 * * @param excelUrl excel的url地址 * @param startTitle 开始的表头行,以那一行作为 Map 的KEY * @param parseAllSheet 是否解析当前表格中的所有 sheet(工作薄),false=只解析第一个工作薄 * @return List<List < Map < String, Object>>> */ public static List<List<Map<String, Object>>> parseSimpleExcel(String excelUrl, int startTitle, boolean parseAllSheet) {
ByteArrayInputStream bais = getFileByte(excelUrl);
if (Objects.isNull(bais)) {
return Collections.emptyList();
}

startTitle = Math.max(startTitle, 0);
try {
Workbook wb = WorkbookFactory.create(bais);
int sheets = parseAllSheet ? wb.getNumberOfSheets() : 1;
List<List<Map<String, Object>>> dataList = new ArrayList<>(sheets);
for (int i = 0; i < sheets; i++) {
Sheet sheet = wb.getSheetAt(i);
Map<Integer, String> titleMap = getTitleMap(sheet, startTitle);
if (CollectionUtil.isEmpty(titleMap)) {
log.error("未获取到表头数据,向下移一行,表头位置={} ", startTitle);
startTitle += 1;
titleMap = getTitleMap(sheet, startTitle);
}
if (CollectionUtil.isEmpty(titleMap)) {
log.error("未获取到表头数据,表头位置={}", startTitle);
continue;
}
int lastRow = sheet.getLastRowNum();
if (lastRow <= startTitle) {
log.error("第{}个工作薄的行数不足,总行数{},表头起始位置{}", i, lastRow, startTitle);
continue;
}
startTitle += 1;
// 判断是否有合并的单元格 Map<ExcelMerge, String> rangeMap = getMerge(sheet);
Map<ExcelMerge, List<String>> pictureMap = getPicture(sheet);
boolean hasRange = CollectionUtil.isNotEmpty(rangeMap), hasPicture = CollectionUtil.isNotEmpty(pictureMap);
List<Map<String, Object>> data = new ArrayList<>(lastRow);
for (int j = startTitle; j < lastRow; j++) {
Row row = sheet.getRow(j);
if (Objects.isNull(row)) {
continue;
}
Map<String, Object> map = new HashMap<>(row.getLastCellNum());
for (Map.Entry<Integer, String> entry : titleMap.entrySet()) {
Integer col = entry.getKey();
String value = "";

// 当有合并单元格的时候,只取合并单元格的数据 ExcelMerge merge = getMergeRange(sheet, j, col);
if (hasRange && Objects.nonNull(merge)) {
value = StringUtil.trimToEmpty(rangeMap.get(merge));
}

// 当前有图片的时候,只取图片数据 if (hasPicture && StringUtil.isBlank(value)) {
value = getPictureRange(pictureMap, j, col);
if (StringUtil.isNotBlank(value)) {
log.info("图片={} ", value);
}
}

// 都为空的时候,取单元格的数据 if (StringUtil.isBlank(value)) {
value = getStringCellVal(row.getCell(col));
}
map.put(titleMap.get(col), value);
}
data.add(map);
}
dataList.add(data);
}
return dataList;
} catch (Exception e) {
log.error("解析excel失败, e={} ", e);
}
return Collections.emptyList();
}

/** * 获取工作薄中的图片数据 * ps. * map中的第一个参数为图片所在的范围,第二个参数为图片的base64 * * @param sheet 工作薄 * @return Map */ public static Map<ExcelMerge, List<String>> getPicture(Sheet sheet) {
Map<ExcelMerge, List<String>> map = new HashMap<>(1);
// 判断excel版本,此处2003 if (sheet instanceof HSSFSheet) {
HSSFSheet hssfSheet = (HSSFSheet) sheet;
List<HSSFShape> list = hssfSheet.getDrawingPatriarch().getChildren();
if (CollectionUtil.isEmpty(list)) {
return Collections.emptyMap();
}
for (HSSFShape e : list) {
HSSFSimpleShape shape = (HSSFSimpleShape) e;
if (!(shape instanceof HSSFPicture)) {
continue;
}
HSSFClientAnchor anchor = (HSSFClientAnchor) shape.getAnchor();
if (Objects.isNull(anchor)) {
continue;
}
ExcelMerge merge = new ExcelMerge(anchor.getRow1(), anchor.getRow2(), anchor.getCol1(), anchor.getCol2());
List<String> images = map.get(merge);
if (CollectionUtil.isEmpty(images)) {
images = new ArrayList<>(1);
}
images.add(NhxyConstant.BASE64_IMAGE + Base64.encode(((HSSFPicture) shape).getPictureData().getData()));
map.put(merge, images);
}
} else if (sheet instanceof XSSFSheet) {
XSSFSheet xssfSheet = (XSSFSheet) sheet;
List<XSSFShape> shapes = xssfSheet.getDrawingPatriarch().getShapes();
if (CollectionUtil.isEmpty(shapes)) {
return Collections.emptyMap();
}
for (XSSFShape e : shapes) {
if (!(e instanceof XSSFPicture)) {
continue;
}
XSSFPicture picture = (XSSFPicture) e;
XSSFClientAnchor anchor = picture.getClientAnchor();
if (Objects.isNull(anchor)) {
continue;
}
ExcelMerge merge = new ExcelMerge(anchor.getRow1(), anchor.getRow2(), anchor.getCol1(), anchor.getCol2());
List<String> images = map.get(merge);
if (CollectionUtil.isEmpty(images)) {
images = new ArrayList<>(1);
}
images.add(NhxyConstant.BASE64_IMAGE + Base64.encode(picture.getPictureData().getData()));
map.put(merge, images);
}
}
return map;
}

/** * 获取合并的单元格 * ps. * map中的第一个参数为图片所在的范围,第二个参数为图片的base64 * * @param sheet 工作薄 * @return Map */ public static Map<ExcelMerge, String> getMerge(Sheet sheet) {
if (sheet.getNumMergedRegions() <= 0) {
return Collections.emptyMap();
}
Map<ExcelMerge, String> map = new HashMap<>(sheet.getNumMergedRegions());
for (CellRangeAddress e : sheet.getMergedRegions()) {
int firstRow = e.getFirstRow(), lastRow = e.getLastRow();
int firstColumn = e.getFirstColumn(), lastColumn = e.getLastColumn();
map.put(new ExcelMerge(firstRow, lastRow, firstColumn, lastColumn), getStringCellVal(sheet.getRow(firstRow).getCell(firstColumn)));
}
return map;
}

/** * 判断是否为合并的单元格 * * @param sheet 工作薄 * @param row 行 * @param col 列 * @return boolean */ public static ExcelMerge getMergeRange(Sheet sheet, int row, int col) {
List<CellRangeAddress> rangeList = sheet.getMergedRegions();
if (CollectionUtil.isEmpty(rangeList)) {
return null;
}
return rangeList.stream().filter(e -> (row >= e.getFirstRow() && row <= e.getLastRow()) && (col >= e.getFirstColumn() && col <= e.getLastColumn()))
.map(e -> new ExcelMerge(e.getFirstRow(), e.getLastRow(), e.getFirstColumn(), e.getLastColumn())).findFirst().orElse(null);
}

/** * 获取图片数据 * * @param pictureList 图片列表 * @param row 当前行 * @param col 当前列 * @return String */ public static String getPictureRange(Map<ExcelMerge, List<String>> pictureList, int row, int col) {
if (CollectionUtil.isEmpty(pictureList) || row <= 0 || col <= 0) {
return null;
}
List<String> list = pictureList.entrySet().stream().filter(e -> e.getKey().isRange(row, col)).map(Map.Entry::getValue)
.map(e -> String.join(NhxyConstant.SPLIT_CHAR, e)).collect(Collectors.toList());
return CollectionUtil.isEmpty(list) ? "" : String.join(NhxyConstant.SPLIT_CHAR, list);
}

/** * 获取表头信息 * * @param sheet 工作薄 * @param startTitle 起始位置 * @return TreeMap */ private static Map<Integer, String> getTitleMap(Sheet sheet, int startTitle) {
Row row = sheet.getRow(startTitle);
int last = row.getLastCellNum();
Map<Integer, String> title = new TreeMap<>();
for (int i = 0; i < last; i++) {
String value = getStringCellVal(row.getCell(i));
if (StringUtil.isNotBlank(value)) {
title.put(i, value);
}
}
return title;
}

/** * 获取单元格中的值 * * @param cell 单元格 * @return String */ public static String getStringCellVal(Cell cell) {
if (Objects.isNull(cell) || cell.getCellType() == CellType.BLANK || cell.getCellType() == CellType.ERROR) {
return "";
} else if (cell.getCellType() == CellType.BOOLEAN) {
return StringUtil.trimToEmpty(StringEscapeUtils.unescapeJava(String.valueOf(cell.getBooleanCellValue())));
} else if (cell.getCellType() == CellType.NUMERIC) {
return StringUtil.trimToEmpty(StringEscapeUtils.unescapeJava(String.valueOf(cell.getNumericCellValue())));
} else if (cell.getCellType() == CellType.FORMULA) {
return StringUtil.trimToEmpty(StringEscapeUtils.unescapeJava(String.valueOf(cell.getCellFormula())));
}
return StringUtil.trimToEmpty(StringEscapeUtils.unescapeJava(cell.getStringCellValue()));
}


/** * 获取文件的byte[] * * @param url 文件地址 * @return ByteArrayInputStream */ public static ByteArrayInputStream getFileByte(String url) {
log.debug("文件地址={} ", url);
if (StringUtil.isBlank(url)) {
return null;
}
try {
byte[] bytes = HttpUtil.downloadBytes(url);
return Objects.isNull(bytes) ? null : new ByteArrayInputStream(bytes);
} catch (Exception e) {
log.error("获取excel文件异常, e= {}", e);
}
return null;
}

// public static void main(String[] args) { // // String url = "http://10.0.0.213:7080/M00/00/05/CgAA1WMO90GAEH18AMo2AJy2Ocw444.xls"; // 无合并单元格,正常的excel // String url = "http://10.0.0.213:7080/M00/00/05/CgAA1WMPJNqASFGVAAOaAKMlNaA131.xls"; // 带有合并的单元格,带有图片 // List<List<Map<String, Object>>> lists = ParseExcelUtil.parseSimpleExcel(url, 0, true); // if (CollectionUtil.isEmpty(lists)) { // log.error("没有解析到数据"); // return; // } // log.info("本次共有{} sheet", lists.size()); // for (int i = 0; i < lists.size(); i++) { // List<Map<String, Object>> list = lists.get(i); // log.info("第{}个sheet,共计{}条数据", i, list.size()); // ExcelWriter writer = ExcelUtil.getWriter("E:\\logs\\" + i + ".xlsx"); // try { // writer.write(list, true); // } finally { // if (Objects.nonNull(writer)) { // writer.close(); // } // } // } // }}


上一篇:2012谷歌透明度报告数据简析
下一篇:没有了
网友评论