import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* https://blog.csdn.net/qq_36240505/article/details/115605315
* 正则表达式读取小说(txt文本)的章节
*/
/**
 * Splits a plain-text novel into chapters by looking for chapter-heading lines with a
 * regular expression.
 *
 * <p>Chapter ids are drawn from the shared mutable counter {@link #idCount}; nothing in this
 * class synchronizes access to it.
 */
public class NovelSplitterUtils {

    /** Character encoding used to decode the novel file. */
    private static final String FILE_ENCODING = "GBK";

    /**
     * Chapter-heading pattern, compiled once instead of once per line.
     *
     * <p>It matches the character '第', a run of non-whitespace characters, one of the unit
     * characters in the bracket expression, a single whitespace character, a further run of
     * non-whitespace characters and finally one character other than '#'. Because it is applied
     * with {@link Matcher#find()}, the heading may start anywhere inside a line, and only the
     * matched part of the line becomes the chapter name.
     *
     * <p>Note from the original author: for files whose headings are wrapped in "###" markers,
     * a pattern of the form {@code (###)(.*)(###)} is the better choice.
     */
    private static final Pattern CHAPTER_HEADING =
            Pattern.compile("(第\\S*)[章节卷集部篇回](\\s)(\\S*)[^#]");

    /**
     * Next chapter id to hand out. It keeps growing across calls to
     * {@link #parse(int, String)} so that ids stay unique when several novel files are
     * imported one after another.
     */
    public static int idCount = 1;

    /**
     * Demo entry point: parses a hard-coded file and prints every chapter name.
     * The original author's note indicates the resulting list was meant to be inserted into
     * a database afterwards.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        ArrayList<BookCon> parse = parse(1, "D:\\06tmp\\7175.txt");
        if (parse == null) {
            // parse() returns null when the path is unusable; avoid a NullPointerException.
            return;
        }
        for (BookCon chapter : parse) {
            System.out.println("i = " + chapter.chapterName);
        }
    }

    /**
     * Reads the file at {@code path} and splits it into chapters.
     *
     * <p>Every line is trimmed. A line containing a match of the chapter-heading pattern starts
     * a new chapter whose name is the matched text; every other line is appended, followed by
     * {@code '\n'}, to the content of the current chapter. Lines that appear before the first
     * heading are discarded.
     *
     * @param BookId id of the book, stored in every returned chapter
     * @param path   location of the text file
     * @return the chapters in file order (possibly empty); {@code null} if {@code path} is
     *         {@code null}, does not denote an existing regular file, or the file could not be
     *         opened; if reading fails part-way, the chapters completed before the failure
     */
    public static ArrayList<BookCon> parse(int BookId, String path) {
        if (path == null) {
            return null;
        }
        File file = new File(path);
        // isFile() is only true for an existing regular file, so no separate exists() check.
        if (!file.isFile()) {
            return null;
        }

        ArrayList<BookCon> list = null;
        // try-with-resources closes all three streams even when reading fails.
        try (FileInputStream in = new FileInputStream(file);
             InputStreamReader read = new InputStreamReader(in, FILE_ENCODING);
             BufferedReader bufferedReader = new BufferedReader(read)) {
            list = new ArrayList<>();
            int count = 1; // 1-based position of the chapter within this file
            StringBuilder lastBuilder = null;
            BookCon lastBookContent = null;
            String lineTxt;
            while ((lineTxt = bufferedReader.readLine()) != null) {
                lineTxt = lineTxt.trim();
                Matcher matcher = CHAPTER_HEADING.matcher(lineTxt);
                if (matcher.find()) {
                    // A new heading closes the chapter that was being collected.
                    if (lastBookContent != null) {
                        lastBookContent.content = lastBuilder.toString();
                        list.add(lastBookContent);
                    }
                    lastBookContent = new BookCon(idCount++, BookId, matcher.group(), count++);
                    lastBuilder = new StringBuilder();
                } else if (lastBuilder != null) {
                    lastBuilder.append(lineTxt).append('\n');
                }
            }
            // The final chapter has no following heading to close it, so flush it here.
            if (lastBookContent != null) {
                lastBookContent.content = lastBuilder.toString();
                list.add(lastBookContent);
            }
        } catch (java.io.IOException e) {
            // Only I/O failures are handled; programming errors are no longer swallowed.
            System.out.println("文件读取失败");
            e.printStackTrace();
        }
        return list;
    }

    /** One chapter of a book. */
    static class BookCon {
        /** Chapter id. */
        int chapterId;
        /** Id of the book this chapter belongs to. */
        int bookId;
        /** Chapter name. */
        String chapterName;
        /** Text of the chapter; not set by the constructor, assigned once the chapter is complete. */
        String content;
        /** Sequence number of the chapter. */
        int order;

        /**
         * Creates a chapter without content; {@link #content} is filled in later.
         *
         * @param chapterId   chapter id
         * @param bookId      id of the owning book
         * @param chapterName chapter name
         * @param order       sequence number of the chapter
         */
        public BookCon(int chapterId, int bookId, String chapterName, int order) {
            this.order = order;
            this.bookId = bookId;
            this.chapterId = chapterId;
            this.chapterName = chapterName;
        }
    }
}