// LuceneSearcher.java
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package cn.huawei.com.CompressedSeacher.util.lucene; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Paths; import java.util.Date; import java.util.HashMap; import java.util.Map; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.queryparser.classic.QueryParser.Operator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.FSDirectory; /** * ** * * @author l00358914 */ public class LuceneSearcher { private static final LuceneSearcher instance = new LuceneSearcher(); private static final String indexData_Dir = "D:\\Lucene-600-index-data_directory"; private static final String queries = null; private LuceneSearcher() { } public static LuceneSearcher getInstance() { return instance; } public static void main(String[] args) throws Exception { LuceneSearcher.getInstance().doSearch("contents", "apache", Integer.MAX_VALUE, 0, false, Operator.AND); } /** * ** * * @param search_field 要搜索的目标范围filed * @param queryString 要搜索的关键字 * @param hitsPerPage 返回的最大结果集 * @param repeat * @param raw 是否打印文档原始数据,比如评分等 * @param defaultOperator 默认的操作符,默认是OR Operator.OR * @throws IOException * @throws ParseException */ public static MapdoSearch(String search_field, String queryString, int hitsPerPage, int 
repeat, boolean raw, Operator defaultOperator) throws IOException, ParseException { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexData_Dir, new String[0]))); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(); BufferedReader in = null; Map temp = new HashMap (); if (queries != null) { in = Files.newBufferedReader(Paths.get(queries, new String[0]), StandardCharsets.UTF_8); } else { in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8)); } QueryParser parser = new QueryParser(search_field, analyzer); while (true) { if ((queries == null) && (queryString == null)) { System.out.println("Enter query: "); } String line = queryString != null ? queryString : in.readLine(); if ((line == null) || (line.length() == -1)) { break; } line = line.trim(); if (line.length() == 0) { break; } parser.setDefaultOperator(defaultOperator);//或者设置默认的操作符为AND 达到上面AND的效果 Query query = parser.parse(line); System.out.println("Searching for: " + query.toString(search_field)); if (repeat > 0) { Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query, 100);//IndexSearcher对象的search方法中总是需要一个Query对象(或是Query子类的对象) } Date end = new Date(); System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); } temp = doPagingSearch(in, searcher, query, hitsPerPage, raw, (queries == null) && (queryString == null)); if (queryString != null) { break; } } reader.close(); return temp; } /** * ** * * @param in * @param searcher * @param query * @param hitsPerPage * @param raw * @param interactive * @throws IOException */ private static Map doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage, boolean raw, boolean interactive) throws IOException { TopDocs results = searcher.search(query, 5 * hitsPerPage); ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; System.out.println(numTotalHits + " total matching documents"); int start 
= 0; int end = Math.min(numTotalHits, hitsPerPage); Map temp = new HashMap (); while (true) { if (end > hits.length) { System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits + " total matching documents collected."); System.out.println("Collect more (y/n) ?"); String line = in.readLine(); if ((line.length() == 0) || (line.charAt(0) == 'n')) { break; } hits = searcher.search(query, numTotalHits).scoreDocs; } end = Math.min(hits.length, start + hitsPerPage); for (int i = start; i < end; i++) { if (raw) {//是否打印文档原生数据 System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score + " "); // System.out.println(searcher.explain(query, hits[i].doc)); } else { Document doc = searcher.doc(hits[i].doc); String path = doc.get("path"); if (path != null) { // System.out.print(i + 1 + ". " + path+" "); String title = doc.get("title"); if (title != null) { // System.out.println(" Title: " + doc.get("title")); } temp.put(title, path); } else { System.out.println(i + 1 + ". " + "No path for this document"); } } } if ((!interactive) || (end == 0)) { break; } if (numTotalHits >= end) { boolean quit = false; while (true) { System.out.print("Press "); if (start - hitsPerPage >= 0) { System.out.print("(p)revious page, "); } if (start + hitsPerPage < numTotalHits) { System.out.print("(n)ext page, "); } System.out.println("(q)uit or enter number to jump to a page."); String line = in.readLine(); if ((line.length() == 0) || (line.charAt(0) == 'q')) { quit = true; break; } if (line.charAt(0) == 'p') { start = Math.max(0, start - hitsPerPage); break; } if (line.charAt(0) == 'n') { if (start + hitsPerPage >= numTotalHits) { break; } start += hitsPerPage; break; } int page = Integer.parseInt(line); if ((page - 1) * hitsPerPage < numTotalHits) { start = (page - 1) * hitsPerPage; break; } System.out.println("No such page"); } if (quit) { break; } end = Math.min(numTotalHits, start + hitsPerPage); } } return temp; } }