当前位置 : 主页 > 编程语言 > java >

CS-LuceneSearcher

来源:互联网 收集:自由互联 发布时间:2021-06-28
LuceneSearcher.java /* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */package cn.huawei.com.CompressedSeacher.
LuceneSearcher.java
/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package cn.huawei.com.CompressedSeacher.util.lucene;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.queryparser.classic.QueryParser.Operator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;

/**
 * **
 *
 * @author l00358914
 */
public class LuceneSearcher {
    private static final LuceneSearcher instance = new LuceneSearcher();
    private static final String indexData_Dir = "D:\\Lucene-600-index-data_directory";
    private static final String queries = null;
    private LuceneSearcher() {
    }
    public static LuceneSearcher getInstance() {
        return instance;
    }
    public static void main(String[] args)
            throws Exception {
        LuceneSearcher.getInstance().doSearch("contents", "apache", Integer.MAX_VALUE, 0, false, Operator.AND);
    }
    /**
     * **
     *
     * @param search_field  要搜索的目标范围filed
     * @param queryString   要搜索的关键字
     * @param hitsPerPage   返回的最大结果集
     * @param repeat
     * @param raw             是否打印文档原始数据,比如评分等
     * @param defaultOperator 默认的操作符,默认是OR Operator.OR
     * @throws IOException
     * @throws ParseException
     */
    public static Map
 
   doSearch(String search_field, String queryString, int hitsPerPage, int repeat, boolean raw, Operator defaultOperator) throws IOException, ParseException {
        IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexData_Dir, new String[0])));
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer();
        BufferedReader in = null;
        Map
  
    temp = new HashMap
   
    (); if (queries != null) { in = Files.newBufferedReader(Paths.get(queries, new String[0]), StandardCharsets.UTF_8); } else { in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8)); } QueryParser parser = new QueryParser(search_field, analyzer); while (true) { if ((queries == null) && (queryString == null)) { System.out.println("Enter query: "); } String line = queryString != null ? queryString : in.readLine(); if ((line == null) || (line.length() == -1)) { break; } line = line.trim(); if (line.length() == 0) { break; } parser.setDefaultOperator(defaultOperator);//或者设置默认的操作符为AND 达到上面AND的效果 Query query = parser.parse(line); System.out.println("Searching for: " + query.toString(search_field)); if (repeat > 0) { Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query, 100);//IndexSearcher对象的search方法中总是需要一个Query对象(或是Query子类的对象) } Date end = new Date(); System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); } temp = doPagingSearch(in, searcher, query, hitsPerPage, raw, (queries == null) && (queryString == null)); if (queryString != null) { break; } } reader.close(); return temp; } /** * ** * * @param in * @param searcher * @param query * @param hitsPerPage * @param raw * @param interactive * @throws IOException */ private static Map
    
      doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage, boolean raw, boolean interactive) throws IOException { TopDocs results = searcher.search(query, 5 * hitsPerPage); ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; System.out.println(numTotalHits + " total matching documents"); int start = 0; int end = Math.min(numTotalHits, hitsPerPage); Map
     
       temp = new HashMap
      
       (); while (true) { if (end > hits.length) { System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits + " total matching documents collected."); System.out.println("Collect more (y/n) ?"); String line = in.readLine(); if ((line.length() == 0) || (line.charAt(0) == 'n')) { break; } hits = searcher.search(query, numTotalHits).scoreDocs; } end = Math.min(hits.length, start + hitsPerPage); for (int i = start; i < end; i++) { if (raw) {//是否打印文档原生数据 System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score + " "); // System.out.println(searcher.explain(query, hits[i].doc)); } else { Document doc = searcher.doc(hits[i].doc); String path = doc.get("path"); if (path != null) { // System.out.print(i + 1 + ". " + path+" "); String title = doc.get("title"); if (title != null) { // System.out.println(" Title: " + doc.get("title")); } temp.put(title, path); } else { System.out.println(i + 1 + ". " + "No path for this document"); } } } if ((!interactive) || (end == 0)) { break; } if (numTotalHits >= end) { boolean quit = false; while (true) { System.out.print("Press "); if (start - hitsPerPage >= 0) { System.out.print("(p)revious page, "); } if (start + hitsPerPage < numTotalHits) { System.out.print("(n)ext page, "); } System.out.println("(q)uit or enter number to jump to a page."); String line = in.readLine(); if ((line.length() == 0) || (line.charAt(0) == 'q')) { quit = true; break; } if (line.charAt(0) == 'p') { start = Math.max(0, start - hitsPerPage); break; } if (line.charAt(0) == 'n') { if (start + hitsPerPage >= numTotalHits) { break; } start += hitsPerPage; break; } int page = Integer.parseInt(line); if ((page - 1) * hitsPerPage < numTotalHits) { start = (page - 1) * hitsPerPage; break; } System.out.println("No such page"); } if (quit) { break; } end = Math.min(numTotalHits, start + hitsPerPage); } } return temp; } }
      
     
    
   
  
 
网友评论