Lucene.Net用了又忘...由于现在信息量爆炸,用过的东西用完就忘,只好自己写个笔记来记录一下了... 1: 需要DLL Lucene.Net.dll PanGu.dll PanGu.HighLight.dll PanGu.Lucene.Analyzer.dll 没有的话,可以去我
Lucene.Net用了又忘...由于现在信息量爆炸,用过的东西用完就忘,只好自己写个笔记来记录一下了...
1: 需要DLL
Lucene.Net.dll
PanGu.dll
PanGu.HighLight.dll
PanGu.Lucene.Analyzer.dll
没有的话,可以去我的资源包里面下,地址如下: http://download.csdn.net/download/kimizhou_blog/10016313
2: 生成索引
// Root folder (under App_Data) where the index files are stored.
string indexPath = Context.Server.MapPath("~/App_Data/IndexData");

// Sub-folder that holds the points-mall product index.
string commonProductIndexPath = indexPath + "/" + "commonProduct";

// Build the points-mall product index.
CreateCommonProductIndex(commonProductIndexPath);
然后看看CreateCommonProductIndex方法
/// <summary>
/// Rebuilds the points-mall product index in the given directory: every existing
/// document is deleted and one document per product is written.
/// </summary>
/// <param name="indexPath">File-system path of the directory that holds the index files.</param>
private void CreateCommonProductIndex(string indexPath)
{
    // Bind the index directory.
    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
    try
    {
        bool isExist = IndexReader.IndexExists(directory);

        // Force-release a write lock left on an existing index so a writer can be opened.
        // NOTE(review): this also breaks the lock of a writer that is still active elsewhere -
        // confirm that only one indexing job can run at a time.
        if (isExist && IndexWriter.IsLocked(directory))
        {
            IndexWriter.Unlock(directory);
        }

        // The "create" flag is true only when no index exists yet.
        IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isExist, IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
            writer.DeleteAll(); // remove the previous documents first

            IList<ProductInfoByIndex> list = Product.GetProductListByIndex();
            foreach (var item in list)
            {
                Document document = new Document();

                // Every value is stored as a string because the index only stores string data.
                // The id is an identifier and a sort key, so it is indexed as a single,
                // untokenised term instead of being run through the analyzer.
                document.Add(new Field("id", item.ProductID.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

                string Content = string.Format("{0}", item.ProductName);
                document.Add(new Field("Content", Content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

                writer.AddDocument(document); // write the document into the index
            }

            writer.Close(); // commits the changes and releases the write lock
        }
        catch
        {
            // Discard the half-built index (keeping the last committed state) and
            // release the write lock, then let the caller see the original error.
            writer.Rollback();
            throw;
        }
    }
    finally
    {
        // Always close the directory; the original author notes that search results
        // cannot be found if it is left open.
        directory.Close();
    }
}
其中
IList<ProductInfoByIndex> list = Product.GetProductListByIndex(); 这一行是去数据库中读取这个list对象,这里代码就不贴出来了。到这里你的索引已经创建出来了,那么接下来需要查询和显示。
查询是最困难的,各种匹配
3:查询索引并且显示出来
GetProductIndex方法就是获取索引代码如下:
/// <summary>
/// Searches the points-mall product index for the words of the request's
/// "SearchKey" value, stores the hits in <c>productResultList</c> and appends
/// the hit count to <c>Message</c>.
/// </summary>
private void GetProductIndex()
{
    string indexPath = Context.Server.MapPath("~/App_Data/IndexData"); // where the index files are stored
    string commonProductIndexPath = string.Format("{0}/{1}", indexPath, "commonProduct"); // points-mall products

    FSDirectory directory = FSDirectory.Open(new DirectoryInfo(commonProductIndexPath), new NoLockFactory());
    IndexReader reader = null;
    IndexSearcher searcher = null;
    try
    {
        reader = IndexReader.Open(directory, true); // true = read-only
        searcher = new IndexSearcher(reader);

        // Every word of the search key must occur somewhere in "Content"
        // (comparable to SQL: Content LIKE '%word%' for each word).
        BooleanQuery bQuery = new BooleanQuery();
        foreach (string word in SplitContent.SplitWords(Request["SearchKey"]))
        {
            Query queryUseringNatrue = new WildcardQuery(new Term("Content", "*" + word + "*"));
            bQuery.Add(queryUseringNatrue, BooleanClause.Occur.MUST); // MUST = required
        }

        // Ids are integers, so sort them as INT (FLOAT loses precision on large ids).
        // true = descending order.
        Sort sort = new Sort(new SortField("id", SortField.INT, true));
        TopDocs docs = searcher.Search(bQuery, (Filter)null, 9999999, sort);

        // Iterate over the hits actually returned; totalHits can exceed scoreDocs.Length.
        ScoreDoc[] hits = docs.scoreDocs;
        List<ProductInfoByIndex> proList = new List<ProductInfoByIndex>(hits.Length);
        for (int i = 0; i < hits.Length; i++)
        {
            Document doc = searcher.Doc(hits[i].doc);
            ProductInfoByIndex product = new ProductInfoByIndex();
            product.ProductID = System.Convert.ToInt32(doc.Get("id"));
            product.ProductName = doc.Get("Content");
            //product.ProductName = SplitContent.HightLight(Request["SearchKey"], doc.Get("Content"));
            proList.Add(product);
        }

        productResultList = proList;
        this.Message += string.Format("|{0}条积分商城产品", docs.totalHits);

        // Alternative implementation kept for reference: PhraseQuery plus keyword highlighting.
        //PhraseQuery query = new PhraseQuery();
        //foreach (string word in SplitContent.SplitWords(Request["SearchKey"]))
        //{
        //    query.Add(new Term("Content", word));
        //}
        //query.SetSlop(100);
        //TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
        //searcher.Search(query, null, collector);
        //ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;
        //List<ProductInfoByIndex> proList = new List<ProductInfoByIndex>();
        //for (int i = 0; i < docs.Length; i++)
        //{
        //    int docId = docs[i].doc; // id of the hit document (assigned internally by Lucene)
        //    Document doc = searcher.Doc(docId); // load the Document by its id
        //    ProductInfoByIndex product = new ProductInfoByIndex();
        //    product.ProductID = System.Convert.ToInt32(doc.Get("id"));
        //    //book.ContentDescription = doc.Get("content"); // without highlighting
        //    // highlight the search keywords with the PanGu highlight plug-in
        //    product.ProductName = SplitContent.HightLight(Request["SearchKey"], doc.Get("Content"));
        //    proList.Add(product);
        //}
        //productResultList = proList;
        //this.Message += string.Format("|{0}条积分商城产品", docs.Length);
    }
    finally
    {
        // Release the index handles even when the search throws. A searcher built
        // from a reader does not close that reader, so both are closed explicitly.
        if (searcher != null)
        {
            searcher.Close();
        }

        if (reader != null)
        {
            reader.Close();
        }

        directory.Close();
    }
}
其中我注释掉的,是另外一种方法;这里我用的是效率比较慢的模糊查询:
Query queryUseringNatrue = new WildcardQuery(new Term("Content", "*" + word + "*"));
这个类似 数据库的like '%关键字%'
到这里就已经获取到了所有的索引资料了,是不是很简单,你get到了吗?最后我再给大家介绍索引的几种查询方式:
第1种:
//string keyWordUseringNatrue = "营运";
//if (!string.IsNullOrWhiteSpace(keyWordUseringNatrue))
//{
//    QueryParser parseUseringNatrue = new QueryParser("UseringNatrue", new PanGuAnalyzer());
//    parseUseringNatrue.SetDefaultOperator(QueryParser.Operator.AND); // 必须在 Parse 之前设置,否则对本次解析不生效
//    Query query = parseUseringNatrue.Parse(keyWordUseringNatrue);
//    bQuery.Add(query, BooleanClause.Occur.MUST);
//}
//营运
//Query queryUseringNatrue = new WildcardQuery(new Term("UseringNatrue", "营运"));
//bQuery.Add(queryUseringNatrue, BooleanClause.Occur.MUST);// MUST 必须
这个查询是什么呢?是一般的查询,会查询出“营运”相关的内容,但是它和like不一样,它跟分词有关,比如说,搜“爱”就查询不出“可爱”,因为PanGu有自己的分词。不过这种查询比较常用,下面汇总一下其它的查询:
其它查询汇总:
//介绍各种Query
//TermQuery: 首先介绍最基本的查询,如果你想执行一个这样的查询:“在content字段中查询包含‘刘备’的document”,那么你可以用TermQuery:
// Term t = new Term("content", "刘备");
// Query query = new TermQuery(t);
//BooleanQuery: 如果你想这么查询:“在content字段中包含‘刘备’并且在title字段包含‘三国’的document”,那么你可以建立两个TermQuery并把它们用BooleanQuery连接起来(“并且”对应MUST,“或者”对应SHOULD):
// TermQuery termQuery1 = new TermQuery(new Term("content", "刘备"));
// TermQuery termQuery2 = new TermQuery(new Term("title", "三国"));
// BooleanQuery booleanQuery = new BooleanQuery();
// booleanQuery.Add(termQuery1, BooleanClause.Occur.MUST);
// booleanQuery.Add(termQuery2, BooleanClause.Occur.MUST);
//WildcardQuery: 如果你想对某单词进行通配符查询,你可以用WildcardQuery,通配符包括‘?’匹配一个任意字符和‘*’匹配零个或多个任意字符,例如你搜索‘三国*’,你可能找到‘三国演义’或者‘三国志’:
// Query query = new WildcardQuery(new Term("content", "三国*"));
//PhraseQuery: 你可能对中日关系比较感兴趣,想查找‘中’和‘日’挨得比较近(5个字的距离内)的文章,超过这个距离的不予考虑,你可以:
// PhraseQuery query = new PhraseQuery();
// query.SetSlop(5);
// query.Add(new Term("content", "中"));
// query.Add(new Term("content", "日"));
//那么它可能搜到“中日合作……”、“中方和日方……”,但是搜不到“中国某高层领导说日本欠扁”
//PrefixQuery: 如果你想搜以‘中’开头的词语,你可以用PrefixQuery:
// PrefixQuery query = new PrefixQuery(new Term("content", "中"));
//FuzzyQuery: FuzzyQuery用来搜索相似的term,使用Levenshtein算法。假设你想搜索跟‘wuzza’相似的词语,你可以:
// Query query = new FuzzyQuery(new Term("content", "wuzza"));
//你可能得到‘fuzzy’和‘wuzzy’。
//RangeQuery: 另一个常用的Query是RangeQuery,你也许想搜索时间域从20060101到20060130之间的document,你可以用RangeQuery:
// RangeQuery query = new RangeQuery(new Term("time","20060101"), new Term("time","20060130"), true);
//最后的true表示用闭合区间。
因为各个版本的用法都不太一样,下面介绍一种常用的读取以后显示的方式,其中Sort就是排序:
// Time the search. StartNew() creates a stopwatch that is already running;
// a stopwatch created with "new Stopwatch()" stays at 0 until Start() is called.
Stopwatch stopwath = Stopwatch.StartNew();

// Sort on the "CarPrice" (price) field; true = descending order.
// The original note asks what SortField.DOC is: per the Lucene documentation it
// sorts by document number, i.e. index order.
Sort sort = new Sort(new SortField("CarPrice", SortField.FLOAT, true));
TopDocs docs = searcher.Search(bQuery, (Filter)null, 9999999, sort);

stopwath.Stop(); // stop the stopwatch
long lSearchTime = stopwath.ElapsedMilliseconds; // elapsed search time in milliseconds

// Iterate over the hits actually returned; totalHits can exceed scoreDocs.Length.
ScoreDoc[] hits = docs.scoreDocs;
List<CarSourceInfoByIndex> carSourceResult = new List<CarSourceInfoByIndex>(hits.Length);
for (int i = 0; i < hits.Length; i++)
{
    Document doc = searcher.Doc(hits[i].doc);
    CarSourceInfoByIndex carSource = new CarSourceInfoByIndex()
    {
        Id = int.Parse(doc.Get("Id")),
        CarPrice = System.Convert.ToDouble(doc.Get("CarPrice")),
        // Stored content with the search keywords highlighted.
        Recommended = SplitContent.HightLight(Request["SearchKey"], doc.Get("Content"))
    };
    carSourceResult.Add(carSource);
}

carSourceResultList2 = carSourceResult;
this.Message += string.Format("{0}条测试", docs.totalHits);

就到这里了,不懂的可以加我QQ 10200454咨询