2.6、Lucene查询高亮


找到需要高亮的片段

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
package com.learn.lucene.chapter2.hignlight;

import com.learn.lucene.chapter2.ik.IKAnalyzer8x;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
 * Demo that runs a query against an on-disk Lucene index and prints, for each hit,
 * the stored fields together with the best highlighted fragment of the searched field.
 */
public class HighlighterTest {
    /**
     * Opens the index in the {@code indexdir} directory, searches the {@code title} field
     * for "北大", and prints the top 10 hits with the matching terms wrapped in a red
     * {@code <span>}.
     *
     * @param args unused
     * @throws IOException                  if the index cannot be opened or read
     * @throws ParseException               if the query string cannot be parsed
     * @throws InvalidTokenOffsetsException if the highlighter encounters token offsets
     *                                      that do not fit the stored text
     */
    public static void main(String[] args) throws IOException, ParseException, InvalidTokenOffsetsException {
        String field = "title";
        Path indexPath = Paths.get("indexdir");

        // try-with-resources closes in reverse declaration order (analyzer, reader, directory),
        // so the reader is released before the directory it reads from, and nothing leaks
        // when parsing, searching or highlighting throws.
        try (Directory directory = FSDirectory.open(indexPath);
             IndexReader reader = DirectoryReader.open(directory);
             Analyzer analyzer = new IKAnalyzer8x()) {
            IndexSearcher searcher = new IndexSearcher(reader);

            QueryParser parser = new QueryParser(field, analyzer);
            Query query = parser.parse("北大");
            System.out.println("Query: " + query);

            // The highlighter setup depends only on the query, so build it once
            // instead of once per hit.
            QueryScorer scorer = new QueryScorer(query, field);
            SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("<span style=\"color:red;\">", "</span>");
            Highlighter highlighter = new Highlighter(htmlFormatter, scorer);
            Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
            highlighter.setTextFragmenter(fragmenter);

            TopDocs topDocs = searcher.search(query, 10);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document doc = searcher.doc(scoreDoc.doc);
                System.out.println("DocID: " + scoreDoc.doc);
                System.out.println("id: " + doc.get("id"));
                System.out.println("title: " + doc.get("title"));
                System.out.println("content: " + doc.get("content"));
                System.out.println("reply: " + doc.get("reply_display"));
                System.out.println("文档评分: " + scoreDoc.score);

                // Highlighting needs the stored text of the field; if the field was not
                // stored there is nothing to highlight, so report a null fragment rather
                // than handing null text to the highlighter.
                String text = doc.get(field);
                String str = null;
                if (text != null) {
                    TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, scoreDoc.doc, field, analyzer);
                    str = highlighter.getBestFragment(tokenStream, text);
                }
                System.out.println("高亮的片段:" + str);
            }
        }
    }
}

1566709467482