Controller layer (the data is queried with MyBatis + MySQL)
@Controller
public class LuceneController {
@Autowired
private LuceneServices luceneSerImpl;
private Directory dir;
private static String indexDir = "D:\\lucene2";
IndexReader reader;
//the analyzer used to build the index must be the same as the one used for querying
public static Analyzer analyzer = new IKAnalyzer5x(true);
IndexWriter iw;
//obtain an IndexWriter
public IndexWriter getIndexWriter() throws IOException{
File file = new File(indexDir);
if (!file.exists()) {
file.mkdir();
}
dir =FSDirectory.open(Paths.get(indexDir));
IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
iw = new IndexWriter(dir, iwc);
return iw;
}
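//A sketch (not in the original): IndexWriterConfig can make the "first build"
//vs "incremental update" split explicit via an open mode. OpenMode.CREATE
//wipes any existing index; CREATE_OR_APPEND reuses it.
public IndexWriter getIndexWriter(boolean create) throws IOException{
dir = FSDirectory.open(Paths.get(indexDir));
IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
iwc.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
iw = new IndexWriter(dir, iwc);
return iw;
}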
//obtain an IndexSearcher
public IndexSearcher getIndexSearcher() throws IOException{
dir =FSDirectory.open(Paths.get(indexDir));
reader=DirectoryReader.open(dir);
IndexSearcher is=new IndexSearcher(reader);
return is;
}
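//A sketch (not in the original): opening a new DirectoryReader on every request
//is expensive. Lucene 5.x ships org.apache.lucene.search.SearcherManager to
//share and refresh searchers safely; it would be built once from the writer,
//e.g. new SearcherManager(iw, true, null).
public IndexSearcher getManagedSearcher(SearcherManager searcherManager) throws IOException{
searcherManager.maybeRefresh();//pick up index changes committed since the last refresh
//the caller must hand the searcher back with searcherManager.release(is) after use
return searcherManager.acquire();
}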
//create (or update) the index
@RequestMapping("createIndex.do")
public void createIndex() throws IOException{
IndexSearcher is=this.getIndexSearcher();
//fetch the data
List<Map<String,Object>> list = luceneSerImpl.queryList();
Content content = new Content();
String filePath = "D:\\MaxId.txt";
Integer id = luceneSerImpl.QueryMaxId(content);
File file = new File(filePath);
if (!file.exists()) {
file.createNewFile();
}
//persist the largest id in the database to a file
createContentToFile(filePath,id.toString());
IndexWriter iw = this.getIndexWriter();
for (int i=0;i<list.size();i++) {
//check by id whether the index already contains this record
Term t=new Term("id",list.get(i).get("ID").toString());
Query query=new TermQuery(t);
TopDocs hits=is.search(query, 1);
//the record exists, so update it
if (hits.scoreDocs.length>0) {
Document docOld=is.doc(hits.scoreDocs[0].doc);
Document docNew = this.getDocument(list,i);
iw.updateDocument(new Term("id",docOld.get("id")), docNew);
}else{
//the record does not exist, so add it to the index
Document doc = this.getDocument(list,i);
iw.addDocument(doc);
}
}
iw.forceMerge(1);//merge the segment files produced by each run into one
reader.close();
iw.close();
}
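//A sketch of the incremental pass the MaxId file seems intended for: read the
//saved id back and index only the newer rows. queryListAfterId(...) is an
//assumed mapper method, not part of the original service; readContentFromFile
//is sketched after createContentToFile below.
public void createIndexIncrement() throws IOException{
//the saved value carries a leading comma (see createContentToFile), so strip it
Integer lastMaxId = Integer.valueOf(readContentFromFile("D:\\MaxId.txt").replace(",", "").trim());
List<Map<String,Object>> newRows = luceneSerImpl.queryListAfterId(lastMaxId);
IndexWriter iw = this.getIndexWriter();
for (int i=0;i<newRows.size();i++) {
//rows newer than lastMaxId cannot be in the index yet, so no exists-check is needed
iw.addDocument(this.getDocument(newRows,i));
}
iw.forceMerge(1);
iw.close();
}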
//write data to a file
public void createContentToFile(String filePath,String content) throws IOException{
File file = new File(filePath);
BufferedWriter out = null;
try {
//with append=false the file is overwritten, so only the latest value is kept; with true the old and new data are both retained
out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file, false)));
out.write(","+content);
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
if(out != null){
out.close();
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
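//Read counterpart (an assumed helper, not in the original) for the file
//written above; it strips the leading comma that createContentToFile prepends.
public String readContentFromFile(String filePath) throws IOException{
StringBuilder sb = new StringBuilder();
try (BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(filePath)))) {
int c;
while ((c = in.read()) != -1) {
sb.append((char) c);
}
}
return sb.toString().startsWith(",") ? sb.substring(1) : sb.toString();
}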
//first-time index creation (to build the index initially, call this method first)
@RequestMapping("createIndex1.do")
public void createIndex1() throws IOException{
SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
//fetch the data
List<Map<String,Object>> list = luceneSerImpl.queryList();
IndexWriter iw = this.getIndexWriter();
for (int i=0;i<list.size();i++) {
Document doc = new Document();
doc.add(new StringField("id",list.get(i).get("ID").toString() , Field.Store.YES));
//TextField is analyzed (tokenized); StringField is not analyzed and is stored as one whole token
doc.add(new TextField("title", (String)list.get(i).get("TITLE"), Field.Store.YES));
doc.add(new TextField("source", (String)list.get(i).get("SOURCE"), Field.Store.YES));
doc.add(new TextField("content", (String)list.get(i).get("CONTENT"), Field.Store.YES));
//for sorting in 5.3.1, the field must be added to the document as below (a SortedDocValuesField alongside the stored field)
StringField tf=new StringField("publishDate", format.format(list.get(i).get("PUBLISHDATE")), Field.Store.YES);
doc.add(tf);
SortedDocValuesField publishDateField = new SortedDocValuesField("publishDate", new BytesRef(format.format(list.get(i).get("PUBLISHDATE")).getBytes()));
doc.add(publishDateField);
iw.addDocument(doc);
}
iw.forceMerge(1);//merge the segment files produced by each run into one
iw.close();
}
//build a Document object from row i of the result list
public Document getDocument(List<Map<String,Object>> list,Integer i){
SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
Document doc = new Document();
doc.add(new StringField("id",list.get(i).get("ID").toString() , Field.Store.YES));
//TextField is analyzed (tokenized); StringField is not analyzed and is stored as one whole token
doc.add(new TextField("title", (String)list.get(i).get("TITLE"), Field.Store.YES));
doc.add(new TextField("source", (String)list.get(i).get("SOURCE"), Field.Store.YES));
doc.add(new TextField("content", (String)list.get(i).get("CONTENT"), Field.Store.YES));
//for sorting in 5.3.1, the field must be added to the document as below (a SortedDocValuesField alongside the stored field)
StringField tf=new StringField("publishDate", format.format(list.get(i).get("PUBLISHDATE")), Field.Store.YES);
doc.add(tf);
SortedDocValuesField publishDateField = new SortedDocValuesField("publishDate", new BytesRef(format.format(list.get(i).get("PUBLISHDATE")).getBytes()));
doc.add(publishDateField);
return doc;
}
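//Why the exists-check in createIndex works: "id" is a StringField, which is
//not analyzed, so an exact TermQuery matches it; the same lookup against the
//analyzed "title" TextField would generally fail. A minimal sketch:
public boolean idExists(String id) throws IOException{
IndexSearcher is = this.getIndexSearcher();
TopDocs hits = is.search(new TermQuery(new Term("id", id)), 1);
reader.close();
return hits.scoreDocs.length > 0;
}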
//query the boosted data again (highlighted, paginated display)
@RequestMapping("search.do")
public String SearchData(ModelMap map) throws Exception{
List<Map<String,Object>> list = new ArrayList<Map<String,Object>>();
IndexSearcher is=this.getIndexSearcher();
//declare the array of fields to search
String[] fields = {"title","content"};
//declare a BooleanClause.Occur[] array; it expresses how the per-field conditions combine
BooleanClause.Occur[] flags=new BooleanClause.Occur[]{BooleanClause.Occur.SHOULD,BooleanClause.Occur.SHOULD};
//build the Query with MultiFieldQueryParser (the keyword "北京" is hardcoded for this demo)
Query query = MultiFieldQueryParser.parse("北京", fields, flags, analyzer);
QueryScorer scorer=new QueryScorer(query);
//highlighting
Fragmenter fragmenter=new SimpleSpanFragmenter(scorer);
SimpleHTMLFormatter simpleHTMLFormatter=new SimpleHTMLFormatter("<b><font color='red'>","</font></b>");
Highlighter highlighter=new Highlighter(simpleHTMLFormatter, scorer);
highlighter.setTextFragmenter(fragmenter);
//sorting
Sort sort = new Sort();
//sort by publish date, descending
SortField sf1 = new SortField("publishDate",SortField.Type.STRING, true);
sort.setSort(sf1);
//run the query
TopDocs resultNew = is.search(query,100,sort);
ScoreDoc[] sds = resultNew.scoreDocs;
int start = (2-1)*5;//page 2, 5 records per page, hardcoded for this demo
int end = Math.min(2*5, sds.length);//clamp so a short hit list cannot overflow
for(int i=start;i<end;i++)
{
Map<String,Object> map2 = new HashMap<String, Object>();
Document doc = is.doc(sds[i].doc);
map2.put("id",doc.get("id"));
map2.put("source",doc.get("source"));
map2.put("publishDate",doc.get("publishDate"));
if (doc.get("title")!=null) {
String title = doc.get("title");
TokenStream tokenStream=analyzer.tokenStream("title", new StringReader(title));
String newTitle=highlighter.getBestFragment(tokenStream, title);
map2.put("title", newTitle);
}
if (doc.get("content")!=null) {
String content = doc.get("content");
TokenStream tokenStream=analyzer.tokenStream("content", new StringReader(content));
String newContent = highlighter.getBestFragment(tokenStream, content);
map2.put("content",newContent);
}
list.add(map2);
}
reader.close();
map.put("list", list);
return "GL";
}
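//The page number (2) and page size (5) are hardcoded in SearchData above; a
//sketch of the same slice computed from request parameters (pageNo/pageSize
//are assumed names), clamped so a short hit list cannot overflow. SearchData
//would then loop from range[0] (inclusive) to range[1] (exclusive).
public int[] pageRange(int pageNo, int pageSize, int totalHits){
int start = Math.min((pageNo-1)*pageSize, totalHits);
int end = Math.min(pageNo*pageSize, totalHits);
return new int[]{start, end};
}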
//boosting: boost the documents matching the given searchValue, then update the index
@RequestMapping("addPower.do")
public void AddPower(String searchValue) throws IOException, ParseException{
IndexSearcher is=this.getIndexSearcher();
IndexWriter iw = this.getIndexWriter();
//declare the array of fields to search
String[] fields = {"title","content"};
//declare a BooleanClause.Occur[] array; it expresses how the per-field conditions combine
BooleanClause.Occur[] flags=new BooleanClause.Occur[]{BooleanClause.Occur.SHOULD,BooleanClause.Occur.SHOULD};
//build the Query with MultiFieldQueryParser
Query query = MultiFieldQueryParser.parse(searchValue, fields, flags, analyzer);
//run the query
TopDocs result = is.search(query,100);
for(ScoreDoc scoreDoc:result.scoreDocs){
Document docOld=is.doc(scoreDoc.doc);
Document docNew = new Document();
//carry the other stored fields over: updateDocument is a delete-then-add, so any field left out here would be lost
docNew.add(new StringField("id", docOld.get("id"), Field.Store.YES));
docNew.add(new TextField("title", docOld.get("title"), Field.Store.YES));
docNew.add(new TextField("source", docOld.get("source"), Field.Store.YES));
docNew.add(new StringField("publishDate", docOld.get("publishDate"), Field.Store.YES));
docNew.add(new SortedDocValuesField("publishDate", new BytesRef(docOld.get("publishDate"))));
TextField field=new TextField("content", docOld.get("content"), Field.Store.YES);
//apply the boost
if (docOld.get("content").contains(searchValue)){
field.setBoost(1.8f);
}
docNew.add(field);
iw.updateDocument(new Term("id",docOld.get("id")), docNew);
}
iw.forceMerge(1);//merge the segment files produced by each update into one
reader.close();
iw.close();
}
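//Field.setBoost is an index-time boost and was removed after Lucene 6, so
//rewriting documents just to tune relevance is fragile. A query-time
//alternative (a sketch using the same field names): MultiFieldQueryParser
//accepts a per-field boost map, which leaves the index untouched.
public Query boostedQuery(String searchValue) throws ParseException{
Map<String,Float> boosts = new HashMap<String,Float>();
boosts.put("title", 1.8f);
boosts.put("content", 1.0f);
MultiFieldQueryParser parser = new MultiFieldQueryParser(new String[]{"title","content"}, analyzer, boosts);
return parser.parse(searchValue);
}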
}
> The analyzer utility classes
/**
* An IKAnalyzer compatible with Lucene 5.x
*
* @author liuzh
*/
public class IKAnalyzer5x extends Analyzer {
private boolean useSmart;
public boolean useSmart() {
return this.useSmart;
}
public void setUseSmart(boolean useSmart) {
this.useSmart = useSmart;
}
public IKAnalyzer5x() {
this(false);
}
public IKAnalyzer5x(boolean useSmart) {
this.useSmart = useSmart;
}
@Override
protected TokenStreamComponents createComponents(String fieldName) {
IKTokenizer5x _IKTokenizer = new IKTokenizer5x(this.useSmart);
return new TokenStreamComponents(_IKTokenizer);
}
}
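/**
* A sketch (not in the original) that prints what the analyzer produces, useful
* for comparing smart mode (true, coarser tokens) with fine-grained mode (false).
*/
public class TokenDumpDemo {
public static void main(String[] args) throws Exception {
Analyzer analyzer = new IKAnalyzer5x(true);
try (TokenStream ts = analyzer.tokenStream("content", new StringReader("这是一个测试"))) {
CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
ts.reset();//mandatory before the first incrementToken()
while (ts.incrementToken()) {
System.out.println(term.toString());
}
ts.end();//finalize offsets
}
}
}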
/**
* An IKTokenizer compatible with Lucene 5.x
*
* @author liuzh
*/
public class IKTokenizer5x extends Tokenizer {
private IKSegmenter _IKImplement;
private final CharTermAttribute termAtt = (CharTermAttribute)this.addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = (OffsetAttribute)this.addAttribute(OffsetAttribute.class);
private final TypeAttribute typeAtt = (TypeAttribute)this.addAttribute(TypeAttribute.class);
private int endPosition;
public IKTokenizer5x() {
this._IKImplement = new IKSegmenter(this.input, true);
}
public IKTokenizer5x(boolean useSmart) {
this._IKImplement = new IKSegmenter(this.input, useSmart);
}
public IKTokenizer5x(AttributeFactory factory) {
super(factory);
this._IKImplement = new IKSegmenter(this.input, true);
}
public boolean incrementToken() throws IOException {
this.clearAttributes();
Lexeme nextLexeme = this._IKImplement.next();
if(nextLexeme != null) {
this.termAtt.append(nextLexeme.getLexemeText());
this.termAtt.setLength(nextLexeme.getLength());
this.offsetAtt.setOffset(nextLexeme.getBeginPosition(), nextLexeme.getEndPosition());
this.endPosition = nextLexeme.getEndPosition();
this.typeAtt.setType(nextLexeme.getLexemeTypeString());
return true;
} else {
return false;
}
}
public void reset() throws IOException {
super.reset();
this._IKImplement.reset(this.input);
}
public final void end() {
int finalOffset = this.correctOffset(this.endPosition);
this.offsetAtt.setOffset(finalOffset, finalOffset);
}
}
> The jars required by Lucene
![Image description][1]
![Image description][2]
[1]: //img.mukewang.com/5a96491d0001447603050871.png
[2]: //img.mukewang.com/5a96495b0001a62103230120.png