Appending more Lucene-related methods

By 爱总结的小仙女

> Controller layer (the data is queried through MyBatis + MySQL)
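
The controller below autowires a `LuceneServices` bean (`luceneSerImpl`) and calls two of its methods. The original post does not include that service, so the following is only a minimal sketch of what the interface presumably looks like, inferred from how the controller calls it; `Content` is the entity class used in the controller, whose definition is also not shown in the post:

```
import java.util.List;
import java.util.Map;

// Hypothetical sketch only: the original post does not show this interface.
// The two signatures are inferred from the calls made in the controller below.
public interface LuceneServices {

    // One Map per database row, with keys such as ID, TITLE, SOURCE, CONTENT and PUBLISHDATE
    List<Map<String, Object>> queryList();

    // The largest ID currently in the table for the given Content entity
    Integer QueryMaxId(Content content);
}
```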


```
@Controller
public class LuceneController {
    @Autowired
    private LuceneServices luceneSerImpl;
    private Directory dir;
    private static String indexDir = "D:\\lucene2";
    IndexReader reader;
    // The analyzer used when building the index must be the same one used at query time
    public static Analyzer analyzer = new IKAnalyzer5x(true);
    IndexWriter iw;
    // Obtain an IndexWriter
    public IndexWriter getIndexWriter() throws IOException {
        File file = new File(indexDir);
        if (!file.exists()) {
            file.mkdir();
        }
        dir = FSDirectory.open(Paths.get(indexDir));
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iw = new IndexWriter(dir, iwc);
        return iw;
    }

    // Obtain an IndexSearcher
    public IndexSearcher getIndexSearcher() throws IOException {
        dir = FSDirectory.open(Paths.get(indexDir));
        reader = DirectoryReader.open(dir);
        IndexSearcher is = new IndexSearcher(reader);
        return is;
    }

    // Create (update) the index
    @RequestMapping("createIndex.do")
    public void createIndex() throws IOException {
        IndexSearcher is = this.getIndexSearcher();
        // Fetch the data
        List<Map<String, Object>> list = luceneSerImpl.queryList();
        Content content = new Content();
        String filePath = "D:\\MaxId.txt";
        Integer id = luceneSerImpl.QueryMaxId(content);
        File file = new File(filePath);
        if (!file.exists()) {
            file.createNewFile();
        }
        // Save the largest id in the database to a file, as a marker for incremental indexing
        createContentToFile(filePath, id.toString());
        IndexWriter iw = this.getIndexWriter();
        for (int i = 0; i < list.size(); i++) {
            // Check whether the index already contains a document with this id
            Term t = new Term("id", list.get(i).get("ID").toString());
            Query query = new TermQuery(t);
            TopDocs hits = is.search(query, 1);
            if (hits.scoreDocs.length > 0) {
                // The document already exists: update it
                Document docOld = is.doc(hits.scoreDocs[0].doc);
                Document docNew = this.getDocument(list, i);
                iw.updateDocument(new Term("id", docOld.get("id")), docNew);
            } else {
                // No such document yet: add it to the index
                Document doc = this.getDocument(list, i);
                iw.addDocument(doc);
            }
        }
        iw.forceMerge(1); // Merge the segments produced by each run into one
        reader.close();
        iw.close();
    }

    // Write a string to a file
    public void createContentToFile(String filePath, String content) throws IOException {
        File file = new File(filePath);
        BufferedWriter out = null;
        try {
            // With append=false the file is overwritten, so only the latest value is kept;
            // with append=true the new data is appended after the existing data
            out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file, false)));
            out.write("," + content);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                if (out != null) {
                    out.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    // Build the index for the first time (call this method first, before createIndex.do)
    @RequestMapping("createIndex1.do")
    public void createIndex1() throws IOException {
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
        // Fetch the data
        List<Map<String, Object>> list = luceneSerImpl.queryList();
        IndexWriter iw = this.getIndexWriter();
        for (int i = 0; i < list.size(); i++) {
            Document doc = new Document();
            doc.add(new StringField("id", list.get(i).get("ID").toString(), Field.Store.YES));
            // TextField is tokenized; StringField is indexed as a single, untokenized value
            doc.add(new TextField("title", (String) list.get(i).get("TITLE"), Field.Store.YES));
            doc.add(new TextField("source", (String) list.get(i).get("SOURCE"), Field.Store.YES));
            doc.add(new TextField("content", (String) list.get(i).get("CONTENT"), Field.Store.YES));
            // In Lucene 5.3.1 the field must also be added as a SortedDocValuesField (below) to be sortable
            StringField tf = new StringField("publishDate", format.format(list.get(i).get("PUBLISHDATE")), Field.Store.YES);
            doc.add(tf);
            SortedDocValuesField publishDateField = new SortedDocValuesField("publishDate", new BytesRef(format.format(list.get(i).get("PUBLISHDATE")).getBytes()));
            doc.add(publishDateField);
            iw.addDocument(doc);
        }
        iw.forceMerge(1); // Merge the segments produced by each run into one
        iw.close();
    }

    // Build a Document from one row of the result list
    public Document getDocument(List<Map<String, Object>> list, Integer i) {
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
        Document doc = new Document();
        doc.add(new StringField("id", list.get(i).get("ID").toString(), Field.Store.YES));
        // TextField is tokenized; StringField is indexed as a single, untokenized value
        doc.add(new TextField("title", (String) list.get(i).get("TITLE"), Field.Store.YES));
        doc.add(new TextField("source", (String) list.get(i).get("SOURCE"), Field.Store.YES));
        doc.add(new TextField("content", (String) list.get(i).get("CONTENT"), Field.Store.YES));
        // In Lucene 5.3.1 the field must also be added as a SortedDocValuesField (below) to be sortable
        StringField tf = new StringField("publishDate", format.format(list.get(i).get("PUBLISHDATE")), Field.Store.YES);
        doc.add(tf);
        SortedDocValuesField publishDateField = new SortedDocValuesField("publishDate", new BytesRef(format.format(list.get(i).get("PUBLISHDATE")).getBytes()));
        doc.add(publishDateField);
        return doc;
    }

    // Query the (possibly boosted) data again, with highlighting and paging
    @RequestMapping("search.do")
    public String SearchData(ModelMap map) throws Exception {
        List<Map<String, Object>> list = new ArrayList<Map<String, Object>>();
        IndexSearcher is = this.getIndexSearcher();
        // The fields to search in
        String[] fields = {"title", "content"};
        // BooleanClause.Occur[] describes how the clauses for the individual fields are combined
        BooleanClause.Occur[] flags = new BooleanClause.Occur[]{BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD};
        // Build the query with MultiFieldQueryParser (the keyword "北京" is hard-coded for this demo)
        Query query = MultiFieldQueryParser.parse("北京", fields, flags, analyzer);
        QueryScorer scorer = new QueryScorer(query);
        // Highlighting
        Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer);
        highlighter.setTextFragmenter(fragmenter);
        // Sorting: descending by publish date
        Sort sort = new Sort();
        SortField sf1 = new SortField("publishDate", SortField.Type.STRING, true);
        sort.setSort(sf1);
        // Run the query
        TopDocs resultNew = is.search(query, 100, sort);
        ScoreDoc[] sds = resultNew.scoreDocs;
        // Paging is hard-coded to page 2 with a page size of 5: start = (page - 1) * pageSize
        int start = (2 - 1) * 5;
        int end = Math.min(2 * 5, sds.length); // guard against running past the last hit
        for (int i = start; i < end; i++) {
            Map<String, Object> map2 = new HashMap<String, Object>();
            Document doc = is.doc(sds[i].doc);
            map2.put("id", doc.get("id"));
            map2.put("source", doc.get("source"));
            map2.put("publishDate", doc.get("publishDate"));
            if (doc.get("title") != null) {
                String title = doc.get("title");
                TokenStream tokenStream = analyzer.tokenStream("title", new StringReader(title));
                String newTitle = highlighter.getBestFragment(tokenStream, title);
                map2.put("title", newTitle);
            }
            if (doc.get("content") != null) {
                String content = doc.get("content");
                TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(content));
                String newContent = highlighter.getBestFragment(tokenStream, content);
                map2.put("content", newContent);
            }
            list.add(map2);
        }
        reader.close();
        map.put("list", list);
        return "GL";
    }

    // Boosting: boost the documents that match searchValue, then update the index
    @RequestMapping("addPower.do")
    public void AddPower(String searchValue) throws IOException, ParseException {
        IndexSearcher is = this.getIndexSearcher();
        IndexWriter iw = this.getIndexWriter();
        // The fields to search in
        String[] fields = {"title", "content"};
        // BooleanClause.Occur[] describes how the clauses for the individual fields are combined
        BooleanClause.Occur[] flags = new BooleanClause.Occur[]{BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD};
        // Build the query with MultiFieldQueryParser
        Query query = MultiFieldQueryParser.parse(searchValue, fields, flags, analyzer);
        // Run the query
        TopDocs result = is.search(query, 100);
        for (ScoreDoc scoreDoc : result.scoreDocs) {
            Document docOld = is.doc(scoreDoc.doc);
            Document docNew = new Document();
            TextField field = new TextField("content", docOld.get("content"), Field.Store.YES);
            // Index-time boost for documents whose content contains the search term
            if (docOld.get("content").contains(searchValue)) {
                field.setBoost(1.8f);
            }
            docNew.add(field);
            // Copy the remaining stored fields so they are not lost when the document is replaced
            docNew.add(new StringField("id", docOld.get("id"), Field.Store.YES));
            docNew.add(new TextField("title", docOld.get("title"), Field.Store.YES));
            docNew.add(new TextField("source", docOld.get("source"), Field.Store.YES));
            docNew.add(new StringField("publishDate", docOld.get("publishDate"), Field.Store.YES));
            docNew.add(new SortedDocValuesField("publishDate", new BytesRef(docOld.get("publishDate").getBytes())));
            iw.updateDocument(new Term("id", docOld.get("id")), docNew);
        }
        iw.forceMerge(1); // Merge the segments produced by each update into one
        reader.close();
        iw.close();
    }

}
```
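
The endpoints are meant to be called in this order: `createIndex1.do` builds the initial index, `createIndex.do` adds or updates documents on later runs, `addPower.do` boosts documents matching a keyword, and `search.do` queries with highlighting, paging and date sorting.

The in-code comments note that in Lucene 5.3.1 a field can only be sorted on if it is also added as a `SortedDocValuesField`; the stored `StringField` alone is not enough. The snippet below is a minimal, self-contained sketch of just that point. It is not part of the original controller and assumes Lucene 5.x on the classpath; it uses `StandardAnalyzer` and an in-memory `RAMDirectory` purely for illustration:

```
import java.nio.charset.StandardCharsets;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;

public class SortByDocValuesDemo {
    public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
        for (String date : new String[]{"2018-01-03", "2018-01-01", "2018-01-02"}) {
            Document doc = new Document();
            // Stored copy for display ...
            doc.add(new StringField("publishDate", date, Field.Store.YES));
            // ... and a doc-values copy, which is what the sort actually uses
            doc.add(new SortedDocValuesField("publishDate", new BytesRef(date.getBytes(StandardCharsets.UTF_8))));
            iw.addDocument(doc);
        }
        iw.close();

        IndexReader reader = DirectoryReader.open(dir);
        IndexSearcher searcher = new IndexSearcher(reader);
        // Descending sort on the string-valued publishDate field, as in search.do above
        Sort sort = new Sort(new SortField("publishDate", SortField.Type.STRING, true));
        TopDocs hits = searcher.search(new MatchAllDocsQuery(), 10, sort);
        for (ScoreDoc sd : hits.scoreDocs) {
            System.out.println(searcher.doc(sd.doc).get("publishDate")); // 2018-01-03, 2018-01-02, 2018-01-01
        }
        reader.close();
    }
}
```

If the `SortedDocValuesField` line is removed, the same search is expected to fail at query time with an "unexpected docvalues type" error, which is the 5.3.1 sorting issue the controller's comments refer to.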


> Analyzer utility classes

```
/**
 * An IKAnalyzer that supports Lucene 5.x
 *
 * @author liuzh
 */
public class IKAnalyzer5x extends Analyzer {

    private boolean useSmart;

    public boolean useSmart() {
        return this.useSmart;
    }

    public void setUseSmart(boolean useSmart) {
        this.useSmart = useSmart;
    }

    public IKAnalyzer5x() {
        this(false);
    }

    public IKAnalyzer5x(boolean useSmart) {
        this.useSmart = useSmart;
    }

    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        IKTokenizer5x _IKTokenizer = new IKTokenizer5x(this.useSmart);
        return new TokenStreamComponents(_IKTokenizer);
    }
}
```


```
/**
 * An IKTokenizer that supports Lucene 5.x
 *
 * @author liuzh
 */
public class IKTokenizer5x extends Tokenizer {
    private IKSegmenter _IKImplement;
    private final CharTermAttribute termAtt = (CharTermAttribute)this.addAttribute(CharTermAttribute.class);
    private final OffsetAttribute offsetAtt = (OffsetAttribute)this.addAttribute(OffsetAttribute.class);
    private final TypeAttribute typeAtt = (TypeAttribute)this.addAttribute(TypeAttribute.class);
    private int endPosition;

    public IKTokenizer5x() {
        this._IKImplement = new IKSegmenter(this.input, true);
    }

    public IKTokenizer5x(boolean useSmart) {
        this._IKImplement = new IKSegmenter(this.input, useSmart);
    }

    public IKTokenizer5x(AttributeFactory factory) {
        super(factory);
        this._IKImplement = new IKSegmenter(this.input, true);
    }

    // Pull the next lexeme from the IK segmenter and copy it into Lucene's term/offset/type attributes
    public boolean incrementToken() throws IOException {
        this.clearAttributes();
        Lexeme nextLexeme = this._IKImplement.next();
        if (nextLexeme != null) {
            this.termAtt.append(nextLexeme.getLexemeText());
            this.termAtt.setLength(nextLexeme.getLength());
            this.offsetAtt.setOffset(nextLexeme.getBeginPosition(), nextLexeme.getEndPosition());
            this.endPosition = nextLexeme.getEndPosition();
            this.typeAtt.setType(nextLexeme.getLexemeTypeString());
            return true;
        } else {
            return false;
        }
    }

    public void reset() throws IOException {
        super.reset();
        this._IKImplement.reset(this.input);
    }

    public final void end() {
        int finalOffset = this.correctOffset(this.endPosition);
        this.offsetAtt.setOffset(finalOffset, finalOffset);
    }
}
```
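
Since the controller's comment stresses that indexing and querying must use the same analyzer, here is a small, hypothetical smoke test (not part of the original post, class and sample text chosen for illustration) that prints the tokens `IKAnalyzer5x` produces for a sentence, so you can see what the smart-mode flag changes:

```
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class IKAnalyzerSmokeTest {
    public static void main(String[] args) throws Exception {
        // true = smart (coarse-grained) segmentation, false = fine-grained segmentation
        Analyzer analyzer = new IKAnalyzer5x(true);
        TokenStream ts = analyzer.tokenStream("content", new StringReader("北京欢迎你"));
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();                       // must be called before incrementToken()
        while (ts.incrementToken()) {
            System.out.println(term.toString());
        }
        ts.end();
        ts.close();
        analyzer.close();
    }
}
```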

> Jar packages required by Lucene
![image description][1]

![image description][2]

  [1]: //img.mukewang.com/5a96491d0001447603050871.png
  [2]: //img.mukewang.com/5a96495b0001a62103230120.png