Elasticsearch 同义词分析器在 Python 中不起作用

我的 Python 代码中有如下所示的场景。在这里,我试图明确地将 new york 和 ny 定义为同义词,但不幸的是它不起作用。我是 Elasticsearch 新手,能否指导一下?我还使用了自定义分析器,并且有一个文件 synonyms.txt,其内容为:ny,newyork,nyork


from datetime import datetime

from elasticsearch import Elasticsearch


# Create the Elasticsearch client; no arguments are passed, so the client
# library's default connection settings apply.
es = Elasticsearch()


# Sample keyword phrases used as test data; note that the last entry,
# 'newyork', is one of the terms listed in the synonym rule.
keywords = [
    'thousand eyes',
    'facebook',
    'superdoc',
    'quora',
    'your story',
    'Surgery',
    'lending club',
    'ad roll',
    'the honest company',
    'Draft kings',
    'newyork',
]

# Module-level counter, initialised to 1.
count = 1


# Index definition (settings + mappings) for the keyword index.
#
# Two custom analyzers share the same filter chain (asciifolding -> lowercase
# -> synonym) and differ only in their tokenizer:
#   * my_analyzer_keyword uses the "keyword" tokenizer and is applied when
#     documents are indexed.
#   * my_analyzer_shingle uses the "standard" tokenizer and is applied to the
#     query text at search time.
#
# Fixes relative to the previous version of this mapping:
#   * "index_analyzer" is no longer a valid mapping parameter (removed in
#     Elasticsearch 2.0); the index-time analyzer is set with "analyzer".
#   * The "string" field type was replaced by "text" for analyzed fields.
doc_setting = {
    "settings": {
        "analysis": {
            "analyzer": {
                "my_analyzer_keyword": {
                    "type": "custom",
                    "tokenizer": "keyword",
                    "filter": [
                        "asciifolding",
                        "lowercase",
                        "synonym",
                    ],
                },
                "my_analyzer_shingle": {
                    "type": "custom",
                    "tokenizer": "standard",
                    "filter": [
                        "asciifolding",
                        "lowercase",
                        "synonym",
                    ],
                },
            },
            "filter": {
                "synonym": {
                    "type": "synonym",
                    # NOTE(review): the synonym file is read by the
                    # Elasticsearch server, not by this script; per the
                    # Elasticsearch docs the path is relative to the node's
                    # config directory. Confirm the file is deployed there.
                    "synonyms_path": "synonyms.txt",
                    "ignore_case": "true",
                },
            },
        },
    },
    "mappings": {
        "your_type": {
            "properties": {
                "keyword": {
                    "type": "text",
                    "analyzer": "my_analyzer_keyword",
                    "search_analyzer": "my_analyzer_shingle",
                },
            },
        },
    },
}

至尊宝的传说
浏览 164回答 1
1回答

呼唤远方

PUT /test_index
{
    "settings": {
        "analysis": {
            "analyzer": {
                "my_analyzer_keyword": {
                    "type": "custom",
                    "tokenizer": "keyword",
                    "filter": [
                        "asciifolding",
                        "lowercase",
                        "synonym"
                    ]
                },
                "my_analyzer_shingle": {
                    "type": "custom",
                    "tokenizer": "standard",
                    "filter": [
                        "asciifolding",
                        "lowercase",
                        "synonym"
                    ]
                }
            },
            "filter": {
                "synonym" : {
                    "type" : "synonym",
                    "lenient": true,
                    "synonyms" : ["ny,newyork,nyork"]
                }
            }
        }
    }, "mappings": {
        "your_type": {
            "properties": {
                "keyword": {
                    "type": "text",
                    "analyzer": "my_analyzer_keyword",
                    "search_analyzer": "my_analyzer_shingle"
                }
            }
        }
    }
}

然后使用以下请求进行分析:

POST /test_index/_analyze
{
    "analyzer" : "my_analyzer_shingle",
    "text" : "I saw news on ny news channel of lending club on facebook, your story and quorat"
}

我得到的词元(token)如下:

{
    "tokens": [
        { "token": "i", "start_offset": 0, "end_offset": 1, "type": "<ALPHANUM>", "position": 0 },
        { "token": "saw", "start_offset": 2, "end_offset": 5, "type": "<ALPHANUM>", "position": 1 },
        { "token": "news", "start_offset": 6, "end_offset": 10, "type": "<ALPHANUM>", "position": 2 },
        { "token": "on", "start_offset": 11, "end_offset": 13, "type": "<ALPHANUM>", "position": 3 },
        { "token": "ny", "start_offset": 14, "end_offset": 16, "type": "<ALPHANUM>", "position": 4 },
        { "token": "newyork", "start_offset": 14, "end_offset": 16, "type": "SYNONYM", "position": 4 },
        { "token": "nyork", "start_offset": 14, "end_offset": 16, "type": "SYNONYM", "position": 4 },
        { "token": "news", "start_offset": 17, "end_offset": 21, "type": "<ALPHANUM>", "position": 5 },
        { "token": "channel", "start_offset": 22, "end_offset": 29, "type": "<ALPHANUM>", "position": 6 },
        { "token": "of", "start_offset": 30, "end_offset": 32, "type": "<ALPHANUM>", "position": 7 },
        { "token": "lending", "start_offset": 33, "end_offset": 40, "type": "<ALPHANUM>", "position": 8 },
        { "token": "club", "start_offset": 41, "end_offset": 45, "type": "<ALPHANUM>", "position": 9 },
        { "token": "on", "start_offset": 46, "end_offset": 48, "type": "<ALPHANUM>", "position": 10 },
        { "token": "facebook", "start_offset": 49, "end_offset": 57, "type": "<ALPHANUM>", "position": 11 },
        { "token": "your", "start_offset": 59, "end_offset": 63, "type": "<ALPHANUM>", "position": 12 },
        { "token": "story", "start_offset": 64, "end_offset": 69, "type": "<ALPHANUM>", "position": 13 },
        { "token": "and", "start_offset": 70, "end_offset": 73, "type": "<ALPHANUM>", "position": 14 },
        { "token": "quorat", "start_offset": 74, "end_offset": 80, "type": "<ALPHANUM>", "position": 15 }
    ]
}

并且搜索的结果如下:

POST /test_index/_search
{
    "query" : {
        "match" : { "keyword" : "I saw news on ny news channel of lending club on facebook, your story and quora" }
    }
}

{
    "took": 36,
    "timed_out": false,
    "_shards": {
        "total": 5,
        "successful": 5,
        "skipped": 0,
        "failed": 0
    },
    "hits": {
        "total": 3,
        "max_score": 1.6858001,
        "hits": [
            {
                "_index": "test_index",
                "_type": "your_type",
                "_id": "4",
                "_score": 1.6858001,
                "_source": {
                    "keyword": "newyork"
                }
            },
            {
                "_index": "test_index",
                "_type": "your_type",
                "_id": "2",
                "_score": 1.1727304,
                "_source": {
                    "keyword": "facebook"
                }
            },
            {
                "_index": "test_index",
                "_type": "your_type",
                "_id": "5",
                "_score": 0.6931472,
                "_source": {
                    "keyword": "quora"
                }
            }
        ]
    }
}
打开App,查看更多内容
随时随地看视频慕课网APP

相关分类

Python