猿问

如何组合多个查询?

我有数百万个文档要索引。每个文档都有 doc_id 和 doc_title 字段,以及若干个 doc_content 字段。


import requests


# Name of the index that the requests in this snippet target.
index = 'test'

# Explicit mapping: doc_id is mapped as keyword, while doc_title and
# doc_content are both mapped as text.
JSON = {
    "mappings": {
        "properties": {
            "doc_id": {"type": "keyword"},
            "doc_title": {"type": "text"},
            "doc_content": {"type": "text"},
        },
    },
}

# PUT the mapping to the index URL on the local node (port 9200).
r = requests.put(f"http://127.0.0.1:9200/{index}", json=JSON)

为了尽量减小索引的大小,我把 doc_title 和 doc_content 分开存放。


# Each entry is indexed as its own document: one title entry and two content
# entries per doc_id, linked only by the shared doc_id value.
docs = [
    # doc_id 1
    {"doc_id": 1, "doc_title": "good"},
    {"doc_id": 1, "doc_content": "a"},
    {"doc_id": 1, "doc_content": "b"},
    # doc_id 2
    {"doc_id": 2, "doc_title": "good"},
    {"doc_id": 2, "doc_content": "c"},
    {"doc_id": 2, "doc_content": "d"},
    # doc_id 3
    {"doc_id": 3, "doc_title": "bad"},
    {"doc_id": 3, "doc_content": "a"},
    {"doc_id": 3, "doc_content": "e"},
]

# One POST per entry; r is overwritten each time, so afterwards it holds only
# the response to the last request.
for doc in docs:
    r = requests.post(f"http://127.0.0.1:9200/{index}/_doc", json=doc)

查询_1:


# Query 1: match on the title field only.
JSON = {"query": {"match": {"doc_title": "good"}}}

# The query body is sent as JSON with a GET to the _search endpoint.
r = requests.get(f"http://127.0.0.1:9200/{index}/_search", json=JSON)

# Keep only the stored source of every hit.
[hit['_source'] for hit in r.json()['hits']['hits']]

[{'doc_id': 1, 'doc_title': 'good'}, {'doc_id': 2, 'doc_title': 'good'}]


查询_2:


# Query 2: match on the content field only.
JSON = {"query": {"match": {"doc_content": "a"}}}

# The query body is sent as JSON with a GET to the _search endpoint.
r = requests.get(f"http://127.0.0.1:9200/{index}/_search", json=JSON)

# Keep only the stored source of every hit.
[hit['_source'] for hit in r.json()['hits']['hits']]

[{'doc_id': 1, 'doc_content': 'a'}, {'doc_id': 3, 'doc_content': 'a'}]


如何把查询_1 和查询_2 组合起来?


我需要这样的东西:


# Attempted combination: both match clauses are listed under bool.must.
# NOTE(review): the entries indexed earlier carry either doc_title or
# doc_content, never both, which is presumably why the recorded result
# below is an empty list -- confirm against the bool query documentation.
title_clause = {"match": {"doc_title": "good"}}
content_clause = {"match": {"doc_content": "a"}}
JSON = {"query": {"bool": {"must": [title_clause, content_clause]}}}
del title_clause, content_clause  # leave no extra module-level names behind

# The query body is sent as JSON with a GET to the _search endpoint.
r = requests.get(f"http://127.0.0.1:9200/{index}/_search", json=JSON)

# Keep only the stored source of every hit.
[hit['_source'] for hit in r.json()['hits']['hits']]

[]


期望的结果:


[{'doc_id': 1, 'doc_title': 'good', 'doc_content': 'a'}]


largeQ
浏览 105回答 1
1回答

函数式编程

把 doc_title 和 doc_content 分开存放并不是好的做法——这样并没有真正减少任何东西。请改用下面的方式:

docs = [
    {"doc_id": 1, "doc_title": "good", "doc_content": ["a", "b"]},
    {"doc_id": 2, "doc_title": "good", "doc_content": ["c", "d"]},
    {"doc_id": 3, "doc_title": "bad", "doc_content": ["a", "e"]}
]

for doc in docs:
    r = requests.post(f'http://127.0.0.1:9200/{index}/_doc', json=doc)

这样您的查询就会按预期工作。无论如何,a 和 b 都应该共同属于 doc_id=1,不是吗?

更新——使用嵌套(nested)语法来存放 contents:

PUT test
{
  "mappings": {
    "properties": {
      "contents": {
        "type": "nested",
        "properties": {
          "doc_content": {
            "type": "text"
          }
        }
      },
      "doc_id": {
        "type": "keyword"
      },
      "doc_title": {
        "type": "text"
      }
    }
  }
}

POST test/_doc
{
  "doc_id": 1,
  "doc_title": "good",
  "contents": [
    {"doc_content": "a"},
    {"doc_content": "b"}
  ]
}

GET test/_search
{
  "_source": ["doc_title", "inner_hits"],
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "doc_title": "good"
          }
        },
        {
          "nested": {
            "path": "contents",
            "query": {
              "match": {
                "contents.doc_content": "a"
              }
            },
            "inner_hits": {}
          }
        }
      ]
    }
  }
}

得到的结果如下:

[
  {
    "_index":"test",
    "_type":"_doc",
    "_id":"sySOoXEBdiyDG0RsIq21",
    "_score":0.98082924,
    "_source":{
      "doc_title":"good"               <------
    },
    "inner_hits":{
      "contents":{
        "hits":{
          "total":1,
          "max_score":0.6931472,
          "hits":[
            {
              "_index":"test",
              "_type":"_doc",
              "_id":"sySOoXEBdiyDG0RsIq21",
              "_nested":{
                "field":"contents",
                "offset":0
              },
              "_score":0.6931472,
              "_source":{
                "doc_content":"a"          <-----
              }
            }
          ]
        }
      }
    }
  }
]
随时随地看视频慕课网APP

相关分类

Python
我要回答