摘要
为了排查当前集群经常出现索引超时以及请求拒绝的问题,现模拟线上集群环境及索引设置,通过压测工具随机生成测试数据,分别对当前 850 个分片的索引、分片数减半(425 个分片)的索引以及分片数更少(50 个分片)的索引进行写入压测。压测中使用不同的并发数和不同的批次大小来观察索引的写入吞吐情况,并记录写入队列的堆积情况,用来分析分片数、批次大小对写入的影响,从而确定后续的优化方案。
压测场景
Elasticsearch 版本为 v7.7.1,集群共有 57 个节点,其中 3 个独立 Master 节点、3 个协调节点,JVM 内存为 31GB。
压测流程
单索引 850 分片
索引定义
PUT idx-xxxx-xxxxxx
{
"aliases" : {
"alias-xxxx-xxxxxx" : { }
},
"mappings" : {
"dynamic" : "strict",
"_routing" : {
"required" : true
},
"_source" : {
"excludes" : [
"isExtract*",
"batchNo"
]
},
"properties" : {
"addxxxx" : {
"type" : "text",
"term_vector" : "with_positions_offsets"
},
"clxxxx" : {
"type" : "byte"
},
"contxxxx" : {
"type" : "text",
"boost" : 4.0,
"term_vector" : "with_positions_offsets"
},
"conxxxx" : {
"type" : "keyword",
"doc_values" : false
},
"con1xxxx" : {
"type" : "text",
"boost" : 16.0,
"term_vector" : "with_positions_offsets",
"fields" : {
"keyword" : {
"type" : "keyword",
"normalizer" : "keyword_normalizer"
}
},
"analyzer" : "name_analyzer",
"search_analyzer" : "keyword_analyzer"
},
"contSxxxx" : {
"type" : "long",
"index" : false,
"doc_values" : false
},
"contSxxxxx" : {
"type" : "keyword",
"doc_values" : false
},
"contTxxxx" : {
"type" : "short"
},
"crtxxxx" : {
"type" : "date",
"ignore_malformed" : true,
"format" : "yyyyMMddHHmmss"
},
"duration" : {
"type" : "long",
"index" : false,
"doc_values" : false
},
"largeTxxxx" : {
"type" : "keyword",
"boost" : 8.0,
"index" : false,
"doc_values" : false
},
"md5" : {
"type" : "keyword",
"index" : false,
"doc_values" : false
},
"orderxxxx" : {
"type" : "alias",
"path" : "contName.keyword"
},
"ownxxxxxx" : {
"type" : "keyword",
"doc_values" : false
},
"ownxxxxxxxxxx" : {
"type" : "keyword",
"doc_values" : false
},
"ownxxxxxxxxxxx" : {
"type" : "keyword",
"doc_values" : false
},
"ownxxxxxxxxxxx" : {
"type" : "keyword",
"doc_values" : false
},
"parenxxxxxxxxxx" : {
"type" : "keyword"
},
"pathxx" : {
"type" : "text",
"boost" : 8.0,
"term_vector" : "with_positions_offsets",
"fields" : {
"keyword" : {
"type" : "keyword"
}
},
"analyzer" : "path_analyzer"
},
"presexxxxx" : {
"type" : "keyword",
"boost" : 8.0,
"index" : false,
"doc_values" : false
},
"presexxxxx" : {
"type" : "keyword",
"boost" : 8.0,
"index" : false,
"doc_values" : false
},
"presxxxxxx" : {
"type" : "keyword",
"boost" : 8.0,
"index" : false,
"doc_values" : false
},
"prixxxxxx" : {
"type" : "short",
"index" : false
},
"search_xxxxxx" : {
"type" : "alias",
"path" : "contName"
},
"servixxxxxx" : {
"type" : "byte"
},
"shotxxxxxx" : {
"type" : "date",
"ignore_malformed" : true,
"format" : "yyyyMMddHHmmss"
},
"xxxxxxlThuxxxxxx" : {
"type" : "keyword",
"boost" : 8.0,
"index" : false,
"doc_values" : false
},
"tagxxxxxx" : {
"type" : "text",
"term_vector" : "with_positions_offsets"
},
"thumxxxxxx" : {
"type" : "keyword",
"boost" : 8.0,
"index" : false,
"doc_values" : false
},
"xxxxxxpdxxxxxx" : {
"type" : "date",
"ignore_malformed" : true,
"format" : "yyyyMMddHHmmss"
},
"xxxxxxderAcxxxxxx" : {
"type" : "keyword",
"doc_values" : false
},
"xxxxxxerAccouxxxxxx" : {
"type" : "keyword",
"doc_values" : false
},
"xxxxxxerxxxxxxID" : {
"type" : "keyword",
"doc_values" : false
},
"xxxxxxderNxxxxxx" : {
"type" : "keyword",
"doc_values" : false
}
}
},
"settings" : {
"index" : {
"max_ngram_diff" : "50",
"refresh_interval" : "1s",
"number_of_shards" : "850",
"analysis" : {
"normalizer" : {
"keyword_normalizer" : {
"filter" : [
"lowercase"
],
"type" : "custom"
}
},
"analyzer" : {
"keyword_analyzer" : {
"filter" : [
"lowercase"
],
"type" : "custom",
"tokenizer" : "keyword"
},
"name_analyzer" : {
"filter" : [
"lowercase"
],
"type" : "custom",
"tokenizer" : "name_tokenizer"
},
"path_analyzer" : {
"filter" : [
"lowercase"
],
"type" : "custom",
"tokenizer" : "path_tokenizer"
}
},
"tokenizer" : {
"name_tokenizer" : {
"type" : "ngram",
"min_gram" : "1",
"max_gram" : "5"
},
"path_tokenizer" : {
"pattern" : "/",
"type" : "pattern"
}
}
},
"number_of_replicas" : "1"
}
}
}
样例数据
POST idx-owncloud-img/_doc/1?routing=1
{
"ownerxxxxxx" : "002#######0oV",
"serxxxxxx" : 1,
"tagxxxxxx" : "",
"contxxxxxx" : "",
"xxxxxxAccoxxxxxxe" : "1",
"presxxxxxx" : "",
"conxxxxxx" : "jpg",
"xxxxxxerBxxxxxx" : "6#######573",
"ownerxxxxxxx" : "13#######62",
"presxxxxxxL" : "",
"duxxxxxx" : 0,
"paxxxxxx" : "00##########################################043",
"crtxxxxxx" : "20#######45",
"pxxxxxxtCatxxxxxx" : "001############################043",
"sxxxxxxThumxxxxxx" : "http://downl#################################################961",
"uxxxxxxerAxxxxxxt" : "1##############2",
"uxxxxxxderAccoxxxxxxe" : "1",
"uxxxxxxderxxxxxxID" : "0#####################V",
"lxxxxxxhumxxxxxxl" : "http://d###################################D961",
"thxxxxxxl" : "http://do###############################################################61",
"axxxxxxss" : "",
"uxxxxxxm" : "20##############8",
"cxxxxxx" : 3,
"coxxxxxx" : 1,
"prxxxxxx" : 10,
"coxxxxxx" : "0###################################cm",
"co2xxxxxx" : 5##############8,
"shoxxxxxx" : "20##############4",
"contxxxxxx" : "mm##############g",
"presxxxxxx" : "",
"oxxxxxxBmpxxxxxx" : "6#######3",
"md5" : "7##############1E"
}
loadgen 配置
root@loadgen:/opt/loadgen# cat loadgen.yml
statsd:
enabled: false
host: 192.168.3.98
port: 8125
namespace: loadgen.
variables:
- name: ip
type: file
path: dict/ip.txt
- name: message
type: file
path: dict/nginx.log
# - name: user
# type: file
# path: dict/user.txt
- name: id
type: sequence
- name: uuid
type: uuid
- name: now_local
type: now_local
- name: now_utc
type: now_utc
- name: now_unix
type: now_unix
- name: suffix
type: range
from: 12
to: 12
- name: bool
type: range
from: 0
to: 1
requests:
- request:
method: POST
runtime_variables:
batch_no: id
runtime_body_line_variables:
routing_no: uuid
basic_auth:
username: elastic
password: ####
url: https://xxx.elasticsearch.xxx.cn:9243/_bulk
body_repeat_times: 50
body: "{ \"create\" : { \"_index\" : \"idx-xxxxxx-xxxxxx\",\"_type\":\"_doc\", \"_id\" : \"$[[uuid]]\" , \"routing\" : \"$[[routing_no]]\" } }\n{ \"ownerxxxxxx\" : \"0011WsjCK0oV\", \"servxxxxxx\" : $[[bool]], \"tagxxxxxx\" : \"\", \"contxxxxxx\" : \"\", \"ownexxxxxxunxxxxxx\" : \"$[[bool]]\", \"prxxxxxxentLxxxxxx\" : \"\", \"conxxxxxx\" : \"jpg\", \"uxxxxxxexxxxxxID\" : \"$[[id]]\", \"owxxxxxxccxxxxxxt\" : \"$[[routing_no]]\", \"prxxxxxxtUxxxxxxL\" : \"\", \"durxxxxxxn\" : 0, \"paxxxxxx\" : \"00019700101000000001/0011WsjCK0oV00019700101000000043\", \"crxxxxxx\" : \"$[[id]]\", \"paxxxxxxntxxxxxxogIxxxxxx\" : \"0011WsjCK0oV00019700101000000043\", \"sxxxxxxThumxxxxxx\" : \"http://xxx.xxx.cn:80/storageWeb/servlet/GetFileByURLServlet?root=/mnt/wfs133&fileid=KB1af35f100578d655b2cfbd7edd2cb50e.jpg&ct=1&type=0&code=80B0EAB7F429F1A32F76EB895F5FF4DE1853D254604FAB67A7C33FDF92BE7220&exp=315&account=MTM2MzgzMTU1NjI=&p=0&ui=0011WsjCK0oV&ci=0011WsjCK0oV06320210812125345tcm&userSiteId=usersite-s&cn=mmexport162592513503...&oprChannel=10000000&dom=D961\", \"xxxxxxderAxxxxxxnt\" : \"$[[routing_no]]\", \"upxxxxxxerAcxxxxxxtype\" : \"$[[bool]]\", \"uploaderNDUserID\" : \"$[[uuid]]\", \"largeThumbnail\" : \"http://xxx.xxx.cn:80/storageWeb/servlet/GetFileByURLServlet?root=/mnt/wfs133&fileid=KB1af35f100578d655b2cfbd7edd2cb50e.jpg&ct=1&type=1&code=80B0EAB7F429F1A32F76EB895F5FF4DE1853D254604FAB67A7C33FDF92BE7220&exp=315&account=MTM2MzgzMTU1NjI=&p=0&ui=0011WsjCK0oV&ci=0011WsjCK0oV06320210812125345tcm&userSiteId=usersite-s&cn=mmexport162592513503...&oprChannel=10000000&dom=D961\", \"xxxxxxil\" : \"http://download.xxx.xxx.com:80/storageWeb/servlet/GetFileByURLServlet?root=/mnt/wfs133&fileid=KB1af35f100578d655b2cfbd7edd2cb50e.jpg&ct=1&type=2&code=80B0EAB7F429F1A32F76EB895F5FF4DE1853D254604FAB67A7C33FDF92BE7220&exp=315&account=MTM2MzgzMTU1NjI=&p=0&ui=0011WsjCK0oV&ci=0011WsjCK0oV06320210812125345tcm&userSiteId=usersite-s&cn=mmexport162592513503...&oprChannel=10000000&dom=D961\", \"adxxxxxx\" : 
\"\", \"upxxxxxx\" : \"$[[now_unix]]\", \"cxxxxxx\" : 3, \"contxxxxxxe\" : $[[bool]], \"prixxxxxx\" : 10, \"conxxxxxx\" : \"0011WsjCK0oV06320210812125345tcm\", \"contxxxxxx\" : $[[id]], \"shoxxxxxx\" : \"$[[id]]\", \"contxxxxxxe\" : \"mmexport1625925135032.jpg\", \"prxxxxxxtHxxxxxx\" : \"\", \"oxxxxxxrBmxxxxxxID\" : \"$[[id]]\", \"md5\" : \"$[[uuid]]\" }\n"
运行测试
开启 gzip 流量压缩,执行压测:
root@loadgen:/opt/loadgen# ./loadgen-linux-amd64 -config loadgen.yml -d 6000 -c 100 -compress
1 副本 100 并发
0 副本 100 并发
0 副本 200 并发
写入队列已经存在大量堆积和拒绝的现象了:
1 副本 200 并发
1 副本 400 并发
1 副本 800 并发
1 副本批次 500 并发 100
1 副本批次 2000 并发 100
1 副本批次 5000 并发 100
1 副本批次 5000 并发 200
单索引 425 分片
索引定义
PUT idx-xxxxxx-xxxxxx-425
{
"aliases" : {
"alias-xxxxxx-xxxxxx" : { }
},
"mappings" : {
"dynamic" : "strict",
"_routing" : {
"required" : true
},
"_source" : {
"excludes" : [
"isExtract*",
"batchNo"
]
},
"properties" : {
"addxxxxxx" : {
"type" : "text",
"term_vector" : "with_positions_offsets"
},
"cxxxxxx" : {
"type" : "byte"
},
"coxxxxxxc" : {
"type" : "text",
"boost" : 4.0,
"term_vector" : "with_positions_offsets"
},
"coxxxxxx" : {
"type" : "keyword",
"doc_values" : false
},
"conxxxxxxe" : {
"type" : "text",
"boost" : 16.0,
"term_vector" : "with_positions_offsets",
"fields" : {
"keyword" : {
"type" : "keyword",
"normalizer" : "keyword_normalizer"
}
},
"analyzer" : "name_analyzer",
"search_analyzer" : "keyword_analyzer"
},
"coxxxxxxze" : {
"type" : "long",
"index" : false,
"doc_values" : false
},
"conxxxxxxfix" : {
"type" : "keyword",
"doc_values" : false
},
"coxxxxxxpe" : {
"type" : "short"
},
"cxxxxxxm" : {
"type" : "date",
"ignore_malformed" : true,
"format" : "yyyyMMddHHmmss"
},
"duxxxxxxon" : {
"type" : "long",
"index" : false,
"doc_values" : false
},
"laxxxxxxbnail" : {
"type" : "keyword",
"boost" : 8.0,
"index" : false,
"doc_values" : false
},
"md5" : {
"type" : "keyword",
"index" : false,
"doc_values" : false
},
"ordxxxxxxNamxxxxxx" : {
"type" : "alias",
"path" : "contName.keyword"
},
"oxxxxxxccoxxxxxxt" : {
"type" : "keyword",
"doc_values" : false
},
"owxxxxxxcounxxxxxxpe" : {
"type" : "keyword",
"doc_values" : false
},
"owxxxxxxpUsxxxxxxD" : {
"type" : "keyword",
"doc_values" : false
},
"oxxxxxxDUsexxxxxxD" : {
"type" : "keyword",
"doc_values" : false
},
"pxxxxxxtalxxxxxxD" : {
"type" : "keyword"
},
"patxxxxxx" : {
"type" : "text",
"boost" : 8.0,
"term_vector" : "with_positions_offsets",
"fields" : {
"keyword" : {
"type" : "keyword"
}
},
"analyzer" : "path_analyzer"
},
"prxxxxxxntHxxxxxx" : {
"type" : "keyword",
"boost" : 8.0,
"index" : false,
"doc_values" : false
},
"prxxxxxxntLxxxxxx" : {
"type" : "keyword",
"boost" : 8.0,
"index" : false,
"doc_values" : false
},
"prxxxxxxURxxxxxx" : {
"type" : "keyword",
"boost" : 8.0,
"index" : false,
"doc_values" : false
},
"pxxxxxxity" : {
"type" : "short",
"index" : false
},
"sxxxxxxch_nxxxxxxe" : {
"type" : "alias",
"path" : "contName"
},
"sexxxxxxeTxxxxxxe" : {
"type" : "byte"
},
"sxxxxxxTm" : {
"type" : "date",
"ignore_malformed" : true,
"format" : "yyyyMMddHHmmss"
},
"smxxxxxxThuxxxxxxl" : {
"type" : "keyword",
"boost" : 8.0,
"index" : false,
"doc_values" : false
},
"taxxxxxxa" : {
"type" : "text",
"term_vector" : "with_positions_offsets"
},
"txxxxxxnaxxxxxx" : {
"type" : "keyword",
"boost" : 8.0,
"index" : false,
"doc_values" : false
},
"uxxxxxxm" : {
"type" : "date",
"ignore_malformed" : true,
"format" : "yyyyMMddHHmmss"
},
"upxxxxxxdexxxxxxount" : {
"type" : "keyword",
"doc_values" : false
},
"upxxxxxxrAcxxxxxxpe" : {
"type" : "keyword",
"doc_values" : false
},
"upxxxxxxmpUsxxxxxx" : {
"type" : "keyword",
"doc_values" : false
},
"uxxxxxxerNDxxxxxxD" : {
"type" : "keyword",
"doc_values" : false
}
}
},
"settings" : {
"index" : {
"max_ngram_diff" : "50",
"refresh_interval" : "1s",
"number_of_shards" : "425",
"analysis" : {
"normalizer" : {
"keyword_normalizer" : {
"filter" : [
"lowercase"
],
"type" : "custom"
}
},
"analyzer" : {
"keyword_analyzer" : {
"filter" : [
"lowercase"
],
"type" : "custom",
"tokenizer" : "keyword"
},
"name_analyzer" : {
"filter" : [
"lowercase"
],
"type" : "custom",
"tokenizer" : "name_tokenizer"
},
"path_analyzer" : {
"filter" : [
"lowercase"
],
"type" : "custom",
"tokenizer" : "path_tokenizer"
}
},
"tokenizer" : {
"name_tokenizer" : {
"type" : "ngram",
"min_gram" : "1",
"max_gram" : "5"
},
"path_tokenizer" : {
"pattern" : "/",
"type" : "pattern"
}
}
},
"number_of_replicas" : "1"
}
}
}
1 副本批次 50 并发 100
1 副本批次 50 并发 200
1 副本批次 50 并发 400
1 副本批次 50 并发 800
1 副本批次 500 并发 100
1 副本批次 2000 并发 100
1 副本批次 5000 并发 100
单索引 50 分片
1 副本批次 50 并发 100
1 副本批次 500 并发 100
1 副本批次 1000 并发 100
1 副本批次 5000 并发 100
走网关单索引 425 分片
1 副本批次 50 并发 400>200
1 副本批次 500 并发 100
1 副本批次 500 并发 200
1 副本批次 500 并发 400
1 副本批次 5000 并发 100
1 副本批次 5000 并发 200
1 副本批次 5000 并发 400
走网关单索引 850 分片
1 副本批次 50 并发 400
1 副本批次 500 并发 400
1 副本批次 5000 并发 400
压测结果
索引数 | 分片数 | 副本数 | 批次大小 | 压测并发 | 平均写入吞吐(eps) |
---|---|---|---|---|---|
1 | 850 | 1 | 50 | 100 | 10,000 |
1 | 850 | 0 | 50 | 100 | 30,000 |
1 | 850 | 0 | 50 | 200 | 40,000 |
1 | 850 | 1 | 50 | 200 | 18,000 |
1 | 850 | 1 | 50 | 400 | 27,500 |
1 | 850 | 1 | 50 | 800 | 29,700 |
1 | 850 | 1 | 500 | 100 | 30,187 |
1 | 850 | 1 | 2000 | 100 | 68,000 |
1 | 850 | 1 | 5000 | 100 | 98,915 |
1 | 850 | 1 | 5000 | 200 | 78,462 |
1 | 425 | 1 | 50 | 100 | 12,695 |
1 | 425 | 1 | 500 | 100 | 46,818 |
1 | 425 | 1 | 2000 | 100 | 100,000 |
1 | 425 | 1 | 5000 | 100 | 130,000 |
1 | 50 | 1 | 50 | 100 | 32,987 |
1 | 50 | 1 | 500 | 100 | 96,207 |
1 | 50 | 1 | 1000 | 100 | 147,719 |
1 | 50 | 1 | 5000 | 100 | 156,961 |
走网关节点异步合并模式:
索引数 | 分片数 | 副本数 | 批次大小 | 压测并发 | 平均写入吞吐(eps) |
---|---|---|---|---|---|
1 | 425 | 1 | 50 | 100 | 500 |
1 | 425 | 1 | 50 | 200 | 1,000 |
1 | 425 | 1 | 50 | 400 | 2,000 |
1 | 425 | 1 | 500 | 100 | 4,800 |
1 | 425 | 1 | 500 | 200 | 9,350 |
1 | 425 | 1 | 500 | 400 | 17,000 |
1 | 425 | 1 | 5000 | 100 | 50,000 |
1 | 425 | 1 | 5000 | 200 | 100,000 |
1 | 425 | 1 | 5000 | 400 | 175,000 |
1 | 850 | 1 | 50 | 400 | 2,000 |
1 | 850 | 1 | 500 | 400 | 18,800 |
1 | 850 | 1 | 5000 | 400 | 137,000 |
结论
分片数较多的索引(850 或者 425 个分片),即使并发只有 100,也有可能占满写入线程池,出现请求拒绝的情况;单个批次的文档数比较小时,更容易出现。
而同样格式的索引,在 50 个分片的情况下,批次大小为 50~500 时索引的吞吐约为 425 分片的两倍、850 分片的三倍,且线程池基本上没有堆积,或者堆积很快就能处理完。另外,单次请求的文档数越多,写入的效率越高。
某些场景下,索引虽然做了 Routing 处理,但是分片数过多的超大索引仍然存在严重的转发效率问题。建议按照业务维度,或者当前的 Routing 维度对索引进行划分,将超大索引拆分成若干个子索引,单个索引的分片数尽量不要超过 20 个。