kibana、grafana上的简单统计查询都会达到磁盘瓶颈。
即使把时间区间缩小到2天,数据量估计也就7-8亿条。索引数据大小750G(仅primary shard),共3个索引。
下面是一个简单查询 fileNameXXXXX:parameterXXXXX 的profile(只截取了其中一个shard的部分内容),其他shard的情况都差不多,有好多耗时都在几百到几千ms,导致最后查询总耗时 "took": 12452(单位为ms)。
这期间能够明显观察到iostat util达到100%。
grafana的看板同时发起多个查询时,很容易导致系统负载达到cpu核数的2-3倍,比较担心节点ping失败。
然后kibana就无法使用了。只能等待或者主动cancel掉search来解决。
1.统计查询这么慢,感觉是不是对es的使用方式哪里出了问题。但是grafana是直接对es发起查询的,查询语句基本没法干预。
2.看了下缓存。query_cache的值都是空的,request_cache信息如下(heap为30G)
```
"total": {
"request_cache": {
"memory_size": "1.4gb",
"memory_size_in_bytes": 1596359129,
"evictions": 102209,
"hit_count": 3519401,
"miss_count": 2160238
}
}
```
3.可能很多东西没有描述清晰。感觉不知道从哪里下手。
profile数据
```
{
"id": "[VCMPiqWZSYW4hnNDj_NExg][index-name-XXXXXXXX][0]",
"searches": [
{
"query": [
{
"type": "BooleanQuery",
"description": "+(+fileNameXXXXX:parameterXXXXX +MatchNoDocsQuery[\"User requested \"match_none\" query.\"]) #(ConstantScore(*:*))^0.0",
"time": "1346.505059ms",
"breakdown": {
"score": 0,
"build_scorer_count": 28,
"match_count": 0,
"create_weight": 67656,
"next_doc": 0,
"match": 0,
"create_weight_count": 1,
"next_doc_count": 0,
"score_count": 0,
"build_scorer": 1346437374,
"advance": 0,
"advance_count": 0
},
"children": [
{
"type": "BooleanQuery",
"description": "+fileNameXXXXX:parameterXXXXX +MatchNoDocsQuery[\"User requested \"match_none\" query.\"]",
"time": "1346.419769ms",
"breakdown": {
"score": 0,
"build_scorer_count": 28,
"match_count": 0,
"create_weight": 24997,
"next_doc": 0,
"match": 0,
"create_weight_count": 1,
"next_doc_count": 0,
"score_count": 0,
"build_scorer": 1346394743,
"advance": 0,
"advance_count": 0
},
"children": [
{
"type": "TermQuery",
"description": "fileNameXXXXX:parameterXXXXX",
"time": "1346.326085ms",
"breakdown": {
"score": 0,
"build_scorer_count": 28,
"match_count": 0,
"create_weight": 1454,
"next_doc": 0,
"match": 0,
"create_weight_count": 1,
"next_doc_count": 0,
"score_count": 0,
"build_scorer": 1346324602,
"advance": 0,
"advance_count": 0
}
},
{
"type": "MatchNoDocsQuery",
"description": "MatchNoDocsQuery[\"User requested \"match_none\" query.\"]",
"time": "0.01745900000ms",
"breakdown": {
"score": 0,
"build_scorer_count": 28,
"match_count": 0,
"create_weight": 2571,
"next_doc": 0,
"match": 0,
"create_weight_count": 1,
"next_doc_count": 0,
"score_count": 0,
"build_scorer": 14859,
"advance": 0,
"advance_count": 0
}
}
]
},
{
"type": "BoostQuery",
"description": "(ConstantScore(*:*))^0.0",
"time": "0.005811000000ms",
"breakdown": {
"score": 0,
"build_scorer_count": 0,
"match_count": 0,
"create_weight": 5810,
"next_doc": 0,
"match": 0,
"create_weight_count": 1,
"next_doc_count": 0,
"score_count": 0,
"build_scorer": 0,
"advance": 0,
"advance_count": 0
},
"children": [
{
"type": "MatchAllDocsQuery",
"description": "*:*",
"time": "0.0007590000000ms",
"breakdown": {
"score": 0,
"build_scorer_count": 0,
"match_count": 0,
"create_weight": 758,
"next_doc": 0,
"match": 0,
"create_weight_count": 1,
"next_doc_count": 0,
"score_count": 0,
"build_scorer": 0,
"advance": 0,
"advance_count": 0
}
}
]
}
]
}
],
"rewrite_time": 461471,
"collector": [
{
"name": "CancellableCollector",
"reason": "search_cancelled",
"time": "0.3679900000ms",
"children": [
{
"name": "MultiCollector",
"reason": "search_multi",
"time": "0.3463110000ms",
"children": [
{
"name": "SimpleFieldCollector",
"reason": "search_top_hits",
"time": "0.1946940000ms"
},
{
"name": "ProfilingAggregator: [org.elasticsearch.search.profile.aggregation.ProfilingAggregator@5dcbc248]",
"reason": "aggregation",
"time": "0.09764600000ms"
}
]
}
]
}
]
}
],
"aggregations": [
{
"type": "org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramAggregator",
"description": "2",
"time": "0.003712000000ms",
"breakdown": {
"reduce": 0,
"build_aggregation": 1610,
"build_aggregation_count": 1,
"initialize": 2100,
"initialize_count": 1,
"reduce_count": 0,
"collect": 0,
"collect_count": 0
}
}
]
}
```
即使把时间区间缩小到2天,数据量估计也就7-8亿条。索引数据大小750G(仅primary shard),共3个索引。
下面是一个简单查询 fileNameXXXXX:parameterXXXXX 的profile(只截取了其中一个shard的部分内容),其他shard的情况都差不多,有好多耗时都在几百到几千ms,导致最后查询总耗时 "took": 12452(单位为ms)。
这期间能够明显观察到iostat util达到100%。
grafana的看板同时发起多个查询时,很容易导致系统负载达到cpu核数的2-3倍,比较担心节点ping失败。
然后kibana就无法使用了。只能等待或者主动cancel掉search来解决。
1.统计查询这么慢,感觉是不是对es的使用方式哪里出了问题。但是grafana是直接对es发起查询的,查询语句基本没法干预。
2.看了下缓存。query_cache的值都是空的,request_cache信息如下(heap为30G)
```
"total": {
"request_cache": {
"memory_size": "1.4gb",
"memory_size_in_bytes": 1596359129,
"evictions": 102209,
"hit_count": 3519401,
"miss_count": 2160238
}
}
```
3.可能很多东西没有描述清晰。感觉不知道从哪里下手。
profile数据
```
{
"id": "[VCMPiqWZSYW4hnNDj_NExg][index-name-XXXXXXXX][0]",
"searches": [
{
"query": [
{
"type": "BooleanQuery",
"description": "+(+fileNameXXXXX:parameterXXXXX +MatchNoDocsQuery[\"User requested \"match_none\" query.\"]) #(ConstantScore(*:*))^0.0",
"time": "1346.505059ms",
"breakdown": {
"score": 0,
"build_scorer_count": 28,
"match_count": 0,
"create_weight": 67656,
"next_doc": 0,
"match": 0,
"create_weight_count": 1,
"next_doc_count": 0,
"score_count": 0,
"build_scorer": 1346437374,
"advance": 0,
"advance_count": 0
},
"children": [
{
"type": "BooleanQuery",
"description": "+fileNameXXXXX:parameterXXXXX +MatchNoDocsQuery[\"User requested \"match_none\" query.\"]",
"time": "1346.419769ms",
"breakdown": {
"score": 0,
"build_scorer_count": 28,
"match_count": 0,
"create_weight": 24997,
"next_doc": 0,
"match": 0,
"create_weight_count": 1,
"next_doc_count": 0,
"score_count": 0,
"build_scorer": 1346394743,
"advance": 0,
"advance_count": 0
},
"children": [
{
"type": "TermQuery",
"description": "fileNameXXXXX:parameterXXXXX",
"time": "1346.326085ms",
"breakdown": {
"score": 0,
"build_scorer_count": 28,
"match_count": 0,
"create_weight": 1454,
"next_doc": 0,
"match": 0,
"create_weight_count": 1,
"next_doc_count": 0,
"score_count": 0,
"build_scorer": 1346324602,
"advance": 0,
"advance_count": 0
}
},
{
"type": "MatchNoDocsQuery",
"description": "MatchNoDocsQuery[\"User requested \"match_none\" query.\"]",
"time": "0.01745900000ms",
"breakdown": {
"score": 0,
"build_scorer_count": 28,
"match_count": 0,
"create_weight": 2571,
"next_doc": 0,
"match": 0,
"create_weight_count": 1,
"next_doc_count": 0,
"score_count": 0,
"build_scorer": 14859,
"advance": 0,
"advance_count": 0
}
}
]
},
{
"type": "BoostQuery",
"description": "(ConstantScore(*:*))^0.0",
"time": "0.005811000000ms",
"breakdown": {
"score": 0,
"build_scorer_count": 0,
"match_count": 0,
"create_weight": 5810,
"next_doc": 0,
"match": 0,
"create_weight_count": 1,
"next_doc_count": 0,
"score_count": 0,
"build_scorer": 0,
"advance": 0,
"advance_count": 0
},
"children": [
{
"type": "MatchAllDocsQuery",
"description": "*:*",
"time": "0.0007590000000ms",
"breakdown": {
"score": 0,
"build_scorer_count": 0,
"match_count": 0,
"create_weight": 758,
"next_doc": 0,
"match": 0,
"create_weight_count": 1,
"next_doc_count": 0,
"score_count": 0,
"build_scorer": 0,
"advance": 0,
"advance_count": 0
}
}
]
}
]
}
],
"rewrite_time": 461471,
"collector": [
{
"name": "CancellableCollector",
"reason": "search_cancelled",
"time": "0.3679900000ms",
"children": [
{
"name": "MultiCollector",
"reason": "search_multi",
"time": "0.3463110000ms",
"children": [
{
"name": "SimpleFieldCollector",
"reason": "search_top_hits",
"time": "0.1946940000ms"
},
{
"name": "ProfilingAggregator: [org.elasticsearch.search.profile.aggregation.ProfilingAggregator@5dcbc248]",
"reason": "aggregation",
"time": "0.09764600000ms"
}
]
}
]
}
]
}
],
"aggregations": [
{
"type": "org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramAggregator",
"description": "2",
"time": "0.003712000000ms",
"breakdown": {
"reduce": 0,
"build_aggregation": 1610,
"build_aggregation_count": 1,
"initialize": 2100,
"initialize_count": 1,
"reduce_count": 0,
"collect": 0,
"collect_count": 0
}
}
]
}
```
1 个回复
rockybean - Elastic Certified Engineer, ElasticStack Fans,公众号:ElasticTalk
赞同来自:
然后把每一步的结果补充在这里,再补充下你索引的配置,包括 mapping、index 等。