Production environment:
Logstash side:
  java 1.7.0
  logstash 2.4.0
  logstash is installed on 400 servers
ES side:
  java 1.8.0
  es 2.3.4
  kibana 4.5.3
  es and kibana are installed on a single server
Purpose: logstash reads the log files on each server, filters them, and writes the results to es.
The logstash configuration file, stdin.conf:
input {
  file {
    # discover_interval => 30
    ignore_older => 600
    # sincedb_write_interval => 30
    stat_interval => 10
    close_older => 60
    max_open_files => 55535
    path => ["/usr/local/xx/logs/fox.log.*"]
    exclude => ["/usr/local/xx/logs/*.zip","/usr/local/xx/logs/fox.log.2017-02*"]
    codec => plain {charset => "ISO-8859-1"}
  }
}
filter {
  if ([message] !~ "ANALYSISLOG=") {
    ruby {
      code => "event.cancel"
    }
  }
  mutate {
    split => ["message","ANALYSISLOG="]
    add_field => {
      "analysis_json" => "%{[message][1]}"
    }
    add_field => {
      "message_head" => "%{[message][0]}"
    }
  }
  json {
    source => "analysis_json"
  }
  json {
    source => "logContent"
  }
  mutate {
    split => ["message_head","|"]
    add_field => {
      "time_head" => "%{[message_head][0]}"
    }
    add_field => {
      "serverid" => "%{[message_head][4]}"
    }
  }
  mutate {
    split => ["time_head"," "]
    add_field => {
      "date_int" => "%{[time_head][0]}"
    }
    remove_field => ["message","@version","host","path","message_head","analysis_json","time_head", "logContent"]
  }
}
output {
  # stdout { codec => dots }
  elasticsearch {
    hosts => "xx.xx.xx.xx"
    index => "logstash-test-%{+YYYY.MM.dd}"
    # flush_size => 1000
    idle_flush_time => 30
  }
}
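As a side note, the early discard at the top of the filter block could also be written with Logstash's bundled drop filter instead of a ruby block; a minimal, untested sketch of that variant:

  filter {
    if [message] !~ "ANALYSISLOG=" {
      # discard events that do not contain the ANALYSISLOG= marker
      drop { }
    }
    # ... rest of the filter chain unchanged
  }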
On each server, logstash is started with: nohup /home/logstash-2.4.0/bin/logstash -f /home/logstash-2.4.0/stdin.conf &
After running for a day or two, the logstash process dies, and a heapdump.hprof file appears in the logstash directory.
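For reference, one way to see what is actually filling the heap is to open the generated heapdump.hprof in a JDK heap tool; a minimal sketch, assuming jhat from the local JDK 7/8 is available (Eclipse MAT would work as well):

  # loads the dump and serves an object histogram at http://localhost:7000
  jhat /home/logstash-2.4.0/heapdump.hprof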
[root@125-60 logstash-2.4.0]# bin/logstash -f stdin.conf -w 32 |pv -abt > /dev/null
3.12kiB 0:57:55 [ 940miB/s]
Testing on a single logstash machine with stdout { codec => dots }, the rate peaked at 1000m events per second (i.e. under one event per second, going by the pv figures above).
With 400 logstash machines all sending data to a single es node, I am not sure whether es will eventually fall over, but so far it has not.
Could anyone tell me why my logstash keeps dying, and how to fix it?
thanks
3 replies
medcl replied:
I'd suggest also setting flush_size, so that flushing is controlled by batch size as well as by time; that combination is safer.
The environment variable LS_HEAP_SIZE can be used to set the size of logstash's heap.
The specific cause of the out-of-memory error can be found by analyzing the dump file.
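A minimal sketch of how the first two suggestions might be applied; the 2g heap and the flush_size value are illustrative, not tuned recommendations:

  # bound the logstash heap explicitly before starting it
  export LS_HEAP_SIZE=2g
  nohup /home/logstash-2.4.0/bin/logstash -f /home/logstash-2.4.0/stdin.conf &

and in the elasticsearch output:

  elasticsearch {
    hosts => "xx.xx.xx.xx"
    index => "logstash-test-%{+YYYY.MM.dd}"
    flush_size => 500        # flush once this many events are buffered (illustrative)
    idle_flush_time => 30    # ... or after 30 seconds, whichever comes first
  }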