Logstash处理Nginx中文Post内容乱码
本文章记录采集中有中文字符,出现乱码问题的处理.
解决
scripts/nginx_request_body.rb
def filter(event)
# 设置index日期
event.set('index_day', event.timestamp.time.localtime.strftime('%Y%m%d'))
# 切割message
message_array = event.get("message").split("||")
event.set("host", message_array[1])
...
# request_body字段
request_body = message_array[13]
if request_body.nil? || request_body == '-' || request_body.empty? then
return [event]
end
# 排除form-data
if request_body.include? "Content-Disposition: form-data" then
event.set("[post_data]", "form-data")
return [event]
end
# 重点
request_body = request_body.gsub('\\x22','"')
# \\xx to chr
pt = 0
new_request_body = ''
begin
while pt < request_body.length do
if request_body[pt] == '\\' and request_body[pt + 1] == 'x' then
word = (request_body[pt + 2] + request_body[pt + 3]).to_i(16).chr
new_request_body = new_request_body + word
pt = pt + 4
else
new_request_body = new_request_body + request_body[pt]
pt = pt + 1
end
end
rescue
event.set("[post_data]", request_body)
return [event]
end
# 内容脱敏采集
begin
Array["password","user.pwd","user_pwd"].each{ | e|
if new_request_body.include? e
new_request_body.gsub!(/#{e}=.*&/,"#{e}=***&")
end
}
new_request_body.gsub('\"','')
event.set("[post_data]", new_request_body)
rescue
event.set("[post_data]", "setting post_data error!")
end
return [event]
end
nginx.conf
filter {
if [fields][out_topic] == "logstash-nginx_access" {
# 引入脚本
ruby {
path => "/etc/logstash/scripts/nginx_request_body.rb"
}
useragent {
source => "http_user_agent"
target => "user_agent"
}
geoip {
source => "remote_addr"
target => "geoip"
database => "/var/GeoLite/GeoLite2-City.mmdb"
add_field => [ "[geoip][coordinates]", "%{[geoip][longitude]}" ]
add_field => [ "[geoip][coordinates]", "%{[geoip][latitude]}" ]
}
mutate {
convert => [
"status","integer",
"request_time","float",
"upstream_response_time","float",
"[geoip][coordinates]", "float"
]
remove_field => "message"
remove_field => "prospector"
remove_field => "request"
}
}
}