-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathupdate_log_filter
executable file
·108 lines (92 loc) · 4.95 KB
/
update_log_filter
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/env perl
# vim: ft=perl
# Copyright 2013 L. Wayne Walker <[email protected],[email protected]> All Rights Reserved.
# Directory that holds the log_bleach working files.  A true value in
# LOG_BLEACH_TEST_HOME takes precedence over HOME as the parent directory.
our $BASE_DIR = ($ENV{'LOG_BLEACH_TEST_HOME'} || $ENV{'HOME'}) . '/.log_bleach/';

# When that directory is missing, run `log_bleach --init` and relay whatever
# it printed.  (Presumably the init step sets the directory up -- confirm
# against log_bleach itself.)
unless (-d $BASE_DIR) {
    my $init_output = `log_bleach --init`;
    print $init_output;
}
# ---------------------------------------------------------------------------
# Named regex fragments.
#
# Every value is regex *source text* (a plain string), not a compiled qr//.
# They are package variables (`our`) on purpose: the translation loop further
# down expands <%=NAME%> placeholders by looking the variable up by name (a
# symbolic reference), and that only resolves package variables, never `my`
# lexicals.  Do not turn these into lexicals without changing that lookup.
# ---------------------------------------------------------------------------

# Dotted quad.  Each octet is 1-3 digits with an optional leading 0/1/2, so
# the check is loose (values up to 299 are accepted).
our $IP_ADDRESS = '\b(?:[012]?\d?\d\.){3}[012]?\d?\d\b';

# --- syslog-style timestamps ------------------------------------------------
# "Mon DD HH:MM:SS"; the day may be space-padded.
our $TS_SYSLOG = '(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) *[ 0123][0-9] \d\d:\d\d:\d\d';
# Same, followed by a 3-4 letter upper-case zone abbreviation.
our $TS_SYSLOG_WITH_TIMEZONE = $TS_SYSLOG . '\s+[A-Z]{3,4}\b';
our $THREE_LETTER_WEEKDAY = '\b(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)\b';
our $TS_SYSLOG_WITH_WEEKDAY_WITH_TIMEZONE = $THREE_LETTER_WEEKDAY . '\s+' . $TS_SYSLOG_WITH_TIMEZONE;
our $TS_SYSLOG_WITH_WEEKDAY = $THREE_LETTER_WEEKDAY . '\s+' . $TS_SYSLOG;

# Timestamp followed by two whitespace-free fields, an optional "[digits]",
# and ": ".
my $SYSLOG_FIELDS_AFTER_TS = ' \S+ \S+(?:\[\d+\])?: ';
our $SYSLOG_PREFIX = $TS_SYSLOG . $SYSLOG_FIELDS_AFTER_TS;
our $SYSLOG_PREFIX_WITH_WEEKDAY = $TS_SYSLOG_WITH_WEEKDAY . $SYSLOG_FIELDS_AFTER_TS;

# --- ISO-style timestamps ---------------------------------------------------
# "YYYY-MM-DD HH:MM:SS" with either a space or "T" between date and time.
our $ISODATE = '\d\d\d\d-\d\d-\d\d[ T]\d\d:\d\d:\d\d';
our $ISODATE_WITH_TZ_TLA = $ISODATE . '\s+[A-Z]{3}\b';
our $ISODATE_WITH_TZ_TLA_MILS = $ISODATE_WITH_TZ_TLA . '\s+\d{1,3}';
# The separator before the six fractional digits is a literal dot.  It used
# to be an unescaped "." and therefore accepted any character there.
our $ISODATE_WITH_USEC = $ISODATE . '\.\d\d\d\d\d\d';
# NOTE(review): the first alternative below is the literal text "$line"
# (single-quoted string, so no interpolation).  As a regex that is an
# end-of-string anchor followed by "line", which cannot match.  Left
# untouched because the intended alternative is unknown.
our $RN_LOG_PREFIX = $ISODATE_WITH_USEC . '\s+\d+\s+($line|DEBUG|WARNING|NOTICE|INFO|CRITICAL|ERROR)\s+--\s+\d+\s+\| ';

# --- numbers ----------------------------------------------------------------
# Earlier, stricter variant kept for reference (required at least one A-F):
#HEXNUM = '(?:\b|0[xX])[A-Fa-f0-9]*[A-Fa-f][A-Fa-f0-9]*\b';
# Two or more hex digits, optionally introduced by "x"/"0x".  Purely decimal
# runs and hex-looking words also satisfy this.
our $HEXNUM = '(?:\b|0?[xX])[A-Fa-f0-9]{2,}\b';
our $FLOAT = '[\-+]?\d+\.\d+(?:[eE][\-+]?\d+)?';
our $INTEGER = '[\-+]?\d+';

# Characters that get a backslash put in front of them when a log line is
# turned into a regex.
our $REGEX_SPECIAL_CHAR = '[\|\+\{\}\(\)\[\]\$\^\?\\\\\*\`\/\.]';

# --- paths, addresses, identifiers -------------------------------------------
# At least two "/component" segments, with an optional trailing slash.
our $FULL_FILE_PATH = '(?:\/[\-A-Za-z0-9_.,:]+){2,}\/?';
our $BASIC_EMAIL = '\w+([-=+.\']\w+)*\@\w+([-.]\w+)*\.\w+([-.]\w+)*';
our $BASIC_HOSTNAME = '\b(?:[\w-]+\.)+[\w-]+\b\.?';
# One or more key=value items (value bare or double-quoted), optionally
# separated by commas and whitespace.
our $BASIC_TUPLE_LIST = '\b(?:\w+=(?:\S*|"[^"]*"\b),?\s*)+';
# 8-4-4-4-12 hex digits.  This used to be 8-4-4-4-12 of *any* character, which
# also matched things like a row of 36 dashes.
our $UUID = '[0-9A-Fa-f]{8}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{12}';
our $RUBY_STACK_TRACE_FILE_LINE = $FULL_FILE_PATH . ':\d+:in\s+\S+';

# --- generic fillers -----------------------------------------------------------
our $WHITESPACE_REGION = '\s+';
our $PATTERN_WORD = '\S+';
our $PATTERN_END_MATCHING = '.*';
our $PATTERN_MATCH_ALL = '.*';
# ---------------------------------------------------------------------------
# Main: read source/<basename> under $BASE_DIR line by line and write
#   parsed/<basename>.parsed  - each line as a template with <%= NAME %>
#                               placeholders, anchored with \A ... \s*\Z
#   perlre/<basename>.perlre  - the same line with every placeholder expanded
#                               to the regex text held in the package
#                               variable of that name
# Usage: update_log_filter <basename>
# ---------------------------------------------------------------------------

# Strictures apply from this point to the end of the file.
use strict;
use warnings;

my $basename = $ARGV[0];
die "usage: $0 <basename>\n" unless defined $basename && length $basename;

my $source_path = $BASE_DIR . 'source/' . $basename;
my $parsed_path = $BASE_DIR . 'parsed/' . $basename . '.parsed';
my $perlre_path = $BASE_DIR . 'perlre/' . $basename . '.perlre';

open(my $source, '<', $source_path) or die "Cannot read $source_path: $!\n";
open(my $parsed, '>', $parsed_path) or die "Cannot write $parsed_path: $!\n";
open(my $perlre, '>', $perlre_path) or die "Cannot write $perlre_path: $!\n";

# Patterns that are recognised automatically in a line, in the order they are
# applied.  Order matters: longer/more specific forms come before the shorter
# ones they contain.  TS_SYSLOG_WITH_WEEKDAY is listed once; a second global
# pass over the same pattern could never find anything new.
# NOTE(review): SYSLOG_PREFIX_WITH_WEEKDAY looks unreachable, because
# TS_SYSLOG_WITH_WEEKDAY has already replaced its timestamp part by the time
# it runs.  The order is kept as it was so generated output does not change.
my @auto_pattern_names = qw(
    TS_SYSLOG_WITH_WEEKDAY_WITH_TIMEZONE
    TS_SYSLOG_WITH_WEEKDAY
    TS_SYSLOG_WITH_TIMEZONE
    RN_LOG_PREFIX
    ISODATE_WITH_TZ_TLA_MILS
    ISODATE_WITH_TZ_TLA
    ISODATE_WITH_USEC
    ISODATE
    SYSLOG_PREFIX_WITH_WEEKDAY
    SYSLOG_PREFIX
    TS_SYSLOG
    THREE_LETTER_WEEKDAY
    RUBY_STACK_TRACE_FILE_LINE
    FULL_FILE_PATH
    IP_ADDRESS
    UUID
    FLOAT
    HEXNUM
    INTEGER
    WHITESPACE_REGION
);

# Compile each pattern once.  The pattern text lives in the package variable
# of the same name, hence the by-name (symbolic) lookup.
my @auto_rules;
{
    no strict 'refs';
    for my $name (@auto_pattern_names) {
        my $pattern = ${$name};
        die "Pattern variable \$$name is not defined\n" unless defined $pattern;
        push @auto_rules, [ $name, qr/$pattern/ ];
    }
}

LINE:
while (my $line = <$source>) {
    chomp $line;
    # Strip the carriage return left over from Windows line endings.  (An
    # empty s/// would instead re-apply the last successfully matched regex.)
    $line =~ s/\r\z//;
    # Skip empty lines only; a line consisting of "0" is real input.
    next LINE if $line eq '';

    # Pull out <PATTERN_LITERAL>...<PATTERN_LITERAL/> sections so the
    # automatic patterns below leave their contents alone.
    my @literals;
    while ($line =~ s#<PATTERN_LITERAL>([^<]*)<PATTERN_LITERAL/>#PATTERN_LITERAL_PLACEHOLDER#) {
        push @literals, $1;
    }

    # Replace every recognised timestamp, path, address, number, ... with its
    # <%=NAME%> placeholder.
    for my $rule (@auto_rules) {
        my ($name, $re) = @{$rule};
        $line =~ s/$re/<%=${name}%>/g;
    }

    # Put the protected literals back, first placeholder first.
    for my $literal (@literals) {
        $line =~ s/PATTERN_LITERAL_PLACEHOLDER/$literal/;
    }

    # Backslash-escape regex metacharacters in what remains (restored
    # literals included).  The placeholders contain none of these characters.
    $line =~ s/(${REGEX_SPECIAL_CHAR})/\\$1/g;

    # Keywords written by hand in the source file.
    $line =~ s/PATTERN_WORD/<%=PATTERN_WORD%>/g;
    # PATTERN_END_MATCHING also swallows the rest of the line.
    $line =~ s/PATTERN_END_MATCHING.*/<%=PATTERN_END_MATCHING%>/;
    $line =~ s/PATTERN_MATCH_ALL/<%=PATTERN_MATCH_ALL%>/g;
    $line =~ s/PATTERN_FILE_PATH/<%=FULL_FILE_PATH%>/g;
    $line =~ s/PATTERN_EMAIL/<%=BASIC_EMAIL%>/g;
    $line =~ s/PATTERN_HOSTNAME/<%=BASIC_HOSTNAME%>/g;
    $line =~ s/PATTERN_TUPLE_LIST/<%=BASIC_TUPLE_LIST%>/g;

    # Anchor the whole line, tolerating trailing whitespace.
    $line = '\\A' . $line . '\\s*\\Z';

    # perlre output: expand each placeholder to the regex text of the package
    # variable with that name.  A name with no such variable expands to
    # nothing, as before.
    my $regex = $line;
    {
        no strict 'refs';
        no warnings 'uninitialized';
        $regex =~ s/<%=([A-Z_]*)%>/${$1}/g;
    }
    print {$perlre} $regex, "\n";

    # parsed output: same line, with the placeholders padded to "<%= NAME %>".
    $line =~ s/<%=/<%= /g;
    $line =~ s/%>/ %>/g;
    print {$parsed} $line, "\n";
}

close($source);
# Buffered write errors only surface at close time.
close($parsed) or die "Cannot finish writing $parsed_path: $!\n";
close($perlre) or die "Cannot finish writing $perlre_path: $!\n";