As an exercise in pyparsing, I wanted to parse the syslog output of Snort. The pyparsing module is very flexible and handy in this situation. I was able to effectively parse the output into a new comma-delimited file and then easily use Logparser to create graphs of the data, such as top ports, source IPs, etc.
import string
from pyparsing import alphas,nums, alphanums, Literal, Combine, Word, Group, Suppress, OneOrMore, delimitedList, ZeroOrMore, Optional
# Sample input: one snort alert line in syslog format. The triple-quoted
# literal begins and ends with a newline, so the alert sits on its own line.
testdata = """
<133> Apr 1 00:00:00 server1 snort[32268]: [1:1983:6] BACKDOOR DeepThroat 3.1 Connection attempt [Classification: A Network Trojan was detected] [Priority: 1]: {UDP} 10.1.1.1:161 -> 192.168.1.1:4120
"""
# Grammar
# Module-level holder for the log-line grammar. It starts as None;
# getLogLineBNF() declares it global and only enters its construction
# branch while it is still None.
logLineBNF = None
def getLogLineBNF():
global logLineBNF
if logLineBNF is None:
integer = Word( nums )
ipAddress = delimitedList( integer, ".", combine=True )
#timeZoneOffset = Word("+-",nums)
code = Suppress(Group(Combine('<' + Word(nums) + '>' + ' ')))
month = Word(string.uppercase, string.lowercase, exact=3)
serverDateTime = Group(Combine( month + ' '+ integer + ' ' + integer + ":" + integer + ":" + integer ) )
serverName = Word(alphanums)
misc_code = Suppress(Combine(Suppress(Word(alphas) + '[') + Word(nums) + Suppress(']' + ':')))
snortId = Combine(Suppress('[') + Word(nums) + ':' + Word(nums) + Suppress(':' + Word(nums) + ']'))
description = ZeroOrMore('(' + Word(alphanums+'.-_ []') + ')') + OneOrMore( Word(alphanums+'-./>!:$_ ')) + ZeroOrMore(Suppress('[') + Word(nums) + ']')
classification = OneOrMore('[' + Suppress(Word(alphas) + ': ') + OneOrMore(Word(alphanums)) + ']' ).setParseAction( lambda tokens : (tokens[-2]))
priority = Suppress(':')
code_1 = Suppress(Group(Combine('<' + Word(alphanums) + '>' + ' ')))
proto = Suppress('{') + Word(alphanums+':') + Suppress('}')
src = delimitedList( integer, ".", combine=True )
src_prt = Optional(ZeroOrMore(Suppress(':') + Word(nums)),default='_')
out = Suppress(Literal("->"))
dst = delimitedList( integer, ".", combine=True )
dst_prt = Optional(ZeroOrMore(Suppress(':') + Word(nums)),default='_')
#Grammar
The download link contains the full, properly formatted Python script; run it as is. Your log file may contain extra characters not accounted for here, but this worked for several days' worth of traffic on a medium-sized network.