Tuesday, May 7, 2013

ArcSight Export Parser Script

For those of you not familiar with it, ArcSight is a pretty robust correlation engine. I've been working with it for quite a while now, and I've never really been happy with the case management system they have. When an alert is triggered, it pulls in some information and saves it to a Case. Unfortunately, the management of these cases is kind of messy and can easily become unwieldy in large environments. If you have HP OpenView, you can have ArcSight export the cases directly to your ticketing system. But what if you use another ticketing system, or a custom-built one? Well, then you can export the files to the system and write your own API to parse the export files and send them to the ticketing system of your choice.

That's what I've been working on the last few weeks. With an ArcSight ESM appliance, I'm limited to using bash, perl (limited libraries), or python 2.4. Since I'm more familiar with python, I went that route. Although, you could easily write a parser in perl. In fact, ArcSight has an approved perl script that parses these exports and formats them to a customized email, so you can route events that way. Still python was what I went with.

It's still a work in progress, but here are the basic functions:

# simple function to consolidate headers from base events
def getHeaders(allevents):
    """Return the distinct field names found across all base events.

    allevents is a sequence of events; each event is a sequence of items
    whose first element is the field name.  A name is recorded the first
    time it is seen, inserted at the index of the event it was found in,
    and the finished list is returned reversed.
    """
    headers = []
    for position, event in enumerate(allevents):
        for pair in event:
            name = pair[0]
            if name not in headers:
                headers.insert(position, name)
    return headers[::-1]


# creates an HTML table for use in our web-based ticketing system
def createHtml(baseEventList):
    """Write the base events as an HTML table to tmp_alert/tmp.html.

    baseEventList is a list of events, each a sequence of (name, value)
    pairs.  The column headers come from getHeaders(baseEventList); every
    event becomes exactly one table row whose cells follow the header
    order, with an empty cell where the event has no such field.  If a
    field name occurs more than once in a single event, the first value
    is the one shown.

    Any existing tmp_alert/ directory is removed first (through removeDir,
    which is defined outside this block) and then recreated.
    """
    # Local import so this block does not depend on the file's import list;
    # xml.sax.saxutils is in the standard library of old and new Pythons.
    from xml.sax.saxutils import escape

    if os.path.exists('tmp_alert/'):
        removeDir('tmp_alert/')
    fname = "tmp_alert/tmp.html"
    d = os.path.dirname(fname)
    if not os.path.exists(d):
        os.makedirs(d)

    #grab the list of headers
    headers = getHeaders(baseEventList)

    f = open(fname, 'w')
    # try/finally (rather than "with") so the handle is closed on error
    # even on interpreters that predate the with statement.
    try:
        f.write('<!DOCTYPE html>\n')
        f.write('<html>\n')
        f.write('<body>\n')
        f.write('<p>Base Security Events</p>\n')
        f.write('<br>\n')

        #the baseTable class is a css file on the server
        #change if you have your own custom style sheet
        f.write('<table class=baseTable>\n')
        f.write('<tr>\n')
        for header in headers:
            # Field names and values originate from the exported XML, so
            # escape them before embedding them in markup.
            f.write('<th> %s </th>\n' % escape('%s' % (header,)))
        f.write('</tr>\n')

        for event in baseEventList:
            # Index this event's values by field name (first one wins).
            values = {}
            for pair in event:
                if pair[0] not in values:
                    values[pair[0]] = pair[1]

            f.write('<tr>\n')
            # Walk the headers, not the event, so each value lands in the
            # column that carries its name.
            for header in headers:
                if header in values:
                    f.write('<td> %s</td>\n' % escape('%s' % (values[header],)))
                else:
                    f.write('<td></td>\n')
            # Close the row once, after all of its cells.
            f.write('</tr>\n')

        f.write('</table>\n')
        f.write('</body>\n')
        f.write('</html>\n')
    finally:
        f.close()


# function to grab all base events using a list of ID numbers
def grabBaseEvents(xmlcontents, baseEventIDList, arcObj):
    """Collect the child fields of every SecurityEvent whose id is wanted.

    xmlcontents     -- DOM document object for a single export xml
    baseEventIDList -- the 'id' attribute values of the base events to grab
    arcObj          -- object with a setAttr(code, value) method; it is
                       given code 'TPN' whenever a grabbed event carries a
                       destinationProcessName element

    Returns a list with one entry per matching SecurityEvent that yielded
    at least one field.  Each entry is a list of (nodeName, value) tuples,
    one for every child node that itself has children; value is the
    nodeValue of that child's first child.  An event is collected once
    even if its id appears more than once in baseEventIDList.
    """
    allEvents = []
    for node in xmlcontents.getElementsByTagName('SecurityEvent'):
        # Only events that carry an id listed by the caller are base events.
        if not node.hasAttribute('id'):
            continue
        if node.getAttribute('id') not in baseEventIDList:
            continue

        baseEvent = []
        for child in node.childNodes:
            # Childless nodes (e.g. whitespace text between elements or
            # empty elements) carry no value to report.
            if not child.hasChildNodes():
                continue
            value = child.firstChild.nodeValue
            if child.nodeName == 'destinationProcessName':
                arcObj.setAttr('TPN', value)
            baseEvent.append((child.nodeName, value))

        # Skip events that produced no fields at all.
        if baseEvent:
            allEvents.append(baseEvent)
    return allEvents


# Object to store and retrieve RuleFire Event values
class arcObject:
    """Container for the values pulled out of one RuleFire event.

    Fields are read and written through getAttr/setAttr using the short
    codes listed in _CODES.  A code that is not listed is ignored:
    getAttr returns None for it and setAttr does nothing.
    """

    # (short code, instance attribute) pairs understood by getAttr/setAttr
    _CODES = (
        ('AA', 'AA'),
        ('AN', 'AlertName'),
        ('AH', 'AH'),
        ('AU', 'AU'),
        ('TA', 'TA'),
        ('TH', 'TH'),
        ('TU', 'TU'),
        ('rulename', 'rulename'),
        ('time', 'time'),
        ('FN', 'filename'),
        ('TPN', 'targetproc'),
    )

    def __init__(self):
        # Placeholder shown when the export did not fill in the alert name.
        self.AlertName = "deviceCustomString6 not filled in"
        for field in ('AA', 'AH', 'AU', 'TA', 'TH', 'TU', 'rulename'):
            setattr(self, field, "None")
        # Defaults to the moment the object is created.
        self.time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        for field in ('filename', 'targetproc'):
            setattr(self, field, "None")

    def _fieldFor(self, attr):
        """Return the attribute name behind a short code, or None."""
        for code, field in self._CODES:
            if attr == code:
                return field
        return None

    def getAttr(self, attr):
        """Return the value stored under a short code (None if unknown)."""
        field = self._fieldFor(attr)
        if field is None:
            return None
        return getattr(self, field)

    def setAttr(self, attr, value):
        """Store value under a short code; unknown codes are ignored."""
        field = self._fieldFor(attr)
        if field is not None:
            setattr(self, field, value)

Essentially, I parse the xml file for a correlation event, then parse the event for specific fields that I store in the arcObject. I'm thinking I'll use these fields later to fill in json files that I can post to the webserver. But for now I just store them and then grab the base event IDs associated with each event.

You may have noticed the deviceCustomString6 message I apply to the default alertName variable. That is something I have ArcSight set before it exports the rule-fire. I use that field to store the rulename + time + attacker address and a number of other useful things. The reason was that our ticketing system only lets us query on that alertName field. This way we can search for any of those fields (i.e. all rules triggered by a particular host).

Then I take those IDs and build a list of base events using my grabBaseEvents function. Next, I run that list through my createHtml function, which creates a table of the base events. This HTML file will probably be zipped up and posted to the web server as well.

Here's what the main loop looks like so far. The addNsend function will be what ultimately adds the fields to the json file and POSTs it to the server.

# Main loop: parse every export xml in the current directory, fill an
# arcObject from each rule-fire SecurityEvent, render its base events to
# HTML, then move the xml aside as processed.

# Rule-fire child elements whose text is copied straight into the
# arcObject, mapped to the arcObject code that receives the value.
fieldCodes = {
    'deviceCustomString6': 'AN',
    'sourceAddress': 'AA',
    'sourceHostName': 'AH',
    'sourceUserName': 'AU',
    'destinationAddress': 'TA',
    'destinationHostName': 'TH',
    'destinationUserName': 'TU',
    'destinationProcessName': 'TPN',
}

for xmlfile in glob.glob('*.xml'):
    contents = parse(xmlfile)
    #print "parsing " +xmlfile
    #for each Security Event
    for i in contents.getElementsByTagName('SecurityEvent'):
        x = i.attributes
        # NOTE(review): the original comment read "if external ID attribute
        # exists, it's a rule fire", yet the test is "!=".  The condition is
        # kept exactly as posted -- confirm which one is intended.  The
        # positional item(1)/item(2) lookups also rely on attribute order.
        if x.item(1).name != 'externalID':
            arcObj = arcObject()
            arcObj.setAttr('rulename', x.item(2).value)
            baseEventIDs = []
            #map rule fire values to alert web fields
            for node in i.childNodes:
                if not node.hasChildNodes():
                    continue
                name = node.nodeName
                value = node.firstChild.nodeValue
                if name == 'fileName':
                    if value != x.item(1).value:
                        arcObj.setAttr('FN', value)
                elif name == 'managerReceiptTime':
                    # Drop only a literal trailing ".0".  rstrip('.0')
                    # strips any run of '.' and '0' characters and would
                    # turn "...:30.0" into "...:3".
                    if value.endswith('.0'):
                        value = value[:-2]
                    arcObj.setAttr('time', value)
                #check for baseEventIDs
                elif name == 'baseEventIds':
                    for ref in node.getElementsByTagName('ref'):
                        if ref.hasAttribute('id'):
                            baseEventIDs.append(ref.getAttribute('id'))
                    #find base events and create html to store
                    #grabBaseEvents should return a list of key/value
                    #pairs for each event
                    listOfBaseEvents = grabBaseEvents(contents, baseEventIDs, arcObj)
                    #createHtml creates the tmp_alert directory and the html
                    createHtml(listOfBaseEvents)
                elif name in fieldCodes:
                    arcObj.setAttr(fieldCodes[name], value)
            # TODO: enable once addNsend exists
            #addNsend(arcObj)
    addFileToFolder(xmlfile,'/var/tmp/processed')
    os.remove(xmlfile)


All in all, it works pretty well. I still need to make some formatting changes and add some logic to parse deeper into the xml for some relevant fields, but that's not difficult. Just time consuming. For now it's fine with the handful of fields that it parses for. I guess I could also have it pretty up the html file a bit. As it is, the headers are still smashed together field names like sourceAddress, and what-not.

Maybe when I have it complete, I can post it to ArcSight 724. No telling how many other people are in need of an API for a non-supported ticketing system. With a little modification, I bet this script would work for quite a few other systems.

-Newt

1 comment:

  1. Hey thanks for the post. Did you have any luck extracting attachments? They seem to be almost Base64 (but it's not valid Base64...)?

    ReplyDelete