Package lib :: Package cuckoo :: Package common :: Module netlog
[hide private]
[frames | no frames]

Source Code for Module lib.cuckoo.common.netlog

  1  # Copyright (C) 2010-2014 Cuckoo Foundation. 
  2  # This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org 
  3  # See the file 'docs/LICENSE' for copying permission. 
  4   
  5  import logging 
  6  import struct 
  7  import datetime 
  8  import string 
  9   
 10  try: 
 11      import bson 
 12      HAVE_BSON = True 
 13  except ImportError: 
 14      HAVE_BSON = False 
 15   
 16  if HAVE_BSON: 
 17      # The BSON module provided by pymongo works through its "BSON" class. 
 18      if hasattr(bson, "BSON"): 
 19          bson_decode = lambda d: bson.BSON(d).decode() 
 20      # The BSON module provided by "pip install bson" works through the 
 21      # "loads" function (just like pickle etc.) 
 22      elif hasattr(bson, "loads"): 
 23          bson_decode = lambda d: bson.loads(d) 
 24      else: 
 25          HAVE_BSON = False 
 26   
 27  from lib.cuckoo.common.defines import REG_SZ, REG_EXPAND_SZ 
 28  from lib.cuckoo.common.defines import REG_DWORD_BIG_ENDIAN 
 29  from lib.cuckoo.common.defines import REG_DWORD_LITTLE_ENDIAN 
 30  from lib.cuckoo.common.exceptions import CuckooResultError 
 31  from lib.cuckoo.common.logtbl import table as LOGTBL 
 32  from lib.cuckoo.common.utils import get_filename_from_path 
 33   
 34  log = logging.getLogger(__name__) 
 35   
 36   
 37  # should probably prettify this 
def expand_format(fs):
    """Expand single-digit repeat counts in a format string.

    A digit followed by a character is replaced by that character repeated
    that many times (``"3s"`` becomes ``"sss"``); every other character is
    copied through unchanged. Only one digit is read per count, and a digit
    at the very end of ``fs`` raises IndexError.
    """
    pieces = []
    pos = 0
    end = len(fs)
    while pos < end:
        ch = fs[pos]
        if ch in string.digits:
            # Consume both the count and the character it applies to.
            pieces.append(fs[pos + 1] * int(ch))
            pos += 2
        else:
            pieces.append(ch)
            pos += 1
    return "".join(pieces)
###############################################################################
# Custom Cuckoomon "Netlog" protocol - by skier and rep
# Kind of deprecated, more generic BSON protocol below
###############################################################################
class NetlogParser(object):
    """Parser for the custom binary Cuckoomon "netlog" protocol.

    Raw bytes are pulled from ``handler.read(n)`` and decoded messages are
    reported through ``handler.log_process``, ``handler.log_thread`` and
    ``handler.log_call``.
    """

    def __init__(self, handler):
        self.handler = handler

        # Format specifier (as used in LOGTBL) -> reader for that argument.
        self.formatmap = {
            "s": self.read_string,
            "S": self.read_string,
            "u": self.read_string,
            "U": self.read_string,
            "b": self.read_buffer,
            "B": self.read_buffer,
            "i": self.read_int32,
            "l": self.read_int32,
            "L": self.read_int32,
            "p": self.read_ptr,
            "P": self.read_ptr,
            "o": self.read_string,
            "O": self.read_string,
            "a": self.read_argv,
            "A": self.read_argv,
            "r": self.read_registry,
            "R": self.read_registry,
        }

    def read_next_message(self):
        """Read one message from the handler and dispatch it.

        Returns True when the message was processed and False when parsing
        failed and the caller should stop (the stream is probably out of
        sync).
        """
        apiindex, status = struct.unpack("BB", self.handler.read(2))
        returnval, tid, timediff = struct.unpack("III", self.handler.read(12))
        context = (apiindex, status, returnval, tid, timediff)

        if apiindex == 0:
            # new process message
            timelow = self.read_int32()
            timehigh = self.read_int32()
            # FILETIME is 100-nanoseconds from 1601 :/
            vmtimeunix = (timelow + (timehigh << 32))
            vmtimeunix = vmtimeunix / 10000000.0 - 11644473600
            try:
                vmtime = datetime.datetime.fromtimestamp(vmtimeunix)
            except Exception:
                # Not a bare except: KeyboardInterrupt / SystemExit must not
                # be reported as a protocol error.
                log.critical("vmtime in new-process-messsage out of range "
                             "(protocol out of sync?)")
                return False

            pid = self.read_int32()
            ppid = self.read_int32()

            try:
                modulepath = self.read_string()
                procname = get_filename_from_path(modulepath)
            except Exception:
                log.exception("Exception in netlog protocol, stopping parser.")
                return False

            if len(procname) > 255:
                log.critical("Huge process name (>255), assuming netlog "
                             "protocol out of sync.")
                log.debug("Process name: %s", repr(procname))
                return False

            self.handler.log_process(context, vmtime, pid, ppid,
                                     modulepath, procname)

        elif apiindex == 1:
            # new thread message
            pid = self.read_int32()
            self.handler.log_thread(context, pid)

        else:
            # actual API call
            try:
                apiname, modulename, parseinfo = LOGTBL[apiindex]
            except IndexError:
                log.debug("Netlog LOGTBL lookup error for API index {0} "
                          "(pid={1}, tid={2})".format(apiindex, None, tid))
                return False

            # parseinfo is (format string, argument name, argument name, ...)
            formatspecifiers = expand_format(parseinfo[0])
            argnames = parseinfo[1:]
            arguments = []
            for pos, fs in enumerate(formatspecifiers):
                argname = argnames[pos]
                fn = self.formatmap.get(fs, None)
                if fn:
                    try:
                        r = fn()
                    except Exception:
                        log.exception("Exception in netlog protocol, "
                                      "stopping parser.")
                        return False

                    arguments.append((argname, r))
                else:
                    log.warning("No handler for format specifier {0} on "
                                "apitype {1}".format(fs, apiname))

            self.handler.log_call(context, apiname, modulename, arguments)

        return True

    def read_int32(self):
        """Reads a 32bit integer from the socket."""
        return struct.unpack("I", self.handler.read(4))[0]

    def read_ptr(self):
        """Read a pointer from the socket, formatted as a 0x-prefixed hex string."""
        value = self.read_int32()
        return "0x%08x" % value

    def read_string(self):
        """Reads a length-prefixed string from the socket.

        The payload is preceded by two 32bit integers, ``length`` and
        ``maxlength``. A marker is appended when ``maxlength`` exceeds
        ``length``.

        Raises CuckooResultError when the announced length is larger than
        0x10000, which is taken as a sign of a broken stream.
        """
        length, maxlength = struct.unpack("II", self.handler.read(8))
        if length < 0 or length > 0x10000:
            log.critical("read_string length weirdness "
                         "length: %d maxlength: %d", length, maxlength)
            raise CuckooResultError("read_string length failure, "
                                    "protocol broken?")

        s = self.handler.read(length)
        if maxlength > length:
            s += "... (truncated)"
        return s

    def read_buffer(self):
        """Reads a length-prefixed memory buffer from the socket.

        Same framing as read_string, but without the upper bound on the
        length. A marker is appended when ``maxlength`` exceeds ``length``.
        """
        length, maxlength = struct.unpack("II", self.handler.read(8))
        buf = self.handler.read(length)
        if maxlength > length:
            buf += " ... (truncated)"
        return buf

    def read_registry(self):
        """Read logged registry data from the socket.

        A 32bit type tag comes first; DWORD types are read as an integer,
        string types as a string, and anything else yields a placeholder
        without consuming further data.
        """
        typ = struct.unpack("I", self.handler.read(4))[0]
        if typ == REG_DWORD_BIG_ENDIAN or typ == REG_DWORD_LITTLE_ENDIAN:
            value = self.read_int32()
        elif typ == REG_SZ or typ == REG_EXPAND_SZ:
            value = self.read_string()
        else:
            value = "(unable to dump buffer content)"
        return value

    def read_list(self, fn):
        """Reads a 32bit count followed by that many items read with _fn_."""
        count = struct.unpack("I", self.handler.read(4))[0]
        return [fn() for _ in xrange(count)]

    def read_argv(self):
        """Reads a list of strings from the socket."""
        return self.read_list(self.read_string)
###############################################################################
# Generic BSON based protocol - by rep
# Allows all kinds of languages / sources to generate input for Cuckoo,
# thus we can reuse report generation / signatures for other API trace sources
###############################################################################

# Type specifier sent by the analyzer -> converter applied to the argument.
TYPECONVERTERS = {
    "p": lambda v: "0x%08x" % default_converter(v),
}

# 20 MiB max message length
MAX_MESSAGE_LENGTH = 20 * 1024 * 1024
# Integer types subject to the sign fix-up below; ``long`` is only a
# separate type on Python 2.
try:
    _INTEGER_TYPES = (int, long)
except NameError:
    _INTEGER_TYPES = (int,)


def default_converter(v):
    """Return ``v`` with negative integers mapped to their unsigned form.

    fix signed ints (bson is kind of limited there): a negative integer is
    shifted up by 2**32. Every other value, including non-integers, is
    returned unchanged.
    """
    if isinstance(v, _INTEGER_TYPES) and v < 0:
        return v + 0x100000000
    return v
233
def check_names_for_typeinfo(arginfo):
    """Split argument descriptions into names and value converters.

    Each entry of ``arginfo`` is either a bare name or a list/tuple whose
    first item is the name and whose second item is a type specifier looked
    up in TYPECONVERTERS. Unknown specifiers and bare names get
    default_converter.

    Returns a tuple ``(argnames, converters)`` of two parallel lists.
    """
    argnames = []
    for entry in arginfo:
        if type(entry) in (list, tuple):
            argnames.append(entry[0])
        else:
            argnames.append(entry)

    converters = []
    for entry in arginfo:
        if type(entry) not in (list, tuple):
            converters.append(default_converter)
            continue

        converter = TYPECONVERTERS.get(entry[1], None)
        if not converter:
            log.debug("Analyzer sent unknown format "
                      "specifier '{0}'".format(entry[1]))
            converter = default_converter
        converters.append(converter)

    return argnames, converters
250 251
class BsonParser(object):
    """Parser for the generic BSON based logging protocol.

    Messages are read from ``handler.read(n)`` and reported through
    ``handler.log_process``, ``handler.log_thread`` and ``handler.log_call``.
    """

    def __init__(self, handler):
        self.handler = handler
        # API index -> (name, arginfo, argnames, converters, category), as
        # announced by "info" messages.
        self.infomap = {}

        if not HAVE_BSON:
            log.critical("Starting BsonParser, but bson is not available! (install with `pip install bson`)")

    def read_next_message(self):
        """Read one BSON message from the handler and dispatch it.

        Returns True when the message was consumed (even if it had to be
        skipped) and False when the handler should stop.
        """
        data = self.handler.read(4)
        blen = struct.unpack("I", data)[0]
        if blen > MAX_MESSAGE_LENGTH:
            log.critical("BSON message larger than MAX_MESSAGE_LENGTH, "
                         "stopping handler.")
            return False

        # The length covers the 4 prefix bytes already read, so a smaller
        # value would make the read below ask for a negative size.
        if blen < 4:
            log.critical("BSON message length smaller than its length "
                         "prefix, stopping handler.")
            return False

        data += self.handler.read(blen - 4)

        try:
            dec = bson_decode(data)
        except Exception as e:
            log.warning("BsonParser decoding problem {0} on "
                        "data[:50] {1}".format(e, repr(data[:50])))
            return False

        mtype = dec.get("type", "none")
        index = dec.get("I", -1)
        tid = dec.get("T", 0)
        time = dec.get("t", 0)

        # context = (apiindex, status, returnval, tid, timediff)
        # Kept as a list because status and returnval are filled in later.
        context = [index, 1, 0, tid, time]

        if mtype == "info":
            # API call index info message, explaining the argument names, etc
            name = dec.get("name", "NONAME")
            arginfo = dec.get("args", [])
            category = dec.get("category")

            # Bson dumps that were generated before cuckoomon exported the
            # "category" field have to get the category using the old method.
            if not category:
                # Try to find the entry/entries with this api name.
                matches = [entry for entry in LOGTBL if entry[0] == name]

                # If we found an entry, take its category, otherwise we take
                # the default string "unknown."
                category = matches[0][1] if matches else "unknown"

            argnames, converters = check_names_for_typeinfo(arginfo)
            self.infomap[index] = name, arginfo, argnames, converters, category

        elif mtype == "debug":
            log.info("Debug message from monitor: "
                     "{0}".format(dec.get("msg", "")))

        elif mtype == "new_process":
            # new_process message from VMI monitor
            vmtime = datetime.datetime.fromtimestamp(dec.get("starttime", 0))
            procname = dec.get("name", "NONAME")
            ppid = 0
            modulepath = "DUMMY"

            self.handler.log_process(context, vmtime, None, ppid,
                                     modulepath, procname)

        else:
            # regular api call
            if index not in self.infomap:
                log.warning("Got API with unknown index - monitor needs "
                            "to explain first: {0}".format(dec))
                return True

            apiname, arginfo, argnames, converters, category = self.infomap[index]
            args = dec.get("args", [])

            if len(args) != len(argnames):
                log.warning("Inconsistent arg count (compared to arg names) "
                            "on {2}: {0} names {1}".format(dec, argnames,
                                                           apiname))
                return True

            argdict = dict((argname, convert(value)) for argname, convert, value
                           in zip(argnames, converters, args))

            if apiname == "__process__":
                # special new process message from cuckoomon
                timelow = argdict["TimeLow"]
                timehigh = argdict["TimeHigh"]
                # FILETIME is 100-nanoseconds from 1601 :/
                vmtimeunix = (timelow + (timehigh << 32))
                vmtimeunix = vmtimeunix / 10000000.0 - 11644473600
                vmtime = datetime.datetime.fromtimestamp(vmtimeunix)

                pid = argdict["ProcessIdentifier"]
                ppid = argdict["ParentProcessIdentifier"]
                modulepath = argdict["ModulePath"]
                procname = get_filename_from_path(modulepath)

                self.handler.log_process(context, vmtime, pid, ppid,
                                         modulepath, procname)
                return True

            elif apiname == "__thread__":
                pid = argdict["ProcessIdentifier"]
                self.handler.log_thread(context, pid)
                return True

            # is_success / retval travel as ordinary arguments; move them
            # into the context and pass on the rest.
            context[1] = argdict.pop("is_success", 1)
            context[2] = argdict.pop("retval", 0)
            # Build a real list so this also works when items() returns a
            # view instead of a list.
            arguments = list(argdict.items())
            arguments.extend(dec.get("aux", {}).items())

            self.handler.log_call(context, apiname, category, arguments)

        return True
368