Package lib :: Package cuckoo :: Package common :: Module netlog
[hide private]
[frames] | no frames]

Source Code for Module lib.cuckoo.common.netlog

  1  # Copyright (C) 2010-2013 Claudio Guarnieri. 
  2  # Copyright (C) 2014-2016 Cuckoo Foundation. 
  3  # This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org 
  4  # See the file 'docs/LICENSE' for copying permission. 
  5   
  6  import datetime 
  7  import hashlib 
  8  import logging 
  9  import os.path 
 10  import struct 
 11   
 12  try: 
 13      import bson 
 14      HAVE_BSON = True 
 15  except ImportError: 
 16      HAVE_BSON = False 
 17  else: 
 18      # The BSON module provided by pymongo works through its "BSON" class. 
 19      if hasattr(bson, "BSON"): 
 20          bson_decode = lambda d: bson.BSON(d).decode() 
 21      # The BSON module provided by "pip install bson" works through the 
 22      # "loads" function (just like pickle etc.) 
 23      elif hasattr(bson, "loads"): 
 24          bson_decode = lambda d: bson.loads(d) 
 25   
 26  from lib.cuckoo.common.abstracts import ProtocolHandler 
 27  from lib.cuckoo.common.utils import get_filename_from_path 
 28  from lib.cuckoo.common.exceptions import CuckooResultError 
 29   
 30  log = logging.getLogger(__name__) 
 31   
 32  # 20 Mb max message length. 
 33  MAX_MESSAGE_LENGTH = 20 * 1024 * 1024 
 34   
def pointer_converter_32bit(v):
    """Render `v` as a 32-bit pointer string such as "0x0040abcd".

    The value is first wrapped modulo 2**32, so negative numbers are shown as
    their unsigned 32-bit counterpart, then printed as "0x" followed by eight
    zero-padded lowercase hexadecimal digits.
    """
    unsigned = v % (1 << 32)
    return "0x%08x" % unsigned
37
def pointer_converter_64bit(v):
    """Render `v` as a 64-bit pointer string such as "0x000007fefd2a0000".

    The value is first wrapped modulo 2**64, so negative numbers are shown as
    their unsigned 64-bit counterpart, then printed as "0x" followed by
    sixteen zero-padded lowercase hexadecimal digits.
    """
    unsigned = v % (1 << 64)
    return "0x%016x" % unsigned
40
def default_converter_32bit(v):
    """Normalise an untyped argument value coming from a 32-bit process.

    * byte strings are decoded as latin-1, which sidesteps various unicode
      issues because every byte maps to a code point;
    * negative integers are wrapped modulo 2**32, i.e. turned into their
      unsigned 32-bit equivalent;
    * anything else is handed back untouched.
    """
    # Strings and integers are disjoint types, so the order of these two
    # checks does not matter.
    if isinstance(v, str):
        return v.decode("latin-1")

    is_negative_int = isinstance(v, (int, long)) and v < 0
    if is_negative_int:
        return v % 2**32

    return v
49
def default_converter_64bit(v):
    """Normalise an untyped argument value coming from a 64-bit process.

    Byte strings are decoded as latin-1 to avoid various unicode issues;
    every other value is returned as-is.

    Unlike the 32-bit variant, negative integers are deliberately NOT wrapped
    into the unsigned range (that would be `v % 2**64`): MongoDB doesn't
    support 64-bit unsigned integers (and ElasticSearch probably doesn't
    either).
    """
    if not isinstance(v, str):
        return v
    return v.decode("latin-1")
61
class BsonParser(ProtocolHandler):
    """Receives and interprets .bson logs from the monitor.

    The monitor provides us with "info" messages that explain how the function
    arguments will come through later on. This class remembers these info
    mappings and then transforms the api call messages accordingly.

    Other message types typically get passed through after renaming the
    keys slightly.

    Iterating over an instance rewinds the underlying stream and yields one
    dictionary per debug, process, thread or API call message.
    """
    # Converter lookup tables, keyed by the argument type given in the
    # monitor's "info" message (None when the argument carries no type).
    converters_32bit = {
        None: default_converter_32bit,
        "p": pointer_converter_32bit,
        "x": pointer_converter_32bit,
    }

    # NOTE(review): "x" uses the 32-bit formatter in both tables; presumably
    # "x" denotes a 32-bit hexadecimal value regardless of the process
    # bitness - confirm against the monitor.
    converters_64bit = {
        None: default_converter_64bit,
        "p": pointer_converter_64bit,
        "x": pointer_converter_32bit,
    }

    # A BSON document is at least 5 bytes long: the 4-byte length prefix
    # plus the terminating NUL byte.
    _MIN_MESSAGE_LENGTH = 5

    def init(self):
        """Set up the parser state and use `self.handler` as input stream."""
        self.fd = self.handler

        # Message index -> (name, arginfo, argnames, converters, category).
        self.infomap = {}
        # API name -> {argument -> {value -> flag name}}.
        self.flags_value = {}
        # API name -> {argument -> [(bitmask, flag name), ...]}.
        self.flags_bitmask = {}
        self.pid = None
        self.is_64bit = False
        # SHA1 of the last dumped buffer, attached to the next API call.
        self.buffer_sha1 = None

        if not HAVE_BSON:
            log.critical(
                "Starting BsonParser, but bson is not available! "
                "(install with `pip install bson`)"
            )

    @staticmethod
    def _flag_operand(value):
        """Parse `value` as hexadecimal if it is a str, else return it as-is.

        Pointer-style arguments have already been turned into "0x..." strings
        by the converters, hence the base-16 parsing.
        """
        if isinstance(value, str):
            return int(value, 16)
        return value

    def resolve_flags(self, apiname, argdict, flags):
        """Translate numeric flag arguments of `apiname` into flag names.

        @param apiname: name of the API whose flag tables should be used.
        @param argdict: argument name -> converted argument value.
        @param flags: dictionary that receives argument name -> flag string;
                      it is updated in place.

        Exact value matches take precedence; arguments that did not match
        exactly are decomposed using the bitmask table and joined with "|".
        APIs that have only one of the two tables, and flag entries for
        arguments that are missing from `argdict`, are handled gracefully.
        """
        # Resolve 1:1 values.
        for argument, values in self.flags_value.get(apiname, {}).items():
            if argument not in argdict:
                continue

            value = self._flag_operand(argdict[argument])
            if value in values:
                flags[argument] = values[value]

        # Resolve bitmasks.
        for argument, values in self.flags_bitmask.get(apiname, {}).items():
            # An exact match above wins over the bitmask decomposition.
            if argument in flags or argument not in argdict:
                continue

            value = self._flag_operand(argdict[argument])

            # TODO Have the monitor provide actual bitmasks as well.
            matched = [flag for key, flag in values if (value & key) == key]
            flags[argument] = "|".join(matched)

    def determine_unserializers(self, arginfo):
        """Determines which unserializers (or converters) have to be used in
        order to parse the various arguments for this function call. Keeps in
        mind whether the current bson is 32-bit or 64-bit.

        @param arginfo: sequence whose entries are either an argument name or
                        a (name, type) pair.
        @return: tuple of (argument names, converter callables), both lists
                 in the order of `arginfo`.
        @raise KeyError: if an argument type has no registered converter.
        """
        if self.is_64bit:
            table = self.converters_64bit
        else:
            table = self.converters_32bit

        argnames, converters = [], []

        for argument in arginfo:
            if isinstance(argument, (tuple, list)):
                argument, argtype = argument
            else:
                argtype = None

            argnames.append(argument)
            converters.append(table[argtype])

        return argnames, converters

    def _read_message(self):
        """Read and decode the next BSON document from the stream.

        @return: the decoded document, or None when the stream is exhausted
                 or the data is truncated, oversized or undecodable (each of
                 those problems is logged).
        """
        header = self.fd.read(4)
        if not header:
            return None

        if len(header) != 4:
            log.critical("BsonParser lacking data.")
            return None

        # The BSON length prefix is a little-endian int32, so do not rely on
        # the byte order of the machine running this code.
        blen = struct.unpack("<I", header)[0]
        if blen > MAX_MESSAGE_LENGTH:
            log.critical(
                "BSON message larger than MAX_MESSAGE_LENGTH, "
                "stopping handler."
            )
            return None

        # Guard against bogus lengths; otherwise read() below would be called
        # with a zero or negative size (a negative size makes file objects
        # read everything that is left).
        if blen < self._MIN_MESSAGE_LENGTH:
            log.critical(
                "BSON message with invalid length %d, stopping handler.",
                blen
            )
            return None

        data = header + self.fd.read(blen - 4)
        if len(data) < blen:
            log.critical("BsonParser lacking data.")
            return None

        try:
            # If no bson decoder could be set up at import time this raises
            # a NameError, which is reported like any other decoding problem.
            return bson_decode(data)
        except Exception as e:
            log.warning(
                "BsonParser decoding problem %s on data[:50] %s",
                e, repr(data[:50])
            )
            return None

    def _handle_info(self, index, dec):
        """Remember the argument layout and flag tables of an "info" message.

        API call index info message, explaining the argument names, etc.
        """
        name = dec.get("name", "NONAME")
        arginfo = dec.get("args", [])
        category = dec.get("category")

        argnames, converters = self.determine_unserializers(arginfo)
        self.infomap[index] = name, arginfo, argnames, converters, category

        if dec.get("flags_value"):
            self.flags_value[name] = dict(
                (arg, dict(values))
                for arg, values in dec["flags_value"].items()
            )

        if dec.get("flags_bitmask"):
            self.flags_bitmask[name] = dict(dec["flags_bitmask"].items())

    def _handle_buffer(self, dec):
        """Record (and, during an analysis, store) a dumped buffer.

        The SHA1 of the buffer is kept in `self.buffer_sha1` so that it can
        be attached to the next API call message.
        """
        buf = dec.get("buffer")
        if buf is None:
            # Nothing to hash or dump; do not crash on a malformed message.
            log.warning("Buffer message without a buffer, ignoring it.")
            return

        sha1 = dec.get("checksum")
        self.buffer_sha1 = hashlib.sha1(buf).hexdigest()

        # Why do we pass along a sha1 checksum again?
        if sha1 != self.buffer_sha1:
            log.warning("Incorrect sha1 passed along for a buffer.")

        # If the parent is netlogs ResultHandler then we actually dump
        # it - this should only be the case during the analysis, any
        # after processing will then be ignored.
        # Imported here rather than at module level (presumably to avoid a
        # circular import - confirm).
        from lib.cuckoo.core.resultserver import ResultHandler

        if isinstance(self.fd, ResultHandler):
            filepath = os.path.join(
                self.fd.storagepath, "buffer", self.buffer_sha1
            )
            with open(filepath, "wb") as f:
                f.write(buf)

    def _parse_process(self, argdict, parsed):
        """Fill `parsed` from a special new process message of the monitor.

        Supports both the CamelCase and the snake_case argument naming.
        Also updates `self.pid` and `self.is_64bit`.

        @raise CuckooResultError: if neither naming scheme is recognized.
        """
        parsed["type"] = "process"

        if "TimeLow" in argdict:
            timelow = argdict["TimeLow"]
            timehigh = argdict["TimeHigh"]

            parsed["pid"] = pid = argdict["ProcessIdentifier"]
            parsed["ppid"] = argdict["ParentProcessIdentifier"]
            modulepath = argdict["ModulePath"]

        elif "time_low" in argdict:
            timelow = argdict["time_low"]
            timehigh = argdict["time_high"]

            if "pid" in argdict:
                parsed["pid"] = pid = argdict["pid"]
                parsed["ppid"] = argdict["ppid"]
            else:
                parsed["pid"] = pid = argdict["process_identifier"]
                parsed["ppid"] = argdict["parent_process_identifier"]

            modulepath = argdict["module_path"]

        else:
            raise CuckooResultError(
                "I don't recognize the bson log contents."
            )

        # FILETIME is 100-nanoseconds from 1601 :/
        # Both halves are masked to 32 bits so that a half which arrives as
        # a negative (signed) integer cannot corrupt the combined value.
        filetime = (timelow & 0xffffffff) | ((timehigh & 0xffffffff) << 32)
        vmtimeunix = filetime / 10000000.0 - 11644473600
        parsed["first_seen"] = datetime.datetime.fromtimestamp(vmtimeunix)

        parsed["process_path"] = modulepath
        parsed["process_name"] = get_filename_from_path(modulepath)
        parsed["command_line"] = argdict.get("command_line")

        # Is this a 64-bit process?
        if argdict.get("is_64bit"):
            self.is_64bit = True

        # Is this process being "tracked"?
        parsed["track"] = bool(argdict.get("track", 1))
        parsed["modules"] = argdict.get("modules", {})

        self.pid = pid

    def _parse_apicall(self, apiname, category, argdict, dec, parsed):
        """Fill `parsed` from a regular API call message."""
        parsed["type"] = "apicall"
        parsed["pid"] = self.pid
        parsed["api"] = apiname
        parsed["category"] = category
        parsed["status"] = argdict.pop("is_success", 1)
        parsed["return_value"] = argdict.pop("retval", 0)
        parsed["arguments"] = argdict
        parsed["flags"] = {}

        parsed["stacktrace"] = dec.get("s", [])
        parsed["uniqhash"] = dec.get("h", 0)

        if "e" in dec and "E" in dec:
            parsed["last_error"] = dec["e"]
            parsed["nt_status"] = dec["E"]

        # Either table on its own is enough to resolve flags.
        if apiname in self.flags_value or apiname in self.flags_bitmask:
            self.resolve_flags(apiname, argdict, parsed["flags"])

        # A buffer dumped right before this call belongs to this call.
        if self.buffer_sha1:
            parsed["buffer"] = self.buffer_sha1
            self.buffer_sha1 = None

    def _parse_call(self, index, dec, parsed):
        """Fill `parsed` from a message that refers to an "info" index.

        @return: True when `parsed` is complete, False when the message has
                 to be skipped (unknown index or argument count mismatch).
        """
        if index not in self.infomap:
            log.warning(
                "Got API with unknown index - monitor needs "
                "to explain first: %s", dec
            )
            return False

        apiname, arginfo, argnames, converters, category = self.infomap[index]
        args = dec.get("args", [])

        if len(args) != len(argnames):
            log.warning(
                "Inconsistent arg count (compared to arg names) "
                "on %s: %s names %s", dec, argnames, apiname
            )
            return False

        argdict = {}
        for argname, converter, value in zip(argnames, converters, args):
            argdict[argname] = converter(value)

        if apiname == "__process__":
            self._parse_process(argdict, parsed)

        elif apiname == "__thread__":
            parsed["pid"] = argdict["ProcessIdentifier"]

        # elif apiname == "__anomaly__":
        #     tid = argdict["ThreadIdentifier"]
        #     subcategory = argdict["Subcategory"]
        #     msg = argdict["Message"]
        #     self.handler.log_anomaly(subcategory, tid, msg)
        #     return True

        else:
            self._parse_apicall(apiname, category, argdict, dec, parsed)

        return True

    def __iter__(self):
        """Rewind the stream and yield one dictionary per parsed message.

        "info" and "buffer" messages only update the parser state and are
        not yielded. Iteration stops at the end of the stream or at the
        first truncated, oversized or undecodable message.

        @raise CuckooResultError: on an unrecognized process message.
        """
        self.fd.seek(0)

        while True:
            dec = self._read_message()
            if dec is None:
                return

            mtype = dec.get("type", "none")
            index = dec.get("I", -1)

            if mtype == "info":
                self._handle_info(index, dec)
                continue

            # Handle dumped buffers.
            if mtype == "buffer":
                self._handle_buffer(dec)
                continue

            parsed = {
                "type": mtype,
                "tid": dec.get("T", 0),
                "time": dec.get("t", 0),
            }

            if mtype == "debug":
                parsed["message"] = dec.get("msg", "")
                log.info("Debug message from monitor: %s", parsed["message"])
            elif not self._parse_call(index, dec, parsed):
                # Regular api call from monitor that could not be parsed.
                continue

            yield parsed
352