1
2
3
4
5
6 import datetime
7 import hashlib
8 import logging
9 import os.path
10 import struct
11
def _bson_unavailable(d):
    """Placeholder decoder used when no usable bson implementation exists.

    Raising here (instead of leaving ``bson_decode`` undefined) turns a
    confusing ``NameError`` at decode time into an explicit error message.
    """
    raise RuntimeError(
        "No usable bson implementation is available to decode messages."
    )


# Always bind ``bson_decode`` so that later lookups never fail with a
# NameError; it is replaced below when a working implementation is found.
bson_decode = _bson_unavailable

try:
    import bson
    HAVE_BSON = True
except ImportError:
    HAVE_BSON = False
else:
    # Two incompatible third-party modules are distributed under the name
    # "bson"; pick the decoding entry point that the installed one exposes.
    if hasattr(bson, "BSON"):
        # Class-based API (e.g. the bson package shipped with pymongo).
        def bson_decode(d):
            """Decode one raw BSON document into a dict."""
            return bson.BSON(d).decode()

    elif hasattr(bson, "loads"):
        # Function-based API (e.g. the standalone "bson" package), which
        # mirrors the loads()/dumps() convention of pickle and json.
        def bson_decode(d):
            """Decode one raw BSON document into a dict."""
            return bson.loads(d)
25
26 from lib.cuckoo.common.abstracts import ProtocolHandler
27 from lib.cuckoo.common.utils import get_filename_from_path
28 from lib.cuckoo.common.exceptions import CuckooResultError
29
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Upper bound (in bytes) on the declared length of a single BSON message:
# 20 MiB. Messages announcing a larger size are treated as corrupt input.
MAX_MESSAGE_LENGTH = 20 * 1024 * 1024
34
def pointer_converter_32bit(v):
    """Format integer *v* as a zero-padded 32-bit hexadecimal string.

    The value is reduced modulo 2**32 first, so negative (signed) inputs are
    rendered as their unsigned 32-bit representation, e.g. ``-1`` becomes
    ``"0xffffffff"``.
    """
    return "0x%08x" % (v % 2**32)
37
def pointer_converter_64bit(v):
    """Format integer *v* as a zero-padded 64-bit hexadecimal string.

    The value is reduced modulo 2**64 first, so negative (signed) inputs are
    rendered as their unsigned 64-bit representation, e.g. ``-1`` becomes
    ``"0xffffffffffffffff"``.
    """
    return "0x%016x" % (v % 2**64)
40
try:
    _INTEGER_TYPES = (int, long)  # noqa: F821 - Python 2 has a separate long
except NameError:
    # Python 3 merged ``long`` into ``int``.
    _INTEGER_TYPES = (int,)


def default_converter_32bit(v):
    """Normalize a generic argument value coming from a 32-bit process.

    * Negative integers are wrapped to their unsigned 32-bit value.
    * Byte strings are decoded as latin-1, which maps every byte to a code
      point and therefore can never raise a decoding error.
    * Anything else is returned unchanged.
    """
    if isinstance(v, _INTEGER_TYPES) and v < 0:
        return v % 2**32

    # ``bytes`` is an alias of ``str`` on Python 2, so this matches the
    # historical ``isinstance(v, str)`` check there while also doing the
    # right thing for byte strings on Python 3.
    if isinstance(v, bytes):
        return v.decode("latin-1")
    return v
49
51
52
53
54
55
56
57
def default_converter_64bit(v):
    """Normalize a generic argument value coming from a 64-bit process.

    Unlike the 32-bit variant, integers are passed through untouched (negative
    values are NOT wrapped to unsigned). Byte strings are decoded as latin-1,
    which cannot fail; every other value is returned unchanged.
    """
    # ``bytes`` is an alias of ``str`` on Python 2, so this matches the
    # historical ``isinstance(v, str)`` check there while also handling byte
    # strings on Python 3.
    if isinstance(v, bytes):
        return v.decode("latin-1")
    return v
61
class BsonParser(ProtocolHandler):
    """Receives and interprets .bson logs from the monitor.

    The monitor provides us with "info" messages that explain how the function
    arguments will come through later on. This class remembers these info
    mappings and then transforms the api call messages accordingly.

    Other message types typically get passed through after renaming the
    keys slightly.
    """
    # NOTE(review): the class header and the method headers were missing from
    # the reviewed source and have been reconstructed. `resolve_flags` and
    # `determine_unserializers` follow from their call sites; the class name,
    # the ProtocolHandler base, `init` and `__iter__` are inferred - confirm.

    # Argument type code -> converter, for messages from 32-bit processes.
    # ``None`` is the entry used for arguments announced without a type code.
    converters_32bit = {
        None: default_converter_32bit,
        "p": pointer_converter_32bit,
        "x": pointer_converter_32bit,
    }

    # Same mapping for 64-bit processes. "x" keeps the 32-bit hex formatter
    # here as well, exactly as in the original table.
    converters_64bit = {
        None: default_converter_64bit,
        "p": pointer_converter_64bit,
        "x": pointer_converter_32bit,
    }

    def init(self):
        """Bind to the underlying handler and reset all parsing state.

        NOTE(review): assumed to be a setup hook invoked by the base class
        once ``self.handler`` is available - confirm against ProtocolHandler.
        """
        self.fd = self.handler

        # index -> (name, arginfo, argnames, converters, category), filled in
        # by "info" messages.
        self.infomap = {}
        # apiname -> {argument: {value: label}}
        self.flags_value = {}
        # apiname -> {argument: [(bitmask, label), ...]}
        self.flags_bitmask = {}
        # Pid of the most recently announced process.
        self.pid = None
        self.is_64bit = False
        # Checksum of the last "buffer" message not yet attached to a call.
        self.buffer_sha1 = None

        if not HAVE_BSON:
            log.critical(
                "Starting BsonParser, but bson is not available! "
                "(install with `pip install bson`)"
            )

    @staticmethod
    def _flag_number(raw):
        """Return *raw* as an integer; strings are parsed as hexadecimal."""
        if isinstance(raw, str):
            return int(raw, 16)
        return raw

    def resolve_flags(self, apiname, argdict, flags):
        """Translate numeric flag arguments of *apiname* into readable names.

        The results are stored into *flags* (modified in place), keyed by
        argument name.

        :param apiname: name of the API whose flag tables should be applied.
        :param argdict: argument name -> converted value for this call.
        :param flags: output dict receiving argument name -> resolved string.
        """
        # Exact matches: the whole value corresponds to one named constant.
        # ``.get`` is used because an API may have registered only one of the
        # two kinds of flag tables.
        for argument, values in self.flags_value.get(apiname, {}).items():
            if argument not in argdict:
                continue

            value = self._flag_number(argdict[argument])
            if value in values:
                flags[argument] = values[value]

        # Bitmasks: collect every flag whose bits are all set in the value.
        for argument, values in self.flags_bitmask.get(apiname, {}).items():
            # An exact match found above takes precedence.
            if argument in flags or argument not in argdict:
                continue

            value = self._flag_number(argdict[argument])
            flags[argument] = "|".join(
                flag for key, flag in values if (value & key) == key
            )

    def determine_unserializers(self, arginfo):
        """Determines which unserializers (or converters) have to be used in
        order to parse the various arguments for this function call. Keeps in
        mind whether the current bson is 32-bit or 64-bit.

        :param arginfo: iterable whose items are either an argument name or a
            ``(name, type code)`` pair.
        :returns: ``(argnames, converters)``, two lists of equal length.
        """
        if self.is_64bit:
            table = self.converters_64bit
        else:
            table = self.converters_32bit

        argnames, converters = [], []

        for argument in arginfo:
            if isinstance(argument, (tuple, list)):
                argument, argtype = argument
            else:
                argtype = None

            argnames.append(argument)
            # Unknown type codes fall back to the default converter rather
            # than aborting the whole log with a KeyError.
            converters.append(table.get(argtype, table[None]))

        return argnames, converters

    def __iter__(self):
        """Yield one parsed dict per message found in the underlying stream.

        The stream is rewound first. "info" and "buffer" messages only update
        internal state and are not yielded. Every yielded dict has the keys
        "type", "tid" and "time", plus type-specific keys. Parsing stops
        silently at end of stream and with a log message on truncated,
        oversized or undecodable data.

        :raises CuckooResultError: when a process announcement carries
            neither of the known timestamp argument layouts.
        """
        self.fd.seek(0)

        while True:
            # Each message starts with its total length as a 32-bit integer.
            data = self.fd.read(4)
            if not data:
                return

            if len(data) != 4:
                log.critical("BsonParser lacking data.")
                return

            # The BSON length prefix is little-endian regardless of the host
            # byte order, hence the explicit "<".
            blen = struct.unpack("<I", data)[0]
            if blen > MAX_MESSAGE_LENGTH:
                log.critical(
                    "BSON message larger than MAX_MESSAGE_LENGTH, "
                    "stopping handler."
                )
                return

            # The length includes the 4-byte prefix itself; a smaller value
            # would turn the read below into read(<negative>), which consumes
            # the whole remaining stream.
            if blen < 4:
                log.critical(
                    "BSON message with invalid length, stopping handler."
                )
                return

            data += self.fd.read(blen - 4)
            if len(data) < blen:
                log.critical("BsonParser lacking data.")
                return

            try:
                dec = bson_decode(data)
            except Exception as e:
                log.warning(
                    "BsonParser decoding problem %s on data[:50] %s",
                    e, repr(data[:50])
                )
                return

            mtype = dec.get("type", "none")
            index = dec.get("I", -1)

            if mtype == "info":
                # Describes an API: remember how to decode later calls that
                # reference the same index.
                name = dec.get("name", "NONAME")
                arginfo = dec.get("args", [])
                category = dec.get("category")

                argnames, converters = self.determine_unserializers(arginfo)
                self.infomap[index] = (
                    name, arginfo, argnames, converters, category
                )

                if dec.get("flags_value"):
                    self.flags_value[name] = {}
                    for arg, values in dec["flags_value"].items():
                        self.flags_value[name][arg] = dict(values)

                if dec.get("flags_bitmask"):
                    self.flags_bitmask[name] = {}
                    for arg, values in dec["flags_bitmask"].items():
                        self.flags_bitmask[name][arg] = values
                continue

            if mtype == "buffer":
                # A raw buffer belonging to the next API call; remember its
                # checksum so it can be attached to that call.
                buf = dec.get("buffer")
                sha1 = dec.get("checksum")
                self.buffer_sha1 = hashlib.sha1(buf).hexdigest()

                # The locally computed checksum is the one that gets used;
                # a mismatch is only reported.
                if sha1 != self.buffer_sha1:
                    log.warning("Incorrect sha1 passed along for a buffer.")

                # Imported lazily inside the loop, as in the original code
                # (presumably to avoid a circular import - confirm).
                from lib.cuckoo.core.resultserver import ResultHandler

                # Only dump the buffer to disk when reading from a live
                # ResultHandler.
                if isinstance(self.fd, ResultHandler):
                    filepath = os.path.join(
                        self.fd.storagepath, "buffer", self.buffer_sha1
                    )
                    with open(filepath, "wb") as f:
                        f.write(buf)

                continue

            parsed = {
                "type": mtype,
                "tid": dec.get("T", 0),
                "time": dec.get("t", 0),
            }

            if mtype == "debug":
                parsed["message"] = dec.get("msg", "")
                log.info("Debug message from monitor: %s", parsed["message"])
            else:
                # Regular API message: it must refer to a known "info" entry.
                if index not in self.infomap:
                    log.warning(
                        "Got API with unknown index - monitor needs "
                        "to explain first: %s", dec
                    )
                    continue

                apiname, arginfo, argnames, converters, category = \
                    self.infomap[index]
                args = dec.get("args", [])

                if len(args) != len(argnames):
                    log.warning(
                        "Inconsistent arg count (compared to arg names) "
                        "on %s: %s names %s", dec, argnames, apiname
                    )
                    continue

                argdict = {}
                for idx, value in enumerate(args):
                    argdict[argnames[idx]] = converters[idx](value)

                if apiname == "__process__":
                    # Pseudo API announcing a (new) process.
                    parsed["type"] = "process"

                    if "TimeLow" in argdict:
                        # CamelCase argument layout.
                        timelow = argdict["TimeLow"]
                        timehigh = argdict["TimeHigh"]

                        parsed["pid"] = pid = argdict["ProcessIdentifier"]
                        parsed["ppid"] = argdict["ParentProcessIdentifier"]
                        modulepath = argdict["ModulePath"]

                    elif "time_low" in argdict:
                        # snake_case argument layout, with two spellings of
                        # the pid fields.
                        timelow = argdict["time_low"]
                        timehigh = argdict["time_high"]

                        if "pid" in argdict:
                            parsed["pid"] = pid = argdict["pid"]
                            parsed["ppid"] = argdict["ppid"]
                        else:
                            parsed["pid"] = pid = \
                                argdict["process_identifier"]
                            parsed["ppid"] = \
                                argdict["parent_process_identifier"]

                        modulepath = argdict["module_path"]

                    else:
                        raise CuckooResultError(
                            "I don't recognize the bson log contents."
                        )

                    # Combine the two 32-bit halves and convert to Unix time:
                    # the arithmetic matches a Windows FILETIME (100 ns ticks
                    # since 1601-01-01; 11644473600 s separate 1601 and 1970).
                    vmtimeunix = (timelow + (timehigh << 32))
                    vmtimeunix = vmtimeunix / 10000000.0 - 11644473600
                    # fromtimestamp() yields a naive datetime in local time.
                    vmtime = datetime.datetime.fromtimestamp(vmtimeunix)
                    parsed["first_seen"] = vmtime

                    procname = get_filename_from_path(modulepath)
                    parsed["process_path"] = modulepath
                    parsed["process_name"] = procname
                    parsed["command_line"] = argdict.get("command_line")

                    # Affects which converter table later "info" messages
                    # are resolved against.
                    if argdict.get("is_64bit"):
                        self.is_64bit = True

                    parsed["track"] = bool(argdict.get("track", 1))
                    parsed["modules"] = argdict.get("modules", {})

                    self.pid = pid

                elif apiname == "__thread__":
                    # Pseudo API announcing a thread; only the pid is kept.
                    parsed["pid"] = argdict["ProcessIdentifier"]

                else:
                    parsed["type"] = "apicall"
                    parsed["pid"] = self.pid
                    parsed["api"] = apiname
                    parsed["category"] = category
                    # Status and return value are reported separately and
                    # removed from the argument dict.
                    parsed["status"] = argdict.pop("is_success", 1)
                    parsed["return_value"] = argdict.pop("retval", 0)
                    parsed["arguments"] = argdict
                    parsed["flags"] = {}

                    parsed["stacktrace"] = dec.get("s", [])
                    parsed["uniqhash"] = dec.get("h", 0)

                    if "e" in dec and "E" in dec:
                        parsed["last_error"] = dec["e"]
                        parsed["nt_status"] = dec["E"]

                    # Either kind of flag table is enough to attempt a
                    # resolution.
                    if (apiname in self.flags_value or
                            apiname in self.flags_bitmask):
                        self.resolve_flags(apiname, argdict, parsed["flags"])

                    # Attach the preceding buffer (if any) to this call only.
                    if self.buffer_sha1:
                        parsed["buffer"] = self.buffer_sha1
                        self.buffer_sha1 = None

            yield parsed
352