import collections
import json
import logging
import os

from lib.cuckoo.common.abstracts import Processing, BehaviorHandler
from lib.cuckoo.common.config import Config

from .platform.windows import WindowsMonitor
from .platform.linux import LinuxSystemTap

log = logging.getLogger(__name__)
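
# Events flowing through the handlers below are plain dicts whose "type" key
# selects the interested handlers. A rough sketch of two common shapes (the
# field values here are hypothetical; the real ones come from the platform
# specific parsers):
#
#   {"type": "generic", "pid": 1234, "category": "file_written",
#    "value": "C:\\Users\\victim\\evil.exe"}
#   {"type": "process", "pid": 1234, "ppid": 4, "process_name": "evil.exe",
#    "first_seen": 1449280000.0}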

class Summary(BehaviorHandler):
    """Generates overview summary information (not split by process)."""

    key = "summary"
    event_types = ["generic"]

    def __init__(self, *args, **kwargs):
        super(Summary, self).__init__(*args, **kwargs)
        self.results = collections.defaultdict(set)

    def handle_event(self, event):
        self.results[event["category"]].add(event["value"])

    def run(self):
        # Convert each set to a list so the summary is JSON serializable.
        for key, value in self.results.items():
            self.results[key] = list(value)
        return self.results

class Anomaly(BehaviorHandler):
    """Anomaly detected during analysis.
    For example: a malware tried to remove Cuckoo's hooks.
    """

    key = "anomaly"
    event_types = ["anomaly"]

    def __init__(self, *args, **kwargs):
        super(Anomaly, self).__init__(*args, **kwargs)
        self.anomalies = []

    def handle_event(self, call):
        """Process API calls.
        @param call: API call object
        """
        category, funcname, message = None, None, None
        for row in call["arguments"]:
            if row["name"] == "Subcategory":
                category = row["value"]
            if row["name"] == "FunctionName":
                funcname = row["value"]
            if row["name"] == "Message":
                message = row["value"]

        self.anomalies.append(dict(
            category=category,
            funcname=funcname,
            message=message,
        ))

    def run(self):
        """Fetch all anomalies."""
        return self.anomalies

class ProcessTree(BehaviorHandler):
    """Generates process tree."""

    key = "processtree"
    event_types = ["process"]

    def __init__(self, *args, **kwargs):
        super(ProcessTree, self).__init__(*args, **kwargs)
        self.processes = {}

    def handle_event(self, process):
        if process["pid"] in self.processes:
            log.warning(
                "Found the same process identifier twice, this "
                "shouldn't happen!"
            )
            return

        self.processes[process["pid"]] = {
            "pid": process["pid"],
            "ppid": process["ppid"],
            "process_name": process["process_name"],
            "command_line": process.get("command_line"),
            "first_seen": process["first_seen"],
            "children": [],
            "track": process.get("track", True),
        }

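    # run() stitches the flat pid->process map into a tree: every process is
    # appended to its parent's "children" list, and processes whose parent
    # was never tracked become children of an artificial root node.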
    def run(self):
        root = {
            "children": [],
        }
        first_seen = lambda x: x["first_seen"]

        for p in sorted(self.processes.values(), key=first_seen):
            self.processes.get(p["ppid"], root)["children"].append(p)

        return sorted(root["children"], key=first_seen)

class GenericBehavior(BehaviorHandler):
    """Generates summary information per process."""

    key = "generic"
    event_types = ["process", "generic"]

    def __init__(self, *args, **kwargs):
        super(GenericBehavior, self).__init__(*args, **kwargs)
        self.processes = {}

    def handle_process_event(self, process):
        if process["pid"] in self.processes:
            return

        self.processes[process["pid"]] = {
            "pid": process["pid"],
            "ppid": process["ppid"],
            "process_name": process["process_name"],
            "process_path": process["process_path"],
            "first_seen": process["first_seen"],
            "summary": collections.defaultdict(set),
        }

    def handle_generic_event(self, event):
        if event["pid"] in self.processes:
            pid, category = event["pid"], event["category"]
            self.processes[pid]["summary"][category].add(event["value"])
        else:
            log.warning("Generic event for unknown process id %u", event["pid"])

    def run(self):
        # Convert the per-category sets to lists for serialization.
        for process in self.processes.values():
            for key, value in process["summary"].items():
                process["summary"][key] = list(value)

        return self.processes.values()

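# A hypothetical per-process entry as produced by GenericBehavior:
#
#   {"pid": 1234, "ppid": 4, "process_name": "evil.exe",
#    "process_path": "C:\\evil.exe", "first_seen": 1449280000.0,
#    "summary": {"file_opened": ["C:\\Windows\\win.ini"]}}
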
class ApiStats(BehaviorHandler):
    """Counts API calls."""

    key = "apistats"
    event_types = ["apicall"]

    def __init__(self, *args, **kwargs):
        super(ApiStats, self).__init__(*args, **kwargs)
        self.processes = collections.defaultdict(lambda: collections.defaultdict(int))

    def handle_event(self, event):
        self.processes["%d" % event["pid"]][event["api"]] += 1

    def run(self):
        return self.processes

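# ApiStats output maps each pid (as a string) to per-API call counts, for
# example {"1234": {"NtCreateFile": 17, "RegSetValueExA": 3}} (hypothetical
# values).
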
class RebootInformation(BehaviorHandler):
    """Collects information useful for reboot analysis, i.e., the events
    required to start the sample again after a reboot."""

    key = "reboot"
    event_types = ["reboot"]

    def __init__(self, *args, **kwargs):
        super(RebootInformation, self).__init__(*args, **kwargs)
        self.events = []

    def handle_event(self, event):
        self.events.append((event["time"], event))

    def run(self):
        # Return the reboot events sorted chronologically.
        return [event for ts, event in sorted(self.events)]

class BehaviorAnalysis(Processing):
    """Behavior Analyzer.

    The behavior key in the results dict will contain both default content
    keys with generic / abstracted analysis info, available on any platform,
    and platform / analyzer specific output.

    Typically the analyzer output is separated per process, as we are
    tracking individual processes in most cases.

    Several handlers produce the respective keys / subkeys. The platform /
    analyzer specific ones parse / process the captured data and yield both
    their own output and a standard structure that is then picked up by the
    "generic" handlers to generate the standard result structures.

    The resulting structure wraps the monitored function calls in layered
    iterators that stream the content only when a sink (reporting,
    signatures) needs it, reducing the memory footprint.

    In the end each analysis should therefore require only two passes over
    the results: once during processing (creating the generic output,
    summaries, etc.) and once during reporting (or once per report type if
    multiple are enabled).
    """

    key = "behavior"

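    # The dict returned by run() is keyed by each handler's `key` attribute:
    # "summary", "anomaly", "processtree", "generic", "apistats", "reboot",
    # plus any keys contributed by the platform-specific handlers.
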
    def _enum_logs(self):
        """Enumerate all behavior logs."""
        if not os.path.exists(self.logs_path):
            log.warning("Analysis results folder does not exist at path %r.", self.logs_path)
            return

        logs = os.listdir(self.logs_path)
        if not logs:
            log.warning("Analysis results folder does not contain any behavior log files.")
            return

        for fname in logs:
            path = os.path.join(self.logs_path, fname)
            if not os.path.isfile(path):
                log.warning("Behavior log file %r is not a file.", fname)
                continue

            analysis_size_limit = self.cfg.processing.analysis_size_limit
            if analysis_size_limit and \
                    os.stat(path).st_size > analysis_size_limit:
                log.critical("Behavior log file %r is too big, skipped.", fname)
                continue

            yield path

    def run(self):
        """Run analysis.
        @return: results dict.
        """
        self.cfg = Config()
        self.state = {}

        # These handlers are present for every analysis, regardless of the
        # platform or log format.
        handlers = [
            GenericBehavior(self),
            ProcessTree(self),
            Summary(self),
            Anomaly(self),
            ApiStats(self),

            # Platform specific handlers.
            WindowsMonitor(self),
            LinuxSystemTap(self),

            # Other analysis info.
            RebootInformation(self),
        ]

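        # Build a lookup map from event type to the handler callbacks
        # interested in that type. Ideally only handlers matching the task's
        # platform would apply, but task information is not always available;
        # instead, each handler's handles_path() check below decides which
        # logs it parses.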
        interest_map = {}
        for h in handlers:
            for event_type in h.event_types:
                if event_type not in interest_map:
                    interest_map[event_type] = []

                # Prefer a specific handle_<event_type>_event method over the
                # generic handle_event fallback.
                if hasattr(h, "handle_%s_event" % event_type):
                    fn = getattr(h, "handle_%s_event" % event_type)
                    interest_map[event_type].append(fn)
                elif h.handle_event not in interest_map[event_type]:
                    interest_map[event_type].append(h.handle_event)

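        # With the default handlers, for instance, a "process" event is
        # dispatched to both ProcessTree.handle_event and
        # GenericBehavior.handle_process_event (illustrative; the exact set
        # depends on the handlers above).
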
        # Each log file is parsed by exactly one handler, which yields the
        # events contained in it; every event is then forwarded to all
        # handlers interested in its type.
        for path in self._enum_logs():
            for handler in handlers:
                if not handler.handles_path(path):
                    continue

                for event in handler.parse(path):
                    for hhandler in interest_map.get(event["type"], []):
                        res = hhandler(event)

                        # One level of newly generated events is supported:
                        # if a handler returns events, those are dispatched
                        # to the interested handlers as well.
                        if not res:
                            continue

                        for subevent in res:
                            for hhandler2 in interest_map.get(subevent["type"], []):
                                hhandler2(subevent)

        behavior = {}

        for handler in handlers:
            try:
                r = handler.run()
                if not r:
                    continue

                behavior[handler.key] = r
            except Exception:
                log.exception(
                    "Failed to run partial behavior class \"%s\"", handler.key
                )

        return behavior