Package modules :: Package processing :: Module behavior
[hide private]
[frames] | [no frames]

Source Code for Module modules.processing.behavior

  1  # Copyright (C) 2010-2013 Claudio Guarnieri. 
  2  # Copyright (C) 2014-2016 Cuckoo Foundation. 
  3  # This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org 
  4  # See the file 'docs/LICENSE' for copying permission. 
  5   
  6  import collections 
  7  import json 
  8  import logging 
  9  import os 
 10   
 11  from lib.cuckoo.common.abstracts import Processing, BehaviorHandler 
 12  from lib.cuckoo.common.config import Config 
 13   
 14  from .platform.windows import WindowsMonitor 
 15  from .platform.linux import LinuxSystemTap 
 16   
 17  log = logging.getLogger(__name__) 
 18   
class Summary(BehaviorHandler):
    """Generates overview summary information (not split by process)."""

    key = "summary"
    event_types = ["generic"]

    def __init__(self, *args, **kwargs):
        super(Summary, self).__init__(*args, **kwargs)
        # Category name -> set of unique values observed for that category.
        self.results = collections.defaultdict(set)

    def handle_event(self, event):
        """Record a generic event's value under its category."""
        self.results[event["category"]].add(event["value"])

    def run(self):
        """Convert the collected sets into lists and return the summary."""
        for category in list(self.results):
            self.results[category] = list(self.results[category])
        return self.results
36
class Anomaly(BehaviorHandler):
    """Anomaly detected during analysis.
    For example: a malware tried to remove Cuckoo's hooks.
    """

    key = "anomaly"
    event_types = ["anomaly"]

    def __init__(self, *args, **kwargs):
        super(Anomaly, self).__init__(*args, **kwargs)
        self.anomalies = []

    def handle_event(self, call):
        """Process an anomaly API call event.
        @param call: API call object
        """
        # Pull the relevant argument values out of the call; as in a
        # linear scan, the last occurrence of each name wins.
        extracted = {"Subcategory": None, "FunctionName": None, "Message": None}
        for argument in call["arguments"]:
            if argument["name"] in extracted:
                extracted[argument["name"]] = argument["value"]

        # NOTE(review): process name/pid were at some point part of this
        # record but are currently not emitted.
        self.anomalies.append(dict(
            category=extracted["Subcategory"],
            funcname=extracted["FunctionName"],
            message=extracted["Message"],
        ))

    def run(self):
        """Fetch all anomalies."""
        return self.anomalies
74
class ProcessTree(BehaviorHandler):
    """Generates process tree."""

    key = "processtree"
    event_types = ["process"]

    def __init__(self, *args, **kwargs):
        super(ProcessTree, self).__init__(*args, **kwargs)
        # pid -> process record (each record carries a "children" list).
        self.processes = {}

    def handle_event(self, process):
        """Register a process record, ignoring duplicate pids."""
        pid = process["pid"]
        if pid in self.processes:
            log.warning(
                "Found the same process identifier twice, this "
                "shouldn't happen!"
            )
            return

        self.processes[pid] = {
            "pid": pid,
            "ppid": process["ppid"],
            "process_name": process["process_name"],
            "command_line": process.get("command_line"),
            "first_seen": process["first_seen"],
            "children": [],
            "track": process.get("track", True),
        }

    def run(self):
        """Link every process to its parent (or a synthetic root when the
        parent is unknown) and return the roots ordered by appearance."""
        root = {"children": []}

        def first_seen(entry):
            return entry["first_seen"]

        for proc in sorted(self.processes.values(), key=first_seen):
            parent = self.processes.get(proc["ppid"], root)
            parent["children"].append(proc)

        return sorted(root["children"], key=first_seen)
113
class GenericBehavior(BehaviorHandler):
    """Generates summary information."""

    key = "generic"
    event_types = ["process", "generic"]

    def __init__(self, *args, **kwargs):
        super(GenericBehavior, self).__init__(*args, **kwargs)
        # pid -> per-process record with a per-category "summary".
        self.processes = {}

    def handle_process_event(self, process):
        """Create a per-process record the first time a pid is seen."""
        pid = process["pid"]
        if pid in self.processes:
            return

        self.processes[pid] = {
            "pid": pid,
            "ppid": process["ppid"],
            "process_name": process["process_name"],
            "process_path": process["process_path"],
            "first_seen": process["first_seen"],
            "summary": collections.defaultdict(set),
        }

    def handle_generic_event(self, event):
        """File a generic event under its process' per-category summary."""
        pid = event["pid"]
        if pid not in self.processes:
            log.warning("Generic event for unknown process id %u", pid)
            return

        # TODO: rewrite / generalize / more flexible
        self.processes[pid]["summary"][event["category"]].add(event["value"])

    def run(self):
        """Turn the per-category sets into lists and return the records."""
        for process in self.processes.values():
            summary = process["summary"]
            for category in list(summary):
                summary[category] = list(summary[category])

        return self.processes.values()
151
class ApiStats(BehaviorHandler):
    """Counts API calls."""
    key = "apistats"
    event_types = ["apicall"]

    def __init__(self, *args, **kwargs):
        super(ApiStats, self).__init__(*args, **kwargs)
        # pid (as a string) -> API name -> number of calls observed.
        self.processes = collections.defaultdict(
            lambda: collections.defaultdict(int)
        )

    def handle_event(self, event):
        """Increment the counter for this process/API pair."""
        self.processes["%d" % event["pid"]][event["api"]] += 1

    def run(self):
        """Return the per-process API call counters."""
        return self.processes
166
class RebootInformation(BehaviorHandler):
    """Provides specific information useful for reboot analysis.

    In reality this is not a true BehaviorHandler as it doesn't return any
    data into the JSON report, but instead it writes a log file which will be
    interpreted when doing a reboot analysis.
    """

    event_types = ["reboot"]

    def __init__(self, *args, **kwargs):
        super(RebootInformation, self).__init__(*args, **kwargs)
        # List of (timestamp, event) tuples, buffered until run().
        self.events = []

    def handle_event(self, event):
        """Buffer a reboot event keyed by its timestamp."""
        self.events.append((event["time"], event))

    def run(self):
        """Write all reboot events, ordered by timestamp, to reboot.json."""
        reboot_path = os.path.join(self.analysis.analysis_path, "reboot.json")
        with open(reboot_path, "wb") as f:
            # Sort on the timestamp only: sorting the raw tuples would fall
            # through to comparing the event dicts whenever two timestamps
            # are equal, which is meaningless (and a TypeError on Python 3).
            # The sort is stable, so ties keep their arrival order.
            for ts, event in sorted(self.events, key=lambda item: item[0]):
                f.write("%s\n" % json.dumps(event))
189
class BehaviorAnalysis(Processing):
    """Behavior Analyzer.

    The behavior key in the results dict will contain both default content
    keys that contain generic / abstracted analysis info, available on any
    platform, as well as platform / analyzer specific output.

    Typically the analyzer behavior contains some sort of "process" separation
    as we're tracking different processes in most cases.

    There are several handlers that produce the respective keys / subkeys.
    Overall the platform / analyzer specific ones parse / process the captured
    data and yield both their own output, but also a standard structure that
    is then captured by the "generic" handlers so they can generate the
    standard result structures.

    The resulting structure contains some iterator onions for the monitored
    function calls that stream the content when some sink (reporting,
    signatures) needs it, thereby reducing memory footprint.

    So hopefully in the end each analysis should be fine with 2 passes over
    the results, once during processing (creating the generic output,
    summaries, etc) and once during reporting (well once for each report type
    if multiple are enabled).
    """

    key = "behavior"

    def _enum_logs(self):
        """Enumerate all behavior logs."""
        if not os.path.exists(self.logs_path):
            log.warning("Analysis results folder does not exist at path %r.", self.logs_path)
            return

        logs = os.listdir(self.logs_path)
        if not logs:
            log.warning("Analysis results folder does not contain any behavior log files.")
            return

        for fname in logs:
            path = os.path.join(self.logs_path, fname)
            if not os.path.isfile(path):
                log.warning("Behavior log file %r is not a file.", fname)
                continue

            analysis_size_limit = self.cfg.processing.analysis_size_limit
            if analysis_size_limit and \
                    os.stat(path).st_size > analysis_size_limit:
                # This needs to be a big alert.
                log.critical("Behavior log file %r is too big, skipped.", fname)
                continue

            yield path

    def run(self):
        """Run analysis.
        @return: results dict.
        """
        self.cfg = Config()
        self.state = {}

        # These handlers will be present for any analysis, regardless of
        # platform/format.
        handlers = [
            GenericBehavior(self),
            ProcessTree(self),
            Summary(self),
            Anomaly(self),
            ApiStats(self),

            # Platform specific stuff.
            WindowsMonitor(self),
            LinuxSystemTap(self),

            # Reboot information.
            RebootInformation(self),
        ]

        # NOTE: selecting platform handlers based on self.task.machine.platform
        # doesn't really work if there's no task, so for now we rely on each
        # handler's handles_path() / file name detection instead.

        # Create a lookup map of event type -> interested handler callbacks.
        interest_map = {}
        for h in handlers:
            for event_type in h.event_types:
                callbacks = interest_map.setdefault(event_type, [])

                # If available go for the specific event type handler rather
                # than the generic handle_event.
                if hasattr(h, "handle_%s_event" % event_type):
                    callbacks.append(getattr(h, "handle_%s_event" % event_type))
                elif h.handle_event not in callbacks:
                    callbacks.append(h.handle_event)

        # Each log file should be parsed by one of the handlers. This handler
        # then yields every event in it which are forwarded to the various
        # behavior/analysis/etc handlers.
        for path in self._enum_logs():
            for handler in handlers:
                # ... whether it is responsible
                if not handler.handles_path(path):
                    continue

                # ... and then let it parse the file
                for event in handler.parse(path):
                    # Pass down the parsed message to interested handlers.
                    for hhandler in interest_map.get(event["type"], []):
                        res = hhandler(event)
                        # We support one layer of "generating" new events,
                        # which we'll pass on again (in case the handler
                        # returns some).
                        if not res:
                            continue

                        for subevent in res:
                            for hhandler2 in interest_map.get(subevent["type"], []):
                                hhandler2(subevent)

        behavior = {}

        for handler in handlers:
            try:
                r = handler.run()
                if not r:
                    continue

                behavior[handler.key] = r
            except Exception:
                # A bare "except:" here would also swallow SystemExit and
                # KeyboardInterrupt; only catch genuine handler failures.
                log.exception("Failed to run partial behavior class \"%s\"", handler.key)

        return behavior
331