Package lib :: Package cuckoo :: Package core :: Module scheduler
[hide private]
[frames] | [no frames]

Source Code for Module lib.cuckoo.core.scheduler

  1  # Copyright (C) 2010-2014 Cuckoo Foundation. 
  2  # This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org 
  3  # See the file 'docs/LICENSE' for copying permission. 
  4   
  5  import os 
  6  import time 
  7  import shutil 
  8  import logging 
  9  import Queue 
 10  from threading import Thread, Lock 
 11   
 12  from lib.cuckoo.common.config import Config 
 13  from lib.cuckoo.common.constants import CUCKOO_ROOT 
 14  from lib.cuckoo.common.exceptions import CuckooMachineError, CuckooGuestError 
 15  from lib.cuckoo.common.exceptions import CuckooOperationalError 
 16  from lib.cuckoo.common.exceptions import CuckooCriticalError 
 17  from lib.cuckoo.common.objects import File 
 18  from lib.cuckoo.common.utils import create_folder 
 19  from lib.cuckoo.core.database import Database, TASK_COMPLETED, TASK_REPORTED 
 20  from lib.cuckoo.core.guest import GuestManager 
 21  from lib.cuckoo.core.plugins import list_plugins, RunAuxiliary, RunProcessing 
 22  from lib.cuckoo.core.plugins import RunSignatures, RunReporting 
 23  from lib.cuckoo.core.resultserver import Resultserver 
 24   
log = logging.getLogger(__name__)

# Machinery manager singleton: created by Scheduler.initialize() and shared
# by every AnalysisManager thread.
machinery = None
# Serializes machine acquisition across concurrent AnalysisManager threads.
machine_lock = Lock()

# Total number of analyses started in this session; compared against the
# max_analysis_count configuration option in Scheduler.start().
total_analysis_count = 0
# Number of AnalysisManager threads currently running.
active_analysis_count = 0
 32   
 33   
class CuckooDeadMachine(Exception):
    """Raised when an analysis machine turns dead.

    Signals the analysis loop that the current task should be restarted
    and retried on another machine once one becomes available.
    """
    pass
class AnalysisManager(Thread):
    """Analysis Manager.

    This class handles the full analysis process for a given task. It takes
    care of selecting the analysis machine, preparing the configuration and
    interacting with the guest agent and analyzer components to launch and
    complete the analysis and store, process and report its results.
    """

    def __init__(self, task, error_queue):
        """@param task: task object containing the details for the analysis.
        @param error_queue: queue used to propagate fatal errors back to the
            Scheduler thread (see Scheduler.start()).
        """
        Thread.__init__(self)
        # NOTE(review): this assigns on the Thread *class*, not the instance;
        # presumably "self.daemon = True" was intended — confirm before
        # changing, as all threads currently end up daemonic either way.
        Thread.daemon = True

        self.task = task
        self.errors = error_queue
        self.cfg = Config()
        # Per-task results folder, populated by init_storage().
        self.storage = ""
        # Path of the stored sample copy, populated by store_file().
        self.binary = ""
        # Machine object reserved for this task, set by acquire_machine().
        self.machine = None
64 - def init_storage(self):
65 """Initialize analysis storage folder.""" 66 self.storage = os.path.join(CUCKOO_ROOT, 67 "storage", 68 "analyses", 69 str(self.task.id)) 70 71 # If the analysis storage folder already exists, we need to abort the 72 # analysis or previous results will be overwritten and lost. 73 if os.path.exists(self.storage): 74 log.error("Analysis results folder already exists at path \"%s\"," 75 " analysis aborted", self.storage) 76 return False 77 78 # If we're not able to create the analysis storage folder, we have to 79 # abort the analysis. 80 try: 81 create_folder(folder=self.storage) 82 except CuckooOperationalError: 83 log.error("Unable to create analysis folder %s", self.storage) 84 return False 85 86 return True
87
88 - def check_file(self):
89 """Checks the integrity of the file to be analyzed.""" 90 sample = Database().view_sample(self.task.sample_id) 91 92 sha256 = File(self.task.target).get_sha256() 93 if sha256 != sample.sha256: 94 log.error("Target file has been modified after submission: \"%s\"", self.task.target) 95 return False 96 97 return True
98
99 - def store_file(self):
100 """Store a copy of the file being analyzed.""" 101 if not os.path.exists(self.task.target): 102 log.error("The file to analyze does not exist at path \"%s\", " 103 "analysis aborted", self.task.target) 104 return False 105 106 sha256 = File(self.task.target).get_sha256() 107 self.binary = os.path.join(CUCKOO_ROOT, "storage", "binaries", sha256) 108 109 if os.path.exists(self.binary): 110 log.info("File already exists at \"%s\"", self.binary) 111 else: 112 # TODO: do we really need to abort the analysis in case we are not 113 # able to store a copy of the file? 114 try: 115 shutil.copy(self.task.target, self.binary) 116 except (IOError, shutil.Error) as e: 117 log.error("Unable to store file from \"%s\" to \"%s\", " 118 "analysis aborted", self.task.target, self.binary) 119 return False 120 121 try: 122 new_binary_path = os.path.join(self.storage, "binary") 123 124 if hasattr(os, "symlink"): 125 os.symlink(self.binary, new_binary_path) 126 else: 127 shutil.copy(self.binary, new_binary_path) 128 except (AttributeError, OSError) as e: 129 log.error("Unable to create symlink/copy from \"%s\" to " 130 "\"%s\": %s", self.binary, self.storage, e) 131 132 return True
133
134 - def acquire_machine(self):
135 """Acquire an analysis machine from the pool of available ones.""" 136 machine = None 137 138 # Start a loop to acquire the a machine to run the analysis on. 139 while True: 140 machine_lock.acquire() 141 142 # In some cases it's possible that we enter this loop without 143 # having any available machines. We should make sure this is not 144 # such case, or the analysis task will fail completely. 145 if not machinery.availables(): 146 machine_lock.release() 147 time.sleep(1) 148 continue 149 150 # If the user specified a specific machine ID, a platform to be 151 # used or machine tags acquire the machine accordingly. 152 try: 153 machine = machinery.acquire(machine_id=self.task.machine, 154 platform=self.task.platform, 155 tags=self.task.tags) 156 finally: 157 machine_lock.release() 158 159 # If no machine is available at this moment, wait for one second 160 # and try again. 161 if not machine: 162 log.debug("Task #%d: no machine available yet", self.task.id) 163 time.sleep(1) 164 else: 165 log.info("Task #%d: acquired machine %s (label=%s)", 166 self.task.id, machine.name, machine.label) 167 break 168 169 self.machine = machine
170
171 - def build_options(self):
172 """Generate analysis options. 173 @return: options dict. 174 """ 175 options = {} 176 177 options["id"] = self.task.id 178 options["ip"] = self.machine.resultserver_ip 179 options["port"] = self.machine.resultserver_port 180 options["category"] = self.task.category 181 options["target"] = self.task.target 182 options["package"] = self.task.package 183 options["options"] = self.task.options 184 options["enforce_timeout"] = self.task.enforce_timeout 185 options["clock"] = self.task.clock 186 187 if not self.task.timeout or self.task.timeout == 0: 188 options["timeout"] = self.cfg.timeouts.default 189 else: 190 options["timeout"] = self.task.timeout 191 192 if self.task.category == "file": 193 options["file_name"] = File(self.task.target).get_name() 194 options["file_type"] = File(self.task.target).get_type() 195 196 return options
197
    def launch_analysis(self):
        """Start the analysis: prepare storage, reserve a machine, run the
        guest analysis and tear everything down again.
        @return: True when the guest analysis completed, False otherwise.
        @raise CuckooDeadMachine: when the machine died, so the caller can
            retry the task on another machine.
        """
        succeeded = False
        dead_machine = False

        log.info("Starting analysis of %s \"%s\" (task=%d)",
                 self.task.category.upper(), self.task.target, self.task.id)

        # Initialize the analysis folders.
        if not self.init_storage():
            return False

        if self.task.category == "file":
            # Check whether the file has been changed for some unknown reason.
            # And fail this analysis if it has been modified.
            if not self.check_file():
                return False

            # Store a copy of the original file.
            if not self.store_file():
                return False

        # Acquire analysis machine.
        try:
            self.acquire_machine()
        except CuckooOperationalError as e:
            log.error("Cannot acquire machine: {0}".format(e))
            return False

        # Generate the analysis configuration file.
        options = self.build_options()

        # At this point we can tell the Resultserver about it.
        try:
            Resultserver().add_task(self.task, self.machine)
        except Exception as e:
            machinery.release(self.machine.label)
            self.errors.put(e)
            # NOTE(review): execution continues even though the Resultserver
            # registration failed and the machine was released — confirm
            # whether an early return was intended here.

        aux = RunAuxiliary(task=self.task, machine=self.machine)
        aux.start()

        try:
            # Mark the selected analysis machine in the database as started.
            # NOTE(review): if guest_start itself raised, guest_log would be
            # unbound in the finally block below — presumably guest_start
            # never raises CuckooMachineError; confirm.
            guest_log = Database().guest_start(self.task.id,
                                               self.machine.name,
                                               self.machine.label,
                                               machinery.__class__.__name__)
            # Start the machine.
            machinery.start(self.machine.label)
        except CuckooMachineError as e:
            log.error(str(e), extra={"task_id": self.task.id})
            # Flag the machine as dead so the cleanup below re-queues the
            # task instead of releasing the machine back into the pool.
            dead_machine = True
        else:
            try:
                # Initialize the guest manager.
                guest = GuestManager(self.machine.name, self.machine.ip, self.machine.platform)
                # Start the analysis.
                guest.start_analysis(options)
            except CuckooGuestError as e:
                log.error(str(e), extra={"task_id": self.task.id})
            else:
                # Wait for analysis completion.
                try:
                    guest.wait_for_completion()
                    succeeded = True
                except CuckooGuestError as e:
                    log.error(str(e), extra={"task_id": self.task.id})
                    succeeded = False
        finally:
            # Stop Auxiliary modules.
            aux.stop()

            # Take a memory dump of the machine before shutting it off.
            if self.cfg.cuckoo.memory_dump or self.task.memory:
                try:
                    machinery.dump_memory(self.machine.label,
                                          os.path.join(self.storage, "memory.dmp"))
                except NotImplementedError:
                    log.error("The memory dump functionality is not available "
                              "for the current machine manager")
                except CuckooMachineError as e:
                    log.error(e)

            try:
                # Stop the analysis machine.
                machinery.stop(self.machine.label)
            except CuckooMachineError as e:
                log.warning("Unable to stop machine %s: %s",
                            self.machine.label, e)

            # Mark the machine in the database as stopped. Unless this machine
            # has been marked as dead, we just keep it as "started" in the
            # database so it'll not be used later on in this session.
            Database().guest_stop(guest_log)

            # After all this, we can make the Resultserver forget about the
            # internal state for this analysis task.
            Resultserver().del_task(self.task, self.machine)

            if dead_machine:
                # Remove the guest from the database, so that we can assign a
                # new guest when the task is being analyzed with another
                # machine.
                Database().guest_remove(guest_log)

                # Remove the analysis directory that has been created so
                # far, as launch_analysis() is going to be doing that again.
                shutil.rmtree(self.storage)

                # This machine has turned dead, so we throw an exception here
                # which informs the AnalysisManager that it should analyze
                # this task again with another available machine.
                raise CuckooDeadMachine()

            try:
                # Release the analysis machine. But only if the machine has
                # not turned dead yet.
                machinery.release(self.machine.label)
            except CuckooMachineError as e:
                log.error("Unable to release machine %s, reason %s. "
                          "You might need to restore it manually",
                          self.machine.label, e)

        return succeeded
325 - def process_results(self):
326 """Process the analysis results and generate the enabled reports.""" 327 results = RunProcessing(task_id=self.task.id).run() 328 RunSignatures(results=results).run() 329 RunReporting(task_id=self.task.id, results=results).run() 330 331 # If the target is a file and the user enabled the option, 332 # delete the original copy. 333 if self.task.category == "file" and self.cfg.cuckoo.delete_original: 334 if not os.path.exists(self.task.target): 335 log.warning("Original file does not exist anymore: \"%s\": " 336 "File not found", self.task.target) 337 else: 338 try: 339 os.remove(self.task.target) 340 except OSError as e: 341 log.error("Unable to delete original file at path " 342 "\"%s\": %s", self.task.target, e) 343 344 # If the target is a file and the user enabled the delete copy of 345 # the binary option, then delete the copy. 346 if self.task.category == "file" and self.cfg.cuckoo.delete_bin_copy: 347 if not os.path.exists(self.binary): 348 log.warning("Copy of the original file does not exist anymore: \"%s\": File not found", self.binary) 349 else: 350 try: 351 os.remove(self.binary) 352 except OSError as e: 353 log.error("Unable to delete the copy of the original file at path \"%s\": %s", self.binary, e) 354 355 log.info("Task #%d: reports generation completed (path=%s)", 356 self.task.id, self.storage) 357 358 return True
359
360 - def run(self):
361 """Run manager thread.""" 362 global active_analysis_count 363 active_analysis_count += 1 364 try: 365 while True: 366 try: 367 success = self.launch_analysis() 368 except CuckooDeadMachine: 369 continue 370 371 break 372 373 Database().set_status(self.task.id, TASK_COMPLETED) 374 375 log.debug("Released database task #%d with status %s", 376 self.task.id, success) 377 378 if self.cfg.cuckoo.process_results: 379 self.process_results() 380 Database().set_status(self.task.id, TASK_REPORTED) 381 382 log.info("Task #%d: analysis procedure completed", self.task.id) 383 except: 384 log.exception("Failure in AnalysisManager.run") 385 386 active_analysis_count -= 1
387
class Scheduler:
    """Tasks Scheduler.

    This class is responsible for the main execution loop of the tool. It
    prepares the analysis machines and keep waiting and loading for new
    analysis tasks.
    Whenever a new task is available, it launches AnalysisManager which will
    take care of running the full analysis process and operating with the
    assigned analysis machine.
    """

    def __init__(self):
        # Main-loop flag; cleared by stop() to terminate start().
        self.running = True
        self.cfg = Config()
        self.db = Database()
404 - def initialize(self):
405 """Initialize the machine manager.""" 406 global machinery 407 408 machinery_name = self.cfg.cuckoo.machinery 409 410 log.info("Using \"%s\" machine manager", machinery_name) 411 412 # Get registered class name. Only one machine manager is imported, 413 # therefore there should be only one class in the list. 414 plugin = list_plugins("machinery")[0] 415 # Initialize the machine manager. 416 machinery = plugin() 417 418 # Find its configuration file. 419 conf = os.path.join(CUCKOO_ROOT, "conf", "%s.conf" % machinery_name) 420 421 if not os.path.exists(conf): 422 raise CuckooCriticalError("The configuration file for machine " 423 "manager \"{0}\" does not exist at path:" 424 " {1}".format(machinery_name, conf)) 425 426 # Provide a dictionary with the configuration options to the 427 # machine manager instance. 428 machinery.set_options(Config(conf)) 429 # Initialize the machine manager. 430 try: 431 machinery.initialize(machinery_name) 432 except CuckooMachineError as e: 433 raise CuckooCriticalError("Error initializing machines: %s" % e) 434 435 # At this point all the available machines should have been identified 436 # and added to the list. If none were found, Cuckoo needs to abort the 437 # execution. 438 if len(machinery.machines()) == 0: 439 raise CuckooCriticalError("No machines available") 440 else: 441 log.info("Loaded %s machine/s", len(machinery.machines()))
442
443 - def stop(self):
444 """Stop scheduler.""" 445 self.running = False 446 # Shutdown machine manager (used to kill machines that still alive). 447 machinery.shutdown()
448
    def start(self):
        """Start scheduler: run the main loop that feeds pending tasks to
        AnalysisManager threads until stop() clears the running flag."""
        global total_analysis_count
        self.initialize()

        log.info("Waiting for analysis tasks...")

        # Message queue with threads to transmit exceptions (used as IPC).
        errors = Queue.Queue()

        # Optional cap on the number of analyses for this session; zero or
        # None disables the limit.
        maxcount = self.cfg.cuckoo.max_analysis_count

        # This loop runs forever.
        while self.running:
            time.sleep(1)

            # If not enough free diskspace is available, then we print an
            # error message and wait another round (this check is ignored
            # when freespace is set to zero).
            if self.cfg.cuckoo.freespace:
                # Resolve the full base path to the analysis folder, just in
                # case somebody decides to make a symlink out of it.
                dir_path = os.path.join(CUCKOO_ROOT, "storage", "analyses")

                # TODO: Windows support
                if hasattr(os, "statvfs"):
                    dir_stats = os.statvfs(dir_path)

                    # Free diskspace in megabytes.
                    # NOTE(review): under Python 2 this is integer floor
                    # division, which is fine for a megabyte threshold.
                    space_available = dir_stats.f_bavail * dir_stats.f_frsize
                    space_available /= 1024 * 1024

                    if space_available < self.cfg.cuckoo.freespace:
                        log.error("Not enough free diskspace! (Only %d MB!)",
                                  space_available)
                        continue

            # If no machines are available, it's pointless to fetch for
            # pending tasks. Loop over.
            if machinery.availables() == 0:
                continue

            # Exits if max_analysis_count is defined in config file and
            # is reached.
            if maxcount and total_analysis_count >= maxcount:
                # Wait for running analyses to drain before stopping.
                if active_analysis_count <= 0:
                    self.stop()
            else:
                # Fetch a pending analysis task.
                task = self.db.fetch()

                if task:
                    log.debug("Processing task #%s", task.id)
                    total_analysis_count += 1

                    # Initialize the analysis manager.
                    analysis = AnalysisManager(task, errors)
                    # Start.
                    analysis.start()

            # Deal with errors: re-raise any fatal exception an
            # AnalysisManager thread pushed onto the queue.
            try:
                error = errors.get(block=False)
            except Queue.Empty:
                pass
            else:
                raise error
516