Package lib :: Package cuckoo :: Package common :: Module objects
[hide private]
[frames | no frames]

Source Code for Module lib.cuckoo.common.objects

  1  # Copyright (C) 2010-2013 Claudio Guarnieri. 
  2  # Copyright (C) 2014-2016 Cuckoo Foundation. 
  3  # This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org 
  4  # See the file 'docs/LICENSE' for copying permission. 
  5   
  6  import binascii 
  7  import hashlib 
  8  import logging 
  9  import mmap 
 10  import os 
 11  import re 
 12  import subprocess 
 13   
 14  from lib.cuckoo.common.constants import CUCKOO_ROOT 
 15  from lib.cuckoo.common.whitelist import is_whitelisted_domain 
 16   
 17  try: 
 18      import magic 
 19      HAVE_MAGIC = True 
 20  except ImportError: 
 21      HAVE_MAGIC = False 
 22   
 23  try: 
 24      import pydeep 
 25      HAVE_PYDEEP = True 
 26  except ImportError: 
 27      HAVE_PYDEEP = False 
 28   
 29  try: 
 30      import yara 
 31      HAVE_YARA = True 
 32  except ImportError: 
 33      HAVE_YARA = False 
 34   
 35  try: 
 36      import pefile 
 37      HAVE_PEFILE = True 
 38  except ImportError: 
 39      HAVE_PEFILE = False 
 40   
 41  try: 
 42      import androguard 
 43      HAVE_ANDROGUARD = True 
 44  except ImportError: 
 45      HAVE_ANDROGUARD = False 
 46   
 47  log = logging.getLogger(__name__) 
 48   
 49  FILE_CHUNK_SIZE = 16 * 1024 
 50   
 51  URL_REGEX = ( 
 52      # HTTP/HTTPS. 
 53      "(https?:\\/\\/)" 
 54      "(([" 
 55      # IP address. 
 56      "(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\." 
 57      "(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\." 
 58      "(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\." 
 59      "(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])]|" 
 60      # Or domain name. 
 61      "[a-zA-Z0-9\\.-]+)" 
 62      # Optional port. 
 63      "(\\:\\d+)?" 
 64      # URI. 
 65      "(/[\\(\\)a-zA-Z0-9_:%?=/\\.-]*)?" 
 66  ) 
class Dictionary(dict):
    """Cuckoo custom dict.

    Entries can be read, written and deleted through attribute syntax.
    Reading an attribute whose key is absent gives None instead of raising.
    """

    # Attribute assignment and deletion map straight onto the dict item
    # operations.
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

    def __getattr__(self, key):
        # dict.get() already falls back to None for a missing key.
        return self.get(key)
76
class URL(object):
    """URL base object."""

    def __init__(self, url):
        """@param url: URL"""
        self.url = url
83
class File(object):
    """Basic file object class with all useful utilities."""

    # To be substituted with a category.
    YARA_RULEPATH = os.path.join(CUCKOO_ROOT, "data", "yara", "index_%s.yar")

    # static fields which indicate whether the user has been
    # notified about missing dependencies already
    notified_yara = False
    notified_pefile = False
    notified_androguard = False

    # Given that ssdeep hashes are not really used much in practice we're just
    # going to disable its warning by default for now.
    notified_pydeep = True

    # The yara rules should not change during one session of processing tasks,
    # thus we can cache them. If they are updated, one should restart Cuckoo
    # or the processing tasks.
    yara_rules = {}

    def __init__(self, file_path):
        """@param file_path: file path."""
        self.file_path = file_path

        # these will be populated when first accessed
        self._file_data = None
        self._crc32 = None
        self._md5 = None
        self._sha1 = None
        self._sha256 = None
        self._sha512 = None

    def get_name(self):
        """Get file name.
        @return: file name.
        """
        return os.path.basename(self.file_path)

    def valid(self):
        """Check that the path is an existing, non-empty regular file.
        @return: True if so, False otherwise.
        """
        return (os.path.exists(self.file_path) and
                os.path.isfile(self.file_path) and
                os.path.getsize(self.file_path) != 0)

    def get_data(self):
        """Read file contents.
        @return: data.
        """
        return self.file_data

    def get_chunks(self):
        """Read file contents in chunks (generator).
        @return: successive blocks of at most FILE_CHUNK_SIZE bytes.
        """
        with open(self.file_path, "rb") as fd:
            while True:
                chunk = fd.read(FILE_CHUNK_SIZE)
                if not chunk:
                    break
                yield chunk

    def calc_hashes(self):
        """Calculate all possible hashes for this file.

        The file is streamed once and every digest is updated per chunk;
        the results are stored on the instance for the get_*() accessors.
        """
        crc = 0
        md5 = hashlib.md5()
        sha1 = hashlib.sha1()
        sha256 = hashlib.sha256()
        sha512 = hashlib.sha512()

        for chunk in self.get_chunks():
            crc = binascii.crc32(chunk, crc)
            md5.update(chunk)
            sha1.update(chunk)
            sha256.update(chunk)
            sha512.update(chunk)

        # Mask to 32 bits so that a negative crc32() result still renders
        # as eight uppercase hex digits.
        self._crc32 = "%08X" % (crc & 0xffffffff)
        self._md5 = md5.hexdigest()
        self._sha1 = sha1.hexdigest()
        self._sha256 = sha256.hexdigest()
        self._sha512 = sha512.hexdigest()

    @property
    def file_data(self):
        """Whole file contents, read on first access and cached afterwards."""
        # Compare against None so that an empty file is cached as well, and
        # use a context manager so the handle is closed right away.
        if self._file_data is None:
            with open(self.file_path, "rb") as fd:
                self._file_data = fd.read()
        return self._file_data

    def get_size(self):
        """Get file size.
        @return: file size.
        """
        return os.path.getsize(self.file_path)

    def get_crc32(self):
        """Get CRC32.
        @return: CRC32.
        """
        if not self._crc32:
            self.calc_hashes()
        return self._crc32

    def get_md5(self):
        """Get MD5.
        @return: MD5.
        """
        if not self._md5:
            self.calc_hashes()
        return self._md5

    def get_sha1(self):
        """Get SHA1.
        @return: SHA1.
        """
        if not self._sha1:
            self.calc_hashes()
        return self._sha1

    def get_sha256(self):
        """Get SHA256.
        @return: SHA256.
        """
        if not self._sha256:
            self.calc_hashes()
        return self._sha256

    def get_sha512(self):
        """Get SHA512.
        @return: SHA512.
        """
        if not self._sha512:
            self.calc_hashes()
        return self._sha512

    def get_ssdeep(self):
        """Get SSDEEP.
        @return: SSDEEP, or None if pydeep is missing or hashing failed.
        """
        if not HAVE_PYDEEP:
            if not File.notified_pydeep:
                File.notified_pydeep = True
                log.warning("Unable to import pydeep (install with `pip install pydeep`)")
            return None

        try:
            return pydeep.hash_file(self.file_path)
        except Exception:
            return None

    def _magic_file_type(self, mime):
        """Ask the magic module for the type of this file.
        @param mime: True to request a MIME type, False for a description.
        @return: type reported by magic, or None if every attempt failed.
        """
        file_type = None
        ms = None
        try:
            ms = magic.open(magic.MAGIC_MIME if mime else magic.MAGIC_NONE)
            ms.load()
            file_type = ms.file(self.file_path)
        except Exception:
            # The magic.open() interface is unavailable or failed, so try
            # the magic.from_file() interface instead.
            try:
                if mime:
                    file_type = magic.from_file(self.file_path, mime=True)
                else:
                    file_type = magic.from_file(self.file_path)
            except Exception as e:
                log.debug("Error getting magic from file %s: %s",
                          self.file_path, e)
        finally:
            # Only close a handle that was actually opened.
            if ms is not None:
                try:
                    ms.close()
                except Exception as e:
                    log.debug("Error closing magic handle for %s: %s",
                              self.file_path, e)

        return file_type

    def get_type(self):
        """Get MIME file type.
        @return: file type, or None if it could not be determined.
        """
        file_type = None
        if HAVE_MAGIC:
            file_type = self._magic_file_type(mime=False)

        if file_type is None:
            try:
                p = subprocess.Popen(["file", "-b", self.file_path],
                                     stdout=subprocess.PIPE)
                # communicate() reads the output and waits for the child.
                file_type = p.communicate()[0].strip()
            except Exception as e:
                log.debug("Error running file(1) on %s: %s",
                          self.file_path, e)

        return file_type

    def get_content_type(self):
        """Get MIME content file type (example: image/jpeg).
        @return: file content type, or None if it could not be determined.
        """
        file_type = None
        if HAVE_MAGIC:
            file_type = self._magic_file_type(mime=True)

        if file_type is None:
            try:
                args = ["file", "-b", "--mime-type", self.file_path]
                file_type = subprocess.check_output(args).strip()
            except Exception as e:
                log.debug("Error running file(1) on %s: %s",
                          self.file_path, e)

        return file_type

    def get_exported_functions(self):
        """Get the exported function names of this PE file.
        @return: generator of export names; empty when the file type does
                 not look like a PE or pefile is not available.
        """
        # get_type() may return None when no detection method worked.
        filetype = self.get_type() or ""
        if "MS-DOS" not in filetype and "PE32" not in filetype:
            return

        if not HAVE_PEFILE:
            if not File.notified_pefile:
                File.notified_pefile = True
                log.warning("Unable to import pefile (`pip install pefile`)")
            return

        try:
            pe = pefile.PE(self.file_path)
            if not hasattr(pe, "DIRECTORY_ENTRY_EXPORT"):
                return

            for export in pe.DIRECTORY_ENTRY_EXPORT.symbols:
                if export.name:
                    yield export.name
        except Exception as e:
            log.warning("Error enumerating exported functions: %s", e)

    def get_imported_functions(self):
        """Get the imported functions of this PE file.
        @return: generator of dicts with the keys dll, name, ordinal, hint
                 and address; empty when the file type does not look like
                 a PE or pefile is not available.
        """
        # get_type() may return None when no detection method worked.
        filetype = self.get_type() or ""
        if "MS-DOS" not in filetype and "PE32" not in filetype:
            return

        if not HAVE_PEFILE:
            if not File.notified_pefile:
                File.notified_pefile = True
                log.warning("Unable to import pefile (`pip install pefile`)")
            return

        try:
            pe = pefile.PE(self.file_path)
            if not hasattr(pe, "DIRECTORY_ENTRY_IMPORT"):
                return

            for imp in pe.DIRECTORY_ENTRY_IMPORT:
                for entry in imp.imports:
                    yield dict(dll=imp.dll,
                               name=entry.name,
                               ordinal=entry.ordinal,
                               hint=entry.hint,
                               address=entry.address)
        except Exception as e:
            log.warning("Error enumerating imported functions: %s", e)

    def get_apk_entry(self):
        """Get the entry point for this APK. The entry point is denoted by a
        package and main activity name.
        @return: (package, activity) tuple, or ("", "") if not available.
        """
        # get_type() may return None when no detection method worked.
        filetype = self.get_type() or ""
        if "Zip archive data" not in filetype and "Java archive data" not in filetype:
            return "", ""

        if not HAVE_ANDROGUARD:
            if not File.notified_androguard:
                File.notified_androguard = True
                log.warning("Unable to import androguard (`pip install androguard`)")
            return "", ""

        try:
            a = androguard.core.bytecodes.apk.APK(self.file_path)
            if not a.is_valid_APK():
                return "", ""

            package = a.get_package()
            if not package:
                log.warning("Unable to find the main package, this analysis "
                            "will probably fail.")
                return "", ""

            main_activity = a.get_main_activity()
            if main_activity:
                log.debug("Picked package %s and main activity %s.",
                          package, main_activity)
                return package, main_activity

            # No declared main activity: guess one from its name.
            activities = a.get_activities()
            for activity in activities:
                if "main" in activity or "start" in activity:
                    log.debug("Choosing package %s and main activity due to "
                              "its name %s.", package, activity)
                    return package, activity

            # Last resort: the first listed activity.
            if activities and activities[0]:
                log.debug("Picked package %s and the first activity %s.",
                          package, activities[0])
                return package, activities[0]
        except Exception as e:
            log.warning("Error extracting package and main activity: %s.", e)

        return "", ""

    def _yara_encode_string(self, s):
        """Encode a matched Yara string for reporting.
        @param s: matched string data.
        @return: UTF-8 encoded string, or a "{ AA BB ... }" hex rendering
                 when encoding raises UnicodeDecodeError.
        """
        # NOTE(review): the UnicodeDecodeError fallback and the "hex" codec
        # presumably rely on Python 2 byte-string semantics - confirm before
        # porting.
        try:
            new = s.encode("utf-8")
        except UnicodeDecodeError:
            s = s.lstrip("uU").encode("hex").upper()
            s = " ".join(s[i:i+2] for i in range(0, len(s), 2))
            new = "{ %s }" % s

        return new

    def _yara_matches_177(self, matches):
        """Extract matches from the Yara output for version 1.7.7.
        @param matches: mapping whose values are lists of match dicts.
        @return: list of dicts with the keys name, meta and strings.
        """
        ret = []
        for rule_matches in matches.values():
            for match in rule_matches:
                strings = set()

                for s in match["strings"]:
                    strings.add(self._yara_encode_string(s["data"]))

                ret.append({
                    "name": match["rule"],
                    "meta": match["meta"],
                    "strings": list(strings),
                })

        return ret

    def get_yara(self, category="binaries"):
        """Get Yara signatures matches.
        @param category: rule category, substituted into YARA_RULEPATH.
        @return: matched Yara signatures.
        """
        results = []

        if not HAVE_YARA:
            if not File.notified_yara:
                File.notified_yara = True
                log.warning("Unable to import yara (please compile from sources)")
            return results

        # Compile the Yara rules only the first time.
        if category not in File.yara_rules:
            rulepath = self.YARA_RULEPATH % category
            if not os.path.exists(rulepath):
                log.warning("The specified rule file at %s doesn't exist, "
                            "skip", rulepath)
                return results

            try:
                File.yara_rules[category] = yara.compile(rulepath)
            except Exception:
                log.exception("Error compiling the Yara rules.")
                # Return the empty list like every other early exit.
                return results

        if not os.path.getsize(self.file_path):
            return results

        try:
            matches = File.yara_rules[category].match(self.file_path)

            if getattr(yara, "__version__", None) == "1.7.7":
                return self._yara_matches_177(matches)

            for match in matches:
                strings = set()
                for s in match.strings:
                    strings.add(self._yara_encode_string(s[2]))

                results.append({
                    "name": match.rule,
                    "meta": match.meta,
                    "strings": list(strings),
                })

        except Exception as e:
            log.exception("Unable to match Yara signatures: %s", e)

        return results

    def get_urls(self):
        """Extract all URLs embedded in this file through a simple regex.
        @return: list of unique URLs whose host is not whitelisted.
        """
        if not os.path.getsize(self.file_path):
            return []

        # http://stackoverflow.com/a/454589
        urls = set()
        with open(self.file_path, "rb") as f:
            # ACCESS_READ is the constant meant for the access= parameter.
            m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
            try:
                for url in re.findall(URL_REGEX, m):
                    # url[1] is the host part of the match.
                    if not is_whitelisted_domain(url[1]):
                        urls.add("".join(url))
            finally:
                m.close()

        return list(urls)

    def get_all(self):
        """Get all information available.
        @return: information dict.
        """
        infos = {}
        infos["name"] = self.get_name()
        infos["path"] = self.file_path
        infos["size"] = self.get_size()
        infos["crc32"] = self.get_crc32()
        infos["md5"] = self.get_md5()
        infos["sha1"] = self.get_sha1()
        infos["sha256"] = self.get_sha256()
        infos["sha512"] = self.get_sha512()
        infos["ssdeep"] = self.get_ssdeep()
        infos["type"] = self.get_type()
        infos["yara"] = self.get_yara()
        infos["urls"] = self.get_urls()
        return infos
510