1
2
3
4
5
6 import binascii
7 import hashlib
8 import logging
9 import mmap
10 import os
11 import re
12 import subprocess
13
14 from lib.cuckoo.common.constants import CUCKOO_ROOT
15 from lib.cuckoo.common.whitelist import is_whitelisted_domain
16
17 try:
18 import magic
19 HAVE_MAGIC = True
20 except ImportError:
21 HAVE_MAGIC = False
22
23 try:
24 import pydeep
25 HAVE_PYDEEP = True
26 except ImportError:
27 HAVE_PYDEEP = False
28
29 try:
30 import yara
31 HAVE_YARA = True
32 except ImportError:
33 HAVE_YARA = False
34
35 try:
36 import pefile
37 HAVE_PEFILE = True
38 except ImportError:
39 HAVE_PEFILE = False
40
41 try:
42 import androguard
43 HAVE_ANDROGUARD = True
44 except ImportError:
45 HAVE_ANDROGUARD = False
46
47 log = logging.getLogger(__name__)
48
49 FILE_CHUNK_SIZE = 16 * 1024
50
# Pattern used to pull URLs out of raw file contents. It is assembled from
# adjacent string literals so each part can be commented separately.
URL_REGEX = (
    # Group 1: scheme, HTTP or HTTPS.
    "(https?:\\/\\/)"
    "((["
    # Dotted-quad IPv4 address, one 0-255 octet per literal.
    # NOTE(review): the "[" opened on the literal above and the "]" that
    # follows the fourth octet look unintended -- the "[" starts a character
    # class rather than a group, so this branch only matches when a literal
    # "]" follows the address. Confirm before relying on the IP branch.
    "(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\."
    "(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\."
    "(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\."
    "(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])]|"
    # Or a host name made of letters, digits, dots and dashes.
    "[a-zA-Z0-9\\.-]+)"
    # Optional port.
    "(\\:\\d+)?"
    # Optional path / query string.
    "(/[\\(\\)a-zA-Z0-9_:%?=/\\.-]*)?"
)
69 """Cuckoo custom dict."""
70
73
74 __setattr__ = dict.__setitem__
75 __delattr__ = dict.__delitem__
76
78 """URL base object."""
79
81 """@param url: URL"""
82 self.url = url
83
85 """Basic file object class with all useful utilities."""
86
87
# Path template for the Yara index file. The "%s" placeholder is presumably
# filled with the rule category (cf. the `category` argument of get_yara) --
# TODO confirm, the code that formats it is not visible here.
YARA_RULEPATH = os.path.join(CUCKOO_ROOT, "data", "yara", "index_%s.yar")

# Class-wide "warning already logged" flags. The pefile and androguard flags
# are flipped to True right before the matching "Unable to import ..."
# warning is logged, so each warning is emitted only once.
# NOTE(review): the code using notified_yara is not visible in this excerpt.
notified_yara = False
notified_pefile = False
notified_androguard = False

# Starts out True, so get_ssdeep() never logs its "Unable to import pydeep"
# warning when pydeep is missing.
# NOTE(review): confirm that silencing this warning by default is intended.
notified_pydeep = True

# NOTE(review): presumably a cache of compiled Yara rules shared by all
# instances -- the code that uses it is not visible here, confirm.
yara_rules = {}
104
106 """@param file_path: file path."""
107 self.file_path = file_path
108
109
110 self._file_data = None
111 self._crc32 = None
112 self._md5 = None
113 self._sha1 = None
114 self._sha256 = None
115 self._sha512 = None
116
def get_name(self):
    """Get file name.
    @return: last component of the file path.
    """
    return os.path.basename(self.file_path)
123
125 return os.path.exists(self.file_path) and \
126 os.path.isfile(self.file_path) and \
127 os.path.getsize(self.file_path) != 0
128
130 """Read file contents.
131 @return: data.
132 """
133 return self.file_data
134
def get_chunks(self):
    """Read file contents in chunks (generator).

    Every yielded chunk holds at most FILE_CHUNK_SIZE bytes; iteration ends
    at end of file.
    """
    with open(self.file_path, "rb") as handle:
        # iter() with a sentinel keeps calling read() until it returns an
        # empty byte string, i.e. until end of file.
        for piece in iter(lambda: handle.read(FILE_CHUNK_SIZE), b""):
            yield piece
144
def calc_hashes(self):
    """Calculate all possible hashes for this file.

    The file is consumed once through get_chunks(); the results are stored
    in the _crc32, _md5, _sha1, _sha256 and _sha512 attributes.
    """
    checksum = 0
    digests = {
        "_md5": hashlib.md5(),
        "_sha1": hashlib.sha1(),
        "_sha256": hashlib.sha256(),
        "_sha512": hashlib.sha512(),
    }

    for block in self.get_chunks():
        checksum = binascii.crc32(block, checksum)
        for digest in digests.values():
            digest.update(block)

    # Mask to 32 bits so that a negative (signed) CRC is still rendered as
    # eight upper-case hex digits, most significant byte first.
    self._crc32 = "%08X" % (checksum & 0xffffffff)
    for attribute, digest in digests.items():
        setattr(self, attribute, digest.hexdigest())
166
@property
def file_data(self):
    """Lazily read and cache the whole file.
    @return: raw file contents.
    """
    # Compare against None rather than relying on truthiness so that an
    # empty file is cached too instead of being re-read on every access.
    if self._file_data is None:
        # The context manager closes the handle right away; the previous
        # open(...).read() left that to the garbage collector.
        with open(self.file_path, "rb") as fd:
            self._file_data = fd.read()
    return self._file_data
172
def get_size(self):
    """Get file size.
    @return: size of the file in bytes, as reported by the file system.
    """
    return os.stat(self.file_path).st_size
178
def get_crc32(self):
    """Get CRC32, computing all hashes first if it is not cached yet.
    @return: CRC32.
    """
    if self._crc32:
        return self._crc32
    self.calc_hashes()
    return self._crc32
186
def get_md5(self):
    """Get MD5, computing all hashes first if it is not cached yet.
    @return: MD5.
    """
    if self._md5:
        return self._md5
    self.calc_hashes()
    return self._md5
194
def get_sha1(self):
    """Get SHA1, computing all hashes first if it is not cached yet.
    @return: SHA1.
    """
    if self._sha1:
        return self._sha1
    self.calc_hashes()
    return self._sha1
202
def get_sha256(self):
    """Get SHA256, computing all hashes first if it is not cached yet.
    @return: SHA256.
    """
    if self._sha256:
        return self._sha256
    self.calc_hashes()
    return self._sha256
210
def get_sha512(self):
    """Get SHA512, computing all hashes first if it is not cached yet.
    @return: SHA512.
    """
    if self._sha512:
        return self._sha512
    self.calc_hashes()
    return self._sha512
219
def get_ssdeep(self):
    """Get SSDEEP.
    @return: SSDEEP hash of the file, or None when pydeep is unavailable
             or hashing fails.
    """
    if HAVE_PYDEEP:
        try:
            return pydeep.hash_file(self.file_path)
        except Exception:
            return None

    # pydeep is missing: warn once (guarded by the class-wide flag), then
    # report that no hash is available.
    if not File.notified_pydeep:
        File.notified_pydeep = True
        log.warning("Unable to import pydeep (install with `pip install pydeep`)")
    return None
234
def get_type(self):
    """Get the file type description.

    Tries, in order: the magic.open() interface, magic.from_file(), and
    finally the output of `file -b`.
    @return: file type, or None if every method failed.
    """
    file_type = None
    if HAVE_MAGIC:
        # Bound up front so the cleanup below never touches an undefined
        # name when magic.open() itself fails.
        ms = None
        try:
            ms = magic.open(magic.MAGIC_NONE)
            ms.load()
            file_type = ms.file(self.file_path)
        except Exception:
            # The magic.open() interface is unavailable or failed; fall back
            # to the from_file() interface.
            try:
                file_type = magic.from_file(self.file_path)
            except Exception as e:
                log.debug("Error getting magic from file %s: %s",
                          self.file_path, e)
        finally:
            if ms is not None:
                try:
                    ms.close()
                except Exception:
                    # Best-effort cleanup; a failure here must not mask the
                    # result obtained above.
                    pass

    if file_type is None:
        try:
            p = subprocess.Popen(["file", "-b", self.file_path],
                                 stdout=subprocess.PIPE)
            # communicate() reads the output, closes the pipe and waits for
            # the child, so no zombie process or open descriptor is left.
            output, _ = p.communicate()
            file_type = output.strip()
        except Exception as e:
            log.debug("Error running file(1) on %s: %s",
                      self.file_path, e)

    return file_type
267
269 """Get MIME content file type (example: image/jpeg).
270 @return: file content type.
271 """
272 file_type = None
273 if HAVE_MAGIC:
274 try:
275 ms = magic.open(magic.MAGIC_MIME)
276 ms.load()
277 file_type = ms.file(self.file_path)
278 except:
279 try:
280 file_type = magic.from_file(self.file_path, mime=True)
281 except:
282 pass
283 finally:
284 try:
285 ms.close()
286 except:
287 pass
288
289 if file_type is None:
290 try:
291 args = ["file", "-b", "--mime-type", self.file_path]
292 file_type = subprocess.check_output(args).strip()
293 except:
294 pass
295
296 return file_type
297
299 """Get the exported function names of this PE file."""
300 filetype = self.get_type()
301 if "MS-DOS" not in filetype and "PE32" not in self.get_type():
302 return
303
304 if not HAVE_PEFILE:
305 if not File.notified_pefile:
306 File.notified_pefile = True
307 log.warning("Unable to import pefile (`pip install pefile`)")
308 return
309
310 try:
311 pe = pefile.PE(self.file_path)
312 if not hasattr(pe, "DIRECTORY_ENTRY_EXPORT"):
313 return
314
315 for export in pe.DIRECTORY_ENTRY_EXPORT.symbols:
316 if export.name:
317 yield export.name
318 except Exception as e:
319 log.warning("Error enumerating exported functions: %s", e)
320
322 """Get the imported functions of this PE file."""
323 filetype = self.get_type()
324 if "MS-DOS" not in filetype and "PE32" not in self.get_type():
325 return
326
327 if not HAVE_PEFILE:
328 if not File.notified_pefile:
329 File.notified_pefile = True
330 log.warning("Unable to import pefile (`pip install pefile`)")
331 return
332
333 try:
334 pe = pefile.PE(self.file_path)
335 if not hasattr(pe, "DIRECTORY_ENTRY_IMPORT"):
336 return
337
338 for imp in pe.DIRECTORY_ENTRY_IMPORT:
339 for entry in imp.imports:
340 yield dict(dll=imp.dll,
341 name=entry.name,
342 ordinal=entry.ordinal,
343 hint=entry.hint,
344 address=entry.address)
345 except Exception as e:
346 log.warning("Error enumerating imported functions: %s", e)
347
def get_apk_entry(self):
    """Get the entry point for this APK. The entry point is denoted by a
    package and main activity name.

    @return: (package, activity) tuple; ("", "") when the file is not a
             Zip/Java archive, androguard is unavailable, the APK is
             invalid, or no package / activity could be determined.
    """
    # get_type() returns None when no detection method worked; normalise
    # that to "" so the substring checks below cannot raise TypeError.
    filetype = self.get_type() or ""
    if "Zip archive data" not in filetype and "Java archive data" not in filetype:
        return "", ""

    if not HAVE_ANDROGUARD:
        # Warn only once, guarded by the class-wide flag.
        if not File.notified_androguard:
            File.notified_androguard = True
            log.warning("Unable to import androguard (`pip install androguard`)")
        return "", ""

    try:
        a = androguard.core.bytecodes.apk.APK(self.file_path)
        if not a.is_valid_APK():
            return "", ""

        package = a.get_package()
        if not package:
            log.warning("Unable to find the main package, this analysis "
                        "will probably fail.")
            return "", ""

        # Preferred: the main activity reported by androguard.
        main_activity = a.get_main_activity()
        if main_activity:
            log.debug("Picked package %s and main activity %s.",
                      package, main_activity)
            return package, main_activity

        # Otherwise guess from the activity names.
        activities = a.get_activities()
        for activity in activities:
            if "main" in activity or "start" in activity:
                log.debug("Choosing package %s and main activity due to "
                          "its name %s.", package, activity)
                return package, activity

        # Last resort: the first listed activity.
        if activities and activities[0]:
            log.debug("Picked package %s and the first activity %s.",
                      package, activities[0])
            return package, activities[0]
    except Exception as e:
        log.warning("Error extracting package and main activity: %s.", e)

    return "", ""
393
395
396 try:
397 new = s.encode("utf-8")
398 except UnicodeDecodeError:
399 s = s.lstrip("uU").encode("hex").upper()
400 s = " ".join(s[i:i+2] for i in range(0, len(s), 2))
401 new = "{ %s }" % s
402
403 return new
404
406 """Extract matches from the Yara output for version 1.7.7."""
407 ret = []
408 for _, rule_matches in matches.items():
409 for match in rule_matches:
410 strings = set()
411
412 for s in match["strings"]:
413 strings.add(self._yara_encode_string(s["data"]))
414
415 ret.append({
416 "name": match["rule"],
417 "meta": match["meta"],
418 "strings": list(strings),
419 })
420
421 return ret
422
423 - def get_yara(self, category="binaries"):
475
def get_urls(self):
    """Extract all URLs embedded in this file through a simple regex.

    @return: list of unique URLs whose host is not whitelisted; empty list
             for an empty file.
    """
    # An empty file cannot be memory-mapped, so bail out early.
    if not os.path.getsize(self.file_path):
        return []

    urls = set()
    with open(self.file_path, "rb") as f:
        # ACCESS_READ is the constant meant for the `access` keyword;
        # PROT_READ belongs to the Unix-only `prot` argument.
        m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
        try:
            for url in re.findall(URL_REGEX, m):
                # url[1] is the host part captured by URL_REGEX.
                if not is_whitelisted_domain(url[1]):
                    urls.add("".join(url))
        finally:
            # Release the mapping explicitly instead of leaking it together
            # with the file handle.
            m.close()

    return list(urls)
491
493 """Get all information available.
494 @return: information dict.
495 """
496 infos = {}
497 infos["name"] = self.get_name()
498 infos["path"] = self.file_path
499 infos["size"] = self.get_size()
500 infos["crc32"] = self.get_crc32()
501 infos["md5"] = self.get_md5()
502 infos["sha1"] = self.get_sha1()
503 infos["sha256"] = self.get_sha256()
504 infos["sha512"] = self.get_sha512()
505 infos["ssdeep"] = self.get_ssdeep()
506 infos["type"] = self.get_type()
507 infos["yara"] = self.get_yara()
508 infos["urls"] = self.get_urls()
509 return infos
510