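"""Static analysis of submitted files.

Extracts information from PE executables, Windows Script Files (WSF),
Microsoft Office documents, and PDF documents without executing them.
"""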
import datetime
import logging
import os
import re
import struct
import zipfile

try:
    import bs4
    HAVE_BS4 = True
except ImportError:
    HAVE_BS4 = False

try:
    import magic
    HAVE_MAGIC = True
except ImportError:
    HAVE_MAGIC = False

try:
    import pefile
    import peutils
    HAVE_PEFILE = True
except ImportError:
    HAVE_PEFILE = False

try:
    import M2Crypto
    HAVE_MCRYPTO = True
except ImportError:
    HAVE_MCRYPTO = False

try:
    import oletools.olevba
    HAVE_OLETOOLS = True
except ImportError:
    HAVE_OLETOOLS = False

try:
    import peepdf.PDFCore
    import peepdf.JSAnalysis
    HAVE_PEEPDF = True
except ImportError:
    HAVE_PEEPDF = False

try:
    import PyV8
    HAVE_PYV8 = True

    # Fake usage so the import is not flagged as unused.
    PyV8
except:
    HAVE_PYV8 = False

from lib.cuckoo.common.abstracts import Processing
from lib.cuckoo.common.constants import CUCKOO_ROOT
from lib.cuckoo.common.objects import File
from lib.cuckoo.common.utils import convert_to_printable
from lib.cuckoo.common.utils import to_unicode
from lib.cuckoo.misc import dispatch

log = logging.getLogger(__name__)


class PortableExecutable(object):
    """PE analysis."""

    def __init__(self, file_path):
        """@param file_path: file path."""
        self.file_path = file_path
        self.pe = None

80 """Gets filetype, uses libmagic if available.
81 @param data: data to be analyzed.
82 @return: file type or None.
83 """
84 if not HAVE_MAGIC:
85 return None
86
87 try:
88 ms = magic.open(magic.MAGIC_NONE)
89 ms.load()
90 file_type = ms.buffer(data)
91 except:
92 try:
93 file_type = magic.from_buffer(data)
94 except Exception:
95 return None
96 finally:
97 try:
98 ms.close()
99 except:
100 pass
101
102 return file_type
103
105 """Gets PEID signatures.
106 @return: matched signatures or None.
107 """
108 try:
109 sig_path = os.path.join(CUCKOO_ROOT, "data",
110 "peutils", "UserDB.TXT")
111 signatures = peutils.SignatureDatabase(sig_path)
112 return signatures.match(self.pe, ep_only=True)
113 except:
114 return None
115
117 """Gets imported symbols.
118 @return: imported symbols dict or None.
119 """
120 imports = []
121
122 for entry in getattr(self.pe, "DIRECTORY_ENTRY_IMPORT", []):
123 try:
124 symbols = []
125 for imported_symbol in entry.imports:
126 symbols.append({
127 "address": hex(imported_symbol.address),
128 "name": imported_symbol.name,
129 })
130
131 imports.append({
132 "dll": convert_to_printable(entry.dll),
133 "imports": symbols,
134 })
135 except:
136 log.exception("Unable to parse imported symbols.")
137
138 return imports
139
141 """Gets exported symbols.
142 @return: exported symbols dict or None.
143 """
144 exports = []
145
146 if hasattr(self.pe, "DIRECTORY_ENTRY_EXPORT"):
147 for exported_symbol in self.pe.DIRECTORY_ENTRY_EXPORT.symbols:
148 exports.append({
149 "address": hex(self.pe.OPTIONAL_HEADER.ImageBase +
150 exported_symbol.address),
151 "name": exported_symbol.name,
152 "ordinal": exported_symbol.ordinal,
153 })
154
155 return exports
156
158 """Gets sections.
159 @return: sections dict or None.
160 """
161 sections = []
162
163 for entry in self.pe.sections:
164 try:
165 section = {}
166 section["name"] = convert_to_printable(entry.Name.strip("\x00"))
167 section["virtual_address"] = "0x{0:08x}".format(entry.VirtualAddress)
168 section["virtual_size"] = "0x{0:08x}".format(entry.Misc_VirtualSize)
169 section["size_of_data"] = "0x{0:08x}".format(entry.SizeOfRawData)
170 section["entropy"] = entry.get_entropy()
171 sections.append(section)
172 except:
173 continue
174
175 return sections
176
178 """Get resources.
179 @return: resources dict or None.
180 """
181 resources = []
182
183 if hasattr(self.pe, "DIRECTORY_ENTRY_RESOURCE"):
184 for resource_type in self.pe.DIRECTORY_ENTRY_RESOURCE.entries:
185 try:
186 resource = {}
187
188 if resource_type.name is not None:
189 name = str(resource_type.name)
190 else:
191 name = str(pefile.RESOURCE_TYPE.get(resource_type.struct.Id))
192
193 if hasattr(resource_type, "directory"):
194 for resource_id in resource_type.directory.entries:
195 if hasattr(resource_id, "directory"):
196 for resource_lang in resource_id.directory.entries:
197 data = self.pe.get_data(resource_lang.data.struct.OffsetToData, resource_lang.data.struct.Size)
198 filetype = self._get_filetype(data)
199 language = pefile.LANG.get(resource_lang.data.lang, None)
200 sublanguage = pefile.get_sublang_name_for_lang(resource_lang.data.lang, resource_lang.data.sublang)
201
202 resource["name"] = name
203 resource["offset"] = "0x{0:08x}".format(resource_lang.data.struct.OffsetToData)
204 resource["size"] = "0x{0:08x}".format(resource_lang.data.struct.Size)
205 resource["filetype"] = filetype
206 resource["language"] = language
207 resource["sublanguage"] = sublanguage
208 resources.append(resource)
209 except:
210 continue
211
212 return resources
213
215 """Get version info.
216 @return: info dict or None.
217 """
218 infos = []
219 if hasattr(self.pe, "VS_VERSIONINFO"):
220 if hasattr(self.pe, "FileInfo"):
221 for entry in self.pe.FileInfo:
222 try:
223 if hasattr(entry, "StringTable"):
224 for st_entry in entry.StringTable:
225 for str_entry in st_entry.entries.items():
226 entry = {}
227 entry["name"] = convert_to_printable(str_entry[0])
228 entry["value"] = convert_to_printable(str_entry[1])
229 infos.append(entry)
230 elif hasattr(entry, "Var"):
231 for var_entry in entry.Var:
232 if hasattr(var_entry, "entry"):
233 entry = {}
234 entry["name"] = convert_to_printable(var_entry.entry.keys()[0])
235 entry["value"] = convert_to_printable(var_entry.entry.values()[0])
236 infos.append(entry)
237 except:
238 continue
239
240 return infos
241
243 """Gets imphash.
244 @return: imphash string or None.
245 """
246 try:
247 return self.pe.get_imphash()
248 except AttributeError:
249 return None
250
252 """Get compilation timestamp.
253 @return: timestamp or None.
254 """
255 try:
256 pe_timestamp = self.pe.FILE_HEADER.TimeDateStamp
257 except AttributeError:
258 return None
259
260 dt = datetime.datetime.fromtimestamp(pe_timestamp)
261 return dt.strftime("%Y-%m-%d %H:%M:%S")
262
264 """Get the path to any available debugging symbols."""
265 try:
266 for entry in getattr(self.pe, "DIRECTORY_ENTRY_DEBUG", []):
267 raw_offset = entry.struct.PointerToRawData
268 size_data = entry.struct.SizeOfData
269 debug_data = self.pe.__data__[raw_offset:raw_offset+size_data]
270
271 if debug_data.startswith("RSDS"):
272 return debug_data[24:].strip("\x00").decode("latin-1")
273 except:
274 log.exception("Exception parsing PDB path")
275
277 """If this executable is signed, get its signature(s)."""
278 dir_index = pefile.DIRECTORY_ENTRY["IMAGE_DIRECTORY_ENTRY_SECURITY"]
279 if len(self.pe.OPTIONAL_HEADER.DATA_DIRECTORY) < dir_index:
280 return []
281
282 dir_entry = self.pe.OPTIONAL_HEADER.DATA_DIRECTORY[dir_index]
283 if not dir_entry or not dir_entry.VirtualAddress or not dir_entry.Size:
284 return []
285
286 if not HAVE_MCRYPTO:
287 log.critical("You do not have the m2crypto library installed "
288 "preventing certificate extraction: "
289 "pip install m2crypto")
290 return []
291
292 signatures = self.pe.write()[dir_entry.VirtualAddress+8:]
293 bio = M2Crypto.BIO.MemoryBuffer(signatures)
294 if not bio:
295 return []
296
297 pkcs7_obj = M2Crypto.m2.pkcs7_read_bio_der(bio.bio_ptr())
298 if not pkcs7_obj:
299 return []
300
301 ret = []
302 p7 = M2Crypto.SMIME.PKCS7(pkcs7_obj)
303 for cert in p7.get0_signers(M2Crypto.X509.X509_Stack()) or []:
304 subject = cert.get_subject()
305 ret.append({
306 "serial_number": "%032x" % cert.get_serial_number(),
307 "common_name": subject.CN,
308 "country": subject.C,
309 "locality": subject.L,
310 "organization": subject.O,
311 "email": subject.Email,
312 "sha1": "%040x" % int(cert.get_fingerprint("sha1"), 16),
313 "md5": "%032x" % int(cert.get_fingerprint("md5"), 16),
314 })
315
316 if subject.GN and subject.SN:
317 ret[-1]["full_name"] = "%s %s" % (subject.GN, subject.SN)
318 elif subject.GN:
319 ret[-1]["full_name"] = subject.GN
320 elif subject.SN:
321 ret[-1]["full_name"] = subject.SN
322
323 return ret
324
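    def run(self):
        """Run all PE analysis steps.
        @return: analysis results dict or None.
        """
        if not os.path.exists(self.file_path):
            return {}

        try:
            self.pe = pefile.PE(self.file_path)
        except pefile.PEFormatError:
            return {}

        # The result key names below are assumptions mirroring the helper
        # methods above; adjust them to whatever layout the reporting stage
        # expects.
        results = {}
        results["peid_signatures"] = self._get_peid_signatures()
        results["pe_imports"] = self._get_imported_symbols()
        results["pe_exports"] = self._get_exported_symbols()
        results["pe_sections"] = self._get_sections()
        results["pe_resources"] = self._get_resources()
        results["pe_versioninfo"] = self._get_versioninfo()
        results["pe_imphash"] = self._get_imphash()
        results["pe_timestamp"] = self._get_timestamp()
        results["pdb_path"] = self._get_pdb_path()
        results["signature"] = self._get_signature()
        return results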


class WindowsScriptFile(object):
    """Deobfuscates and interprets Windows Script Files."""
    encoding = [
        1, 2, 0, 1, 2, 0, 2, 0, 0, 2, 0, 2, 1, 0, 2, 0,
        1, 0, 2, 0, 1, 1, 2, 0, 0, 2, 1, 0, 2, 0, 0, 2,
        1, 1, 0, 2, 0, 2, 0, 1, 0, 1, 1, 2, 0, 1, 0, 2,
        1, 0, 2, 0, 1, 1, 2, 0, 0, 1, 1, 2, 0, 1, 0, 2,
    ]

    lookup = [
        [0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
         0x08, 0x7b, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
         0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
         0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
         0x32, 0x30, 0x21, 0x29, 0x5b, 0x38, 0x33, 0x3d,
         0x58, 0x3a, 0x35, 0x65, 0x39, 0x5c, 0x56, 0x73,
         0x66, 0x4e, 0x45, 0x6b, 0x62, 0x59, 0x78, 0x5e,
         0x7d, 0x4a, 0x6d, 0x71, 0x00, 0x60, 0x00, 0x53,
         0x00, 0x42, 0x27, 0x48, 0x72, 0x75, 0x31, 0x37,
         0x4d, 0x52, 0x22, 0x54, 0x6a, 0x47, 0x64, 0x2d,
         0x20, 0x7f, 0x2e, 0x4c, 0x5d, 0x7e, 0x6c, 0x6f,
         0x79, 0x74, 0x43, 0x26, 0x76, 0x25, 0x24, 0x2b,
         0x28, 0x23, 0x41, 0x34, 0x09, 0x2a, 0x44, 0x3f,
         0x77, 0x3b, 0x55, 0x69, 0x61, 0x63, 0x50, 0x67,
         0x51, 0x49, 0x4f, 0x46, 0x68, 0x7c, 0x36, 0x70,
         0x6e, 0x7a, 0x2f, 0x5f, 0x4b, 0x5a, 0x2c, 0x57],
        [0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
         0x08, 0x57, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
         0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
         0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
         0x2e, 0x47, 0x7a, 0x56, 0x42, 0x6a, 0x2f, 0x26,
         0x49, 0x41, 0x34, 0x32, 0x5b, 0x76, 0x72, 0x43,
         0x38, 0x39, 0x70, 0x45, 0x68, 0x71, 0x4f, 0x09,
         0x62, 0x44, 0x23, 0x75, 0x00, 0x7e, 0x00, 0x5e,
         0x00, 0x77, 0x4a, 0x61, 0x5d, 0x22, 0x4b, 0x6f,
         0x4e, 0x3b, 0x4c, 0x50, 0x67, 0x2a, 0x7d, 0x74,
         0x54, 0x2b, 0x2d, 0x2c, 0x30, 0x6e, 0x6b, 0x66,
         0x35, 0x25, 0x21, 0x64, 0x4d, 0x52, 0x63, 0x3f,
         0x7b, 0x78, 0x29, 0x28, 0x73, 0x59, 0x33, 0x7f,
         0x6d, 0x55, 0x53, 0x7c, 0x3a, 0x5f, 0x65, 0x46,
         0x58, 0x31, 0x69, 0x6c, 0x5a, 0x48, 0x27, 0x5c,
         0x3d, 0x24, 0x79, 0x37, 0x60, 0x51, 0x20, 0x36],
        [0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
         0x08, 0x6e, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
         0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
         0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
         0x2d, 0x75, 0x52, 0x60, 0x71, 0x5e, 0x49, 0x5c,
         0x62, 0x7d, 0x29, 0x36, 0x20, 0x7c, 0x7a, 0x7f,
         0x6b, 0x63, 0x33, 0x2b, 0x68, 0x51, 0x66, 0x76,
         0x31, 0x64, 0x54, 0x43, 0x00, 0x3a, 0x00, 0x7e,
         0x00, 0x45, 0x2c, 0x2a, 0x74, 0x27, 0x37, 0x44,
         0x79, 0x59, 0x2f, 0x6f, 0x26, 0x72, 0x6a, 0x39,
         0x7b, 0x3f, 0x38, 0x77, 0x67, 0x53, 0x47, 0x34,
         0x78, 0x5d, 0x30, 0x23, 0x5a, 0x5b, 0x6c, 0x48,
         0x55, 0x70, 0x69, 0x2e, 0x4c, 0x21, 0x24, 0x4e,
         0x50, 0x09, 0x56, 0x73, 0x35, 0x61, 0x4b, 0x58,
         0x3b, 0x57, 0x22, 0x6d, 0x4d, 0x25, 0x28, 0x46,
         0x4a, 0x32, 0x41, 0x3d, 0x5f, 0x4f, 0x42, 0x65],
    ]

    unescape = {
        "#": "\r", "&": "\n", "!": "<", "*": ">", "$": "@",
    }

    script_re = "<\\s*script\\s*.*>.*?<\\s*/\\s*script\\s*>"

    def __init__(self, filepath):
        self.filepath = filepath

    def decode(self, source, start="#@~^", end="^#~@"):
        if start not in source or end not in source:
            return

        # Skip the start marker plus its 8-character header, and stop before
        # the 8-character checksum that precedes the end marker.
        o = source.index(start) + len(start) + 8
        end = source.index(end) - 8

        c, m, r = 0, 0, []

        while o < end:
            ch = ord(source[o])
            if source[o] == "@":
                r.append(ord(self.unescape.get(source[o+1], "?")))
                c += r[-1]
                o, m = o + 1, m + 1
            elif ch < 128:
                r.append(self.lookup[self.encoding[m % 64]][ch])
                c += r[-1]
                m = m + 1
            else:
                r.append(ch)

            o = o + 1

        if (c % 2**32) != struct.unpack("I", source[o:o+8].decode("base64"))[0]:
            log.info("Invalid checksum for JScript.Encoded WSF file!")

        return "".join(chr(ch) for ch in r)

    def run(self):
        ret = []
        source = open(self.filepath, "rb").read()

        # Get rid of multi-line (/* ... */) comments before locating the tags.
        source = re.sub("/\\*.*?\\*/", "", source, flags=re.S)

        for script in re.findall(self.script_re, source, re.I | re.S):
            try:
                x = bs4.BeautifulSoup(script, "html.parser")
                language = x.script.attrs.get("language", "").lower()
            except:
                language = None

            # HTML/XML parsers such as bs4 decode HTML entities, so take the
            # raw contents of the script tag straight from the regex match.
            source = re.match("<.*>(.*)</.*>$", script, re.S).group(1)

            # JScript.Encode and VBScript.Encode scripts have to be decoded.
            if language in ("jscript.encode", "vbscript.encode"):
                source = self.decode(source)

            ret.append(to_unicode(source))

        return ret


class OfficeDocument(object):
    """Static analysis of Microsoft Office documents."""
    deobf = [
        # Each entry is a [pattern, replacement, regex flags] triplet;
        # deobfuscate() keeps applying them until no substitution is made.
        [
            # Concatenation of two string literals, e.g., "he" & "llo".
            "\\\"(?P<a>.*?)\\\"\\s+\\&\\s+\\\"(?P<b>.*?)\\\"",
            lambda x: '"%s%s"' % (x.group("a"), x.group("b")),
            0,
        ],
    ]

    eps_comments = "\\(([\\w\\s]+)\\)"

    def __init__(self, filepath):
        self.filepath = filepath
        self.files = {}

500 """Get embedded Macros if this is an Office document."""
501 if not HAVE_OLETOOLS:
502 log.warning(
503 "In order to do static analysis of Microsoft Word documents "
504 "we're going to require oletools (`pip install oletools`)"
505 )
506 return
507
508 try:
509 p = oletools.olevba.VBA_Parser(self.filepath)
510 except TypeError:
511 return
512
513
514 if p.type == "Text":
515 return
516
517 try:
518 for f, s, v, c in p.extract_macros():
519 yield {
520 "stream": s,
521 "filename": v.decode("latin-1"),
522 "orig_code": c.decode("latin-1"),
523 "deobf": self.deobfuscate(c.decode("latin-1")),
524 }
525 except ValueError as e:
526 log.warning(
527 "Error extracting macros from office document (this is an "
528 "issue with oletools - please report upstream): %s", e
529 )
530
532 """Bruteforce approach of regex-based deobfuscation."""
533 changes = 1
534 while changes:
535 changes = 0
536
537 for pattern, repl, flags in self.deobf:
538 count = 1
539 while count:
540 code, count = re.subn(pattern, repl, code, flags=flags)
541 changes += count
542
543 return code
544
546 """Unpacks .docx-based zip files."""
547 try:
548 z = zipfile.ZipFile(self.filepath)
549 for name in z.namelist():
550 self.files[name] = z.read(name)
551 except:
552 return
553
555 """Extract some information from Encapsulated Post Script files."""
556 ret = []
557 for filename, content in self.files.items():
558 if filename.lower().endswith(".eps"):
559 ret.extend(re.findall(self.eps_comments, content))
560 return ret
561
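    def run(self):
        """Run the Office document analysis steps."""
        self.unpack_docx()

        # The result keys below are assumed names that mirror the helper
        # methods above; adjust them to whatever the reporting layer expects.
        return {
            "macros": list(self.get_macros()),
            "eps": self.extract_eps(),
        }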


class PdfDocument(object):
    """Static analysis of PDF documents."""

    def __init__(self, filepath):
        self.filepath = filepath

    def _parse_string(self, s):
        # Big endian byte order mark.
        if s.startswith(u"\xfe\xff"):
            return s[2:].encode("latin-1").decode("utf-16be")

        # Little endian byte order mark.
        if s.startswith(u"\xff\xfe"):
            return s[2:].encode("latin-1").decode("utf-16le")

        return s

    def _sanitize(self, d, key):
        # Helper assumed from its usage below: fetch a metadata value and
        # normalize its string encoding.
        return self._parse_string(d.get(key, ""))

    def run(self):
        if not HAVE_PEEPDF:
            log.warning(
                "Unable to perform static PDF analysis as PeePDF is missing "
                "(install with `pip install peepdf`)"
            )
            return

        p = peepdf.PDFCore.PDFParser()
        r, f = p.parse(
            self.filepath, forceMode=True,
            looseMode=True, manualAnalysis=False
        )
        if r:
            log.warning("Error parsing PDF file, error code %s", r)
            return

        ret = []

        for version in xrange(f.updates + 1):
            md = f.getBasicMetadata(version)
            row = {
                "version": version,
                "creator": self._sanitize(md, "creator"),
                "creation": self._sanitize(md, "creation"),
                "title": self._sanitize(md, "title"),
                "subject": self._sanitize(md, "subject"),
                "producer": self._sanitize(md, "producer"),
                "author": self._sanitize(md, "author"),
                "modification": self._sanitize(md, "modification"),
                "javascript": [],
                "urls": [],
            }

            for obj in f.body[version].objects.values():
                if obj.object.type == "stream":
                    stream = obj.object.decodedStream

                    # Only keep streams that actually contain JavaScript.
                    if not peepdf.JSAnalysis.isJavascript(stream):
                        continue

                    row["javascript"].append({
                        "orig_code": stream.decode("latin-1"),
                        "urls": [],
                    })
                    continue

                if obj.object.type == "dictionary":
                    for url in obj.object.urlsFound:
                        row["urls"].append(self._parse_string(url))

                    for url in obj.object.uriList:
                        row["urls"].append(self._parse_string(url))

            ret.append(row)

        return ret


class Static(Processing):
    """Static analysis."""
    PUBKEY_RE = "(-----BEGIN PUBLIC KEY-----[a-zA-Z0-9\\n\\+/]+-----END PUBLIC KEY-----)"
    PRIVKEY_RE = "(-----BEGIN RSA PRIVATE KEY-----[a-zA-Z0-9\\n\\+/]+-----END RSA PRIVATE KEY-----)"

    office_ext = [
        "doc", "docm", "dotm", "docx", "ppt", "pptm", "pptx", "potm",
        "ppam", "ppsm", "xls", "xlsm", "xlsx",
    ]

660 """Run analysis.
661 @return: results dict.
662 """
663 self.key = "static"
664 static = {}
665
666
667 if self.task["category"] != "file" or \
668 not os.path.exists(self.file_path):
669 return
670
671 package = self.task.get("package")
672
673 if self.task["category"] == "file":
674 ext = os.path.splitext(self.task["target"])[1].lstrip(".").lower()
675 else:
676 ext = None
677
678 if ext == "exe" or "PE32" in File(self.file_path).get_type():
679 if HAVE_PEFILE:
680 static.update(PortableExecutable(self.file_path).run())
681 static["keys"] = self._get_keys()
682
683 if package == "wsf" or ext == "wsf":
684 static["wsf"] = WindowsScriptFile(self.file_path).run()
685
686 if package in ("doc", "ppt", "xls") or ext in self.office_ext:
687 static["office"] = OfficeDocument(self.file_path).run()
688
689 def pdf_worker(filepath):
690 return PdfDocument(filepath).run()
691
692 if package == "pdf" or ext == "pdf":
693 timeout = int(self.options.get("pdf_timeout", 60))
694 static["pdf"] = dispatch(
695 pdf_worker, (self.file_path,), timeout=timeout
696 )
697
698 return static
699
701 """Get any embedded plaintext public and/or private keys."""
702 buf = open(self.file_path).read()
703 ret = set()
704 ret.update(re.findall(self.PUBKEY_RE, buf))
705 ret.update(re.findall(self.PRIVKEY_RE, buf))
706 return list(ret)
707