Package modules :: Package processing :: Module network
[hide private]
[frames] | [no frames]

Source Code for Module modules.processing.network

   1  # Copyright (C) 2010-2013 Claudio Guarnieri. 
   2  # Copyright (C) 2014-2016 Cuckoo Foundation. 
   3  # This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org 
   4  # See the file 'docs/LICENSE' for copying permission. 
   5   
   6  import hashlib 
   7  import logging 
   8  import json 
   9  import os 
  10  import re 
  11  import socket 
  12  import struct 
  13  import tempfile 
  14  import urlparse 
  15   
  16  from lib.cuckoo.common.abstracts import Processing 
  17  from lib.cuckoo.common.config import Config 
  18  from lib.cuckoo.common.constants import LATEST_HTTPREPLAY, CUCKOO_ROOT 
  19  from lib.cuckoo.common.dns import resolve 
  20  from lib.cuckoo.common.irc import ircMessage 
  21  from lib.cuckoo.common.objects import File 
  22  from lib.cuckoo.common.utils import convert_to_printable, versiontuple 
  23  from lib.cuckoo.common.exceptions import CuckooProcessingError 
  24   
  25  try: 
  26      import dpkt 
  27      HAVE_DPKT = True 
  28  except ImportError: 
  29      HAVE_DPKT = False 
  30   
  31  try: 
  32      import httpreplay 
  33      import httpreplay.cut 
  34   
  35      # Be less verbose about httpreplay logging messages. 
  36      logging.getLogger("httpreplay").setLevel(logging.CRITICAL) 
  37   
  38      HAVE_HTTPREPLAY = True 
  39  except ImportError: 
  40      HAVE_HTTPREPLAY = False 
  41   
  42  # Imports for the batch sort. 
  43  # http://stackoverflow.com/questions/10665925/how-to-sort-huge-files-with-python 
  44  # http://code.activestate.com/recipes/576755/ 
  45  import heapq 
  46  from itertools import islice 
  47  from collections import namedtuple 
  48   
# Sort-key/packet containers used by the batch-sort helpers further below.
# Keyed pairs a sortable key with the original object; Packet carries a raw
# packet buffer together with its capture timestamp.
Keyed = namedtuple("Keyed", ["key", "obj"])
Packet = namedtuple("Packet", ["raw", "ts"])

log = logging.getLogger(__name__)
# Global Cuckoo configuration (conf/cuckoo.conf).
cfg = Config()

# Urge users to upgrade to the latest version.
# getattr() is used because older httpreplay releases may not expose
# __version__ at all; _v stays None when httpreplay is not installed.
_v = getattr(httpreplay, "__version__", None) if HAVE_HTTPREPLAY else None
if _v and versiontuple(_v) < versiontuple(LATEST_HTTPREPLAY):
    log.warning(
        "You are using version %s of HTTPReplay, rather than the latest "
        "version %s, which may not handle various corner cases and/or TLS "
        "cipher suites correctly. This could result in not getting all the "
        "HTTP/HTTPS streams that are available or corrupt some streams that "
        "were not handled correctly before. Please upgrade it to the latest "
        "version (`pip install --upgrade httpreplay`).",
        _v, LATEST_HTTPREPLAY,
    )
  67   
class Pcap(object):
    """Reads network data from PCAP file."""

    # Ports whose traffic is handed to the TLS dissector. The trailing
    # comma makes this a one-element tuple.
    ssl_ports = 443,

    # Class-level flag so the "old dpkt version" warning in
    # _https_identify() is only emitted once per process.
    notified_dpkt = False

    def __init__(self, filepath, options):
        """Creates a new instance.
        @param filepath: path to PCAP file
        @param options: config options
        """
        self.filepath = filepath
        self.options = options

        # List of all hosts.
        self.hosts = []
        # List containing all non-private IP addresses.
        self.unique_hosts = []
        # List of unique domains.
        self.unique_domains = []
        # List containing all TCP packets.
        self.tcp_connections = []
        self.tcp_connections_seen = set()
        # Lookup table to identify connection requests to services or IP
        # addresses that are no longer available.
        self.tcp_connections_dead = {}
        self.dead_hosts = {}
        self.alive_hosts = {}
        # List containing all UDP packets.
        self.udp_connections = []
        self.udp_connections_seen = set()
        # List containing all ICMP requests.
        self.icmp_requests = []
        # List containing all HTTP requests.
        self.http_requests = {}
        # List containing all TLS/SSL3 key combinations.
        self.tls_keys = []
        # List containing all DNS requests.
        self.dns_requests = {}
        self.dns_answers = set()
        # List containing all SMTP requests.
        self.smtp_requests = []
        # Reconstructed SMTP flow.
        self.smtp_flow = {}
        # List containing all IRC requests.
        self.irc_requests = []
        # Dictionary containing all the results of this processing.
        self.results = {}
        # List containing all whitelist entries.
        self.whitelist = self._build_whitelist()
        # List for holding whitelisted IP-s according to DNS responses
        self.whitelist_ips = []
        # state of whitelisting
        self.whitelist_enabled = self._build_whitelist_conf()
        # List of known good DNS servers
        self.known_dns = self._build_known_dns()
124
125 - def _build_whitelist(self):
126 result = [] 127 whitelist_path = os.path.join( 128 CUCKOO_ROOT, "data", "whitelist", "domain.txt" 129 ) 130 for line in open(whitelist_path, 'rb'): 131 result.append(line.strip()) 132 return result
133
134 - def _build_whitelist_conf(self):
135 """Check if whitelisting is enabled.""" 136 if not self.options.get("whitelist-dns"): 137 log.debug("Whitelisting Disabled.") 138 return False 139 140 return True
141
142 - def _is_whitelisted(self, conn, hostname):
143 """Checks if whitelisting conditions are met""" 144 # is whitelistng enabled ? 145 if not self.whitelist_enabled: 146 return False 147 148 # is DNS recording coming from allowed NS server 149 if not self.known_dns: 150 pass 151 elif (conn.get("src") in self.known_dns or 152 conn.get("dst") in self.known_dns): 153 pass 154 else: 155 return False 156 157 # is hostname whitelisted 158 if hostname not in self.whitelist: 159 return False 160 161 return True
162
163 - def _build_known_dns(self):
164 """Build known DNS list.""" 165 result = [] 166 _known_dns = self.options.get("allowed-dns") 167 if _known_dns is not None: 168 for r in _known_dns.split(","): 169 result.append(r.strip()) 170 return result 171 172 return []
173
    def _dns_gethostbyname(self, name):
        """Get host by name wrapper.
        @param name: hostname.
        @return: IP address or blank
        """
        # Live DNS resolution during processing is optional; it is gated on
        # the resolve_dns setting in the processing configuration.
        if cfg.processing.resolve_dns:
            ip = resolve(name)
        else:
            ip = ""
        return ip
184
185 - def _is_private_ip(self, ip):
186 """Check if the IP belongs to private network blocks. 187 @param ip: IP address to verify. 188 @return: boolean representing whether the IP belongs or not to 189 a private network block. 190 """ 191 networks = [ 192 "0.0.0.0/8", 193 "10.0.0.0/8", 194 "100.64.0.0/10", 195 "127.0.0.0/8", 196 "169.254.0.0/16", 197 "172.16.0.0/12", 198 "192.0.0.0/24", 199 "192.0.2.0/24", 200 "192.88.99.0/24", 201 "192.168.0.0/16", 202 "198.18.0.0/15", 203 "198.51.100.0/24", 204 "203.0.113.0/24", 205 "240.0.0.0/4", 206 "255.255.255.255/32", 207 "224.0.0.0/4" 208 ] 209 210 for network in networks: 211 try: 212 ipaddr = struct.unpack(">I", socket.inet_aton(ip))[0] 213 214 netaddr, bits = network.split("/") 215 216 network_low = struct.unpack(">I", socket.inet_aton(netaddr))[0] 217 network_high = network_low | (1 << (32 - int(bits))) - 1 218 219 if ipaddr <= network_high and ipaddr >= network_low: 220 return True 221 except: 222 continue 223 224 return False
225
    def _add_hosts(self, connection):
        """Add IPs to unique list.
        @param connection: connection data
        """
        try:
            # TODO: Perhaps this block should be removed.
            # If there is a packet from a non-local IP address, which hasn't
            # been seen before, it means that the connection wasn't initiated
            # during the time of the current analysis.
            if connection["src"] not in self.hosts:
                ip = convert_to_printable(connection["src"])

                # We consider the IP only if it hasn't been seen before.
                if ip not in self.hosts:
                    # If the IP is not a local one, this might be a leftover
                    # packet as described in issue #249.
                    # NOTE(review): sources are therefore only recorded when
                    # private, whereas destinations (below) are always
                    # recorded -- this asymmetry appears deliberate.
                    if self._is_private_ip(ip):
                        self.hosts.append(ip)

            if connection["dst"] not in self.hosts:
                ip = convert_to_printable(connection["dst"])

                if ip not in self.hosts:
                    self.hosts.append(ip)

                    # We add external IPs to the list, only the first time
                    # we see them and if they're the destination of the
                    # first packet they appear in.
                    if not self._is_private_ip(ip) and ip not in self.whitelist_ips:
                        self.unique_hosts.append(ip)
        except:
            # Best-effort: malformed connection data is silently ignored.
            pass
258
259 - def _tcp_dissect(self, conn, data):
260 """Runs all TCP dissectors. 261 @param conn: connection. 262 @param data: payload data. 263 """ 264 if self._check_http(data): 265 self._add_http(data, conn["dport"]) 266 267 # SMTP. 268 if conn["dport"] == 25: 269 self._reassemble_smtp(conn, data) 270 271 # IRC. 272 if conn["dport"] != 21 and self._check_irc(data): 273 self._add_irc(data) 274 275 # HTTPS. 276 if conn["dport"] in self.ssl_ports or conn["sport"] in self.ssl_ports: 277 self._https_identify(conn, data)
278
279 - def _udp_dissect(self, conn, data):
280 """Runs all UDP dissectors. 281 @param conn: connection. 282 @param data: payload data. 283 """ 284 # Select DNS and MDNS traffic. 285 if conn["dport"] == 53 or conn["sport"] == 53 or conn["dport"] == 5353 or conn["sport"] == 5353: 286 if self._check_dns(data): 287 self._add_dns(conn, data)
288
289 - def _check_icmp(self, icmp_data):
290 """Checks for ICMP traffic. 291 @param icmp_data: ICMP data flow. 292 """ 293 try: 294 return isinstance(icmp_data, dpkt.icmp.ICMP) and \ 295 len(icmp_data.data) > 0 296 except: 297 return False
298
299 - def _icmp_dissect(self, conn, data):
300 """Runs all ICMP dissectors. 301 @param conn: connection. 302 @param data: payload data. 303 """ 304 305 if self._check_icmp(data): 306 # If ICMP packets are coming from the host, it probably isn't 307 # relevant traffic, hence we can skip from reporting it. 308 if conn["src"] == cfg.resultserver.ip: 309 return 310 311 entry = {} 312 entry["src"] = conn["src"] 313 entry["dst"] = conn["dst"] 314 entry["type"] = data.type 315 316 # Extract data from dpkg.icmp.ICMP. 317 try: 318 entry["data"] = convert_to_printable(data.data.data) 319 except: 320 entry["data"] = "" 321 322 self.icmp_requests.append(entry)
323
324 - def _check_dns(self, udpdata):
325 """Checks for DNS traffic. 326 @param udpdata: UDP data flow. 327 """ 328 try: 329 dpkt.dns.DNS(udpdata) 330 except: 331 return False 332 333 return True
334
    def _add_dns(self, conn, udpdata):
        """Adds a DNS data flow.
        @param conn: connection dict, used for the whitelist DNS-server check.
        @param udpdata: UDP data flow.
        @return: False when the packet has no question section, True otherwise.
        """
        dns = dpkt.dns.DNS(udpdata)

        # DNS query parsing.
        query = {}
        # Temporary list for found A or AAAA responses.
        _ip = []

        # NOTE(review): the trailing "or True" makes this condition a
        # tautology, so every parseable DNS packet is processed regardless
        # of rcode/qr/opcode -- confirm whether that is intentional before
        # changing it.
        if dns.rcode == dpkt.dns.DNS_RCODE_NOERR or \
                dns.qr == dpkt.dns.DNS_R or \
                dns.opcode == dpkt.dns.DNS_QUERY or True:
            # DNS question.
            try:
                q_name = dns.qd[0].name
                q_type = dns.qd[0].type
            except IndexError:
                return False

            # DNS RR type mapping.
            # See: http://www.iana.org/assignments/dns-parameters/dns-parameters.xhtml#dns-parameters-4
            # See: https://github.com/kbandla/dpkt/blob/master/dpkt/dns.py#L42
            query["request"] = q_name
            if q_type == dpkt.dns.DNS_A:
                query["type"] = "A"
            elif q_type == dpkt.dns.DNS_AAAA:
                query["type"] = "AAAA"
            elif q_type == dpkt.dns.DNS_CNAME:
                query["type"] = "CNAME"
            elif q_type == dpkt.dns.DNS_MX:
                query["type"] = "MX"
            elif q_type == dpkt.dns.DNS_PTR:
                query["type"] = "PTR"
            elif q_type == dpkt.dns.DNS_NS:
                query["type"] = "NS"
            elif q_type == dpkt.dns.DNS_SOA:
                query["type"] = "SOA"
            elif q_type == dpkt.dns.DNS_HINFO:
                query["type"] = "HINFO"
            elif q_type == dpkt.dns.DNS_TXT:
                query["type"] = "TXT"
            elif q_type == dpkt.dns.DNS_SRV:
                query["type"] = "SRV"
            elif q_type == dpkt.dns.DNS_ANY:
                # For example MDNS requests have q_type=255.
                query["type"] = "All"
            else:
                # Some requests are not parsed by dpkt.
                query["type"] = "None"

            # DNS answer.
            query["answers"] = []
            for answer in dns.an:
                ans = {}
                if answer.type == dpkt.dns.DNS_A:
                    ans["type"] = "A"
                    try:
                        ans["data"] = socket.inet_ntoa(answer.rdata)
                        _ip.append(ans["data"])
                    except socket.error:
                        # Malformed rdata: skip this answer entirely.
                        continue
                elif answer.type == dpkt.dns.DNS_AAAA:
                    ans["type"] = "AAAA"
                    try:
                        ans["data"] = socket.inet_ntop(socket.AF_INET6,
                                                       answer.rdata)
                        _ip.append(ans["data"])
                    except (socket.error, ValueError):
                        continue
                elif answer.type == dpkt.dns.DNS_CNAME:
                    ans["type"] = "CNAME"
                    ans["data"] = answer.cname
                elif answer.type == dpkt.dns.DNS_MX:
                    ans["type"] = "MX"
                    ans["data"] = answer.mxname
                elif answer.type == dpkt.dns.DNS_PTR:
                    ans["type"] = "PTR"
                    ans["data"] = answer.ptrname
                elif answer.type == dpkt.dns.DNS_NS:
                    ans["type"] = "NS"
                    ans["data"] = answer.nsname
                elif answer.type == dpkt.dns.DNS_SOA:
                    ans["type"] = "SOA"
                    ans["data"] = ",".join([answer.mname,
                                            answer.rname,
                                            str(answer.serial),
                                            str(answer.refresh),
                                            str(answer.retry),
                                            str(answer.expire),
                                            str(answer.minimum)])
                elif answer.type == dpkt.dns.DNS_HINFO:
                    ans["type"] = "HINFO"
                    ans["data"] = " ".join(answer.text)
                elif answer.type == dpkt.dns.DNS_TXT:
                    ans["type"] = "TXT"
                    ans["data"] = " ".join(answer.text)

                # TODO: add srv handling
                # NOTE: unmatched answer types append an empty dict here.
                query["answers"].append(ans)

            # Whitelisted domains are skipped, but their resolved IPs are
            # remembered so related traffic can be filtered as well.
            if self._is_whitelisted(conn, q_name):
                log.debug("DNS target {0} whitelisted. Skipping ...".format(q_name))
                self.whitelist_ips = self.whitelist_ips + _ip
                return True

            self._add_domain(query["request"])

            # Deduplicate by (type, request); repeated queries only
            # contribute answers not seen before.
            reqtuple = query["type"], query["request"]
            if reqtuple not in self.dns_requests:
                self.dns_requests[reqtuple] = query
            else:
                new_answers = set((i["type"], i["data"]) for i in query["answers"]) - self.dns_answers
                self.dns_requests[reqtuple]["answers"] += [dict(type=i[0], data=i[1]) for i in new_answers]

        return True
452
453 - def _add_domain(self, domain):
454 """Add a domain to unique list. 455 @param domain: domain name. 456 """ 457 filters = [ 458 ".*\\.windows\\.com$", 459 ".*\\.in\\-addr\\.arpa$" 460 ] 461 462 regexps = [re.compile(filter) for filter in filters] 463 for regexp in regexps: 464 if regexp.match(domain): 465 return 466 467 for entry in self.unique_domains: 468 if entry["domain"] == domain: 469 return 470 471 self.unique_domains.append({"domain": domain, 472 "ip": self._dns_gethostbyname(domain)})
473
    def _check_http(self, tcpdata):
        """Checks for HTTP traffic.
        @param tcpdata: TCP data flow.
        @return: True when the payload looks like an HTTP request.
        """
        try:
            r = dpkt.http.Request()
            # Pre-seed the request-line fields so that, when unpack() fails
            # partway through (e.g. a truncated capture), we can still tell
            # whether the request line itself was parsed.
            r.method, r.version, r.uri = None, None, None
            r.unpack(tcpdata)
        except dpkt.dpkt.UnpackError:
            # A partial parse that produced any request-line field is still
            # considered HTTP.
            if r.method is not None or r.version is not None or \
                    r.uri is not None:
                return True
            return False

        return True
489
    def _add_http(self, tcpdata, dport):
        """Adds an HTTP flow.
        @param tcpdata: TCP data flow.
        @param dport: destination port.
        @return: True on success or duplicate, False when extraction fails.
        """
        # Identical payloads are deduplicated; only the counter is bumped.
        if tcpdata in self.http_requests:
            self.http_requests[tcpdata]["count"] += 1
            return True

        try:
            http = dpkt.http.Request()
            http.unpack(tcpdata)
        except dpkt.dpkt.UnpackError:
            # A partially unpacked request still exposes usable fields;
            # fall through and extract whatever is available.
            pass

        try:
            entry = {"count": 1}

            if "host" in http.headers:
                entry["host"] = convert_to_printable(http.headers["host"])
            else:
                entry["host"] = ""

            entry["port"] = dport

            # Manually deal with cases when destination port is not the
            # default one and it is not included in host header.
            netloc = entry["host"]
            if dport != 80 and ":" not in netloc:
                netloc += ":" + str(entry["port"])

            entry["data"] = convert_to_printable(tcpdata)
            url = urlparse.urlunparse(("http", netloc, http.uri,
                                       None, None, None))
            entry["uri"] = convert_to_printable(url)
            entry["body"] = convert_to_printable(http.body)
            entry["path"] = convert_to_printable(http.uri)

            if "user-agent" in http.headers:
                entry["user-agent"] = \
                    convert_to_printable(http.headers["user-agent"])

            entry["version"] = convert_to_printable(http.version)
            entry["method"] = convert_to_printable(http.method)

            self.http_requests[tcpdata] = entry
        except Exception:
            # Any missing attribute on the (possibly partial) request object
            # aborts extraction for this payload.
            return False

        return True
540
    def _https_identify(self, conn, data):
        """Extract a combination of the Session ID, Client Random, and Server
        Random in order to identify the accompanying master secret later.
        @param conn: connection dict (unused here, kept for dissector API).
        @param data: TCP payload observed on an SSL port.
        """
        # Old dpkt releases lack TLS record parsing; warn only once.
        if not hasattr(dpkt.ssl, "TLSRecord"):
            if not Pcap.notified_dpkt:
                Pcap.notified_dpkt = True
                log.warning("Using an old version of dpkt that does not "
                            "support TLS streams (install the latest with "
                            "`pip install dpkt`)")
            return

        try:
            record = dpkt.ssl.TLSRecord(data)
        except dpkt.NeedData:
            return
        except:
            log.exception("Error reading possible TLS Record")
            return

        # Is this a valid TLS packet?
        if record.type not in dpkt.ssl.RECORD_TYPES:
            return

        try:
            record = dpkt.ssl.RECORD_TYPES[record.type](record.data)
        except dpkt.ssl.SSL3Exception:
            return
        except dpkt.NeedData:
            log.exception("Incomplete possible TLS Handshake record found")
            return

        # Is this a TLSv1 Handshake packet?
        if not isinstance(record, dpkt.ssl.TLSHandshake):
            return

        # We're only interested in the TLS Server Hello packets.
        if not isinstance(record.data, dpkt.ssl.TLSServerHello):
            return

        # Extract the server random and the session id.
        # NOTE: str.encode("hex") is Python 2-only.
        self.tls_keys.append({
            "server_random": record.data.random.encode("hex"),
            "session_id": record.data.session_id.encode("hex"),
        })
585
586 - def _reassemble_smtp(self, conn, data):
587 """Reassemble a SMTP flow. 588 @param conn: connection dict. 589 @param data: raw data. 590 """ 591 if conn["dst"] in self.smtp_flow: 592 self.smtp_flow[conn["dst"]] += data 593 else: 594 self.smtp_flow[conn["dst"]] = data
595
596 - def _process_smtp(self):
597 """Process SMTP flow.""" 598 for conn, data in self.smtp_flow.iteritems(): 599 # Detect new SMTP flow. 600 if data.startswith(("EHLO", "HELO")): 601 self.smtp_requests.append({"dst": conn, "raw": data})
602
603 - def _check_irc(self, tcpdata):
604 """ 605 Checks for IRC traffic. 606 @param tcpdata: tcp data flow 607 """ 608 try: 609 req = ircMessage() 610 except Exception: 611 return False 612 613 return req.isthereIRC(tcpdata)
614
615 - def _add_irc(self, tcpdata):
616 """ 617 Adds an IRC communication. 618 @param tcpdata: TCP data in flow 619 @param dport: destination port 620 """ 621 622 try: 623 reqc = ircMessage() 624 reqs = ircMessage() 625 filters_sc = ["266"] 626 self.irc_requests = self.irc_requests + \ 627 reqc.getClientMessages(tcpdata) + \ 628 reqs.getServerMessagesFilter(tcpdata, filters_sc) 629 except Exception: 630 return False 631 632 return True
633
    def run(self):
        """Process PCAP.
        @return: dict with network analysis data.
        """

        # NOTE(review): "file" shadows the builtin; left unchanged here.
        try:
            file = open(self.filepath, "rb")
        except (IOError, OSError):
            log.error("Unable to open %s" % self.filepath)
            return self.results

        try:
            pcap = dpkt.pcap.Reader(file)
        except dpkt.dpkt.NeedData:
            log.error("Unable to read PCAP file at path \"%s\".",
                      self.filepath)
            return self.results
        except ValueError:
            log.error("Unable to read PCAP file at path \"%s\". File is "
                      "corrupted or wrong format." % self.filepath)
            return self.results

        # "offset" tracks the file position *before* each packet so that
        # per-connection offsets point at the connection's first packet.
        offset = file.tell()
        first_ts = None
        for ts, buf in pcap:
            # Timestamps are reported relative to the first packet.
            if not first_ts:
                first_ts = ts

            try:
                ip = iplayer_from_raw(buf, pcap.datalink())

                connection = {}
                if isinstance(ip, dpkt.ip.IP):
                    connection["src"] = socket.inet_ntoa(ip.src)
                    connection["dst"] = socket.inet_ntoa(ip.dst)
                elif isinstance(ip, dpkt.ip6.IP6):
                    connection["src"] = socket.inet_ntop(socket.AF_INET6,
                                                         ip.src)
                    connection["dst"] = socket.inet_ntop(socket.AF_INET6,
                                                         ip.dst)
                else:
                    # Non-IP packet: skip, but keep the offset up to date.
                    offset = file.tell()
                    continue

                self._add_hosts(connection)

                if ip.p == dpkt.ip.IP_PROTO_TCP:
                    tcp = ip.data
                    if not isinstance(tcp, dpkt.tcp.TCP):
                        tcp = dpkt.tcp.TCP(tcp)

                    if tcp.data:
                        connection["sport"] = tcp.sport
                        connection["dport"] = tcp.dport
                        self._tcp_dissect(connection, tcp.data)

                        # Record each flow once, regardless of direction.
                        src, sport, dst, dport = (connection["src"], connection["sport"], connection["dst"], connection["dport"])
                        if not ((dst, dport, src, sport) in self.tcp_connections_seen or (src, sport, dst, dport) in self.tcp_connections_seen):
                            self.tcp_connections.append((src, sport, dst, dport, offset, ts-first_ts))
                            self.tcp_connections_seen.add((src, sport, dst, dport))

                        self.alive_hosts[dst, dport] = True
                    else:
                        # Empty TCP segments: repeated identical seq/ack
                        # pairs indicate retransmitted connection attempts
                        # to an unresponsive endpoint.
                        ipconn = (
                            connection["src"], tcp.sport,
                            connection["dst"], tcp.dport,
                        )
                        seqack = self.tcp_connections_dead.get(ipconn)
                        if seqack == (tcp.seq, tcp.ack):
                            host = connection["dst"], tcp.dport
                            self.dead_hosts[host] = self.dead_hosts.get(host, 1) + 1

                        self.tcp_connections_dead[ipconn] = tcp.seq, tcp.ack

                elif ip.p == dpkt.ip.IP_PROTO_UDP:
                    udp = ip.data
                    if not isinstance(udp, dpkt.udp.UDP):
                        udp = dpkt.udp.UDP(udp)

                    if len(udp.data) > 0:
                        connection["sport"] = udp.sport
                        connection["dport"] = udp.dport
                        self._udp_dissect(connection, udp.data)

                        # Record each flow once, regardless of direction.
                        src, sport, dst, dport = (connection["src"], connection["sport"], connection["dst"], connection["dport"])
                        if not ((dst, dport, src, sport) in self.udp_connections_seen or (src, sport, dst, dport) in self.udp_connections_seen):
                            self.udp_connections.append((src, sport, dst, dport, offset, ts-first_ts))
                            self.udp_connections_seen.add((src, sport, dst, dport))

                elif ip.p == dpkt.ip.IP_PROTO_ICMP:
                    icmp = ip.data
                    if not isinstance(icmp, dpkt.icmp.ICMP):
                        icmp = dpkt.icmp.ICMP(icmp)

                    self._icmp_dissect(connection, icmp)

                offset = file.tell()
            except AttributeError:
                continue
            except dpkt.dpkt.NeedData:
                continue
            except Exception as e:
                log.exception("Failed to process packet: %s", e)

        file.close()

        # Post processors for reconstructed flows.
        self._process_smtp()

        # Build results dict.
        self.results["hosts"] = self.unique_hosts
        self.results["domains"] = self.unique_domains
        self.results["tcp"] = [conn_from_flowtuple(i) for i in self.tcp_connections]
        self.results["udp"] = [conn_from_flowtuple(i) for i in self.udp_connections]
        self.results["icmp"] = self.icmp_requests
        self.results["http"] = self.http_requests.values()
        self.results["tls"] = self.tls_keys
        self.results["dns"] = self.dns_requests.values()
        self.results["smtp"] = self.smtp_requests
        self.results["irc"] = self.irc_requests

        self.results["dead_hosts"] = []

        # Report each IP/port combination as a dead host if we've had to retry
        # at least 3 times to connect to it and if no successful connections
        # were detected throughout the analysis.
        for (ip, port), count in self.dead_hosts.items():
            if count < 3 or (ip, port) in self.alive_hosts:
                continue

            # Report once.
            if (ip, port) not in self.results["dead_hosts"]:
                self.results["dead_hosts"].append((ip, port))

        return self.results
769
class Pcap2(object):
    """Interprets the PCAP file through the httpreplay library which parses
    the various protocols, decrypts and decodes them, and then provides us
    with the high level representation of it."""

    def __init__(self, pcap_path, tlsmaster, network_path):
        """
        @param pcap_path: path to the PCAP file to parse.
        @param tlsmaster: mapping used by httpreplay to decrypt HTTPS streams.
        @param network_path: directory where extracted response bodies are
            dumped.
        """
        self.pcap_path = pcap_path
        self.network_path = network_path

        # Per-port protocol handlers; the HTTPS handlers capture tlsmaster.
        self.handlers = {
            25: httpreplay.cut.smtp_handler,
            80: httpreplay.cut.http_handler,
            8000: httpreplay.cut.http_handler,
            8080: httpreplay.cut.http_handler,
            443: lambda: httpreplay.cut.https_handler(tlsmaster),
            4443: lambda: httpreplay.cut.https_handler(tlsmaster),
        }

    def run(self):
        """Process the PCAP through httpreplay.
        @return: dict with "http_ex"/"https_ex" lists of extended streams.
        """
        results = {
            "http_ex": [],
            "https_ex": [],
        }

        if not os.path.exists(self.network_path):
            os.mkdir(self.network_path)

        # Close the PCAP file handle deterministically instead of leaking
        # it (the original never closed it).
        with open(self.pcap_path, "rb") as pcap_fd:
            r = httpreplay.reader.PcapReader(pcap_fd)
            r.tcp = httpreplay.smegma.TCPPacketStreamer(r, self.handlers)

            # Streams are reported in chronological order.
            l = sorted(r.process(), key=lambda x: x[1])
            for s, ts, protocol, sent, recv in l:
                srcip, srcport, dstip, dstport = s

                if protocol == "http" or protocol == "https":
                    # Keep only the headers; the body is dumped to disk.
                    request = sent.raw.split("\r\n\r\n", 1)[0]
                    response = recv.raw.split("\r\n\r\n", 1)[0]

                    md5 = hashlib.md5(recv.body or "").hexdigest()
                    sha1 = hashlib.sha1(recv.body or "").hexdigest()

                    # Response bodies are stored by their sha1 checksum;
                    # close each dump file as soon as it is written.
                    filepath = os.path.join(self.network_path, sha1)
                    with open(filepath, "wb") as body_fd:
                        body_fd.write(recv.body or "")

                    results["%s_ex" % protocol].append({
                        "src": srcip, "sport": srcport,
                        "dst": dstip, "dport": dstport,
                        "protocol": protocol,
                        "method": sent.method,
                        "host": sent.headers.get("host", dstip),
                        "uri": sent.uri,
                        "request": request.decode("latin-1"),
                        "response": response.decode("latin-1"),
                        "md5": md5,
                        "sha1": sha1,
                        "path": filepath,
                    })

        return results
829
class NetworkAnalysis(Processing):
    """Network analysis."""

    # Runs early in the processing pipeline; results end up under the
    # "network" key of the report.
    order = 2
    key = "network"

    def run(self):
        """Run the network analysis.
        @return: dict with mitm output, PCAP hashes and parsed protocol data.
        """
        results = {}

        # Include any results provided by the mitm script.
        results["mitm"] = []
        if os.path.exists(self.mitmout_path):
            for line in open(self.mitmout_path, "rb"):
                try:
                    results["mitm"].append(json.loads(line))
                except:
                    # Non-JSON lines are kept verbatim.
                    results["mitm"].append(line)

        if not os.path.exists(self.pcap_path):
            log.warning("The PCAP file does not exist at path \"%s\".",
                        self.pcap_path)
            return results

        if os.path.getsize(self.pcap_path) == 0:
            log.error("The PCAP file at path \"%s\" is empty." % self.pcap_path)
            return results

        # PCAP file hash.
        if os.path.exists(self.pcap_path):
            results["pcap_sha256"] = File(self.pcap_path).get_sha256()

        if not HAVE_DPKT and not HAVE_HTTPREPLAY:
            log.error("Both Python HTTPReplay and Python DPKT are not "
                      "installed, no PCAP analysis possible.")
            return results

        # Optionally sort the PCAP by flow before parsing it.
        sorted_path = self.pcap_path.replace("dump.", "dump_sorted.")
        if cfg.processing.sort_pcap:
            sort_pcap(self.pcap_path, sorted_path)
            pcap_path = sorted_path

            # Sorted PCAP file hash.
            if os.path.exists(sorted_path):
                results["sorted_pcap_sha256"] = File(sorted_path).get_sha256()
        else:
            pcap_path = self.pcap_path

        # dpkt-based analysis (hosts, dns, http, tls, smtp, irc, ...).
        if HAVE_DPKT:
            results.update(Pcap(pcap_path, self.options).run())

        # httpreplay-based analysis (decrypted/decoded http(s) streams).
        if HAVE_HTTPREPLAY and os.path.exists(pcap_path):
            try:
                p2 = Pcap2(pcap_path, self.get_tlsmaster(), self.network_path)
                results.update(p2.run())
            except:
                log.exception("Error running httpreplay-based PCAP analysis")

        return results

    def get_tlsmaster(self):
        """Obtain the client/server random to TLS master secrets mapping that
        we have obtained through dynamic analysis.
        @return: dict keyed by (client_random, server_random).
        """
        tlsmaster = {}
        summary = self.results.get("behavior", {}).get("summary", {})
        for entry in summary.get("tls_master", []):
            client_random, server_random, master_secret = entry
            # NOTE: str.decode("hex") is Python 2-only.
            client_random = client_random.decode("hex")
            server_random = server_random.decode("hex")
            master_secret = master_secret.decode("hex")
            tlsmaster[client_random, server_random] = master_secret
        return tlsmaster
901
def iplayer_from_raw(raw, linktype=1):
    """Converts a raw packet to a dpkt packet regarding of link type.
    @param raw: raw packet
    @param linktype: integer describing link type as expected by dpkt
    @return: the IP layer object, or None on truncated ethernet frames.
    """
    # Ethernet capture (linktype 1): the IP layer is the frame's payload.
    if linktype == 1:
        try:
            return dpkt.ethernet.Ethernet(raw).data
        except dpkt.NeedData:
            return None

    # Raw IP capture (linktype 101): the packet already is the IP layer.
    if linktype == 101:
        return dpkt.ip.IP(raw)

    raise CuckooProcessingError("unknown PCAP linktype")
917
def conn_from_flowtuple(ft):
    """Convert the flow tuple into a dictionary (suitable for JSON)"""
    # ft is (source ip, source port, dest ip, dest port, offset, relative ts).
    keys = ("src", "sport", "dst", "dport", "offset", "time")
    return dict(zip(keys, ft))
924 925 # input_iterator should be a class that also supports writing so we can use 926 # it for the temp files 927 # this code is mostly taken from some SO post, can't remember the url though
def batch_sort(input_iterator, output_path, buffer_size=32000, output_class=None):
    """batch sort helper with temporary files, supports sorting large stuff
    @param input_iterator: iterable yielding Keyed items; its class (or
        output_class) must accept a path and support write()/close().
    @param output_path: path of the sorted output.
    @param buffer_size: number of items sorted in memory per chunk.
    @param output_class: factory for chunk/output writers; defaults to the
        input iterator's own class.
    """
    if not output_class:
        output_class = input_iterator.__class__

    chunks = []
    try:
        while True:
            # Pull up to buffer_size items and sort this chunk in memory.
            current_chunk = list(islice(input_iterator, buffer_size))
            if not current_chunk:
                break

            current_chunk.sort()
            # Spill the sorted chunk into a temporary file; only the path is
            # needed, so the raw descriptor is closed immediately.
            fd, filepath = tempfile.mkstemp()
            os.close(fd)
            output_chunk = output_class(filepath)
            chunks.append(output_chunk)

            for elem in current_chunk:
                output_chunk.write(elem.obj)
            output_chunk.close()

        # K-way merge of all sorted chunk files into the final output.
        output_file = output_class(output_path)
        for elem in heapq.merge(*chunks):
            output_file.write(elem.obj)
        output_file.close()
    finally:
        # Best-effort cleanup of the temporary chunk files.
        for chunk in chunks:
            try:
                chunk.close()
                os.remove(chunk.name)
            except Exception:
                pass
961
class SortCap(object):
    """SortCap is a wrapper around the packet lib (dpkt) that allows us to sort pcaps
    together with the batch_sort function above."""

    def __init__(self, path, linktype=1):
        """
        @param path: path of the PCAP file to read or write.
        @param linktype: dpkt datalink type; updated from the file itself
            once reading starts.
        """
        self.name = path
        self.linktype = linktype
        # File descriptor is opened lazily by write() or __iter__().
        self.fd = None
        self.ctr = 0  # counter to pass through packets without flow info (non-IP)
        self.conns = set()

    def write(self, p):
        # Lazily open a PCAP writer on first write.
        if not self.fd:
            self.fd = dpkt.pcap.Writer(open(self.name, "wb"), linktype=self.linktype)
        self.fd.writepkt(p.raw, p.ts)

    def __iter__(self):
        # Lazily open a PCAP reader and adopt the file's own link type.
        if not self.fd:
            self.fd = dpkt.pcap.Reader(open(self.name, "rb"))
            self.fditer = iter(self.fd)
            self.linktype = self.fd.datalink()
        return self

    def close(self):
        # Reset fd so the object can be reopened for reading after writing.
        if self.fd:
            self.fd.close()
            self.fd = None

    def next(self):
        # NOTE: Python 2 iterator protocol ("next" rather than "__next__").
        rp = next(self.fditer)
        if rp is None:
            return None

        self.ctr += 1

        ts, raw = rp
        rpkt = Packet(raw, ts)

        sip, dip, sport, dport, proto = flowtuple_from_raw(raw, self.linktype)

        # check other direction of same flow
        if (dip, sip, dport, sport, proto) in self.conns:
            flowtuple = (dip, sip, dport, sport, proto)
        else:
            flowtuple = (sip, dip, sport, dport, proto)

        self.conns.add(flowtuple)
        # Sort key is (flow, timestamp, packet counter) so batch_sort groups
        # packets by flow while preserving capture order within each flow.
        return Keyed((flowtuple, ts, self.ctr), rpkt)
1010
def sort_pcap(inpath, outpath):
    """Use SortCap class together with batch_sort to sort a pcap"""
    reader = SortCap(inpath)

    def make_writer(path):
        # The writer inherits the reader's link type, which is determined
        # lazily once batch_sort starts iterating the reader.
        return SortCap(path, linktype=reader.linktype)

    batch_sort(reader, outpath, output_class=make_writer)
    return 0
1016
def flowtuple_from_raw(raw, linktype=1):
    """Parse a packet from a pcap just enough to gain a flow description tuple"""
    ip = iplayer_from_raw(raw, linktype)

    # Non-IP (or unparseable) packets get a sentinel flow tuple.
    if not isinstance(ip, dpkt.ip.IP):
        return (0, 0, 0, 0, -1)

    sip = socket.inet_ntoa(ip.src)
    dip = socket.inet_ntoa(ip.dst)
    proto = ip.p

    # Only TCP and UDP carry port information.
    sport, dport = 0, 0
    if proto in (dpkt.ip.IP_PROTO_TCP, dpkt.ip.IP_PROTO_UDP):
        transport = ip.data
        sport, dport = transport.sport, transport.dport

    return (sip, dip, sport, dport, proto)
1037
def payload_from_raw(raw, linktype=1):
    """Get the payload from a packet, the data below TCP/UDP basically
    @param raw: raw packet buffer.
    @param linktype: dpkt datalink type of the capture.
    @return: transport-layer payload, or "" when there is none.
    """
    ip = iplayer_from_raw(raw, linktype)
    try:
        return ip.data.data
    except Exception:
        # Narrowed from a bare "except:" which also swallowed SystemExit
        # and KeyboardInterrupt; missing attributes (non-IP or headerless
        # packets) still fall back to an empty payload.
        return ""
1045
def next_connection_packets(piter, linktype=1):
    """Extract all packets belonging to the same flow from a pcap packet
    iterator.
    @param piter: iterator yielding (timestamp, raw packet) pairs.
    @param linktype: dpkt datalink type of the capture.
    """
    first_ft = None

    for ts, raw in piter:
        ft = flowtuple_from_raw(raw, linktype)
        # The first packet defines the connection we are extracting.
        if not first_ft:
            first_ft = ft

        sip, dip, sport, dport, proto = ft
        # Stop at the first packet that matches neither direction of the
        # connection.
        if not (first_ft == ft or first_ft == (dip, sip, dport, sport, proto)):
            break

        yield {
            "src": sip, "dst": dip, "sport": sport, "dport": dport,
            # NOTE: str.encode("base64") is Python 2-only.
            "raw": payload_from_raw(raw, linktype).encode("base64"),
            # True for packets flowing in the same direction as the first.
            "direction": first_ft == ft,
        }
1065
def packets_for_stream(fobj, offset):
    """Open a PCAP, seek to a packet offset, then get all packets belonging to
    the same connection.
    @param fobj: open file object for the PCAP.
    @param offset: byte offset of the connection's first packet.
    """
    pcap = dpkt.pcap.Reader(fobj)
    pcapiter = iter(pcap)
    # Consume one packet so the reader has parsed the global header, then
    # jump to the requested connection. The next() builtin replaces the
    # Python 2-only .next() method call; identical behavior on Python 2.6+.
    ts, raw = next(pcapiter)

    fobj.seek(offset)
    for p in next_connection_packets(pcapiter, linktype=pcap.datalink()):
        yield p