1
2
3
4
5
6 import hashlib
7 import logging
8 import json
9 import os
10 import re
11 import socket
12 import struct
13 import tempfile
14 import urlparse
15
16 from lib.cuckoo.common.abstracts import Processing
17 from lib.cuckoo.common.config import Config
18 from lib.cuckoo.common.constants import LATEST_HTTPREPLAY, CUCKOO_ROOT
19 from lib.cuckoo.common.dns import resolve
20 from lib.cuckoo.common.irc import ircMessage
21 from lib.cuckoo.common.objects import File
22 from lib.cuckoo.common.utils import convert_to_printable, versiontuple
23 from lib.cuckoo.common.exceptions import CuckooProcessingError
24
25 try:
26 import dpkt
27 HAVE_DPKT = True
28 except ImportError:
29 HAVE_DPKT = False
30
31 try:
32 import httpreplay
33 import httpreplay.cut
34
35
36 logging.getLogger("httpreplay").setLevel(logging.CRITICAL)
37
38 HAVE_HTTPREPLAY = True
39 except ImportError:
40 HAVE_HTTPREPLAY = False
41
42
43
44
import heapq
from itertools import islice
from collections import namedtuple

# Keyed pairs a sortable key with an arbitrary payload object; it is what
# batch_sort() expects from its input iterator. Packet wraps a raw packet
# buffer together with its capture timestamp.
Keyed = namedtuple("Keyed", ["key", "obj"])
Packet = namedtuple("Packet", ["raw", "ts"])

log = logging.getLogger(__name__)
cfg = Config()

# Warn once at import time when an outdated httpreplay is installed, as older
# versions may mis-handle some TLS cipher suites and HTTP corner cases.
_v = getattr(httpreplay, "__version__", None) if HAVE_HTTPREPLAY else None
if _v and versiontuple(_v) < versiontuple(LATEST_HTTPREPLAY):
    log.warning(
        "You are using version %s of HTTPReplay, rather than the latest "
        "version %s, which may not handle various corner cases and/or TLS "
        "cipher suites correctly. This could result in not getting all the "
        "HTTP/HTTPS streams that are available or corrupt some streams that "
        "were not handled correctly before. Please upgrade it to the latest "
        "version (`pip install --upgrade httpreplay`).",
        _v, LATEST_HTTPREPLAY,
    )
67
69 """Reads network data from PCAP file."""
70 ssl_ports = 443,
71
72 notified_dpkt = False
73
75 """Creates a new instance.
76 @param filepath: path to PCAP file
77 @param options: config options
78 """
79 self.filepath = filepath
80 self.options = options
81
82
83 self.hosts = []
84
85 self.unique_hosts = []
86
87 self.unique_domains = []
88
89 self.tcp_connections = []
90 self.tcp_connections_seen = set()
91
92
93 self.tcp_connections_dead = {}
94 self.dead_hosts = {}
95 self.alive_hosts = {}
96
97 self.udp_connections = []
98 self.udp_connections_seen = set()
99
100 self.icmp_requests = []
101
102 self.http_requests = {}
103
104 self.tls_keys = []
105
106 self.dns_requests = {}
107 self.dns_answers = set()
108
109 self.smtp_requests = []
110
111 self.smtp_flow = {}
112
113 self.irc_requests = []
114
115 self.results = {}
116
117 self.whitelist = self._build_whitelist()
118
119 self.whitelist_ips = []
120
121 self.whitelist_enabled = self._build_whitelist_conf()
122
123 self.known_dns = self._build_known_dns()
124
126 result = []
127 whitelist_path = os.path.join(
128 CUCKOO_ROOT, "data", "whitelist", "domain.txt"
129 )
130 for line in open(whitelist_path, 'rb'):
131 result.append(line.strip())
132 return result
133
135 """Check if whitelisting is enabled."""
136 if not self.options.get("whitelist-dns"):
137 log.debug("Whitelisting Disabled.")
138 return False
139
140 return True
141
143 """Checks if whitelisting conditions are met"""
144
145 if not self.whitelist_enabled:
146 return False
147
148
149 if not self.known_dns:
150 pass
151 elif (conn.get("src") in self.known_dns or
152 conn.get("dst") in self.known_dns):
153 pass
154 else:
155 return False
156
157
158 if hostname not in self.whitelist:
159 return False
160
161 return True
162
164 """Build known DNS list."""
165 result = []
166 _known_dns = self.options.get("allowed-dns")
167 if _known_dns is not None:
168 for r in _known_dns.split(","):
169 result.append(r.strip())
170 return result
171
172 return []
173
175 """Get host by name wrapper.
176 @param name: hostname.
177 @return: IP address or blank
178 """
179 if cfg.processing.resolve_dns:
180 ip = resolve(name)
181 else:
182 ip = ""
183 return ip
184
186 """Check if the IP belongs to private network blocks.
187 @param ip: IP address to verify.
188 @return: boolean representing whether the IP belongs or not to
189 a private network block.
190 """
191 networks = [
192 "0.0.0.0/8",
193 "10.0.0.0/8",
194 "100.64.0.0/10",
195 "127.0.0.0/8",
196 "169.254.0.0/16",
197 "172.16.0.0/12",
198 "192.0.0.0/24",
199 "192.0.2.0/24",
200 "192.88.99.0/24",
201 "192.168.0.0/16",
202 "198.18.0.0/15",
203 "198.51.100.0/24",
204 "203.0.113.0/24",
205 "240.0.0.0/4",
206 "255.255.255.255/32",
207 "224.0.0.0/4"
208 ]
209
210 for network in networks:
211 try:
212 ipaddr = struct.unpack(">I", socket.inet_aton(ip))[0]
213
214 netaddr, bits = network.split("/")
215
216 network_low = struct.unpack(">I", socket.inet_aton(netaddr))[0]
217 network_high = network_low | (1 << (32 - int(bits))) - 1
218
219 if ipaddr <= network_high and ipaddr >= network_low:
220 return True
221 except:
222 continue
223
224 return False
225
227 """Add IPs to unique list.
228 @param connection: connection data
229 """
230 try:
231
232
233
234
235 if connection["src"] not in self.hosts:
236 ip = convert_to_printable(connection["src"])
237
238
239 if ip not in self.hosts:
240
241
242 if self._is_private_ip(ip):
243 self.hosts.append(ip)
244
245 if connection["dst"] not in self.hosts:
246 ip = convert_to_printable(connection["dst"])
247
248 if ip not in self.hosts:
249 self.hosts.append(ip)
250
251
252
253
254 if not self._is_private_ip(ip) and ip not in self.whitelist_ips:
255 self.unique_hosts.append(ip)
256 except:
257 pass
258
260 """Runs all TCP dissectors.
261 @param conn: connection.
262 @param data: payload data.
263 """
264 if self._check_http(data):
265 self._add_http(data, conn["dport"])
266
267
268 if conn["dport"] == 25:
269 self._reassemble_smtp(conn, data)
270
271
272 if conn["dport"] != 21 and self._check_irc(data):
273 self._add_irc(data)
274
275
276 if conn["dport"] in self.ssl_ports or conn["sport"] in self.ssl_ports:
277 self._https_identify(conn, data)
278
280 """Runs all UDP dissectors.
281 @param conn: connection.
282 @param data: payload data.
283 """
284
285 if conn["dport"] == 53 or conn["sport"] == 53 or conn["dport"] == 5353 or conn["sport"] == 5353:
286 if self._check_dns(data):
287 self._add_dns(conn, data)
288
290 """Checks for ICMP traffic.
291 @param icmp_data: ICMP data flow.
292 """
293 try:
294 return isinstance(icmp_data, dpkt.icmp.ICMP) and \
295 len(icmp_data.data) > 0
296 except:
297 return False
298
300 """Runs all ICMP dissectors.
301 @param conn: connection.
302 @param data: payload data.
303 """
304
305 if self._check_icmp(data):
306
307
308 if conn["src"] == cfg.resultserver.ip:
309 return
310
311 entry = {}
312 entry["src"] = conn["src"]
313 entry["dst"] = conn["dst"]
314 entry["type"] = data.type
315
316
317 try:
318 entry["data"] = convert_to_printable(data.data.data)
319 except:
320 entry["data"] = ""
321
322 self.icmp_requests.append(entry)
323
325 """Checks for DNS traffic.
326 @param udpdata: UDP data flow.
327 """
328 try:
329 dpkt.dns.DNS(udpdata)
330 except:
331 return False
332
333 return True
334
336 """Adds a DNS data flow.
337 @param udpdata: UDP data flow.
338 """
339 dns = dpkt.dns.DNS(udpdata)
340
341
342 query = {}
343
344 _ip = []
345
346 if dns.rcode == dpkt.dns.DNS_RCODE_NOERR or \
347 dns.qr == dpkt.dns.DNS_R or \
348 dns.opcode == dpkt.dns.DNS_QUERY or True:
349
350 try:
351 q_name = dns.qd[0].name
352 q_type = dns.qd[0].type
353 except IndexError:
354 return False
355
356
357
358
359 query["request"] = q_name
360 if q_type == dpkt.dns.DNS_A:
361 query["type"] = "A"
362 elif q_type == dpkt.dns.DNS_AAAA:
363 query["type"] = "AAAA"
364 elif q_type == dpkt.dns.DNS_CNAME:
365 query["type"] = "CNAME"
366 elif q_type == dpkt.dns.DNS_MX:
367 query["type"] = "MX"
368 elif q_type == dpkt.dns.DNS_PTR:
369 query["type"] = "PTR"
370 elif q_type == dpkt.dns.DNS_NS:
371 query["type"] = "NS"
372 elif q_type == dpkt.dns.DNS_SOA:
373 query["type"] = "SOA"
374 elif q_type == dpkt.dns.DNS_HINFO:
375 query["type"] = "HINFO"
376 elif q_type == dpkt.dns.DNS_TXT:
377 query["type"] = "TXT"
378 elif q_type == dpkt.dns.DNS_SRV:
379 query["type"] = "SRV"
380 elif q_type == dpkt.dns.DNS_ANY:
381
382 query["type"] = "All"
383 else:
384
385 query["type"] = "None"
386
387
388 query["answers"] = []
389 for answer in dns.an:
390 ans = {}
391 if answer.type == dpkt.dns.DNS_A:
392 ans["type"] = "A"
393 try:
394 ans["data"] = socket.inet_ntoa(answer.rdata)
395 _ip.append(ans["data"])
396 except socket.error:
397 continue
398 elif answer.type == dpkt.dns.DNS_AAAA:
399 ans["type"] = "AAAA"
400 try:
401 ans["data"] = socket.inet_ntop(socket.AF_INET6,
402 answer.rdata)
403 _ip.append(ans["data"])
404 except (socket.error, ValueError):
405 continue
406 elif answer.type == dpkt.dns.DNS_CNAME:
407 ans["type"] = "CNAME"
408 ans["data"] = answer.cname
409 elif answer.type == dpkt.dns.DNS_MX:
410 ans["type"] = "MX"
411 ans["data"] = answer.mxname
412 elif answer.type == dpkt.dns.DNS_PTR:
413 ans["type"] = "PTR"
414 ans["data"] = answer.ptrname
415 elif answer.type == dpkt.dns.DNS_NS:
416 ans["type"] = "NS"
417 ans["data"] = answer.nsname
418 elif answer.type == dpkt.dns.DNS_SOA:
419 ans["type"] = "SOA"
420 ans["data"] = ",".join([answer.mname,
421 answer.rname,
422 str(answer.serial),
423 str(answer.refresh),
424 str(answer.retry),
425 str(answer.expire),
426 str(answer.minimum)])
427 elif answer.type == dpkt.dns.DNS_HINFO:
428 ans["type"] = "HINFO"
429 ans["data"] = " ".join(answer.text)
430 elif answer.type == dpkt.dns.DNS_TXT:
431 ans["type"] = "TXT"
432 ans["data"] = " ".join(answer.text)
433
434
435 query["answers"].append(ans)
436
437 if self._is_whitelisted(conn, q_name):
438 log.debug("DNS target {0} whitelisted. Skipping ...".format(q_name))
439 self.whitelist_ips = self.whitelist_ips + _ip
440 return True
441
442 self._add_domain(query["request"])
443
444 reqtuple = query["type"], query["request"]
445 if reqtuple not in self.dns_requests:
446 self.dns_requests[reqtuple] = query
447 else:
448 new_answers = set((i["type"], i["data"]) for i in query["answers"]) - self.dns_answers
449 self.dns_requests[reqtuple]["answers"] += [dict(type=i[0], data=i[1]) for i in new_answers]
450
451 return True
452
453 - def _add_domain(self, domain):
454 """Add a domain to unique list.
455 @param domain: domain name.
456 """
457 filters = [
458 ".*\\.windows\\.com$",
459 ".*\\.in\\-addr\\.arpa$"
460 ]
461
462 regexps = [re.compile(filter) for filter in filters]
463 for regexp in regexps:
464 if regexp.match(domain):
465 return
466
467 for entry in self.unique_domains:
468 if entry["domain"] == domain:
469 return
470
471 self.unique_domains.append({"domain": domain,
472 "ip": self._dns_gethostbyname(domain)})
473
475 """Checks for HTTP traffic.
476 @param tcpdata: TCP data flow.
477 """
478 try:
479 r = dpkt.http.Request()
480 r.method, r.version, r.uri = None, None, None
481 r.unpack(tcpdata)
482 except dpkt.dpkt.UnpackError:
483 if r.method is not None or r.version is not None or \
484 r.uri is not None:
485 return True
486 return False
487
488 return True
489
491 """Adds an HTTP flow.
492 @param tcpdata: TCP data flow.
493 @param dport: destination port.
494 """
495 if tcpdata in self.http_requests:
496 self.http_requests[tcpdata]["count"] += 1
497 return True
498
499 try:
500 http = dpkt.http.Request()
501 http.unpack(tcpdata)
502 except dpkt.dpkt.UnpackError:
503 pass
504
505 try:
506 entry = {"count": 1}
507
508 if "host" in http.headers:
509 entry["host"] = convert_to_printable(http.headers["host"])
510 else:
511 entry["host"] = ""
512
513 entry["port"] = dport
514
515
516
517 netloc = entry["host"]
518 if dport != 80 and ":" not in netloc:
519 netloc += ":" + str(entry["port"])
520
521 entry["data"] = convert_to_printable(tcpdata)
522 url = urlparse.urlunparse(("http", netloc, http.uri,
523 None, None, None))
524 entry["uri"] = convert_to_printable(url)
525 entry["body"] = convert_to_printable(http.body)
526 entry["path"] = convert_to_printable(http.uri)
527
528 if "user-agent" in http.headers:
529 entry["user-agent"] = \
530 convert_to_printable(http.headers["user-agent"])
531
532 entry["version"] = convert_to_printable(http.version)
533 entry["method"] = convert_to_printable(http.method)
534
535 self.http_requests[tcpdata] = entry
536 except Exception:
537 return False
538
539 return True
540
542 """Extract a combination of the Session ID, Client Random, and Server
543 Random in order to identify the accompanying master secret later."""
544 if not hasattr(dpkt.ssl, "TLSRecord"):
545 if not Pcap.notified_dpkt:
546 Pcap.notified_dpkt = True
547 log.warning("Using an old version of dpkt that does not "
548 "support TLS streams (install the latest with "
549 "`pip install dpkt`)")
550 return
551
552 try:
553 record = dpkt.ssl.TLSRecord(data)
554 except dpkt.NeedData:
555 return
556 except:
557 log.exception("Error reading possible TLS Record")
558 return
559
560
561 if record.type not in dpkt.ssl.RECORD_TYPES:
562 return
563
564 try:
565 record = dpkt.ssl.RECORD_TYPES[record.type](record.data)
566 except dpkt.ssl.SSL3Exception:
567 return
568 except dpkt.NeedData:
569 log.exception("Incomplete possible TLS Handshake record found")
570 return
571
572
573 if not isinstance(record, dpkt.ssl.TLSHandshake):
574 return
575
576
577 if not isinstance(record.data, dpkt.ssl.TLSServerHello):
578 return
579
580
581 self.tls_keys.append({
582 "server_random": record.data.random.encode("hex"),
583 "session_id": record.data.session_id.encode("hex"),
584 })
585
587 """Reassemble a SMTP flow.
588 @param conn: connection dict.
589 @param data: raw data.
590 """
591 if conn["dst"] in self.smtp_flow:
592 self.smtp_flow[conn["dst"]] += data
593 else:
594 self.smtp_flow[conn["dst"]] = data
595
597 """Process SMTP flow."""
598 for conn, data in self.smtp_flow.iteritems():
599
600 if data.startswith(("EHLO", "HELO")):
601 self.smtp_requests.append({"dst": conn, "raw": data})
602
604 """
605 Checks for IRC traffic.
606 @param tcpdata: tcp data flow
607 """
608 try:
609 req = ircMessage()
610 except Exception:
611 return False
612
613 return req.isthereIRC(tcpdata)
614
616 """
617 Adds an IRC communication.
618 @param tcpdata: TCP data in flow
619 @param dport: destination port
620 """
621
622 try:
623 reqc = ircMessage()
624 reqs = ircMessage()
625 filters_sc = ["266"]
626 self.irc_requests = self.irc_requests + \
627 reqc.getClientMessages(tcpdata) + \
628 reqs.getServerMessagesFilter(tcpdata, filters_sc)
629 except Exception:
630 return False
631
632 return True
633
635 """Process PCAP.
636 @return: dict with network analysis data.
637 """
638
639 try:
640 file = open(self.filepath, "rb")
641 except (IOError, OSError):
642 log.error("Unable to open %s" % self.filepath)
643 return self.results
644
645 try:
646 pcap = dpkt.pcap.Reader(file)
647 except dpkt.dpkt.NeedData:
648 log.error("Unable to read PCAP file at path \"%s\".",
649 self.filepath)
650 return self.results
651 except ValueError:
652 log.error("Unable to read PCAP file at path \"%s\". File is "
653 "corrupted or wrong format." % self.filepath)
654 return self.results
655
656 offset = file.tell()
657 first_ts = None
658 for ts, buf in pcap:
659 if not first_ts:
660 first_ts = ts
661
662 try:
663 ip = iplayer_from_raw(buf, pcap.datalink())
664
665 connection = {}
666 if isinstance(ip, dpkt.ip.IP):
667 connection["src"] = socket.inet_ntoa(ip.src)
668 connection["dst"] = socket.inet_ntoa(ip.dst)
669 elif isinstance(ip, dpkt.ip6.IP6):
670 connection["src"] = socket.inet_ntop(socket.AF_INET6,
671 ip.src)
672 connection["dst"] = socket.inet_ntop(socket.AF_INET6,
673 ip.dst)
674 else:
675 offset = file.tell()
676 continue
677
678 self._add_hosts(connection)
679
680 if ip.p == dpkt.ip.IP_PROTO_TCP:
681 tcp = ip.data
682 if not isinstance(tcp, dpkt.tcp.TCP):
683 tcp = dpkt.tcp.TCP(tcp)
684
685 if tcp.data:
686 connection["sport"] = tcp.sport
687 connection["dport"] = tcp.dport
688 self._tcp_dissect(connection, tcp.data)
689
690 src, sport, dst, dport = (connection["src"], connection["sport"], connection["dst"], connection["dport"])
691 if not ((dst, dport, src, sport) in self.tcp_connections_seen or (src, sport, dst, dport) in self.tcp_connections_seen):
692 self.tcp_connections.append((src, sport, dst, dport, offset, ts-first_ts))
693 self.tcp_connections_seen.add((src, sport, dst, dport))
694
695 self.alive_hosts[dst, dport] = True
696 else:
697 ipconn = (
698 connection["src"], tcp.sport,
699 connection["dst"], tcp.dport,
700 )
701 seqack = self.tcp_connections_dead.get(ipconn)
702 if seqack == (tcp.seq, tcp.ack):
703 host = connection["dst"], tcp.dport
704 self.dead_hosts[host] = self.dead_hosts.get(host, 1) + 1
705
706 self.tcp_connections_dead[ipconn] = tcp.seq, tcp.ack
707
708 elif ip.p == dpkt.ip.IP_PROTO_UDP:
709 udp = ip.data
710 if not isinstance(udp, dpkt.udp.UDP):
711 udp = dpkt.udp.UDP(udp)
712
713 if len(udp.data) > 0:
714 connection["sport"] = udp.sport
715 connection["dport"] = udp.dport
716 self._udp_dissect(connection, udp.data)
717
718 src, sport, dst, dport = (connection["src"], connection["sport"], connection["dst"], connection["dport"])
719 if not ((dst, dport, src, sport) in self.udp_connections_seen or (src, sport, dst, dport) in self.udp_connections_seen):
720 self.udp_connections.append((src, sport, dst, dport, offset, ts-first_ts))
721 self.udp_connections_seen.add((src, sport, dst, dport))
722
723 elif ip.p == dpkt.ip.IP_PROTO_ICMP:
724 icmp = ip.data
725 if not isinstance(icmp, dpkt.icmp.ICMP):
726 icmp = dpkt.icmp.ICMP(icmp)
727
728 self._icmp_dissect(connection, icmp)
729
730 offset = file.tell()
731 except AttributeError:
732 continue
733 except dpkt.dpkt.NeedData:
734 continue
735 except Exception as e:
736 log.exception("Failed to process packet: %s", e)
737
738 file.close()
739
740
741 self._process_smtp()
742
743
744 self.results["hosts"] = self.unique_hosts
745 self.results["domains"] = self.unique_domains
746 self.results["tcp"] = [conn_from_flowtuple(i) for i in self.tcp_connections]
747 self.results["udp"] = [conn_from_flowtuple(i) for i in self.udp_connections]
748 self.results["icmp"] = self.icmp_requests
749 self.results["http"] = self.http_requests.values()
750 self.results["tls"] = self.tls_keys
751 self.results["dns"] = self.dns_requests.values()
752 self.results["smtp"] = self.smtp_requests
753 self.results["irc"] = self.irc_requests
754
755 self.results["dead_hosts"] = []
756
757
758
759
760 for (ip, port), count in self.dead_hosts.items():
761 if count < 3 or (ip, port) in self.alive_hosts:
762 continue
763
764
765 if (ip, port) not in self.results["dead_hosts"]:
766 self.results["dead_hosts"].append((ip, port))
767
768 return self.results
769
771 """Interprets the PCAP file through the httpreplay library which parses
772 the various protocols, decrypts and decodes them, and then provides us
773 with the high level representation of it."""
774
def __init__(self, pcap_path, tlsmaster, network_path):
    """Set up the httpreplay-based PCAP interpreter.
    @param pcap_path: path to the PCAP file to interpret.
    @param tlsmaster: client/server random -> TLS master secret mapping.
    @param network_path: directory where extracted response bodies go.
    """
    self.pcap_path = pcap_path
    self.network_path = network_path

    # Protocol handlers keyed by destination port. Both HTTPS ports share
    # a single factory that closes over the TLS master secrets.
    https = lambda: httpreplay.cut.https_handler(tlsmaster)
    self.handlers = {
        25: httpreplay.cut.smtp_handler,
        443: https,
        4443: https,
    }
    for port in (80, 8000, 8080):
        self.handlers[port] = httpreplay.cut.http_handler
787
789 results = {
790 "http_ex": [],
791 "https_ex": [],
792 }
793
794 if not os.path.exists(self.network_path):
795 os.mkdir(self.network_path)
796
797 r = httpreplay.reader.PcapReader(open(self.pcap_path, "rb"))
798 r.tcp = httpreplay.smegma.TCPPacketStreamer(r, self.handlers)
799
800 l = sorted(r.process(), key=lambda x: x[1])
801 for s, ts, protocol, sent, recv in l:
802 srcip, srcport, dstip, dstport = s
803
804 if protocol == "http" or protocol == "https":
805 request = sent.raw.split("\r\n\r\n", 1)[0]
806 response = recv.raw.split("\r\n\r\n", 1)[0]
807
808 md5 = hashlib.md5(recv.body or "").hexdigest()
809 sha1 = hashlib.sha1(recv.body or "").hexdigest()
810
811 filepath = os.path.join(self.network_path, sha1)
812 open(filepath, "wb").write(recv.body or "")
813
814 results["%s_ex" % protocol].append({
815 "src": srcip, "sport": srcport,
816 "dst": dstip, "dport": dstport,
817 "protocol": protocol,
818 "method": sent.method,
819 "host": sent.headers.get("host", dstip),
820 "uri": sent.uri,
821 "request": request.decode("latin-1"),
822 "response": response.decode("latin-1"),
823 "md5": md5,
824 "sha1": sha1,
825 "path": filepath,
826 })
827
828 return results
829
831 """Network analysis."""
832
833 order = 2
834 key = "network"
835
888
890 """Obtain the client/server random to TLS master secrets mapping that
891 we have obtained through dynamic analysis."""
892 tlsmaster = {}
893 summary = self.results.get("behavior", {}).get("summary", {})
894 for entry in summary.get("tls_master", []):
895 client_random, server_random, master_secret = entry
896 client_random = client_random.decode("hex")
897 server_random = server_random.decode("hex")
898 master_secret = master_secret.decode("hex")
899 tlsmaster[client_random, server_random] = master_secret
900 return tlsmaster
901
903 """Converts a raw packet to a dpkt packet regarding of link type.
904 @param raw: raw packet
905 @param linktype: integer describing link type as expected by dpkt
906 """
907 if linktype == 1:
908 try:
909 pkt = dpkt.ethernet.Ethernet(raw)
910 return pkt.data
911 except dpkt.NeedData:
912 pass
913 elif linktype == 101:
914 return dpkt.ip.IP(raw)
915 else:
916 raise CuckooProcessingError("unknown PCAP linktype")
917
919 """Convert the flow tuple into a dictionary (suitable for JSON)"""
920 sip, sport, dip, dport, offset, relts = ft
921 return {"src": sip, "sport": sport,
922 "dst": dip, "dport": dport,
923 "offset": offset, "time": relts}
924
925
926
927
def batch_sort(input_iterator, output_path, buffer_size=32000, output_class=None):
    """batch sort helper with temporary files, supports sorting large stuff"""
    # By default the sorted output is written with the same class that
    # produced the input stream.
    if not output_class:
        output_class = input_iterator.__class__

    chunks = []
    try:
        # Phase one: drain the input in fixed-size slices, sort each slice
        # in memory, and spill it to its own temporary file.
        while True:
            buffered = list(islice(input_iterator, buffer_size))
            if not buffered:
                break

            buffered.sort()

            fd, chunk_path = tempfile.mkstemp()
            os.close(fd)

            spill = output_class(chunk_path)
            chunks.append(spill)
            for item in buffered:
                spill.write(item.obj)
            spill.close()

        # Phase two: k-way merge of the sorted spill files into the final
        # destination.
        merged = output_class(output_path)
        for item in heapq.merge(*chunks):
            merged.write(item.obj)
        merged.close()
    finally:
        # Best-effort cleanup of the temporary spill files.
        for spill in chunks:
            try:
                spill.close()
                os.remove(spill.name)
            except Exception:
                pass
961
963 """SortCap is a wrapper around the packet lib (dpkt) that allows us to sort pcaps
964 together with the batch_sort function above."""
965
967 self.name = path
968 self.linktype = linktype
969 self.fd = None
970 self.ctr = 0
971 self.conns = set()
972
974 if not self.fd:
975 self.fd = dpkt.pcap.Writer(open(self.name, "wb"), linktype=self.linktype)
976 self.fd.writepkt(p.raw, p.ts)
977
979 if not self.fd:
980 self.fd = dpkt.pcap.Reader(open(self.name, "rb"))
981 self.fditer = iter(self.fd)
982 self.linktype = self.fd.datalink()
983 return self
984
986 if self.fd:
987 self.fd.close()
988 self.fd = None
989
991 rp = next(self.fditer)
992 if rp is None:
993 return None
994
995 self.ctr += 1
996
997 ts, raw = rp
998 rpkt = Packet(raw, ts)
999
1000 sip, dip, sport, dport, proto = flowtuple_from_raw(raw, self.linktype)
1001
1002
1003 if (dip, sip, dport, sport, proto) in self.conns:
1004 flowtuple = (dip, sip, dport, sport, proto)
1005 else:
1006 flowtuple = (sip, dip, sport, dport, proto)
1007
1008 self.conns.add(flowtuple)
1009 return Keyed((flowtuple, ts, self.ctr), rpkt)
1010
1012 """Use SortCap class together with batch_sort to sort a pcap"""
1013 inc = SortCap(inpath)
1014 batch_sort(inc, outpath, output_class=lambda path: SortCap(path, linktype=inc.linktype))
1015 return 0
1016
1018 """Parse a packet from a pcap just enough to gain a flow description tuple"""
1019 ip = iplayer_from_raw(raw, linktype)
1020
1021 if isinstance(ip, dpkt.ip.IP):
1022 sip, dip = socket.inet_ntoa(ip.src), socket.inet_ntoa(ip.dst)
1023 proto = ip.p
1024
1025 if proto == dpkt.ip.IP_PROTO_TCP or proto == dpkt.ip.IP_PROTO_UDP:
1026 l3 = ip.data
1027 sport, dport = l3.sport, l3.dport
1028 else:
1029 sport, dport = 0, 0
1030
1031 else:
1032 sip, dip, proto = 0, 0, -1
1033 sport, dport = 0, 0
1034
1035 flowtuple = (sip, dip, sport, dport, proto)
1036 return flowtuple
1037
1039 """Get the payload from a packet, the data below TCP/UDP basically"""
1040 ip = iplayer_from_raw(raw, linktype)
1041 try:
1042 return ip.data.data
1043 except:
1044 return ""
1045
1047 """Extract all packets belonging to the same flow from a pcap packet
1048 iterator."""
1049 first_ft = None
1050
1051 for ts, raw in piter:
1052 ft = flowtuple_from_raw(raw, linktype)
1053 if not first_ft:
1054 first_ft = ft
1055
1056 sip, dip, sport, dport, proto = ft
1057 if not (first_ft == ft or first_ft == (dip, sip, dport, sport, proto)):
1058 break
1059
1060 yield {
1061 "src": sip, "dst": dip, "sport": sport, "dport": dport,
1062 "raw": payload_from_raw(raw, linktype).encode("base64"),
1063 "direction": first_ft == ft,
1064 }
1065
1067 """Open a PCAP, seek to a packet offset, then get all packets belonging to
1068 the same connection."""
1069 pcap = dpkt.pcap.Reader(fobj)
1070 pcapiter = iter(pcap)
1071 ts, raw = pcapiter.next()
1072
1073 fobj.seek(offset)
1074 for p in next_connection_packets(pcapiter, linktype=pcap.datalink()):
1075 yield p
1076