
Source Code for Module modules.reporting.mongodb

# Copyright (C) 2010-2013 Claudio Guarnieri.
# Copyright (C) 2014-2016 Cuckoo Foundation.
# This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org
# See the file 'docs/LICENSE' for copying permission.

import os

from lib.cuckoo.common.abstracts import Report
from lib.cuckoo.common.exceptions import CuckooDependencyError
from lib.cuckoo.common.exceptions import CuckooReportError
from lib.cuckoo.common.objects import File

try:
    from pymongo import MongoClient
    from pymongo.errors import ConnectionFailure
    from gridfs import GridFS
    from gridfs.errors import FileExists
    HAVE_MONGO = True
except ImportError:
    HAVE_MONGO = False

class MongoDB(Report):
    """Stores report in MongoDB."""

    # Mongo schema version, used for data migration.
    SCHEMA_VERSION = "1"

    def connect(self):
        """Connects to the Mongo database, loads options and sets connectors.
        @raise CuckooReportError: if unable to connect.
        """
        host = self.options.get("host", "127.0.0.1")
        port = int(self.options.get("port", 27017))
        db = self.options.get("db", "cuckoo")

        try:
            self.conn = MongoClient(host, port)
            self.db = self.conn[db]
            self.fs = GridFS(self.db)
        except TypeError:
            raise CuckooReportError("Mongo connection port must be integer")
        except ConnectionFailure:
            raise CuckooReportError("Cannot connect to MongoDB")

    def store_file(self, file_obj, filename=""):
        """Store a file in GridFS.
        @param file_obj: file object to store
        @param filename: name of the file to store
        @return: object id of the stored file
        """
        if not filename:
            filename = file_obj.get_name()

        existing = self.db.fs.files.find_one({"sha256": file_obj.get_sha256()})

        if existing:
            return existing["_id"]

        new = self.fs.new_file(filename=filename,
                               contentType=file_obj.get_content_type(),
                               sha256=file_obj.get_sha256())

        for chunk in file_obj.get_chunks():
            new.write(chunk)

        try:
            new.close()
            return new._id
        except FileExists:
            to_find = {"sha256": file_obj.get_sha256()}
            return self.db.fs.files.find_one(to_find)["_id"]

    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        if not HAVE_MONGO:
            raise CuckooDependencyError(
                "Unable to import pymongo (install with "
                "`pip install pymongo`)"
            )

        self.connect()

        # Set mongo schema version.
        # TODO: This is not optimal because it runs on each analysis. It
        # should run only once at startup.
        if "cuckoo_schema" in self.db.collection_names():
            if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION:
                raise CuckooReportError("Mongo schema version not expected, check data migration tool")
        else:
            self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

        # Set a unique index on stored files, to avoid duplicates.
        # From pymongo docs:
        #  Returns the name of the created index if an index is actually
        #  created.
        #  Returns None if the index already exists.
        # TODO: This is not optimal because it runs on each analysis. It
        # should run only once at startup.
        self.db.fs.files.ensure_index("sha256", unique=True,
                                      sparse=True, name="sha256_unique")

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)
        if "network" not in report:
            report["network"] = {}

        # This will likely hardcode the cuckoo.log to this point, but that
        # should be fine.
        if report.get("debug"):
            report["debug"]["cuckoo"] = list(report["debug"]["cuckoo"])

        # Store the path of the analysis directory.
        report["info"]["analysis_path"] = self.analysis_path

        # Store the sample in GridFS.
        if results.get("info", {}).get("category") == "file" and "target" in results:
            sample = File(self.file_path)
            if sample.valid():
                fname = results["target"]["file"]["name"]
                sample_id = self.store_file(sample, filename=fname)
                report["target"] = {"file_id": sample_id}
                report["target"].update(results["target"])

        # Store the PCAP file in GridFS and reference it back in the report.
        pcap_path = os.path.join(self.analysis_path, "dump.pcap")
        pcap = File(pcap_path)
        if pcap.valid():
            pcap_id = self.store_file(pcap)
            report["network"]["pcap_id"] = pcap_id

        sorted_pcap_path = os.path.join(self.analysis_path, "dump_sorted.pcap")
        spcap = File(sorted_pcap_path)
        if spcap.valid():
            spcap_id = self.store_file(spcap)
            report["network"]["sorted_pcap_id"] = spcap_id

        mitmproxy_path = os.path.join(self.analysis_path, "dump.mitm")
        mitmpr = File(mitmproxy_path)
        if mitmpr.valid():
            mitmpr_id = self.store_file(mitmpr)
            report["network"]["mitmproxy_id"] = mitmpr_id

        # Store the process memory dump files and extracted files in GridFS
        # and reference them back in the report.
        if "procmemory" in report and self.options.get("store_memdump", False):
            for idx, procmem in enumerate(report["procmemory"]):
                procmem_path = os.path.join(
                    self.analysis_path, "memory", "%s.dmp" % procmem["pid"]
                )
                procmem_file = File(procmem_path)
                if procmem_file.valid():
                    procmem_id = self.store_file(procmem_file)
                    procmem["procmem_id"] = procmem_id

                for extracted in procmem.get("extracted", []):
                    f = File(extracted["path"])
                    if f.valid():
                        extracted["extracted_id"] = self.store_file(f)

        # Walk through the dropped files, store them in GridFS and update the
        # report with the ObjectIds.
        new_dropped = []
        if "dropped" in report:
            for dropped in report["dropped"]:
                new_drop = dict(dropped)
                drop = File(dropped["path"])
                if drop.valid():
                    dropped_id = self.store_file(drop, filename=dropped["name"])
                    new_drop["object_id"] = dropped_id

                new_dropped.append(new_drop)

        report["dropped"] = new_dropped

        # Add screenshots.
        report["shots"] = []
        if os.path.exists(self.shots_path):
            # Walk through the files and select the JPGs.
            for shot_file in sorted(os.listdir(self.shots_path)):
                if not shot_file.endswith(".jpg"):
                    continue

                shot_path = os.path.join(self.shots_path, shot_file)
                shot = File(shot_path)
                # If the screenshot path is a valid file, store it and
                # reference it back in the report.
                if shot.valid():
                    shot_id = self.store_file(shot)
                    report["shots"].append(shot_id)

        paginate = self.options.get("paginate", 100)

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report. In this way we should defeat the
        # issue with oversized reports exceeding MongoDB's boundaries.
        # This also allows paging of the reports.
        if "behavior" in report and "processes" in report["behavior"]:
            new_processes = []
            for process in report["behavior"]["processes"]:
                new_process = dict(process)

                chunk = []
                chunks_ids = []
                # Loop on each process call.
                for call in process["calls"]:
                    # Once the chunk reaches the paginate size, store it in
                    # MongoDB (leftovers are stored after the loop).
                    if len(chunk) == paginate:
                        to_insert = {"pid": process["pid"], "calls": chunk}
                        chunk_id = self.db.calls.insert(to_insert)
                        chunks_ids.append(chunk_id)
                        # Reset the chunk.
                        chunk = []

                    # Append call to the chunk.
                    chunk.append(call)

                # Store leftovers.
                if chunk:
                    to_insert = {"pid": process["pid"], "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)

                # Add list of chunks.
                new_process["calls"] = chunks_ids
                new_processes.append(new_process)

            # Store the results in the report.
            report["behavior"] = dict(report["behavior"])
            report["behavior"]["processes"] = new_processes

        if report.get("procmon"):
            procmon, chunk = [], []

            for entry in report["procmon"]:
                if len(chunk) == paginate:
                    procmon.append(self.db.procmon.insert(chunk))
                    chunk = []

                chunk.append(entry)

            if chunk:
                procmon.append(self.db.procmon.insert(chunk))

            report["procmon"] = procmon

        # Store the report and retrieve its object id.
        self.db.analysis.save(report)
        self.conn.close()
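
The run() method above splits each process's API calls into chunks of at most `paginate` entries, stores the chunks in a separate "calls" collection, and keeps only their ObjectIds in the analysis document. The sketch below shows how a consumer could reassemble the flattened call list of one process; it is a minimal reader-side example, not part of the module, and the connection details and the lookup on "info.id" (the task id) are assumptions.

    from pymongo import MongoClient

    client = MongoClient("127.0.0.1", 27017)  # defaults used by connect()
    db = client["cuckoo"]                     # default database name

    # Hypothetical lookup by task id; run() saves one analysis document per task.
    report = db.analysis.find_one({"info.id": 1})

    if report and report.get("behavior", {}).get("processes"):
        process = report["behavior"]["processes"][0]
        calls = []
        # process["calls"] holds the ObjectIds of the chunk documents, each
        # containing at most `paginate` calls, in insertion order.
        for chunk_id in process["calls"]:
            chunk = db.calls.find_one({"_id": chunk_id})
            calls.extend(chunk["calls"])
        print("PID %s made %d API calls" % (process["pid"], len(calls)))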
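Similarly, store_file() deduplicates artifacts in GridFS by their SHA-256, and the report only carries GridFS ObjectIds (for example report["network"]["pcap_id"]). A minimal sketch of retrieving such a file, under the same assumed connection details and hypothetical task id:

    import gridfs
    from pymongo import MongoClient

    client = MongoClient("127.0.0.1", 27017)
    db = client["cuckoo"]
    fs = gridfs.GridFS(db)

    report = db.analysis.find_one({"info.id": 1})  # hypothetical task id
    pcap_id = report.get("network", {}).get("pcap_id")
    if pcap_id:
        # GridFS.get() returns a file-like GridOut object.
        with open("dump.pcap", "wb") as out:
            out.write(fs.get(pcap_id).read())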