
Source Code for Module modules.reporting.mongodb

# Copyright (C) 2010-2014 Cuckoo Foundation.
# This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org
# See the file 'docs/LICENSE' for copying permission.

import os

from lib.cuckoo.common.abstracts import Report
from lib.cuckoo.common.exceptions import CuckooDependencyError
from lib.cuckoo.common.exceptions import CuckooReportError
from lib.cuckoo.common.objects import File

try:
    from pymongo.connection import Connection
    from pymongo.errors import ConnectionFailure
    from gridfs import GridFS
    from gridfs.errors import FileExists
    HAVE_MONGO = True
except ImportError:
    HAVE_MONGO = False

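# A sketch for newer pymongo releases (an alternative, not what this module
# uses): pymongo 3.0 removed pymongo.connection.Connection in favor of
# MongoClient, which has been available since pymongo 2.4. The guarded
# import would then look like:
#
#     try:
#         from pymongo import MongoClient
#         from pymongo.errors import ConnectionFailure
#         from gridfs import GridFS
#         from gridfs.errors import FileExists
#         HAVE_MONGO = True
#     except ImportError:
#         HAVE_MONGO = False
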
class MongoDB(Report):
    """Stores report in MongoDB."""

    # Mongo schema version, used for data migration.
    SCHEMA_VERSION = "1"

    def connect(self):
        """Connects to the Mongo database, loads options and sets connectors.
        @raise CuckooReportError: if unable to connect.
        """
        host = self.options.get("host", "127.0.0.1")
        port = self.options.get("port", 27017)

        try:
            self.conn = Connection(host, port)
            self.db = self.conn.cuckoo
            self.fs = GridFS(self.db)
        except TypeError:
            raise CuckooReportError("Mongo connection port must be integer")
        except ConnectionFailure:
            raise CuckooReportError("Cannot connect to MongoDB")

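    # The host and port options read above come from this module's section in
    # the reporting configuration. A minimal sketch of that section, assuming
    # the stock conf/reporting.conf layout:
    #
    #     [mongodb]
    #     enabled = yes
    #     host = 127.0.0.1
    #     port = 27017
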
    def store_file(self, file_obj, filename=""):
        """Store a file in GridFS.
        @param file_obj: file object to store
        @param filename: name of the file to store
        @return: object id of the stored file
        """
        if not filename:
            filename = file_obj.get_name()

        # Deduplicate on SHA256: if a file with the same hash is already in
        # GridFS, reuse its ObjectId instead of storing a second copy.
        existing = self.db.fs.files.find_one({"sha256": file_obj.get_sha256()})

        if existing:
            return existing["_id"]
        else:
            new = self.fs.new_file(filename=filename,
                                   sha256=file_obj.get_sha256())
            for chunk in file_obj.get_chunks():
                new.write(chunk)
            try:
                new.close()
            except FileExists:
                # A concurrent analysis stored the same file first and won
                # the race against the unique index: return the existing id.
                to_find = {"sha256": file_obj.get_sha256()}
                return self.db.fs.files.find_one(to_find)["_id"]
            else:
                return new._id

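    # A consumer can read a stored file back out of GridFS through the
    # ObjectId that store_file() returns. A sketch, assuming the same
    # database layout and the pymongo 2.x API used above:
    #
    #     conn = Connection("127.0.0.1", 27017)
    #     db = conn.cuckoo
    #     fs = GridFS(db)
    #     report = db.analysis.find_one()
    #     # "file_id" is only present for file-category analyses.
    #     data = fs.get(report["target"]["file_id"]).read()
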
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_MONGO:
            raise CuckooDependencyError("Unable to import pymongo "
                                        "(install with `pip install pymongo`)")

        self.connect()

        # Set mongo schema version.
        # TODO: This is not optimal because it runs at each analysis. It
        # should run only once at startup.
        if "cuckoo_schema" in self.db.collection_names():
            if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION:
                raise CuckooReportError("Mongo schema version not expected, "
                                        "check data migration tool")
        else:
            self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

        # Set a unique index on stored files, to avoid duplicates.
        # From pymongo docs:
        #  Returns the name of the created index if an index is actually
        #  created.
        #  Returns None if the index already exists.
        # TODO: This is not optimal because it runs at each analysis. It
        # should run only once at startup.
        self.db.fs.files.ensure_index("sha256", unique=True,
                                      sparse=True, name="sha256_unique")

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)

        # Store the sample in GridFS.
        if results["info"]["category"] == "file":
            sample = File(self.file_path)
            if sample.valid():
                fname = results["target"]["file"]["name"]
                sample_id = self.store_file(sample, filename=fname)
                report["target"] = {"file_id": sample_id}
                report["target"].update(results["target"])

        # Store the PCAP file in GridFS and reference it back in the report.
        pcap_path = os.path.join(self.analysis_path, "dump.pcap")
        pcap = File(pcap_path)
        if pcap.valid():
            pcap_id = self.store_file(pcap)
            report["network"] = {"pcap_id": pcap_id}
            report["network"].update(results["network"])

        # Walk through the dropped files, store them in GridFS and update the
        # report with the ObjectIds.
        new_dropped = []
        for dropped in report["dropped"]:
            new_drop = dict(dropped)
            drop = File(dropped["path"])
            if drop.valid():
                dropped_id = self.store_file(drop, filename=dropped["name"])
                new_drop["object_id"] = dropped_id

            new_dropped.append(new_drop)

        report["dropped"] = new_dropped

        # Add screenshots.
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            # Walk through the files and select the JPGs.
            shots = [shot for shot in os.listdir(shots_path)
                     if shot.endswith(".jpg")]

            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path,
                                         "shots", shot_file)
                shot = File(shot_path)
                # If the screenshot path is a valid file, store it and
                # reference it back in the report.
                if shot.valid():
                    shot_id = self.store_file(shot)
                    report["shots"].append(shot_id)

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report. This works around reports that
        # would otherwise exceed MongoDB's document size limit, and it also
        # allows paging of the reports.
        new_processes = []
        for process in report["behavior"]["processes"]:
            new_process = dict(process)

            chunk = []
            chunks_ids = []
            # Loop on each process call.
            for call in process["calls"]:
                # Once the chunk reaches 100 calls, flush it to MongoDB and
                # start a new one; leftovers are stored after the loop.
                if len(chunk) == 100:
                    to_insert = {"pid": process["process_id"],
                                 "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)
                    # Reset the chunk.
                    chunk = []

                # Append call to the chunk.
                chunk.append(call)

            # Store leftovers.
            if chunk:
                to_insert = {"pid": process["process_id"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)

            # Add list of chunks.
            new_process["calls"] = chunks_ids
            new_processes.append(new_process)

        # Store the results in the report.
        report["behavior"] = dict(report["behavior"])
        report["behavior"]["processes"] = new_processes

        # Store the report and retrieve its object id.
        self.db.analysis.save(report)
        self.conn.disconnect()
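
Since run() replaces each process's "calls" list with a list of chunk ObjectIds, a consumer has to walk the calls collection to rebuild a full call trace. A minimal sketch, assuming the database layout written above and the same pymongo 2.x API:

    from pymongo.connection import Connection

    db = Connection("127.0.0.1", 27017).cuckoo

    # Rebuild the flat call list for every process in one analysis report.
    report = db.analysis.find_one()
    if report:
        for process in report["behavior"]["processes"]:
            calls = []
            for chunk_id in process["calls"]:
                # Each chunk document holds up to 100 consecutive calls.
                chunk = db.calls.find_one({"_id": chunk_id})
                calls.extend(chunk["calls"])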