Package modules :: Package reporting :: Module elasticsearch
[hide private]
[frames | no frames]

Source Code for Module modules.reporting.elasticsearch

  1  # Copyright (C) 2016 Cuckoo Foundation. 
  2  # This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org 
  3  # See the file 'docs/LICENSE' for copying permission. 
  4   
  5  from __future__ import absolute_import 
  6   
  7  import datetime 
  8  import json 
  9  import logging 
 10  import time 
 11  import os 
 12   
 13  from lib.cuckoo.common.abstracts import Report 
 14  from lib.cuckoo.common.constants import CUCKOO_ROOT 
 15  from lib.cuckoo.common.exceptions import CuckooDependencyError 
 16  from lib.cuckoo.common.exceptions import CuckooReportError 
 17  from lib.cuckoo.common.utils import convert_to_printable 
 18   
 19  logging.getLogger("elasticsearch").setLevel(logging.WARNING) 
 20  logging.getLogger("elasticsearch.trace").setLevel(logging.WARNING) 
 21   
 22  try: 
 23      from elasticsearch import ( 
 24          Elasticsearch, ConnectionError, ConnectionTimeout, helpers 
 25      ) 
 26   
 27      HAVE_ELASTIC = True 
 28  except ImportError: 
 29      HAVE_ELASTIC = False 
 30   
 31  log = logging.getLogger(__name__) 
 32   
 33   
class ElasticSearch(Report):
    """Stores report in Elasticsearch."""

    def connect(self):
        """Connect to Elasticsearch and prepare the indexing state.

        Reads the "hosts", "index" and "index_time_pattern" options, stores
        the index names, document types and report timestamp on the instance,
        creates the client and makes sure the "cuckoo_template" index
        template is installed.

        @raise CuckooReportError: if the index time pattern is unknown, the
            hosts are malformed, Elasticsearch cannot be reached, or the
            index template cannot be applied.
        """
        hosts = [
            host.strip()
            for host in self.options.get("hosts", "127.0.0.1:9200").split(",")
            if host.strip()
        ]

        self.index = self.options.get("index", "cuckoo")

        # Do not change these types without changing the elasticsearch
        # template as well.
        self.report_type = "cuckoo"
        self.call_type = "call"

        # Get the index time option and set the dated index accordingly.
        strf_formats = {
            "yearly": "%Y",
            "monthly": "%Y-%m",
            "daily": "%Y-%m-%d",
        }
        index_type = self.options.get("index_time_pattern", "yearly")
        strf_time = strf_formats.get(index_type.lower())
        if strf_time is None:
            # An unrecognised value used to leave strf_time unassigned and
            # crash with an UnboundLocalError; report it explicitly instead.
            raise CuckooReportError(
                "Unknown Elasticsearch index_time_pattern %r, expected one "
                "of: yearly, monthly, daily" % index_type
            )

        date_index = datetime.datetime.utcnow().strftime(strf_time)
        self.dated_index = "%s-%s" % (self.index, date_index)

        # Gets the time which will be used for indexing the document into ES
        # ES needs epoch time in seconds per the mapping
        self.report_time = int(time.time())

        try:
            self.es = Elasticsearch(hosts)
        except TypeError:
            raise CuckooReportError(
                "Elasticsearch connection hosts must be host:port or host"
            )
        except (ConnectionError, ConnectionTimeout) as e:
            raise CuckooReportError("Cannot connect to Elasticsearch: %s" % e)

        # Check to see if the template exists, apply it if it does not.
        # These are the first calls that actually have to talk to the
        # cluster, so connection failures surface here and are translated
        # into the documented CuckooReportError as well.
        try:
            if not self.es.indices.exists_template("cuckoo_template"):
                if not self.apply_template():
                    raise CuckooReportError(
                        "Cannot apply Elasticsearch template"
                    )
        except (ConnectionError, ConnectionTimeout) as e:
            raise CuckooReportError("Cannot connect to Elasticsearch: %s" % e)

    def apply_template(self):
        """Install the Cuckoo index template into Elasticsearch.

        Loads data/elasticsearch/template.json from CUCKOO_ROOT, points its
        index pattern at "<index>-*" and uploads it as "cuckoo_template".

        @return: True if the template was uploaded, False if the template
            file does not exist.
        @raise CuckooReportError: if the template file is not valid JSON.
        """
        template_path = os.path.join(
            CUCKOO_ROOT, "data", "elasticsearch", "template.json"
        )
        if not os.path.exists(template_path):
            return False

        # The file is only read, so open it read-only ("rw" is not a valid
        # open() mode).
        with open(template_path, "r") as f:
            try:
                cuckoo_template = json.load(f)
            except ValueError:
                raise CuckooReportError(
                    "Unable to read valid JSON from the ElasticSearch "
                    "template JSON file located at: %s" % template_path
                )

        # Create an index wildcard based off of the index name specified
        # in the config file, this overwrites the settings in
        # template.json.
        cuckoo_template["template"] = self.index + "-*"

        self.es.indices.put_template(
            name="cuckoo_template", body=json.dumps(cuckoo_template)
        )
        return True

    def get_base_document(self):
        """Build the fields shared by every document of this report.

        @return: new dict holding the task id (as both "task_id" and
            "report_id") and the precached report time.
        """
        return {
            "task_id": self.task["id"],
            "report_time": self.report_time,
            "report_id": self.task["id"],
        }

    def do_index(self, obj):
        """Index a single report document into the dated index.

        @param obj: dict of report fields; merged over the base document,
            so its keys win on conflict.
        @raise CuckooReportError: if Elasticsearch rejects the document or
            the request fails for any other reason.
        """
        base_document = self.get_base_document()

        # Append the base document to the object to index.
        base_document.update(obj)

        try:
            self.es.create(
                index=self.dated_index, doc_type=self.report_type,
                body=base_document
            )
        except Exception as e:
            raise CuckooReportError(
                "Failed to save results in ElasticSearch for "
                "task #%d: %s" % (self.task["id"], e)
            )

    def do_bulk_index(self, bulk_reqs):
        """Submit a batch of documents through the bulk helper.

        @param bulk_reqs: list of bulk actions handed to helpers.bulk().
        @raise CuckooReportError: if the bulk request fails.
        """
        try:
            helpers.bulk(self.es, bulk_reqs)
        except Exception as e:
            raise CuckooReportError(
                "Failed to save results in ElasticSearch for "
                "task #%d: %s" % (self.task["id"], e)
            )

    def process_call(self, call):
        """This function converts all arguments to strings to allow ES to map
        them properly.

        The call is modified in place: its "arguments" dict is replaced by a
        new dict of strings. Calls without a dict under "arguments" are
        returned untouched.

        @param call: API call entry.
        @return: the same call object.
        """
        if "arguments" not in call or \
                not isinstance(call["arguments"], dict):
            return call

        new_arguments = {}
        for key, value in call["arguments"].iteritems():
            if isinstance(value, (unicode, str)):
                new_arguments[key] = convert_to_printable(value)
            else:
                new_arguments[key] = str(value)

        call["arguments"] = new_arguments
        return call

    def process_behavior(self, results, bulk_submit_size=1000):
        """Index the behavioral data.

        Every API call of every process becomes one document of the call
        type, submitted in batches.

        @param results: analysis results dictionary.
        @param bulk_submit_size: number of call documents per bulk request.
        @raise CuckooReportError: if a bulk request fails.
        """
        # Identical for every call of this report, so build it only once;
        # it is copied into each call document by update() below.
        base_document = self.get_base_document()

        for process in results.get("behavior", {}).get("processes", []):
            bulk_index = []

            for call in process["calls"]:
                call_document = {
                    "pid": process["pid"],
                }
                call_document.update(self.process_call(call))
                # Applied last so the report header fields always win over
                # same-named keys coming from the call itself.
                call_document.update(base_document)
                bulk_index.append({
                    "_index": self.dated_index,
                    "_type": self.call_type,
                    "_source": call_document
                })
                if len(bulk_index) == bulk_submit_size:
                    self.do_bulk_index(bulk_index)
                    bulk_index = []

            # Flush whatever is left over for this process.
            if bulk_index:
                self.do_bulk_index(bulk_index)

    def run(self, results):
        """Index the Cuckoo report into ElasticSearch.
        @param results: analysis results dictionary.
        @raise CuckooDependencyError: if the elasticsearch package is missing.
        @raise CuckooReportError: if the connection or reporting failed.
        """
        if not HAVE_ELASTIC:
            raise CuckooDependencyError(
                "Unable to import elasticsearch (install with "
                "`pip install elasticsearch`)"
            )

        self.connect()

        # Index target information, the behavioral summary, and
        # VirusTotal results.
        self.do_index({
            "cuckoo_node": self.options.get("cuckoo_node"),
            "target": results.get("target"),
            "summary": results.get("behavior", {}).get("summary"),
            "virustotal": results.get("virustotal"),
            "irma": results.get("irma"),
            "signatures": results.get("signatures"),
            "dropped": results.get("dropped"),
        })

        # Index the API calls.
        if self.options.get("calls"):
            self.process_behavior(results)
213