Package lib :: Package cuckoo :: Package common :: Module virustotal
[hide private]
[frames] | [no frames]

Source Code for Module lib.cuckoo.common.virustotal

  1  # Copyright (C) 2010-2013 Claudio Guarnieri. 
  2  # Copyright (C) 2014-2016 Cuckoo Foundation. 
  3  # This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org 
  4  # See the file 'docs/LICENSE' for copying permission. 
  5   
  6  import logging 
  7  import re 
  8   
  9  try: 
 10      import requests 
 11      HAVE_REQUESTS = True 
 12   
 13      # Disable requests/urllib3 debug & info messages. 
 14      logging.getLogger("requests").setLevel(logging.WARNING) 
 15      logging.getLogger("urllib3").setLevel(logging.WARNING) 
 16  except ImportError: 
 17      HAVE_REQUESTS = False 
 18   
 19  from lib.cuckoo.common.exceptions import CuckooOperationalError 
 20  from lib.cuckoo.common.objects import File 
 21   
class VirusTotalResourceNotScanned(CuckooOperationalError):
    """This resource has not been scanned yet.

    Raised when VirusTotal has no report for the requested resource and the
    API wrapper was configured with a truthy ``scan`` option, so that the
    caller can react to the missing report (for example by submitting the
    resource for scanning).
    """
24
class VirusTotalAPI(object):
    """Wrapper to VirusTotal API.

    Provides helpers to fetch existing file/URL reports, to submit files/URLs
    for scanning, and to normalize the detection names reported by the
    individual Anti Virus engines.
    """

    FILE_REPORT = "https://www.virustotal.com/vtapi/v2/file/report"
    URL_REPORT = "https://www.virustotal.com/vtapi/v2/url/report"
    FILE_SCAN = "https://www.virustotal.com/vtapi/v2/file/scan"
    URL_SCAN = "https://www.virustotal.com/vtapi/v2/url/scan"

    # Lowercase words that carry no family information and are therefore
    # dropped by normalize().
    VARIANT_BLACKLIST = [
        "generic", "malware", "trojan", "agent", "win32", "multi", "w32",
        "trojanclicker", "trojware", "win", "a variant of win32", "trj",
        "susp", "dangerousobject", "backdoor", "clicker", "variant", "heur",
        "gen", "virus", "dropper", "generic suspicious", "spyware", "program",
        "suspectcrc", "corrupt", "behaveslike", "crypt", "adclicker",
        "troj", "injector", "cryptor", "packed", "adware", "macro", "msil4",
        "suspicious", "worm", "msil", "msword", "drop", "keygen", "office",
        "password", "malpack", "lookslike", "banker", "riskware", "unwanted",
        "unclassifiedmalware", "ransom", "trojan horse", "trjndwnlder",
        "trojandwnldr", "autorun", "trojandownloader", "trojandwnldr", "text",
        "download", "excel", "msilobfuscator", "rootkit", "application",
        "a variant of win64", "w97m", "shellcode", "o97m", "exploit",
        "x97m", "maliciousmacro", "downldr", "msexcel", "pp97m", "other",
        "trojandropper", "crypter", "a variant of msil", "macrodown",
        "trojanapt", "dwnldr", "downldexe", "dload", "trojanhorse", "toolbar",
        "mailer", "obfus", "obfuscator", "suspicious file", "optional",
        "suspected of trojan", "heuristic", "rogue", "virtool", "infostealer",
        "generic downloader", "generic malware", "undef", "inject", "packer",
        "generic backdoor", "word", "macosx", "hack", "unknown", "downloader",
        "trojanspy", "dldr", "msoffice", "osx32", "script", "stealer",
        "not a virus", "html", "expl", "shellkode", "downagent", "win64",
        "applicunwnt", "heur2", "ddos", "avkill", "servstart", "normal",
        "encoder", "w2km_dloader", "docdl", "w97m_dloadr", "mo97", "dloader",
        "x2km_dloadr", "w2km_dload", "w2km_dloade", "x2km_droppr", "exedown",
        "encodefeature", "docdrop", "mw97", "adload", "a variant of pp97m",
        "a variant of w97m", "badmacro", "bkdr", "docdrp", "exedrop",
        "generic trojan", "malcrypt", "malicious website", "ransomlock",
        "ransomcrypt", "reputation", "trojanransom", "pepatch", "risk",
        "adplugin", "webtoolbar", "malagent", "genmalicious", "vbinject",
        "vbcrypt", "inject2", "mdropper", "download3", "keylogger",
        "downloader11", "damaged", "file", "dldrop", "msil7", "injcrypt",
        "patched", "patchfile", "downware", "dropped",
    ]

    def __init__(self, apikey, timeout, scan=0):
        """Initialize VirusTotal API with the API key and timeout.
        @param apikey: virustotal api key
        @param timeout: request and response timeout
        @param scan: send file to scan or just get report
        """
        self.apikey = apikey
        self.timeout = timeout
        self.scan = scan

    def _request_json(self, url, **kwargs):
        """Wrapper around doing a request and parsing its JSON output.
        @param url: API endpoint to POST to
        @param kwargs: extra keyword arguments handed to requests.post()
        @return: decoded JSON body, or an empty dict when the HTTP status
            code is not 200
        @raise CuckooOperationalError: if requests is not installed, if the
            request fails, or if the response body is not valid JSON
        """
        if not HAVE_REQUESTS:
            raise CuckooOperationalError(
                "The VirusTotal processing module requires the requests "
                "library (install with `pip install requests`)")

        try:
            r = requests.post(url, timeout=self.timeout, **kwargs)
            return r.json() if r.status_code == 200 else {}
        except (requests.RequestException, ValueError) as e:
            # RequestException is the base class of ConnectionError and of
            # the timeout errors, so a read timeout is reported the same way
            # as a refused connection instead of escaping unwrapped. The
            # exception itself is formatted because `e.message` does not
            # exist on every exception type / Python version.
            raise CuckooOperationalError(
                "Unable to fetch VirusTotal results: %r" % (e,))

    def _get_report(self, url, resource, summary=False):
        """Fetch the report of a file or URL.
        @param url: report endpoint (FILE_REPORT or URL_REPORT)
        @param resource: resource identifier sent to VirusTotal
        @param summary: if True, skip the per-engine normalization
        @return: dict with a "summary" entry, merged with the raw response
        @raise VirusTotalResourceNotScanned: if there is no report and
            self.scan is truthy
        """
        data = dict(resource=resource, apikey=self.apikey)

        r = self._request_json(url, data=data)

        # No report is available for this resource. Either signal that to
        # the caller through an exception (when scanning is enabled) or
        # return an error summary.
        if not r.get("response_code"):
            if self.scan:
                raise VirusTotalResourceNotScanned

            return {
                "summary": {
                    "error": "resource has not been scanned yet",
                }
            }

        results = {
            "summary": {
                "positives": r.get("positives", 0),
                "permalink": r.get("permalink"),
                "scan_date": r.get("scan_date"),
            },
        }

        # For backwards compatibility.
        results.update(r)

        if summary:
            return results

        results["scans"] = {}
        results["normalized"] = []

        # Embed all VirusTotal results into the report. Dots in engine names
        # are replaced by underscores in the resulting keys.
        for engine, signature in (r.get("scans") or {}).items():
            signature["normalized"] = self.normalize(signature.get("result"))
            results["scans"][engine.replace(".", "_")] = signature

        # Normalize each detected variant in order to try to find the
        # exact malware family. Names are de-duplicated case-insensitively,
        # keeping the spelling that was seen first.
        seen = set()
        for signature in results["scans"].values():
            for normalized in signature["normalized"]:
                lowered = normalized.lower()
                if lowered not in seen:
                    seen.add(lowered)
                    results["normalized"].append(normalized)

        return results

    def url_report(self, url, summary=False):
        """Get the report of an existing URL scan.
        @param url: URL
        @param summary: if you want a summary report
        """
        return self._get_report(self.URL_REPORT, url, summary)

    def file_report(self, filepath, summary=False):
        """Get the report of an existing file scan.
        @param filepath: file path
        @param summary: if you want a summary report
        """
        resource = File(filepath).get_md5()
        return self._get_report(self.FILE_REPORT, resource, summary)

    def url_scan(self, url):
        """Submit a URL to be scanned.
        @param url: URL
        @return: dict holding the permalink under "summary"
        """
        data = dict(apikey=self.apikey, url=url)
        r = self._request_json(self.URL_SCAN, data=data)
        return dict(summary=dict(permalink=r.get("permalink")))

    def file_scan(self, filepath):
        """Submit a file to be scanned.
        @param filepath: file path
        @return: dict holding the permalink under "summary"
        """
        data = dict(apikey=self.apikey)

        # Close the sample as soon as the upload is done, also on failure.
        with open(filepath, "rb") as sample:
            r = self._request_json(
                self.FILE_SCAN, data=data, files={"file": sample}
            )

        return dict(summary=dict(permalink=r.get("permalink")))

    def normalize(self, variant):
        """Normalize the variant name provided by an Anti Virus engine. This
        attempts to extract the useful parts of a variant name by stripping
        all the boilerplate stuff from it.
        @param variant: detection name, may be None or empty
        @return: list of CVE identifiers and remaining words
        """
        if not variant:
            return []

        ret = []

        # Handles "CVE-2012-1234", "CVE2012-1234". The sequence number may
        # have more than four digits (e.g., "CVE-2017-11882"); matching only
        # four would report a different, truncated identifier.
        cve = re.search(r"CVE[-_]?(\d{4})[-_](\d{4,})", variant)
        if cve:
            ret.append("CVE-%s-%s" % (cve.group(1), cve.group(2)))

        # Handles "CVE121234".
        cve = re.search(r"CVE(\d{2})(\d{4})", variant)
        if cve:
            ret.append("CVE-20%s-%s" % (cve.group(1), cve.group(2)))

        for word in re.split(r"[\.\,\-\(\)\[\]/!:_]", variant):
            word = word.strip()
            if len(word) < 4:
                continue

            lowered = word.lower()
            if lowered in self.VARIANT_BLACKLIST:
                continue

            # Random hashes that are specific to this file.
            if re.match(r"[a-fA-F0-9]+$", word):
                continue

            # Family names followed by "potentially unwanted".
            if re.match(r"[a-zA-Z]{1,2} potentially unwanted", lowered):
                continue

            ret.append(word)

        return ret
208