1
2
3
4
5 import binascii
6 import hashlib
7 import logging
8 import os
9 import subprocess
10
11 from lib.cuckoo.common.constants import CUCKOO_ROOT
12
13 try:
14 import magic
15 HAVE_MAGIC = True
16 except ImportError:
17 HAVE_MAGIC = False
18
19 try:
20 import pydeep
21 HAVE_PYDEEP = True
22 except ImportError:
23 HAVE_PYDEEP = False
24
25 try:
26 import yara
27 HAVE_YARA = True
28 except ImportError:
29 HAVE_YARA = False
30
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Number of bytes requested per read() when a file is streamed in chunks.
FILE_CHUNK_SIZE = 16 * 1024
class Dictionary(dict):
    """Cuckoo custom dict.

    A dict whose items can also be reached as attributes:
    ``d.key = value`` stores ``d["key"]``, ``del d.key`` removes it and
    ``d.key`` reads it back.
    """

    def __getattr__(self, key):
        """Return the item stored under key.
        @param key: item name.
        @return: stored value, or None when the key is absent.
        """
        # __getattr__ only runs after normal attribute lookup has failed, so
        # regular dict methods and attributes are never shadowed by items.
        return self.get(key, None)

    # Attribute assignment and deletion go straight to the item API.
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__
43
class URL:
    """URL base object."""

    def __init__(self, url):
        """@param url: URL"""
        self.url = url
50
class File:
    """Basic file object class with all useful utilities.

    Wraps a path on disk. File contents and hashes are computed on first
    request and cached on the instance afterwards.
    """

    # Class-wide flags so that the "missing optional dependency" warnings
    # are logged once, not once per File instance.
    notified_yara = False
    notified_pydeep = False

    def __init__(self, file_path):
        """@param file_path: file path."""
        self.file_path = file_path

        # Caches filled on first access; None means "not computed yet".
        self._file_data = None
        self._crc32 = None
        self._md5 = None
        self._sha1 = None
        self._sha256 = None
        self._sha512 = None

    def get_name(self):
        """Get file name.
        @return: file name.
        """
        return os.path.basename(self.file_path)

    def valid(self):
        """Check that the path is an existing, non-empty regular file.
        @return: boolean.
        """
        return (os.path.exists(self.file_path) and
                os.path.isfile(self.file_path) and
                os.path.getsize(self.file_path) != 0)

    def get_data(self):
        """Read file contents.
        @return: data.
        """
        return self.file_data

    def get_chunks(self):
        """Read file contents in chunks (generator).
        @return: generator yielding blocks of at most FILE_CHUNK_SIZE bytes.
        """
        with open(self.file_path, "rb") as fd:
            # iter() with a sentinel stops at the first empty read (EOF).
            for chunk in iter(lambda: fd.read(FILE_CHUNK_SIZE), b""):
                yield chunk

    def calc_hashes(self):
        """Calculate all possible hashes for this file.

        The file is streamed once and every digest is updated from the same
        chunk, so large files are read a single time and never held in
        memory as a whole.
        """
        crc = 0
        md5 = hashlib.md5()
        sha1 = hashlib.sha1()
        sha256 = hashlib.sha256()
        sha512 = hashlib.sha512()

        for chunk in self.get_chunks():
            crc = binascii.crc32(chunk, crc)
            md5.update(chunk)
            sha1.update(chunk)
            sha256.update(chunk)
            sha512.update(chunk)

        # Mask to 32 bits first: crc32() may return a negative number on
        # Python 2. The result is always 8 upper-case hex digits.
        self._crc32 = "%08X" % (crc & 0xFFFFFFFF)
        self._md5 = md5.hexdigest()
        self._sha1 = sha1.hexdigest()
        self._sha256 = sha256.hexdigest()
        self._sha512 = sha512.hexdigest()

    @property
    def file_data(self):
        """Whole file contents, read on first access and cached.
        @return: data.
        """
        # Compare against None (not truthiness) so that an empty file is
        # cached too instead of being re-opened on every access.
        if self._file_data is None:
            with open(self.file_path, "rb") as fd:
                self._file_data = fd.read()
        return self._file_data

    def get_size(self):
        """Get file size.
        @return: file size.
        """
        return os.path.getsize(self.file_path)

    def get_crc32(self):
        """Get CRC32.
        @return: CRC32.
        """
        if self._crc32 is None:
            self.calc_hashes()
        return self._crc32

    def get_md5(self):
        """Get MD5.
        @return: MD5.
        """
        if self._md5 is None:
            self.calc_hashes()
        return self._md5

    def get_sha1(self):
        """Get SHA1.
        @return: SHA1.
        """
        if self._sha1 is None:
            self.calc_hashes()
        return self._sha1

    def get_sha256(self):
        """Get SHA256.
        @return: SHA256.
        """
        if self._sha256 is None:
            self.calc_hashes()
        return self._sha256

    def get_sha512(self):
        """Get SHA512.
        @return: SHA512.
        """
        if self._sha512 is None:
            self.calc_hashes()
        return self._sha512

    def get_ssdeep(self):
        """Get SSDEEP.
        @return: SSDEEP, or None when pydeep is missing or hashing fails.
        """
        if not HAVE_PYDEEP:
            if not File.notified_pydeep:
                File.notified_pydeep = True
                log.warning("Unable to import pydeep (install with `pip install pydeep`)")
            return None

        try:
            return pydeep.hash_file(self.file_path)
        except Exception:
            # Best effort: a failed fuzzy hash must not abort the caller.
            return None

    def get_type(self):
        """Get file type.
        @return: file type as reported by the magic module or, failing
                 that, by the `file -b` command; None if neither worked.
        """
        file_type = None

        if HAVE_MAGIC:
            ms = None
            try:
                # First try the magic.open()/load()/file() interface.
                ms = magic.open(magic.MAGIC_NONE)
                ms.load()
                file_type = ms.file(self.file_path)
            except Exception:
                # That interface is missing or failed: try from_file().
                try:
                    file_type = magic.from_file(self.file_path)
                except Exception:
                    pass
            finally:
                # ms stays None when magic.open() itself failed.
                if ms is not None:
                    try:
                        ms.close()
                    except Exception:
                        pass

        if file_type is None:
            try:
                p = subprocess.Popen(["file", "-b", self.file_path],
                                     stdout=subprocess.PIPE)
                # communicate() reads the output, closes the pipe and waits
                # for the child, so no zombie process is left behind.
                file_type = p.communicate()[0].strip()
            except Exception as e:
                log.debug("Unable to run `file` on %s: %s", self.file_path, e)

        return file_type

    @staticmethod
    def _format_yara_string(data):
        """Render the data of one matched Yara string.
        @param data: matched data.
        @return: the data as text when it is pure ASCII, otherwise a
                 hex string of the form "{ AA BB }".
        """
        if not isinstance(data, bytes):
            # Already text, nothing to convert.
            return data

        try:
            return data.decode("ascii")
        except UnicodeDecodeError:
            # NOTE(review): leading "u"/"U" bytes are dropped before
            # hex-encoding, exactly as the previous code did; the reason is
            # not visible here -- confirm it is still wanted.
            hexed = binascii.hexlify(data.lstrip(b"uU")).decode("ascii").upper()
            pairs = " ".join(hexed[i:i + 2] for i in range(0, len(hexed), 2))
            return "{ %s }" % pairs

    def get_yara(self, rulepath=None):
        """Get Yara signatures matches.
        @param rulepath: path of the Yara rules file to compile; when None,
                         data/yara/index_binary.yar under CUCKOO_ROOT is used.
        @return: matched Yara signatures.
        """
        matches = []

        if not HAVE_YARA:
            if not File.notified_yara:
                File.notified_yara = True
                log.warning("Unable to import yara (please compile from sources)")
            return matches

        # Empty files are not scanned.
        if os.path.getsize(self.file_path) <= 0:
            return matches

        if rulepath is None:
            # Resolved at call time rather than in the signature.
            rulepath = os.path.join(CUCKOO_ROOT, "data", "yara",
                                    "index_binary.yar")

        try:
            rules = yara.compile(rulepath)

            for match in rules.match(self.file_path):
                strings = []
                for s in match.strings:
                    # s[2] is the matched data of this string.
                    new = self._format_yara_string(s[2])
                    # Keep first-seen order, without duplicates.
                    if new not in strings:
                        strings.append(new)

                matches.append({"name": match.rule,
                                "meta": match.meta,
                                "strings": strings})
        except yara.Error as e:
            log.warning("Unable to match Yara signatures: %s", e)

        return matches

    def get_all(self):
        """Get all information available.
        @return: information dict.
        """
        return {
            "name": self.get_name(),
            "path": self.file_path,
            "size": self.get_size(),
            "crc32": self.get_crc32(),
            "md5": self.get_md5(),
            "sha1": self.get_sha1(),
            "sha256": self.get_sha256(),
            "sha512": self.get_sha512(),
            "ssdeep": self.get_ssdeep(),
            "type": self.get_type(),
            "yara": self.get_yara(),
        }
267