1
2
3
4
5
6 import hashlib
7 import logging
8 import os
9 import sys
10 import shutil
11 import ntpath
12 import string
13 import tempfile
14 import xmlrpclib
15 import inspect
16 import platform
17 import threading
18 import json
19 import multiprocessing
20 import warnings
21
22 from cStringIO import StringIO
23 from datetime import datetime
24
25 from lib.cuckoo.common.exceptions import CuckooOperationalError
26 from lib.cuckoo.common.config import Config
27
28 from lib.cuckoo.common.constants import CUCKOO_ROOT, CUCKOO_VERSION
29 from lib.cuckoo.common.constants import GITHUB_URL, ISSUES_PAGE_URL
30
31 try:
32 import bs4
33 HAVE_BS4 = True
34 except ImportError:
35 HAVE_BS4 = False
36
37 try:
38 import chardet
39 HAVE_CHARDET = True
40 except ImportError:
41 HAVE_CHARDET = False
42
43 try:
44 import jsbeautifier
45 HAVE_JSBEAUTIFIER = True
46 except ImportError:
47 HAVE_JSBEAUTIFIER = False
48
49 log = logging.getLogger(__name__)
50
# NOTE(review): the "def" line was missing from the reviewed listing; name and
# defaults are reconstructed from the docstring and body - confirm with callers.
def create_folders(root=".", folders=None):
    """Create directories.
    @param root: root path.
    @param folders: folders list to be created.
    @raise CuckooOperationalError: if fails to create folder.
    """
    # "folders or ()" lets callers omit the list without sharing a mutable
    # default between calls.
    for folder in folders or ():
        create_folder(root, folder)
59
# NOTE(review): the "def" line was missing from the reviewed listing; the
# defaults are an assumption - confirm with callers.
def create_folder(root=".", folder=None):
    """Create directory.
    @param root: root path.
    @param folder: folder name to be created.
    @raise CuckooOperationalError: if fails to create folder.
    """
    # Check the name before joining: os.path.join() rejects None, so the
    # guard has to come first for an empty/None folder to be a no-op.
    if not folder:
        return

    folder_path = os.path.join(root, folder)
    if os.path.isdir(folder_path):
        return

    try:
        os.makedirs(folder_path)
    except OSError:
        # Somebody else may have created the folder between the isdir()
        # check and makedirs(); only report a failure if it is still absent.
        if not os.path.isdir(folder_path):
            raise CuckooOperationalError("Unable to create folder: %s" %
                                         folder_path)
73
# NOTE(review): the "def" line was missing from the reviewed listing; the name
# is reconstructed from the docstring - confirm with callers.
def delete_folder(folder):
    """Delete a folder and all its subdirectories.
    @param folder: path to delete.
    @raise CuckooOperationalError: if fails to delete folder.
    """
    # A path that does not exist is silently accepted.
    if not os.path.exists(folder):
        return

    try:
        shutil.rmtree(folder)
    except OSError:
        raise CuckooOperationalError("Unable to delete folder: "
                                     "{0}".format(folder))
85
86
87
# Characters that the printable helpers pass through unescaped: letters,
# digits, punctuation, space, tab, carriage return and newline.
PRINTABLE_CHARACTERS = \
    string.letters + string.digits + string.punctuation + " \t\r\n"
90
def convert_char(c):
    """Escapes characters.
    @param c: dirty char.
    @return: sanitized char.
    """
    if c in PRINTABLE_CHARACTERS:
        return c

    # Anything else is rendered as a literal backslash-x escape, e.g. "\x00".
    return "\\x%02x" % ord(c)
100
def is_printable(s):
    """Test if a string is printable.
    @param s: string.
    @return: True if every character is in PRINTABLE_CHARACTERS (an empty
             string counts as printable), False otherwise.
    """
    return all(c in PRINTABLE_CHARACTERS for c in s)
107
# NOTE(review): the "def" line was missing from the reviewed listing; the name
# is reconstructed - confirm with callers.
def convert_to_printable(s):
    """Convert char to printable.
    @param s: string.
    @return: sanitized string.
    """
    # Already-clean input is returned as-is, without building a new string.
    if is_printable(s):
        return s

    return "".join(convert_char(c) for c in s)
116
# NOTE(review): the "def" line was missing from the reviewed listing; the name
# is reconstructed - confirm with callers.
def datetime_to_iso(timestamp):
    """Parse a datetime string and return it in ISO format.
    @param timestamp: timestamp string formatted as "%Y-%m-%d %H:%M:%S".
    @return: ISO datetime
    @raise ValueError: if the string does not match the expected format.
    """
    parsed = datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S")
    return parsed.isoformat()
123
def get_filename_from_path(path):
    """Cross-platform filename extraction from path.
    @param path: file path.
    @return: filename.
    """
    # ntpath splits on both "\\" and "/", so Windows and POSIX paths work.
    dirpath, filename = ntpath.split(path)
    if filename:
        return filename

    # The path ended with a separator: use the last directory component.
    return ntpath.basename(dirpath)
131
# NOTE(review): the "def" line was missing from the reviewed listing; the
# signature is reconstructed from the docstring - confirm with callers.
def store_temp_file(filedata, filename, path=None):
    """Store a temporary file.
    @param filedata: content of the original file, either the raw data or an
                     object exposing read().
    @param filename: name of the original file.
    @param path: optional path for temp directory.
    @return: path to the temporary file.
    """
    # Keep only the last path component of the supplied name and cap it at
    # 100 characters.
    filename = get_filename_from_path(filename)[:100]

    if path:
        target_path = path
    else:
        # The configuration is only needed to find the default location.
        tmp_path = Config().cuckoo.get("tmppath", "/tmp")
        target_path = os.path.join(tmp_path, "cuckoo-tmp")

    if not os.path.exists(target_path):
        try:
            os.mkdir(target_path)
        except OSError:
            # A concurrent caller may have created it in the meantime.
            if not os.path.isdir(target_path):
                raise

    # Every upload gets its own directory, so equal names cannot collide.
    tmp_dir = tempfile.mkdtemp(prefix="upload_", dir=target_path)
    tmp_file_path = os.path.join(tmp_dir, filename)
    with open(tmp_file_path, "wb") as tmp_file:
        if hasattr(filedata, "read"):
            # File-like input is copied in chunks instead of loaded at once.
            shutil.copyfileobj(filedata, tmp_file)
        else:
            tmp_file.write(filedata)

    return tmp_file_path
167
169 """Timeout server for XMLRPC.
170 XMLRPC + timeout - still a bit ugly - but at least gets rid of setdefaulttimeout
171 inspired by http://stackoverflow.com/questions/372365/set-timeout-for-xmlrpclib-serverproxy
172 (although their stuff was messy, this is cleaner)
173 @see: http://stackoverflow.com/questions/372365/set-timeout-for-xmlrpclib-serverproxy
174 """
179
181 t = self._ServerProxy__transport
182 t.timeout = timeout
183
184 if hasattr(t, "_connection") and t._connection[1] and t._connection[1].sock:
185 t._connection[1].sock.settimeout(timeout)
186
189 self.timeout = kwargs.pop("timeout", None)
190 xmlrpclib.Transport.__init__(self, *args, **kwargs)
191
197
199 """Singleton.
200 @see: http://stackoverflow.com/questions/6760685/creating-a-singleton-in-python
201 """
202 _instances = {}
203
208
210 """Singleton per thread."""
211 _instances = threading.local()
212
217
# NOTE(review): the "def" line was missing from the reviewed listing; the name
# is reconstructed - confirm with callers.
def to_unicode(s):
    """Attempt to fix non utf-8 string into utf-8. It tries to guess input encoding,
    if fail retry with a replace strategy (so undetectable chars will be escaped).
    @see: fuller list of encodings at http://docs.python.org/library/codecs.html#standard-encodings
    @param s: byte string or unicode object.
    @return: unicode object.
    """

    def brute_enc(s2):
        """Trying to decode via simple brute forcing."""
        for enc in ("ascii", "utf8", "latin1"):
            try:
                return unicode(s2, enc)
            except UnicodeDecodeError:
                pass
        return None

    def chardet_enc(s2):
        """Guess encoding via chardet."""
        enc = chardet.detect(s2)["encoding"]

        # chardet reports None when it cannot guess; unicode() would raise
        # TypeError on that instead of the UnicodeDecodeError handled below.
        if not enc:
            return None

        try:
            return unicode(s2, enc)
        except (UnicodeDecodeError, LookupError):
            # LookupError: the reported codec is unknown to this Python.
            return None

    # Nothing to do for input that is already unicode.
    if isinstance(s, unicode):
        return s

    # Cheap fixed list of encodings first.
    result = brute_enc(s)

    # Compare against None rather than truthiness: an empty string decodes
    # to u"", which is a valid result and must not trigger the fallbacks.
    if result is None and HAVE_CHARDET:
        result = chardet_enc(s)

    # Last resort: decode with the default codec, replacing undecodable
    # bytes.
    if result is None:
        result = unicode(s, errors="replace")

    return result
261
# NOTE(review): the "def" line was missing from the reviewed listing; the name
# is reconstructed - confirm with callers.
def cleanup_value(v):
    """Cleanup utility function, strips some unwanted parts from values.
    @param v: value to clean; it is converted with str() first.
    @return: the string without a leading "\\??\\" prefix, if it had one.
    """
    prefix = "\\??\\"
    v = str(v)
    if v.startswith(prefix):
        return v[len(prefix):]
    return v
268
# NOTE(review): the "def" line was missing from the reviewed listing; the name
# is reconstructed from the docstring - confirm with callers.
def classlock(f):
    """Classlock decorator (created for database.Database).
    Used to put a lock to avoid sqlite errors.
    @param f: method to wrap; its instance must expose a "_lock" context
              manager.
    @return: wrapper that runs f while holding self._lock.
    """
    # Imported locally so this block does not depend on a module-level import.
    import functools

    @functools.wraps(f)
    def inner(self, *args, **kwargs):
        curframe = inspect.currentframe()
        try:
            calframe = inspect.getouterframes(curframe, 2)
            # Index 1 is the caller of this wrapper, field 1 its file name.
            from_database = calframe[1][1].endswith("database.py")
        finally:
            # Drop frame references promptly to avoid reference cycles.
            calframe = None
            del curframe

        # Calls coming from database.py itself skip the lock.
        # NOTE(review): presumably because the caller already holds it and a
        # second acquire would deadlock - confirm.
        if from_database:
            return f(self, *args, **kwargs)

        with self._lock:
            return f(self, *args, **kwargs)

    return inner
284
287 self.tlock = threading.Lock()
288 self.mlock = multiprocessing.Lock()
289
293
294 - def __exit__(self, type, value, traceback):
297
# NOTE(review): the "def" line was missing from the reviewed listing; name and
# parameter order are reconstructed from the docstring and body - confirm.
def hash_file(method, filepath):
    """Calculates a hash on a file by path.
    @param method: callable hashing method
    @param filepath: file path
    @return: computed hash string
    """
    h = method()
    # The context manager closes the file even if reading fails.
    with open(filepath, "rb") as f:
        while True:
            # Read 1 MiB at a time so large files are never fully in memory.
            buf = f.read(1024 * 1024)
            if not buf:
                break
            h.update(buf)
    return h.hexdigest()
312
315
318
321
# Cache mapping "{GUID}" to its name, filled on the first guid_name() call.
GUIDS = {}


# NOTE(review): the "def" line was missing from the reviewed listing; the name
# is reconstructed - confirm with callers.
def guid_name(guid):
    """Look up the name associated with a GUID.
    @param guid: GUID string including the surrounding curly braces.
    @return: name from data/guids.txt, or None if the GUID is not listed.
    """
    if not GUIDS:
        guids_path = os.path.join(CUCKOO_ROOT, "data", "guids.txt")
        with open(guids_path) as guids_file:
            for line in guids_file:
                try:
                    # Use distinct names here: unpacking into "guid" would
                    # overwrite the argument and break the first lookup.
                    entry_guid, name, _url = line.strip().split()
                except ValueError:
                    # Wrong number of whitespace-separated fields.
                    log.debug("Invalid GUID entry: %s", line)
                    continue

                GUIDS["{%s}" % entry_guid] = name

    return GUIDS.get(guid)
336
# NOTE(review): the "def" line was missing from the reviewed listing; the name
# is reconstructed - confirm with callers.
def exception_message():
    """Creates a message describing an unhandled exception.
    @return: multi-line string with upgrade advice and environment details.
    """
    def get_os_release():
        """Returns detailed OS release."""
        # Fetched with getattr() so a Python without this function falls
        # through to the other checks instead of raising.
        linux_distribution = getattr(platform, "linux_distribution", None)
        if linux_distribution is not None:
            distro = linux_distribution()
            if distro[0]:
                return " ".join(distro)

        mac = platform.mac_ver()
        if mac[0]:
            return "%s %s" % (mac[0], mac[2])

        return "Unknown"

    msg = (
        "Oops! Cuckoo failed in an unhandled exception!\nSometimes bugs are "
        "already fixed in the development release, it is therefore "
        "recommended to retry with the latest development release available "
        "%s\nIf the error persists please open a new issue at %s\n\n" %
        (GITHUB_URL, ISSUES_PAGE_URL)
    )

    msg += "=== Exception details ===\n"
    msg += "Cuckoo version: %s\n" % CUCKOO_VERSION
    msg += "OS version: %s\n" % os.name
    msg += "OS release: %s\n" % get_os_release()
    msg += "Python version: %s\n" % sys.version.split()[0]
    msg += "Machine arch: %s\n" % platform.machine()

    # Best effort: report the commit the local master branch points to.
    git_version = os.path.join(CUCKOO_ROOT, ".git", "refs", "heads", "master")
    if os.path.exists(git_version):
        try:
            with open(git_version, "rb") as git_ref:
                msg += "Git version: %s\n" % git_ref.read().strip()
        except (IOError, OSError):
            pass

    # Best effort: list installed packages. This runs while reporting a
    # crash, so a missing pip or a pip without this helper must not raise.
    try:
        import pip

        msg += "Modules: %s\n" % " ".join(sorted(
            "%s:%s" % (package.key, package.version)
            for package in pip.get_installed_distributions()
        ))
    except (ImportError, AttributeError):
        pass

    msg += "\n"
    return msg
382
# Captured jsbeautifier output that is not worth a warning.
_jsbeautify_blacklist = [
    "",
    "error: Unknown p.a.c.k.e.r. encoding.\n",
]

# sys.stdout is process-wide, so only one thread may redirect it at a time.
_jsbeautify_lock = threading.Lock()


# NOTE(review): the "def" line was missing from the reviewed listing; the name
# is reconstructed - confirm with callers.
def jsbeautify(javascript):
    """Beautifies Javascript through jsbeautifier and ignore some messages.
    @param javascript: Javascript source.
    @return: beautified source, or the input unchanged if jsbeautifier is
             not installed.
    """
    if not HAVE_JSBEAUTIFIER:
        return javascript

    with _jsbeautify_lock:
        # Redirect stdout to capture what jsbeautifier prints.
        origout, sys.stdout = sys.stdout, StringIO()
        try:
            javascript = jsbeautifier.beautify(javascript)
            output = sys.stdout.getvalue()
        finally:
            # Always restore stdout, even when beautify() raises.
            sys.stdout = origout

    if output not in _jsbeautify_blacklist:
        log.warning("jsbeautifier returned error: %s", output)

    return javascript
404
# NOTE(review): the "def" line was missing from the reviewed listing; the name
# is reconstructed - confirm with callers.
def htmlprettify(html):
    """Beautifies HTML through BeautifulSoup4.
    @param html: HTML source.
    @return: prettified HTML, or the input unchanged if bs4 is not installed.
    """
    if not HAVE_BS4:
        return html

    with warnings.catch_warnings():
        # Silence warnings reported for source line 182.
        # NOTE(review): presumably a specific bs4 warning; the line number
        # depends on the installed bs4 version - confirm.
        warnings.simplefilter("ignore", lineno=182)
        soup = bs4.BeautifulSoup(html, "html.parser")
        return soup.prettify()
415
def json_default(obj):
    """JSON serializer for objects not serializable by default json code.
    @param obj: object the json module could not serialize.
    @return: {"$dt": <ISO string>} for datetime objects.
    @raise TypeError: for any other type.
    """
    if isinstance(obj, datetime):
        offset = obj.utcoffset()
        if offset is not None:
            # Convert to UTC and drop tzinfo. Subtracting alone keeps the
            # tzinfo, so the string would be shifted and still carry an
            # offset suffix.
            obj = (obj - offset).replace(tzinfo=None)
        return {"$dt": obj.isoformat()}
    raise TypeError("Type not serializable")
423
def json_hook(obj):
    """JSON object hook, deserializing datetimes ($dt).
    @param obj: dict decoded by the json module.
    @return: datetime if the dict carries a "$dt" key, else the dict itself.
    """
    if "$dt" in obj:
        x = obj["$dt"]
        # isoformat() leaves out the fractional part when microsecond is 0,
        # so both layouts have to be accepted.
        if "." in x:
            fmt = "%Y-%m-%dT%H:%M:%S.%f"
        else:
            fmt = "%Y-%m-%dT%H:%M:%S"
        return datetime.strptime(x, fmt)
    return obj
430
# NOTE(review): the "def" line was missing from the reviewed listing; the name
# is reconstructed - confirm with callers.
def json_encode(obj, **kwargs):
    """JSON encoding wrapper that handles datetime objects.
    @param obj: object to serialize.
    @param kwargs: extra keyword arguments passed on to json.dumps().
    @return: JSON string.
    """
    return json.dumps(obj, default=json_default, **kwargs)
434
# NOTE(review): the "def" line was missing from the reviewed listing; the name
# is reconstructed - confirm with callers.
def json_decode(x):
    """JSON decoder that does ugly first-level datetime handling.
    @param x: JSON string.
    @return: decoded object; dicts carrying a "$dt" key are converted by
             json_hook.
    """
    return json.loads(x, object_hook=json_hook)
438
# NOTE(review): the "def" line was missing from the reviewed listing; the name
# is reconstructed - confirm with callers.
def versiontuple(v):
    """Return the version as a tuple for easy comparison.
    @param v: dotted version string made of integers, e.g. "1.2.10".
    @return: tuple of ints.
    @raise ValueError: if a component is not an integer.
    """
    return tuple(int(part) for part in v.split("."))
442