1
2
3
4
5
6 import logging
7 import os
8 import subprocess
9
10 from lib.cuckoo.common.abstracts import Processing
11
12 log = logging.getLogger(__name__)
13
15 """Screenshot file OCR analysis."""
16
def run(self):
    """Run OCR over the JPEG screenshots of an analysis.

    Sets ``self.key`` to ``"screenshots"``, then feeds every ``.jpg`` file
    found in ``self.shots_path`` (in sorted filename order) to the tesseract
    binary named by the ``tesseract`` option, which defaults to
    ``/usr/bin/tesseract``.

    @return: list of dicts, one per screenshot, with the keys ``path``
        (full path of the image) and ``ocr`` (whatever tesseract wrote to
        its standard output, or an empty string when the run failed). The
        list is empty when the tesseract binary or the screenshot
        directory does not exist.
    """
    self.key = "screenshots"
    screenshots = []

    tesseract = self.options.get("tesseract", "/usr/bin/tesseract")
    if not os.path.exists(tesseract):
        log.error("Could not find tesseract binary, "
                  "screenshot OCR aborted.")
        return []

    # Nothing to process when the analysis produced no screenshot directory.
    if not os.path.exists(self.shots_path):
        return screenshots

    for shot_file in sorted(os.listdir(self.shots_path)):
        if not shot_file.endswith(".jpg"):
            continue

        shot_path = os.path.join(self.shots_path, shot_file)
        shot_entry = dict(path=shot_path, ocr="")

        try:
            # Passing "stdout" as the output base asks tesseract to print
            # the recognised text instead of writing it to a file.
            # NOTE(review): check_output() returns bytes on Python 3 --
            # confirm that the consumers of this result expect that.
            args = [tesseract, shot_path, "stdout"]
            shot_entry["ocr"] = subprocess.check_output(args)
        except (subprocess.CalledProcessError, OSError) as e:
            # CalledProcessError: tesseract exited with a non-zero status.
            # OSError: the path exists but could not be launched (e.g. it
            # is a directory or lacks execute permission). Either way the
            # screenshot is still reported, with an empty OCR result,
            # rather than aborting the remaining screenshots.
            log.info("Error running tesseract: %s", e)

        screenshots.append(shot_entry)

    return screenshots
53