Package CedarBackup3 :: Package extend :: Module amazons3
[hide private]
[frames] | [no frames]

Source Code for Module CedarBackup3.extend.amazons3

  1  # -*- coding: iso-8859-1 -*- 
  2  # vim: set ft=python ts=3 sw=3 expandtab: 
  3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  4  # 
  5  #              C E D A R 
  6  #          S O L U T I O N S       "Software done right." 
  7  #           S O F T W A R E 
  8  # 
  9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 10  # 
 11  # Copyright (c) 2014-2015 Kenneth J. Pronovici. 
 12  # All rights reserved. 
 13  # 
 14  # This program is free software; you can redistribute it and/or 
 15  # modify it under the terms of the GNU General Public License, 
 16  # Version 2, as published by the Free Software Foundation. 
 17  # 
 18  # This program is distributed in the hope that it will be useful, 
 19  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 20  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
 21  # 
 22  # Copies of the GNU General Public License are available from 
 23  # the Free Software Foundation website, http://www.gnu.org/. 
 24  # 
 25  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 26  # 
 27  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
 28  # Language : Python 3 (>= 3.4) 
 29  # Project  : Official Cedar Backup Extensions 
 30  # Purpose  : "Store" type extension that writes data to Amazon S3. 
 31  # 
 32  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 33   
 34  ######################################################################## 
 35  # Module documentation 
 36  ######################################################################## 
 37   
 38  """ 
 39  Store-type extension that writes data to Amazon S3. 
 40   
 41  This extension requires a new configuration section <amazons3> and is intended 
 42  to be run immediately after the standard stage action, replacing the standard 
 43  store action.  Aside from its own configuration, it requires the options and 
 44  staging configuration sections in the standard Cedar Backup configuration file. 
 45  Since it is intended to replace the store action, it does not rely on any store 
 46  configuration. 
 47   
 48  The underlying functionality relies on the U{AWS CLI interface 
 49  <http://aws.amazon.com/documentation/cli/>}.  Before you use this extension, 
 50  you need to set up your Amazon S3 account and configure the AWS CLI connection 
 51  per Amazon's documentation.  The extension assumes that the backup is being 
 52  executed as root, and switches over to the configured backup user to 
 53  communicate with AWS.  So, make sure you configure AWS CLI as the backup user 
 54  and not root. 
 55   
 56  You can optionally configure Cedar Backup to encrypt data before sending it 
 57  to S3.  To do that, provide a complete command line using the C{${input}} and 
 58  C{${output}} variables to represent the original input file and the encrypted 
 59  output file.  This command will be executed as the backup user. 
 60   
 61  For instance, you can use something like this with GPG:: 
 62   
 63     /usr/bin/gpg -c --no-use-agent --batch --yes --passphrase-file /home/backup/.passphrase -o ${output} ${input} 
 64   
 65  The GPG mechanism depends on a strong passphrase for security.  One way to 
 66  generate a strong passphrase is using your system random number generator, i.e.:: 
 67   
 68     dd if=/dev/urandom count=20 bs=1 | xxd -ps 
 69   
 70  (See U{StackExchange <http://security.stackexchange.com/questions/14867/gpg-encryption-security>} 
 71  for more details about that advice.) If you decide to use encryption, make sure 
 72  you save off the passphrase in a safe place, so you can get at your backup data 
 73  later if you need to.  And obviously, make sure to set permissions on the 
 74  passphrase file so it can only be read by the backup user. 
 75   
 76  This extension was written for and tested on Linux.  It will throw an exception 
 77  if run on Windows. 
 78   
 79  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
 80  """ 
 81   
 82  ######################################################################## 
 83  # Imported modules 
 84  ######################################################################## 
 85   
 86  # System modules 
 87  import sys 
 88  import os 
 89  import logging 
 90  import tempfile 
 91  import datetime 
 92  import json 
 93  import shutil 
 94  from functools import total_ordering 
 95   
 96  # Cedar Backup modules 
 97  from CedarBackup3.filesystem import FilesystemList, BackupFileList 
 98  from CedarBackup3.util import resolveCommand, executeCommand, isRunningAsRoot, changeOwnership, isStartOfWeek 
 99  from CedarBackup3.util import displayBytes, UNIT_BYTES 
100  from CedarBackup3.xmlutil import createInputDom, addContainerNode, addBooleanNode, addStringNode 
101  from CedarBackup3.xmlutil import readFirstChild, readString, readBoolean 
102  from CedarBackup3.actions.util import writeIndicatorFile 
103  from CedarBackup3.actions.constants import DIR_TIME_FORMAT, STAGE_INDICATOR 
104  from CedarBackup3.config import ByteQuantity, readByteQuantity, addByteQuantityNode 
105   
106   
########################################################################
# Module-wide constants and variables
########################################################################

# Module-level logger, named within the standard CedarBackup3 logging hierarchy
logger = logging.getLogger("CedarBackup3.log.extend.amazons3")

# Command stubs, resolved to full paths via resolveCommand() before execution.
# All AWS CLI commands are run as the configured backup user via "su -c".
SU_COMMAND    = [ "su" ]
AWS_COMMAND   = [ "aws" ]

# Indicator file written into a staging directory once it has been stored to S3
STORE_INDICATOR = "cback.amazons3"
########################################################################
# AmazonS3Config class definition
########################################################################

@total_ordering
class AmazonS3Config(object):

   """
   Class representing Amazon S3 configuration.

   Amazon S3 configuration is used for storing backup data in Amazon's S3 cloud
   storage using the AWS CLI toolset.

   The following restrictions exist on data in this class:

      - The s3Bucket value must be a non-empty string
      - The encryptCommand value, if set, must be a non-empty string
      - The full backup size limit, if set, must be a ByteQuantity >= 0
      - The incremental backup size limit, if set, must be a ByteQuantity >= 0

   @sort: __init__, __repr__, __str__, __cmp__, __eq__, __lt__, __gt__,
          warnMidnite, s3Bucket
   """

   def __init__(self, warnMidnite=None, s3Bucket=None, encryptCommand=None,
                fullBackupSizeLimit=None, incrementalBackupSizeLimit=None):
      """
      Constructor for the C{AmazonS3Config} class.

      @param warnMidnite: Whether to generate warnings for crossing midnite.
      @param s3Bucket: Name of the Amazon S3 bucket in which to store the data
      @param encryptCommand: Command used to encrypt backup data before upload to S3
      @param fullBackupSizeLimit: Maximum size of a full backup, a ByteQuantity
      @param incrementalBackupSizeLimit: Maximum size of an incremental backup, a ByteQuantity

      @raise ValueError: If one of the values is invalid.
      """
      # Initialize the private attributes first, then assign through the
      # properties so every value is validated/normalized consistently.
      self._warnMidnite = None
      self._s3Bucket = None
      self._encryptCommand = None
      self._fullBackupSizeLimit = None
      self._incrementalBackupSizeLimit = None
      self.warnMidnite = warnMidnite
      self.s3Bucket = s3Bucket
      self.encryptCommand = encryptCommand
      self.fullBackupSizeLimit = fullBackupSizeLimit
      self.incrementalBackupSizeLimit = incrementalBackupSizeLimit

   def __repr__(self):
      """
      Official string representation for class instance.
      """
      return "AmazonS3Config(%s, %s, %s, %s, %s)" % (self.warnMidnite, self.s3Bucket, self.encryptCommand,
                                                     self.fullBackupSizeLimit, self.incrementalBackupSizeLimit)

   def __str__(self):
      """
      Informal string representation for class instance.
      """
      return self.__repr__()

   def __eq__(self, other):
      """Equals operator, implemented in terms of original Python 2 compare operator."""
      return self.__cmp__(other) == 0

   def __lt__(self, other):
      """Less-than operator, implemented in terms of original Python 2 compare operator."""
      return self.__cmp__(other) < 0

   def __gt__(self, other):
      """Greater-than operator, implemented in terms of original Python 2 compare operator."""
      return self.__cmp__(other) > 0

   def __cmp__(self, other):
      """
      Original Python 2 comparison operator.
      @param other: Other object to compare to.
      @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
      """
      if other is None:
         return 1
      if self.warnMidnite != other.warnMidnite:
         if self.warnMidnite < other.warnMidnite:
            return -1
         else:
            return 1
      # For the string fields, None sorts before any real value (coerced via "")
      if self.s3Bucket != other.s3Bucket:
         if str(self.s3Bucket or "") < str(other.s3Bucket or ""):
            return -1
         else:
            return 1
      if self.encryptCommand != other.encryptCommand:
         if str(self.encryptCommand or "") < str(other.encryptCommand or ""):
            return -1
         else:
            return 1
      # For the size limits, None is compared as an empty ByteQuantity
      if self.fullBackupSizeLimit != other.fullBackupSizeLimit:
         if (self.fullBackupSizeLimit or ByteQuantity()) < (other.fullBackupSizeLimit or ByteQuantity()):
            return -1
         else:
            return 1
      if self.incrementalBackupSizeLimit != other.incrementalBackupSizeLimit:
         if (self.incrementalBackupSizeLimit or ByteQuantity()) < (other.incrementalBackupSizeLimit or ByteQuantity()):
            return -1
         else:
            return 1
      return 0

   def _setWarnMidnite(self, value):
      """
      Property target used to set the midnite warning flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      if value:
         self._warnMidnite = True
      else:
         self._warnMidnite = False

   def _getWarnMidnite(self):
      """
      Property target used to get the midnite warning flag.
      """
      return self._warnMidnite

   def _setS3Bucket(self, value):
      """
      Property target used to set the S3 bucket.
      @raise ValueError: If the value is an empty string.
      """
      if value is not None:
         if len(value) < 1:
            raise ValueError("S3 bucket must be non-empty string.")
      self._s3Bucket = value

   def _getS3Bucket(self):
      """
      Property target used to get the S3 bucket.
      """
      return self._s3Bucket

   def _setEncryptCommand(self, value):
      """
      Property target used to set the encrypt command.
      @raise ValueError: If the value is an empty string.
      """
      if value is not None:
         if len(value) < 1:
            raise ValueError("Encrypt command must be non-empty string.")
      self._encryptCommand = value

   def _getEncryptCommand(self):
      """
      Property target used to get the encrypt command.
      """
      return self._encryptCommand

   def _setFullBackupSizeLimit(self, value):
      """
      Property target used to set the full backup size limit.
      The value must be a ByteQuantity, or a value the ByteQuantity
      constructor accepts as a number of bytes.
      @raise ValueError: If the value is not valid.
      """
      if value is None:
         self._fullBackupSizeLimit = None
      else:
         if isinstance(value, ByteQuantity):
            self._fullBackupSizeLimit = value
         else:
            self._fullBackupSizeLimit = ByteQuantity(value, UNIT_BYTES)

   def _getFullBackupSizeLimit(self):
      """
      Property target used to get the full backup size limit.
      """
      return self._fullBackupSizeLimit

   def _setIncrementalBackupSizeLimit(self, value):
      """
      Property target used to set the incremental backup size limit.
      The value must be a ByteQuantity, or a value the ByteQuantity
      constructor accepts as a number of bytes.
      @raise ValueError: If the value is not valid.
      """
      if value is None:
         self._incrementalBackupSizeLimit = None
      else:
         if isinstance(value, ByteQuantity):
            self._incrementalBackupSizeLimit = value
         else:
            self._incrementalBackupSizeLimit = ByteQuantity(value, UNIT_BYTES)

   def _getIncrementalBackupSizeLimit(self):
      """
      Property target used to get the incremental backup size limit.
      """
      return self._incrementalBackupSizeLimit

   warnMidnite = property(_getWarnMidnite, _setWarnMidnite, None, "Whether to generate warnings for crossing midnite.")
   s3Bucket = property(_getS3Bucket, _setS3Bucket, None, doc="Amazon S3 Bucket in which to store data")
   encryptCommand = property(_getEncryptCommand, _setEncryptCommand, None, doc="Command used to encrypt data before upload to S3")
   fullBackupSizeLimit = property(_getFullBackupSizeLimit, _setFullBackupSizeLimit, None,
                                  doc="Maximum size of a full backup, as a ByteQuantity")
   incrementalBackupSizeLimit = property(_getIncrementalBackupSizeLimit, _setIncrementalBackupSizeLimit, None,
                                         doc="Maximum size of an incremental backup, as a ByteQuantity")
320
########################################################################
# LocalConfig class definition
########################################################################

@total_ordering
class LocalConfig(object):

   """
   Class representing this extension's configuration document.

   This is not a general-purpose configuration object like the main Cedar
   Backup configuration object.  Instead, it just knows how to parse and emit
   amazons3-specific configuration values.  Third parties who need to read and
   write configuration related to this extension should access it through the
   constructor, C{validate} and C{addConfig} methods.

   @note: Lists within this class are "unordered" for equality comparisons.

   @sort: __init__, __repr__, __str__, __cmp__, __eq__, __lt__, __gt__,
          amazons3, validate, addConfig
   """

   def __init__(self, xmlData=None, xmlPath=None, validate=True):
      """
      Initializes a configuration object.

      If you initialize the object without passing either C{xmlData} or
      C{xmlPath} then configuration will be empty and will be invalid until it
      is filled in properly.

      No reference to the original XML data or original path is saved off by
      this class.  Once the data has been parsed (successfully or not) this
      original information is discarded.

      Unless the C{validate} argument is C{False}, the L{LocalConfig.validate}
      method will be called (with its default arguments) against configuration
      after successfully parsing any passed-in XML.  Keep in mind that even if
      C{validate} is C{False}, it might not be possible to parse the passed-in
      XML document if lower-level validations fail.

      @note: It is strongly suggested that the C{validate} option always be set
      to C{True} (the default) unless there is a specific need to read in
      invalid configuration from disk.

      @param xmlData: XML data representing configuration.
      @type xmlData: String data.

      @param xmlPath: Path to an XML file on disk.
      @type xmlPath: Absolute path to a file on disk.

      @param validate: Validate the document after parsing it.
      @type validate: Boolean true/false.

      @raise ValueError: If both C{xmlData} and C{xmlPath} are passed-in.
      @raise ValueError: If the XML data in C{xmlData} or C{xmlPath} cannot be parsed.
      @raise ValueError: If the parsed configuration document is not valid.
      """
      self._amazons3 = None
      self.amazons3 = None
      if xmlData is not None and xmlPath is not None:
         raise ValueError("Use either xmlData or xmlPath, but not both.")
      if xmlData is not None:
         self._parseXmlData(xmlData)
         if validate:
            self.validate()
      elif xmlPath is not None:
         with open(xmlPath) as f:
            xmlData = f.read()
         self._parseXmlData(xmlData)
         if validate:
            self.validate()

   def __repr__(self):
      """
      Official string representation for class instance.
      """
      return "LocalConfig(%s)" % (self.amazons3)

   def __str__(self):
      """
      Informal string representation for class instance.
      """
      return self.__repr__()

   def __eq__(self, other):
      """Equals operator, implemented in terms of original Python 2 compare operator."""
      return self.__cmp__(other) == 0

   def __lt__(self, other):
      """Less-than operator, implemented in terms of original Python 2 compare operator."""
      return self.__cmp__(other) < 0

   def __gt__(self, other):
      """Greater-than operator, implemented in terms of original Python 2 compare operator."""
      return self.__cmp__(other) > 0

   def __cmp__(self, other):
      """
      Original Python 2 comparison operator.
      Lists within this class are "unordered" for equality comparisons.
      @param other: Other object to compare to.
      @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
      """
      if other is None:
         return 1
      if self.amazons3 != other.amazons3:
         if self.amazons3 < other.amazons3:
            return -1
         else:
            return 1
      return 0

   def _setAmazonS3(self, value):
      """
      Property target used to set the amazons3 configuration value.
      If not C{None}, the value must be a C{AmazonS3Config} object.
      @raise ValueError: If the value is not a C{AmazonS3Config}
      """
      if value is None:
         self._amazons3 = None
      else:
         if not isinstance(value, AmazonS3Config):
            raise ValueError("Value must be a C{AmazonS3Config} object.")
         self._amazons3 = value

   def _getAmazonS3(self):
      """
      Property target used to get the amazons3 configuration value.
      """
      return self._amazons3

   amazons3 = property(_getAmazonS3, _setAmazonS3, None, "AmazonS3 configuration in terms of a C{AmazonS3Config} object.")

   def validate(self):
      """
      Validates configuration represented by the object.

      AmazonS3 configuration must be filled in.  Within that, the s3Bucket target must be filled in

      @raise ValueError: If one of the validations fails.
      """
      if self.amazons3 is None:
         raise ValueError("AmazonS3 section is required.")
      if self.amazons3.s3Bucket is None:
         raise ValueError("AmazonS3 s3Bucket must be set.")

   def addConfig(self, xmlDom, parentNode):
      """
      Adds an <amazons3> configuration section as the next child of a parent.

      Third parties should use this function to write configuration related to
      this extension.

      We add the following fields to the document::

         warnMidnite                //cb_config/amazons3/warn_midnite
         s3Bucket                   //cb_config/amazons3/s3_bucket
         encryptCommand             //cb_config/amazons3/encrypt
         fullBackupSizeLimit        //cb_config/amazons3/full_size_limit
         incrementalBackupSizeLimit //cb_config/amazons3/incr_size_limit

      @param xmlDom: DOM tree as from C{impl.createDocument()}.
      @param parentNode: Parent that the section should be appended to.
      """
      if self.amazons3 is not None:
         sectionNode = addContainerNode(xmlDom, parentNode, "amazons3")
         addBooleanNode(xmlDom, sectionNode, "warn_midnite", self.amazons3.warnMidnite)
         addStringNode(xmlDom, sectionNode, "s3_bucket", self.amazons3.s3Bucket)
         addStringNode(xmlDom, sectionNode, "encrypt", self.amazons3.encryptCommand)
         addByteQuantityNode(xmlDom, sectionNode, "full_size_limit", self.amazons3.fullBackupSizeLimit)
         addByteQuantityNode(xmlDom, sectionNode, "incr_size_limit", self.amazons3.incrementalBackupSizeLimit)

   def _parseXmlData(self, xmlData):
      """
      Internal method to parse an XML string into the object.

      This method parses the XML document into a DOM tree (C{xmlDom}) and then
      calls a static method to parse the amazons3 configuration section.

      @param xmlData: XML data to be parsed
      @type xmlData: String data

      @raise ValueError: If the XML cannot be successfully parsed.
      """
      # Only the parent node is needed below; the DOM handle itself is unused.
      (xmlDom, parentNode) = createInputDom(xmlData)
      self._amazons3 = LocalConfig._parseAmazonS3(parentNode)

   @staticmethod
   def _parseAmazonS3(parent):
      """
      Parses an amazons3 configuration section.

      We read the following individual fields::

         warnMidnite                //cb_config/amazons3/warn_midnite
         s3Bucket                   //cb_config/amazons3/s3_bucket
         encryptCommand             //cb_config/amazons3/encrypt
         fullBackupSizeLimit        //cb_config/amazons3/full_size_limit
         incrementalBackupSizeLimit //cb_config/amazons3/incr_size_limit

      @param parent: Parent node to search beneath.

      @return: C{AmazonS3Config} object or C{None} if the section does not exist.
      @raise ValueError: If some filled-in value is invalid.
      """
      amazons3 = None
      section = readFirstChild(parent, "amazons3")
      if section is not None:
         amazons3 = AmazonS3Config()
         amazons3.warnMidnite = readBoolean(section, "warn_midnite")
         amazons3.s3Bucket = readString(section, "s3_bucket")
         amazons3.encryptCommand = readString(section, "encrypt")
         amazons3.fullBackupSizeLimit = readByteQuantity(section, "full_size_limit")
         amazons3.incrementalBackupSizeLimit = readByteQuantity(section, "incr_size_limit")
      return amazons3
########################################################################
# Public functions
########################################################################

###########################
# executeAction() function
###########################

def executeAction(configPath, options, config):
   """
   Executes the amazons3 backup action.

   @param configPath: Path to configuration file on disk.
   @type configPath: String representing a path on disk.

   @param options: Program command-line options.
   @type options: Options object.

   @param config: Program configuration.
   @type config: Config object.

   @raise ValueError: Under many generic error conditions
   @raise IOError: If there are I/O problems reading or writing files
   """
   logger.debug("Executing amazons3 extended action.")
   # Guard clauses: this action must run as root on a POSIX platform, and the
   # options and stage sections of the main configuration must be filled in.
   if not isRunningAsRoot():
      logger.error("Error: the amazons3 extended action must be run as root.")
      raise ValueError("The amazons3 extended action must be run as root.")
   if sys.platform == "win32":
      logger.error("Error: the amazons3 extended action is not supported on Windows.")
      raise ValueError("The amazons3 extended action is not supported on Windows.")
   if config.options is None or config.stage is None:
      raise ValueError("Cedar Backup configuration is not properly filled in.")
   # Parse the extension's own configuration, find the daily staging directory
   # to process, enforce size limits, upload, and finally mark the directory.
   extensionConfig = LocalConfig(xmlPath=configPath)
   dailyDirs = _findCorrectDailyDir(options, config, extensionConfig)
   _applySizeLimits(options, config, extensionConfig, dailyDirs)
   _writeToAmazonS3(config, extensionConfig, dailyDirs)
   _writeStoreIndicator(config, dailyDirs)
   logger.info("Executed the amazons3 extended action successfully.")
########################################################################
# Private utility functions
########################################################################

#########################
# _findCorrectDailyDir()
#########################

def _findCorrectDailyDir(options, config, local):
   """
   Finds the correct daily staging directory to be written to Amazon S3.

   This is substantially similar to the same function in store.py.  The
   main difference is that it doesn't rely on store configuration at all.

   @param options: Options object.
   @param config: Config object.
   @param local: Local config object.

   @return: Correct staging dir, as a dict mapping directory to date suffix.
   @raise IOError: If the staging directory cannot be found.
   """
   oneDay = datetime.timedelta(days=1)
   today = datetime.date.today()
   # Build the candidate directories in search order: today, yesterday,
   # tomorrow.  Each tuple is (path, date suffix, log message, crossesMidnite).
   candidates = []
   for (day, message, crossesMidnite) in [
         (today, "Amazon S3 process will use current day's staging directory [%s]", False),
         (today - oneDay, "Amazon S3 process will use previous day's staging directory [%s]", True),
         (today + oneDay, "Amazon S3 process will use next day's staging directory [%s]", True), ]:
      suffix = day.strftime(DIR_TIME_FORMAT)
      path = os.path.join(config.stage.targetDir, suffix)
      candidates.append((path, suffix, message, crossesMidnite))
   if options.full:
      # With --full, only today's directory is acceptable, and we don't care
      # whether it has already been stored.
      (path, suffix, message, _) = candidates[0]
      if os.path.isdir(path) and os.path.exists(os.path.join(path, STAGE_INDICATOR)):
         logger.info(message, path)
         return { path:suffix }
      raise IOError("Unable to find staging directory to process (only tried today due to full option).")
   # Otherwise, take the first candidate that has been staged but not stored.
   for (path, suffix, message, crossesMidnite) in candidates:
      staged = os.path.exists(os.path.join(path, STAGE_INDICATOR))
      stored = os.path.exists(os.path.join(path, STORE_INDICATOR))
      if os.path.isdir(path) and staged and not stored:
         logger.info(message, path)
         if crossesMidnite and local.amazons3.warnMidnite:
            logger.warning("Warning: Amazon S3 process crossed midnite boundary to find data.")
         return { path:suffix }
   raise IOError("Unable to find unused staging directory to process (tried today, yesterday, tomorrow).")
##############################
# _applySizeLimits() function
##############################

def _applySizeLimits(options, config, local, stagingDirs):
   """
   Apply size limits, throwing an exception if any limits are exceeded.

   Size limits are optional.  If a limit is set to None, it does not apply.
   The full size limit applies if the full option is set or if today is the
   start of the week.  The incremental size limit applies otherwise.  Limits
   are applied to the total size of all the relevant staging directories.

   @param options: Options object.
   @param config: Config object.
   @param local: Local config object.
   @param stagingDirs: Dictionary mapping directory path to date suffix.

   @raise ValueError: Under many generic error conditions
   @raise ValueError: If a size limit has been exceeded
   """
   # Pick the limit that applies for this run
   if options.full or isStartOfWeek(config.options.startingDay):
      logger.debug("Using Amazon S3 size limit for full backups.")
      limit = local.amazons3.fullBackupSizeLimit
   else:
      logger.debug("Using Amazon S3 size limit for incremental backups.")
      limit = local.amazons3.incrementalBackupSizeLimit
   if limit is None:
      logger.debug("No Amazon S3 size limit will be applied.")
      return
   logger.debug("Amazon S3 size limit is: %s", limit)
   # Total up everything in the relevant staging directories
   contents = BackupFileList()
   for path in stagingDirs:
      contents.addDirContents(path)
   total = contents.totalSize()
   logger.debug("Amazon S3 backup size is: %s", displayBytes(total))
   if total > limit:
      logger.error("Amazon S3 size limit exceeded: %s > %s", displayBytes(total), limit)
      raise ValueError("Amazon S3 size limit exceeded: %s > %s" % (displayBytes(total), limit))
   logger.info("Total size does not exceed Amazon S3 size limit, so backup can continue.")
##############################
# _writeToAmazonS3() function
##############################

def _writeToAmazonS3(config, local, stagingDirs):
   """
   Writes the indicated staging directories to an Amazon S3 bucket.

   Each of the staging directories listed in C{stagingDirs} will be written to
   the configured Amazon S3 bucket from local configuration.  The directories
   will be placed into the image at the root by date, so staging directory
   C{/opt/stage/2005/02/10} will be placed into the S3 bucket at C{/2005/02/10}.
   If an encrypt commmand is provided, the files will be encrypted first.

   @param config: Config object.
   @param local: Local config object.
   @param stagingDirs: Dictionary mapping directory path to date suffix.

   @raise ValueError: Under many generic error conditions
   @raise IOError: If there is a problem writing to Amazon S3
   """
   for (stagingDir, dateSuffix) in list(stagingDirs.items()):
      logger.debug("Storing stage directory to Amazon S3 [%s].", stagingDir)
      s3BucketUrl = "s3://%s/%s" % (local.amazons3.s3Bucket, dateSuffix)
      logger.debug("S3 bucket URL is [%s]", s3BucketUrl)
      # Always start from a clean slate in the bucket for this date
      _clearExistingBackup(config, s3BucketUrl)
      if local.amazons3.encryptCommand is None:
         logger.debug("Encryption is disabled; files will be uploaded in cleartext.")
         _uploadStagingDir(config, stagingDir, s3BucketUrl)
         _verifyUpload(config, stagingDir, s3BucketUrl)
      else:
         logger.debug("Encryption is enabled; files will be uploaded after being encrypted.")
         # Encrypt into a scratch directory owned by the backup user, and make
         # sure the scratch directory is removed no matter what happens.
         encryptedDir = tempfile.mkdtemp(dir=config.options.workingDir)
         changeOwnership(encryptedDir, config.options.backupUser, config.options.backupGroup)
         try:
            _encryptStagingDir(config, local, stagingDir, encryptedDir)
            _uploadStagingDir(config, encryptedDir, s3BucketUrl)
            _verifyUpload(config, encryptedDir, s3BucketUrl)
         finally:
            if os.path.exists(encryptedDir):
               shutil.rmtree(encryptedDir)
##################################
# _writeStoreIndicator() function
##################################

def _writeStoreIndicator(config, stagingDirs):
   """
   Writes a store indicator file into each of the staging directories.
   @param config: Config object.
   @param stagingDirs: Dictionary mapping directory path to date suffix.
   """
   user = config.options.backupUser
   group = config.options.backupGroup
   for stagingDir in stagingDirs:
      writeIndicatorFile(stagingDir, STORE_INDICATOR, user, group)
##################################
# _clearExistingBackup() function
##################################

def _clearExistingBackup(config, s3BucketUrl):
   """
   Clear any existing backup files for an S3 bucket URL.

   The AWS CLI C{s3 rm} command is executed as the configured backup user
   via C{su}, since that is the user whose AWS CLI credentials are set up.

   @param config: Config object.
   @param s3BucketUrl: S3 bucket URL associated with the staging directory
   @raise IOError: If the AWS CLI command fails.
   """
   removeCommand = "%s s3 rm --recursive %s/" % (resolveCommand(AWS_COMMAND)[0], s3BucketUrl)
   su = resolveCommand(SU_COMMAND)
   result = executeCommand(su, [config.options.backupUser, "-c", removeCommand])[0]
   if result != 0:
      raise IOError("Error [%d] calling AWS CLI to clear existing backup for [%s]." % (result, s3BucketUrl))
   logger.debug("Completed clearing any existing backup in S3 for [%s]", s3BucketUrl)
###############################
# _uploadStagingDir() function
###############################

def _uploadStagingDir(config, stagingDir, s3BucketUrl):
   """
   Upload the contents of a staging directory out to the Amazon S3 cloud.

   The AWS CLI C{s3 cp} command is executed as the configured backup user.

   @param config: Config object.
   @param stagingDir: Staging directory to upload
   @param s3BucketUrl: S3 bucket URL associated with the staging directory
   @raise IOError: If the AWS CLI command fails.
   """
   # The version of awscli in Debian stretch (1.11.13-1) has a problem
   # uploading empty files, due to running with Python 3 rather than Python 2
   # as the upstream maintainers intended.  To work around this, we explicitly
   # exclude files like cback.stage, cback.collect, etc. which should be the
   # only empty files we ever try to copy.  See: https://github.com/aws/aws-cli/issues/2403
   copyCommand = "%s s3 cp --recursive --exclude \"*cback.*\" %s/ %s/" % (resolveCommand(AWS_COMMAND)[0], stagingDir, s3BucketUrl)
   su = resolveCommand(SU_COMMAND)
   result = executeCommand(su, [config.options.backupUser, "-c", copyCommand])[0]
   if result != 0:
      raise IOError("Error [%d] calling AWS CLI to upload staging directory to [%s]." % (result, s3BucketUrl))
   logger.debug("Completed uploading staging dir [%s] to [%s]", stagingDir, s3BucketUrl)
###########################
# _verifyUpload() function
###########################

def _verifyUpload(config, stagingDir, s3BucketUrl):
   """
   Verify that a staging directory was properly uploaded to the Amazon S3 cloud.

   The uploaded objects are listed via the AWS CLI C{s3api list-objects}
   command (run as the backup user), and each file in the staging directory is
   checked for existence and matching size in the bucket.  Indicator files
   matching C{cback.*} are ignored, because they are excluded from the upload.

   @param config: Config object.
   @param stagingDir: Staging directory to verify
   @param s3BucketUrl: S3 bucket URL associated with the staging directory

   @raise IOError: If the AWS CLI call fails, or if any file is missing from
                   S3 or has an unexpected size.
   """
   (bucket, prefix) = s3BucketUrl.replace("s3://", "").split("/", 1)
   suCommand = resolveCommand(SU_COMMAND)
   awsCommand = resolveCommand(AWS_COMMAND)
   query = "Contents[].{Key: Key, Size: Size}"
   actualCommand = "%s s3api list-objects --bucket %s --prefix %s --query '%s'" % (awsCommand[0], bucket, prefix, query)
   (result, data) = executeCommand(suCommand, [config.options.backupUser, "-c", actualCommand], returnOutput=True)
   if result != 0:
      raise IOError("Error [%d] calling AWS CLI verify upload to [%s]." % (result, s3BucketUrl))
   contents = { }
   # The --query expression yields JSON null (not an empty list) when the
   # prefix matches no objects, so guard against None before iterating.
   for entry in json.loads("".join(data)) or []:
      # Strip only the leading prefix from the key; the original code used
      # str.replace(), which would corrupt keys that happen to contain the
      # prefix string more than once.
      key = entry["Key"]
      if key.startswith(prefix):
         key = key[len(prefix):]
      contents[key] = int(entry["Size"])
   files = FilesystemList()
   files.excludeBasenamePatterns = [ r"cback\..*", ]  # because these are excluded from the upload
   files.addDirContents(stagingDir)
   for entry in files:
      if os.path.isfile(entry):
         key = entry[len(stagingDir):] if entry.startswith(stagingDir) else entry
         size = int(os.stat(entry).st_size)
         if key not in contents:
            raise IOError("File was apparently not uploaded: [%s]" % entry)
         if size != contents[key]:
            raise IOError("File size differs [%s], expected %s bytes but got %s bytes" % (entry, size, contents[key]))
   logger.debug("Completed verifying upload from [%s] to [%s].", stagingDir, s3BucketUrl)
################################
# _encryptStagingDir() function
################################

def _encryptStagingDir(config, local, stagingDir, encryptedDir):
   """
   Encrypt a staging directory, creating a new directory in the process.

   Each regular file found beneath C{stagingDir} is encrypted into a parallel
   path under C{encryptedDir}, using the configured encrypt command executed
   as the backup user.  Empty files are simply created empty rather than
   encrypted, since there is nothing to protect.

   @param config: Config object.
   @param local: Local config object.
   @param stagingDir: Staging directory to use as source
   @param encryptedDir: Target directory into which encrypted files should be written

   @raise IOError: If the encrypt command fails for any file.
   """
   suCommand = resolveCommand(SU_COMMAND)
   files = FilesystemList()
   files.addDirContents(stagingDir)
   for cleartext in files:
      if os.path.isfile(cleartext):
         encrypted = "%s%s" % (encryptedDir, cleartext.replace(stagingDir, ""))
         # Create the target subdirectory up front, so both branches below can
         # rely on it.  (Previously this happened only on the encrypt path,
         # which broke for empty files living in subdirectories.)
         subdir = os.path.dirname(encrypted)
         if not os.path.isdir(subdir):
            os.makedirs(subdir)
            changeOwnership(subdir, config.options.backupUser, config.options.backupGroup)
         if int(os.stat(cleartext).st_size) == 0:
            with open(encrypted, 'a'):
               pass  # don't bother encrypting empty files; just create them
         else:
            actualCommand = local.amazons3.encryptCommand.replace("${input}", cleartext).replace("${output}", encrypted)
            result = executeCommand(suCommand, [config.options.backupUser, "-c", actualCommand])[0]
            if result != 0:
               raise IOError("Error [%d] encrypting [%s]." % (result, cleartext))
   logger.debug("Completed encrypting staging directory [%s] into [%s]", stagingDir, encryptedDir)