From 01ad2ab832c175f78106276b74c250e14e69e563 Mon Sep 17 00:00:00 2001
From: drigato
Date: Wed, 29 Oct 2014 17:42:42 -0400
Subject: [PATCH] Commented cloud storage classes

---
 .../cloud_storage_uploader.py                | 43 ++++++++++++-------
 python_apps/pypo/cloud_storage_downloader.py | 32 +++++++++++++-
 2 files changed, 58 insertions(+), 17 deletions(-)

diff --git a/python_apps/airtime_analyzer/airtime_analyzer/cloud_storage_uploader.py b/python_apps/airtime_analyzer/airtime_analyzer/cloud_storage_uploader.py
index 84dbdc054..f4c03de48 100644
--- a/python_apps/airtime_analyzer/airtime_analyzer/cloud_storage_uploader.py
+++ b/python_apps/airtime_analyzer/airtime_analyzer/cloud_storage_uploader.py
@@ -7,14 +7,20 @@ from libcloud.storage.types import Provider, ContainerDoesNotExistError, ObjectD
 
 class CloudStorageUploader:
     """ A class that uses Apache Libcloud's Storage API to upload objects into
-    various cloud storage backends.
+    a cloud storage backend. For this implementation all files will be uploaded
+    into a bucket on Amazon S3.
+
+    It is important to note that every file, coming from different Airtime Pro
+    stations, will get uploaded into the same bucket on the same Amazon S3
+    account.
 
     Attributes:
         _provider: Storage backend. For exmaple, Amazon S3, Google Storage.
         _bucket: Name of container on provider where files will get uploaded into.
         _api_key: Access key to objects on the provider's storage backend.
         _api_key_secret: Secret access key to objects on the provider's storage backend.
-"""
+    """
+
     def __init__(self, provider, bucket, api_key, api_key_secret):
         self._provider = provider
         self._bucket = bucket
@@ -22,29 +28,36 @@ class CloudStorageUploader:
         self._api_key_secret = api_key_secret
 
     def upload_obj(self, audio_file_path, metadata):
-        '''Uploads a file into a provider's cloud object storage.
+        """Uploads a file into Amazon S3 object storage.
 
-        Generates a unique object name
+        Before a file is uploaded onto Amazon S3 we generate a unique object
+        name consisting of the original filename and a unique string produced
+        by uuid.uuid4().
 
        Keyword arguments:
            audio_file_path: Path on disk to the audio file that is about to be
-                uploaded to cloud object storage.
+                uploaded to Amazon S3 object storage.
            metadata: ID3 tags and other metadata extracted from the audio file.
-        '''
+
+        Returns:
+            The metadata dictionary it received with three new keys:
+                filesize: The file's filesize in bytes.
+                filename: The file's filename.
+                resource_id: The unique object name used to identify the
+                    object on Amazon S3.
+        """
 
         file_base_name = os.path.basename(audio_file_path)
         file_name, extension = os.path.splitext(file_base_name)
 
-        '''
-        With Amazon S3 you cannot create a signed url if there are spaces
-        in the object name. URL encoding the object name doesn't solve the
-        problem. As a solution we will replace spaces with dashes.
-        '''
+        # With Amazon S3 you cannot create a signed URL if there are spaces
+        # in the object name. URL encoding the object name doesn't solve the
+        # problem. As a solution we will replace spaces with dashes.
         file_name = file_name.replace(" ", "-")
         object_name = "%s_%s%s" % (file_name, str(uuid.uuid4()), extension)
 
-        cls = get_driver(getattr(Provider, self._provider))
-        driver = cls(self._api_key, self._api_key_secret)
+        provider_driver_class = get_driver(getattr(Provider, self._provider))
+        driver = provider_driver_class(self._api_key, self._api_key_secret)
 
         try:
             container = driver.get_container(self._bucket)
@@ -61,13 +74,13 @@ class CloudStorageUploader:
 
         metadata["filesize"] = os.path.getsize(audio_file_path)
 
-        '''remove file from organize directory'''
+        # Remove the file from the organize directory
         try:
             os.remove(audio_file_path)
         except OSError:
             logging.info("Could not remove %s from organize directory" % audio_file_path)
 
-        '''pass original filename to Airtime so we can store it in the db'''
+        # Pass the original filename to Airtime so we can store it in the db
         metadata["filename"] = file_base_name
         metadata["resource_id"] = object_name
 
diff --git a/python_apps/pypo/cloud_storage_downloader.py b/python_apps/pypo/cloud_storage_downloader.py
index d75ad66d7..0a129f1d0 100644
--- a/python_apps/pypo/cloud_storage_downloader.py
+++ b/python_apps/pypo/cloud_storage_downloader.py
@@ -10,6 +10,19 @@ from libcloud.storage.providers import get_driver
 
 CONFIG_PATH = '/etc/airtime/airtime.conf'
 
 class CloudStorageDownloader:
+    """ A class that uses Apache Libcloud's Storage API to download objects from
+    a cloud storage backend. For this implementation all files are stored on
+    Amazon S3 and will be downloaded from there.
+
+    This class is used by Airtime's playout engine service, PYPO.
+
+    Attributes:
+        _provider: Storage backend. For example, Amazon S3, Google Storage.
+        _bucket: Name of the container on the provider that files are downloaded from.
+        _api_key: Access key to objects on the provider's storage backend.
+        _api_key_secret: Secret access key to objects on the provider's storage backend.
+    """
+
     def __init__(self):
         config = self.read_config_file(CONFIG_PATH)
 
@@ -20,8 +33,18 @@ class CloudStorageDownloader:
         self._api_key_secret = config.get(CLOUD_STORAGE_CONFIG_SECTION, 'api_key_secret')
 
     def download_obj(self, dst, obj_name):
-        cls = get_driver(getattr(Provider, self._provider))
-        driver = cls(self._api_key, self._api_key_secret)
+        """Downloads a file from Amazon S3 object storage to disk.
+
+        Downloads an object into PYPO's temporary cache directory on disk.
+        If the file already exists in the cache directory, downloading the
+        object is skipped.
+
+        Keyword arguments:
+            dst: Path in PYPO's temporary cache directory to download the file to.
+            obj_name: Name of the object to download to disk.
+        """
+        provider_driver_class = get_driver(getattr(Provider, self._provider))
+        driver = provider_driver_class(self._api_key, self._api_key_secret)
 
         try:
             cloud_obj = driver.get_object(container_name=self._bucket,
                                           object_name=obj_name)
         except ObjectDoesNotExistError:
             logging.info("%s does not exist on Amazon S3" % obj_name)
 
+        # If we detect that the file path already exists in PYPO's cache
+        # directory we need to verify that its contents are the same as the
+        # object's contents (in case there was file corruption in a previous
+        # download, for example) by comparing their hashes. If the hash
+        # values are not equal we need to download the object to disk again.
         dst_exists = False
         if (os.path.isfile(dst)):
             dst_hash = hashlib.md5(open(dst).read()).hexdigest()
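
For reference, here is a minimal standalone sketch of the upload path that
upload_obj()'s new docstring describes: build an S3-safe, collision-free
object name, then push the file through a libcloud storage driver. The
upload_sketch() helper, the bucket name, and the placeholder credentials are
invented for illustration; they are not part of the patched code.

    import os
    import uuid

    from libcloud.storage.providers import get_driver
    from libcloud.storage.types import Provider

    def upload_sketch(audio_file_path, provider="S3", bucket="example-bucket",
                      api_key="...", api_key_secret="..."):
        # S3 signed URLs cannot be generated for object names containing
        # spaces, so replace them with dashes, then append a uuid4 suffix
        # so files from every station stay unique in the shared bucket.
        base_name = os.path.basename(audio_file_path)
        name, extension = os.path.splitext(base_name)
        object_name = "%s_%s%s" % (name.replace(" ", "-"), uuid.uuid4(),
                                   extension)

        driver_class = get_driver(getattr(Provider, provider))
        driver = driver_class(api_key, api_key_secret)
        container = driver.get_container(bucket)
        driver.upload_object(file_path=audio_file_path, container=container,
                             object_name=object_name)
        return object_name

Resolving the driver via getattr(Provider, ...) mirrors the patched classes:
the same code can target Google Storage or another libcloud backend by
changing a single configuration value.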
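The new comment block in download_obj() describes the cache-consistency
check. Below is a sketch of that check under two assumptions: libcloud
exposes the provider-reported digest on the object's hash attribute, and on
S3 that digest (the ETag) equals the file's MD5 only for single-part
uploads. The object_is_cached() helper name is invented for illustration.

    import hashlib
    import os

    def object_is_cached(dst, cloud_obj):
        # No file at the cache path: the object must be downloaded.
        if not os.path.isfile(dst):
            return False
        # Hash the cached copy and compare it with the hash the provider
        # reports for the stored object; a mismatch means an earlier
        # download was corrupted and the object should be fetched again.
        with open(dst, "rb") as f:
            dst_hash = hashlib.md5(f.read()).hexdigest()
        # Some providers return the ETag wrapped in quotes, so strip them
        # defensively before comparing.
        return dst_hash == cloud_obj.hash.strip('"')

Opening the cached file in binary mode keeps the digest correct on both
Python 2 and Python 3; the patched line hashlib.md5(open(dst).read()) is
fine on the Python 2 interpreter that Airtime targets.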