From 8d914bcd1314afd2d97bba2ea819325eabb6ed6c Mon Sep 17 00:00:00 2001
From: drigato
Date: Tue, 6 Jan 2015 15:44:34 -0500
Subject: [PATCH] SAAS-529: Replace Apache Libcloud with Python-Boto SDK

---
 .../application/cloud_storage/Amazon_S3.php   |   1 +
 .../cloud_storage_uploader.py                 |  44 +++----
 .../cloud_storage_uploader_libcloud.py        | 111 ++++++++++++++++++
 python_apps/airtime_analyzer/setup.py         |   1 +
 4 files changed, 130 insertions(+), 27 deletions(-)
 create mode 100644 python_apps/airtime_analyzer/airtime_analyzer/cloud_storage_uploader_libcloud.py

diff --git a/airtime_mvc/application/cloud_storage/Amazon_S3.php b/airtime_mvc/application/cloud_storage/Amazon_S3.php
index f9de30d49..978e1ba07 100644
--- a/airtime_mvc/application/cloud_storage/Amazon_S3.php
+++ b/airtime_mvc/application/cloud_storage/Amazon_S3.php
@@ -18,6 +18,7 @@ class Amazon_S3 extends StorageBackend
         $this->s3Client = S3Client::factory(array(
             'key' => $securityCredentials['api_key'],
             'secret' => $securityCredentials['api_key_secret'],
+            'region' => $securityCredentials['region']
         ));
     }
 
diff --git a/python_apps/airtime_analyzer/airtime_analyzer/cloud_storage_uploader.py b/python_apps/airtime_analyzer/airtime_analyzer/cloud_storage_uploader.py
index 045c719f1..bea905b71 100644
--- a/python_apps/airtime_analyzer/airtime_analyzer/cloud_storage_uploader.py
+++ b/python_apps/airtime_analyzer/airtime_analyzer/cloud_storage_uploader.py
@@ -2,27 +2,25 @@ import os
 import logging
 import uuid
 import config_file
-from libcloud.storage.providers import get_driver
-from libcloud.storage.types import Provider, ContainerDoesNotExistError, ObjectDoesNotExistError
+from boto.s3.connection import S3Connection
+from boto.s3.key import Key
 
 
 CLOUD_CONFIG_PATH = '/etc/airtime-saas/cloud_storage.conf'
 STORAGE_BACKEND_FILE = "file"
 
 class CloudStorageUploader:
-    """ A class that uses Apache Libcloud's Storage API to upload objects into
-    a cloud storage backend. For this implementation all files will be uploaded
-    into a bucket on Amazon S3.
+    """ A class that uses the Python-Boto SDK to upload objects into Amazon S3.
 
     It is important to note that every file, coming from different Airtime Pro
     stations, will get uploaded into the same bucket on the same Amazon S3
     account.
 
     Attributes:
-        _provider: Storage backend. For exmaple, Amazon S3, Google Storage.
-        _bucket: Name of container on provider where files will get uploaded into.
-        _api_key: Access key to objects on the provider's storage backend.
-        _api_key_secret: Secret access key to objects on the provider's storage backend.
+        _host: Host name for the specific region assigned to the bucket.
+        _bucket: Name of container on Amazon S3 where files will get uploaded into.
+        _api_key: Access key to objects on Amazon S3.
+        _api_key_secret: Secret access key to objects on Amazon S3.
""" def __init__(self): @@ -32,12 +30,12 @@ class CloudStorageUploader: CLOUD_STORAGE_CONFIG_SECTION = config.get("current_backend", "storage_backend") self._storage_backend = CLOUD_STORAGE_CONFIG_SECTION if self._storage_backend == STORAGE_BACKEND_FILE: - self._provider = "" + self._host = "" self._bucket = "" self._api_key = "" self._api_key_secret = "" else: - self._provider = config.get(CLOUD_STORAGE_CONFIG_SECTION, 'provider') + self._host = config.get(CLOUD_STORAGE_CONFIG_SECTION, 'host') self._bucket = config.get(CLOUD_STORAGE_CONFIG_SECTION, 'bucket') self._api_key = config.get(CLOUD_STORAGE_CONFIG_SECTION, 'api_key') self._api_key_secret = config.get(CLOUD_STORAGE_CONFIG_SECTION, 'api_key_secret') @@ -76,23 +74,15 @@ class CloudStorageUploader: # in the object name. URL encoding the object name doesn't solve the # problem. As a solution we will replace spaces with dashes. file_name = file_name.replace(" ", "-") - object_name = "%s_%s%s" % (file_name, str(uuid.uuid4()), extension) + resource_id = "%s_%s%s" % (file_name, str(uuid.uuid4()), extension) - provider_driver_class = get_driver(getattr(Provider, self._provider)) - driver = provider_driver_class(self._api_key, self._api_key_secret) + conn = S3Connection(self._api_key, self._api_key_secret, host=self._host) + bucket = conn.get_bucket(self._bucket) - try: - container = driver.get_container(self._bucket) - except ContainerDoesNotExistError: - container = driver.create_container(self._bucket) - - extra = {'meta_data': {'filename': file_base_name}} - - obj = driver.upload_object(file_path=audio_file_path, - container=container, - object_name=object_name, - verify_hash=False, - extra=extra) + key = Key(bucket) + key.key = resource_id + key.set_metadata('filename', file_base_name) + key.set_contents_from_filename(audio_file_path) metadata["filesize"] = os.path.getsize(audio_file_path) @@ -105,7 +95,7 @@ class CloudStorageUploader: # Pass original filename to Airtime so we can store it in the db metadata["filename"] = file_base_name - metadata["resource_id"] = object_name + metadata["resource_id"] = resource_id metadata["storage_backend"] = self._storage_backend return metadata diff --git a/python_apps/airtime_analyzer/airtime_analyzer/cloud_storage_uploader_libcloud.py b/python_apps/airtime_analyzer/airtime_analyzer/cloud_storage_uploader_libcloud.py new file mode 100644 index 000000000..045c719f1 --- /dev/null +++ b/python_apps/airtime_analyzer/airtime_analyzer/cloud_storage_uploader_libcloud.py @@ -0,0 +1,111 @@ +import os +import logging +import uuid +import config_file +from libcloud.storage.providers import get_driver +from libcloud.storage.types import Provider, ContainerDoesNotExistError, ObjectDoesNotExistError + + +CLOUD_CONFIG_PATH = '/etc/airtime-saas/cloud_storage.conf' +STORAGE_BACKEND_FILE = "file" + +class CloudStorageUploader: + """ A class that uses Apache Libcloud's Storage API to upload objects into + a cloud storage backend. For this implementation all files will be uploaded + into a bucket on Amazon S3. + + It is important to note that every file, coming from different Airtime Pro + stations, will get uploaded into the same bucket on the same Amazon S3 + account. + + Attributes: + _provider: Storage backend. For exmaple, Amazon S3, Google Storage. + _bucket: Name of container on provider where files will get uploaded into. + _api_key: Access key to objects on the provider's storage backend. + _api_key_secret: Secret access key to objects on the provider's storage backend. 
+    """
+
+    def __init__(self):
+
+        config = config_file.read_config_file(CLOUD_CONFIG_PATH)
+
+        CLOUD_STORAGE_CONFIG_SECTION = config.get("current_backend", "storage_backend")
+        self._storage_backend = CLOUD_STORAGE_CONFIG_SECTION
+        if self._storage_backend == STORAGE_BACKEND_FILE:
+            self._provider = ""
+            self._bucket = ""
+            self._api_key = ""
+            self._api_key_secret = ""
+        else:
+            self._provider = config.get(CLOUD_STORAGE_CONFIG_SECTION, 'provider')
+            self._bucket = config.get(CLOUD_STORAGE_CONFIG_SECTION, 'bucket')
+            self._api_key = config.get(CLOUD_STORAGE_CONFIG_SECTION, 'api_key')
+            self._api_key_secret = config.get(CLOUD_STORAGE_CONFIG_SECTION, 'api_key_secret')
+
+    def enabled(self):
+        if self._storage_backend == "file":
+            return False
+        else:
+            return True
+
+
+    def upload_obj(self, audio_file_path, metadata):
+        """Uploads a file into Amazon S3 object storage.
+
+        Before a file is uploaded onto Amazon S3 we generate a unique object
+        name consisting of the filename and a unique string using the uuid4
+        module.
+
+        Keyword arguments:
+            audio_file_path: Path on disk to the audio file that is about to be
+                             uploaded to Amazon S3 object storage.
+            metadata: ID3 tags and other metadata extracted from the audio file.
+
+        Returns:
+            The metadata dictionary it received with three new keys:
+                filesize: The file's filesize in bytes.
+                filename: The file's filename.
+                resource_id: The unique object name used to identify the objects
+                             on Amazon S3
+        """
+
+        file_base_name = os.path.basename(audio_file_path)
+        file_name, extension = os.path.splitext(file_base_name)
+
+        # With Amazon S3 you cannot create a signed url if there are spaces
+        # in the object name. URL encoding the object name doesn't solve the
+        # problem. As a solution we will replace spaces with dashes.
+        file_name = file_name.replace(" ", "-")
+        object_name = "%s_%s%s" % (file_name, str(uuid.uuid4()), extension)
+
+        provider_driver_class = get_driver(getattr(Provider, self._provider))
+        driver = provider_driver_class(self._api_key, self._api_key_secret)
+
+        try:
+            container = driver.get_container(self._bucket)
+        except ContainerDoesNotExistError:
+            container = driver.create_container(self._bucket)
+
+        extra = {'meta_data': {'filename': file_base_name}}
+
+        obj = driver.upload_object(file_path=audio_file_path,
+                                   container=container,
+                                   object_name=object_name,
+                                   verify_hash=False,
+                                   extra=extra)
+
+        metadata["filesize"] = os.path.getsize(audio_file_path)
+
+        # Remove file from organize directory
+        try:
+            os.remove(audio_file_path)
+        except OSError:
+            logging.info("Could not remove %s from organize directory" % audio_file_path)
+
+        # Pass original filename to Airtime so we can store it in the db
+        metadata["filename"] = file_base_name
+
+        metadata["resource_id"] = object_name
+        metadata["storage_backend"] = self._storage_backend
+        return metadata
+
diff --git a/python_apps/airtime_analyzer/setup.py b/python_apps/airtime_analyzer/setup.py
index f9f47a31e..5a37aed13 100644
--- a/python_apps/airtime_analyzer/setup.py
+++ b/python_apps/airtime_analyzer/setup.py
@@ -31,6 +31,7 @@ setup(name='airtime_analyzer',
         'requests',
         'apache-libcloud',
         'rgain',
+        'boto',
        # These next 3 are required for requests to support SSL with SNI. Learned this the hard way...
        # What sucks is that GCC is required to pip install these.
        #'ndg-httpsclient',
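
For reference, the new boto upload path this patch introduces reduces to the standalone sketch below. Every value in it (credentials, host, bucket, file path) is a hypothetical placeholder; the patched uploader reads the real ones from /etc/airtime-saas/cloud_storage.conf.

    import os
    import uuid

    from boto.s3.connection import S3Connection
    from boto.s3.key import Key

    # Hypothetical placeholders; the real values come from
    # /etc/airtime-saas/cloud_storage.conf in the patched uploader.
    API_KEY = "AKIAEXAMPLEKEY"
    API_KEY_SECRET = "example-secret"
    HOST = "s3.amazonaws.com"             # region-specific endpoint for the bucket
    BUCKET = "example-airtime-bucket"
    audio_file_path = "/tmp/my song.mp3"  # hypothetical file to upload

    file_base_name = os.path.basename(audio_file_path)
    file_name, extension = os.path.splitext(file_base_name)

    # Mirror upload_obj(): spaces break S3 signed URLs, so replace them
    # with dashes, then append a uuid4 so every upload gets a unique
    # object name even across stations sharing the same bucket.
    file_name = file_name.replace(" ", "-")
    resource_id = "%s_%s%s" % (file_name, str(uuid.uuid4()), extension)

    conn = S3Connection(API_KEY, API_KEY_SECRET, host=HOST)
    bucket = conn.get_bucket(BUCKET)      # assumes the bucket already exists

    key = Key(bucket)
    key.key = resource_id
    key.set_metadata('filename', file_base_name)  # keep original name as S3 metadata
    key.set_contents_from_filename(audio_file_path)

One behavioral difference worth noting: the libcloud version caught ContainerDoesNotExistError and created the missing container, while the boto version simply calls get_bucket(), which raises S3ResponseError if the bucket does not exist, so the bucket must be provisioned ahead of time.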