Merge branch 'cc-5709-airtime-analyzer-cloud-storage' into cc-5709-airtime-analyzer-cloud-storage-saas

Conflicts:
	python_apps/airtime_analyzer/airtime_analyzer/cloud_storage_uploader.py
drigato 2015-01-06 15:46:20 -05:00
commit 08738cf391
4 changed files with 130 additions and 28 deletions

View File

@@ -18,6 +18,7 @@ class Amazon_S3 extends StorageBackend
$this->s3Client = S3Client::factory(array(
'key' => $securityCredentials['api_key'],
'secret' => $securityCredentials['api_key_secret'],
'region' => $securityCredentials['region']
));
}

View File

@@ -2,27 +2,25 @@ import os
import logging
import uuid
import config_file
from libcloud.storage.providers import get_driver
from libcloud.storage.types import Provider, ContainerDoesNotExistError, ObjectDoesNotExistError
from boto.s3.connection import S3Connection
from boto.s3.key import Key
CLOUD_CONFIG_PATH = '/etc/airtime-saas/cloud_storage.conf'
STORAGE_BACKEND_FILE = "file"
class CloudStorageUploader:
""" A class that uses Apache Libcloud's Storage API to upload objects into
a cloud storage backend. For this implementation all files will be uploaded
into a bucket on Amazon S3.
""" A class that uses Python-Boto SDK to upload objects into Amazon S3.
It is important to note that every file, coming from different Airtime Pro
stations, will get uploaded into the same bucket on the same Amazon S3
account.
Attributes:
_provider: Storage backend. For example, Amazon S3, Google Storage.
_bucket: Name of container on provider where files will get uploaded into.
_api_key: Access key to objects on the provider's storage backend.
_api_key_secret: Secret access key to objects on the provider's storage backend.
_host: Host name for the specific region assigned to the bucket.
_bucket: Name of container on Amazon S3 where files will get uploaded into.
_api_key: Access key to objects on Amazon S3.
_api_key_secret: Secret access key to objects on Amazon S3.
"""
def __init__(self):
@@ -32,12 +30,12 @@ class CloudStorageUploader:
CLOUD_STORAGE_CONFIG_SECTION = config.get("current_backend", "storage_backend")
self._storage_backend = CLOUD_STORAGE_CONFIG_SECTION
if self._storage_backend == STORAGE_BACKEND_FILE:
self._provider = ""
self._host = ""
self._bucket = ""
self._api_key = ""
self._api_key_secret = ""
else:
self._provider = config.get(CLOUD_STORAGE_CONFIG_SECTION, 'provider')
self._host = config.get(CLOUD_STORAGE_CONFIG_SECTION, 'host')
self._bucket = config.get(CLOUD_STORAGE_CONFIG_SECTION, 'bucket')
self._api_key = config.get(CLOUD_STORAGE_CONFIG_SECTION, 'api_key')
self._api_key_secret = config.get(CLOUD_STORAGE_CONFIG_SECTION, 'api_key_secret')
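The constructor in this hunk pulls all of its settings from /etc/airtime-saas/cloud_storage.conf: the storage_backend value under [current_backend] either disables cloud uploads (the "file" case) or names the section holding the backend credentials. A rough sketch of such a file, assuming an S3 section called amazon_S3 (the section name and every value below are illustrative, not taken from this commit):

[current_backend]
storage_backend = amazon_S3

[amazon_S3]
; Provider constant name used by the libcloud variant (libcloud's Provider.S3)
provider = S3
; Region endpoint used by the boto variant
host = s3.amazonaws.com
bucket = airtime-example-bucket
api_key = YOUR_AWS_ACCESS_KEY
api_key_secret = YOUR_AWS_SECRET_KEY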
@@ -76,24 +74,15 @@ class CloudStorageUploader:
# in the object name. URL encoding the object name doesn't solve the
# problem. As a solution we will replace spaces with dashes.
file_name = file_name.replace(" ", "-")
resource_id = "%s/%s_%s%s" % (metadata["file_prefix"], file_name, str(uuid.uuid4()), extension)
object_name = "%s/%s_%s%s" % (metadata["file_prefix"], file_name, str(uuid.uuid4()), extension)
provider_driver_class = get_driver(getattr(Provider, self._provider))
driver = provider_driver_class(self._api_key, self._api_key_secret)
conn = S3Connection(self._api_key, self._api_key_secret, host=self._host)
bucket = conn.get_bucket(self._bucket)
try:
container = driver.get_container(self._bucket)
except ContainerDoesNotExistError:
container = driver.create_container(self._bucket)
extra = {'meta_data': {'filename': file_base_name}}
obj = driver.upload_object(file_path=audio_file_path,
container=container,
object_name=object_name,
verify_hash=False,
extra=extra)
key = Key(bucket)
key.key = resource_id
key.set_metadata('filename', file_base_name)
key.set_contents_from_filename(audio_file_path)
metadata["filesize"] = os.path.getsize(audio_file_path)
@@ -106,7 +95,7 @@ class CloudStorageUploader:
# Pass original filename to Airtime so we can store it in the db
metadata["filename"] = file_base_name
metadata["resource_id"] = object_name
metadata["resource_id"] = resource_id
metadata["storage_backend"] = self._storage_backend
return metadata

View File

@@ -0,0 +1,111 @@
import os
import logging
import uuid
import config_file
from libcloud.storage.providers import get_driver
from libcloud.storage.types import Provider, ContainerDoesNotExistError, ObjectDoesNotExistError
CLOUD_CONFIG_PATH = '/etc/airtime-saas/cloud_storage.conf'
STORAGE_BACKEND_FILE = "file"
class CloudStorageUploader:
""" A class that uses Apache Libcloud's Storage API to upload objects into
a cloud storage backend. For this implementation all files will be uploaded
into a bucket on Amazon S3.
It is important to note that every file, coming from different Airtime Pro
stations, will get uploaded into the same bucket on the same Amazon S3
account.
Attributes:
_provider: Storage backend. For example, Amazon S3, Google Storage.
_bucket: Name of container on provider where files will get uploaded into.
_api_key: Access key to objects on the provider's storage backend.
_api_key_secret: Secret access key to objects on the provider's storage backend.
"""
def __init__(self):
config = config_file.read_config_file(CLOUD_CONFIG_PATH)
CLOUD_STORAGE_CONFIG_SECTION = config.get("current_backend", "storage_backend")
self._storage_backend = CLOUD_STORAGE_CONFIG_SECTION
if self._storage_backend == STORAGE_BACKEND_FILE:
self._provider = ""
self._bucket = ""
self._api_key = ""
self._api_key_secret = ""
else:
self._provider = config.get(CLOUD_STORAGE_CONFIG_SECTION, 'provider')
self._bucket = config.get(CLOUD_STORAGE_CONFIG_SECTION, 'bucket')
self._api_key = config.get(CLOUD_STORAGE_CONFIG_SECTION, 'api_key')
self._api_key_secret = config.get(CLOUD_STORAGE_CONFIG_SECTION, 'api_key_secret')
def enabled(self):
if self._storage_backend == "file":
return False
else:
return True
def upload_obj(self, audio_file_path, metadata):
"""Uploads a file into Amazon S3 object storage.
Before a file is uploaded onto Amazon S3 we generate a unique object
name consisting of the filename and a unique string using the uuid4
module.
Keyword arguments:
audio_file_path: Path on disk to the audio file that is about to be
uploaded to Amazon S3 object storage.
metadata: ID3 tags and other metadata extracted from the audio file.
Returns:
The metadata dictionary it received with three new keys:
filesize: The file's filesize in bytes.
filename: The file's filename.
resource_id: The unique object name used to identify the objects
on Amazon S3
"""
file_base_name = os.path.basename(audio_file_path)
file_name, extension = os.path.splitext(file_base_name)
# With Amazon S3 you cannot create a signed url if there are spaces
# in the object name. URL encoding the object name doesn't solve the
# problem. As a solution we will replace spaces with dashes.
file_name = file_name.replace(" ", "-")
object_name = "%s_%s%s" % (file_name, str(uuid.uuid4()), extension)
provider_driver_class = get_driver(getattr(Provider, self._provider))
driver = provider_driver_class(self._api_key, self._api_key_secret)
try:
container = driver.get_container(self._bucket)
except ContainerDoesNotExistError:
container = driver.create_container(self._bucket)
extra = {'meta_data': {'filename': file_base_name}}
obj = driver.upload_object(file_path=audio_file_path,
container=container,
object_name=object_name,
verify_hash=False,
extra=extra)
metadata["filesize"] = os.path.getsize(audio_file_path)
# Remove file from organize directory
try:
os.remove(audio_file_path)
except OSError:
logging.info("Could not remove %s from organize directory" % audio_file_path)
# Pass original filename to Airtime so we can store it in the db
metadata["filename"] = file_base_name
metadata["resource_id"] = object_name
metadata["storage_backend"] = self._storage_backend
return metadata
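The new file keeps the libcloud-based implementation as a self-contained class, driven only by the config file and the metadata dict passed through the analyzer pipeline. A minimal usage sketch, assuming the class above is importable (its module name is not shown in this view) and with illustrative metadata contents:

# Hypothetical driver code; CloudStorageUploader is the class defined above.
uploader = CloudStorageUploader()

metadata = {"track_title": "Example Track"}  # whatever the analyzer has extracted so far

if uploader.enabled():
    # Note: upload_obj also removes the local copy from the organize directory.
    metadata = uploader.upload_obj("/tmp/Example Track.mp3", metadata)
    print(metadata["filename"])         # original basename: "Example Track.mp3"
    print(metadata["resource_id"])      # e.g. "Example-Track_<uuid4>.mp3" on S3
    print(metadata["filesize"])         # size in bytes of the uploaded file
    print(metadata["storage_backend"])  # section name read from cloud_storage.conf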

View File

@@ -31,6 +31,7 @@ setup(name='airtime_analyzer',
'requests',
'apache-libcloud',
'rgain',
'boto',
# These next 3 are required for requests to support SSL with SNI. Learned this the hard way...
# What sucks is that GCC is required to pip install these.
#'ndg-httpsclient',