Commented cloud storage classes

This commit is contained in:
drigato 2014-10-29 17:42:42 -04:00
parent 978fc43a82
commit 01ad2ab832
2 changed files with 58 additions and 17 deletions

View file

@@ -7,14 +7,20 @@ from libcloud.storage.types import Provider, ContainerDoesNotExistError, ObjectD
class CloudStorageUploader:
""" A class that uses Apache Libcloud's Storage API to upload objects into
various cloud storage backends.
a cloud storage backend. For this implementation all files will be uploaded
into a bucket on Amazon S3.
It is important to note that every file, no matter which Airtime Pro
station it comes from, will get uploaded into the same bucket on the
same Amazon S3 account.
Attributes:
_provider: Storage backend. For example, Amazon S3, Google Storage.
_bucket: Name of the container on the provider that files will be uploaded into.
_api_key: Access key to objects on the provider's storage backend.
_api_key_secret: Secret access key to objects on the provider's storage backend.
"""
"""
def __init__(self, provider, bucket, api_key, api_key_secret):
self._provider = provider
self._bucket = bucket
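For illustration, a minimal sketch of how this uploader might be instantiated; the provider string, bucket name, and credentials below are placeholders, not values from the Airtime configuration:

# Placeholder values for illustration only.
uploader = CloudStorageUploader(provider="S3",              # must name an attribute of libcloud's Provider class
                                bucket="airtime-uploads",   # hypothetical bucket name
                                api_key="AKIAEXAMPLE",      # hypothetical credentials
                                api_key_secret="secret")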
@@ -22,29 +28,36 @@ class CloudStorageUploader:
self._api_key_secret = api_key_secret
def upload_obj(self, audio_file_path, metadata):
'''Uploads a file into a provider's cloud object storage.
"""Uploads a file into Amazon S3 object storage.
Generates a unique object name
Before a file is uploaded onto Amazon S3 we generate a unique object
name consisting of the filename and a unique string generated with
uuid4 from the uuid module.
Keyword arguments:
audio_file_path: Path on disk to the audio file that is about to be
uploaded to cloud object storage.
uploaded to Amazon S3 object storage.
metadata: ID3 tags and other metadata extracted from the audio file.
'''
Returns:
The metadata dictionary it received with three new keys:
filesize: The file's filesize in bytes.
filename: The file's filename.
resource_id: The unique object name used to identify the object
on Amazon S3.
"""
file_base_name = os.path.basename(audio_file_path)
file_name, extension = os.path.splitext(file_base_name)
'''
With Amazon S3 you cannot create a signed url if there are spaces
in the object name. URL encoding the object name doesn't solve the
problem. As a solution we will replace spaces with dashes.
'''
# With Amazon S3 you cannot create a signed url if there are spaces
# in the object name. URL encoding the object name doesn't solve the
# problem. As a solution we will replace spaces with dashes.
file_name = file_name.replace(" ", "-")
object_name = "%s_%s%s" % (file_name, str(uuid.uuid4()), extension)
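As a quick illustration of the naming scheme above (the input path is made up):

import os
import uuid

base = os.path.basename("/tmp/My Show Episode 1.mp3")   # hypothetical file
name, ext = os.path.splitext(base)
# Prints something like "My-Show-Episode-1_1b4f9a52-....mp3":
# spaces replaced with dashes, then "_<uuid4>" appended before the extension.
print("%s_%s%s" % (name.replace(" ", "-"), uuid.uuid4(), ext))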
cls = get_driver(getattr(Provider, self._provider))
driver = cls(self._api_key, self._api_key_secret)
provider_driver_class = get_driver(getattr(Provider, self._provider))
driver = provider_driver_class(self._api_key, self._api_key_secret)
try:
container = driver.get_container(self._bucket)
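The hunk is cut off here; for reference, a sketch of the typical Libcloud upload pattern this method presumably follows (the create_container fallback and the exact upload call are assumptions, not shown in this hunk):

try:
    container = driver.get_container(self._bucket)
except ContainerDoesNotExistError:
    # Assumed fallback; the real method may handle a missing bucket differently.
    container = driver.create_container(self._bucket)

# upload_object streams the local file into the container under object_name.
driver.upload_object(file_path=audio_file_path,
                     container=container,
                     object_name=object_name)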
@@ -61,13 +74,13 @@ class CloudStorageUploader:
metadata["filesize"] = os.path.getsize(audio_file_path)
'''remove file from organize directory'''
# Remove file from organize directory
try:
os.remove(audio_file_path)
except OSError:
logging.info("Could not remove %s from organize directory" % audio_file_path)
'''pass original filename to Airtime so we can store it in the db'''
# Pass original filename to Airtime so we can store it in the db
metadata["filename"] = file_base_name
metadata["resource_id"] = object_name

View file

@@ -10,6 +10,19 @@ from libcloud.storage.providers import get_driver
CONFIG_PATH = '/etc/airtime/airtime.conf'
class CloudStorageDownloader:
""" A class that uses Apache Libcloud's Storage API to download objects from
a cloud storage backend. For this implementation all files are stored on
Amazon S3 and will be downloaded from there.
This class is used with Airtime's playout engine service, PYPO.
Attributes:
_provider: Storage backend. For example, Amazon S3, Google Storage.
_bucket: Name of the container on the provider that files will be downloaded from.
_api_key: Access key to objects on the provider's storage backend.
_api_key_secret: Secret access key to objects on the provider's storage backend.
"""
def __init__(self):
config = self.read_config_file(CONFIG_PATH)
@@ -20,8 +33,18 @@ class CloudStorageDownloader:
self._api_key_secret = config.get(CLOUD_STORAGE_CONFIG_SECTION, 'api_key_secret')
def download_obj(self, dst, obj_name):
cls = get_driver(getattr(Provider, self._provider))
driver = cls(self._api_key, self._api_key_secret)
"""Downloads a file from Amazon S3 object storage to disk.
Downloads an object to PYPO's temporary cache directory on disk.
If the file already exists in the cache directory, downloading the
object is skipped.
Keyword arguments:
dst: PYPO's temporary cache directory on disk.
obj_name: Name of the object to download to disk.
"""
provider_driver_class = get_driver(getattr(Provider, self._provider))
driver = provider_driver_class(self._api_key, self._api_key_secret)
try:
cloud_obj = driver.get_object(container_name=self._bucket,
@@ -29,6 +52,11 @@ class CloudStorageDownloader:
except ObjectDoesNotExistError:
logging.info("%s does not exist on Amazon S3" % obj_name)
# If we detect that the file path already exists in PYPO's cache directory
# we need to verify that the file's contents match the object's contents
# (in case there was file corruption in a previous download, for example)
# by comparing their hashes. If the hash values are not equal we download
# the object to disk again.
dst_exists = False
if (os.path.isfile(dst)):
dst_hash = hashlib.md5(open(dst).read()).hexdigest()
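The hunk ends here; a hedged sketch of how the hash comparison and re-download described above could continue, using Libcloud's download_object (the exact control flow past this point is not shown in the diff):

    # Continuing the check started above -- a sketch, not the actual commit.
    if dst_hash == cloud_obj.hash:   # cloud_obj.hash is the provider-reported MD5/ETag
        dst_exists = True

if not dst_exists:
    logging.info("Downloading: %s to %s" % (obj_name, dst))
    driver.download_object(obj=cloud_obj,
                           destination_path=dst,
                           overwrite_existing=True)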