diff --git a/airtime_mvc/application/cloud_storage/Amazon_S3StorageBackend.php b/airtime_mvc/application/cloud_storage/Amazon_S3StorageBackend.php index b51b0059f..270015ae0 100644 --- a/airtime_mvc/application/cloud_storage/Amazon_S3StorageBackend.php +++ b/airtime_mvc/application/cloud_storage/Amazon_S3StorageBackend.php @@ -9,30 +9,76 @@ class Amazon_S3StorageBackend extends StorageBackend { private $s3Client; - + private $proxyHost; + public function Amazon_S3StorageBackend($securityCredentials) { $this->setBucket($securityCredentials['bucket']); $this->setAccessKey($securityCredentials['api_key']); $this->setSecretKey($securityCredentials['api_key_secret']); - $this->s3Client = S3Client::factory(array( + $s3Options = array( 'key' => $securityCredentials['api_key'], 'secret' => $securityCredentials['api_key_secret'], 'region' => $securityCredentials['region'] - )); - } + ); + if (array_key_exists("proxy_host", $securityCredentials)) { + $s3Options = array_merge($s3Options, array( + //'base_url' => "http://" . $securityCredentials['proxy_host'], + 'base_url' => "http://s3.amazonaws.com", + 'scheme' => "http", + //'force_path_style' => true, + 'signature' => 'v4' + )); + $this->proxyHost = $securityCredentials['proxy_host']; + } + $this->s3Client = S3Client::factory($s3Options); + } + public function getAbsoluteFilePath($resourceId) { return $this->s3Client->getObjectUrl($this->getBucket(), $resourceId); } - public function getSignedURL($resourceId) + /** Returns a signed download URL from Amazon S3, expiring in 60 minutes */ + public function getDownloadURLs($resourceId) { - return $this->s3Client->getObjectUrl($this->getBucket(), $resourceId, '+60 minutes'); + $urls = array(); + + $signedS3Url = $this->s3Client->getObjectUrl($this->getBucket(), $resourceId, '+60 minutes'); + + + //If we're using the proxy cache, we need to modify the request URL after it has + //been generated by the above. (The request signature must be for the amazonaws.com, + //not our proxy, since the proxy translates the host back to amazonaws.com) + if ($this->proxyHost) { + $p = parse_url($signedS3Url); + $p["host"] = $this->getBucket() . "." . $this->proxyHost; + $p["scheme"] = "http"; + //If the path contains the bucket name (which is the case with HTTPS requests to Amazon), + //we need to strip that part out, since we're forcing everything to HTTP. The Amazon S3 + //URL convention for HTTP is to prepend the bucket name to the hostname instead of having + //it in the path. + //eg. http://bucket.s3.amazonaws.com/ instead of https://s3.amazonaws.com/bucket/ + if (strpos($p["path"], $this->getBucket()) == 1) { + $p["path"] = substr($p["path"], 1 + strlen($this->getBucket())); + } + $proxyUrl = $p["scheme"] . "://" . $p["host"] . $p["path"] . "?" . $p["query"]; + //Add this proxy cache URL to the list of download URLs. + array_push($urls, $proxyUrl); + } + + //Add the direct S3 URL to the list (as a fallback) + array_push($urls, $signedS3Url); + + //http_build_url() would be nice to use but it requires pecl_http :-( + + //Logging::info($url); + + return $urls; } - + public function deletePhysicalFile($resourceId) { $bucket = $this->getBucket(); diff --git a/airtime_mvc/application/cloud_storage/FileStorageBackend.php b/airtime_mvc/application/cloud_storage/FileStorageBackend.php index 81effde71..e7a87147e 100644 --- a/airtime_mvc/application/cloud_storage/FileStorageBackend.php +++ b/airtime_mvc/application/cloud_storage/FileStorageBackend.php @@ -13,7 +13,7 @@ class FileStorageBackend extends StorageBackend return $resourceId; } - public function getSignedURL($resourceId) + public function getDownloadURLs($resourceId) { return ""; } diff --git a/airtime_mvc/application/cloud_storage/ProxyStorageBackend.php b/airtime_mvc/application/cloud_storage/ProxyStorageBackend.php index 4df1e19f7..d99c62eef 100644 --- a/airtime_mvc/application/cloud_storage/ProxyStorageBackend.php +++ b/airtime_mvc/application/cloud_storage/ProxyStorageBackend.php @@ -38,9 +38,9 @@ class ProxyStorageBackend extends StorageBackend return $this->storageBackend->getAbsoluteFilePath($resourceId); } - public function getSignedURL($resourceId) + public function getDownloadURLs($resourceId) { - return $this->storageBackend->getSignedURL($resourceId); + return $this->storageBackend->getDownloadURLs($resourceId); } public function deletePhysicalFile($resourceId) diff --git a/airtime_mvc/application/cloud_storage/StorageBackend.php b/airtime_mvc/application/cloud_storage/StorageBackend.php index 028534e61..f0d58ba42 100644 --- a/airtime_mvc/application/cloud_storage/StorageBackend.php +++ b/airtime_mvc/application/cloud_storage/StorageBackend.php @@ -15,7 +15,7 @@ abstract class StorageBackend /** Returns the file object's signed URL. The URL must be signed since they * privately stored on the storage backend. */ - abstract public function getSignedURL($resourceId); + abstract public function getDownloadURLs($resourceId); /** Deletes the file from the storage backend. */ abstract public function deletePhysicalFile($resourceId); diff --git a/airtime_mvc/application/common/FileIO.php b/airtime_mvc/application/common/FileIO.php index 4d1ffd68c..e921ca3c1 100644 --- a/airtime_mvc/application/common/FileIO.php +++ b/airtime_mvc/application/common/FileIO.php @@ -10,7 +10,7 @@ class Application_Common_FileIO * * This HTTP_RANGE compatible read file function is necessary for allowing streaming media to be skipped around in. * - * @param string $filePath - the full filepath pointing to the location of the file + * @param string $filePath - the full filepath or URL pointing to the location of the file * @param string $mimeType - the file's mime type. Defaults to 'audio/mp3' * @param integer $size - the file size, in bytes * @return void @@ -22,8 +22,7 @@ class Application_Common_FileIO { $fm = @fopen($filePath, 'rb'); if (!$fm) { - header ("HTTP/1.1 505 Internal server error"); - return; + throw new FileNotFoundException($filePath); } //Note that $size is allowed to be zero. If that's the case, it means we don't @@ -36,6 +35,8 @@ class Application_Common_FileIO $begin = 0; $end = $size - 1; + ob_start(); //Must start a buffer here for these header() functions + if (isset($_SERVER['HTTP_RANGE'])) { if (preg_match('/bytes=\h*(\d+)-(\d*)[\D.*]?/i', $_SERVER['HTTP_RANGE'], $matches)) { $begin = intval($matches[1]); @@ -51,6 +52,7 @@ class Application_Common_FileIO header('HTTP/1.1 200 OK'); } header("Content-Type: $mimeType"); + header("Content-Transfer-Encoding: binary"); header('Cache-Control: public, must-revalidate, max-age=0'); header('Pragma: no-cache'); header('Accept-Ranges: bytes'); @@ -60,7 +62,6 @@ class Application_Common_FileIO header("Content-Range: bytes $begin-$end/$size"); } } - header("Content-Transfer-Encoding: binary"); //We can have multiple levels of output buffering. Need to //keep looping until all have been disabled!!! diff --git a/airtime_mvc/application/controllers/ApiController.php b/airtime_mvc/application/controllers/ApiController.php index 5ecd5f1da..11f6f7889 100644 --- a/airtime_mvc/application/controllers/ApiController.php +++ b/airtime_mvc/application/controllers/ApiController.php @@ -1074,7 +1074,9 @@ class ApiController extends Zend_Controller_Action $dir->getId(),$all=false); foreach ($files as $f) { // if the file is from this mount - if (substr($f->getFilePath(), 0, strlen($rd)) === $rd) { + $filePaths = $f->getFilePaths(); + $filePath = $filePaths[0]; + if (substr($filePath, 0, strlen($rd)) === $rd) { $f->delete(); } } diff --git a/airtime_mvc/application/controllers/LibraryController.php b/airtime_mvc/application/controllers/LibraryController.php index 8c635baa6..212d57521 100644 --- a/airtime_mvc/application/controllers/LibraryController.php +++ b/airtime_mvc/application/controllers/LibraryController.php @@ -356,6 +356,8 @@ class LibraryController extends Zend_Controller_Action $res = $file->delete(); } catch (FileNoPermissionException $e) { $message = $noPermissionMsg; + } catch (DeleteScheduledFileException $e) { + $message = _("Could not delete file because it is scheduled in the future."); } catch (Exception $e) { //could throw a scheduled in future exception. $message = _("Could not delete file(s)."); diff --git a/airtime_mvc/application/models/ShowInstance.php b/airtime_mvc/application/models/ShowInstance.php index c15cdc631..e931b93b9 100644 --- a/airtime_mvc/application/models/ShowInstance.php +++ b/airtime_mvc/application/models/ShowInstance.php @@ -138,8 +138,11 @@ SQL; if (isset($file_id)) { $file = Application_Model_StoredFile::RecallById($file_id); - if (isset($file) && file_exists($file->getFilePath())) { - return $file; + if (isset($file)) { + $filePaths = $file->getFilePaths(); + if (file_exists($filePaths[0])) { + return $file; + } } } diff --git a/airtime_mvc/application/models/StoredFile.php b/airtime_mvc/application/models/StoredFile.php index 1049d449d..8e3dd7a59 100644 --- a/airtime_mvc/application/models/StoredFile.php +++ b/airtime_mvc/application/models/StoredFile.php @@ -362,8 +362,9 @@ SQL; { $exists = false; try { - $filePath = $this->getFilePath(); - $exists = (file_exists($this->getFilePath()) && !is_dir($filePath)); + $filePaths = $this->getFilePaths(); + $filePath = $filePaths[0]; + $exists = (file_exists($filePath) && !is_dir($filePath)); } catch (Exception $e) { return false; } @@ -399,7 +400,14 @@ SQL; //Delete the physical file from either the local stor directory //or from the cloud if ($this->_file->getDbImportStatus() == CcFiles::IMPORT_STATUS_SUCCESS) { - $this->_file->deletePhysicalFile(); + try { + $this->_file->deletePhysicalFile(); + } + catch (Exception $e) + { + //Just log the exception and continue. + Logging::error($e); + } } //Update the user's disk usage @@ -444,8 +452,6 @@ SQL; */ public function deleteByMediaMonitor($deleteFromPlaylist=false) { - $filepath = $this->getFilePath(); - if ($deleteFromPlaylist) { Application_Model_Playlist::DeleteFileFromAllPlaylists($this->getId()); } @@ -499,13 +505,13 @@ SQL; /** * Get the absolute filepath * - * @return string + * @return array of strings */ - public function getFilePath() + public function getFilePaths() { assert($this->_file); - return $this->_file->getURLForTrackPreviewOrDownload(); + return $this->_file->getURLsForTrackPreviewOrDownload(); } /** @@ -1243,9 +1249,11 @@ SQL; $genre = $file->getDbGenre(); $release = $file->getDbUtime(); try { + $filePaths = $this->getFilePaths(); + $filePath = $filePaths[0]; $soundcloud = new Application_Model_Soundcloud(); $soundcloud_res = $soundcloud->uploadTrack( - $this->getFilePath(), $this->getName(), $description, + $filePath, $this->getName(), $description, $tag, $release, $genre); $this->setSoundCloudFileId($soundcloud_res['id']); $this->setSoundCloudLinkToFile($soundcloud_res['permalink_url']); diff --git a/airtime_mvc/application/models/airtime/CcFiles.php b/airtime_mvc/application/models/airtime/CcFiles.php index ff9d3d7fc..4140485ea 100644 --- a/airtime_mvc/application/models/airtime/CcFiles.php +++ b/airtime_mvc/application/models/airtime/CcFiles.php @@ -386,9 +386,9 @@ class CcFiles extends BaseCcFiles { /** * Returns the file's absolute file path stored on disk. */ - public function getURLForTrackPreviewOrDownload() + public function getURLsForTrackPreviewOrDownload() { - return $this->getAbsoluteFilePath(); + return array($this->getAbsoluteFilePath()); } /** diff --git a/airtime_mvc/application/models/airtime/CloudFile.php b/airtime_mvc/application/models/airtime/CloudFile.php index 50b805ab1..3f0331ccc 100644 --- a/airtime_mvc/application/models/airtime/CloudFile.php +++ b/airtime_mvc/application/models/airtime/CloudFile.php @@ -27,12 +27,12 @@ class CloudFile extends BaseCloudFile * requesting the file's object via this URL, it needs to be signed because * all objects stored on Amazon S3 are private. */ - public function getURLForTrackPreviewOrDownload() + public function getURLsForTrackPreviewOrDownload() { if ($this->proxyStorageBackend == null) { $this->proxyStorageBackend = new ProxyStorageBackend($this->getStorageBackend()); } - return $this->proxyStorageBackend->getSignedURL($this->getResourceId()); + return $this->proxyStorageBackend->getDownloadURLs($this->getResourceId()); } /** diff --git a/airtime_mvc/application/services/MediaService.php b/airtime_mvc/application/services/MediaService.php index ee4238e8d..55eaeff37 100644 --- a/airtime_mvc/application/services/MediaService.php +++ b/airtime_mvc/application/services/MediaService.php @@ -55,10 +55,11 @@ class Application_Service_MediaService if ($media == null) { throw new FileNotFoundException(); } - $filepath = $media->getFilePath(); - // Make sure we don't have some wrong result beecause of caching + // Make sure we don't have some wrong result because of caching clearstatcache(); + $filePath = ""; + if ($media->getPropelOrm()->isValidPhysicalFile()) { $filename = $media->getPropelOrm()->getFilename(); //Download user left clicks a track and selects Download. @@ -71,13 +72,41 @@ class Application_Service_MediaService header('Content-Disposition: inline; filename="' . $filename . '"'); } - $filepath = $media->getFilePath(); - $size= $media->getFileSize(); - $mimeType = $media->getPropelOrm()->getDbMime(); - Application_Common_FileIO::smartReadFile($filepath, $size, $mimeType); + /* + In this block of code below, we're getting the list of download URLs for a track + and then streaming the file as the response. A file can be stored in more than one location, + with the alternate locations used as a fallback, so that's why we're looping until we + are able to actually send the file. + + This mechanism is used to try fetching our file from our internal S3 caching proxy server first. + If the file isn't found there (or the cache is down), then we attempt to download the file + directly from Amazon S3. We do this to save bandwidth costs! + */ + + $filePaths = $media->getFilePaths(); + assert(is_array($filePaths)); + + do { + //Read from $filePath and stream it to the browser. + $filePath = array_shift($filePaths); + try { + $size= $media->getFileSize(); + $mimeType = $media->getPropelOrm()->getDbMime(); + Application_Common_FileIO::smartReadFile($filePath, $size, $mimeType); + break; //Break out of the loop if we successfully read the file! + } catch (FileNotFoundException $e) { + //If we have no alternate filepaths left, then let the exception bubble up. + if (sizeof($filePaths) == 0) { + throw $e; + } + } + //Retry with the next alternate filepath in the list + } while (sizeof($filePaths) > 0); + exit; + } else { - throw new FileNotFoundException(); + throw new FileNotFoundException($filePath); } } diff --git a/python_apps/airtime_analyzer/airtime_analyzer/analyzer_pipeline.py b/python_apps/airtime_analyzer/airtime_analyzer/analyzer_pipeline.py index 206e6232e..04dadbed6 100644 --- a/python_apps/airtime_analyzer/airtime_analyzer/analyzer_pipeline.py +++ b/python_apps/airtime_analyzer/airtime_analyzer/analyzer_pipeline.py @@ -21,7 +21,9 @@ class AnalyzerPipeline: so that if it crashes, it does not kill the entire airtime_analyzer daemon and the failure to import can be reported back to the web application. """ - + + IMPORT_STATUS_FAILED = 2 + @staticmethod def run_analysis(queue, audio_file_path, import_directory, original_filename, storage_backend, file_prefix, cloud_storage_config): """Analyze and import an audio file, and put all extracted metadata into queue. @@ -86,12 +88,12 @@ class AnalyzerPipeline: queue.put(metadata) except UnplayableFileError as e: logging.exception(e) - metadata["import_status"] = 2 + metadata["import_status"] = IMPORT_STATUS_FAILED metadata["reason"] = "The file could not be played." raise e except Exception as e: # Ensures the traceback for this child process gets written to our log files: - logging.exception(e) + logging.exception(e) raise e @staticmethod diff --git a/python_apps/airtime_analyzer/airtime_analyzer/message_listener.py b/python_apps/airtime_analyzer/airtime_analyzer/message_listener.py index 111471153..17d749a56 100644 --- a/python_apps/airtime_analyzer/airtime_analyzer/message_listener.py +++ b/python_apps/airtime_analyzer/airtime_analyzer/message_listener.py @@ -226,18 +226,19 @@ class MessageListener: else: raise Exception("Analyzer process terminated unexpectedly.") ''' + metadata = {} q = Queue.Queue() try: AnalyzerPipeline.run_analysis(q, audio_file_path, import_directory, original_filename, storage_backend, file_prefix, cloud_storage_config) - results = q.get() + metadata = q.get() except Exception as e: - logging.error("Analyzer pipeline exception", e) - pass + logging.error("Analyzer pipeline exception: %s" % str(e)) + metadata["import_status"] = AnalyzerPipeline.IMPORT_STATUS_FAILED # Ensure our queue doesn't fill up and block due to unexpected behaviour. Defensive code. while not q.empty(): q.get() - return results + return metadata diff --git a/python_apps/airtime_analyzer/airtime_analyzer/playability_analyzer.py b/python_apps/airtime_analyzer/airtime_analyzer/playability_analyzer.py index 0ca8a84c1..eb9062713 100644 --- a/python_apps/airtime_analyzer/airtime_analyzer/playability_analyzer.py +++ b/python_apps/airtime_analyzer/airtime_analyzer/playability_analyzer.py @@ -27,6 +27,6 @@ class PlayabilityAnalyzer(Analyzer): logging.warn("Failed to run: %s - %s. %s" % (command[0], e.strerror, "Do you have liquidsoap installed?")) except (subprocess.CalledProcessError, Exception) as e: # liquidsoap returned an error code logging.warn(e) - raise UnplayableFileError + raise UnplayableFileError() return metadata diff --git a/python_apps/airtime_analyzer/airtime_analyzer/status_reporter.py b/python_apps/airtime_analyzer/airtime_analyzer/status_reporter.py index 23c6175c3..88fb6ff28 100644 --- a/python_apps/airtime_analyzer/airtime_analyzer/status_reporter.py +++ b/python_apps/airtime_analyzer/airtime_analyzer/status_reporter.py @@ -25,7 +25,7 @@ class PicklableHttpRequest: auth=requests.auth.HTTPBasicAuth(self.api_key, '')) def process_http_requests(ipc_queue, http_retry_queue_path): - ''' Runs in a separate process and performs all the HTTP requests where we're + ''' Runs in a separate thread and performs all the HTTP requests where we're reporting extracted audio file metadata or errors back to the Airtime web application. This process also checks every 5 seconds if there's failed HTTP requests that we