From be7a6854f0d579a1e3201a271cc52ab2cca57c40 Mon Sep 17 00:00:00 2001 From: drigato Date: Tue, 24 Feb 2015 11:00:41 -0500 Subject: [PATCH 1/4] SAAS-596: Store file size and hash in database Have pypo fetch the file size and md5, if necessary, and make request to Airtime to set these values --- .../application/controllers/ApiController.php | 12 +++--- airtime_mvc/application/models/StoredFile.php | 5 ++- python_apps/pypo/pypofile.py | 37 ++++++++++++++++++- 3 files changed, 47 insertions(+), 7 deletions(-) diff --git a/airtime_mvc/application/controllers/ApiController.php b/airtime_mvc/application/controllers/ApiController.php index 124b4b4d0..0293fad22 100644 --- a/airtime_mvc/application/controllers/ApiController.php +++ b/airtime_mvc/application/controllers/ApiController.php @@ -160,11 +160,11 @@ class ApiController extends Zend_Controller_Action // If we're passing in a Stored File object, it's faster // to use getFileSize() and pass in the result - if (!$size || $size <= 0) { + if (!isset($size) || $size < 0) { $size= filesize($location); } - if ($size <= 0) { + if ($size < 0) { throw new Exception("Invalid file size returned for file at $location"); } @@ -195,9 +195,11 @@ class ApiController extends Zend_Controller_Action header('Cache-Control: public, must-revalidate, max-age=0'); header('Pragma: no-cache'); header('Accept-Ranges: bytes'); - header('Content-Length:' . (($end - $begin) + 1)); - if (isset($_SERVER['HTTP_RANGE'])) { - header("Content-Range: bytes $begin-$end/$size"); + if ($size > 0) { + header('Content-Length:' . (($end - $begin) + 1)); + if (isset($_SERVER['HTTP_RANGE'])) { + header("Content-Range: bytes $begin-$end/$size"); + } } header("Content-Transfer-Encoding: binary"); diff --git a/airtime_mvc/application/models/StoredFile.php b/airtime_mvc/application/models/StoredFile.php index eb7825381..26cbea647 100644 --- a/airtime_mvc/application/models/StoredFile.php +++ b/airtime_mvc/application/models/StoredFile.php @@ -564,7 +564,10 @@ SQL; public function getFileSize() { $filesize = $this->_file->getFileSize(); - if ($filesize <= 0) { + + // It's OK for the file size to be zero. Pypo will make a request to Airtime and update + // the file size and md5 hash if they are not set. + if ($filesize < 0) { throw new Exception ("Could not determine filesize for file id: ".$this->_file->getDbId().". Filesize: ".$filesize); } return $filesize; diff --git a/python_apps/pypo/pypofile.py b/python_apps/pypo/pypofile.py index 58c3f91f3..630adfcbe 100644 --- a/python_apps/pypo/pypofile.py +++ b/python_apps/pypo/pypofile.py @@ -10,6 +10,9 @@ import sys import stat import requests import ConfigParser +import json +import hashlib +from requests.exceptions import ConnectionError, HTTPError, Timeout from std_err_override import LogWriter @@ -68,7 +71,6 @@ class PypoFile(Thread): host = config.get(CONFIG_SECTION, 'base_url') url = "http://%s/rest/media/%s/download" % (host, media_item["id"]) - with open(dst, "wb") as handle: response = requests.get(url, auth=requests.auth.HTTPBasicAuth(username, ''), stream=True, verify=False) @@ -85,11 +87,44 @@ class PypoFile(Thread): #make file world readable os.chmod(dst, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH) + if media_item['filesize'] == 0: + file_size = self.report_file_size_and_md5_to_airtime(dst, media_item["id"], host, username) + media_item["filesize"] = file_size + media_item['file_ready'] = True except Exception, e: self.logger.error("Could not copy from %s to %s" % (src, dst)) self.logger.error(e) + def report_file_size_and_md5_to_airtime(self, file_path, file_id, host_name, api_key): + try: + file_size = os.path.getsize(file_path) + + with open(file_path, 'rb') as fh: + m = hashlib.md5() + while True: + data = fh.read(8192) + if not data: + break + m.update(data) + md5_hash = m.hexdigest() + except (OSError, IOError) as e: + file_size = 0 + self.logger.error("Error getting file size and md5 hash for file id %s" % file_id) + self.logger.error(e) + + # Make PUT request to Airtime to update the file size and hash + put_url = "http://%s/rest/media/%s" % (host_name, file_id) + payload = json.dumps({'filesize': file_size, 'md5': md5_hash}) + try: + response = requests.put(put_url, data=payload, auth=requests.auth.HTTPBasicAuth(api_key, '')) + if not response.ok: + self.logger.error("Could not update media file %s with file size and md5 hash" % file_id) + except (ConnectionError, Timeout): + self.logger.error("Could not update media file %s with file size and md5 hash" % file_id) + + return file_size + def get_highest_priority_media_item(self, schedule): """ Get highest priority media_item in the queue. Currently the highest From 86ba5c547e197b4b261d42ce23746aa6fabb800c Mon Sep 17 00:00:00 2001 From: drigato Date: Tue, 24 Feb 2015 12:32:46 -0500 Subject: [PATCH 2/4] SAAS-596: Store file size and hash in database Fixed exception handling in pypo --- python_apps/pypo/pypofile.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/python_apps/pypo/pypofile.py b/python_apps/pypo/pypofile.py index 630adfcbe..829c3fe23 100644 --- a/python_apps/pypo/pypofile.py +++ b/python_apps/pypo/pypofile.py @@ -114,14 +114,18 @@ class PypoFile(Thread): self.logger.error(e) # Make PUT request to Airtime to update the file size and hash - put_url = "http://%s/rest/media/%s" % (host_name, file_id) - payload = json.dumps({'filesize': file_size, 'md5': md5_hash}) + error_msg = "Could not update media file %s with file size and md5 hash" % file_id try: + put_url = "http://%s/rest/media/%s" % (host_name, file_id) + payload = json.dumps({'filesize': file_size, 'md5': md5_hash}) response = requests.put(put_url, data=payload, auth=requests.auth.HTTPBasicAuth(api_key, '')) if not response.ok: - self.logger.error("Could not update media file %s with file size and md5 hash" % file_id) + self.logger.error(error_msg) except (ConnectionError, Timeout): - self.logger.error("Could not update media file %s with file size and md5 hash" % file_id) + self.logger.error(error_msg) + except Exception as e: + self.logger.error(error_msg) + self.logger.error(e) return file_size From 6113dff182b2f8e596d789e7d2ce61f6666abd99 Mon Sep 17 00:00:00 2001 From: drigato Date: Tue, 24 Feb 2015 15:19:46 -0500 Subject: [PATCH 3/4] Debugging logs --- python_apps/pypo/pypofile.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python_apps/pypo/pypofile.py b/python_apps/pypo/pypofile.py index 829c3fe23..5558027fe 100644 --- a/python_apps/pypo/pypofile.py +++ b/python_apps/pypo/pypofile.py @@ -44,10 +44,12 @@ class PypoFile(Thread): dst = media_item['dst'] src_size = media_item['filesize'] + self.logger.info("--- src size: %s ---" % src_size) dst_exists = True try: dst_size = os.path.getsize(dst) + self.logger.info("--- dst size: %s ---" % dst_size) except Exception, e: dst_exists = False @@ -63,6 +65,7 @@ class PypoFile(Thread): media_item['file_ready'] = not do_copy if do_copy: + self.logger.info("----doing copy-----") self.logger.debug("copying from %s to local cache %s" % (src, dst)) try: config = self.read_config_file(CONFIG_PATH) From d843de80cccb2f04d5eb809661b3281369969767 Mon Sep 17 00:00:00 2001 From: drigato Date: Tue, 24 Feb 2015 16:22:20 -0500 Subject: [PATCH 4/4] Remove log statements --- python_apps/pypo/pypofile.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python_apps/pypo/pypofile.py b/python_apps/pypo/pypofile.py index 5558027fe..829c3fe23 100644 --- a/python_apps/pypo/pypofile.py +++ b/python_apps/pypo/pypofile.py @@ -44,12 +44,10 @@ class PypoFile(Thread): dst = media_item['dst'] src_size = media_item['filesize'] - self.logger.info("--- src size: %s ---" % src_size) dst_exists = True try: dst_size = os.path.getsize(dst) - self.logger.info("--- dst size: %s ---" % dst_size) except Exception, e: dst_exists = False @@ -65,7 +63,6 @@ class PypoFile(Thread): media_item['file_ready'] = not do_copy if do_copy: - self.logger.info("----doing copy-----") self.logger.debug("copying from %s to local cache %s" % (src, dst)) try: config = self.read_config_file(CONFIG_PATH)