Merge branch 'cc-5709-airtime-analyzer-cloud-storage' into cc-5709-airtime-analyzer-cloud-storage-saas

2014-12-12 09:44:54 -05:00 · 2014-12-12 09:44:54 -05:00 · 7b80915c16
commit 7b80915c16
parent 891cfdb48e e7a742dbf4
19 changed files with 332 additions and 363 deletions
--- a/python_apps/airtime_analyzer/airtime_analyzer/analyzer_pipeline.py
+++ b/python_apps/airtime_analyzer/airtime_analyzer/analyzer_pipeline.py
@ -6,6 +6,9 @@ import multiprocessing
 from metadata_analyzer import MetadataAnalyzer
 from filemover_analyzer import FileMoverAnalyzer
 from cloud_storage_uploader import CloudStorageUploader
+from cuepoint_analyzer import CuePointAnalyzer
+from replaygain_analyzer import ReplayGainAnalyzer
+from playability_analyzer import *

 class AnalyzerPipeline:
    """ Analyzes and imports an audio file into the Airtime library. 
@ -54,10 +57,14 @@ class AnalyzerPipeline:
            metadata = dict()
            metadata = MetadataAnalyzer.analyze(audio_file_path, metadata)
            metadata["station_domain"] = station_domain
-            #metadata = FileMoverAnalyzer.move(audio_file_path, import_directory, original_filename, metadata)
+
+            metadata = CuePointAnalyzer.analyze(audio_file_path, metadata)
+            metadata = ReplayGainAnalyzer.analyze(audio_file_path, metadata)
+            metadata = PlayabilityAnalyzer.analyze(audio_file_path, metadata)
+
            csu = CloudStorageUploader()
            metadata = csu.upload_obj(audio_file_path, metadata)
-            metadata["import_status"] = 0 # imported
+            metadata["import_status"] = 0 # Successfully imported

            # Note that the queue we're putting the results into is our interprocess communication 
            # back to the main process.
@ -65,6 +72,11 @@ class AnalyzerPipeline:
            # Pass all the file metadata back to the main analyzer process, which then passes
            # it back to the Airtime web application.
            queue.put(metadata)
+        except UnplayableFileError as e:
+            logging.exception(e)
+            metadata["import_status"] = 2
+            metadata["reason"] = "The file could not be played."
+            raise e
        except Exception as e:
            # Ensures the traceback for this child process gets written to our log files:
            logging.exception(e) 
--- a/python_apps/airtime_analyzer/airtime_analyzer/cuepoint_analyzer.py
+++ b/python_apps/airtime_analyzer/airtime_analyzer/cuepoint_analyzer.py
@ -0,0 +1,45 @@
+import subprocess
+import logging
+import traceback
+import json
+import datetime
+from analyzer import Analyzer
+
+
+class CuePointAnalyzer(Analyzer):
+    ''' This class extracts the cue-in time, cue-out time, and length of a track using silan. '''
+
+    # Name of the silan executable to invoke.
+    SILAN_EXECUTABLE = 'silan'
+
+    @staticmethod
+    def analyze(filename, metadata):
+        ''' Extracts the cue-in and cue-out times and sets the file duration based on silan's output.
+            The cue points are there to skip the silence at the start and end of a track, and are determined
+            using "silan", which analyzes the loudness in a track.
+            Failures (silan not found, silan returning an error code, or any other error while reading its
+            output) are logged as warnings and are not raised; the metadata dictionary is still returned.
+        :param filename: The full path to the file to analyze
+        :param metadata: A metadata dictionary where the results will be put
+        :return: The metadata dictionary
+        '''
+        # The silan -F 0.99 parameter tweaks the highpass filter. The default is 0.98, but at that setting,
+        # the unit test on the short m4a file fails. With the new setting, it gets the correct cue-in time and
+        # all the unit tests pass.
+        command = [CuePointAnalyzer.SILAN_EXECUTABLE, '-b', '-F', '0.99', '-f', 'JSON', filename]
+        try:
+            results_json = subprocess.check_output(command, stderr=subprocess.STDOUT)
+            silan_results = json.loads(results_json)
+            metadata['length_seconds'] = float(silan_results['file duration'])
+            # Convert the length into a formatted time string
+            track_length = datetime.timedelta(seconds=metadata['length_seconds'])
+            metadata["length"] = str(track_length)
+            # Only the first entry of silan's 'sound' list is used (presumably its [start, end] pair - TODO confirm)
+            metadata['cuein'] = silan_results['sound'][0][0]
+            metadata['cueout'] = silan_results['sound'][0][1]
+
+        except OSError as e: # silan was not found
+            logging.warn("Failed to run: %s - %s. %s" % (command[0], e.strerror, "Do you have silan installed?"))
+        except subprocess.CalledProcessError as e: # silan returned an error code
+            # Log the captured output (stdout and stderr combined) because that is where silan's own
+            # error text ends up; CalledProcessError does not provide a useful "message".
+            logging.warn("%s %s %s", e.cmd, e.output, e.returncode)
+        except Exception as e:
+            logging.warn(e)
+
+        return metadata
--- a/python_apps/airtime_analyzer/airtime_analyzer/metadata_analyzer.py
+++ b/python_apps/airtime_analyzer/airtime_analyzer/metadata_analyzer.py
@ -149,7 +149,7 @@ class MetadataAnalyzer(Analyzer):
            metadata["mime"] = magic.from_file(filename, mime=True)
            metadata["channels"] = reader.getnchannels()
            metadata["sample_rate"] = reader.getframerate()
-            length_seconds = float(reader.getnframes()) / float(metadata["channels"] * metadata["sample_rate"])
+            length_seconds = float(reader.getnframes()) / float(metadata["sample_rate"])
            #Converting the length in seconds (float) to a formatted time string
            track_length = datetime.timedelta(seconds=length_seconds)
            metadata["length"] = str(track_length) #time.strftime("%H:%M:%S.%f", track_length)
--- a/python_apps/airtime_analyzer/airtime_analyzer/playability_analyzer.py
+++ b/python_apps/airtime_analyzer/airtime_analyzer/playability_analyzer.py
@ -0,0 +1,32 @@
+__author__ = 'asantoni'
+
+import subprocess
+import logging
+from analyzer import Analyzer
+
+class UnplayableFileError(Exception):
+    ''' Raised by PlayabilityAnalyzer when the Liquidsoap check of a file fails. '''
+
+class PlayabilityAnalyzer(Analyzer):
+    ''' This class checks if a file can actually be played with Liquidsoap. '''
+
+    # Name of the liquidsoap executable to invoke.
+    LIQUIDSOAP_EXECUTABLE = 'liquidsoap'
+
+    @staticmethod
+    def analyze(filename, metadata):
+        ''' Checks if a file can be played by Liquidsoap.
+            If the liquidsoap executable cannot be run at all, only a warning is logged and the
+            metadata is returned as if the file were playable.
+        :param filename: The full path to the file to analyze
+        :param metadata: A metadata dictionary where the results will be put
+        :return: The metadata dictionary
+        :raises UnplayableFileError: If liquidsoap returns an error code or the check fails for any other reason
+        '''
+        # The filename is handed to the script as a command-line argument (read back with argv(1)) rather than
+        # being pasted into the script text, so an apostrophe or backslash in the filename cannot break the script.
+        command = [PlayabilityAnalyzer.LIQUIDSOAP_EXECUTABLE, '-v', '-c',
+                   "output.dummy(audio_to_stereo(single(argv(1))))", '--', filename]
+        try:
+            subprocess.check_output(command, stderr=subprocess.STDOUT)
+
+        except OSError as e: # liquidsoap was not found
+            logging.warn("Failed to run: %s - %s. %s" % (command[0], e.strerror, "Do you have liquidsoap installed?"))
+        except Exception as e: # liquidsoap returned an error code (CalledProcessError), or anything else went wrong
+            logging.warn(e)
+            raise UnplayableFileError
+
+        return metadata
--- a/python_apps/airtime_analyzer/airtime_analyzer/replaygain_analyzer.py
+++ b/python_apps/airtime_analyzer/airtime_analyzer/replaygain_analyzer.py
@ -1,14 +1,36 @@
+import subprocess
+import logging
 from analyzer import Analyzer

-''' TODO: ReplayGain is currently calculated by pypo but it should
-          be done here in the analyzer.
-'''
+
 class ReplayGainAnalyzer(Analyzer):
-    
-    def __init__(self):
-        pass
-    
+    ''' This class extracts the ReplayGain using a tool from the python-rgain package. '''
+
+    REPLAYGAIN_EXECUTABLE = 'replaygain' # From the python-rgain package
+
    @staticmethod
-    def analyze(filename):
-        pass
-    
+    def analyze(filename, metadata):
+        ''' Extracts the Replaygain loudness normalization factor of a track.
+            Failures (replaygain tool not found, the tool returning an error code, or output that cannot
+            be parsed) are logged as warnings and are not raised; 'replay_gain' is then left unset.
+        :param filename: The full path to the file to analyze
+        :param metadata: A metadata dictionary where the results will be put
+        :return: The metadata dictionary
+        '''
+        # The -d flag means do a dry-run, ie. don't modify the file directly.
+        command = [ReplayGainAnalyzer.REPLAYGAIN_EXECUTABLE, '-d', filename]
+        try:
+            results = subprocess.check_output(command, stderr=subprocess.STDOUT)
+            # The gain is the text between "<filename>: " and " dB", searched for from the
+            # "Calculating Replay Gain information" line onwards.
+            filename_token = "%s: " % filename
+            rg_pos = results.find(filename_token, results.find("Calculating Replay Gain information")) + len(filename_token)
+            db_pos = results.find(" dB", rg_pos)
+            replaygain = results[rg_pos:db_pos]
+            metadata['replay_gain'] = float(replaygain)
+
+        except OSError as e: # replaygain was not found
+            logging.warn("Failed to run: %s - %s. %s" % (command[0], e.strerror, "Do you have python-rgain installed?"))
+        except subprocess.CalledProcessError as e: # replaygain returned an error code
+            # Log the captured output (stdout and stderr combined) because that is where the tool's own
+            # error text ends up; CalledProcessError does not provide a useful "message".
+            logging.warn("%s %s %s", e.cmd, e.output, e.returncode)
+        except Exception as e:
+            logging.warn(e)
+
+        return metadata
--- a/python_apps/airtime_analyzer/setup.py
+++ b/python_apps/airtime_analyzer/setup.py
@ -30,6 +30,7 @@ setup(name='airtime_analyzer',
          'python-daemon',
          'requests',
          'apache-libcloud',
+          'rgain',
          # These next 3 are required for requests to support SSL with SNI. Learned this the hard way...
          # What sucks is that GCC is required to pip install these. 
          #'ndg-httpsclient',
--- a/python_apps/airtime_analyzer/tests/cuepoint_analyzer_tests.py
+++ b/python_apps/airtime_analyzer/tests/cuepoint_analyzer_tests.py
@ -0,0 +1,63 @@
+from nose.tools import *
+from airtime_analyzer.cuepoint_analyzer import CuePointAnalyzer
+
+def check_default_metadata(metadata):
+    ''' Assert that the values extracted by Silan/CuePointAnalyzer from our test audio files match what we expect.
+    :param metadata: a metadata dictionary
+    :return: Nothing
+    '''
+    expected_length = 3.9
+    # silan gets some leeway: each value only has to be within this many seconds of the expected one
+    leeway = 0.1
+    assert abs(metadata['length_seconds'] - expected_length) < leeway
+    assert abs(metadata['cuein']) < leeway
+    assert abs(metadata['cueout'] - expected_length) < leeway
+
+def test_missing_silan():
+    # Point the analyzer at an executable that does not exist; analyze() is expected to cope without raising.
+    old_silan = CuePointAnalyzer.SILAN_EXECUTABLE
+    CuePointAnalyzer.SILAN_EXECUTABLE = 'foosdaf'
+    try:
+        metadata = CuePointAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo-utf8.mp3', dict())
+    finally:
+        # Need to put this back even if analyze() raises, or the tests that run afterwards would use the bogus executable
+        CuePointAnalyzer.SILAN_EXECUTABLE = old_silan
+
+def test_invalid_filepath():
+    # Only checks that analyzing a path that does not exist does not raise
+    CuePointAnalyzer.analyze(u'non-existent-file', {})
+
+
+def test_mp3_utf8():
+    check_default_metadata(CuePointAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo-utf8.mp3', {}))
+
+def test_mp3_dualmono():
+    check_default_metadata(CuePointAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-dualmono.mp3', {}))
+
+def test_mp3_jointstereo():
+    check_default_metadata(CuePointAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-jointstereo.mp3', {}))
+
+def test_mp3_simplestereo():
+    check_default_metadata(CuePointAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-simplestereo.mp3', {}))
+
+def test_mp3_stereo():
+    check_default_metadata(CuePointAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.mp3', {}))
+
+def test_mp3_mono():
+    check_default_metadata(CuePointAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-mono.mp3', {}))
+
+def test_ogg_stereo():
+    check_default_metadata(CuePointAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.ogg', {}))
+
+def test_invalid_wma():
+    # Only checks that analyzing the invalid WMA file does not raise
+    CuePointAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo-invalid.wma', {})
+
+def test_m4a_stereo():
+    check_default_metadata(CuePointAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.m4a', {}))
+
+def test_wav_stereo():
+    check_default_metadata(CuePointAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.wav', {}))
--- a/python_apps/airtime_analyzer/tests/metadata_analyzer_tests.py
+++ b/python_apps/airtime_analyzer/tests/metadata_analyzer_tests.py
@ -114,6 +114,18 @@ def test_mp3_utf8():
    assert metadata['mime'] == 'audio/mp3'
    assert metadata['track_total'] == u'10' # MP3s can have a track_total

+def test_invalid_wma():
+    # The MIME type is still expected to be reported for the invalid WMA file
+    info = MetadataAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo-invalid.wma', {})
+    assert info['mime'] == 'audio/x-ms-wma'
+
+def test_wav_stereo():
+    info = MetadataAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.wav', {})
+    assert info['mime'] == 'audio/x-wav'
+    # The length only has to be within 0.1 seconds of the expected 3.9 seconds
+    assert abs(info['length_seconds'] - 3.9) < 0.1
+    assert info['channels'] == 2
+    assert info['sample_rate'] == 44100
+
+
 # Make sure the parameter checking works
@raises(TypeError)
 def test_move_wrong_string_param1():
@ -132,7 +144,6 @@ def test_mp3_bad_channels():
        It'd be a pain in the ass to construct a real MP3 with an invalid number
        of channels by hand because that value is stored in every MP3 frame in the file
    '''
-    print "testing bad channels..."
    audio_file = mutagen.File(filename, easy=True)
    audio_file.info.mode = 1777
    with mock.patch('airtime_analyzer.metadata_analyzer.mutagen') as mock_mutagen:
@ -143,7 +154,6 @@ def test_mp3_bad_channels():
    check_default_metadata(metadata)
    assert metadata['channels'] == 1
    assert metadata['bit_rate'] == 64000
-    print metadata['length_seconds']
    assert abs(metadata['length_seconds'] - 3.9) < 0.1
    assert metadata['mime'] == 'audio/mp3' # Not unicode because MIMEs aren't.
    assert metadata['track_total'] == u'10' # MP3s can have a track_total
--- a/python_apps/airtime_analyzer/tests/playability_analyzer_tests.py
+++ b/python_apps/airtime_analyzer/tests/playability_analyzer_tests.py
@ -0,0 +1,61 @@
+from nose.tools import *
+from airtime_analyzer.playability_analyzer import *
+
+def check_default_metadata(metadata):
+    ''' Placeholder that checks nothing for now; kept in case we need it later.'''
+    return None
+
+def test_missing_liquidsoap():
+    # Point the analyzer at an executable that does not exist; analyze() is expected to cope without raising.
+    old_ls = PlayabilityAnalyzer.LIQUIDSOAP_EXECUTABLE
+    PlayabilityAnalyzer.LIQUIDSOAP_EXECUTABLE = 'foosdaf'
+    try:
+        metadata = PlayabilityAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo-utf8.mp3', dict())
+    finally:
+        # Need to put this back even if analyze() raises, or the tests that run afterwards would use the bogus executable
+        PlayabilityAnalyzer.LIQUIDSOAP_EXECUTABLE = old_ls
+
+@raises(UnplayableFileError)
+def test_invalid_filepath():
+    # A path that does not exist must be reported as unplayable
+    PlayabilityAnalyzer.analyze(u'non-existent-file', {})
+
+def test_mp3_utf8():
+    check_default_metadata(PlayabilityAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo-utf8.mp3', {}))
+
+def test_mp3_dualmono():
+    check_default_metadata(PlayabilityAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-dualmono.mp3', {}))
+
+def test_mp3_jointstereo():
+    check_default_metadata(PlayabilityAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-jointstereo.mp3', {}))
+
+def test_mp3_simplestereo():
+    check_default_metadata(PlayabilityAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-simplestereo.mp3', {}))
+
+def test_mp3_stereo():
+    check_default_metadata(PlayabilityAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.mp3', {}))
+
+def test_mp3_mono():
+    check_default_metadata(PlayabilityAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-mono.mp3', {}))
+
+def test_ogg_stereo():
+    check_default_metadata(PlayabilityAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.ogg', {}))
+
+@raises(UnplayableFileError)
+def test_invalid_wma():
+    # The invalid WMA file must be reported as unplayable
+    PlayabilityAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo-invalid.wma', {})
+
+def test_m4a_stereo():
+    check_default_metadata(PlayabilityAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.m4a', {}))
+
+def test_wav_stereo():
+    check_default_metadata(PlayabilityAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.wav', {}))
+
+@raises(UnplayableFileError)
+def test_unknown():
+    # Something that is not an audio file at all must be reported as unplayable
+    check_default_metadata(PlayabilityAnalyzer.analyze(u'http://www.google.com', {}))
--- a/python_apps/airtime_analyzer/tests/replaygain_analyzer_tests.py
+++ b/python_apps/airtime_analyzer/tests/replaygain_analyzer_tests.py
@ -0,0 +1,71 @@
+from nose.tools import *
+from airtime_analyzer.replaygain_analyzer import ReplayGainAnalyzer
+
+def check_default_metadata(metadata):
+    ''' Check that the ReplayGain value extracted by python-rgain/ReplayGainAnalyzer on our test audio files matches what we expect.
+    :param metadata: a metadata dictionary
+    :return: Nothing
+    '''
+    # We give python-rgain some leeway here by specifying a tolerance. It's not perfectly consistent across codecs...
+    tolerance = 0.30
+    expected_replaygain = 5.0
+    assert abs(metadata['replay_gain'] - expected_replaygain) < tolerance
+
+def test_missing_replaygain():
+    # Point the analyzer at an executable that does not exist; analyze() is expected to cope without raising.
+    old_rg = ReplayGainAnalyzer.REPLAYGAIN_EXECUTABLE
+    ReplayGainAnalyzer.REPLAYGAIN_EXECUTABLE = 'foosdaf'
+    try:
+        metadata = ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo-utf8.mp3', dict())
+    finally:
+        # Need to put this back even if analyze() raises, or the tests that run afterwards would use the bogus executable
+        ReplayGainAnalyzer.REPLAYGAIN_EXECUTABLE = old_rg
+
+def test_invalid_filepath():
+    # Only checks that analyzing a path that does not exist does not raise
+    ReplayGainAnalyzer.analyze(u'non-existent-file', {})
+
+
+def test_mp3_utf8():
+    check_default_metadata(ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo-utf8.mp3', {}))
+
+def test_mp3_dualmono():
+    check_default_metadata(ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-dualmono.mp3', {}))
+
+def test_mp3_jointstereo():
+    check_default_metadata(ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-jointstereo.mp3', {}))
+
+def test_mp3_simplestereo():
+    check_default_metadata(ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-simplestereo.mp3', {}))
+
+def test_mp3_stereo():
+    check_default_metadata(ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.mp3', {}))
+
+def test_mp3_mono():
+    check_default_metadata(ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-mono.mp3', {}))
+
+def test_ogg_stereo():
+    check_default_metadata(ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.ogg', {}))
+
+def test_invalid_wma():
+    # Only checks that analyzing the invalid WMA file does not raise
+    ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo-invalid.wma', {})
+
+def test_mp3_missing_id3_header():
+    # Only checks that analyzing an MP3 without an ID3 header does not raise
+    ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-mp3-missingid3header.mp3', {})
+
+def test_m4a_stereo():
+    check_default_metadata(ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.m4a', {}))
+
+''' WAVE is not supported by python-rgain yet
+def test_wav_stereo():
+    metadata = ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.wav', dict())
+    check_default_metadata(metadata)
+'''
--- a/python_apps/airtime_analyzer/tests/test_data/44100Hz-16bit-mp3-missingid3header.mp3
+++ b/python_apps/airtime_analyzer/tests/test_data/44100Hz-16bit-mp3-missingid3header.mp3
--- a/python_apps/airtime_analyzer/tests/test_data/44100Hz-16bit-stereo-invalid.wma
+++ b/python_apps/airtime_analyzer/tests/test_data/44100Hz-16bit-stereo-invalid.wma
--- a/python_apps/airtime_analyzer/tests/test_data/44100Hz-16bit-stereo.wav
+++ b/python_apps/airtime_analyzer/tests/test_data/44100Hz-16bit-stereo.wav