ReplayGain analysis in airtime_analyzer using python-rgain

* Implemented ReplayGain analysis for Ogg Vorbis, MP3, MP4, and FLAC using python-rgain
* Added unit tests for ReplayGainAnalyzer
* Squashed stderr output in ReplayGainAnalyzer and CuePointAnalyzer
* Clean up
2014-12-11 14:12:41 -05:00 · 2014-12-11 14:12:41 -05:00 · 4dd2768755
commit 4dd2768755
parent 38bd45b8dc
7 changed files with 89 additions and 36 deletions
--- a/python_apps/airtime_analyzer/airtime_analyzer/analyzer_pipeline.py
+++ b/python_apps/airtime_analyzer/airtime_analyzer/analyzer_pipeline.py
@ -6,6 +6,7 @@ import multiprocessing
 from metadata_analyzer import MetadataAnalyzer
 from filemover_analyzer import FileMoverAnalyzer
 from cuepoint_analyzer import CuePointAnalyzer
+from replaygain_analyzer import ReplayGainAnalyzer

 class AnalyzerPipeline:
    """ Analyzes and imports an audio file into the Airtime library. 
@ -53,6 +54,7 @@ class AnalyzerPipeline:
            metadata = dict()
            metadata = MetadataAnalyzer.analyze(audio_file_path, metadata)
            metadata = CuePointAnalyzer.analyze(audio_file_path, metadata)
+            metadata = ReplayGainAnalyzer.analyze(audio_file_path, metadata)
            metadata = FileMoverAnalyzer.move(audio_file_path, import_directory, original_filename, metadata)
            metadata["import_status"] = 0 # Successfully imported

--- a/python_apps/airtime_analyzer/airtime_analyzer/cuepoint_analyzer.py
+++ b/python_apps/airtime_analyzer/airtime_analyzer/cuepoint_analyzer.py
@ -11,9 +11,6 @@ class CuePointAnalyzer(Analyzer):

    SILAN_EXECUTABLE = 'silan'

-    def __init__(self):
-        pass
-
    @staticmethod
    def analyze(filename, metadata):
        ''' Extracts the cue-in and cue-out times along and sets the file duration based on that.
@ -29,7 +26,7 @@ class CuePointAnalyzer(Analyzer):
        '''
        command = [CuePointAnalyzer.SILAN_EXECUTABLE, '-b', '-F', '0.99', '-f', 'JSON', filename]
        try:
-            results_json = subprocess.check_output(command)
+            results_json = subprocess.check_output(command, stderr=subprocess.STDOUT)
            silan_results = json.loads(results_json)
            metadata['length_seconds'] = float(silan_results['file duration'])
            # Conver the length into a formatted time string
--- a/python_apps/airtime_analyzer/airtime_analyzer/replaygain_analyzer.py
+++ b/python_apps/airtime_analyzer/airtime_analyzer/replaygain_analyzer.py
@ -1,14 +1,12 @@
 import subprocess
+import logging
 from analyzer import Analyzer


 class ReplayGainAnalyzer(Analyzer):
-    ''' This class extracts the cue-in time, cue-out time, and length of a track using silan. '''
+    ''' This class extracts the ReplayGain using a tool from the python-rgain package. '''

-    BG1770GAIN_EXECUTABLE = 'bg1770gain'
-
-    def __init__(self):
-        pass
+    REPLAYGAIN_EXECUTABLE = 'replaygain' # From the python-rgain package

    @staticmethod
    def analyze(filename, metadata):
@ -17,23 +15,20 @@ class ReplayGainAnalyzer(Analyzer):
        :param metadata: A metadata dictionary where the results will be put
        :return: The metadata dictionary
        '''
-        ''' The -d 00:01:00 flag means it will let the decoding run for a maximum of 1 minute. This is a safeguard
-            in case the libavcodec decoder gets stuck in an infinite loop.
+        ''' The -d flag means do a dry run, i.e. don't modify the file directly.
        '''
-        command = [ReplayGainAnalyzer.BG1770GAIN_EXECUTABLE, '--replaygain', '-d', '00:01:00', '-f', 'JSON', filename]
+        command = [ReplayGainAnalyzer.REPLAYGAIN_EXECUTABLE, '-d', filename]
        try:
-            results_json = subprocess.check_output(command)
-            silan_results = json.loads(results_json)
-            metadata['length_seconds'] = float(silan_results['file duration'])
-            # Conver the length into a formatted time string
-            track_length = datetime.timedelta(seconds=metadata['length_seconds'])
-            metadata["length"] = str(track_length)
-            metadata['cuein'] = silan_results['sound'][0][0]
-            metadata['cueout'] = silan_results['sound'][0][1]
+            results = subprocess.check_output(command, stderr=subprocess.STDOUT)
+            filename_token = "%s: " % filename
+            rg_pos = results.find(filename_token, results.find("Calculating Replay Gain information")) + len(filename_token)
+            db_pos = results.find(" dB", rg_pos)
+            replaygain = results[rg_pos:db_pos]
+            metadata['replaygain'] = float(replaygain)

-        except OSError as e: # silan was not found
-            logging.warn("Failed to run: %s - %s. %s" % (command[0], e.strerror, "Do you have silan installed?"))
-        except subprocess.CalledProcessError as e: # silan returned an error code
+        except OSError as e: # replaygain was not found
+            logging.warn("Failed to run: %s - %s. %s" % (command[0], e.strerror, "Do you have python-rgain installed?"))
+        except subprocess.CalledProcessError as e: # replaygain returned an error code
            logging.warn("%s %s %s", e.cmd, e.message, e.returncode)
        except Exception as e:
            logging.warn(e)
--- a/python_apps/airtime_analyzer/setup.py
+++ b/python_apps/airtime_analyzer/setup.py
@ -29,6 +29,7 @@ setup(name='airtime_analyzer',
          'mock',
          'python-daemon',
          'requests',
+          'rgain',
          # These next 3 are required for requests to support SSL with SNI. Learned this the hard way...
          # What sucks is that GCC is required to pip install these. 
          #'ndg-httpsclient',
--- a/python_apps/airtime_analyzer/tests/cuepoint_analyzer_tests.py
+++ b/python_apps/airtime_analyzer/tests/cuepoint_analyzer_tests.py
@ -1,10 +1,6 @@
 from nose.tools import *
 from airtime_analyzer.cuepoint_analyzer import CuePointAnalyzer

-def test_constructor():
-    cpa = CuePointAnalyzer()
-
-
 def check_default_metadata(metadata):
    ''' Check that the values extract by Silan/CuePointAnalyzer on our test audio files match what we expect.
    :param metadata: a metadata dictionary
@ -65,12 +61,3 @@ def test_m4a_stereo():
 def test_wav_stereo():
    metadata = CuePointAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.wav', dict())
    check_default_metadata(metadata)
-
-    # FFMPEG / libav detect the AAC file as slightly shorter...
-'''
-    tolerance_seconds = 0.2
-    length_seconds = 3.8
-    assert abs(metadata['length_seconds'] - length_seconds) < tolerance_seconds
-    assert abs(metadata['cuein']) < tolerance_seconds
-    assert abs(metadata['cueout'] - length_seconds) < tolerance_seconds
-'''
--- a/python_apps/airtime_analyzer/tests/replaygain_analyzer_tests.py
+++ b/python_apps/airtime_analyzer/tests/replaygain_analyzer_tests.py
@ -0,0 +1,71 @@
+from nose.tools import *
+from airtime_analyzer.replaygain_analyzer import ReplayGainAnalyzer
+
+def check_default_metadata(metadata):
+    ''' Check that the ReplayGain value extracted by python-rgain/ReplayGainAnalyzer on our test audio files matches what we expect.
+    :param metadata: a metadata dictionary
+    :return: Nothing
+    '''
+    '''
+    # We give python-rgain some leeway here by specifying a tolerance. It's not perfectly consistent across codecs...
+    assert abs(metadata['cuein']) < tolerance_seconds
+    assert abs(metadata['cueout'] - length_seconds) < tolerance_seconds
+    '''
+    tolerance = 0.30
+    expected_replaygain = 5.0
+    print metadata['replaygain']
+    assert abs(metadata['replaygain'] - expected_replaygain) < tolerance
+
+def test_missing_replaygain():
+    old_rg = ReplayGainAnalyzer.REPLAYGAIN_EXECUTABLE
+    ReplayGainAnalyzer.REPLAYGAIN_EXECUTABLE = 'foosdaf'
+    metadata = ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo-utf8.mp3', dict())
+    ReplayGainAnalyzer.REPLAYGAIN_EXECUTABLE = old_rg # Need to put this back
+
+def test_invalid_filepath():
+    metadata = ReplayGainAnalyzer.analyze(u'non-existent-file', dict())
+
+
+def test_mp3_utf8():
+    metadata = ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo-utf8.mp3', dict())
+    check_default_metadata(metadata)
+
+def test_mp3_dualmono():
+    metadata = ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-dualmono.mp3', dict())
+    check_default_metadata(metadata)
+
+def test_mp3_jointstereo():
+    metadata = ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-jointstereo.mp3', dict())
+    check_default_metadata(metadata)
+
+def test_mp3_simplestereo():
+    metadata = ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-simplestereo.mp3', dict())
+    check_default_metadata(metadata)
+
+def test_mp3_stereo():
+    metadata = ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.mp3', dict())
+    check_default_metadata(metadata)
+
+def test_mp3_mono():
+    metadata = ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-mono.mp3', dict())
+    check_default_metadata(metadata)
+
+def test_ogg_stereo():
+    metadata = ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.ogg', dict())
+    check_default_metadata(metadata)
+
+def test_invalid_wma():
+    metadata = ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo-invalid.wma', dict())
+
+def test_mp3_missing_id3_header():
+    metadata = ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-mp3-missingid3header.mp3', dict())
+
+def test_m4a_stereo():
+    metadata = ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.m4a', dict())
+    check_default_metadata(metadata)
+
+''' WAVE is not supported by python-rgain yet
+def test_wav_stereo():
+    metadata = ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.wav', dict())
+    check_default_metadata(metadata)
+'''
--- a/python_apps/airtime_analyzer/tests/test_data/44100Hz-16bit-mp3-missingid3header.mp3
+++ b/python_apps/airtime_analyzer/tests/test_data/44100Hz-16bit-mp3-missingid3header.mp3