Merge branch 'cc-5709-airtime-analyzer-cloud-storage' into cc-5709-airtime-analyzer-cloud-storage-saas

This commit is contained in:
drigato 2014-12-12 09:44:54 -05:00
commit 7b80915c16
19 changed files with 332 additions and 363 deletions

View file

@ -6,6 +6,9 @@ import multiprocessing
from metadata_analyzer import MetadataAnalyzer
from filemover_analyzer import FileMoverAnalyzer
from cloud_storage_uploader import CloudStorageUploader
from cuepoint_analyzer import CuePointAnalyzer
from replaygain_analyzer import ReplayGainAnalyzer
from playability_analyzer import *
class AnalyzerPipeline:
""" Analyzes and imports an audio file into the Airtime library.
@ -54,10 +57,14 @@ class AnalyzerPipeline:
metadata = dict()
metadata = MetadataAnalyzer.analyze(audio_file_path, metadata)
metadata["station_domain"] = station_domain
#metadata = FileMoverAnalyzer.move(audio_file_path, import_directory, original_filename, metadata)
metadata = CuePointAnalyzer.analyze(audio_file_path, metadata)
metadata = ReplayGainAnalyzer.analyze(audio_file_path, metadata)
metadata = PlayabilityAnalyzer.analyze(audio_file_path, metadata)
csu = CloudStorageUploader()
metadata = csu.upload_obj(audio_file_path, metadata)
metadata["import_status"] = 0 # imported
metadata["import_status"] = 0 # Successfully imported
# Note that the queue we're putting the results into is our interprocess communication
# back to the main process.
@ -65,6 +72,11 @@ class AnalyzerPipeline:
# Pass all the file metadata back to the main analyzer process, which then passes
# it back to the Airtime web application.
queue.put(metadata)
except UnplayableFileError as e:
logging.exception(e)
metadata["import_status"] = 2
metadata["reason"] = "The file could not be played."
raise e
except Exception as e:
# Ensures the traceback for this child process gets written to our log files:
logging.exception(e)

View file

@ -0,0 +1,45 @@
import subprocess
import logging
import traceback
import json
import datetime
from analyzer import Analyzer
class CuePointAnalyzer(Analyzer):
    ''' This class extracts the cue-in time, cue-out time, and length of a track using silan. '''

    # Name of the silan binary to run. Kept as a class attribute so it can be overridden.
    SILAN_EXECUTABLE = 'silan'

    @staticmethod
    def analyze(filename, metadata):
        ''' Extracts the cue-in and cue-out times and sets the file duration based on silan's output.
            The cue points are there to skip the silence at the start and end of a track, and are determined
            using "silan", which analyzes the loudness in a track.

            This is best-effort: if silan cannot be run, exits with an error code, or its output
            cannot be parsed, a warning is logged and the metadata dictionary is returned with
            whatever keys had been set before the failure.

        :param filename: The full path to the file to analyze
        :param metadata: A metadata dictionary where the results will be put
        :return: The metadata dictionary
        '''
        # The silan -F 0.99 parameter tweaks the highpass filter. The default is 0.98, but at that setting,
        # the unit test on the short m4a file fails. With the new setting, it gets the correct cue-in time and
        # all the unit tests pass.
        command = [CuePointAnalyzer.SILAN_EXECUTABLE, '-b', '-F', '0.99', '-f', 'JSON', filename]
        try:
            results_json = subprocess.check_output(command, stderr=subprocess.STDOUT)
            silan_results = json.loads(results_json)
            metadata['length_seconds'] = float(silan_results['file duration'])
            # Convert the length into a formatted time string
            track_length = datetime.timedelta(seconds=metadata['length_seconds'])
            metadata["length"] = str(track_length)
            # The first entry of 'sound' is read as a [cue-in, cue-out] pair
            metadata['cuein'] = silan_results['sound'][0][0]
            metadata['cueout'] = silan_results['sound'][0][1]
        except OSError as e:  # silan was not found
            logging.warning("Failed to run: %s - %s. %s", command[0], e.strerror, "Do you have silan installed?")
        except subprocess.CalledProcessError as e:  # silan returned an error code
            # e.output holds what silan printed (stderr is merged into stdout above);
            # CalledProcessError carries no useful 'message' attribute.
            logging.warning("%s %s %s", e.cmd, e.output, e.returncode)
        except Exception as e:
            logging.warning(e)
        return metadata

View file

@ -149,7 +149,7 @@ class MetadataAnalyzer(Analyzer):
metadata["mime"] = magic.from_file(filename, mime=True)
metadata["channels"] = reader.getnchannels()
metadata["sample_rate"] = reader.getframerate()
length_seconds = float(reader.getnframes()) / float(metadata["channels"] * metadata["sample_rate"])
length_seconds = float(reader.getnframes()) / float(metadata["sample_rate"])
#Converting the length in seconds (float) to a formatted time string
track_length = datetime.timedelta(seconds=length_seconds)
metadata["length"] = str(track_length) #time.strftime("%H:%M:%S.%f", track_length)

View file

@ -0,0 +1,32 @@
__author__ = 'asantoni'
import subprocess
import logging
from analyzer import Analyzer
class UnplayableFileError(Exception):
    ''' Raised by PlayabilityAnalyzer.analyze() when the Liquidsoap playability check fails. '''
class PlayabilityAnalyzer(Analyzer):
    ''' This class checks if a file can actually be played with Liquidsoap. '''

    # Name of the liquidsoap binary to run. Kept as a class attribute so it can be overridden.
    LIQUIDSOAP_EXECUTABLE = 'liquidsoap'

    @staticmethod
    def analyze(filename, metadata):
        ''' Checks if a file can be played by Liquidsoap.

            If liquidsoap itself cannot be launched, a warning is logged and the file is
            not rejected (the metadata is returned unchanged).

        :param filename: The full path to the file to analyze
        :param metadata: A metadata dictionary where the results will be put
        :return: The metadata dictionary
        :raises UnplayableFileError: if liquidsoap exits with an error code or the check
                fails for any other reason than liquidsoap being missing
        '''
        # The filename is embedded in a single-quoted Liquidsoap string literal, so backslashes
        # and single quotes must be escaped or a name containing them would break the script.
        escaped_filename = filename.replace("\\", "\\\\").replace("'", "\\'")
        script = "output.dummy(audio_to_stereo(single('%s')))" % escaped_filename
        command = [PlayabilityAnalyzer.LIQUIDSOAP_EXECUTABLE, '-v', '-c', script]
        try:
            subprocess.check_output(command, stderr=subprocess.STDOUT)
        except OSError as e:  # liquidsoap was not found
            logging.warning("Failed to run: %s - %s. %s", command[0], e.strerror, "Do you have liquidsoap installed?")
        except Exception as e:  # liquidsoap returned an error code (CalledProcessError) or something else failed
            logging.warning(e)
            raise UnplayableFileError
        return metadata

View file

@ -1,14 +1,36 @@
import subprocess
import logging
from analyzer import Analyzer
''' TODO: ReplayGain is currently calculated by pypo but it should
be done here in the analyzer.
'''
class ReplayGainAnalyzer(Analyzer):
def __init__(self):
pass
''' This class extracts the ReplayGain using a tool from the python-rgain package. '''
REPLAYGAIN_EXECUTABLE = 'replaygain' # From the python-rgain package
@staticmethod
def analyze(filename):
pass
def analyze(filename, metadata):
    ''' Extracts the Replaygain loudness normalization factor of a track.

        This is best-effort: if the replaygain tool cannot be run, exits with an error code,
        or its output does not contain a value for this file, a warning is logged and the
        metadata dictionary is returned without a 'replay_gain' key being set.

    :param filename: The full path to the file to analyze
    :param metadata: A metadata dictionary where the results will be put
    :return: The metadata dictionary
    '''
    # The -d flag means do a dry-run, ie. don't modify the file directly.
    command = [ReplayGainAnalyzer.REPLAYGAIN_EXECUTABLE, '-d', filename]
    try:
        results = subprocess.check_output(command, stderr=subprocess.STDOUT)
        # The value is read from the text between "<filename>: " and " dB", looking only
        # after the "Calculating Replay Gain information" header.
        filename_token = "%s: " % filename
        header_pos = results.find("Calculating Replay Gain information")
        token_pos = results.find(filename_token, header_pos) if header_pos >= 0 else -1
        rg_pos = token_pos + len(filename_token)
        db_pos = results.find(" dB", rg_pos) if token_pos >= 0 else -1
        if db_pos < 0:
            # Don't slice with a -1 index: that would feed garbage to float()
            logging.warning("Could not find a ReplayGain value for %s in the output of %s", filename, command[0])
        else:
            metadata['replay_gain'] = float(results[rg_pos:db_pos])
    except OSError as e:  # replaygain was not found
        logging.warning("Failed to run: %s - %s. %s", command[0], e.strerror, "Do you have python-rgain installed?")
    except subprocess.CalledProcessError as e:  # replaygain returned an error code
        # e.output holds what the tool printed (stderr is merged into stdout above);
        # CalledProcessError carries no useful 'message' attribute.
        logging.warning("%s %s %s", e.cmd, e.output, e.returncode)
    except Exception as e:
        logging.warning(e)
    return metadata

View file

@ -30,6 +30,7 @@ setup(name='airtime_analyzer',
'python-daemon',
'requests',
'apache-libcloud',
'rgain',
# These next 3 are required for requests to support SSL with SNI. Learned this the hard way...
# What sucks is that GCC is required to pip install these.
#'ndg-httpsclient',

View file

@ -0,0 +1,63 @@
from nose.tools import *
from airtime_analyzer.cuepoint_analyzer import CuePointAnalyzer
def check_default_metadata(metadata):
    ''' Check that the values extracted by Silan/CuePointAnalyzer on our test audio files match what we expect.
    :param metadata: a metadata dictionary
    :return: Nothing
    '''
    # We give silan some leeway here by specifying a tolerance
    expected_length = 3.9
    leeway = 0.1

    length_error = abs(metadata['length_seconds'] - expected_length)
    assert length_error < leeway

    # The cue-in is expected at (roughly) zero seconds
    cuein_error = abs(metadata['cuein'])
    assert cuein_error < leeway

    # The cue-out is expected at (roughly) the end of the track
    cueout_error = abs(metadata['cueout'] - expected_length)
    assert cueout_error < leeway
def test_missing_silan():
    # analyze() must not raise when the silan executable cannot be found.
    old_silan = CuePointAnalyzer.SILAN_EXECUTABLE
    CuePointAnalyzer.SILAN_EXECUTABLE = 'foosdaf'
    try:
        CuePointAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo-utf8.mp3', dict())
    finally:
        # Need to put this back even if the call above fails, or the tests after this one break too
        CuePointAnalyzer.SILAN_EXECUTABLE = old_silan
def test_invalid_filepath():
    # No assertions: this passes as long as analyze() does not raise.
    CuePointAnalyzer.analyze(u'non-existent-file', {})


def test_mp3_utf8():
    check_default_metadata(CuePointAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo-utf8.mp3', {}))


def test_mp3_dualmono():
    check_default_metadata(CuePointAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-dualmono.mp3', {}))


def test_mp3_jointstereo():
    check_default_metadata(CuePointAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-jointstereo.mp3', {}))


def test_mp3_simplestereo():
    check_default_metadata(CuePointAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-simplestereo.mp3', {}))


def test_mp3_stereo():
    check_default_metadata(CuePointAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.mp3', {}))


def test_mp3_mono():
    check_default_metadata(CuePointAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-mono.mp3', {}))


def test_ogg_stereo():
    check_default_metadata(CuePointAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.ogg', {}))


def test_invalid_wma():
    # No assertions: this passes as long as analyze() does not raise.
    CuePointAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo-invalid.wma', {})


def test_m4a_stereo():
    check_default_metadata(CuePointAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.m4a', {}))


def test_wav_stereo():
    check_default_metadata(CuePointAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.wav', {}))

View file

@ -114,6 +114,18 @@ def test_mp3_utf8():
assert metadata['mime'] == 'audio/mp3'
assert metadata['track_total'] == u'10' # MP3s can have a track_total
def test_invalid_wma():
    # Only the detected MIME type is checked for the invalid WMA file.
    result = MetadataAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo-invalid.wma', {})
    assert result['mime'] == 'audio/x-ms-wma'


def test_wav_stereo():
    result = MetadataAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.wav', {})
    assert result['mime'] == 'audio/x-wav'
    # Length is compared against 3.9 seconds with a 0.1 second tolerance
    length_error = abs(result['length_seconds'] - 3.9)
    assert length_error < 0.1
    assert result['channels'] == 2
    assert result['sample_rate'] == 44100
# Make sure the parameter checking works
@raises(TypeError)
def test_move_wrong_string_param1():
@ -132,7 +144,6 @@ def test_mp3_bad_channels():
It'd be a pain in the ass to construct a real MP3 with an invalid number
of channels by hand because that value is stored in every MP3 frame in the file
'''
print "testing bad channels..."
audio_file = mutagen.File(filename, easy=True)
audio_file.info.mode = 1777
with mock.patch('airtime_analyzer.metadata_analyzer.mutagen') as mock_mutagen:
@ -143,7 +154,6 @@ def test_mp3_bad_channels():
check_default_metadata(metadata)
assert metadata['channels'] == 1
assert metadata['bit_rate'] == 64000
print metadata['length_seconds']
assert abs(metadata['length_seconds'] - 3.9) < 0.1
assert metadata['mime'] == 'audio/mp3' # Not unicode because MIMEs aren't.
assert metadata['track_total'] == u'10' # MP3s can have a track_total

View file

@ -0,0 +1,61 @@
from nose.tools import *
from airtime_analyzer.playability_analyzer import *
def check_default_metadata(metadata):
    ''' Stub function for now in case we need it later. Performs no checks. '''
    return None
def test_missing_liquidsoap():
    # analyze() must not raise when the liquidsoap executable cannot be found.
    old_ls = PlayabilityAnalyzer.LIQUIDSOAP_EXECUTABLE
    PlayabilityAnalyzer.LIQUIDSOAP_EXECUTABLE = 'foosdaf'
    try:
        PlayabilityAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo-utf8.mp3', dict())
    finally:
        # Need to put this back even if the call above fails, or the tests after this one break too
        PlayabilityAnalyzer.LIQUIDSOAP_EXECUTABLE = old_ls
@raises(UnplayableFileError)
def test_invalid_filepath():
    PlayabilityAnalyzer.analyze(u'non-existent-file', {})


def test_mp3_utf8():
    check_default_metadata(PlayabilityAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo-utf8.mp3', {}))


def test_mp3_dualmono():
    check_default_metadata(PlayabilityAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-dualmono.mp3', {}))


def test_mp3_jointstereo():
    check_default_metadata(PlayabilityAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-jointstereo.mp3', {}))


def test_mp3_simplestereo():
    check_default_metadata(PlayabilityAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-simplestereo.mp3', {}))


def test_mp3_stereo():
    check_default_metadata(PlayabilityAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.mp3', {}))


def test_mp3_mono():
    check_default_metadata(PlayabilityAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-mono.mp3', {}))


def test_ogg_stereo():
    check_default_metadata(PlayabilityAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.ogg', {}))


@raises(UnplayableFileError)
def test_invalid_wma():
    PlayabilityAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo-invalid.wma', {})


def test_m4a_stereo():
    check_default_metadata(PlayabilityAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.m4a', {}))


def test_wav_stereo():
    check_default_metadata(PlayabilityAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.wav', {}))


@raises(UnplayableFileError)
def test_unknown():
    # A URL instead of a local audio file is expected to be rejected as unplayable
    result = PlayabilityAnalyzer.analyze(u'http://www.google.com', {})
    check_default_metadata(result)

View file

@ -0,0 +1,71 @@
from nose.tools import *
from airtime_analyzer.replaygain_analyzer import ReplayGainAnalyzer
def check_default_metadata(metadata):
    ''' Check that the ReplayGain value in the metadata for our test audio files matches what we expect.
    :param metadata: a metadata dictionary containing a 'replay_gain' entry
    :return: Nothing
    '''
    # We give python-rgain some leeway here by specifying a tolerance. It's not perfectly consistent across codecs...
    tolerance = 0.30
    expected_replaygain = 5.0
    assert abs(metadata['replay_gain'] - expected_replaygain) < tolerance
def test_missing_replaygain():
    # analyze() must not raise when the replaygain executable cannot be found.
    old_rg = ReplayGainAnalyzer.REPLAYGAIN_EXECUTABLE
    ReplayGainAnalyzer.REPLAYGAIN_EXECUTABLE = 'foosdaf'
    try:
        ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo-utf8.mp3', dict())
    finally:
        # Need to put this back even if the call above fails, or the tests after this one break too
        ReplayGainAnalyzer.REPLAYGAIN_EXECUTABLE = old_rg
def test_invalid_filepath():
    # No assertions: this passes as long as analyze() does not raise.
    ReplayGainAnalyzer.analyze(u'non-existent-file', {})


def test_mp3_utf8():
    check_default_metadata(ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo-utf8.mp3', {}))


def test_mp3_dualmono():
    check_default_metadata(ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-dualmono.mp3', {}))


def test_mp3_jointstereo():
    check_default_metadata(ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-jointstereo.mp3', {}))


def test_mp3_simplestereo():
    check_default_metadata(ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-simplestereo.mp3', {}))


def test_mp3_stereo():
    check_default_metadata(ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.mp3', {}))


def test_mp3_mono():
    check_default_metadata(ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-mono.mp3', {}))


def test_ogg_stereo():
    check_default_metadata(ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.ogg', {}))


def test_invalid_wma():
    # No assertions: this passes as long as analyze() does not raise.
    ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo-invalid.wma', {})


def test_mp3_missing_id3_header():
    # No assertions: this passes as long as analyze() does not raise.
    ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-mp3-missingid3header.mp3', {})


def test_m4a_stereo():
    check_default_metadata(ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.m4a', {}))
''' WAVE is not supported by python-rgain yet
def test_wav_stereo():
metadata = ReplayGainAnalyzer.analyze(u'tests/test_data/44100Hz-16bit-stereo.wav', dict())
check_default_metadata(metadata)
'''