CC-5709 / CC-5705 : Airtime Analyzer
* Finished the skeleton of the airtime_analyzer service.
* Basic round-robin, reliable AMQP messaging works.
* Using a multiprocess architecture so the daemon survives analyzer crashes and failures don't propagate to other nodes.
* Basic metadata extractor using Mutagen is done.
* HTTP requests to the File API are next to come...
Parent: b6dd2e3152
Commit: a6a64a2b9e
15 changed files with 411 additions and 0 deletions
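The File API integration is still to come, so for reference only, here is a minimal sketch of what that callback could look like using the 'requests' dependency already listed in setup.py. The endpoint URL, payload shape, and auth scheme are assumptions, not the actual Airtime File API.

# Hypothetical sketch only: the real File API endpoint and payload are not
# defined in this commit. Uses the 'requests' dependency from setup.py.
import json
import requests

def report_analysis_results(file_id, metadata):
    """Send extracted metadata back to a (hypothetical) Airtime File API."""
    url = "http://localhost/rest/media/%s" % file_id        # assumed URL scheme
    response = requests.put(url,
                            data=json.dumps(metadata),
                            headers={"Content-Type": "application/json"},
                            auth=("api_key", ""))            # assumed auth scheme
    response.raise_for_status()
    return response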
1
python_apps/airtime_analyzer/MANIFEST.in
Normal file
@@ -0,0 +1 @@
include README.rst
30
python_apps/airtime_analyzer/README.rst
Normal file
@@ -0,0 +1,30 @@
Quick-and-dirty temporary installation instructions
====================================================

1. Set up a virtualenv
2. Activate it
3. pip install mutagen python-magic pika

You will need to allow the "airtime" RabbitMQ user to access the airtime-uploads exchange and queue:

    sudo rabbitmqctl set_permissions -p /airtime airtime airtime-uploads airtime-uploads airtime-uploads

Developers
==========

For development, you want to install airtime_analyzer system-wide but with everything symlinked back
to the source directory (for convenience), so run:

    $ sudo python setup.py develop

Unit Tests
==========

To run the unit tests, execute:

    $ nosetests
2
python_apps/airtime_analyzer/airtime_analyzer/__init__.py
Normal file
@@ -0,0 +1,2 @@
from airtime_analyzer import AirtimeAnalyzerServer
41
python_apps/airtime_analyzer/airtime_analyzer/airtime_analyzer.py
Normal file
@@ -0,0 +1,41 @@
import ConfigParser
from metadata_analyzer import MetadataAnalyzer
from replaygain_analyzer import ReplayGainAnalyzer
from message_listener import MessageListener


class AirtimeAnalyzerServer:

    _CONFIG_PATH = '/etc/airtime/airtime.conf'

    def __init__(self):

        # Read our config file
        rabbitmq_config = self.read_config_file()

        # Start listening for RabbitMQ messages telling us about newly
        # uploaded files.
        self._msg_listener = MessageListener(rabbitmq_config)

    def read_config_file(self):
        config = ConfigParser.SafeConfigParser()
        config_path = AirtimeAnalyzerServer._CONFIG_PATH
        try:
            config.readfp(open(config_path))
        except IOError as e:
            print "Failed to open config file at " + config_path + ": " + e.strerror
            exit(-1)
        except Exception as e:
            # Any other error while parsing the config is also fatal.
            print e
            exit(-1)

        return config


''' When being run from the command line, analyze a file passed
    as an argument. '''
if __name__ == "__main__":
    import sys
    from analyzer_pipeline import AnalyzerPipeline
    # Command-line analysis is still a stub.
    analyzers = AnalyzerPipeline()
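For reference, read_config_file() expects /etc/airtime/airtime.conf to contain a [rabbitmq] section with the keys MessageListener reads (host, port, user, password, vhost). Below is a sketch, not part of this commit, that writes such a section for a local dev setup; the values are assumptions and the password is a placeholder.

# Sketch only: generate an example [rabbitmq] section with the keys that
# MessageListener reads. Host/port/user/vhost are local-dev assumptions.
import ConfigParser

config = ConfigParser.SafeConfigParser()
config.add_section('rabbitmq')
config.set('rabbitmq', 'host', '127.0.0.1')
config.set('rabbitmq', 'port', '5672')
config.set('rabbitmq', 'user', 'airtime')
config.set('rabbitmq', 'password', 'CHANGE_ME')   # placeholder
config.set('rabbitmq', 'vhost', '/airtime')

with open('airtime.conf.example', 'wb') as f:
    config.write(f)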
10
python_apps/airtime_analyzer/airtime_analyzer/analyzer.py
Normal file
@@ -0,0 +1,10 @@

class Analyzer:

    @staticmethod
    def analyze(filename):
        raise NotImplementedError


class AnalyzerError(Exception):
    def __init__(self):
        super(AnalyzerError, self).__init__()
17
python_apps/airtime_analyzer/airtime_analyzer/analyzer_pipeline.py
Normal file
@@ -0,0 +1,17 @@
from metadata_analyzer import MetadataAnalyzer


class AnalyzerPipeline:

    def __init__(self):
        pass

    # TODO: Take a JSON message and perform the necessary analysis.
    # TODO: Document this thoroughly.
    @staticmethod
    def run_analysis(json_msg, queue):
        # TODO: Pass the JSON along to each analyzer??
        #print MetadataAnalyzer.analyze("foo.mp3")
        #print ReplayGainAnalyzer.analyze("foo.mp3")
        #raise Exception("Test Crash")
        queue.put(MetadataAnalyzer.analyze("foo.mp3"))
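run_analysis above is still a placeholder that always analyzes "foo.mp3". One possible shape for it once the upload message format is settled is sketched here for reference only; the "tmp_file_path" field name is an assumption, not something this commit defines.

# Sketch only: parse the (future) JSON upload message and analyze the file it
# points at. The "tmp_file_path" field is hypothetical.
import json
from metadata_analyzer import MetadataAnalyzer

def run_analysis_sketch(json_msg, queue):
    msg = json.loads(json_msg)
    results = MetadataAnalyzer.analyze(msg["tmp_file_path"])  # assumed field name
    # ReplayGainAnalyzer results would be merged in here once it is implemented.
    queue.put(results)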
102
python_apps/airtime_analyzer/airtime_analyzer/message_listener.py
Normal file
@@ -0,0 +1,102 @@
import sys
import pika
import multiprocessing
from analyzer_pipeline import AnalyzerPipeline
from analyzer import AnalyzerError

EXCHANGE = "airtime-uploads"
EXCHANGE_TYPE = "topic"
ROUTING_KEY = "" #"airtime.analyzer.tasks"
QUEUE = "airtime-uploads"


''' Listens for RabbitMQ messages about newly uploaded files and farms each
    one out to a worker process.

    - Round-robin messaging: multiple airtime_analyzer daemons can consume from
      the same queue and RabbitMQ distributes messages between them.
    - Acking: messages are explicitly acked (or nacked on failure) so delivery
      is reliable.
    - Multiprocess architecture: each file is analyzed in a child process so an
      analyzer crash cannot take down the daemon or cascade to other nodes.
'''
class MessageListener:

    def __init__(self, config):

        RMQ_CONFIG_SECTION = "rabbitmq"
        if not config.has_section(RMQ_CONFIG_SECTION):
            print "Error: rabbitmq section not found in the config file."
            exit(-1)

        self._host = config.get(RMQ_CONFIG_SECTION, 'host')
        self._port = config.getint(RMQ_CONFIG_SECTION, 'port')
        self._username = config.get(RMQ_CONFIG_SECTION, 'user')
        self._password = config.get(RMQ_CONFIG_SECTION, 'password')
        self._vhost = config.get(RMQ_CONFIG_SECTION, 'vhost')

        self._connection = pika.BlockingConnection(pika.ConnectionParameters(host=self._host,
            port=self._port, virtual_host=self._vhost,
            credentials=pika.credentials.PlainCredentials(self._username, self._password)))
        self._channel = self._connection.channel()
        self._channel.exchange_declare(exchange=EXCHANGE, type=EXCHANGE_TYPE)
        result = self._channel.queue_declare(queue=QUEUE, durable=True)

        self._channel.queue_bind(exchange=EXCHANGE, queue=QUEUE, routing_key=ROUTING_KEY)

        print " Listening for messages..."
        self._channel.basic_consume(MessageListener.msg_received_callback,
                                    queue=QUEUE, no_ack=False)

        try:
            self._channel.start_consuming()
        except KeyboardInterrupt:
            self._channel.stop_consuming()

        self._connection.close()

    # consume callback function
    @staticmethod
    def msg_received_callback(channel, method_frame, header_frame, body):
        print " - Received '%s' on routing_key '%s'" % (body, method_frame.routing_key)

        # Spin up a worker process. We use the multiprocessing module and multiprocessing.Queue
        # to pass objects between the processes so that if the analyzer process crashes, it does not
        # take down the rest of the daemon, and we NACK that message so that it doesn't get
        # propagated to other airtime_analyzer daemons (e.g. running on other servers).
        # We avoid cascading failure this way.
        try:
            MessageListener.spawn_analyzer_process(body)
        except Exception:
            #If ANY exception happens while processing a file, we're going to NACK to the
            #messaging server and tell it to remove the message from the queue.
            #(NACK is a negative acknowledgement. We could use ACK instead, but this might come
            # in handy in the future.)
            #Exceptions in this context are unexpected, unhandled errors. We try to recover
            #from as many errors as possible in AnalyzerPipeline, but we're safeguarding ourselves
            #here from any catastrophic or genuinely unexpected errors:
            channel.basic_nack(delivery_tag=method_frame.delivery_tag, multiple=False,
                               requeue=False) #Important that it doesn't requeue the message

            #TODO: Report this as a failed upload to the File Upload REST API.
        else:
            # ACK at the very end, after the message has been successfully processed.
            # If we don't ack, then RabbitMQ will redeliver a message in the future.
            channel.basic_ack(delivery_tag=method_frame.delivery_tag)

        # Anything else could happen here:
        # Send an email alert, send an XMPP message, trigger another process, etc.

    @staticmethod
    def spawn_analyzer_process(json_msg):

        q = multiprocessing.Queue()
        p = multiprocessing.Process(target=AnalyzerPipeline.run_analysis, args=(json_msg, q))
        p.start()
        p.join()
        if p.exitcode == 0:
            results = q.get()
            print "Server received results: "
            print results
        else:
            print "Analyzer process terminated unexpectedly."
            raise AnalyzerError()
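For local testing without PHP, the same kind of test message that tools/message_sender.php publishes can be sent from Python with pika. A sketch only, reusing the host, user, and vhost values from that script; the password is a placeholder.

# Sketch only (not part of this commit): publish a test message to the
# airtime-uploads exchange, mirroring tools/message_sender.php.
import pika

credentials = pika.credentials.PlainCredentials('airtime', 'CHANGE_ME')  # placeholder password
params = pika.ConnectionParameters(host='127.0.0.1', port=5672,
                                   virtual_host='/airtime',
                                   credentials=credentials)
connection = pika.BlockingConnection(params)
channel = connection.channel()
channel.basic_publish(exchange='airtime-uploads',
                      routing_key='',
                      body='hello analyzer',
                      properties=pika.BasicProperties(content_type='text/plain'))
connection.close()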
95
python_apps/airtime_analyzer/airtime_analyzer/metadata_analyzer.py
Normal file
@@ -0,0 +1,95 @@
import mutagen
import magic # For MIME type detection
from analyzer import Analyzer

class MetadataAnalyzer(Analyzer):

    def __init__(self):
        pass

    @staticmethod
    def analyze(filename):

        metadata = dict()
        #Extract metadata from an audio file using mutagen
        audio_file = mutagen.File(filename, easy=True)

        #Grab other file information that isn't encoded in a tag, but instead usually
        #in the file header. Mutagen breaks that out into a separate "info" object:
        info = audio_file.info
        metadata["sample_rate"] = info.sample_rate
        metadata["length_seconds"] = info.length
        metadata["bitrate"] = info.bitrate

        #Use the python-magic module to get the MIME type.
        mime_magic = magic.Magic(mime=True)
        metadata["mime_type"] = mime_magic.from_file(filename)

        #We normalize the mutagen tags slightly here, so if mutagen's tag names
        #ever change, we only have to update this mapping:
        mutagen_to_analyzer_mapping = {
            'title':         'title',
            'artist':        'artist',
            'album':         'album',
            'bpm':           'bpm',
            'composer':      'composer',
            'conductor':     'conductor',
            'copyright':     'copyright',
            'encoded_by':    'encoder',
            'genre':         'genre',
            'isrc':          'isrc',
            'label':         'label',
            'language':      'language',
            'last_modified': 'last_modified',
            'mood':          'mood',
            'replay_gain':   'replaygain',
            'track_number':  'tracknumber',
            'track_total':   'tracktotal',
            'website':       'website',
            'year':          'year',
        }

        for mutagen_tag, analyzer_tag in mutagen_to_analyzer_mapping.iteritems():
            try:
                metadata[analyzer_tag] = audio_file[mutagen_tag]

                # Some tags are returned as lists because there could be multiple values.
                # This is unusual so we're going to always just take the first item in the list.
                if isinstance(metadata[analyzer_tag], list):
                    metadata[analyzer_tag] = metadata[analyzer_tag][0]

            except KeyError:
                pass

        return metadata


'''
    For reference, the Airtime metadata fields are:
        title
        artist ("Creator" in Airtime)
        album
        bit rate
        BPM
        composer
        conductor
        copyright
        cue in
        cue out
        encoded by
        genre
        ISRC
        label
        language
        last modified
        length
        mime
        mood
        owner
        replay gain
        sample rate
        track number
        website
        year
'''
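A quick way to eyeball the extractor output locally, for reference only; "some_track.mp3" stands in for any audio file on disk.

# Sketch only: run the Mutagen-based extractor against a local file and print
# a few of the keys it fills in. "some_track.mp3" is a hypothetical path.
from metadata_analyzer import MetadataAnalyzer

metadata = MetadataAnalyzer.analyze("some_track.mp3")
print metadata["mime_type"], metadata["bitrate"], metadata["length_seconds"]
print metadata.get("title"), metadata.get("artist"), metadata.get("album")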
12
python_apps/airtime_analyzer/airtime_analyzer/replaygain_analyzer.py
Normal file
@@ -0,0 +1,12 @@
from analyzer import Analyzer

''' TODO: everything '''
class ReplayGainAnalyzer(Analyzer):

    def __init__(self):
        pass

    @staticmethod
    def analyze(filename):
        pass
21
python_apps/airtime_analyzer/bin/airtime_analyzer
Executable file
@@ -0,0 +1,21 @@
#!/usr/bin/env python

import daemon
import argparse
import airtime_analyzer as aa

VERSION = "1.0"

print "Airtime Analyzer " + VERSION

parser = argparse.ArgumentParser()
parser.add_argument("-d", "--daemon", help="run as a daemon", action="store_true")
args = parser.parse_args()

if args.daemon:
    with daemon.DaemonContext():
        analyzer = aa.AirtimeAnalyzerServer()
else:
    # Run without daemonizing
    analyzer = aa.AirtimeAnalyzerServer()
20
python_apps/airtime_analyzer/setup.py
Normal file
@@ -0,0 +1,20 @@
from setuptools import setup

setup(name='airtime_analyzer',
      version='0.1',
      description='Airtime Analyzer Worker and File Importer',
      url='http://github.com/sourcefabric/Airtime',
      author='Albert Santoni',
      author_email='albert.santoni@sourcefabric.org',
      license='MIT',
      packages=['airtime_analyzer'],
      scripts=['bin/airtime_analyzer'],
      install_requires=[
          'mutagen',
          'python-magic',
          'pika',
          'nose',
          'python-daemon',
          'requests',
      ],
      zip_safe=False)
0
python_apps/airtime_analyzer/tests/__init__.py
Normal file
@@ -0,0 +1,12 @@
from nose.tools import *
import airtime_analyzer_queue

def setup():
    print "SETUP!"

def teardown():
    print "TEAR DOWN!"

def test_basic():
    print "I RAN!"
47
python_apps/airtime_analyzer/tools/message_sender.php
Normal file
@@ -0,0 +1,47 @@
<?php
require_once('php-amqplib/amqp.inc');

//use PhpAmqpLib\Connection\AMQPConnection;
//use PhpAmqpLib\Message\AMQPMessage;

define('HOST', '127.0.0.1');
define('PORT', '5672');
define('USER', 'airtime');
define('PASS', 'QEFKX5GMKT4YNMOAL9R8');
define('VHOST', '/airtime');

$exchange = "airtime-uploads";
$exchangeType = "topic";
$queue = "airtime-uploads";
$routingKey = ""; //"airtime.analyzer.tasks";

if ($argc <= 1)
{
    echo("Usage: " . $argv[0] . " message\n");
    exit();
}

$message = $argv[1];

$connection = new AMQPConnection(HOST, PORT, USER, PASS, VHOST);
if (!isset($connection))
{
    echo "Failed to connect to the RabbitMQ server.";
    return;
}

$channel = $connection->channel();

// declare/create the queue
$channel->queue_declare($queue, false, true, false, false);

// declare/create the exchange as a topic exchange.
$channel->exchange_declare($exchange, $exchangeType, false, false, false);

$msg = new AMQPMessage($message, array("content_type" => "text/plain"));

$channel->basic_publish($msg, $exchange, $routingKey);
print "Sent $message ($routingKey)\n";
$channel->close();
$connection->close();
1
python_apps/airtime_analyzer/tools/php-amqplib
Symbolic link
@@ -0,0 +1 @@
../../../airtime_mvc/library/php-amqplib