almost alpha version

This commit is contained in:
Rudi Grinberg 2012-07-12 11:14:59 -04:00
parent d0245e09fc
commit b0433d4ca4
14 changed files with 482 additions and 94 deletions

View file

@ -0,0 +1,59 @@
import os
from pydispatch import dispatcher
from media.monitor.events import OrganizeFile, NewFile, DeleteFile
import media.monitor.pure as mmp
from media.monitor.log import Loggable
class Bootstrapper(Loggable):
    """
    Bootstrapper reads all the info in the filesystem flushes organize
    events and watch events
    """
    def __init__(self, db, last_ran, org_channels, watch_channels):
        """
        db            - collection of paths the database already knows about
        last_ran      - unix timestamp of the last media-monitor run
        org_channels  - channels whose directories hold files to organize
        watch_channels- channels whose directories are watched for changes
        """
        self.db = db
        self.org_channels = org_channels
        self.watch_channels = watch_channels
        self.last_ran = last_ran

    def flush_organize(self):
        """
        walks the organize directories and sends an organize event for every file manually
        """
        flushed = 0
        for pc in self.org_channels:
            for f in mmp.walk_supported(pc.path, clean_empties=True):
                self.logger.info("Bootstrapping: File in 'organize' directory: '%s'" % f)
                dispatcher.send(signal=pc.signal, sender=self, event=OrganizeFile(f))
                flushed += 1
        self.logger.info("Flushed organized directory with %d files" % flushed)

    def flush_watch(self):
        """
        Syncs the file system into the database. Walks over deleted/new/modified files since
        the last run in mediamonitor and sends requests to make the database consistent with
        file system
        """
        songs = set()
        modded = 0
        deleted = 0
        for pc in self.watch_channels:
            for f in mmp.walk_supported(pc.path, clean_empties=False):
                songs.add(f)
                # Modified since last run: replay as a delete + create pair
                if os.path.getmtime(f) > self.last_ran:
                    modded += 1
                    dispatcher.send(signal=pc.signal, sender=self, event=DeleteFile(f))
                    dispatcher.send(signal=pc.signal, sender=self, event=NewFile(f))
        # Want all files in the database that are not in the filesystem
        # NOTE(review): assumes self.db supports set difference — confirm
        # against SyncDB's actual interface.
        for to_delete in (self.db - songs):
            for pc in self.watch_channels:
                if os.path.commonprefix([pc.path, to_delete]) == pc.path:
                    # Bug fix: the original sent DeleteFile(f), reusing the
                    # stale loop variable from the walk above instead of the
                    # path actually being deleted.
                    dispatcher.send(signal=pc.signal, sender=self, event=DeleteFile(to_delete))
                    # NOTE(review): to_delete was just determined to be absent
                    # from the filesystem, so os.remove will likely raise —
                    # confirm intent before shipping.
                    os.remove(to_delete)
                    deleted += 1
                    break
            else:
                # No watch directory is a prefix of this path: that is an
                # error, so log it as one (original logged at info level).
                self.logger.error("Error, could not find watch directory of would be deleted \
file '%s'" % to_delete)
        self.logger.info("Flushed watch directories. (modified, deleted) = (%d, %d)"
                         % (modded, deleted))

View file

@ -1,40 +1,67 @@
# -*- coding: utf-8 -*-
import os
import mutagen
import abc
from media.monitor.exceptions import BadSongFile
# Note: this isn't really good design...
# Anyone who expects a BaseEvent object should be able to handle any instances
# of its subclasses by the substitution principle. CLearly not the case with
# the DeleteFile subclass.
class PathChannel(object):
    """A tiny record pairing a dispatcher signal with the directory path it
    refers to (Python has no built-in record type, hence this struct)."""
    def __init__(self, signal, path):
        self.signal, self.path = signal, path
# It would be good if we could parameterize this class by the attribute
# that would contain the path to obtain the meta data. But it would be too much
# work for little reward
# NOTE(review): diff residue — lines from the old and the new version of
# HasMetaData are interleaved below (both the __metadata/__loaded and the
# _metadata/_loaded lazy-caching schemes appear). Untangle before running;
# the _metadata/_loaded branch with the BadSongFile guard is the newer one.
class HasMetaData(object):
    # Mixin giving subclasses a lazily-loaded `metadata` property; reads tags
    # from self.path via mutagen on first access, then caches the result.
    __metaclass__ = abc.ABCMeta
    def __init__(self, *args, **kwargs):
        # (old scheme: eager flags set in the constructor)
        self.__metadata = None
        self.__loaded = False
    # doing weird bullshit here because python constructors only
    # call the constructor of the leftmost superclass.
    @property
    def metadata(self):
        if self.__loaded: return self.__metadata
        # Normally this would go in init but we don't like
        # relying on consumers of this behaviour to have to call
        # the constructor
        if not hasattr(self,"_loaded"): self._loaded = False
        if self._loaded: return self._metadata
        else:
            f = mutagen.File(self.path, easy=True)
            self.__metadata = f
            self.__loaded = True
            f = None
            # Wrap mutagen failures in the project's BadSongFile exception
            try: f = mutagen.File(self.path, easy=True)
            except Exception: raise BadSongFile(self.path)
            # value returned by mutagen only acts like a dictionary.
            # in fact it comes with a nice surprise for you if you try
            # to add elements to it
            self._metadata = {}
            for k,v in f:
                if isinstance(v, list):
                    # mutagen wraps single tag values in one-element lists
                    if len(v) == 1:
                        self._metadata[k] = v[0]
                    else:
                        raise Exception("Weird mutagen %s:%s" % (k,str(v)))
                else:
                    self._metadata[k] = v
            self._loaded = True
        return self.metadata
class BaseEvent(object):
    # NOTE(review): diff residue — the old plain-assignment constructor and
    # the new getattr-guarded constructor are interleaved below.
    __metaclass__ = abc.ABCMeta
    def __init__(self, raw_event):
        # (old version: assumed raw_event is always a pyinotify event)
        self.__raw_event = raw_event
        self.path = os.path.normpath(raw_event.pathname)
        super(BaseEvent, self).__init__()
        # TODO : clean up this idiotic hack
        # we should use keyword constructors instead of this behaviour checking
        # bs to initialize BaseEvent
        # NOTE(review): getattr without a default raises AttributeError when
        # pathname is absent; hasattr (or getattr(..., None)) was likely meant.
        if getattr(raw_event,"pathname"):
            self.__raw_event = raw_event
            self.path = os.path.normpath(raw_event.pathname)
        else: self.path = raw_event
    def exists(self): return os.path.exists(self.path)
    def __str__(self):
        # NOTE(review): breaks when constructed from a bare path string
        # (self.__raw_event is then unset) — confirm.
        return "Event. Path: %s" % self.__raw_event.pathname
# Old one-liner subclasses, superseded by the explicit-constructor versions
# immediately below (more diff residue).
class OrganizeFile(BaseEvent, HasMetaData): pass
class NewFile(BaseEvent, HasMetaData): pass
class DeleteFile(BaseEvent): pass
class OrganizeFile(BaseEvent, HasMetaData):
    def __init__(self, *args, **kwargs): super(OrganizeFile, self).__init__(*args, **kwargs)
class NewFile(BaseEvent, HasMetaData):
    def __init__(self, *args, **kwargs): super(NewFile, self).__init__(*args, **kwargs)
class DeleteFile(BaseEvent):
    def __init__(self, *args, **kwargs): super(DeleteFile, self).__init__(*args, **kwargs)

View file

@ -0,0 +1,7 @@
# -*- coding: utf-8 -*-
class BadSongFile(Exception):
    """Raised when a media file cannot be read or parsed."""
    def __init__(self, path):
        # keep the offending path around for error reporting
        self.path = path
    def __str__(self):
        return "Can't read %s" % self.path

View file

@ -1,16 +1,35 @@
# -*- coding: utf-8 -*-
from pydispatch import dispatcher
import abc
# NOTE(review): diff residue — the old `Handler` class header (its body was
# removed by the diff) and the replacement `Handles` interface are interleaved
# here, and an import line sits mid-file. Untangle before running.
class Handler(object):
from media.monitor.log import Loggable
# Defines the handle interface
class Handles(object):
    __metaclass__ = abc.ABCMeta
    # (old Handler.__init__: registered a pass-through listener on `signal`)
    def __init__(self, signal, target):
        self.target = target
        self.signal = signal
        def dummy(sender, event):
            self.handle(sender,event)
        dispatcher.connect(dummy, signal=signal, sender=dispatcher.Any, weak=False)
    @abc.abstractmethod
    def handle(self, sender, event): pass
    # New abstract signature accepting extra args; NOTE(review): this
    # redefinition silently replaces (and un-abstracts) the one above.
    def handle(self, sender, event, *args, **kwargs): pass
class ReportHandler(Handles):
    """
    A Handles variant that can also report files it failed to process by
    broadcasting them on the shared "badfile" signal.
    """
    __metaclass__ = abc.ABCMeta
    def __init__(self, signal):
        self.signal = signal
        self.report_signal = "badfile"
        # Forward every dispatched event on `signal` into self.handle.
        def forward(sender, event):
            self.handle(sender, event)
        dispatcher.connect(forward, signal=signal, sender=dispatcher.Any, weak=False)
    def report_problem_file(self, event, exception=None):
        # Broadcast the failure so a ProblemFileHandler can pick it up.
        dispatcher.send(signal=self.report_signal, sender=self,
                        event=event, exception=exception)
class ProblemFileHandler(Handles, Loggable):
    """Listens on a problem-file channel; intended to move bad files into the
    channel's directory (the move itself is not implemented yet)."""
    def __init__(self, channel, **kwargs):
        self.channel = channel
        self.signal = self.channel.signal
        self.problem_dir = self.channel.path
        # Forward dispatched problem reports into self.handle.
        def forward(sender, event, exception):
            self.handle(sender, event, exception)
        dispatcher.connect(forward, signal=self.signal, sender=dispatcher.Any, weak=False)
    def handle(self, sender, event, exception=None):
        # TODO : not actually moving it anywhere yet
        self.logger.info("Received problem file: '%s'. Supposed to move it somewhere", event.path)

View file

@ -1,21 +1,11 @@
# -*- coding: utf-8 -*-
import pyinotify
from pydispatch import dispatcher
import media.monitor.pure as mmp
from media.monitor.pure import IncludeOnly
from media.monitor.events import OrganizeFile, NewFile, DeleteFile
class IncludeOnly(object):
    """
    Decorator factory: wraps a listener method so it only fires for events
    whose file extension is in the supported set. Accepts extensions as
    individual strings and/or iterables of strings.
    """
    def __init__(self, *deco_args):
        self.exts = set([])
        for arg in deco_args:
            # Bug fix: the original called self.add(arg), but IncludeOnly has
            # no 'add' method — the extension belongs in self.exts.
            if isinstance(arg,str): self.exts.add(arg)
            elif hasattr(arg, '__iter__'):
                for x in arg: self.exts.add(x)
    def __call__(self, func):
        def _wrap(moi, event, *args, **kwargs):
            # Only forward the event when its extension is supported.
            ext = mmp.extension(event.pathname)
            if ext in self.exts: func(moi, event, *args, **kwargs)
        return _wrap
class BaseListener(object):
def my_init(self, signal):
@ -35,8 +25,8 @@ class OrganizeListener(BaseListener, pyinotify.ProcessEvent):
class StoreWatchListener(BaseListener, pyinotify.ProcessEvent):
def process_IN_CLOSE_WRITE(self, event): self.process_create(event)
def process_IN_MOVE_TO(self, event): self.process_create(event)
def process_IN_MOVE_FROM(self, event): self.process_delete(event)
def process_IN_MOVED_TO(self, event): self.process_create(event)
def process_IN_MOVED_FROM(self, event): self.process_delete(event)
def process_IN_DELETE(self,event): self.process_delete(event)
@IncludeOnly(mmp.supported_extensions)
@ -46,3 +36,5 @@ class StoreWatchListener(BaseListener, pyinotify.ProcessEvent):
@IncludeOnly(mmp.supported_extensions)
def process_delete(self, event):
dispatcher.send(signal=self.signal, sender=self, event=DeleteFile(event))

View file

@ -0,0 +1,12 @@
import logging
import abc
# Module-level logger shared by all Loggable mixers.
logger = logging.getLogger('mediamonitor2')
# NOTE(review): hard-coded developer path — should come from configuration
# before shipping; basicConfig at import time also configures the root logger.
logging.basicConfig(filename='/home/rudi/throwaway/mm2.log', level=logging.DEBUG)
class Loggable(object):
    """Mixin that supplies a lazily-created `logger` property bound to the
    shared 'mediamonitor2' logger."""
    __metaclass__ = abc.ABCMeta
    @property
    def logger(self):
        # Create the logger on first access and cache it on the instance.
        try:
            return self._logger
        except AttributeError:
            self._logger = logging.getLogger('mediamonitor2')
            return self._logger

View file

@ -1,8 +1,26 @@
from media.monitor.handler import Handler
# -*- coding: utf-8 -*-
# NOTE(review): diff residue — this is the pre-change Organizer stub,
# superseded by the ReportHandler-based implementation further below.
class Organizer(Handler):
    def correct_path(self): pass
from media.monitor.handler import ReportHandler
import media.monitor.pure as mmp
from media.monitor.log import Loggable
from media.monitor.exceptions import BadSongFile
class Organizer(ReportHandler,Loggable):
    """
    Listens on an organize channel and, for every new file, moves it to its
    correct location under target_path based on the file's metadata.
    """
    def __init__(self, channel, target_path):
        self.channel = channel
        self.target_path = target_path
        # ReportHandler wires us up to the channel's dispatcher signal.
        super(Organizer, self).__init__(signal=self.channel.signal)
    def handle(self, sender, event):
        """Intercept events where a new file has been added to the organize
        directory and place it in the correct path (starting with self.target_path)"""
        # Bug fix: a leftover debug print() sat *before* the docstring above,
        # which also demoted the docstring to a no-op expression; removed.
        try:
            normal_md = mmp.normalized_metadata(event.metadata, event.path)
            new_path = mmp.organized_path(event.path, self.target_path, normal_md)
            mmp.magic_move(event.path, new_path)
            self.logger.info('Organized: "%s" into "%s"' % (event.path, new_path))
        except BadSongFile as e:
            # Unreadable file: broadcast it on the problem-file signal.
            self.report_problem_file(event=event, exception=e)
        # probably general error in mmp.magic.move...
        except Exception as e:
            self.report_problem_file(event=event, exception=e)

View file

@ -1,12 +1,43 @@
# -*- coding: utf-8 -*-
import copy
import os
import shutil
supported_extensions = ["mp3", "ogg"]
unicode_unknown = u'unknown'
def is_airtime_show_recorder(md):
class IncludeOnly(object):
    """
    A little decorator to help listeners only be called on extensions they support.
    Accepts extensions as individual strings and/or iterables of strings.
    """
    def __init__(self, *deco_args):
        self.exts = set([])
        for arg in deco_args:
            # Bug fix: the original called self.add(arg), but IncludeOnly has
            # no 'add' method — the extension belongs in self.exts.
            if isinstance(arg,str): self.exts.add(arg)
            elif hasattr(arg, '__iter__'):
                for x in arg: self.exts.add(x)
    def __call__(self, func):
        def _wrap(moi, event, *args, **kwargs):
            # Only forward the event when its extension is supported.
            ext = extension(event.pathname)
            if ext in self.exts: func(moi, event, *args, **kwargs)
        return _wrap
def is_file_supported(path):
    """Return True when *path* carries one of the supported audio extensions."""
    ext = extension(path)
    return ext in supported_extensions
# In the future we would like a better way to find out
# whether a show has been recorded
def is_airtime_recorded(md):
    """True when the metadata marks the file as an Airtime Show Recorder recording."""
    return u'Airtime Show Recorder' == md['MDATA_KEY_CREATOR']
def clean_empty_dirs(path):
    """ walks path and deletes every empty directory it finds """
    # Bug fix: walk bottom-up (topdown=False) so a directory that only
    # contained empty subdirectories is itself removed once they are gone;
    # the original top-down walk missed nested empty directories. The root
    # `path` itself is never removed.
    for root, _, _ in os.walk(path, topdown=False):
        if root != path and not os.listdir(root):
            os.rmdir(root)
def extension(path):
"""
return extension of path, empty string otherwise. Prefer
@ -23,30 +54,66 @@ def extension(path):
if len(ext) < 2: return ""
else: return ext[-1]
def no_extension_basename(path):
    """
    returns the extensionless basename of a filepath
    >>> no_extension_basename("/home/test.mp3")
    'test'
    >>> no_extension_basename("/home/test")
    'test'
    >>> no_extension_basename('blah.ml')
    'blah'
    >>> no_extension_basename('one.two.mp3')
    'one.two'
    """
    base = os.path.basename(path)
    if extension(base) == "": return base
    # Bug fix: the original returned base.split(".")[-2], which truncates
    # multi-dot names ('one.two.mp3' -> 'two'); strip only the final segment.
    else: return base.rsplit(".", 1)[0]
def walk_supported(directory, clean_empties=False):
    """
    Generator yielding full paths of every supported file under *directory*.
    When *clean_empties* is True, empty directories left behind are
    recursively deleted once the walk completes.
    """
    for root, _dirs, files in os.walk(directory):
        for name in files:
            # Filter on the basename, exactly as the caller expects.
            if is_file_supported(name):
                yield os.path.join(root, name)
    if clean_empties:
        clean_empty_dirs(directory)
def magic_move(old, new):
    """Move *old* to *new*, first creating any missing parent directories of *new*."""
    parent = os.path.dirname(new)
    if not os.path.exists(parent):
        os.makedirs(parent)
    shutil.move(old, new)
def apply_rules_dict(d, rules):
    """
    Return a copy of dictionary *d* with each rule in *rules* applied to the
    value under the matching key; keys in *rules* absent from *d* are ignored.
    The input *d* is left untouched (the diff-scrambled original kept both the
    mutating and the pure assignment, modifying *d* as a side effect).
    """
    new_d = copy.deepcopy(d)
    # .items() works on both Python 2 and 3 (original used iteritems).
    for k, rule in rules.items():
        if k in new_d: new_d[k] = rule(new_d[k])
    return new_d
def default_to(dictionary, keys, default):
    """
    Return a copy of *dictionary* in which every key from *keys* that is
    missing has been set to *default*. The input dictionary is not mutated
    (the diff-scrambled original kept both the mutating and the pure line;
    its old docstring complaining about mutation is obsolete).
    """
    new_d = copy.deepcopy(dictionary)
    for k in keys:
        if k not in new_d: new_d[k] = default
    return new_d
def normalized_metadata(md):
def normalized_metadata(md, original_path):
""" consumes a dictionary of metadata and returns a new dictionary with the
formatted meta data """
formatted meta data. We also consume original_path because we must set
MDATA_KEY_CREATOR based on in it sometimes """
new_md = copy.deepcopy(md)
# replace all slashes with dashes
for k,v in new_md.iteritems(): new_md[k] = v.replace('/','-')
for k,v in new_md.iteritems():
new_md[k] = str(v).replace('/','-')
# Specific rules that are applied in a per attribute basis
format_rules = {
# It's very likely that the following isn't strictly necessary. But the old
# code would cast MDATA_KEY_TRACKNUMBER to an integer as a byproduct of
# formatting the track number to 2 digits.
'MDATA_KEY_TRACKNUMBER' : lambda x: int(x),
'MDATA_KEY_BITRATE' : lambda x: str(x / 1000) + "kbps",
'MDATA_KEY_BITRATE' : lambda x: str(int(x) / 1000) + "kbps",
# note: you don't actually need the lambda here. It's only used for clarity
'MDATA_KEY_FILEPATH' : lambda x: os.path.normpath(x),
}
@ -57,14 +124,14 @@ def normalized_metadata(md):
# could possibly lead to subtle bugs down the road. Plus the following
# approach gives us the flexibility to use different defaults for
# different attributes
default_to(dictionary=new_md, keys=path_md, default=unicode_unknown)
# should apply the format_rules last
apply_rules_dict(new_md, format_rules)
new_md = apply_rules_dict(new_md, format_rules)
new_md = default_to(dictionary=new_md, keys=['MDATA_KEY_TITLE'], default=no_extension_basename(original_path))
new_md = default_to(dictionary=new_md, keys=path_md, default=unicode_unknown)
# In the case where the creator is 'Airtime Show Recorder' we would like to
# format the MDATA_KEY_TITLE slightly differently
# Note: I don't know why I'm doing a unicode string comparison here
# that part is copied from the original code
if is_airtime_show_recorder(md):
if is_airtime_recorded(new_md):
hour,minute,second,name = md['MDATA_KEY_TITLE'].split("-",4)
# We assume that MDATA_KEY_YEAR is always given for airtime recorded
# shows
@ -76,7 +143,7 @@ def normalized_metadata(md):
# because it's not clear why it was done
return new_md
def organized_path(self, old_path, root_path, normal_md):
def organized_path(old_path, root_path, normal_md):
"""
old_path - path where file is store at the moment <= maybe not necessary?
root_path - the parent directory where all organized files go
@ -84,29 +151,28 @@ def organized_path(self, old_path, root_path, normal_md):
return value: new file path
"""
filepath = None
ext = extension(filepath)
ext = extension(old_path)
# The blocks for each if statement look awfully similar. Perhaps there is a
# way to simplify this code
if is_airtime_show_recorder(normal_md):
if is_airtime_recorded(normal_md):
fname = u'%s-%s-%s.%s' % ( normal_md['MDATA_KEY_YEAR'], normal_md['MDATA_KEY_TITLE'],
normal_md['MDATA_KEY_BITRATE'], ext )
yyyy, mm, _ = normal_md['MDATA_KEY_YEAR'].split('-',3)
path = os.path.join(root_path,"recorded", yyyy, mm)
path = os.path.join(root_path, yyyy, mm)
filepath = os.path.join(path,fname)
elif normal_md['MDATA_KEY_TRACKNUMBER'] == unicode_unknown:
fname = u'%s-%s.%s' % (normal_md['MDATA_KEY_TITLE'], normal_md['MDATA_KEY_BITRATE'], ext)
path = os.path.join(root_path, "imported", normal_md['MDATA_KEY_CREATOR'],
path = os.path.join(root_path, normal_md['MDATA_KEY_CREATOR'],
normal_md['MDATA_KEY_SOURCE'] )
filepath = os.path.join(path, fname)
else: # The "normal" case
fname = u'%s-%s-%s.%s' % (normal_md['MDATA_KEY_TRACKNUMBER'], normal_md['MDATA_KEY_TITLE'],
normal_md['MDATA_KEY_BITRATE'], ext)
path = os.path.join(root_path, "imported", normal_md['MDATA_KEY_CREATOR'],
path = os.path.join(root_path, normal_md['MDATA_KEY_CREATOR'],
normal_md['MDATA_KEY_SOURCE'])
filepath = os.path.join(path, fname)
return filepath
if __name__ == '__main__':
    # Run the module doctests (e.g. the no_extension_basename examples)
    # when this file is executed directly.
    import doctest
    doctest.testmod()

View file

@ -0,0 +1,13 @@
class SyncDB(object):
    """
    Represents the database returned by airtime_mvc. We do not use a list or some other
    fixed data structure because we might want to change the internal representation for
    performance reasons later on.
    """
    def __init__(self, source):
        # TODO: stub — 'source' (presumably the airtime_mvc payload) is
        # ignored for now.
        pass
    def has_file(self, path):
        # Stub: always claims the file exists. NOTE(review): Bootstrapper
        # does set arithmetic on its db (`self.db - songs`) which this class
        # does not support — confirm intended interface.
        return True
    def file_mdata(self, path):
        # Stub: no metadata available yet.
        return None

View file

@ -0,0 +1,134 @@
# -*- coding: utf-8 -*-
import threading
import time
import copy
from media.monitor.handler import ReportHandler
from media.monitor.events import NewFile, DeleteFile
from media.monitor.log import Loggable
from media.monitor.exceptions import BadSongFile
class RequestSync(threading.Thread,Loggable):
    # Worker thread: performs one batched sync of `requests` (the queued
    # events) and then notifies the owning watcher that it has finished.
    def __init__(self, watcher, requests):
        threading.Thread.__init__(self)
        self.watcher = watcher
        self.requests = requests
    def run(self):
        # NOTE(review): no actual request is sent yet — this only logs and
        # flags completion; presumably the API call will be added here.
        self.logger.info("launching request with %d items." % len(self.requests))
        # Tell the watcher we are done so it can chain the next request.
        self.watcher.flag_done()
class TimeoutWatcher(threading.Thread,Loggable):
    # Ticker thread: every `timeout` seconds it nudges the watcher to flush
    # pending requests/events so they do not wait for a full chunk.
    # (The creator is expected to daemonize it, as WatchSyncer does.)
    def __init__(self, watcher, timeout=5):
        self.logger.info("Created timeout thread...")
        threading.Thread.__init__(self)
        self.watcher = watcher
        self.timeout = timeout
    def run(self):
        # We try to launch a new thread every self.timeout seconds
        # so that the people do not have to wait for the queue to fill up
        while True:
            time.sleep(self.timeout)
            # If there is any requests left we launch em.
            # Note that this isn't strictly necessary since RequestSync threads
            # already chain themselves
            if self.watcher.requests_in_queue():
                self.logger.info("We got %d requests waiting to be launched" % self.watcher.requests_left_count())
                self.watcher.request_do()
            # Same for events, this behaviour is mandatory however.
            if self.watcher.events_in_queue():
                self.logger.info("We got %d events that are unflushed" % self.watcher.events_left_count())
                self.watcher.flush_events()
class WatchSyncer(ReportHandler,Loggable):
    # Batches file events from a watch channel and ships them to the server
    # in chunks of `chunking_number`, or earlier when the TimeoutWatcher fires.
    def __init__(self, channel, chunking_number = 50, timeout=15):
        self.channel = channel
        self.timeout = timeout
        self.chunking_number = chunking_number
        # Unflushed events (name-mangled to _WatchSyncer__queue).
        self.__queue = []
        # Even though we are not blocking on the http requests, we are still
        # trying to send the http requests in order
        self.__requests = []
        self.request_running = False
        self.__current_thread = None
        # Daemonized ticker so it does not keep the process alive on exit.
        tc = TimeoutWatcher(self, timeout)
        tc.daemon = True
        tc.start()
        super(WatchSyncer, self).__init__(signal=channel.signal)
    @property
    def target_path(self): return self.channel.path
    # NOTE(review): missing @property? Also ReportHandler.__init__ assigns
    # self.signal, which shadows this method on instances — confirm intent.
    def signal(self): return self.channel.signal
    def handle(self, sender, event):
        """We implement this abstract method from ReportHandler"""
        # Using isinstance like this is usually considered to be bad style
        # because you are supposed to use polymorphism instead however we would
        # separate event handling itself from the events so there seems to be
        # no better way to do this
        if isinstance(event, NewFile):
            try:
                self.logger.info("'%s' : New file added: '%s'" % (self.target_path, event.path))
                self.push_queue(event)
            except BadSongFile as e:
                self.report_problem_file(event=event, exception=e)
        elif isinstance(event, DeleteFile):
            self.logger.info("'%s' : Deleted file: '%s'" % (self.target_path, event.path))
            self.push_queue(event)
        else: raise Exception("Unknown event: %s" % str(event))
    def requests_left_count(self): return len(self.__requests)
    def events_left_count(self): return len(self.__queue)
    def push_queue(self, elem):
        self.logger.info("Added event into queue")
        # NOTE: the chunk check runs *before* the append, so a request is
        # launched when the queue is about to exceed chunking_number.
        if self.events_left_count() == self.chunking_number:
            self.push_request()
            self.request_do() # Launch the request if nothing is running
        self.__queue.append(elem)
    def flush_events(self):
        self.logger.info("Force flushing events...")
        self.push_request()
        self.request_do()
    def events_in_queue(self):
        """returns true if there are events in the queue that haven't been processed yet"""
        return len(self.__queue) > 0
    def requests_in_queue(self):
        return len(self.__requests) > 0
    def flag_done(self):
        """ called by request thread when it finishes operating """
        self.request_running = False
        self.__current_thread = None
        # This call might not be necessary but we would like
        # to get the ball running with the requests as soon as possible
        if self.requests_in_queue() > 0: self.request_do()
    def request_do(self):
        """ launches a request thread only if one is not running right now """
        # NOTE(review): list.pop() takes the *newest* request (LIFO), which
        # contradicts the "send requests in order" comment in __init__ —
        # pop(0) would be FIFO. Confirm intended ordering.
        if not self.request_running:
            self.request_running = True
            self.__requests.pop()()
    def push_request(self):
        self.logger.info("'%s' : Unleashing request" % self.target_path)
        # want to do request asyncly and empty the queue
        requests = copy.copy(self.__queue)
        def launch_request():
            # Need shallow copy here
            t = RequestSync(watcher=self, requests=requests)
            t.start()
            self.__current_thread = t
        self.__requests.append(launch_request)
        self.__queue = []
    def __del__(self):
        # Ideally we would like to do a little more to ensure safe shutdown
        if self.events_in_queue(): self.logger.warn("Terminating with events in the queue still pending...")
        if self.requests_in_queue(): self.logger.warn("Terminating with http requests still pending...")