274 lines
11 KiB
Python
274 lines
11 KiB
Python
import os
|
|
import hashlib
|
|
import mutagen
|
|
import logging
|
|
import math
|
|
import re
|
|
from api_clients import api_client
|
|
|
|
"""
|
|
list of supported easy tags in mutagen version 1.20
|
|
['albumartistsort', 'musicbrainz_albumstatus', 'lyricist', 'releasecountry', 'date', 'performer', 'musicbrainz_albumartistid', 'composer', 'encodedby', 'tracknumber', 'musicbrainz_albumid', 'album', 'asin', 'musicbrainz_artistid', 'mood', 'copyright', 'author', 'media', 'length', 'version', 'artistsort', 'titlesort', 'discsubtitle', 'website', 'musicip_fingerprint', 'conductor', 'compilation', 'barcode', 'performer:*', 'composersort', 'musicbrainz_discid', 'musicbrainz_albumtype', 'genre', 'isrc', 'discnumber', 'musicbrainz_trmid', 'replaygain_*_gain', 'musicip_puid', 'artist', 'title', 'bpm', 'musicbrainz_trackid', 'arranger', 'albumsort', 'replaygain_*_peak', 'organization']
|
|
"""
|
|
class AirtimeMetadata:
    """Translate audio-file tags between Airtime and mutagen.

    Airtime identifies metadata fields by ``MDATA_KEY_*`` names, while
    mutagen's "easy" interface uses short tag names (``title``, ``artist``,
    ...).  This class holds the mapping between the two naming schemes and
    provides helpers to read the tags of a file (``get_md_from_file``) and to
    write tags back into a file (``save_md_to_file``).
    """

    # ``basestring`` only exists on Python 2; fall back to ``str`` so the
    # string type checks below behave the same on Python 3.
    try:
        _string_types = (basestring,)
    except NameError:
        _string_types = (str,)

    # Code points U+0080-U+009F, i.e. the keys of ``cp1252toUnicode``.
    _C1_RANGE = re.compile(u"[\x80-\x9f]")

    # "<number>/<total>" style track numbers such as u'4/11'; the total is
    # optional so that u'4/' is still accepted.
    _TRACK_WITH_TOTAL = re.compile(r'^(\d+)/\d*')

    # Maximum length of each text field; longer values are truncated to fit
    # the database columns.
    _MAX_LENGTHS = (
        ('MDATA_KEY_GENRE', 64),
        ('MDATA_KEY_TITLE', 512),
        ('MDATA_KEY_CREATOR', 512),
        ('MDATA_KEY_SOURCE', 512),
        ('MDATA_KEY_MOOD', 64),
        ('MDATA_KEY_LABEL', 512),
        ('MDATA_KEY_COMPOSER', 512),
        ('MDATA_KEY_ENCODER', 255),
        ('MDATA_KEY_CONDUCTOR', 512),
        ('MDATA_KEY_YEAR', 16),
        ('MDATA_KEY_URL', 512),
        ('MDATA_KEY_ISRC', 512),
        ('MDATA_KEY_COPYRIGHT', 512),
    )

    # Number of bytes hashed per read in ``get_md5``.
    _MD5_CHUNK_SIZE = 64 * 1024

    def __init__(self):
        """Build the tag-name lookup tables and fetch the root logger."""
        # Airtime metadata key -> mutagen easy tag name.
        self.airtime2mutagen = {
            "MDATA_KEY_TITLE": "title",
            "MDATA_KEY_CREATOR": "artist",
            "MDATA_KEY_SOURCE": "album",
            "MDATA_KEY_GENRE": "genre",
            "MDATA_KEY_MOOD": "mood",
            "MDATA_KEY_TRACKNUMBER": "tracknumber",
            "MDATA_KEY_BPM": "bpm",
            "MDATA_KEY_LABEL": "organization",
            "MDATA_KEY_COMPOSER": "composer",
            "MDATA_KEY_ENCODER": "encodedby",
            "MDATA_KEY_CONDUCTOR": "conductor",
            "MDATA_KEY_YEAR": "date",
            "MDATA_KEY_URL": "website",
            "MDATA_KEY_ISRC": "isrc",
            "MDATA_KEY_COPYRIGHT": "copyright",
        }

        # mutagen easy tag name -> Airtime metadata key.  Derived from the
        # table above so the two directions can never get out of sync.
        self.mutagen2airtime = dict(
            (tag, key) for key, tag in self.airtime2mutagen.items())

        # Code points U+0080-U+009F mapped to the characters that cp1252
        # assigns to the same byte values.  Presumably needed for tags that
        # were written as cp1252 but decoded as latin-1 -- TODO confirm.
        self.cp1252toUnicode = {
            u"\x80": u"\u20AC",  # EURO SIGN
            u"\x82": u"\u201A",  # SINGLE LOW-9 QUOTATION MARK
            u"\x83": u"\u0192",  # LATIN SMALL LETTER F WITH HOOK
            u"\x84": u"\u201E",  # DOUBLE LOW-9 QUOTATION MARK
            u"\x85": u"\u2026",  # HORIZONTAL ELLIPSIS
            u"\x86": u"\u2020",  # DAGGER
            u"\x87": u"\u2021",  # DOUBLE DAGGER
            u"\x88": u"\u02C6",  # MODIFIER LETTER CIRCUMFLEX ACCENT
            u"\x89": u"\u2030",  # PER MILLE SIGN
            u"\x8A": u"\u0160",  # LATIN CAPITAL LETTER S WITH CARON
            u"\x8B": u"\u2039",  # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
            u"\x8C": u"\u0152",  # LATIN CAPITAL LIGATURE OE
            u"\x8E": u"\u017D",  # LATIN CAPITAL LETTER Z WITH CARON
            u"\x91": u"\u2018",  # LEFT SINGLE QUOTATION MARK
            u"\x92": u"\u2019",  # RIGHT SINGLE QUOTATION MARK
            u"\x93": u"\u201C",  # LEFT DOUBLE QUOTATION MARK
            u"\x94": u"\u201D",  # RIGHT DOUBLE QUOTATION MARK
            u"\x95": u"\u2022",  # BULLET
            u"\x96": u"\u2013",  # EN DASH
            u"\x97": u"\u2014",  # EM DASH
            u"\x98": u"\u02DC",  # SMALL TILDE
            u"\x99": u"\u2122",  # TRADE MARK SIGN
            u"\x9A": u"\u0161",  # LATIN SMALL LETTER S WITH CARON
            u"\x9B": u"\u203A",  # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
            u"\x9C": u"\u0153",  # LATIN SMALL LIGATURE OE
            u"\x9E": u"\u017E",  # LATIN SMALL LETTER Z WITH CARON
            u"\x9F": u"\u0178",  # LATIN CAPITAL LETTER Y WITH DIAERESIS
        }

        self.logger = logging.getLogger()

    def get_md5(self, filepath):
        """Return the hexadecimal MD5 digest of the file at ``filepath``.

        The file is read in fixed-size chunks and is always closed again,
        even when reading fails.  Errors from opening or reading the file
        propagate to the caller.
        """
        digest = hashlib.md5()
        with open(filepath, 'rb') as audio_file:
            while True:
                chunk = audio_file.read(self._MD5_CHUNK_SIZE)
                if not chunk:
                    break
                digest.update(chunk)
        return digest.hexdigest()

    def format_length(self, mutagen_length):
        """Format a duration given in seconds as ``h:m:s.f``.

        ``mutagen_length`` is anything ``float()`` accepts.  Hours, minutes
        and seconds are not zero-padded, and the fractional part is cut off
        (not rounded) after six digits, e.g. ``3661.5`` -> ``"1:1:1.5"``.
        """
        total = float(mutagen_length)
        hours = int(math.floor(total / 3600))
        remainder = total % 3600
        minutes = int(math.floor(remainder / 60))
        seconds = remainder % 60

        text = str(seconds)
        if "e" in text or "." not in text:
            # str() switches to exponent notation for very small values
            # ("1e-05"), which has no "." to split on; expand it instead.
            text = "%.6f" % seconds
        whole, _, fraction = text.partition(".")

        return "%s:%s:%s.%s" % (hours, minutes, whole, fraction[:6])

    def save_md_to_file(self, m):
        """Write the Airtime metadata in ``m`` into the file's tags.

        ``m`` is a dict keyed by ``MDATA_KEY_*`` names and must contain
        ``MDATA_KEY_FILEPATH``.  Only keys present in ``airtime2mutagen`` are
        written; ``None`` values and empty strings are skipped.  Byte strings
        are decoded as UTF-8, integers are stored as their decimal string.

        This is best effort: any failure is logged and swallowed, nothing is
        raised and nothing is returned.
        """
        try:
            airtime_file = mutagen.File(m['MDATA_KEY_FILEPATH'], easy=True)
            if airtime_file is None:
                # mutagen returns None when it cannot determine the file type
                raise ValueError("mutagen could not determine the file type")

            for key, value in m.items():
                tag = self.airtime2mutagen.get(key)
                if tag is None or value is None:
                    continue

                self.logger.debug("Saving %s to file", key)
                self.logger.debug(value)

                if isinstance(value, bytes):
                    # Decode only real byte strings; text that is already
                    # unicode cannot be decoded a second time.
                    value = value.decode("utf-8")

                if isinstance(value, self._string_types):
                    if value:
                        airtime_file[tag] = value
                elif isinstance(value, int):
                    airtime_file[tag] = str(value)

            airtime_file.save()
        except Exception as e:
            self.logger.error('Trying to save md')
            self.logger.error('Exception: %s', e)
            # .get() so a missing filepath key cannot raise inside the handler
            self.logger.error('Filepath %s', m.get('MDATA_KEY_FILEPATH'))

    def truncate_to_length(self, item, length):
        """Return ``item`` cut down to at most ``length`` characters.

        Integers are converted to their string form first.  Values that are
        neither strings nor integers yield ``None``.
        """
        if isinstance(item, int):
            item = str(item)
        if isinstance(item, self._string_types):
            return item[:length]
        return None

    def _fix_cp1252(self, text):
        """Replace U+0080-U+009F characters according to ``cp1252toUnicode``.

        Characters in that range without a table entry are left unchanged,
        as are values that are not strings.
        """
        if not isinstance(text, self._string_types):
            return text
        table = self.cp1252toUnicode
        return self._C1_RANGE.sub(
            lambda found: table.get(found.group(0), found.group(0)), text)

    def _parse_tracknumber(self, value):
        """Return the track number in ``value`` as an int, or None.

        Accepts plain numbers and the ``u'4/11'`` form (number/total).
        """
        try:
            return int(value)
        except (TypeError, ValueError):
            pass

        if isinstance(value, self._string_types):
            match = self._TRACK_WITH_TOTAL.match(value)
            if match is not None:
                return int(match.group(1))
        return None

    def get_md_from_file(self, filepath):
        """Read the metadata of the audio file at ``filepath``.

        Returns a dict keyed by ``MDATA_KEY_*`` names containing the MD5 of
        the file, every tag listed in ``mutagen2airtime`` that the file
        carries, and the bitrate, sample rate, duration and mime type
        reported by mutagen.  String values are passed through
        ``api_client.encode_to(..., 'utf-8')`` so the dict can be urlencoded.

        Returns ``None`` when the file cannot be hashed or opened by mutagen,
        or when mutagen does not recognise the file type.
        """
        self.logger.info("getting info from filepath %s", filepath)

        try:
            md = {'MDATA_KEY_MD5': self.get_md5(filepath)}
            file_info = mutagen.File(filepath, easy=True)
        except Exception as e:
            self.logger.error("failed getting metadata from %s", filepath)
            self.logger.error("Exception %s", e)
            return None

        if file_info is None:
            return None

        # Copy over every tag we have an Airtime name for; only the first
        # value of a multi-valued tag is used.
        for tag in file_info.keys():
            key = self.mutagen2airtime.get(tag)
            if key is None:
                continue
            values = file_info[tag]
            if not values:
                continue
            md[key] = self._fix_cp1252(values[0])

        if 'MDATA_KEY_TITLE' not in md:
            # Fall back to the file name without its extension.  splitext
            # strips only the last suffix, so dots inside the name survive
            # and a name without any extension is kept whole.
            base_name = os.path.basename(filepath)
            md['MDATA_KEY_TITLE'] = os.path.splitext(base_name)[0]

        # The track number may be in the format u'4/11'; we also need to make
        # sure that the tag really holds a track number (cc-2582).
        if 'MDATA_KEY_TRACKNUMBER' in md:
            track = self._parse_tracknumber(md['MDATA_KEY_TRACKNUMBER'])
            if track is None:
                del md['MDATA_KEY_TRACKNUMBER']
            else:
                md['MDATA_KEY_TRACKNUMBER'] = track

        # Make sure bpm is valid; more formats of this tag need to be checked
        # to assure correct parsing.
        if isinstance(md.get('MDATA_KEY_BPM'), self._string_types):
            try:
                md['MDATA_KEY_BPM'] = int(md['MDATA_KEY_BPM'])
            except (TypeError, ValueError):
                del md['MDATA_KEY_BPM']

        # Truncate text fields where needed to fit db requirements.
        for key, limit in self._MAX_LENGTHS:
            if key in md:
                md[key] = self.truncate_to_length(md[key], limit)

        md['MDATA_KEY_BITRATE'] = file_info.info.bitrate
        md['MDATA_KEY_SAMPLERATE'] = file_info.info.sample_rate
        md['MDATA_KEY_DURATION'] = self.format_length(file_info.info.length)
        md['MDATA_KEY_MIME'] = file_info.mime[0]

        mime = md['MDATA_KEY_MIME']
        if "mp3" in mime or "vorbis" in mime:
            md['MDATA_KEY_FTYPE'] = "audioclip"

        # Do this so the object can be urlencoded properly.
        for key, value in list(md.items()):
            if isinstance(value, self._string_types):
                md[key] = api_client.encode_to(value, 'utf-8')

        return md
|