CC-2747: When metadata contains non-UTF-8 encoded text, we should handle it

- we cover some cases with cp1252 encoding for now.
- extra fix: Logging.php (copied from 2.0.0)
This commit is contained in:
James 2011-08-31 14:46:21 -04:00
parent 982c8f9c20
commit 22c9416c31
2 changed files with 47 additions and 3 deletions

View File

@ -16,4 +16,9 @@ class Logging {
/**
 * Set the path held in the class-level static $_path.
 *
 * @param mixed $path Value stored as-is in self::$_path; no validation is done here.
 */
public static function setLogPath($path)
{
    self::$_path = $path;
}
/**
 * Write a message to the log at info level.
 *
 * Obtains the logger from self::getLogger() and passes the message,
 * unchanged, to its info() method.
 *
 * @param mixed $p_msg Message to record.
 */
public static function log($p_msg)
{
    self::getLogger()->info($p_msg);
}
}

View File

@ -49,6 +49,36 @@ class AirtimeMetadata:
"isrc": "MDATA_KEY_ISRC",\
"copyright": "MDATA_KEY_COPYRIGHT",\
}
self.cp1252toUnicode = {
u"\x80": u"\u20AC", # EURO SIGN
u"\x82": u"\u201A", # SINGLE LOW-9 QUOTATION MARK
u"\x83": u"\u0192", # LATIN SMALL LETTER F WITH HOOK
u"\x84": u"\u201E", # DOUBLE LOW-9 QUOTATION MARK
u"\x85": u"\u2026", # HORIZONTAL ELLIPSIS
u"\x86": u"\u2020", # DAGGER
u"\x87": u"\u2021", # DOUBLE DAGGER
u"\x88": u"\u02C6", # MODIFIER LETTER CIRCUMFLEX ACCENT
u"\x89": u"\u2030", # PER MILLE SIGN
u"\x8A": u"\u0160", # LATIN CAPITAL LETTER S WITH CARON
u"\x8B": u"\u2039", # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
u"\x8C": u"\u0152", # LATIN CAPITAL LIGATURE OE
u"\x8E": u"\u017D", # LATIN CAPITAL LETTER Z WITH CARON
u"\x91": u"\u2018", # LEFT SINGLE QUOTATION MARK
u"\x92": u"\u2019", # RIGHT SINGLE QUOTATION MARK
u"\x93": u"\u201C", # LEFT DOUBLE QUOTATION MARK
u"\x94": u"\u201D", # RIGHT DOUBLE QUOTATION MARK
u"\x95": u"\u2022", # BULLET
u"\x96": u"\u2013", # EN DASH
u"\x97": u"\u2014", # EM DASH
u"\x98": u"\u02DC", # SMALL TILDE
u"\x99": u"\u2122", # TRADE MARK SIGN
u"\x9A": u"\u0161", # LATIN SMALL LETTER S WITH CARON
u"\x9B": u"\u203A", # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
u"\x9C": u"\u0153", # LATIN SMALL LIGATURE OE
u"\x9E": u"\u017E", # LATIN SMALL LETTER Z WITH CARON
u"\x9F": u"\u0178", # LATIN CAPITAL LETTER Y WITH DIAERESIS
}
self.logger = logging.getLogger()
@ -131,7 +161,8 @@ class AirtimeMetadata:
self.logger.error("Exception %s", e)
return None
self.logger.info("sDFSDFSDF")
self.logger.info(file_info)
if file_info is None:
return None
@ -139,8 +170,16 @@ class AirtimeMetadata:
if file_info is not None:
for key in file_info.keys() :
if key in self.mutagen2airtime :
md[self.mutagen2airtime[key]] = file_info[key][0]
info = file_info[key][0]
while 1:
temp = re.search(u"[\x80-\x9f]", info)
if temp is not None:
s = temp.group(0)
replace = self.cp1252toUnicode.get(s)
info = re.sub(s, replace, info)
else:
break
md[self.mutagen2airtime[key]] = info
if 'MDATA_KEY_TITLE' not in md:
#get rid of file extension from original name, name might have more than 1 '.' in it.
#filepath = to_unicode(filepath)