Merge branch 'cc-5709-airtime-analyzer' into cc-5709-airtime-analyzer-saas

This commit is contained in:
Albert Santoni 2014-05-30 13:04:08 -04:00
commit 8ae1be265b
2 changed files with 30 additions and 1 deletions

View File

@ -395,10 +395,15 @@ class Rest_MediaController extends Zend_Rest_Controller
if ($stringLengthValidator) { if ($stringLengthValidator) {
$value = substr($value, 0, $stringLengthValidator->getMax()); $value = substr($value, 0, $stringLengthValidator->getMax());
} }
$value = $this->stripInvalidUtf8Characters($value);
} }
} }
if (!$fileForm->isValidPartial($whiteList)) { if (!$fileForm->isValidPartial($whiteList)) {
$errors = $fileForm->getErrors();
$messages = $fileForm->getMessages();
Logging::error($messages);
$file->setDbImportStatus(2); $file->setDbImportStatus(2);
$file->setDbHidden(true); $file->setDbHidden(true);
$this->invalidDataResponse(); $this->invalidDataResponse();
@ -519,5 +524,25 @@ class Rest_MediaController extends Zend_Rest_Controller
} }
return $metadata; return $metadata;
} }
/**
 * Replace bytes that do not form acceptable UTF-8 text with '?'.
 *
 * Works on raw bytes (the patterns deliberately omit the "u" modifier) in
 * two regex passes followed by an mbstring UTF-8 -> UTF-8 conversion:
 *
 *  - Pass 1 replaces: selected ASCII control bytes; an ASCII byte followed
 *    by stray continuation bytes (the ASCII byte is part of the match, so
 *    it is replaced as well); the overlong 2-byte lead bytes C0/C1; every
 *    lead byte F0-FF (i.e. all 4-byte sequences, code points U+10000 and
 *    above) together with any continuation bytes after it; and 2-/3-byte
 *    lead bytes followed by the wrong number of continuation bytes.
 *  - Pass 2 replaces overlong 3-byte sequences (E0 80-9F xx) and UTF-16
 *    surrogates (ED A0-BF xx).
 *
 * NOTE(review): the control-character class lists \x0E-\x19 (plus a
 * redundant \x10), so \x1A-\x1F are left untouched. This looks like it was
 * meant to be \x0E-\x1F; confirm before changing, as it alters the output.
 *
 * @param string $string Raw byte string to sanitize.
 * @return string The sanitized string.
 */
private function stripInvalidUtf8Characters($string)
{
    $patterns = array(
        // Pass 1: control bytes, stray continuation bytes, overlong 2-byte
        // sequences, 4-byte sequences and truncated/over-long 2-/3-byte ones.
        '/[\x00-\x08\x10\x0B\x0C\x0E-\x19\x7F]'.
        '|[\x00-\x7F][\x80-\xBF]+'.
        '|([\xC0\xC1]|[\xF0-\xFF])[\x80-\xBF]*'.
        '|[\xC2-\xDF]((?![\x80-\xBF])|[\x80-\xBF]{2,})'.
        '|[\xE0-\xEF](([\x80-\xBF](?![\x80-\xBF]))|(?![\x80-\xBF]{2})|[\x80-\xBF]{3,})/S',
        // Pass 2: overlong 3-byte sequences and UTF-16 surrogates.
        '/\xE0[\x80-\x9F][\x80-\xBF]'.
        '|\xED[\xA0-\xBF][\x80-\xBF]/S',
    );

    foreach ($patterns as $pattern) {
        $cleaned = preg_replace($pattern, '?', $string);
        // preg_replace() returns null when PCRE fails (e.g. backtrack limit
        // hit). Keep the previous string in that case instead of letting the
        // null propagate and silently turn the whole value into ''.
        if ($cleaned !== null) {
            $string = $cleaned;
        }
    }

    // Final UTF-8 -> UTF-8 conversion as a last safety net; mbstring is
    // expected to substitute any ill-formed sequence the patterns missed.
    return mb_convert_encoding($string, 'UTF-8', 'UTF-8');
}
} }

View File

@ -83,7 +83,11 @@ class MetadataAnalyzer(Analyzer):
track_number = audio_file["tracknumber"] track_number = audio_file["tracknumber"]
if isinstance(track_number, list): # Sometimes tracknumber is a list, ugh if isinstance(track_number, list): # Sometimes tracknumber is a list, ugh
track_number = track_number[0] track_number = track_number[0]
track_number_tokens = track_number.split(u'/') track_number_tokens = track_number
if u'/' in track_number:
track_number_tokens = track_number.split(u'/')
elif u'-' in track_number:
track_number_tokens = track_number.split(u'-')
track_number = track_number_tokens[0] track_number = track_number_tokens[0]
metadata["track_number"] = track_number metadata["track_number"] = track_number
track_total = track_number_tokens[1] track_total = track_number_tokens[1]