CC-5862: Invalid UTF-8 chars cause DB error
* Strip and validate UTF-8 strings in the Media API
* Also properly parse track numbers containing "-"
This commit is contained in:
parent
7caa42cf69
commit
1373d4984f
|
@ -402,10 +402,15 @@ class Rest_MediaController extends Zend_Rest_Controller
|
|||
if ($stringLengthValidator) {
|
||||
$value = substr($value, 0, $stringLengthValidator->getMax());
|
||||
}
|
||||
|
||||
$value = $this->stripInvalidUtf8Characters($value);
|
||||
}
|
||||
}
|
||||
|
||||
if (!$fileForm->isValidPartial($whiteList)) {
|
||||
$errors = $fileForm->getErrors();
|
||||
$messages = $fileForm->getMessages();
|
||||
Logging::error($messages);
|
||||
$file->setDbImportStatus(2);
|
||||
$file->setDbHidden(true);
|
||||
$this->invalidDataResponse();
|
||||
|
@ -526,5 +531,25 @@ class Rest_MediaController extends Zend_Rest_Controller
|
|||
}
|
||||
return $metadata;
|
||||
}
|
||||
|
||||
/**
 * Replace bytes that do not form acceptable UTF-8 text with "?".
 *
 * The string is treated as raw bytes (the patterns deliberately do NOT use
 * the /u modifier, which would make PCRE reject malformed input outright).
 *
 * Replaced with "?":
 *  - C0 control characters other than TAB, LF and CR, plus DEL
 *  - stray continuation bytes following an ASCII character
 *  - overlong 2-byte and 3-byte encodings, and UTF-16 surrogates
 *  - lead bytes with too few or too many continuation bytes
 *  - every sequence starting with 0xF0-0xFF, i.e. all 4-byte characters
 *    (U+10000 and above). NOTE(review): this also drops valid
 *    supplementary-plane characters; kept as in the original, confirm it
 *    is intended.
 *
 * @param string $string Raw, possibly malformed, byte string.
 *
 * @return string The sanitised string.
 */
private function stripInvalidUtf8Characters($string)
{
    // Control characters, stray continuation bytes, overlong 2-byte
    // sequences, anything starting with 0xF0-0xFF, and 2-/3-byte lead bytes
    // followed by the wrong number of continuation bytes.
    //
    // The control class ends at \x1F (not \x19) so that all C0 controls
    // except \x09, \x0A and \x0D are covered. The stray-continuation
    // alternative uses a lookbehind so the valid ASCII character in front
    // of the stray bytes is preserved instead of being replaced with them.
    $firstPass = preg_replace(
        '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]'.
        '|(?<=[\x00-\x7F])[\x80-\xBF]+'.
        '|([\xC0\xC1]|[\xF0-\xFF])[\x80-\xBF]*'.
        '|[\xC2-\xDF]((?![\x80-\xBF])|[\x80-\xBF]{2,})'.
        '|[\xE0-\xEF](([\x80-\xBF](?![\x80-\xBF]))|(?![\x80-\xBF]{2})|[\x80-\xBF]{3,})/S',
        '?',
        $string
    );
    // preg_replace() returns null on a PCRE error; keep the previous value
    // in that case rather than silently turning the input into "".
    if ($firstPass !== null) {
        $string = $firstPass;
    }

    // Overlong 3-byte sequences and UTF-16 surrogates (U+D800-U+DFFF).
    $secondPass = preg_replace(
        '/\xE0[\x80-\x9F][\x80-\xBF]'.
        '|\xED[\xA0-\xBF][\x80-\xBF]/S',
        '?',
        $string
    );
    if ($secondPass !== null) {
        $string = $secondPass;
    }

    // Final UTF-8 -> UTF-8 conversion: lets mbstring substitute any
    // malformed bytes the patterns above did not catch (for example a
    // continuation byte at the very start of the string).
    $string = mb_convert_encoding($string, 'UTF-8', 'UTF-8');

    return $string;
}
|
||||
}
|
||||
|
||||
|
|
|
@ -83,7 +83,11 @@ class MetadataAnalyzer(Analyzer):
|
|||
track_number = audio_file["tracknumber"]
|
||||
if isinstance(track_number, list): # Sometimes tracknumber is a list, ugh
|
||||
track_number = track_number[0]
|
||||
track_number_tokens = track_number.split(u'/')
|
||||
track_number_tokens = track_number
|
||||
if u'/' in track_number:
|
||||
track_number_tokens = track_number.split(u'/')
|
||||
elif u'-' in track_number:
|
||||
track_number_tokens = track_number.split(u'-')
|
||||
track_number = track_number_tokens[0]
|
||||
metadata["track_number"] = track_number
|
||||
track_total = track_number_tokens[1]
|
||||
|
|
Loading…
Reference in New Issue