CC-5862: Invalid UTF-8 chars cause DB error
* Strip and validate UTF-8 strings in the Media API * Also properly parse track numbers containing "-"
This commit is contained in:
parent
7caa42cf69
commit
1373d4984f
2 changed files with 30 additions and 1 deletions
|
@ -402,10 +402,15 @@ class Rest_MediaController extends Zend_Rest_Controller
|
|||
if ($stringLengthValidator) {
|
||||
$value = substr($value, 0, $stringLengthValidator->getMax());
|
||||
}
|
||||
|
||||
$value = $this->stripInvalidUtf8Characters($value);
|
||||
}
|
||||
}
|
||||
|
||||
if (!$fileForm->isValidPartial($whiteList)) {
|
||||
$errors = $fileForm->getErrors();
|
||||
$messages = $fileForm->getMessages();
|
||||
Logging::error($messages);
|
||||
$file->setDbImportStatus(2);
|
||||
$file->setDbHidden(true);
|
||||
$this->invalidDataResponse();
|
||||
|
@ -526,5 +531,25 @@ class Rest_MediaController extends Zend_Rest_Controller
|
|||
}
|
||||
return $metadata;
|
||||
}
|
||||
|
||||
private function stripInvalidUtf8Characters($string)
|
||||
{
|
||||
//Remove invalid UTF-8 characters
|
||||
//reject overly long 2 byte sequences, as well as characters above U+10000 and replace with ?
|
||||
$string = preg_replace('/[\x00-\x08\x10\x0B\x0C\x0E-\x19\x7F]'.
|
||||
'|[\x00-\x7F][\x80-\xBF]+'.
|
||||
'|([\xC0\xC1]|[\xF0-\xFF])[\x80-\xBF]*'.
|
||||
'|[\xC2-\xDF]((?![\x80-\xBF])|[\x80-\xBF]{2,})'.
|
||||
'|[\xE0-\xEF](([\x80-\xBF](?![\x80-\xBF]))|(?![\x80-\xBF]{2})|[\x80-\xBF]{3,})/S',
|
||||
'?', $string );
|
||||
|
||||
//reject overly long 3 byte sequences and UTF-16 surrogates and replace with ?
|
||||
$string = preg_replace('/\xE0[\x80-\x9F][\x80-\xBF]'.
|
||||
'|\xED[\xA0-\xBF][\x80-\xBF]/S','?', $string );
|
||||
|
||||
//Do a final encoding conversion to
|
||||
$string = mb_convert_encoding($string, 'UTF-8', 'UTF-8');
|
||||
return $string;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue