Sanitize to UTF-8

This commit is contained in:
Marko Jovanovic 2025-10-29 12:22:05 +01:00
parent 60bc3e6e99
commit 93f9a0dd18

View File

@ -92,6 +92,26 @@ final class CleanMobileCommand extends Command
'17', '17',
]; ];
/**
 * Returns $raw as valid UTF-8, or null when no conversion succeeds.
 *
 * - null is passed through unchanged (nothing to sanitise).
 * - A string that is already valid UTF-8 is returned as-is.
 * - Anything else is assumed to be a legacy single-byte encoding and is
 *   converted with iconv(), trying Windows-1252 first and ISO-8859-1 second.
 */
$sanitiseUtf8 = static function(?string $raw): ?string {
    // Guard first: mb_check_encoding() and iconv() must not receive null
    // (deprecated since PHP 8.1, TypeError under strict_types).
    if ($raw === null) {
        return null;
    }

    if (mb_check_encoding($raw, 'UTF-8')) {
        return $raw;
    }

    // Windows-1252 is tried first because it is a superset of the printable
    // ISO-8859-1 range (smart quotes, euro sign, ...). ISO-8859-1 is the
    // fallback for input containing bytes that Windows-1252 leaves undefined.
    // Further aliases (CP1252) or subsets (ASCII) could never match after
    // these two, so they are not listed.
    foreach (['Windows-1252', 'ISO-8859-1'] as $sourceEncoding) {
        // The target is plain 'UTF-8': it can represent every character of the
        // source encodings, so //TRANSLIT//IGNORE adds nothing and is rejected
        // by some iconv implementations.
        // iconv() raises E_NOTICE on bytes undefined in the source encoding;
        // the notice is suppressed because the false return is handled here.
        $converted = @iconv($sourceEncoding, 'UTF-8', $raw);
        if ($converted !== false) {
            return $converted;
        }
    }

    // Could not be converted: treat it as invalid.
    return null;
};
// ------------------------------------------------------------- // -------------------------------------------------------------
// 4⃣ Helper closures // 4⃣ Helper closures
// ------------------------------------------------------------- // -------------------------------------------------------------
@ -206,7 +226,7 @@ final class CleanMobileCommand extends Command
$contact->setParsedAt(new \DateTimeImmutable()); $contact->setParsedAt(new \DateTimeImmutable());
$contact->setStudyId($study_id); $contact->setStudyId($study_id);
$contact->setParsedFileLinenum($rowCount + 1); $contact->setParsedFileLinenum($rowCount + 1);
$contact->setParsedFileLine(implode(';', $row)); $contact->setParsedFileLine($sanitiseUtf8(implode(';', $row)));
$contact->setMsgContentType($rowCount % 2 ? 1 : 2); $contact->setMsgContentType($rowCount % 2 ? 1 : 2);
try { try {