From 93f9a0dd18b767b5c1200cf7da29b0b23ff2cd09 Mon Sep 17 00:00:00 2001
From: Marko Jovanovic
Date: Wed, 29 Oct 2025 12:22:05 +0100
Subject: [PATCH] Sanitize to UTF-8

---
 src/Command/CleanMobileCommand.php | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/src/Command/CleanMobileCommand.php b/src/Command/CleanMobileCommand.php
index 26dae5d..e907ebd 100644
--- a/src/Command/CleanMobileCommand.php
+++ b/src/Command/CleanMobileCommand.php
@@ -92,6 +92,31 @@ final class CleanMobileCommand extends Command
             '17',
         ];
 
+        // Normalise a raw line to valid UTF-8 before it is persisted; returns
+        // null for null input or when no fallback conversion succeeds.
+        $sanitiseUtf8 = static function (?string $raw): ?string {
+            // Guard null first: passing null to mb_check_encoding() is deprecated
+            // since PHP 8.1 and would otherwise reach iconv() below.
+            if ($raw === null || mb_check_encoding($raw, 'UTF-8')) {
+                return $raw;
+            }
+
+            // Legacy single-byte fallbacks. ISO-8859-1 maps every byte value, so
+            // it is the catch-all; the former 'CP1252' (alias of Windows-1252)
+            // and 'ASCII' entries after it could never be reached.
+            foreach (['Windows-1252', 'ISO-8859-1'] as $src) {
+                // '@' is deliberate: iconv() raises a notice on bytes it cannot
+                // map, and a failed attempt just means "try the next encoding".
+                $utf8 = @iconv($src, 'UTF-8//TRANSLIT//IGNORE', $raw);
+                if ($utf8 !== false) {
+                    return $utf8;
+                }
+            }
+
+            // Could not be converted – treat it as “invalid”.
+            return null;
+        };
+
         // -------------------------------------------------------------
         // 4️⃣ Helper closures
         // -------------------------------------------------------------
@@ -206,7 +231,7 @@ final class CleanMobileCommand extends Command
             $contact->setParsedAt(new \DateTimeImmutable());
             $contact->setStudyId($study_id);
             $contact->setParsedFileLinenum($rowCount + 1);
-            $contact->setParsedFileLine(implode(';', $row));
+            $contact->setParsedFileLine($sanitiseUtf8(implode(';', $row)));
             $contact->setMsgContentType($rowCount % 2 ? 1 : 2);
 
             try {