From 81147ad0329d4bea5776cbe8fde2b52e2170feaf Mon Sep 17 00:00:00 2001 From: Marko Jovanovic Date: Tue, 14 Oct 2025 13:54:27 +0200 Subject: [PATCH] Parsing of phones from given input CSV files --- composer.json | 1 + composer.lock | 225 ++++++++++++++++++++++++++++- src/Command/CleanMobileCommand.php | 215 +++++++++++++++++++++++++++ 3 files changed, 440 insertions(+), 1 deletion(-) create mode 100644 src/Command/CleanMobileCommand.php diff --git a/composer.json b/composer.json index 0d62a94..5837abc 100644 --- a/composer.json +++ b/composer.json @@ -11,6 +11,7 @@ "doctrine/doctrine-bundle": "^2.16", "doctrine/doctrine-migrations-bundle": "^3.4", "doctrine/orm": "^3.5", + "league/csv": "^9.26", "symfony/console": "7.3.*", "symfony/dotenv": "7.3.*", "symfony/flex": "^2", diff --git a/composer.lock b/composer.lock index 6978863..a876427 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "0dbc2f806bd2846bcd6f686243f4b8cd", + "content-hash": "b9305001e2268ecfb0a0152c539f85b1", "packages": [ { "name": "doctrine/collections", @@ -1130,6 +1130,229 @@ }, "time": "2025-01-24T11:45:48+00:00" }, + { + "name": "giggsey/libphonenumber-for-php", + "version": "9.0.16", + "source": { + "type": "git", + "url": "https://github.com/giggsey/libphonenumber-for-php.git", + "reference": "c513a04df3824e9f19082d935bb8f331741252d1" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/giggsey/libphonenumber-for-php/zipball/c513a04df3824e9f19082d935bb8f331741252d1", + "reference": "c513a04df3824e9f19082d935bb8f331741252d1", + "shasum": "" + }, + "require": { + "giggsey/locale": "^2.7", + "php": "^8.1", + "symfony/polyfill-mbstring": "^1.31" + }, + "replace": { + "giggsey/libphonenumber-for-php-lite": "self.version" + }, + "require-dev": { + "ext-dom": "*", + "friendsofphp/php-cs-fixer": "^3.71", + 
"infection/infection": "^0.29|^0.31.0", + "nette/php-generator": "^4.1", + "php-coveralls/php-coveralls": "^2.7", + "phpstan/extension-installer": "^1.4.3", + "phpstan/phpstan": "^2.1.7", + "phpstan/phpstan-deprecation-rules": "^2.0.1", + "phpstan/phpstan-phpunit": "^2.0.4", + "phpstan/phpstan-strict-rules": "^2.0.3", + "phpunit/phpunit": "^10.5.45", + "symfony/console": "^6.4", + "symfony/filesystem": "^6.4", + "symfony/process": "^6.4" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "9.x-dev" + } + }, + "autoload": { + "psr-4": { + "libphonenumber\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "Apache-2.0" + ], + "authors": [ + { + "name": "Joshua Gigg", + "email": "giggsey@gmail.com", + "homepage": "https://giggsey.com/" + } + ], + "description": "A library for parsing, formatting, storing and validating international phone numbers, a PHP Port of Google's libphonenumber.", + "homepage": "https://github.com/giggsey/libphonenumber-for-php", + "keywords": [ + "geocoding", + "geolocation", + "libphonenumber", + "mobile", + "phonenumber", + "validation" + ], + "support": { + "issues": "https://github.com/giggsey/libphonenumber-for-php/issues", + "source": "https://github.com/giggsey/libphonenumber-for-php" + }, + "time": "2025-10-10T10:55:56+00:00" + }, + { + "name": "giggsey/locale", + "version": "2.8.0", + "source": { + "type": "git", + "url": "https://github.com/giggsey/Locale.git", + "reference": "1cd8b3ad2d43e04f4c2c6a240495af44780f809b" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/giggsey/Locale/zipball/1cd8b3ad2d43e04f4c2c6a240495af44780f809b", + "reference": "1cd8b3ad2d43e04f4c2c6a240495af44780f809b", + "shasum": "" + }, + "require": { + "php": "^8.1" + }, + "require-dev": { + "ext-json": "*", + "friendsofphp/php-cs-fixer": "^3.66", + "pear/pear-core-minimal": "^1.10", + "pear/pear_exception": "^1.0", + "pear/versioncontrol_git": "^0.5", + "phing/phing": 
"^2.17.4", + "php-coveralls/php-coveralls": "^2.7", + "phpunit/phpunit": "^10.5.45", + "symfony/console": "^6.4", + "symfony/filesystem": "6.4", + "symfony/finder": "^6.4", + "symfony/process": "^6.4", + "symfony/var-exporter": "^6.4" + }, + "type": "library", + "autoload": { + "psr-4": { + "Giggsey\\Locale\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Joshua Gigg", + "email": "giggsey@gmail.com", + "homepage": "https://giggsey.com/" + } + ], + "description": "Locale functions required by libphonenumber-for-php", + "support": { + "issues": "https://github.com/giggsey/Locale/issues", + "source": "https://github.com/giggsey/Locale/tree/2.8.0" + }, + "time": "2025-03-20T14:25:27+00:00" + }, + { + "name": "league/csv", + "version": "9.26.0", + "source": { + "type": "git", + "url": "https://github.com/thephpleague/csv.git", + "reference": "7fce732754d043f3938899e5183e2d0f3d31b571" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/thephpleague/csv/zipball/7fce732754d043f3938899e5183e2d0f3d31b571", + "reference": "7fce732754d043f3938899e5183e2d0f3d31b571", + "shasum": "" + }, + "require": { + "ext-filter": "*", + "php": "^8.1.2" + }, + "require-dev": { + "ext-dom": "*", + "ext-xdebug": "*", + "friendsofphp/php-cs-fixer": "^3.75.0", + "phpbench/phpbench": "^1.4.1", + "phpstan/phpstan": "^1.12.27", + "phpstan/phpstan-deprecation-rules": "^1.2.1", + "phpstan/phpstan-phpunit": "^1.4.2", + "phpstan/phpstan-strict-rules": "^1.6.2", + "phpunit/phpunit": "^10.5.16 || ^11.5.22 || ^12.3.6", + "symfony/var-dumper": "^6.4.8 || ^7.3.0" + }, + "suggest": { + "ext-dom": "Required to use the XMLConverter and the HTMLConverter classes", + "ext-iconv": "Needed to ease transcoding CSV using iconv stream filters", + "ext-mbstring": "Needed to ease transcoding CSV using mb stream filters", + "ext-mysqli": "Requiered to use the package with the MySQLi extension", + "ext-pdo": 
"Required to use the package with the PDO extension", + "ext-pgsql": "Requiered to use the package with the PgSQL extension", + "ext-sqlite3": "Required to use the package with the SQLite3 extension" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "9.x-dev" + } + }, + "autoload": { + "files": [ + "src/functions_include.php" + ], + "psr-4": { + "League\\Csv\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Ignace Nyamagana Butera", + "email": "nyamsprod@gmail.com", + "homepage": "https://github.com/nyamsprod/", + "role": "Developer" + } + ], + "description": "CSV data manipulation made easy in PHP", + "homepage": "https://csv.thephpleague.com", + "keywords": [ + "convert", + "csv", + "export", + "filter", + "import", + "read", + "transform", + "write" + ], + "support": { + "docs": "https://csv.thephpleague.com", + "issues": "https://github.com/thephpleague/csv/issues", + "rss": "https://github.com/thephpleague/csv/releases.atom", + "source": "https://github.com/thephpleague/csv" + }, + "funding": [ + { + "url": "https://github.com/sponsors/nyamsprod", + "type": "github" + } + ], + "time": "2025-10-01T11:24:54+00:00" + }, { "name": "psr/cache", "version": "3.0.0", diff --git a/src/Command/CleanMobileCommand.php b/src/Command/CleanMobileCommand.php new file mode 100644 index 0000000..50fe59b --- /dev/null +++ b/src/Command/CleanMobileCommand.php @@ -0,0 +1,215 @@ +addArgument('inputCsv', InputArgument::REQUIRED, 'Path to the source CSV file') + ->addArgument( + 'outputCsv', + InputArgument::OPTIONAL, + 'Path to the cleaned CSV (defaults to cleaned_.csv)' + ); + }
+
+    /**
+     * Cleans the phone numbers of a semicolon-separated CSV file and stores the valid ones.
+     *
+     * Every input record is copied to the output CSV with a HANDY_E164 column. The
+     * column holds the number in "0049…" notation (not "+49…", despite the column
+     * name) when it passes the German-mobile check, and an empty string otherwise.
+     * For every accepted number a Contacts entity (due tomorrow at 16:00, not yet
+     * contacted) is persisted; entities are flushed in batches of 100 after the whole
+     * CSV has been processed.
+     *
+     * @param InputInterface  $input  supplies the "inputCsv" and optional "outputCsv" arguments
+     * @param OutputInterface $output console output used for the error and summary messages
+     *
+     * @return int Command::SUCCESS, or Command::FAILURE when the input file is missing or
+     *             unreadable, or when the output path resolves to the input file
+     */
+    protected function execute(InputInterface $input, OutputInterface $output): int
+    {
+        $io = new SymfonyStyle($input, $output);
+
+        // -------------------------------------------------------------
+        // 1. Resolve file paths
+        // -------------------------------------------------------------
+        $inputPath  = $input->getArgument('inputCsv');
+        $outputPath = $input->getArgument('outputCsv')
+            ?? sprintf('cleaned_%s', basename($inputPath));
+
+        if (!is_file($inputPath) || !is_readable($inputPath)) {
+            $io->error("Input file does not exist or is not readable: $inputPath");
+            return Command::FAILURE;
+        }
+
+        // The writer opens its target with 'w+', which truncates an existing file.
+        // Refuse to run when that file is the very input we are about to read.
+        $resolvedOutput = realpath($outputPath);
+        if ($resolvedOutput !== false && $resolvedOutput === realpath($inputPath)) {
+            $io->error("Output file must differ from the input file: $outputPath");
+            return Command::FAILURE;
+        }
+
+        // -------------------------------------------------------------
+        // 2. CSV reader / writer (semicolon-separated)
+        // -------------------------------------------------------------
+        $csvReader = Reader::createFromPath($inputPath, 'r');
+        $csvReader->setDelimiter(';');
+        $csvReader->setHeaderOffset(0); // first line = header
+        $header = $csvReader->getHeader();
+
+        // Ensure the extra column exists in the header
+        if (!in_array('HANDY_E164', $header, true)) {
+            $header[] = 'HANDY_E164';
+        }
+
+        $csvWriter = Writer::createFromPath($outputPath, 'w+');
+        $csvWriter->setDelimiter(';');
+        $csvWriter->insertOne($header); // write header row
+
+        // -------------------------------------------------------------
+        // 3. German mobile prefixes (the part *after* the leading 0)
+        // -------------------------------------------------------------
+        $germanMobilePrefixes = [
+            '151', '152', '155', '157', '159',
+            '160', '162', '163', '164', '165', '166', '167', '168', '169',
+            '170', '171', '172', '173', '174', '175', '176', '177', '178', '179',
+        ];
+
+        // -------------------------------------------------------------
+        // 4. Helper closures
+        // -------------------------------------------------------------
+        // Concatenate a prefix column and a number column; null when both are blank.
+        $buildRawNumber = static function (array $row, string $prefixCol, string $numberCol): ?string {
+            $p = trim($row[$prefixCol] ?? '');
+            $n = trim($row[$numberCol] ?? '');
+            if ($p === '' && $n === '') {
+                return null;
+            }
+            return $p . $n;
+        };
+
+        // Strip everything that is not a digit or '+', drop leading '+' signs and
+        // convert to the required "0049…" format.
+        $normaliseTo0049 = static function (string $raw): string {
+            // preg_replace() returns null on a PCRE error; treat that as "no digits".
+            $digits = ltrim((string) preg_replace('/[^\d+]/', '', $raw), '+');
+
+            $hadCountryCode = true;
+            if (str_starts_with($digits, '0049')) {
+                $national = substr($digits, 4);
+            } elseif (str_starts_with($digits, '49')) {
+                $national = substr($digits, 2);
+            } else {
+                $hadCountryCode = false;
+                // National notation: drop the single leading trunk zero if present,
+                // otherwise assume a plain subscriber number (e.g. 15112345678).
+                $national = str_starts_with($digits, '0') ? substr($digits, 1) : $digits;
+            }
+
+            // "+49 (0)151 …" style input keeps the trunk zero after the country
+            // code; it is not part of the international number, so drop it.
+            if ($hadCountryCode && str_starts_with($national, '0')) {
+                $national = substr($national, 1);
+            }
+
+            return '0049' . $national;
+        };
+
+        // Small, deterministic validation against the prefix list above.
+        $isGermanMobile = static function (string $e164) use ($germanMobilePrefixes): bool {
+            // Must start with the German country code
+            if (!str_starts_with($e164, '0049')) {
+                return false;
+            }
+
+            // Extract the 3-digit network prefix and the subscriber part
+            $prefix     = substr($e164, 4, 3); // after 0049
+            $subscriber = substr($e164, 7);
+
+            // Prefix must be one of the known mobile prefixes
+            if (!in_array($prefix, $germanMobilePrefixes, true)) {
+                return false;
+            }
+
+            // Subscriber must be 6-10 digits long and consist only of digits
+            return preg_match('/^\d{6,10}$/', $subscriber) === 1;
+        };
+
+        // -------------------------------------------------------------
+        // 5. Process every record
+        // -------------------------------------------------------------
+        $validContacts = [];
+        $rowCount      = 0;
+        $invalid       = 0;
+
+        foreach ($csvReader->getRecords() as $row) {
+            $rowCount++;
+
+            // 5.1 Raw number: first try HANDY_*, then the generic VORWAHL/DURCHWAHL
+            $raw = $buildRawNumber($row, 'HANDY_VORWAHL', 'HANDY_DURCHWAHL')
+                ?? $buildRawNumber($row, 'VORWAHL', 'DURCHWAHL');
+
+            // 5.2 Normalise to the canonical 0049… format (null = no number at all)
+            $e164 = $raw === null ? null : $normaliseTo0049($raw);
+
+            // 5.3 Empty and rejected numbers both get an empty column and are both
+            //     counted, so the summary line "Invalid / empty numbers" adds up
+            //     with the valid count to the number of rows read.
+            if ($e164 === null || !$isGermanMobile($e164)) {
+                $row['HANDY_E164'] = '';
+                $csvWriter->insertOne($row);
+                $invalid++;
+                continue;
+            }
+
+            $row['HANDY_E164'] = $e164;
+            $csvWriter->insertOne($row);
+
+            // Create a Contacts entity for DB insertion
+            $contact = new Contacts();
+            $contact->setPhoneNumber($e164);
+            $dueDate = (new \DateTime('tomorrow'))->setTime(16, 0, 0);
+            $contact->setDueDate($dueDate);
+            $contact->setContacted(false);
+            $validContacts[] = $contact;
+        }
+
+        // -------------------------------------------------------------
+        // 6. Persist the valid contacts (batch insert)
+        // -------------------------------------------------------------
+        // Persisting starts only after the whole CSV was processed, so an error
+        // while reading or writing the CSV leaves the database untouched.
+        if (\count($validContacts) > 0) {
+            $batchSize = 100;
+            foreach ($validContacts as $i => $contact) {
+                $this->em->persist($contact);
+                if ((($i + 1) % $batchSize) === 0) {
+                    $this->em->flush();
+                    // Detach the flushed entities from the entity manager; the
+                    // $validContacts array itself still references them.
+                    $this->em->clear();
+                }
+            }
+            $this->em->flush();
+            $this->em->clear();
+        }
+
+        // -------------------------------------------------------------
+        // 7. Output a short summary
+        // -------------------------------------------------------------
+        $io->success('Processing completed.');
+        $io->listing([
+            "Rows read               : $rowCount",
+            "Valid mobile numbers    : " . \count($validContacts),
+            "Invalid / empty numbers : $invalid",
+            "Cleaned CSV written to  : $outputPath",
+        ]);
+
+        return Command::SUCCESS;
+    }
+}