Parsing of phones from given input CSV files

This commit is contained in:
Marko Jovanovic 2025-10-14 13:54:27 +02:00
parent b0a76ac545
commit 81147ad032
3 changed files with 440 additions and 1 deletions

View File

@ -11,6 +11,7 @@
"doctrine/doctrine-bundle": "^2.16", "doctrine/doctrine-bundle": "^2.16",
"doctrine/doctrine-migrations-bundle": "^3.4", "doctrine/doctrine-migrations-bundle": "^3.4",
"doctrine/orm": "^3.5", "doctrine/orm": "^3.5",
"league/csv": "^9.26",
"symfony/console": "7.3.*", "symfony/console": "7.3.*",
"symfony/dotenv": "7.3.*", "symfony/dotenv": "7.3.*",
"symfony/flex": "^2", "symfony/flex": "^2",

225
composer.lock generated
View File

@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically" "This file is @generated automatically"
], ],
"content-hash": "0dbc2f806bd2846bcd6f686243f4b8cd", "content-hash": "b9305001e2268ecfb0a0152c539f85b1",
"packages": [ "packages": [
{ {
"name": "doctrine/collections", "name": "doctrine/collections",
@ -1130,6 +1130,229 @@
}, },
"time": "2025-01-24T11:45:48+00:00" "time": "2025-01-24T11:45:48+00:00"
}, },
{
"name": "giggsey/libphonenumber-for-php",
"version": "9.0.16",
"source": {
"type": "git",
"url": "https://github.com/giggsey/libphonenumber-for-php.git",
"reference": "c513a04df3824e9f19082d935bb8f331741252d1"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/giggsey/libphonenumber-for-php/zipball/c513a04df3824e9f19082d935bb8f331741252d1",
"reference": "c513a04df3824e9f19082d935bb8f331741252d1",
"shasum": ""
},
"require": {
"giggsey/locale": "^2.7",
"php": "^8.1",
"symfony/polyfill-mbstring": "^1.31"
},
"replace": {
"giggsey/libphonenumber-for-php-lite": "self.version"
},
"require-dev": {
"ext-dom": "*",
"friendsofphp/php-cs-fixer": "^3.71",
"infection/infection": "^0.29|^0.31.0",
"nette/php-generator": "^4.1",
"php-coveralls/php-coveralls": "^2.7",
"phpstan/extension-installer": "^1.4.3",
"phpstan/phpstan": "^2.1.7",
"phpstan/phpstan-deprecation-rules": "^2.0.1",
"phpstan/phpstan-phpunit": "^2.0.4",
"phpstan/phpstan-strict-rules": "^2.0.3",
"phpunit/phpunit": "^10.5.45",
"symfony/console": "^6.4",
"symfony/filesystem": "^6.4",
"symfony/process": "^6.4"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "9.x-dev"
}
},
"autoload": {
"psr-4": {
"libphonenumber\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"Apache-2.0"
],
"authors": [
{
"name": "Joshua Gigg",
"email": "giggsey@gmail.com",
"homepage": "https://giggsey.com/"
}
],
"description": "A library for parsing, formatting, storing and validating international phone numbers, a PHP Port of Google's libphonenumber.",
"homepage": "https://github.com/giggsey/libphonenumber-for-php",
"keywords": [
"geocoding",
"geolocation",
"libphonenumber",
"mobile",
"phonenumber",
"validation"
],
"support": {
"issues": "https://github.com/giggsey/libphonenumber-for-php/issues",
"source": "https://github.com/giggsey/libphonenumber-for-php"
},
"time": "2025-10-10T10:55:56+00:00"
},
{
"name": "giggsey/locale",
"version": "2.8.0",
"source": {
"type": "git",
"url": "https://github.com/giggsey/Locale.git",
"reference": "1cd8b3ad2d43e04f4c2c6a240495af44780f809b"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/giggsey/Locale/zipball/1cd8b3ad2d43e04f4c2c6a240495af44780f809b",
"reference": "1cd8b3ad2d43e04f4c2c6a240495af44780f809b",
"shasum": ""
},
"require": {
"php": "^8.1"
},
"require-dev": {
"ext-json": "*",
"friendsofphp/php-cs-fixer": "^3.66",
"pear/pear-core-minimal": "^1.10",
"pear/pear_exception": "^1.0",
"pear/versioncontrol_git": "^0.5",
"phing/phing": "^2.17.4",
"php-coveralls/php-coveralls": "^2.7",
"phpunit/phpunit": "^10.5.45",
"symfony/console": "^6.4",
"symfony/filesystem": "6.4",
"symfony/finder": "^6.4",
"symfony/process": "^6.4",
"symfony/var-exporter": "^6.4"
},
"type": "library",
"autoload": {
"psr-4": {
"Giggsey\\Locale\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Joshua Gigg",
"email": "giggsey@gmail.com",
"homepage": "https://giggsey.com/"
}
],
"description": "Locale functions required by libphonenumber-for-php",
"support": {
"issues": "https://github.com/giggsey/Locale/issues",
"source": "https://github.com/giggsey/Locale/tree/2.8.0"
},
"time": "2025-03-20T14:25:27+00:00"
},
{
"name": "league/csv",
"version": "9.26.0",
"source": {
"type": "git",
"url": "https://github.com/thephpleague/csv.git",
"reference": "7fce732754d043f3938899e5183e2d0f3d31b571"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/thephpleague/csv/zipball/7fce732754d043f3938899e5183e2d0f3d31b571",
"reference": "7fce732754d043f3938899e5183e2d0f3d31b571",
"shasum": ""
},
"require": {
"ext-filter": "*",
"php": "^8.1.2"
},
"require-dev": {
"ext-dom": "*",
"ext-xdebug": "*",
"friendsofphp/php-cs-fixer": "^3.75.0",
"phpbench/phpbench": "^1.4.1",
"phpstan/phpstan": "^1.12.27",
"phpstan/phpstan-deprecation-rules": "^1.2.1",
"phpstan/phpstan-phpunit": "^1.4.2",
"phpstan/phpstan-strict-rules": "^1.6.2",
"phpunit/phpunit": "^10.5.16 || ^11.5.22 || ^12.3.6",
"symfony/var-dumper": "^6.4.8 || ^7.3.0"
},
"suggest": {
"ext-dom": "Required to use the XMLConverter and the HTMLConverter classes",
"ext-iconv": "Needed to ease transcoding CSV using iconv stream filters",
"ext-mbstring": "Needed to ease transcoding CSV using mb stream filters",
"ext-mysqli": "Requiered to use the package with the MySQLi extension",
"ext-pdo": "Required to use the package with the PDO extension",
"ext-pgsql": "Requiered to use the package with the PgSQL extension",
"ext-sqlite3": "Required to use the package with the SQLite3 extension"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "9.x-dev"
}
},
"autoload": {
"files": [
"src/functions_include.php"
],
"psr-4": {
"League\\Csv\\": "src"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Ignace Nyamagana Butera",
"email": "nyamsprod@gmail.com",
"homepage": "https://github.com/nyamsprod/",
"role": "Developer"
}
],
"description": "CSV data manipulation made easy in PHP",
"homepage": "https://csv.thephpleague.com",
"keywords": [
"convert",
"csv",
"export",
"filter",
"import",
"read",
"transform",
"write"
],
"support": {
"docs": "https://csv.thephpleague.com",
"issues": "https://github.com/thephpleague/csv/issues",
"rss": "https://github.com/thephpleague/csv/releases.atom",
"source": "https://github.com/thephpleague/csv"
},
"funding": [
{
"url": "https://github.com/sponsors/nyamsprod",
"type": "github"
}
],
"time": "2025-10-01T11:24:54+00:00"
},
{ {
"name": "psr/cache", "name": "psr/cache",
"version": "3.0.0", "version": "3.0.0",

View File

@ -0,0 +1,215 @@
<?php
// src/Command/CleanMobileCommand.php
declare(strict_types=1);
namespace App\Command;
use App\Entity\Contacts;
use Doctrine\ORM\EntityManagerInterface;
use League\Csv\Reader;
use League\Csv\Writer;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
/**
 * Normalises German mobile numbers from a semicolon-separated CSV and stores the valid ones in the DB.
 *
 * For every input row the number is taken from HANDY_VORWAHL/HANDY_DURCHWAHL, falling back to
 * VORWAHL/DURCHWAHL, rewritten to the "0049…" form and validated against a fixed list of
 * mobile prefixes. Every row is copied to the output CSV with a HANDY_E164 column (empty when
 * no valid mobile number was found); each valid number additionally becomes a Contacts entity.
 *
 * Note: despite the column name, the value uses the "0049…" dialling form, not the "+49…"
 * notation defined by E.164.
 *
 * Usage:
 *   php bin/console app:clean-mobile input.csv [output.csv]
 */
#[AsCommand(
    name: 'app:clean-mobile',
    description: 'Normalize German mobile numbers from a CSV and store them in PostgreSQL.'
)]
final class CleanMobileCommand extends Command
{
    /** Name of the column that receives the normalised number in the output CSV. */
    private const OUTPUT_COLUMN = 'HANDY_E164';

    /** Number of entities persisted between two flushes. */
    private const BATCH_SIZE = 100;

    /** Accepted three-digit network prefixes (the part after the national leading 0). */
    private const GERMAN_MOBILE_PREFIXES = [
        '151', '152', '155', '157', '159',
        '160', '162', '163', '164', '165', '166', '167', '168', '169',
        '170', '171', '172', '173', '174', '175', '176', '177', '178', '179',
    ];

    public function __construct(
        private readonly EntityManagerInterface $em
    ) {
        parent::__construct();
    }

    /**
     * Declares the required input path and the optional output path argument.
     */
    protected function configure(): void
    {
        $this
            ->addArgument('inputCsv', InputArgument::REQUIRED, 'Path to the source CSV file')
            ->addArgument(
                'outputCsv',
                InputArgument::OPTIONAL,
                'Path to the cleaned CSV (defaults to cleaned_<input>.csv)'
            );
    }

    /**
     * Reads the input CSV, writes the cleaned CSV and persists one Contacts entity per valid number.
     *
     * @return int Command::SUCCESS on completion, Command::FAILURE when the input file is
     *             unusable or the output path points at the input file
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $io = new SymfonyStyle($input, $output);

        // -------------------------------------------------------------
        // 1. Resolve file paths
        // -------------------------------------------------------------
        $inputPath = (string) $input->getArgument('inputCsv');
        $outputPath = (string) ($input->getArgument('outputCsv')
            ?? sprintf('cleaned_%s', basename($inputPath)));

        if (!is_file($inputPath) || !is_readable($inputPath)) {
            $io->error("Input file does not exist or is not readable: $inputPath");

            return Command::FAILURE;
        }

        // Opening the writer in 'w+' mode truncates the target, so writing onto the
        // input file would destroy the data before a single record has been read.
        $resolvedInput = realpath($inputPath);
        $resolvedOutput = realpath($outputPath); // false while the output file does not exist yet
        if ($resolvedInput !== false && $resolvedInput === $resolvedOutput) {
            $io->error("Output file must differ from the input file: $outputPath");

            return Command::FAILURE;
        }

        // -------------------------------------------------------------
        // 2. CSV reader / writer (semicolon-separated)
        // -------------------------------------------------------------
        $csvReader = Reader::createFromPath($inputPath, 'r');
        $csvReader->setDelimiter(';');
        $csvReader->setHeaderOffset(0); // first line = header

        $header = $csvReader->getHeader();
        // Ensure the extra column exists in the header
        if (!in_array(self::OUTPUT_COLUMN, $header, true)) {
            $header[] = self::OUTPUT_COLUMN;
        }

        $csvWriter = Writer::createFromPath($outputPath, 'w+');
        $csvWriter->setDelimiter(';');
        $csvWriter->insertOne($header); // write header row

        // -------------------------------------------------------------
        // 3. Process every record
        // -------------------------------------------------------------
        // Computed once so every contact of this run gets the same due date (tomorrow, 16:00).
        $dueDate = (new \DateTime('tomorrow'))->setTime(16, 0, 0);

        $validContacts = [];
        $rowCount = 0;
        $invalid = 0;

        foreach ($csvReader->getRecords() as $row) {
            $rowCount++;

            // First try the HANDY_* columns, then the generic VORWAHL/DURCHWAHL pair.
            $raw = self::buildRawNumber($row, 'HANDY_VORWAHL', 'HANDY_DURCHWAHL')
                ?? self::buildRawNumber($row, 'VORWAHL', 'DURCHWAHL');
            $number = $raw === null ? null : self::normaliseTo0049($raw);

            if ($number === null || !self::isGermanMobile($number)) {
                // Missing and invalid numbers are both reported under "Invalid / empty",
                // so both have to be counted here.
                $row[self::OUTPUT_COLUMN] = '';
                $csvWriter->insertOne($row);
                $invalid++;
                continue;
            }

            $row[self::OUTPUT_COLUMN] = $number;
            $csvWriter->insertOne($row);

            // Create a Contacts entity for DB insertion; each one gets its own DateTime
            // instance because DateTime is mutable.
            $contact = new Contacts();
            $contact->setPhoneNumber($number);
            $contact->setDueDate(clone $dueDate);
            $contact->setContacted(false);
            $validContacts[] = $contact;
        }

        // -------------------------------------------------------------
        // 4. Persist the valid contacts (flushed in batches)
        // -------------------------------------------------------------
        $validCount = \count($validContacts);
        if ($validCount > 0) {
            foreach ($validContacts as $i => $contact) {
                $this->em->persist($contact);
                if ((($i + 1) % self::BATCH_SIZE) === 0) {
                    $this->em->flush();
                    $this->em->clear(); // detach the flushed entities from the unit of work
                }
            }
            $this->em->flush();
            $this->em->clear();
        }

        // -------------------------------------------------------------
        // 5. Output a short summary
        // -------------------------------------------------------------
        $io->success('Processing completed.');
        $io->listing([
            "Rows read : $rowCount",
            "Valid mobile numbers : " . $validCount,
            "Invalid / empty numbers : $invalid",
            "Cleaned CSV written to : $outputPath",
        ]);

        return Command::SUCCESS;
    }

    /**
     * Concatenates the trimmed prefix and number cells of one record.
     *
     * @param array<string, string|null> $row CSV record keyed by column name
     *
     * @return string|null the joined value, or null when both cells are missing or blank
     */
    private static function buildRawNumber(array $row, string $prefixCol, string $numberCol): ?string
    {
        $prefix = trim($row[$prefixCol] ?? '');
        $number = trim($row[$numberCol] ?? '');

        if ($prefix === '' && $number === '') {
            return null;
        }

        return $prefix . $number;
    }

    /**
     * Strips everything except digits and '+', then rewrites the number to the "0049…" form.
     *
     * Accepts "+49…", "0049…", "49…", national "0…" and bare subscriber input. A single trunk
     * zero directly after the country code ("+49 (0)151 …") is dropped, because it is only
     * dialled inside Germany and would otherwise shift the network prefix by one digit.
     */
    private static function normaliseTo0049(string $raw): string
    {
        // preg_replace() returns null on a PCRE error; treat that as "no digits".
        $digits = ltrim(preg_replace('/[^\d+]/', '', $raw) ?? '', '+');

        if (str_starts_with($digits, '0049')) {
            $national = substr($digits, 4);
            $hasCountryCode = true;
        } elseif (str_starts_with($digits, '49')) {
            $national = substr($digits, 2);
            $hasCountryCode = true;
        } elseif (str_starts_with($digits, '0')) {
            $national = substr($digits, 1);
            $hasCountryCode = false;
        } else {
            // Already looks like a plain German subscriber number (e.g. 15112345678)
            $national = $digits;
            $hasCountryCode = false;
        }

        if ($hasCountryCode && str_starts_with($national, '0')) {
            $national = substr($national, 1);
        }

        return '0049' . $national;
    }

    /**
     * Small, deterministic validation that needs no external library.
     *
     * A number is accepted when it starts with "0049", is followed by one of the known
     * three-digit mobile prefixes and ends with a subscriber part of 6-10 digits.
     */
    private static function isGermanMobile(string $number): bool
    {
        if (!str_starts_with($number, '0049')) {
            return false;
        }

        $prefix = substr($number, 4, 3); // the three digits after 0049
        $subscriber = substr($number, 7);

        return in_array($prefix, self::GERMAN_MOBILE_PREFIXES, true)
            && preg_match('/^\d{6,10}$/', $subscriber) === 1;
    }
}