Parsing of phones from given input CSV files
This commit is contained in:
parent
b0a76ac545
commit
81147ad032
|
|
@ -11,6 +11,7 @@
|
||||||
"doctrine/doctrine-bundle": "^2.16",
|
"doctrine/doctrine-bundle": "^2.16",
|
||||||
"doctrine/doctrine-migrations-bundle": "^3.4",
|
"doctrine/doctrine-migrations-bundle": "^3.4",
|
||||||
"doctrine/orm": "^3.5",
|
"doctrine/orm": "^3.5",
|
||||||
|
"league/csv": "^9.26",
|
||||||
"symfony/console": "7.3.*",
|
"symfony/console": "7.3.*",
|
||||||
"symfony/dotenv": "7.3.*",
|
"symfony/dotenv": "7.3.*",
|
||||||
"symfony/flex": "^2",
|
"symfony/flex": "^2",
|
||||||
|
|
|
||||||
225
composer.lock
generated
225
composer.lock
generated
|
|
@ -4,7 +4,7 @@
|
||||||
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
|
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
|
||||||
"This file is @generated automatically"
|
"This file is @generated automatically"
|
||||||
],
|
],
|
||||||
"content-hash": "0dbc2f806bd2846bcd6f686243f4b8cd",
|
"content-hash": "b9305001e2268ecfb0a0152c539f85b1",
|
||||||
"packages": [
|
"packages": [
|
||||||
{
|
{
|
||||||
"name": "doctrine/collections",
|
"name": "doctrine/collections",
|
||||||
|
|
@ -1130,6 +1130,229 @@
|
||||||
},
|
},
|
||||||
"time": "2025-01-24T11:45:48+00:00"
|
"time": "2025-01-24T11:45:48+00:00"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "giggsey/libphonenumber-for-php",
|
||||||
|
"version": "9.0.16",
|
||||||
|
"source": {
|
||||||
|
"type": "git",
|
||||||
|
"url": "https://github.com/giggsey/libphonenumber-for-php.git",
|
||||||
|
"reference": "c513a04df3824e9f19082d935bb8f331741252d1"
|
||||||
|
},
|
||||||
|
"dist": {
|
||||||
|
"type": "zip",
|
||||||
|
"url": "https://api.github.com/repos/giggsey/libphonenumber-for-php/zipball/c513a04df3824e9f19082d935bb8f331741252d1",
|
||||||
|
"reference": "c513a04df3824e9f19082d935bb8f331741252d1",
|
||||||
|
"shasum": ""
|
||||||
|
},
|
||||||
|
"require": {
|
||||||
|
"giggsey/locale": "^2.7",
|
||||||
|
"php": "^8.1",
|
||||||
|
"symfony/polyfill-mbstring": "^1.31"
|
||||||
|
},
|
||||||
|
"replace": {
|
||||||
|
"giggsey/libphonenumber-for-php-lite": "self.version"
|
||||||
|
},
|
||||||
|
"require-dev": {
|
||||||
|
"ext-dom": "*",
|
||||||
|
"friendsofphp/php-cs-fixer": "^3.71",
|
||||||
|
"infection/infection": "^0.29|^0.31.0",
|
||||||
|
"nette/php-generator": "^4.1",
|
||||||
|
"php-coveralls/php-coveralls": "^2.7",
|
||||||
|
"phpstan/extension-installer": "^1.4.3",
|
||||||
|
"phpstan/phpstan": "^2.1.7",
|
||||||
|
"phpstan/phpstan-deprecation-rules": "^2.0.1",
|
||||||
|
"phpstan/phpstan-phpunit": "^2.0.4",
|
||||||
|
"phpstan/phpstan-strict-rules": "^2.0.3",
|
||||||
|
"phpunit/phpunit": "^10.5.45",
|
||||||
|
"symfony/console": "^6.4",
|
||||||
|
"symfony/filesystem": "^6.4",
|
||||||
|
"symfony/process": "^6.4"
|
||||||
|
},
|
||||||
|
"type": "library",
|
||||||
|
"extra": {
|
||||||
|
"branch-alias": {
|
||||||
|
"dev-master": "9.x-dev"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"autoload": {
|
||||||
|
"psr-4": {
|
||||||
|
"libphonenumber\\": "src/"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"notification-url": "https://packagist.org/downloads/",
|
||||||
|
"license": [
|
||||||
|
"Apache-2.0"
|
||||||
|
],
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "Joshua Gigg",
|
||||||
|
"email": "giggsey@gmail.com",
|
||||||
|
"homepage": "https://giggsey.com/"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"description": "A library for parsing, formatting, storing and validating international phone numbers, a PHP Port of Google's libphonenumber.",
|
||||||
|
"homepage": "https://github.com/giggsey/libphonenumber-for-php",
|
||||||
|
"keywords": [
|
||||||
|
"geocoding",
|
||||||
|
"geolocation",
|
||||||
|
"libphonenumber",
|
||||||
|
"mobile",
|
||||||
|
"phonenumber",
|
||||||
|
"validation"
|
||||||
|
],
|
||||||
|
"support": {
|
||||||
|
"issues": "https://github.com/giggsey/libphonenumber-for-php/issues",
|
||||||
|
"source": "https://github.com/giggsey/libphonenumber-for-php"
|
||||||
|
},
|
||||||
|
"time": "2025-10-10T10:55:56+00:00"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "giggsey/locale",
|
||||||
|
"version": "2.8.0",
|
||||||
|
"source": {
|
||||||
|
"type": "git",
|
||||||
|
"url": "https://github.com/giggsey/Locale.git",
|
||||||
|
"reference": "1cd8b3ad2d43e04f4c2c6a240495af44780f809b"
|
||||||
|
},
|
||||||
|
"dist": {
|
||||||
|
"type": "zip",
|
||||||
|
"url": "https://api.github.com/repos/giggsey/Locale/zipball/1cd8b3ad2d43e04f4c2c6a240495af44780f809b",
|
||||||
|
"reference": "1cd8b3ad2d43e04f4c2c6a240495af44780f809b",
|
||||||
|
"shasum": ""
|
||||||
|
},
|
||||||
|
"require": {
|
||||||
|
"php": "^8.1"
|
||||||
|
},
|
||||||
|
"require-dev": {
|
||||||
|
"ext-json": "*",
|
||||||
|
"friendsofphp/php-cs-fixer": "^3.66",
|
||||||
|
"pear/pear-core-minimal": "^1.10",
|
||||||
|
"pear/pear_exception": "^1.0",
|
||||||
|
"pear/versioncontrol_git": "^0.5",
|
||||||
|
"phing/phing": "^2.17.4",
|
||||||
|
"php-coveralls/php-coveralls": "^2.7",
|
||||||
|
"phpunit/phpunit": "^10.5.45",
|
||||||
|
"symfony/console": "^6.4",
|
||||||
|
"symfony/filesystem": "6.4",
|
||||||
|
"symfony/finder": "^6.4",
|
||||||
|
"symfony/process": "^6.4",
|
||||||
|
"symfony/var-exporter": "^6.4"
|
||||||
|
},
|
||||||
|
"type": "library",
|
||||||
|
"autoload": {
|
||||||
|
"psr-4": {
|
||||||
|
"Giggsey\\Locale\\": "src/"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"notification-url": "https://packagist.org/downloads/",
|
||||||
|
"license": [
|
||||||
|
"MIT"
|
||||||
|
],
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "Joshua Gigg",
|
||||||
|
"email": "giggsey@gmail.com",
|
||||||
|
"homepage": "https://giggsey.com/"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"description": "Locale functions required by libphonenumber-for-php",
|
||||||
|
"support": {
|
||||||
|
"issues": "https://github.com/giggsey/Locale/issues",
|
||||||
|
"source": "https://github.com/giggsey/Locale/tree/2.8.0"
|
||||||
|
},
|
||||||
|
"time": "2025-03-20T14:25:27+00:00"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "league/csv",
|
||||||
|
"version": "9.26.0",
|
||||||
|
"source": {
|
||||||
|
"type": "git",
|
||||||
|
"url": "https://github.com/thephpleague/csv.git",
|
||||||
|
"reference": "7fce732754d043f3938899e5183e2d0f3d31b571"
|
||||||
|
},
|
||||||
|
"dist": {
|
||||||
|
"type": "zip",
|
||||||
|
"url": "https://api.github.com/repos/thephpleague/csv/zipball/7fce732754d043f3938899e5183e2d0f3d31b571",
|
||||||
|
"reference": "7fce732754d043f3938899e5183e2d0f3d31b571",
|
||||||
|
"shasum": ""
|
||||||
|
},
|
||||||
|
"require": {
|
||||||
|
"ext-filter": "*",
|
||||||
|
"php": "^8.1.2"
|
||||||
|
},
|
||||||
|
"require-dev": {
|
||||||
|
"ext-dom": "*",
|
||||||
|
"ext-xdebug": "*",
|
||||||
|
"friendsofphp/php-cs-fixer": "^3.75.0",
|
||||||
|
"phpbench/phpbench": "^1.4.1",
|
||||||
|
"phpstan/phpstan": "^1.12.27",
|
||||||
|
"phpstan/phpstan-deprecation-rules": "^1.2.1",
|
||||||
|
"phpstan/phpstan-phpunit": "^1.4.2",
|
||||||
|
"phpstan/phpstan-strict-rules": "^1.6.2",
|
||||||
|
"phpunit/phpunit": "^10.5.16 || ^11.5.22 || ^12.3.6",
|
||||||
|
"symfony/var-dumper": "^6.4.8 || ^7.3.0"
|
||||||
|
},
|
||||||
|
"suggest": {
|
||||||
|
"ext-dom": "Required to use the XMLConverter and the HTMLConverter classes",
|
||||||
|
"ext-iconv": "Needed to ease transcoding CSV using iconv stream filters",
|
||||||
|
"ext-mbstring": "Needed to ease transcoding CSV using mb stream filters",
|
||||||
|
"ext-mysqli": "Requiered to use the package with the MySQLi extension",
|
||||||
|
"ext-pdo": "Required to use the package with the PDO extension",
|
||||||
|
"ext-pgsql": "Requiered to use the package with the PgSQL extension",
|
||||||
|
"ext-sqlite3": "Required to use the package with the SQLite3 extension"
|
||||||
|
},
|
||||||
|
"type": "library",
|
||||||
|
"extra": {
|
||||||
|
"branch-alias": {
|
||||||
|
"dev-master": "9.x-dev"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"autoload": {
|
||||||
|
"files": [
|
||||||
|
"src/functions_include.php"
|
||||||
|
],
|
||||||
|
"psr-4": {
|
||||||
|
"League\\Csv\\": "src"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"notification-url": "https://packagist.org/downloads/",
|
||||||
|
"license": [
|
||||||
|
"MIT"
|
||||||
|
],
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "Ignace Nyamagana Butera",
|
||||||
|
"email": "nyamsprod@gmail.com",
|
||||||
|
"homepage": "https://github.com/nyamsprod/",
|
||||||
|
"role": "Developer"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"description": "CSV data manipulation made easy in PHP",
|
||||||
|
"homepage": "https://csv.thephpleague.com",
|
||||||
|
"keywords": [
|
||||||
|
"convert",
|
||||||
|
"csv",
|
||||||
|
"export",
|
||||||
|
"filter",
|
||||||
|
"import",
|
||||||
|
"read",
|
||||||
|
"transform",
|
||||||
|
"write"
|
||||||
|
],
|
||||||
|
"support": {
|
||||||
|
"docs": "https://csv.thephpleague.com",
|
||||||
|
"issues": "https://github.com/thephpleague/csv/issues",
|
||||||
|
"rss": "https://github.com/thephpleague/csv/releases.atom",
|
||||||
|
"source": "https://github.com/thephpleague/csv"
|
||||||
|
},
|
||||||
|
"funding": [
|
||||||
|
{
|
||||||
|
"url": "https://github.com/sponsors/nyamsprod",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"time": "2025-10-01T11:24:54+00:00"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "psr/cache",
|
"name": "psr/cache",
|
||||||
"version": "3.0.0",
|
"version": "3.0.0",
|
||||||
|
|
|
||||||
215
src/Command/CleanMobileCommand.php
Normal file
215
src/Command/CleanMobileCommand.php
Normal file
|
|
@ -0,0 +1,215 @@
|
||||||
|
<?php
|
||||||
|
// src/Command/CleanMobileCommand.php
|
||||||
|
declare(strict_types=1);
|
||||||
|
|
||||||
|
namespace App\Command;
|
||||||
|
|
||||||
|
use App\Entity\Contacts;
|
||||||
|
use Doctrine\ORM\EntityManagerInterface;
|
||||||
|
use League\Csv\Reader;
|
||||||
|
use League\Csv\Writer;
|
||||||
|
use Symfony\Component\Console\Attribute\AsCommand;
|
||||||
|
use Symfony\Component\Console\Command\Command;
|
||||||
|
use Symfony\Component\Console\Input\InputArgument;
|
||||||
|
use Symfony\Component\Console\Input\InputInterface;
|
||||||
|
use Symfony\Component\Console\Output\OutputInterface;
|
||||||
|
use Symfony\Component\Console\Style\SymfonyStyle;
|
||||||
|
|
||||||
|
/**
 * Normalises German mobile numbers from a CSV and stores the valid ones in the DB.
 *
 * For every record of the semicolon-separated input file the command builds a
 * phone number from the HANDY_VORWAHL/HANDY_DURCHWAHL columns (falling back to
 * VORWAHL/DURCHWAHL), rewrites it to the "0049…" form, validates it against a
 * fixed list of mobile prefixes and writes the record, extended by a
 * HANDY_E164 column, to the output file. Each valid number is additionally
 * persisted as a Contacts entity.
 *
 * Note: despite the column name, the value written is in "0049…" form, not in
 * "+49…" form.
 *
 * Usage:
 *   php bin/console app:clean-mobile input.csv [output.csv]
 */
#[AsCommand(
    name: 'app:clean-mobile',
    description: 'Normalize German mobile numbers from a CSV and store them in PostgreSQL.'
)]
final class CleanMobileCommand extends Command
{
    /** Name of the column that receives the normalised number. */
    private const OUTPUT_COLUMN = 'HANDY_E164';

    /** Number of persisted entities after which the entity manager is flushed and cleared. */
    private const BATCH_SIZE = 100;

    /** German mobile prefixes (the part *after* the leading 0). */
    private const MOBILE_PREFIXES = [
        '151', '152', '155', '157', '159',
        '160', '162', '163', '164', '165', '166', '167', '168', '169',
        '170', '171', '172', '173', '174', '175', '176', '177', '178', '179',
    ];

    public function __construct(
        private readonly EntityManagerInterface $em
    ) {
        parent::__construct();
    }

    /**
     * Declares the required input path and the optional output path arguments.
     */
    protected function configure(): void
    {
        $this
            ->addArgument('inputCsv', InputArgument::REQUIRED, 'Path to the source CSV file')
            ->addArgument(
                'outputCsv',
                InputArgument::OPTIONAL,
                'Path to the cleaned CSV (defaults to cleaned_<input>.csv)'
            );
    }

    /**
     * Reads the input CSV, writes the cleaned CSV and persists the valid numbers.
     *
     * @return int Command::SUCCESS after processing, Command::FAILURE when the
     *             input file is unusable or the output would overwrite it
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $io = new SymfonyStyle($input, $output);

        // -------------------------------------------------------------
        // 1. Resolve file paths
        // -------------------------------------------------------------
        $inputPath  = $input->getArgument('inputCsv');
        $outputPath = $input->getArgument('outputCsv')
            ?? sprintf('cleaned_%s', basename($inputPath));

        if (!is_file($inputPath) || !is_readable($inputPath)) {
            $io->error("Input file does not exist or is not readable: $inputPath");

            return Command::FAILURE;
        }

        // The writer opens its target with 'w+', which truncates it. If both
        // paths resolve to the same file the input would be emptied before a
        // single record has been read, so refuse to continue.
        $resolvedOutput = realpath($outputPath);
        if ($resolvedOutput !== false && $resolvedOutput === realpath($inputPath)) {
            $io->error("Output file must differ from the input file: $outputPath");

            return Command::FAILURE;
        }

        // -------------------------------------------------------------
        // 2. CSV reader / writer (semicolon-separated)
        // -------------------------------------------------------------
        $csvReader = Reader::createFromPath($inputPath, 'r');
        $csvReader->setDelimiter(';');
        $csvReader->setHeaderOffset(0); // first line = header
        $header = $csvReader->getHeader();

        // Ensure the extra column exists in the header
        if (!in_array(self::OUTPUT_COLUMN, $header, true)) {
            $header[] = self::OUTPUT_COLUMN;
        }

        $csvWriter = Writer::createFromPath($outputPath, 'w+');
        $csvWriter->setDelimiter(';');
        $csvWriter->insertOne($header); // write header row

        // -------------------------------------------------------------
        // 3. Process every record
        // -------------------------------------------------------------
        $rowCount   = 0;
        $validCount = 0;
        $invalid    = 0;

        foreach ($csvReader->getRecords() as $row) {
            $rowCount++;

            // First try HANDY_*, then the generic VORWAHL/DURCHWAHL pair.
            $raw = self::buildRawNumber($row, 'HANDY_VORWAHL', 'HANDY_DURCHWAHL')
                ?? self::buildRawNumber($row, 'VORWAHL', 'DURCHWAHL');

            $normalised = $raw === null ? null : self::normaliseTo0049($raw);
            $isValid    = $normalised !== null && self::isGermanMobile($normalised);

            // Every record is written; the extra column stays empty unless valid.
            $row[self::OUTPUT_COLUMN] = $isValid ? $normalised : '';
            $csvWriter->insertOne($row);

            if (!$isValid) {
                // Counts rows without any number as well as rejected numbers,
                // matching the "Invalid / empty numbers" summary line.
                $invalid++;

                continue;
            }

            $contact = new Contacts();
            $contact->setPhoneNumber($normalised);
            $contact->setDueDate((new \DateTime('tomorrow'))->setTime(16, 0, 0));
            $contact->setContacted(false);

            // Persist while streaming instead of collecting all entities first,
            // so that clear() really releases the already flushed ones.
            $this->em->persist($contact);
            $validCount++;

            if ($validCount % self::BATCH_SIZE === 0) {
                $this->em->flush();
                $this->em->clear();
            }
        }

        // Flush whatever is left of the last, incomplete batch.
        if ($validCount > 0) {
            $this->em->flush();
            $this->em->clear();
        }

        // -------------------------------------------------------------
        // 4. Output a short summary
        // -------------------------------------------------------------
        $io->success('Processing completed.');
        $io->listing([
            "Rows read : $rowCount",
            "Valid mobile numbers : " . $validCount,
            "Invalid / empty numbers : $invalid",
            "Cleaned CSV written to : $outputPath",
        ]);

        return Command::SUCCESS;
    }

    /**
     * Concatenates the trimmed prefix and number columns of a record.
     *
     * @param array<array-key, mixed> $row one CSV record keyed by column name
     *
     * @return string|null the concatenation, or null when both columns are
     *                     missing or blank
     */
    private static function buildRawNumber(array $row, string $prefixCol, string $numberCol): ?string
    {
        $prefix = trim($row[$prefixCol] ?? '');
        $number = trim($row[$numberCol] ?? '');

        if ($prefix === '' && $number === '') {
            return null;
        }

        return $prefix . $number;
    }

    /**
     * Strips everything that is not a digit or '+', drops leading '+' signs and
     * converts the rest to the "0049…" format.
     *
     * A "0049" or "49" country code is removed first, then a single leading
     * trunk zero, so that "0151…", "49151…", "+49 151…" and "+49 (0)151…" all
     * end up as "0049151…".
     */
    private static function normaliseTo0049(string $raw): string
    {
        // preg_replace() returns null on a PCRE error; treat that as "no digits".
        $digits = preg_replace('/[^\d+]/', '', $raw) ?? '';
        $digits = ltrim($digits, '+');

        if (str_starts_with($digits, '0049')) {
            $national = substr($digits, 4);
        } elseif (str_starts_with($digits, '49')) {
            $national = substr($digits, 2);
        } else {
            // National notation, or already a plain subscriber (e.g. 15112345678).
            $national = $digits;
        }

        // Drop exactly one trunk zero: "0151…" as well as the "(0)" of "+49 (0)151…".
        if (str_starts_with($national, '0')) {
            $national = substr($national, 1);
        }

        return '0049' . $national;
    }

    /**
     * Small, deterministic validation of a number in "0049…" form.
     *
     * The three digits after "0049" must be one of MOBILE_PREFIXES and the
     * remainder must consist of 6 to 10 digits.
     */
    private static function isGermanMobile(string $number): bool
    {
        if (!str_starts_with($number, '0049')) {
            return false;
        }

        $prefix     = substr($number, 4, 3); // directly after 0049
        $subscriber = substr($number, 7);

        if (!in_array($prefix, self::MOBILE_PREFIXES, true)) {
            return false;
        }

        return preg_match('/^\d{6,10}$/', $subscriber) === 1;
    }
}
|
||||||
Loading…
Reference in New Issue
Block a user