theloftstore/public/userarea/extract_prices_table.php
2025-11-17 15:05:10 +01:00

138 lines
4.1 KiB
PHP

<?php
header("Content-Type: application/json; charset=utf-8");
/******************************************************
* CONFIG
******************************************************/
// SECURITY: the PDF.co API key used to be hard-coded in this file. It is now
// read from the PDFCO_API_KEY environment variable. The key that was committed
// to the repository must be considered leaked and should be rotated.
$apiKey = getenv("PDFCO_API_KEY");
if (!is_string($apiKey) || $apiKey === "") {
    // Fallback for setups where the variable is only visible through $_SERVER.
    $apiKey = $_SERVER["PDFCO_API_KEY"] ?? "";
}
if (!is_string($apiKey) || $apiKey === "") {
    // Fail fast: without a key every upstream call would be rejected and the
    // script would otherwise answer with empty result tables.
    http_response_code(500);
    echo json_encode([
        "status" => "error",
        "message" => "PDFCO_API_KEY is not configured",
    ], JSON_PRETTY_PRINT);
    exit;
}
// Default source document, used when the request does not name one.
$fileToken = "filetoken://61a780917907f86a340290d22c449357dc68950e9066bd67b2";
// If ?token=xxx is passed, use that value instead.
// Only a plain string is accepted: PHP turns ?token[]=x into an array, which
// would otherwise be forwarded as-is in the "url" field of the API payload.
// NOTE(review): the value is sent upstream unchanged, so any caller can make
// the API fetch an arbitrary source — consider restricting the accepted
// schemes/hosts if this endpoint is reachable by untrusted users.
if (!empty($_GET['token'])) {
    if (!is_string($_GET['token'])) {
        http_response_code(400);
        echo json_encode([
            "status" => "error",
            "message" => "Parameter 'token' must be a string",
        ], JSON_PRETTY_PRINT);
        exit;
    }
    $fileToken = $_GET['token'];
}
/******************************************************
* 1) EXTRACT CSV (only pages with real tables)
******************************************************/
$endpointCsv = "https://api.pdf.co/v1/pdf/convert/to/csv";
// NOTE(review): confirm "detectTables", "extractColumnBy", "csvSeparator" and
// the "all" value of "pages" against the PDF.co API reference. If the API
// ignores unknown options, the ';' separator assumed by the CSV parser further
// down would not actually be applied.
$payloadCsv = [
    "url" => $fileToken,
    "inline" => true,
    "detectTables" => true,
    "pages" => "all",
    "extractColumnBy" => "vertical_lines",
    "csvSeparator" => ";"
];
$ch = curl_init($endpointCsv);
curl_setopt_array($ch, [
    CURLOPT_POST => true,
    CURLOPT_HTTPHEADER => [
        "Content-Type: application/json",
        "x-api-key: $apiKey"
    ],
    CURLOPT_POSTFIELDS => json_encode($payloadCsv),
    CURLOPT_RETURNTRANSFER => true,
    // Without explicit limits cURL waits indefinitely on a stalled upstream
    // and keeps this PHP worker blocked.
    CURLOPT_CONNECTTIMEOUT => 10,
    CURLOPT_TIMEOUT => 120,
]);
$csvResponse = curl_exec($ch);
if ($csvResponse === false) {
    // Transport-level failure (DNS, TLS, timeout, ...): record it instead of
    // silently feeding `false` into json_decode().
    error_log("PDF.co CSV extraction failed: " . curl_error($ch));
}
curl_close($ch);
// $csvJson is the decoded response array, or null when the call failed or the
// body was not a JSON object/array.
$csvJson = is_string($csvResponse) ? json_decode($csvResponse, true) : null;
if (!is_array($csvJson)) {
    if (is_string($csvResponse)) {
        error_log("PDF.co CSV extraction returned an unexpected body: " . json_last_error_msg());
    }
    $csvJson = null;
}
/******************************************************
* 2) ALSO EXTRACT PLAIN TEXT (for NON-tabular pages)
******************************************************/
$endpointText = "https://api.pdf.co/v1/pdf/convert/to/text";
// NOTE(review): confirm that "all" is an accepted value for "pages" in the
// PDF.co API reference.
$payloadText = [
    "url" => $fileToken,
    "inline" => true,
    "pages" => "all"
];
$ch = curl_init($endpointText);
curl_setopt_array($ch, [
    CURLOPT_POST => true,
    CURLOPT_HTTPHEADER => [
        "Content-Type: application/json",
        "x-api-key: $apiKey"
    ],
    CURLOPT_POSTFIELDS => json_encode($payloadText),
    CURLOPT_RETURNTRANSFER => true,
    // Without explicit limits cURL waits indefinitely on a stalled upstream
    // and keeps this PHP worker blocked.
    CURLOPT_CONNECTTIMEOUT => 10,
    CURLOPT_TIMEOUT => 120,
]);
$textResponse = curl_exec($ch);
if ($textResponse === false) {
    // Transport-level failure (DNS, TLS, timeout, ...): record it instead of
    // silently feeding `false` into json_decode().
    error_log("PDF.co text extraction failed: " . curl_error($ch));
}
curl_close($ch);
// $textJson is the decoded response array, or null when the call failed or the
// body was not a JSON object/array.
$textJson = is_string($textResponse) ? json_decode($textResponse, true) : null;
if (!is_array($textJson)) {
    if (is_string($textResponse)) {
        error_log("PDF.co text extraction returned an unexpected body: " . json_last_error_msg());
    }
    $textJson = null;
}
/******************************************************
* 3) CSV PARSER → FORMAT A TABLES
******************************************************/

/**
 * Converts one price cell of the extracted CSV into a float.
 *
 * Accepted notations:
 *  - "12,5"      → 12.5    (comma as decimal separator)
 *  - "12.5"      → 12.5    (plain dot decimal)
 *  - "5.475"     → 5475.0  (dot-grouped thousands, matching how the text
 *                           parser in this file reads the same notation)
 *  - "1.234,50"  → 1234.5  (grouped thousands plus comma decimals)
 *
 * @return float|null The parsed amount, or null when the cell is empty or not numeric.
 */
function parseFormatoAPrice(string $cell): ?float
{
    $value = trim($cell);
    if ($value === "") {
        return null;
    }
    // Groups of exactly three digits after each dot, with no leading zero,
    // are thousands separators rather than a decimal point.
    if (preg_match('/^[1-9]\d{0,2}(?:\.\d{3})+(?:,\d+)?$/', $value) === 1) {
        $value = str_replace(".", "", $value);
    }
    $value = str_replace(",", ".", $value);

    return is_numeric($value) ? (float) $value : null;
}

/**
 * Parses the ';'-separated CSV text into "format A" rows.
 *
 * A line is kept only when it has at least two columns and its first column
 * looks like an item code: three digits, whitespace, then two or three
 * uppercase letters/digits. Every column after the second is read as a price.
 *
 * @return array<int, array{codice: string, descrizione: string, prezzi: array<int, float|null>}>
 */
function parseFormatoACsv(string $csv): array
{
    $parsed = [];
    // Split on LF and CRLF so a trailing "\r" never ends up inside the last cell.
    $lines = preg_split('/\r?\n/', $csv) ?: [];
    foreach ($lines as $line) {
        // All arguments are passed explicitly: relying on the default escape
        // character is deprecated as of PHP 8.4.
        $cols = str_getcsv($line, ";", '"', "\\");
        if (count($cols) < 2) {
            continue;
        }
        $codice = trim((string) $cols[0]);
        if (!preg_match('/^\d{3}\s+[0-9A-Z]{2,3}$/', $codice)) {
            continue;
        }
        $prezzi = [];
        foreach (array_slice($cols, 2) as $cell) {
            $prezzi[] = parseFormatoAPrice((string) $cell);
        }
        $parsed[] = [
            "codice" => $codice,
            "descrizione" => trim((string) $cols[1]),
            "prezzi" => $prezzi,
        ];
    }

    return $parsed;
}

$formatoA = [];
// The "csv" key is still honoured for backward compatibility. The fallback to
// "body" mirrors the text parser of this file, which reads inline content from
// that key. NOTE(review): confirm against the PDF.co response schema.
$csvText = $csvJson["csv"] ?? $csvJson["body"] ?? null;
if (is_string($csvText)) {
    $formatoA = parseFormatoACsv($csvText);
}
/******************************************************
* 4) TEXT PARSER → FORMAT B (pages like 499…)
******************************************************/
$formatoB = [];
if (isset($textJson["body"])) {
    // Item code such as "499 3A 14": three digits, one or two uppercase
    // letters/digits, then two digits, separated by whitespace.
    $codePattern = '/\b(\d{3}\s+[A-Z0-9]{1,2}\s+\d{2})\b/';
    // Price such as "5.475", "5475" or "6.085": one or two digits, an
    // optional dot, then exactly three digits.
    $pricePattern = '/\b\d{1,2}\.?\d{3}\b/';

    // Look for CODE + PRICE combinations repeated on the same line.
    foreach (explode("\n", $textJson["body"]) as $textLine) {
        $codeHits = [];
        $priceHits = [];
        // A line is only interesting when it holds at least one code AND at
        // least one price.
        if (!preg_match_all($codePattern, $textLine, $codeHits)) {
            continue;
        }
        if (!preg_match_all($pricePattern, $textLine, $priceHits)) {
            continue;
        }

        $codes = $codeHits[1];
        $prices = $priceHits[0];

        // Codes and prices are paired by position, so the line is used only
        // when both lists have the same length; otherwise it is skipped.
        if (count($codes) != count($prices)) {
            continue;
        }

        foreach ($codes as $position => $code) {
            $formatoB[] = [
                "codice" => $code,
                // The dot is a thousands separator here: "5.475" → 5475.
                "prezzo" => floatval(str_replace(".", "", $prices[$position]))
            ];
        }
    }
}
/******************************************************
* 5) OUTPUT
******************************************************/
// Collect upstream failures so the response no longer claims "ok" when
// nothing could be extracted at all.
// NOTE(review): assumes PDF.co responses carry an "error" flag and a
// "message" text — confirm against the API reference.
$upstreamErrors = [];
foreach (["csv" => $csvJson ?? null, "text" => $textJson ?? null] as $source => $response) {
    if (!is_array($response)) {
        $upstreamErrors[$source] = "No valid response from the extraction API";
    } elseif (!empty($response["error"])) {
        $upstreamErrors[$source] = is_string($response["message"] ?? null)
            ? $response["message"]
            : "The extraction API reported an error";
    }
}
// A single failed extraction still yields a partial, usable result; only
// when both failed is the whole request reported as an error.
$allFailed = count($upstreamErrors) === 2;

$result = [
    "status" => $allFailed ? "error" : "ok",
    "formatoA" => $formatoA,
    "formatoB" => $formatoB,
];
if ($upstreamErrors !== []) {
    $result["errors"] = $upstreamErrors;
}
if ($allFailed && !headers_sent()) {
    http_response_code(502);
}

// JSON_INVALID_UTF8_SUBSTITUTE keeps json_encode() from returning false (and
// the endpoint from answering with an empty body) when the extracted text
// contains malformed UTF-8 bytes.
$json = json_encode($result, JSON_PRETTY_PRINT | JSON_INVALID_UTF8_SUBSTITUTE);
if ($json === false) {
    error_log("Price table output could not be encoded: " . json_last_error_msg());
    if (!headers_sent()) {
        http_response_code(500);
    }
    $json = '{"status":"error","message":"Unable to encode the result"}';
}
echo $json;