theloftstore/public/userarea/extract_prices_pdfco.php
2025-11-17 15:05:10 +01:00

219 lines
6.0 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
header("Content-Type: application/json; charset=utf-8");
// ---------------------------------------------------------
// 1) CONFIGURATION
// ---------------------------------------------------------
// SECURITY: a key committed to source control should be treated as leaked.
// The PDFCO_API_KEY environment variable takes precedence; the literal below
// is kept only as a backward-compatible fallback and should be rotated and
// removed once the environment variable is deployed.
$envApiKey = getenv("PDFCO_API_KEY");
$apiKey = (is_string($envApiKey) && $envApiKey !== "")
    ? $envApiKey
    : "info@claudiosironi.com_Qfh02D7sAvi2tcx3ZchHpusNaBquCKhJw81fEnkHe2ersQDVOex4IokhCCzaFAz1";
$fileToken = "filetoken://37bb07a8561409d281b32bcd023ff3dc33de92a8f65aae5af7";
// Optional: the source file can be overridden with ?token=...
// Only a non-empty string is accepted: "?token[]=x" would otherwise put an
// array into the request payload.
// NOTE(review): the value is forwarded unvalidated as the "url" field, so any
// caller can make the conversion service fetch an arbitrary source using this
// account's key. Consider restricting it (e.g. to the "filetoken://" scheme)
// or putting the endpoint behind authentication.
if (isset($_GET['token']) && is_string($_GET['token']) && $_GET['token'] !== "") {
    $fileToken = $_GET['token'];
}
// ---------------------------------------------------------
// 2) PDF.CO -> JSON2 CONVERSION
// ---------------------------------------------------------
$endpoint = "https://api.pdf.co/v1/pdf/convert/to/json2";
$payload = [
    "url" => $fileToken,
    "inline" => true,
    "detectTables" => true,
    "pages" => ""
];
$ch = curl_init($endpoint);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_HTTPHEADER, [
    "Content-Type: application/json",
    "x-api-key: $apiKey"
]);
curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($payload));
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
// Bound the request so a stalled connection cannot block the worker forever.
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
curl_setopt($ch, CURLOPT_TIMEOUT, 120);
$response = curl_exec($ch);
// curl_exec() returns false on transport failure; an empty body is equally
// unusable, so both are reported through the same error payload.
if ($response === false || $response === "") {
    $curlError = curl_error($ch);
    curl_close($ch);
    echo json_encode(["error" => "Errore CURL: " . $curlError]);
    exit;
}
curl_close($ch);
$json = json_decode($response, true);
// Collect the rows of every page into one flat list.
// The original code only accepted body.document.page.row (one page object).
// NOTE(review): presumably a multi-page document yields a LIST of page objects
// under "page", and a page with a single row yields one row object instead of
// a list - confirm against real PDF.co output. Both shapes are normalised
// here; the single-page/list-of-rows case behaves exactly as before.
$pages = $json["body"]["document"]["page"] ?? null;
if (is_array($pages) && isset($pages["row"])) {
    $pages = [$pages];
}
$rows = [];
$foundRows = false;
if (is_array($pages)) {
    foreach ($pages as $page) {
        if (!is_array($page) || !isset($page["row"]) || !is_array($page["row"])) {
            continue;
        }
        $foundRows = true;
        $pageRows = $page["row"];
        if (isset($pageRows["column"])) {
            // A lone row object: wrap it so it is iterated as one row.
            $pageRows = [$pageRows];
        }
        foreach ($pageRows as $pageRow) {
            $rows[] = $pageRow;
        }
    }
}
if (!$foundRows) {
    echo json_encode([
        "error" => true,
        "message" => "JSON PDF.co non contiene tabelle utili",
        "raw" => $json
    ]);
    exit;
}
// ---------------------------------------------------------
// 3) UTILITY FUNCTIONS
// ---------------------------------------------------------
// Tests whether $txt is a "format A" code such as "291 02F": exactly three
// digits, whitespace, then two or three characters from 0-9 / A-Z.
// Returns the raw preg_match() result (1 on match, 0 otherwise).
function isCodeTypeA($txt)
{
    $codePattern = '/^\d{3}\s+[0-9A-Z]{2,3}$/';
    $matched = preg_match($codePattern, $txt);

    return $matched;
}
// Extracts every block shaped like "499 3A 14 5.475" found in $line.
// Returns a list (possibly empty) with one entry per block:
//   codice     - the three-digit code, as a string
//   variante   - one to three characters from 0-9 / A-Z
//   dimensione - the two-digit size, as a string
//   prezzo     - the price as a float, with the dot removed ("5.475" -> 5475.0)
function extractTypeCBlocks($line)
{
    $blockPattern = '/(\d{3})\s+([0-9A-Z]{1,3})\s+(\d{2})\s+(\d{1,2}\.\d{3})/';
    $hits = [];
    preg_match_all($blockPattern, $line, $hits, PREG_SET_ORDER);

    return array_map(
        function ($hit) {
            // Index 0 is the full match; 1-4 are the captured groups.
            list(, $code, $variant, $size, $rawPrice) = $hit;

            return [
                "codice" => $code,
                "variante" => $variant,
                "dimensione" => $size,
                "prezzo" => floatval(str_replace(".", "", $rawPrice))
            ];
        },
        $hits
    );
}
// ---------------------------------------------------------
// 4) SINGLE PARSER (HANDLES ALL FORMATS)
// ---------------------------------------------------------
// Every row is flattened into one text line, which is then tried against the
// known layouts in this order: B1 (legacy, 8+ price columns), A, C.
// The first layout that matches produces one entry in $items; lines matching
// none of them are ignored.
$items = [];

// One price token: 1-2 digits, a dot or comma, then 2-3 digits.
$priceRegex = '/\d{1,2}[.,]\d{2,3}/';

// Converts a price token to float: dots are dropped, then a comma becomes the
// decimal point. So "5.475" -> 5475.0 while "10,808" -> 10.808.
// NOTE(review): confirm that comma-separated tokens are really decimals and
// not thousands; the conversion is kept exactly as it was.
$toFloat = function ($v) {
    return floatval(str_replace(",", ".", str_replace(".", "", $v)));
};

foreach ($rows as $row) {
    // Rebuild the text line from the row's columns. A row without a usable
    // "column" entry is skipped instead of raising a warning, which would
    // corrupt the JSON output.
    $columns = $row["column"] ?? [];
    if (!is_array($columns)) {
        continue;
    }
    if (isset($columns["text"])) {
        // NOTE(review): presumably a row with a single column is delivered as
        // one column object rather than a list - confirm. Wrap it so its text
        // is not silently dropped.
        $columns = [$columns];
    }
    $line = "";
    foreach ($columns as $col) {
        if (!isset($col["text"]["text"]) || !is_scalar($col["text"]["text"])) {
            continue;
        }
        $t = trim((string) $col["text"]["text"]);
        if ($t !== "") {
            $line .= " " . $t;
        }
    }
    $line = trim($line);
    if ($line === "") {
        continue;
    }

    // ---------------------------------------------------------
    // 4B - LEGACY CASSINA FORMAT (003 BC cromata ... 8 prices)
    // ---------------------------------------------------------
    // Tested BEFORE format A: any line accepted here with a 2-3 letter code
    // is also accepted by the looser format A pattern, so with the previous
    // order this branch (including its own "003 BC cromata" example) was
    // only reachable for one-letter codes.
    // NOTE(review): the pattern has no /u modifier, so the accented range is
    // matched bytewise - confirm accented descriptions are captured as intended.
    if (preg_match(
        '/^(\d{3}\s+[A-Z]{1,3})\s+([A-Za-zÀ-ù\s]+?)\s+((?:\d{1,2}[.,]\d{2,3}\s*){8,})$/',
        $line,
        $mB
    )) {
        preg_match_all($priceRegex, trim($mB[3]), $vv);
        $vals = array_map($toFloat, $vv[0]);
        // Price columns are labelled positionally; any column beyond the
        // known categories gets a generic COL_<n> key.
        $categorie = ["Z", "Y", "X", "O", "L", "F/COL", "E/COM", "ALTRO"];
        $prezziMappati = [];
        foreach ($vals as $i => $v) {
            $key = $categorie[$i] ?? ("COL_" . ($i + 1));
            $prezziMappati[$key] = $v;
        }
        $items[] = [
            "type" => "B1",
            "codice" => trim($mB[1]),
            "descrizione" => trim($mB[2]),
            "prezzi" => $prezziMappati
        ];
        continue;
    }

    // ---------------------------------------------------------
    // 4A - CASSINA FORMAT A (handles concatenated prices)
    // ---------------------------------------------------------
    if (preg_match('/^(\d{3}\s+[0-9A-Z]{2,3})\s+(.*)$/', $line, $mA)) {
        $resto = trim($mA[2]);
        // Pull out ALL price tokens, even when glued together
        // (e.g. "10,808.9906.460...").
        preg_match_all($priceRegex, $resto, $matchesPrezzi);
        if (count($matchesPrezzi[0]) >= 2) {
            $items[] = [
                "type" => "A",
                "codice" => trim($mA[1]),
                // Description is whatever remains once the prices are removed.
                "descrizione" => trim(preg_replace($priceRegex, '', $resto)),
                "prezzi" => array_map($toFloat, $matchesPrezzi[0])
            ];
            continue;
        }
        // Fewer than two prices: fall through and try format C.
    }

    // ---------------------------------------------------------
    // 4C - LISTINO2 FORMAT (499 3A 14 5.475 repeated)
    // ---------------------------------------------------------
    $blocks = extractTypeCBlocks($line);
    if (!empty($blocks)) {
        // The description is the line with every variant block stripped out.
        $desc = $line;
        foreach ($blocks as $b) {
            $pattern = sprintf(
                '/%s\s+%s\s+%s\s+\d{1,2}\.\d{3}/',
                preg_quote($b["codice"], '/'),
                preg_quote($b["variante"], '/'),
                preg_quote($b["dimensione"], '/')
            );
            $desc = preg_replace($pattern, "", $desc);
        }
        $items[] = [
            "type" => "C",
            "descrizione" => trim($desc),
            "varianti" => $blocks
        ];
        continue;
    }
}
// ---------------------------------------------------------
// 5) FINAL OUTPUT
// ---------------------------------------------------------
// Emit every parsed item, together with a count, as pretty-printed JSON.
$result = [
    "status" => "ok",
    "total" => count($items),
    "items" => $items
];
echo json_encode($result, JSON_PRETTY_PRINT);