<?php
header("Content-Type: application/json; charset=utf-8");

// ---------------------------------------------------------
// 1) CONFIGURATION
// ---------------------------------------------------------
// NOTE(review): this credential is hard-coded in source. Consider loading it
// from the environment or a non-versioned config file, and rotating the key.
$apiKey = "info@claudiosironi.com_Qfh02D7sAvi2tcx3ZchHpusNaBquCKhJw81fEnkHe2ersQDVOex4IokhCCzaFAz1";
$fileToken = "filetoken://61a780917907f86a340290d22c449357dc68950e9066bd67b2";

// Optional: the caller may override the file token via ?token=...
// Only a non-empty string is accepted; "?token[]=x" would otherwise put an
// array into the request payload.
// NOTE(review): the value is forwarded to PDF.co as the "url" field without
// further validation — confirm whether it should be restricted to filetoken:// URLs.
if (isset($_GET['token']) && is_string($_GET['token']) && $_GET['token'] !== "") {
    $fileToken = $_GET['token'];
}

// ---------------------------------------------------------
// 2) PDF.CO → JSON2 (ALL PAGES)
// ---------------------------------------------------------
$endpoint = "https://api.pdf.co/v1/pdf/convert/to/json2";

$payload = [
    "url" => $fileToken,
    "inline" => true,
    "detectTables" => true,
    "pages" => "all"
];

$ch = curl_init($endpoint);
curl_setopt_array($ch, [
    CURLOPT_POST => true,
    CURLOPT_HTTPHEADER => [
        "Content-Type: application/json",
        "x-api-key: $apiKey"
    ],
    CURLOPT_POSTFIELDS => json_encode($payload),
    CURLOPT_RETURNTRANSFER => true,
]);

$response = curl_exec($ch);

// With CURLOPT_RETURNTRANSFER, curl_exec() returns false only on transport
// failure. An empty body is a valid transfer and is left to the validation
// step below, which reports it as a response without pages.
if ($response === false) {
    // Read the error text before releasing the handle.
    $curlError = curl_error($ch);
    curl_close($ch);
    echo json_encode(["error" => "Errore CURL: " . $curlError]);
    exit;
}

curl_close($ch);

// Decodes to null when the body is not valid JSON; handled by the check below.
$json = json_decode($response, true);

// ---------------------------------------------------------
// 3) VALIDATION
// ---------------------------------------------------------
// The page list must exist and be an array, because it is iterated later on.
$pages = isset($json["body"]["document"]["page"])
    ? $json["body"]["document"]["page"]
    : null;

if (!is_array($pages)) {
    echo json_encode([
        "error" => true,
        "message" => "PDF.co JSON senza pagine valide",
        "raw" => $json
    ]);
    exit;
}
|
||
|
||
// ---------------------------------------------------------
// 4) HELPER FUNCTIONS
// ---------------------------------------------------------
|
||
|
||
/**
 * Tests whether a string is a "format A" code, e.g. "291 02F".
 *
 * The entire string must consist of three digits, at least one whitespace
 * character, and then two or three characters taken from 0-9 / A-Z.
 *
 * @param string $txt Candidate text.
 * @return int|false The preg_match() result: 1 on match, 0 on no match,
 *                   false if the regex engine reports an error.
 */
function isCodeTypeA($txt)
{
    $formatAPattern = '/^\d{3}\s+[0-9A-Z]{2,3}$/';
    $matchResult = preg_match($formatAPattern, $txt);

    return $matchResult;
}
|
||
|
||
/**
 * Extracts every "format C" block found in a line.
 *
 * A block is four whitespace-separated tokens, e.g. "499 3A 14 5.475":
 * a three-digit code, a 1-3 character variant (0-9 / A-Z), a two-digit
 * dimension, and a price written as 1-3 digits, a dot, and three digits.
 * The dot is removed before conversion, so "5.475" becomes 5475.0.
 *
 * @param string $line Text to scan; blocks may repeat within it.
 * @return array List of arrays with keys "codice", "variante", "dimensione"
 *               (strings) and "prezzo" (float), in order of appearance.
 *               Empty when no block is found.
 */
function extractTypeCBlocks($line)
{
    $blockPattern = '/(\d{3})\s+([0-9A-Z]{1,3})\s+(\d{2})\s+(\d{1,3}\.\d{3})/';
    preg_match_all($blockPattern, $line, $found, PREG_SET_ORDER);

    // One result entry per regex hit; index 0 is the full match and is ignored.
    return array_map(
        function ($hit) {
            $priceDigits = str_replace(".", "", $hit[4]);

            return [
                "codice" => $hit[1],
                "variante" => $hit[2],
                "dimensione" => $hit[3],
                "prezzo" => floatval($priceDigits)
            ];
        },
        $found
    );
}
|
||
|
||
// ---------------------------------------------------------
// 5) PARSER FOR A SINGLE (RECONSTRUCTED) LINE
// ---------------------------------------------------------

/**
 * Classifies one reconstructed text line and extracts its fields.
 *
 * The formats are tried in this order and the first match wins:
 *  - A:  "<3 digits> <2-3 chars 0-9/A-Z> <rest>" where <rest> contains at
 *        least two price-like numbers. The description is <rest> with the
 *        numbers removed.
 *  - B1: "<3 digits> <1-3 letters A-Z> <description> <4 or more prices>".
 *  - C:  any line containing one or more blocks recognised by
 *        extractTypeCBlocks(). The description is the line with those
 *        blocks removed.
 *
 * @param string $line A single reconstructed line.
 * @return array|null For A/B1: keys "type", "codice", "descrizione", "prezzi"
 *                    (list of floats). For C: keys "type", "descrizione",
 *                    "varianti". null when no format matches.
 */
function parseLine($line)
{
    // A price-like token: 1-3 digits, a dot or comma, then 2-3 digits.
    $pricePattern = '/\d{1,3}[.,]\d{2,3}/';

    // Converts a matched price token to a float. Shared by formats A and B1.
    $toFloat = function ($raw) {
        // A dot followed by exactly two digits cannot be a thousands group,
        // so "12.50" is read as 12.5 rather than being stripped to 1250.
        if (preg_match('/^\d{1,3}\.\d{2}$/', $raw)) {
            return floatval($raw);
        }

        // Otherwise: dot = thousands separator, comma = decimal separator.
        return floatval(str_replace(",", ".", str_replace(".", "", $raw)));
    };

    // FORMAT A
    if (preg_match('/^(\d{3}\s+[0-9A-Z]{2,3})\s+(.*)$/', $line, $mA)) {
        $resto = trim($mA[2]);

        preg_match_all($pricePattern, $resto, $matchesPrezzi);

        // With fewer than two prices the line falls through to the other formats.
        if (count($matchesPrezzi[0]) >= 2) {
            return [
                "type" => "A",
                "codice" => trim($mA[1]),
                "descrizione" => trim(preg_replace($pricePattern, '', $resto)),
                "prezzi" => array_map($toFloat, $matchesPrezzi[0])
            ];
        }
    }

    // FORMAT B1
    // NOTE(review): the pattern has no "u" modifier, so the À-ù range is
    // presumably evaluated byte-wise on UTF-8 input — confirm before changing,
    // since adding "u" would narrow the set of accepted characters.
    if (preg_match(
        '/^(\d{3}\s+[A-Z]{1,3})\s+([A-Za-zÀ-ù0-9\s]+?)\s+((?:\d{1,3}[.,]\d{2,3}\s*){4,})$/',
        $line,
        $mB
    )) {
        preg_match_all($pricePattern, trim($mB[3]), $vv);

        return [
            "type" => "B1",
            "codice" => trim($mB[1]),
            "descrizione" => trim($mB[2]),
            "prezzi" => array_map($toFloat, $vv[0])
        ];
    }

    // FORMAT C (blocks repeated within the line)
    $blocks = extractTypeCBlocks($line);

    if (!empty($blocks)) {
        $desc = $line;

        // Remove each extracted block from the line; what remains is the description.
        foreach ($blocks as $b) {
            $pattern = sprintf(
                '/%s\s+%s\s+%s\s+\d{1,3}\.\d{3}/',
                $b["codice"],
                $b["variante"],
                $b["dimensione"]
            );
            $desc = preg_replace($pattern, "", $desc);
        }

        return [
            "type" => "C",
            "descrizione" => trim($desc),
            "varianti" => $blocks
        ];
    }

    // No format matched.
    return null;
}
|
||
|
||
// ---------------------------------------------------------
// 6) PROCESS ALL PAGES
// ---------------------------------------------------------
$items = [];

foreach ($pages as $p) {
    // Skip pages that carry no iterable row list.
    if (!isset($p["row"]) || !is_array($p["row"])) {
        continue;
    }

    // Rebuild the actual text lines ("flat lines"): one string per row,
    // made of the row's non-empty cell texts joined by single spaces.
    $flatLines = [];

    foreach ($p["row"] as $row) {
        // A row without an iterable column list contributes nothing.
        if (!isset($row["column"]) || !is_array($row["column"])) {
            continue;
        }

        $line = "";
        foreach ($row["column"] as $col) {
            if (!isset($col["text"]["text"]) || !is_scalar($col["text"]["text"])) {
                continue;
            }

            // Compare against "" explicitly: empty() would also discard a
            // cell whose text is "0", which is a legitimate value.
            $cellText = (string) $col["text"]["text"];
            if ($cellText !== "") {
                $line .= " " . trim($cellText);
            }
        }

        $line = trim($line);
        if ($line !== "") {
            $flatLines[] = $line;
        }
    }

    // Run the parser on every rebuilt line; unmatched lines are dropped.
    foreach ($flatLines as $l) {
        $parsed = parseLine($l);
        if ($parsed !== null) {
            $items[] = $parsed;
        }
    }
}

// ---------------------------------------------------------
// 7) FINAL OUTPUT
// ---------------------------------------------------------
echo json_encode([
    "status" => "ok",
    "total" => count($items),
    "items" => $items
], JSON_PRETTY_PRINT);
|