2025-11-17 15:05:10 +01:00

218 lines
5.6 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
header("Content-Type: application/json; charset=utf-8");
// ---------------------------------------------------------
// 1) CONFIGURATION
// ---------------------------------------------------------
// NOTE(review): the PDF.co API key below is committed in source. It should be
// rotated and supplied only through the PDFCO_API_KEY environment variable;
// the literal is kept solely as a backward-compatible fallback.
$envApiKey = getenv("PDFCO_API_KEY");
$apiKey = (is_string($envApiKey) && $envApiKey !== "")
    ? $envApiKey
    : "info@claudiosironi.com_Qfh02D7sAvi2tcx3ZchHpusNaBquCKhJw81fEnkHe2ersQDVOex4IokhCCzaFAz1";
// Default file reference used when the request does not supply one.
$fileToken = "filetoken://61a780917907f86a340290d22c449357dc68950e9066bd67b2";
// Optional override through the "token" query parameter. Only a non-empty
// string is accepted: PHP turns ?token[]=x into an array, which would
// otherwise be forwarded as the "url" field of the request.
// NOTE(review): this value is caller-controlled and is sent to PDF.co together
// with the API key — consider restricting it if arbitrary values are not intended.
if (isset($_GET['token']) && is_string($_GET['token']) && $_GET['token'] !== "") {
    $fileToken = $_GET['token'];
}
// ---------------------------------------------------------
// 2) PDF.CO → JSON2 (TUTTE LE PAGINE)
// ---------------------------------------------------------
$endpoint = "https://api.pdf.co/v1/pdf/convert/to/json2";
// Options sent to the endpoint as the JSON request body.
$payload = [
    "url" => $fileToken,
    "inline" => true,
    "detectTables" => true,
    "pages" => "all",
];
$ch = curl_init($endpoint);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_HTTPHEADER, [
    "Content-Type: application/json",
    "x-api-key: $apiKey",
]);
curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($payload));
// Return the body from curl_exec() instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
// NOTE(review): no timeout option is set on this handle — consider adding one.
$response = curl_exec($ch);
// Read the error text before releasing the handle, so the handle is closed on
// the failure path as well as on success.
$curlError = curl_error($ch);
curl_close($ch);
// curl_exec() yields false on failure; an empty body is rejected as well
// because there is nothing to decode. Any other body (including "0") is
// passed on to json_decode() and judged by the validation step.
if ($response === false || $response === "") {
    echo json_encode(["error" => "Errore CURL: " . $curlError]);
    exit;
}
$json = json_decode($response, true);
// ---------------------------------------------------------
// 3) VALIDAZIONE
// ---------------------------------------------------------
// Pull out the page list. isset() is false when any level of the path is
// missing or null, in which case the decoded response is echoed back as an
// error payload and the script stops.
$pages = isset($json["body"]["document"]["page"])
    ? $json["body"]["document"]["page"]
    : null;
if ($pages === null) {
    $failure = [
        "error" => true,
        "message" => "PDF.co JSON senza pagine valide",
        "raw" => $json,
    ];
    echo json_encode($failure);
    exit;
}
// ---------------------------------------------------------
// 4) FUNZIONI UTILI
// ---------------------------------------------------------
/**
 * Tests whether a string is a "format A" code: three digits, whitespace, then
 * two or three characters drawn from 0-9 / A-Z (example: "291 02F").
 *
 * @param string $txt Candidate text.
 * @return int|false Raw preg_match() result: 1 on match, 0 on no match,
 *                   false if the regex engine reports an error.
 */
function isCodeTypeA($txt)
{
    $typeAPattern = '/^\d{3}\s+[0-9A-Z]{2,3}$/';
    $outcome = preg_match($typeAPattern, $txt);

    return $outcome;
}
/**
 * Finds every "format C" block in a line. A block is four whitespace-separated
 * tokens: a 3-digit code, a 1-3 character variant (0-9 / A-Z), a 2-digit
 * dimension and a price written as 1-3 digits, a dot and 3 digits
 * (example: "499 3A 14 5.475").
 *
 * @param string $line Text to scan.
 * @return array List of blocks in order of appearance, each with the keys
 *               "codice", "variante", "dimensione" (strings as captured) and
 *               "prezzo" (float obtained after removing the dots, so "5.475"
 *               becomes 5475.0). Empty when nothing matches.
 */
function extractTypeCBlocks($line)
{
    $blockPattern = '/(\d{3})\s+([0-9A-Z]{1,3})\s+(\d{2})\s+(\d{1,3}\.\d{3})/';
    preg_match_all($blockPattern, $line, $hits, PREG_SET_ORDER);

    $blocks = [];
    foreach ($hits as $hit) {
        // Index 0 is the whole match; the four capture groups follow.
        list(, $code, $variant, $size, $rawPrice) = $hit;
        $digitsOnly = str_replace(".", "", $rawPrice);
        $blocks[] = [
            "codice" => $code,
            "variante" => $variant,
            "dimensione" => $size,
            "prezzo" => floatval($digitsOnly),
        ];
    }

    return $blocks;
}
// ---------------------------------------------------------
// 5) PARSER PER UNA SINGOLA RIGA (ricostruita)
// ---------------------------------------------------------
/**
 * Parses one reconstructed text line, trying three layouts in order and
 * returning the first that applies.
 *
 * - Format A: the line starts with a code (3 digits, whitespace, 2-3 chars of
 *   0-9 / A-Z) and the remainder contains at least two price tokens. With
 *   fewer than two, the line falls through to the next formats.
 * - Format B1: a code with a 1-3 letter suffix, a description, then four or
 *   more price tokens running to the end of the line.
 * - Format C: one or more blocks found by extractTypeCBlocks(); the blocks are
 *   stripped from the line and what is left becomes the description.
 *
 * A price token is 1-3 digits, "." or ",", then 2-3 digits. It is converted by
 * deleting every "." and then turning "," into ".", so "12,50" gives 12.5 and
 * "1.234" gives 1234.0.
 *
 * @param string $line Line to parse.
 * @return array|null For A / B1: keys "type", "codice", "descrizione",
 *                    "prezzi" (list of floats). For C: keys "type",
 *                    "descrizione", "varianti". Null when no format applies.
 */
function parseLine($line)
{
    $pricePattern = '/\d{1,3}[.,]\d{2,3}/';
    // Shared conversion from a price token to a float (see docblock).
    $toNumber = static function ($raw) {
        $withoutDots = str_replace(".", "", $raw);
        return floatval(str_replace(",", ".", $withoutDots));
    };

    // Format A
    if (preg_match('/^(\d{3}\s+[0-9A-Z]{2,3})\s+(.*)$/', $line, $hitA)) {
        $tail = trim($hitA[2]);
        preg_match_all($pricePattern, $tail, $found);
        $priceTokens = $found[0];
        if (count($priceTokens) >= 2) {
            return [
                "type" => "A",
                "codice" => trim($hitA[1]),
                // Description is the remainder with the price tokens removed.
                "descrizione" => trim(preg_replace($pricePattern, '', $tail)),
                "prezzi" => array_map($toNumber, $priceTokens),
            ];
        }
    }

    // Format B1
    // NOTE(review): this pattern has no `u` modifier, so the `À-ù` range is
    // evaluated on bytes rather than on code points — confirm that is intended.
    $patternB1 = '/^(\d{3}\s+[A-Z]{1,3})\s+([A-Za-zÀ-ù0-9\s]+?)\s+((?:\d{1,3}[.,]\d{2,3}\s*){4,})$/';
    if (preg_match($patternB1, $line, $hitB)) {
        preg_match_all($pricePattern, trim($hitB[3]), $found);
        return [
            "type" => "B1",
            "codice" => trim($hitB[1]),
            "descrizione" => trim($hitB[2]),
            "prezzi" => array_map($toNumber, $found[0]),
        ];
    }

    // Format C (blocks repeated within the line)
    $variants = extractTypeCBlocks($line);
    if (empty($variants)) {
        // No format matched.
        return null;
    }

    $remaining = $line;
    foreach ($variants as $variant) {
        // Rebuild a pattern for this block (any price) and cut it out.
        $blockPattern = sprintf(
            '/%s\s+%s\s+%s\s+\d{1,3}\.\d{3}/',
            $variant["codice"],
            $variant["variante"],
            $variant["dimensione"]
        );
        $remaining = preg_replace($blockPattern, "", $remaining);
    }

    return [
        "type" => "C",
        "descrizione" => trim($remaining),
        "varianti" => $variants,
    ];
}
// ---------------------------------------------------------
// 6) PROCESSAMENTO DI TUTTE LE PAGINE
// ---------------------------------------------------------
// Collects the parsed result of every line, across all pages, in reading order.
$items = [];
foreach ($pages as $p) {
    // Pages without a usable row list contribute nothing.
    if (!isset($p["row"]) || !is_array($p["row"])) {
        continue;
    }
    // Rebuild each row as one flat text line by joining its cell texts.
    $flatLines = [];
    foreach ($p["row"] as $row) {
        // A row without a column list would otherwise raise warnings that end
        // up inside the JSON output; it yields no line either way.
        if (!isset($row["column"]) || !is_array($row["column"])) {
            continue;
        }
        $line = "";
        foreach ($row["column"] as $col) {
            $cellText = isset($col["text"]["text"]) ? $col["text"]["text"] : null;
            // Skip missing, non-scalar and empty-string cells. An explicit
            // check is used instead of empty() because empty("0") is true and
            // would silently drop a cell whose text is "0".
            if (!is_scalar($cellText) || $cellText === false || (string) $cellText === "") {
                continue;
            }
            $line .= " " . trim((string) $cellText);
        }
        $line = trim($line);
        if ($line !== "") {
            $flatLines[] = $line;
        }
    }
    // Run the parser on every rebuilt line; lines matching no format are dropped.
    foreach ($flatLines as $l) {
        $parsed = parseLine($l);
        if ($parsed !== null) {
            $items[] = $parsed;
        }
    }
}
// ---------------------------------------------------------
// 7) OUTPUT FINALE
// ---------------------------------------------------------
// Emit the final report — status flag, number of parsed items and the items
// themselves — as pretty-printed JSON.
$report = [
    "status" => "ok",
    "total" => count($items),
    "items" => $items,
];
echo json_encode($report, JSON_PRETTY_PRINT);