$fileToken,
    "inline" => true,
    "detectTables" => true,
    "pages" => "all"
];

// POST the payload as JSON to $endpoint, authenticating with the x-api-key header.
$ch = curl_init($endpoint);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_HTTPHEADER, [
    "Content-Type: application/json",
    "x-api-key: $apiKey"
]);
curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($payload));
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

$response = curl_exec($ch);

// With CURLOPT_RETURNTRANSFER, curl_exec() returns false only on a transfer
// failure. An empty body is not a cURL error; it is rejected by the
// validation step below instead.
if ($response === false) {
    // Read the error before releasing the handle.
    $curlError = curl_error($ch);
    curl_close($ch);
    echo json_encode(["error" => "Errore CURL: " . $curlError]);
    exit;
}

curl_close($ch);

$json = json_decode($response, true);

// ---------------------------------------------------------
// 3) VALIDATION
// ---------------------------------------------------------

// The page collection must exist and be iterable; json_decode() yields null
// on malformed input, which also ends up here.
$hasPages = isset($json["body"]["document"]["page"])
    && is_array($json["body"]["document"]["page"]);

if (!$hasPages) {
    echo json_encode([
        "error" => true,
        "message" => "PDF.co JSON senza pagine valide",
        "raw" => $json
    ]);
    exit;
}

$pages = $json["body"]["document"]["page"];

// ---------------------------------------------------------
// 4) HELPER FUNCTIONS
// ---------------------------------------------------------

/**
 * Tells whether a string is exactly a "format A" code, e.g. "291 02F":
 * three digits, whitespace, then two or three digits/uppercase letters.
 *
 * Not called anywhere in the visible part of this file.
 *
 * @param string $txt Candidate text.
 * @return int|false Result of preg_match(): 1 on match, 0 otherwise, false on error.
 */
function isCodeTypeA($txt)
{
    return preg_match('/^\d{3}\s+[0-9A-Z]{2,3}$/', $txt);
}

/**
 * Extracts every "format C" block found in a line. A block is four
 * whitespace-separated fields, e.g. "499 3A 14 5.475": a 3-digit code,
 * a 1-3 character variant, a 2-digit size and a price written as
 * 1-3 digits, a dot and 3 digits.
 *
 * @param string $line Reconstructed text line.
 * @return array List of arrays with keys "codice", "variante", "dimensione"
 *               and "prezzo". The price has its dot removed before the
 *               float conversion ("5.475" becomes 5475.0).
 */
function extractTypeCBlocks($line)
{
    preg_match_all(
        '/(\d{3})\s+([0-9A-Z]{1,3})\s+(\d{2})\s+(\d{1,3}\.\d{3})/',
        $line,
        $m,
        PREG_SET_ORDER
    );

    $out = [];
    foreach ($m as $b) {
        $out[] = [
            "codice" => $b[1],
            "variante" => $b[2],
            "dimensione" => $b[3],
            "prezzo" => floatval(str_replace(".", "", $b[4]))
        ];
    }

    return $out;
}

// ---------------------------------------------------------
// 5) PARSER FOR A SINGLE (RECONSTRUCTED) LINE
// ---------------------------------------------------------

/**
 * Parses one reconstructed text line, trying the formats in this order:
 * A, then B1, then C. The first format that matches wins.
 *
 * @param string $line Reconstructed text line.
 * @return array|null For A and B1: keys "type", "codice", "descrizione",
 *                    "prezzi". For C: keys "type", "descrizione", "varianti".
 *                    Null when no format matches.
 */
function parseLine($line)
{
    // Shared price conversion: drop every dot, then turn a comma into a
    // decimal point ("1.234" -> 1234.0, "12,50" -> 12.5).
    // NOTE(review): a dot followed by two digits ("12.50") becomes 1250.0 —
    // confirm whether such values occur and are meant as decimals.
    $toFloat = function ($v) {
        return floatval(str_replace(",", ".", str_replace(".", "", $v)));
    };

    // FORMAT A: leading code "ddd XX[X]", then free text holding at least two prices.
    if (preg_match('/^(\d{3}\s+[0-9A-Z]{2,3})\s+(.*)$/', $line, $mA)) {
        $codice = trim($mA[1]);
        $resto = trim($mA[2]);

        preg_match_all('/\d{1,3}[.,]\d{2,3}/', $resto, $matchesPrezzi);

        if (count($matchesPrezzi[0]) >= 2) {
            // The description is whatever remains once the prices are removed.
            $descr = trim(preg_replace('/\d{1,3}[.,]\d{2,3}/', '', $resto));
            $vals = array_map($toFloat, $matchesPrezzi[0]);

            return [
                "type" => "A",
                "codice" => $codice,
                "descrizione" => $descr,
                "prezzi" => $vals
            ];
        }
        // Fewer than two prices: fall through to the other formats.
    }

    // FORMAT B1: code "ddd L[LL]" (letters only), a description, then a
    // trailing run of at least four prices.
    // NOTE(review): the pattern has no "u" modifier, so on UTF-8 input the
    // "À-ù" range is presumably applied byte-wise — confirm before changing it.
    if (preg_match(
        '/^(\d{3}\s+[A-Z]{1,3})\s+([A-Za-zÀ-ù0-9\s]+?)\s+((?:\d{1,3}[.,]\d{2,3}\s*){4,})$/',
        $line,
        $mB
    )) {
        $codice = trim($mB[1]);
        $descr = trim($mB[2]);
        $valuesString = trim($mB[3]);

        preg_match_all('/\d{1,3}[.,]\d{2,3}/', $valuesString, $vv);
        $vals = array_map($toFloat, $vv[0]);

        return [
            "type" => "B1",
            "codice" => $codice,
            "descrizione" => $descr,
            "prezzi" => $vals
        ];
    }

    // FORMAT C: one or more "code variant size price" blocks in the same line.
    $blocks = extractTypeCBlocks($line);

    if (!empty($blocks)) {
        // Remove every recognised block from the line; the rest is the description.
        $desc = $line;
        foreach ($blocks as $b) {
            $pattern = sprintf(
                '/%s\s+%s\s+%s\s+\d{1,3}\.\d{3}/',
                $b["codice"],
                $b["variante"],
                $b["dimensione"]
            );
            $desc = preg_replace($pattern, "", $desc);
        }
        $desc = trim($desc);

        return [
            "type" => "C",
            "descrizione" => $desc,
            "varianti" => $blocks
        ];
    }

    // No format matched.
    return null;
}

// ---------------------------------------------------------
// 6) PROCESSING OF ALL PAGES
// ---------------------------------------------------------

$items = [];

foreach ($pages as $p) {
    // Skip pages that carry no iterable row list.
    if (!isset($p["row"]) || !is_array($p["row"])) {
        continue;
    }

    foreach ($p["row"] as $row) {
        // Skip rows that carry no iterable column list.
        if (!isset($row["column"]) || !is_array($row["column"])) {
            continue;
        }

        // Rebuild the real text line by joining the non-empty cell texts
        // with single spaces.
        $cells = [];
        foreach ($row["column"] as $col) {
            $text = isset($col["text"]["text"]) ? $col["text"]["text"] : null;
            if (!is_scalar($text)) {
                continue;
            }

            // Compare against "" explicitly: empty() would also discard a
            // cell whose text is "0".
            $text = trim((string) $text);
            if ($text !== "") {
                $cells[] = $text;
            }
        }

        if (count($cells) === 0) {
            continue;
        }

        // Run the parsers on the reconstructed line; unmatched lines are dropped.
        $parsed = parseLine(implode(" ", $cells));
        if ($parsed !== null) {
            $items[] = $parsed;
        }
    }
}

// ---------------------------------------------------------
// 7) FINAL OUTPUT
// ---------------------------------------------------------

echo json_encode([
    "status" => "ok",
    "total" => count($items),
    "items" => $items
], JSON_PRETTY_PRINT);