$fileToken,
    // NOTE(review): this array literal opens before the first visible line of the file
    // (presumably the CSV request payload assigned to $payloadCsv, which is used below).
    // The visible entries are reproduced unchanged.
    "inline" => true,
    "detectTables" => true,
    "pages" => "all",
    "extractColumnBy" => "vertical_lines",
    "csvSeparator" => ";"
];

// Problems collected while talking to the API. They are reported in the final JSON
// under "warnings" (only when non-empty) instead of being silently dropped.
$warnings = [];

/**
 * POSTs a JSON payload to the given endpoint and decodes the JSON reply.
 *
 * A closure (rather than a named function) is used so that this code stays valid
 * even if it is included more than once or lives inside another scope.
 *
 * @param string $endpoint Full URL of the API endpoint.
 * @param array  $payload  Request body; sent JSON-encoded.
 * @param string $label    Short name used to prefix warning messages.
 *
 * @return array Two-element list: [raw response (string, or false on transport
 *               failure), decoded JSON (mixed, or null when nothing could be decoded)].
 */
$postJson = static function (string $endpoint, array $payload, string $label) use ($apiKey, &$warnings): array {
    $ch = curl_init($endpoint);
    if ($ch === false) {
        $warnings[] = "$label: unable to initialise cURL";
        return [false, null];
    }

    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_HTTPHEADER, [
        "Content-Type: application/json",
        "x-api-key: $apiKey"
    ]);
    curl_setopt($ch, CURLOPT_POSTFIELDS, json_encode($payload));
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    $raw = curl_exec($ch);
    // Read the error text before the handle is closed.
    $transportError = ($raw === false) ? curl_error($ch) : null;
    curl_close($ch);

    // curl_exec() returns false on transport failure; never hand that to json_decode().
    if (!is_string($raw)) {
        $warnings[] = "$label: request failed ($transportError)";
        return [$raw, null];
    }

    $decoded = json_decode($raw, true);
    if (!is_array($decoded)) {
        $warnings[] = "$label: response is not a JSON object";
        return [$raw, $decoded];
    }

    // NOTE(review): assumes the API flags failures with a truthy "error" field and a
    // "message" string - confirm against the PDF.co response schema.
    if (!empty($decoded["error"])) {
        $detail = (isset($decoded["message"]) && is_string($decoded["message"]))
            ? $decoded["message"]
            : "unspecified error";
        $warnings[] = "$label: API reported an error ($detail)";
    }

    return [$raw, $decoded];
};

/**
 * Converts one CSV price cell to a float, or null when the cell is not numeric.
 *
 * The decimal separator is a comma. Dots are removed as thousands separators only
 * in the unambiguous case where a comma follows them (e.g. "1.234,56" => 1234.56).
 * NOTE(review): a dot-only value such as "5.475" is still read as 5.475 here, while
 * the text parser below treats the dot as a thousands separator - confirm which
 * reading the CSV data needs.
 */
$parsePrice = static function ($cell): ?float {
    // Trim first: a trailing "\r" or space makes is_numeric() fail on PHP 7.
    $value = trim((string) $cell);

    $lastComma = strrpos($value, ",");
    $lastDot = strrpos($value, ".");
    if ($lastComma !== false && $lastDot !== false && $lastComma > $lastDot) {
        $value = str_replace(".", "", $value);
    }

    $value = str_replace(",", ".", $value);

    return is_numeric($value) ? floatval($value) : null;
};

// 1) Table extraction as CSV.
[$csvResponse, $csvJson] = $postJson($endpointCsv, $payloadCsv, "csv");

/******************************************************
 * 2) ALSO EXTRACT PLAIN TEXT (for NON-tabular pages)
 ******************************************************/
$endpointText = "https://api.pdf.co/v1/pdf/convert/to/text";

$payloadText = [
    "url" => $fileToken,
    "inline" => true,
    "pages" => "all"
];

[$textResponse, $textJson] = $postJson($endpointText, $payloadText, "text");

/******************************************************
 * 3) CSV PARSER -> FORMAT A TABLES
 ******************************************************/
$formatoA = [];

// The original code read only the "csv" key. The text branch below reads "body" from
// the same API family with "inline" => true, so "body" is accepted as a fallback.
// NOTE(review): confirm which key the CSV endpoint actually returns.
$csvBody = null;
if (is_array($csvJson)) {
    $csvBody = $csvJson["csv"] ?? $csvJson["body"] ?? null;
}

if (is_string($csvBody)) {
    // Split on any newline convention so CRLF data leaves no stray "\r" in the last cell.
    // Limitation kept from the original: quoted fields containing newlines are not supported.
    $csvRows = preg_split('/\r\n|\r|\n/', $csvBody) ?: [];

    foreach ($csvRows as $row) {
        // Enclosure and escape are passed explicitly with their default values.
        $cols = str_getcsv($row, ";", "\"", "\\");
        if (count($cols) < 2) {
            continue;
        }

        // Keep only rows whose first cell looks like an article code, e.g. "499 3A".
        $code = trim((string) $cols[0]);
        if (!preg_match('/^\d{3}\s+[0-9A-Z]{2,3}$/', $code)) {
            continue;
        }

        $formatoA[] = [
            "codice" => $code,
            "descrizione" => trim((string) $cols[1]),
            // Every remaining column is a price; non-numeric cells become null.
            "prezzi" => array_map($parsePrice, array_slice($cols, 2))
        ];
    }
}

/******************************************************
 * 4) TEXT PARSER -> FORMAT B (pages like 499...)
 ******************************************************/
$formatoB = [];

$textBody = is_array($textJson) ? ($textJson["body"] ?? null) : null;

if (is_string($textBody)) {
    // Code pattern such as "499 3A 14".
    $codeRegex = '/\b(\d{3}\s+[A-Z0-9]{1,2}\s+\d{2})\b/';

    // Price pattern such as "5.475", "5475" or "6.085".
    $priceRegex = '/\b\d{1,2}\.?\d{3}\b/';

    // Look for repeated CODE + PRICE combinations on the same line.
    $lines = preg_split('/\r\n|\r|\n/', $textBody) ?: [];

    foreach ($lines as $line) {
        if (!preg_match_all($codeRegex, $line, $codeMatches)) {
            continue;
        }
        if (!preg_match_all($priceRegex, $line, $priceMatches)) {
            continue;
        }

        $codes = $codeMatches[1];
        $prices = $priceMatches[0];

        // Pair codes and prices only when the counts match exactly; otherwise the
        // association is ambiguous and the line is skipped.
        if (count($codes) !== count($prices)) {
            continue;
        }

        foreach ($codes as $i => $code) {
            $formatoB[] = [
                "codice" => $code,
                // The dot is a thousands separator here: "5.475" => 5475.0.
                "prezzo" => floatval(str_replace(".", "", $prices[$i]))
            ];
        }
    }
}

/******************************************************
 * 5) OUTPUT
 ******************************************************/
$output = [
    "status" => "ok",
    "formatoA" => $formatoA,
    "formatoB" => $formatoB,
];

// "status" is left untouched for backward compatibility; failures are surfaced
// through an additional key that appears only when something went wrong.
if ($warnings !== []) {
    $output["warnings"] = $warnings;
}

echo json_encode($output, JSON_PRETTY_PRINT);