theloftstore/public/userarea/extract_prices.php
2025-11-17 15:05:10 +01:00

74 lines
2.0 KiB
PHP

<?php
require_once __DIR__ . "/../../vendor/autoload.php";
use Spatie\PdfToText\Pdf;
// ---------------------------------------------------------
// 1) PDF path
// ---------------------------------------------------------
// Everything this script emits is JSON, including the two error exits below,
// so the content type is declared before either of them can run.
// NOTE(review): the error exits still answer with the default HTTP status;
// consider an error status once the consuming client is confirmed to cope.
header("Content-Type: application/json");

// The price list is expected next to this script.
$pdfFile = __DIR__ . "/listino.pdf";
if (!file_exists($pdfFile)) {
    die(json_encode(["error" => "PDF non trovato: $pdfFile"]));
}
// ---------------------------------------------------------
// 2) Text extraction via Poppler
// ---------------------------------------------------------
// Path of the pdftotext executable passed to the extractor.
$poppler = 'C:\poppler\Library\bin\pdftotext.exe';
try {
    $text = Pdf::getText($pdfFile, $poppler);
} catch (Exception $e) {
    // Report the extraction failure in the same {"error": ...} shape.
    die(json_encode(["error" => $e->getMessage()]));
}
// ---------------------------------------------------------
// 3) Normalization
// ---------------------------------------------------------
// Collapse every run of spaces and/or tabs into one space in a single pass.
// Collapsing spaces first and converting tabs afterwards left double spaces
// wherever a tab sat next to a space or next to another tab.
// preg_replace() returns null on a PCRE error; the text is kept as-is then.
$text = preg_replace('/[ \t]+/', ' ', $text) ?? $text;
// ---------------------------------------------------------
// 4) REGEX FOR THE CASSINA PRICE LIST
// ---------------------------------------------------------
/**
 * Parses price-list text into one row per (item, price column) pair.
 *
 * An item is recognised as, in order:
 *   - a code: three digits, whitespace, two or three digits and an optional
 *     uppercase letter;
 *   - a description that contains no digits;
 *   - one or more prices written as 1-2 digits, "." or ",", 2-3 digits,
 *     each followed by whitespace or by the end of the text.
 *
 * @param string $text Text to scan.
 *
 * @return array[] List of rows with the keys "codice", "descrizione",
 *                 "colonna" ("COL_1", "COL_2", ... by position inside the
 *                 item's price run) and "prezzo" (float).
 */
function extractPriceRows(string $text): array
{
    // (?:\s+|\z) rather than a bare \s+: the last price of the text has no
    // whitespace after it and would otherwise be dropped silently.
    $itemPattern = '/(\d{3}\s+\d{2,3}\s*[A-Z]?)\s+([^0-9]+?)\s+((?:\d{1,2}[.,]\d{2,3}(?:\s+|\z))+)/';
    $pricePattern = '/\d{1,2}[.,]\d{2,3}/';

    $rows = [];
    preg_match_all($itemPattern, $text, $items, PREG_SET_ORDER);
    foreach ($items as $item) {
        $codice = trim($item[1]);
        $descr = trim($item[2]);

        // Split the captured price run into its individual prices.
        preg_match_all($pricePattern, trim($item[3]), $prices);
        foreach ($prices[0] as $i => $raw) {
            // Decimal comma to decimal point: 10,80 -> 10.80.
            // NOTE(review): a value such as "1.250" is read as 1.25; confirm
            // the list never uses "." as a thousands separator.
            $rows[] = [
                "codice" => $codice,
                "descrizione" => $descr,
                "colonna" => "COL_" . ($i + 1),
                "prezzo" => floatval(str_replace(',', '.', $raw)),
            ];
        }
    }

    return $rows;
}

// The cast keeps the call valid even if $text is not a string at this point.
$results = extractPriceRows((string) $text);
// ---------------------------------------------------------
// 5) JSON output
// ---------------------------------------------------------
header("Content-Type: application/json");
// JSON_INVALID_UTF8_SUBSTITUTE: without it a single malformed byte sequence in
// a description makes json_encode() return false and the body come out empty.
$payload = json_encode([
    "items" => count($results),
    "data" => $results
], JSON_PRETTY_PRINT | JSON_INVALID_UTF8_SUBSTITUTE);
if ($payload === false) {
    // Any remaining encoding failure is reported in the {"error": ...} shape
    // instead of sending an empty response.
    $payload = json_encode(["error" => json_last_error_msg()]);
}
echo $payload;