/**
 * Pre-clean an .xlsx by streaming out "ghost" cells: empty `<c .../>` and
 * `<c ...></c>` elements that carry no value, only leftover styling.
 *
 * The workbook is first copied to "$path.slim.xlsx"; every archive member
 * named xl/worksheets/sheetN.xml is then filtered chunk by chunk into a
 * temporary file and swapped back into the copy. The original file at $path
 * is never modified.
 *
 * This is strictly best-effort: on any failure (ZipArchive missing, I/O
 * error, PCRE error, no worksheet found) the partial copy and all temporary
 * files are removed and null is returned, so the caller can simply fall back
 * to loading the original file.
 *
 * @param string $path Path to the source .xlsx file.
 *
 * @return string|null Path of the slimmed copy, or null when slimming was not
 *                     possible.
 */
function slimXlsxGhostCells(string $path): ?string
{
    if (!class_exists('ZipArchive')) {
        return null;
    }

    $slim = $path . '.slim.xlsx';
    $temps = [];      // archive member name => temp file holding its stripped XML
    $zip = null;      // the archive that is currently open, if any
    $success = false;

    try {
        if (!copy($path, $slim)) {
            return null;
        }

        // Phase 1: stream-strip each worksheet to a temp file (low memory).
        $zip = new ZipArchive();
        if ($zip->open($slim) !== true) {
            $zip = null;
            return null;
        }

        for ($i = 0; $i < $zip->numFiles; $i++) {
            $name = $zip->getNameIndex($i);
            if ($name === false || !preg_match('#^xl/worksheets/sheet\d+\.xml$#', $name)) {
                continue;
            }

            $in = $zip->getStream($name);
            if (!$in) {
                // Unreadable member: leave it untouched in the copy.
                continue;
            }

            $tmp = tempnam(sys_get_temp_dir(), 'slim');
            if ($tmp === false) {
                fclose($in);
                return null;
            }
            // Register before opening so the cleanup below always sees it.
            $temps[$name] = $tmp;

            $out = fopen($tmp, 'w');
            if ($out === false) {
                fclose($in);
                return null;
            }

            try {
                $stripped = slimXlsxStripGhostCells($in, $out);
            } finally {
                fclose($in);
                fclose($out);
            }

            if (!$stripped) {
                return null;
            }
        }

        $zip->close();
        $zip = null;

        if (!$temps) {
            return null;
        }

        // Phase 2: swap the stripped worksheets back into the archive.
        $zip = new ZipArchive();
        if ($zip->open($slim) !== true) {
            $zip = null;
            return null;
        }

        foreach ($temps as $name => $tmp) {
            $zip->deleteName($name);
            if (!$zip->addFile($tmp, $name)) {
                return null;
            }
        }

        // addFile() only records the path; the data is read from disk during
        // close(), so the temp files must survive until close() has returned.
        $written = $zip->close();
        $zip = null;
        if (!$written) {
            return null;
        }

        $success = true;

        return $slim;
    } catch (\Throwable $e) {
        error_log('slimXlsxGhostCells failed: ' . $e->getMessage());

        return null;
    } finally {
        if ($zip !== null) {
            // Discard pending changes so close() does not try to write them.
            $zip->unchangeAll();
            @$zip->close();
        }
        foreach ($temps as $t) {
            @unlink($t);
        }
        if (!$success) {
            @unlink($slim);
        }
    }
}

/**
 * Copy worksheet XML from $in to $out, dropping empty cell elements.
 *
 * The input is consumed in chunks. Each chunk is only processed up to its
 * last '>' so that no tag is ever split by a chunk boundary; the unprocessed
 * remainder is carried over and prepended to the next chunk. A trailing
 * opening `<c ...>` tag is carried over as well, because its `</c>` may only
 * arrive with the next chunk.
 *
 * @param resource $in        Readable stream with the worksheet XML.
 * @param resource $out       Writable stream receiving the stripped XML.
 * @param int      $chunkSize Number of bytes to read per iteration.
 *
 * @return bool False when a PCRE error or a write error occurred, in which
 *              case the output is incomplete and must be discarded.
 */
function slimXlsxStripGhostCells($in, $out, int $chunkSize = 4194304): bool
{
    // "\b" keeps <col>, <cols>, <conditionalFormatting> etc. out of the match.
    $ghostPatterns = ['#<c\b[^>]*/>#', '#<c\b[^>]*></c>#'];
    $carry = '';

    while (!feof($in)) {
        $chunk = fread($in, $chunkSize);
        if ($chunk === '' || $chunk === false) {
            break;
        }

        $buf = $carry . $chunk;
        $lastGt = strrpos($buf, '>');
        if ($lastGt === false) {
            // No complete tag yet; keep accumulating.
            $carry = $buf;
            continue;
        }

        $proc = substr($buf, 0, $lastGt + 1);
        $carry = substr($buf, $lastGt + 1);

        // If the processable part ends with an opening (non self-closing)
        // <c ...> tag, hold it back so "<c ...></c>" is matched as a whole.
        $lastLt = strrpos($proc, '<');
        if ($lastLt !== false) {
            $tail = substr($proc, $lastLt);
            if (substr($tail, -2) !== '/>' && preg_match('#^<c\b[^>]*>$#D', $tail) === 1) {
                $carry = $tail . $carry;
                $proc = substr($proc, 0, $lastLt);
            }
        }

        $clean = preg_replace($ghostPatterns, '', $proc);
        if ($clean === null) {
            // PCRE failure: writing nothing here would silently drop XML.
            return false;
        }
        if ($clean !== '' && fwrite($out, $clean) === false) {
            return false;
        }
    }

    // Whatever is left cannot contain a complete ghost cell; copy it verbatim.
    if ($carry !== '' && fwrite($out, $carry) === false) {
        return false;
    }

    return true;
}