fix ghost columns v2

This commit is contained in:
2026-06-22 22:30:02 +03:00
parent ff61456d91
commit fda3d66d1f
+120 -3
View File
@@ -74,6 +74,105 @@ function cleanCellText($value): string
return trim($cleaned ?? $raw);
}
/**
 * Pre-clean an .xlsx by streaming out "ghost" cells: empty, self-closing
 * <c .../> (and <c ...></c>) elements that carry only leftover styling.
 *
 * The source file is never modified: it is copied to "$path.slim.xlsx" and
 * every xl/worksheets/sheetN.xml entry of that copy is replaced by a stripped
 * version. Any failure is logged or silently reported as null so the caller
 * can fall back to the original file; temporary files and a half-built copy
 * are always removed on failure.
 *
 * @param string $path Path to the source .xlsx file.
 *
 * @return string|null Path of the slimmed copy, or null when ZipArchive is
 *                     unavailable, the archive cannot be copied/opened, it
 *                     contains no worksheet entries, or stripping failed.
 */
function slimXlsxGhostCells(string $path): ?string
{
    if (!class_exists('ZipArchive')) {
        return null;
    }

    $slim = $path . '.slim.xlsx';
    $temps = [];     // archive entry name => temp file holding the stripped XML
    $zip = null;     // archive currently open, so finally can release it
    $done = false;   // true only once the slimmed copy is complete

    try {
        if (!copy($path, $slim)) {
            return null;
        }

        // Phase 1: stream-strip each worksheet to a temp file (low memory).
        $zip = new ZipArchive();
        if ($zip->open($slim) !== true) {
            $zip = null;
            return null;
        }
        for ($i = 0; $i < $zip->numFiles; $i++) {
            $name = $zip->getNameIndex($i);
            if ($name === false || !preg_match('#^xl/worksheets/sheet\d+\.xml$#', $name)) {
                continue;
            }
            $in = $zip->getStream($name);
            if (!$in) {
                // Unreadable entry: leave this worksheet untouched in the copy.
                continue;
            }
            try {
                $tmp = tempnam(sys_get_temp_dir(), 'slim');
                if ($tmp === false) {
                    throw new \RuntimeException('cannot create temp file');
                }
                // Register before writing so cleanup also covers a failed strip.
                $temps[$name] = $tmp;
                slimXlsxStripSheetStream($in, $tmp);
            } finally {
                fclose($in);
            }
        }
        $zip->close();
        $zip = null;

        if (!$temps) {
            return null;
        }

        // Phase 2: swap the stripped worksheets back into the archive.
        $zip = new ZipArchive();
        if ($zip->open($slim) !== true) {
            $zip = null;
            return null;
        }
        foreach ($temps as $name => $tmp) {
            $zip->deleteName($name);
            if (!$zip->addFile($tmp, $name)) {
                // Without this check the worksheet would be deleted but not replaced.
                throw new \RuntimeException("cannot re-add $name");
            }
        }
        // addFile streams from disk during close(), so the temp files are only
        // removed afterwards (in the finally block).
        $committed = $zip->close();
        $zip = null;
        if (!$committed) {
            throw new \RuntimeException('cannot write slimmed archive');
        }

        $done = true;
        return $slim;
    } catch (\Throwable $e) {
        error_log('slimXlsxGhostCells failed: ' . $e->getMessage());
        return null;
    } finally {
        if ($zip !== null) {
            // Drop pending changes; the copy is discarded anyway on this path.
            @$zip->unchangeAll();
            @$zip->close();
        }
        foreach ($temps as $t) {
            @unlink($t);
        }
        if (!$done) {
            @unlink($slim);
        }
    }
}

/**
 * Copy worksheet XML from an open stream to a file, dropping ghost cells.
 *
 * The input is consumed in chunks. Each chunk is only processed up to its
 * last '>' so no tag is cut in half, and a trailing opening <c ...> tag is
 * held back too so that an empty <c ...></c> pair straddling a chunk boundary
 * is still recognised.
 *
 * @param resource $in      Readable stream positioned at the start of the XML.
 * @param string   $tmpPath File to (over)write with the stripped XML.
 *
 * @throws \RuntimeException When the file cannot be opened or written, or
 *                           when the regex engine reports an error.
 */
function slimXlsxStripSheetStream($in, string $tmpPath): void
{
    $out = fopen($tmpPath, 'w');
    if ($out === false) {
        throw new \RuntimeException("cannot open temp file $tmpPath");
    }

    try {
        $carry = '';
        while (!feof($in)) {
            $chunk = fread($in, 4194304);
            if ($chunk === '' || $chunk === false) {
                break;
            }

            $buf = $carry . $chunk;
            $lastGt = strrpos($buf, '>');
            if ($lastGt === false) {
                // No complete tag yet; keep accumulating.
                $carry = $buf;
                continue;
            }
            $proc = substr($buf, 0, $lastGt + 1);
            $carry = substr($buf, $lastGt + 1);

            // If the processed part ends with an opening cell tag, its </c>
            // may arrive with the next chunk: defer the tag to the next round.
            $lastLt = strrpos($proc, '<');
            if ($lastLt !== false) {
                $tail = substr($proc, $lastLt);
                if (strncmp($tail, '<c ', 3) === 0 && substr($tail, -2) !== '/>') {
                    $carry = $tail . $carry;
                    $proc = substr($proc, 0, $lastLt);
                }
            }

            $stripped = preg_replace(['#<c [^>]*/>#', '#<c [^>]*></c>#'], '', $proc);
            if ($stripped === null) {
                // preg_replace returns null on engine errors; writing that
                // would silently drop the whole chunk and corrupt the sheet.
                throw new \RuntimeException('regex failure, PCRE error code ' . preg_last_error());
            }
            if ($stripped !== '' && fwrite($out, $stripped) === false) {
                throw new \RuntimeException("cannot write temp file $tmpPath");
            }
        }

        // Whatever follows the final '>' (normally nothing) is copied verbatim.
        if ($carry !== '' && fwrite($out, $carry) === false) {
            throw new \RuntimeException("cannot write temp file $tmpPath");
        }
    } finally {
        fclose($out);
    }
}
try {
// Quando il body POST supera post_max_size, PHP scarta $_POST e $_FILES
// (warning "Content-Length exceeds the limit ... in Unknown on line 0") e lo
@@ -198,10 +297,28 @@ try {
if (empty($mappings)) {
$response['error'] = "Nessun mapping trovato per il template con ID $template_id";
} else {
// Carica il file rinominato con PHPSpreadsheet
$reader = IOFactory::createReaderForFile($destination);
// Pre-clean ghost cells for .xlsx so a bloated worksheet (millions
// of empty styled cells) doesn't make the load time out. Falls back
// to the original file if slimming fails for any reason.
$loadPath = $destination;
$slimPath = null;
if (preg_match('/\.xlsx$/i', $destination)) {
$slimPath = slimXlsxGhostCells($destination);
if ($slimPath !== null) {
$loadPath = $slimPath;
error_log("Ghost-cell pre-clean applied, loading slimmed copy: $slimPath");
}
}
// Carica il file con PHPSpreadsheet.
$reader = IOFactory::createReaderForFile($loadPath);
$reader->setReadEmptyCells(false);
$spreadsheet = $reader->load($destination);
$spreadsheet = $reader->load($loadPath);
// The slimmed copy is only needed for parsing; drop it now.
if ($slimPath !== null) {
@unlink($slimPath);
}
$sheetCount = $spreadsheet->getSheetCount();
$sheetNames = $spreadsheet->getSheetNames();