fix ghost columns v2
This commit is contained in:
@@ -74,6 +74,105 @@ function cleanCellText($value): string
|
||||
return trim($cleaned ?? $raw);
|
||||
}
|
||||
|
||||
/**
 * Pre-clean an .xlsx by streaming out "ghost" cells: empty, self-closing
 * <c .../> (and <c ...></c>) elements that carry only leftover styling.
 *
 * The source file is never modified. The work happens on a copy written next
 * to it as "<path>.slim.xlsx"; every entry named xl/worksheets/sheetN.xml is
 * rewritten without its empty cells and swapped back into that copy.
 *
 * @param string $path Path of the .xlsx archive to slim.
 *
 * @return string|null Path of the slimmed copy (the caller is responsible for
 *                     deleting it), or null when ZipArchive is unavailable, no
 *                     worksheet entry was found, or any step failed. On a null
 *                     return the copy and all temporary files are removed.
 */
function slimXlsxGhostCells(string $path): ?string
{
    if (!class_exists('ZipArchive')) {
        return null;
    }

    $slim = $path . '.slim.xlsx';
    $temps = [];     // worksheet entry name => temp file holding its stripped XML
    $zip = null;
    $result = null;  // stays null on every failure path; drives the cleanup below

    try {
        if (!copy($path, $slim)) {
            return null;
        }

        // Phase 1: stream-strip each worksheet to a temp file (low memory).
        $zip = new ZipArchive();
        if ($zip->open($slim) !== true) {
            return null;
        }

        for ($i = 0; $i < $zip->numFiles; $i++) {
            $name = $zip->getNameIndex($i);
            if ($name === false || !preg_match('#^xl/worksheets/sheet\d+\.xml$#', $name)) {
                continue;
            }

            $in = $zip->getStream($name);
            if (!$in) {
                continue;
            }

            $tmp = tempnam(sys_get_temp_dir(), 'slim');
            if ($tmp === false) {
                fclose($in);
                throw new \RuntimeException('cannot create a temporary file');
            }
            // Register before writing so the file is removed on any failure.
            $temps[$name] = $tmp;

            slimXlsxStripSheetStream($in, $tmp);
        }
        $zip->close();

        if (!$temps) {
            return null;
        }

        // Phase 2: swap the stripped worksheets back into the archive.
        $zip = new ZipArchive();
        if ($zip->open($slim) !== true) {
            return null;
        }
        foreach ($temps as $name => $tmp) {
            $zip->deleteName($name);
            if (!$zip->addFile($tmp, $name)) {
                throw new \RuntimeException("cannot re-add $name to the archive");
            }
        }
        // addFile streams from disk during close(), so the temp files must
        // still exist here; a failed close means the copy was not rewritten.
        if (!$zip->close()) {
            throw new \RuntimeException('cannot write the slimmed archive');
        }

        $result = $slim;
        return $result;
    } catch (\Throwable $e) {
        error_log('slimXlsxGhostCells failed: ' . $e->getMessage());
        return null;
    } finally {
        // Drop our reference to the archive object before deleting files.
        $zip = null;
        foreach ($temps as $t) {
            @unlink($t);
        }
        if ($result === null) {
            @unlink($slim);
        }
    }
}

/**
 * Copy worksheet XML from $in into the file at $tmp, dropping empty <c> cells.
 *
 * Both handles are closed before returning, whether or not an error occurred.
 *
 * @param resource $in  Readable stream positioned at the start of the XML.
 * @param string   $tmp Path of the file that receives the stripped XML.
 *
 * @throws \RuntimeException When $tmp cannot be opened or the regex engine fails.
 */
function slimXlsxStripSheetStream($in, string $tmp): void
{
    $out = fopen($tmp, 'w');
    if ($out === false) {
        fclose($in);
        throw new \RuntimeException("cannot open temporary file $tmp");
    }

    try {
        $carry = '';

        // fread may return fewer bytes than requested, so chunk boundaries can
        // fall anywhere in the XML.
        while (!feof($in)) {
            $chunk = fread($in, 4194304);
            if ($chunk === '' || $chunk === false) {
                break;
            }

            // Only process up to the last complete '>' so a cell tag is
            // never split across a chunk boundary; carry the remainder.
            $buf = $carry . $chunk;
            $lastGt = strrpos($buf, '>');
            if ($lastGt === false) {
                $carry = $buf;
                continue;
            }
            $proc = substr($buf, 0, $lastGt + 1);
            $carry = substr($buf, $lastGt + 1);

            // If the processed part ends on an opening <c ...> tag (not a
            // self-closed one), hold that tag back too: its </c> may arrive
            // with the next read, and the pair must be seen together to be
            // recognised as an empty cell.
            $lastLt = strrpos($proc, '<');
            if (
                $lastLt !== false
                && substr($proc, $lastLt, 3) === '<c '
                && substr($proc, -2, 1) !== '/'
            ) {
                $carry = substr($proc, $lastLt) . $carry;
                $proc = substr($proc, 0, $lastLt);
            }

            $proc = preg_replace(['#<c [^>]*/>#', '#<c [^>]*></c>#'], '', $proc);
            if ($proc === null) {
                // preg_replace returns null on engine errors; writing that
                // would silently drop this part of the worksheet.
                throw new \RuntimeException('preg_replace failed, PCRE error code ' . preg_last_error());
            }
            fwrite($out, $proc);
        }

        if ($carry !== '') {
            fwrite($out, $carry);
        }
    } finally {
        fclose($in);
        fclose($out);
    }
}
|
||||
|
||||
try {
|
||||
// Quando il body POST supera post_max_size, PHP scarta $_POST e $_FILES
|
||||
// (warning "Content-Length exceeds the limit ... in Unknown on line 0") e lo
|
||||
@@ -198,10 +297,28 @@ try {
|
||||
if (empty($mappings)) {
|
||||
$response['error'] = "Nessun mapping trovato per il template con ID $template_id";
|
||||
} else {
|
||||
// Carica il file rinominato con PHPSpreadsheet
|
||||
$reader = IOFactory::createReaderForFile($destination);
|
||||
// Pre-clean ghost cells for .xlsx so a bloated worksheet (millions
|
||||
// of empty styled cells) doesn't make the load time out. Falls back
|
||||
// to the original file if slimming fails for any reason.
|
||||
$loadPath = $destination;
|
||||
$slimPath = null;
|
||||
if (preg_match('/\.xlsx$/i', $destination)) {
|
||||
$slimPath = slimXlsxGhostCells($destination);
|
||||
if ($slimPath !== null) {
|
||||
$loadPath = $slimPath;
|
||||
error_log("Ghost-cell pre-clean applied, loading slimmed copy: $slimPath");
|
||||
}
|
||||
}
|
||||
|
||||
// Carica il file con PHPSpreadsheet.
|
||||
$reader = IOFactory::createReaderForFile($loadPath);
|
||||
$reader->setReadEmptyCells(false);
|
||||
$spreadsheet = $reader->load($destination);
|
||||
$spreadsheet = $reader->load($loadPath);
|
||||
|
||||
// The slimmed copy is only needed for parsing; drop it now.
|
||||
if ($slimPath !== null) {
|
||||
@unlink($slimPath);
|
||||
}
|
||||
|
||||
$sheetCount = $spreadsheet->getSheetCount();
|
||||
$sheetNames = $spreadsheet->getSheetNames();
|
||||
|
||||
Reference in New Issue
Block a user