Fix header finder

This commit is contained in:
r.mubarakzyanov 2026-03-31 16:33:08 +03:00
parent d24836e2b1
commit 0be7109df4
3 changed files with 134 additions and 20 deletions

View File

@ -329,7 +329,10 @@ error_log("Loaded template: " . print_r($template, true));
<thead>
<tr>
<th><input type="checkbox" id="selectAll"> Seleziona</th>
${data.columns.map(col => `<th>${col || 'Colonna senza nome'}<div class="resize-handle"></div></th>`).join('')}
${data.columns.map(col => {
const label = !col ? 'Colonna senza nome' : (col.match(/^__empty_\d+__$/) ? 'Colonna senza nome' : col);
return `<th>${label}<div class="resize-handle"></div></th>`;
}).join('')}
</tr>
<tr class="column-filters">
<th></th>

View File

@ -515,6 +515,10 @@ $xlsHeaders = $template['xls_headers'] ? json_decode($template['xls_headers'], t
});
let sheet = workbook.Sheets[workbook.SheetNames[0]];
// Read sheet range to determine column offset
const sheetRange = XLSX.utils.decode_range(sheet['!ref'] || 'A1');
const colOffset = sheetRange.s.c; // first column index in sheet (0-based)
let sheetData = XLSX.utils.sheet_to_json(sheet, {
header: 1,
defval: "",
@ -522,6 +526,13 @@ $xlsHeaders = $template['xls_headers'] ? json_decode($template['xls_headers'], t
range: 0
});
// Merged-cell ranges of the sheet, with both column bounds shifted left by
// colOffset (the first column index of the sheet range). Row indices are
// left untouched. A sheet without merges yields an empty array.
const merges = (sheet['!merges'] || []).map(m => ({
    s: { r: m.s.r, c: m.s.c - colOffset },
    e: { r: m.e.r, c: m.e.c - colOffset }
}));
// Use the library's column encoder for the log: String.fromCharCode(65 + n)
// only yields a valid column letter for n < 26 (A..Z) and prints garbage for
// AA, AB, ... columns.
console.log('Sheet column offset:', colOffset, '(first col:', XLSX.utils.encode_col(colOffset) + ')');
const useAutoDetect = document.getElementById('autoDetectHeader').checked;
if (!useAutoDetect) {
@ -537,7 +548,33 @@ $xlsHeaders = $template['xls_headers'] ? json_decode($template['xls_headers'], t
return;
}
let headers = sheetData[rowIndex - 1].slice(startColumn - 1).map(header => header === undefined ? "" : String(header).trim());
// Manual mode: derive one header per logical column on the user-chosen row.
// A merged range that covers the header row contributes a single header,
// read from the first column of the merge.
// mergeStartMapManual: column index -> first column of the merge covering it.
const mergeStartMapManual = {};
for (const { s: first, e: last } of merges) {
    const coversHeaderRow = (rowIndex - 1) >= first.r && (rowIndex - 1) <= last.r;
    if (!coversHeaderRow) continue;
    for (let col = first.c; col <= last.c; col++) {
        mergeStartMapManual[col] = first.c;
    }
}
const rawRowManual = sheetData[rowIndex - 1] || [];
// Merge start columns that have already produced a header.
const seenManual = new Set();
let headers = [];
for (let col = startColumn - 1; col < rawRowManual.length; col++) {
    const anchor = mergeStartMapManual[col];
    let sourceCol = col;
    if (anchor !== undefined) {
        // Later cells of an already-emitted merge add nothing.
        if (seenManual.has(anchor)) continue;
        seenManual.add(anchor);
        sourceCol = anchor;
    }
    const cell = rawRowManual[sourceCol];
    // Collapse line breaks/tabs to a single space and trim the result.
    headers.push(cell === undefined ? "" : String(cell).replace(/[\r\n\t]+/g, ' ').trim());
}
// Drop empty headers at the end of the row only; inner empties are kept.
while (headers.length > 0 && headers[headers.length - 1] === '') {
    headers.pop();
}
console.log("Intestazioni estratte (manual):", headers);
availableXlsColumns = [...headers];
usedColumnsFromDB = [];
@ -639,8 +676,40 @@ $xlsHeaders = $template['xls_headers'] ? json_decode($template['xls_headers'], t
return;
}
let headers = sheetData[bestRow].slice(bestStartCol).map(header => header === undefined ? "" : String(header).trim());
console.log("Intestazioni estratte:", headers);
// Auto-detect mode: derive one header per logical column on the detected row.
// A merged range covering the header row yields a single header taken from
// the first column of the merge; every other cell yields its own header.
const rawRow = sheetData[bestRow] || [];
// mergeStartMap: column index -> first column of the merge covering it.
const mergeStartMap = {};
for (const { s: first, e: last } of merges) {
    const coversHeaderRow = bestRow >= first.r && bestRow <= last.r;
    if (!coversHeaderRow) continue;
    for (let col = first.c; col <= last.c; col++) {
        mergeStartMap[col] = first.c;
    }
}
// Merge start columns that have already produced a header.
const seen = new Set();
let headers = [];
for (let col = bestStartCol; col < rawRow.length; col++) {
    const anchor = mergeStartMap[col];
    let sourceCol = col;
    if (anchor !== undefined) {
        // Only the first cell encountered for a merge emits a header.
        if (seen.has(anchor)) continue;
        seen.add(anchor);
        sourceCol = anchor;
    }
    const cell = rawRow[sourceCol];
    // Collapse line breaks/tabs to a single space and trim the result.
    headers.push(cell === undefined ? "" : String(cell).replace(/[\r\n\t]+/g, ' ').trim());
}
// Drop empty headers at the end of the row only; inner empties are kept.
while (headers.length > 0 && headers[headers.length - 1] === '') {
    headers.pop();
}
// Defensive second normalisation pass over the already-cleaned values.
headers = headers.map(text => text.replace(/[\r\n\t]+/g, ' ').trim());
console.log("Logical headers:", headers, `(${headers.length} columns from ${rawRow.length} physical)`);
availableXlsColumns = [...headers];
usedColumnsFromDB = [];
saveXlsHeaders(headers, bestRow + 1, bestStartCol + 1);
@ -680,8 +749,17 @@ $xlsHeaders = $template['xls_headers'] ? json_decode($template['xls_headers'], t
document.querySelectorAll('select.xls-columns').forEach(select => {
let currentValue = select.value || select.dataset.currentXls || '';
let options = availableXlsColumns
.filter(col => !usedColumns.includes(col) || col === currentValue)
.map(col => `<option value="${col}" ${col === currentValue ? 'selected' : ''}>${col}</option>`)
.map((col, origIdx) => ({ col, origIdx }))
.filter(({ col }) => !usedColumns.includes(col) || col === currentValue)
.map(({ col, origIdx }) => {
const clean = col.replace(/[\r\n\t]+/g, ' ').trim();
const isEmpty = clean === '';
const colNum = origIdx + 1;
const val = isEmpty ? `__empty_${colNum}__` : clean;
const label = isEmpty ? `(empty column ${colNum})` : clean;
const isSelected = (isEmpty ? val === currentValue : col === currentValue) ? 'selected' : '';
return `<option value="${val}" ${isSelected}>${label}</option>`;
})
.join('');
select.innerHTML = '<option value="">Select XLS Column</option>' + options;
select.dataset.currentXls = currentValue;

View File

@ -84,34 +84,67 @@ try {
$response['error'] = "La colonna di partenza ($startColumn) supera il numero totale di colonne ($highestColumnIndex).";
} else {
$excelData = [];
// Estrai la riga degli header
$headerRowData = [];
for ($col = $startColumn; $col <= $highestColumnIndex; $col++) {
$columnLetter = \PhpOffice\PhpSpreadsheet\Cell\Coordinate::stringFromColumnIndex($col);
$cell = $worksheet->getCell($columnLetter . $header_row);
$cellValue = $cell ? $cell->getCalculatedValue() : '';
$headerRowData[] = $cellValue ?: '';
// Build the merge map for the header row:
// physical column index => first column index of the merge covering it.
// Only merges whose row span includes $header_row are recorded.
$mergeStartMap = [];
foreach ($worksheet->getMergeCells() as $range) {
    // rangeBoundaries() parses a range such as "A1:C1" into
    // [[startColumnIndex, startRow], [endColumnIndex, endRow]], replacing the
    // hand-rolled regex splitting of the cell references.
    [[$mStartCol, $mStartRow], [$mEndCol, $mEndRow]] =
        \PhpOffice\PhpSpreadsheet\Cell\Coordinate::rangeBoundaries($range);
    $mStartCol = (int)$mStartCol;
    $mEndCol = (int)$mEndCol;
    $mStartRow = (int)$mStartRow;
    $mEndRow = (int)$mEndRow;
    if ($header_row >= $mStartRow && $header_row <= $mEndRow) {
        for ($c = $mStartCol; $c <= $mEndCol; $c++) {
            $mergeStartMap[$c] = $mStartCol;
        }
    }
}
// Find which header columns are non-empty (these are the "real" columns)
// Build logical columns: a merge covering the header row counts as one
// column (represented by its first physical column); every other physical
// column counts as its own logical column.
$logicalCols = []; // physical column indices, one entry per logical column
// Merge start columns already emitted, stored as array keys so the membership
// test is a constant-time isset() instead of a linear in_array() scan per column.
$seen = [];
for ($col = $startColumn; $col <= $highestColumnIndex; $col++) {
    if (!isset($mergeStartMap[$col])) {
        $logicalCols[] = $col;
        continue;
    }
    $ms = $mergeStartMap[$col];
    // Later cells of an already-emitted merge add nothing.
    if (isset($seen[$ms])) continue;
    $seen[$ms] = true;
    $logicalCols[] = $ms;
}
// Build header row using logical columns
$headerRowData = [];
$logicalNum = 0;
foreach ($logicalCols as $physCol) {
$logicalNum++;
$columnLetter = \PhpOffice\PhpSpreadsheet\Cell\Coordinate::stringFromColumnIndex($physCol);
$cell = $worksheet->getCell($columnLetter . $header_row);
$cellValue = trim((string)($cell ? $cell->getCalculatedValue() : ''));
$cellValue = preg_replace('/[\r\n\t]+/', ' ', $cellValue);
// Empty headers get __empty_N__ to match mapping page
$headerRowData[] = ($cellValue !== '') ? $cellValue : '__empty_' . $logicalNum . '__';
}
// Find which logical columns have real headers
$headerFilledIndices = [];
foreach ($headerRowData as $idx => $hVal) {
if (trim((string)$hVal) !== '') $headerFilledIndices[] = $idx;
// Treat only the exact generated placeholder (__empty_N__) as an empty header.
// A plain prefix test would also discard a real header that merely begins
// with "__empty_", and str_starts_with() is unavailable before PHP 8.
if (!preg_match('/^__empty_\d+__$/', (string)$hVal)) $headerFilledIndices[] = $idx;
}
// Require at least 2 filled header-columns (or 1 if only 1 exists)
$minFilled = max(1, min(2, count($headerFilledIndices)));
// Estrai i dati a partire dalla riga successiva, includendo excelrow
// Extract data rows using logical columns
for ($row = $startRow + 1; $row <= $highestRow; $row++) {
$rowData = [];
for ($col = $startColumn; $col <= $highestColumnIndex; $col++) {
$columnLetter = \PhpOffice\PhpSpreadsheet\Cell\Coordinate::stringFromColumnIndex($col);
foreach ($logicalCols as $physCol) {
$columnLetter = \PhpOffice\PhpSpreadsheet\Cell\Coordinate::stringFromColumnIndex($physCol);
$cell = $worksheet->getCell($columnLetter . $row);
$cellValue = $cell ? $cell->getCalculatedValue() : '';
// Normalise only "no value" results (null/false) to an empty string.
// The short ternary ?: also blanked legitimate falsy values such as 0 or "0",
// silently dropping zero-valued cells from the extracted row.
$rowData[] = ($cellValue === null || $cellValue === false) ? '' : $cellValue;
}
// Count how many "header columns" have data in this row
// Count how many header columns have data in this row
$filledCount = 0;
foreach ($headerFilledIndices as $idx) {
if (isset($rowData[$idx]) && trim((string)$rowData[$idx]) !== '') {