feat: stream hashing y entradas en archivos

- Añade `computeHashesFromStream` para hashing desde streams
- Añade `streamArchiveEntry` e integra en `importDirectory` (path codificado con ::)
- Extiende `scanDirectory` para exponer entradas internas, normaliza rutas POSIX y evita traversal; `ARCHIVE_MAX_ENTRIES` configurable
- Limpia listeners en hashing y mejora robustez/logging
- Añade tests unitarios e integración; actualiza mocks a `Mock` types
- CI: instala `unzip` junto a `p7zip` para soportar tests de integración
This commit is contained in:
2026-02-09 19:49:56 +01:00
parent 7ca465fb73
commit 79c42fad55
7 changed files with 187 additions and 50 deletions

View File

@@ -84,24 +84,49 @@ export async function computeHashesFromStream(rs: NodeJS.ReadableStream): Promis
let size = 0;
let crc = 0xffffffff >>> 0;
let settled = false;
rs.on('error', (err: any) => reject(err));
const cleanup = () => {
try {
rs.removeListener('error', onError as any);
rs.removeListener('data', onData as any);
rs.removeListener('end', onEnd as any);
rs.removeListener('close', onClose as any);
} catch (e) {}
};
rs.on('data', (chunk: Buffer) => {
md5.update(chunk);
sha1.update(chunk);
size += chunk.length;
crc = updateCrc(crc, chunk);
});
rs.on('end', () => {
const finalize = () => {
if (settled) return;
settled = true;
cleanup();
const md5sum = md5.digest('hex');
const sha1sum = sha1.digest('hex');
const final = (crc ^ 0xffffffff) >>> 0;
const crcHex = final.toString(16).padStart(8, '0').toLowerCase();
resolve({ size, md5: md5sum, sha1: sha1sum, crc32: crcHex });
});
};
const onError = (err: any) => {
if (settled) return;
settled = true;
cleanup();
reject(err);
};
const onData = (chunk: Buffer) => {
md5.update(chunk);
sha1.update(chunk);
size += chunk.length;
crc = updateCrc(crc, chunk);
};
const onEnd = () => finalize();
const onClose = () => finalize();
rs.on('error', onError as any);
rs.on('data', onData as any);
rs.on('end', onEnd as any);
rs.on('close', onClose as any);
});
}

View File

@@ -15,7 +15,11 @@ import { promises as fsPromises } from 'fs';
import { detectFormat } from '../lib/fileTypeDetector';
import { listArchiveEntries } from './archiveReader';
const ARCHIVE_MAX_ENTRIES = Number(process.env.ARCHIVE_MAX_ENTRIES) || 1000;
const DEFAULT_ARCHIVE_MAX_ENTRIES = 1000;
/**
 * Resolve the cap on archive entries to expose per scan.
 *
 * Reads the `ARCHIVE_MAX_ENTRIES` environment variable on every call and
 * returns it when it parses (base 10) to a positive finite integer;
 * otherwise falls back to `DEFAULT_ARCHIVE_MAX_ENTRIES`.
 *
 * @returns The configured maximum number of archive entries, or the default.
 */
function getArchiveMaxEntries(): number {
  const raw = process.env.ARCHIVE_MAX_ENTRIES ?? '';
  const candidate = parseInt(raw, 10);
  if (Number.isFinite(candidate) && candidate > 0) {
    return candidate;
  }
  return DEFAULT_ARCHIVE_MAX_ENTRIES;
}
export async function scanDirectory(dirPath: string): Promise<any[]> {
const results: any[] = [];
@@ -52,20 +56,24 @@ export async function scanDirectory(dirPath: string): Promise<any[]> {
try {
const entries = await listArchiveEntries(full);
const maxEntries = getArchiveMaxEntries();
for (const e of entries) {
if (archiveEntriesAdded >= ARCHIVE_MAX_ENTRIES) break;
if (archiveEntriesAdded >= maxEntries) break;
if (!e || !e.name) continue;
// avoid path traversal or absolute paths
if (e.name.includes('..') || path.isAbsolute(e.name)) continue;
// Normalize entry path using posix rules and avoid traversal/absolute paths
const normalized = path.posix.normalize(e.name);
const parts = normalized.split('/').filter(Boolean);
if (parts.includes('..') || path.posix.isAbsolute(normalized)) continue;
results.push({
path: `${full}::${e.name}`,
path: `${full}::${normalized}`,
containerPath: full,
entryPath: e.name,
filename: path.basename(e.name),
name: e.name,
entryPath: normalized,
filename: path.posix.basename(normalized),
name: normalized,
size: e.size,
format: detectFormat(e.name),
format: detectFormat(normalized),
isArchive: false,
isArchiveEntry: true,
});
@@ -73,7 +81,11 @@ export async function scanDirectory(dirPath: string): Promise<any[]> {
archiveEntriesAdded++;
}
} catch (err) {
// ignore archive listing errors
// log for diagnostics but continue
try {
// eslint-disable-next-line no-console
console.debug('fsScanner: listArchiveEntries failed for', full, err);
} catch (e) {}
}
}
}