feat: stream hashing y entradas en archivos

- Añade `computeHashesFromStream` para hashing desde streams
- Añade `streamArchiveEntry` e integra en `importDirectory` (path codificado con ::)
- Extiende `scanDirectory` para exponer entradas internas, normaliza rutas POSIX y evita traversal; `ARCHIVE_MAX_ENTRIES` configurable
- Limpia listeners en hashing y mejora robustez/logging
- Añade tests unitarios e integración; actualiza mocks a `Mock` types
- CI: instala `unzip` junto a `p7zip` para soportar tests de integración
This commit is contained in:
2026-02-09 19:49:56 +01:00
parent 7ca465fb73
commit 79c42fad55
7 changed files with 187 additions and 50 deletions

View File

@@ -84,24 +84,49 @@ export async function computeHashesFromStream(rs: NodeJS.ReadableStream): Promis
let size = 0;
let crc = 0xffffffff >>> 0;
let settled = false;
rs.on('error', (err: any) => reject(err));
const cleanup = () => {
try {
rs.removeListener('error', onError as any);
rs.removeListener('data', onData as any);
rs.removeListener('end', onEnd as any);
rs.removeListener('close', onClose as any);
} catch (e) {}
};
rs.on('data', (chunk: Buffer) => {
md5.update(chunk);
sha1.update(chunk);
size += chunk.length;
crc = updateCrc(crc, chunk);
});
rs.on('end', () => {
const finalize = () => {
if (settled) return;
settled = true;
cleanup();
const md5sum = md5.digest('hex');
const sha1sum = sha1.digest('hex');
const final = (crc ^ 0xffffffff) >>> 0;
const crcHex = final.toString(16).padStart(8, '0').toLowerCase();
resolve({ size, md5: md5sum, sha1: sha1sum, crc32: crcHex });
});
};
const onError = (err: any) => {
if (settled) return;
settled = true;
cleanup();
reject(err);
};
const onData = (chunk: Buffer) => {
md5.update(chunk);
sha1.update(chunk);
size += chunk.length;
crc = updateCrc(crc, chunk);
};
const onEnd = () => finalize();
const onClose = () => finalize();
rs.on('error', onError as any);
rs.on('data', onData as any);
rs.on('end', onEnd as any);
rs.on('close', onClose as any);
});
}

View File

@@ -15,7 +15,11 @@ import { promises as fsPromises } from 'fs';
import { detectFormat } from '../lib/fileTypeDetector';
import { listArchiveEntries } from './archiveReader';
const ARCHIVE_MAX_ENTRIES = Number(process.env.ARCHIVE_MAX_ENTRIES) || 1000;
const DEFAULT_ARCHIVE_MAX_ENTRIES = 1000;
/**
 * Resolve the cap on archive entries to expose per scan.
 *
 * Reads the `ARCHIVE_MAX_ENTRIES` environment variable on every call and
 * returns it when it parses (base 10) to a positive finite integer;
 * otherwise falls back to `DEFAULT_ARCHIVE_MAX_ENTRIES`.
 *
 * @returns The configured maximum number of archive entries, or the default.
 */
function getArchiveMaxEntries(): number {
  const raw = process.env.ARCHIVE_MAX_ENTRIES ?? '';
  const candidate = parseInt(raw, 10);
  if (Number.isFinite(candidate) && candidate > 0) {
    return candidate;
  }
  return DEFAULT_ARCHIVE_MAX_ENTRIES;
}
export async function scanDirectory(dirPath: string): Promise<any[]> {
const results: any[] = [];
@@ -52,20 +56,24 @@ export async function scanDirectory(dirPath: string): Promise<any[]> {
try {
const entries = await listArchiveEntries(full);
const maxEntries = getArchiveMaxEntries();
for (const e of entries) {
if (archiveEntriesAdded >= ARCHIVE_MAX_ENTRIES) break;
if (archiveEntriesAdded >= maxEntries) break;
if (!e || !e.name) continue;
// avoid path traversal or absolute paths
if (e.name.includes('..') || path.isAbsolute(e.name)) continue;
// Normalize entry path using posix rules and avoid traversal/absolute paths
const normalized = path.posix.normalize(e.name);
const parts = normalized.split('/').filter(Boolean);
if (parts.includes('..') || path.posix.isAbsolute(normalized)) continue;
results.push({
path: `${full}::${e.name}`,
path: `${full}::${normalized}`,
containerPath: full,
entryPath: e.name,
filename: path.basename(e.name),
name: e.name,
entryPath: normalized,
filename: path.posix.basename(normalized),
name: normalized,
size: e.size,
format: detectFormat(e.name),
format: detectFormat(normalized),
isArchive: false,
isArchiveEntry: true,
});
@@ -73,7 +81,11 @@ export async function scanDirectory(dirPath: string): Promise<any[]> {
archiveEntriesAdded++;
}
} catch (err) {
// ignore archive listing errors
// log for diagnostics but continue
try {
// eslint-disable-next-line no-console
console.debug('fsScanner: listArchiveEntries failed for', full, err);
} catch (e) {}
}
}
}