Compare commits

..

4 Commits

7 changed files with 472 additions and 19 deletions

View File

@@ -1,5 +1,24 @@
# Changelog
## 2025-08-18 - 4.2.2 - fix(smartarchive)
Improve tar entry streaming handling and add in-memory gzip/tgz tests
- Fix tar entry handling: properly consume directory entries (resume stream) and wait for entry end before continuing to next header
- Wrap tar file entries with a PassThrough so extracted StreamFile instances can be consumed while the tar extractor continues
- Handle nested archives correctly by piping resultStream -> decompressionStream -> analyzer -> unpacker, avoiding premature end signals
- Add and expand tests in test/test.gzip.ts: verify package.json and TS/license files after extraction, add in-memory gzip extraction test, and add real tgz-in-memory download+extraction test
- Minor logging improvements for tar extraction flow
## 2025-08-18 - 4.2.1 - fix(gzip)
Improve gzip streaming decompression, archive analysis and unpacking; add gzip tests
- Add a streaming DecompressGunzipTransform using fflate.Gunzip with proper _flush handling to support chunked gzip input and avoid buffering issues.
- Refactor ArchiveAnalyzer: introduce IAnalyzedResult, getAnalyzedStream(), and getDecompressionStream() to better detect mime types and wire appropriate decompression streams (gzip, zip, bzip2, tar).
- Use SmartRequest response streams converted via stream.Readable.fromWeb for URL sources in SmartArchive.getArchiveStream() to improve remote archive handling.
- Improve nested archive unpacking and SmartArchive export pipeline: more robust tar/zip handling, consistent SmartDuplex usage and backpressure handling.
- Enhance exportToFs: ensure directories, improved logging for relative paths, and safer write-stream wiring.
- Add comprehensive gzip-focused tests (test/test.gzip.ts) covering file extraction, stream extraction, header filename handling, large files, and a real-world tgz-from-URL extraction scenario.
## 2025-08-18 - 4.2.0 - feat(classes.smartarchive)
Support URL streams, recursive archive unpacking and filesystem export; improve ZIP/GZIP/BZIP2 robustness; CI and package metadata updates

View File

@@ -1,6 +1,6 @@
{
"name": "@push.rocks/smartarchive",
"version": "4.2.0",
"version": "4.2.2",
"description": "A library for working with archive files, providing utilities for compressing and decompressing data.",
"main": "dist_ts/index.js",
"typings": "dist_ts/index.d.ts",

400
test/test.gzip.ts Normal file
View File

@@ -0,0 +1,400 @@
import { tap, expect } from '@git.zone/tstest/tapbundle';
import * as plugins from './plugins.js';
import * as smartarchive from '../ts/index.js';
// Scratch locations used by the tests below. Both paths are resolved relative
// to the directory of this test file and point into a `.nogit` folder one
// level above it.
const testPaths = (() => {
  const testFileDir = plugins.smartpath.get.dirnameFromImportMetaUrl(import.meta.url);
  return {
    nogitDir: plugins.path.join(testFileDir, '../.nogit/'),
    gzipTestDir: plugins.path.join(testFileDir, '../.nogit/gzip-test'),
  };
})();
// Pre-task: make sure the gzip scratch directory exists before tests write to it.
tap.preTask('should prepare test directories', async () => {
  const { gzipTestDir } = testPaths;
  await plugins.smartfile.fs.ensureDir(gzipTestDir);
});
// Round-trips a text file through gzip: writes the plain file, writes a
// gzip-compressed copy produced by fflate, extracts that copy with
// SmartArchive.exportToFs and checks the extracted text equals the original.
tap.test('should create and extract a gzip file', async () => {
  // Create test data
  const testContent = 'This is a test file for gzip compression and decompression.\n'.repeat(100);
  const testFileName = 'test-file.txt';
  const gzipFileName = 'test-file.txt.gz';

  // Write the original (uncompressed) file
  await plugins.smartfile.memory.toFs(
    testContent,
    plugins.path.join(testPaths.gzipTestDir, testFileName),
  );

  // Create the gzip-compressed version using fflate directly
  const fflate = await import('fflate');
  const compressed = fflate.gzipSync(Buffer.from(testContent));
  await plugins.smartfile.memory.toFs(
    Buffer.from(compressed),
    plugins.path.join(testPaths.gzipTestDir, gzipFileName),
  );

  // Now test extraction using SmartArchive
  const gzipArchive = await smartarchive.SmartArchive.fromArchiveFile(
    plugins.path.join(testPaths.gzipTestDir, gzipFileName),
  );

  // Export to a new location
  const extractPath = plugins.path.join(testPaths.gzipTestDir, 'extracted');
  await plugins.smartfile.fs.ensureDir(extractPath);

  // The target filename is passed explicitly because this archive was built
  // without a filename in its gzip header.
  await gzipArchive.exportToFs(extractPath, testFileName);

  // Read the extracted file
  const extractedContent = await plugins.smartfile.fs.toStringSync(
    plugins.path.join(extractPath, testFileName),
  );

  // Verify the content matches
  expect(extractedContent).toEqual(testContent);
});
// Compresses text with fflate, writes it to disk, reads it back as a stream,
// feeds that stream to SmartArchive.fromArchiveStream and verifies that the
// first emitted StreamFile yields the original text.
tap.test('should handle gzip stream extraction', async () => {
  // Create test data
  const testContent = 'Stream test data for gzip\n'.repeat(50);
  const gzipFileName = 'stream-test.txt.gz';

  // Create gzip compressed version
  const fflate = await import('fflate');
  const compressed = fflate.gzipSync(Buffer.from(testContent));
  await plugins.smartfile.memory.toFs(
    Buffer.from(compressed),
    plugins.path.join(testPaths.gzipTestDir, gzipFileName),
  );

  // Create a read stream for the gzip file
  const gzipStream = plugins.smartfile.fsStream.createReadStream(
    plugins.path.join(testPaths.gzipTestDir, gzipFileName),
  );

  // Test extraction using SmartArchive from stream
  const gzipArchive = await smartarchive.SmartArchive.fromArchiveStream(gzipStream);

  // Export to stream and collect every emitted StreamFile
  const streamFiles: plugins.smartfile.StreamFile[] = [];
  const resultStream = await gzipArchive.exportToStreamOfStreamFiles();
  await new Promise<void>((resolve, reject) => {
    resultStream.on('data', (streamFile: plugins.smartfile.StreamFile) => {
      streamFiles.push(streamFile);
    });
    resultStream.on('end', resolve);
    resultStream.on('error', reject);
  });

  // Verify we got the expected file; this assertion fails the test when the
  // list is empty, so the first entry can be read unconditionally afterwards.
  expect(streamFiles.length).toBeGreaterThan(0);

  // Read content from the first stream file
  const firstFile = streamFiles[0];
  const chunks: Buffer[] = [];
  const readStream = await firstFile.createReadStream();
  await new Promise<void>((resolve, reject) => {
    readStream.on('data', (chunk: Buffer) => chunks.push(chunk));
    readStream.on('end', resolve);
    readStream.on('error', reject);
  });
  const extractedContent = Buffer.concat(chunks).toString();
  expect(extractedContent).toEqual(testContent);
});
// Builds a gzip archive whose header really carries the original file name
// (via fflate's `filename` option), extracts it with SmartArchive.exportToFs
// and verifies that the extracted content equals the input text.
tap.test('should handle gzip files with original filename in header', async () => {
  const testContent = 'File with name in gzip header\n'.repeat(30);
  const originalFileName = 'original-name.log';
  const gzipFileName = 'compressed.gz';

  // Node's zlib cannot embed a filename, so use fflate, which writes the name
  // into the gzip header when the `filename` option is given.
  const fflate = await import('fflate');
  const gzipBuffer = Buffer.from(
    fflate.gzipSync(Buffer.from(testContent), {
      level: 9,
      filename: originalFileName,
    }),
  );
  await plugins.smartfile.memory.toFs(
    gzipBuffer,
    plugins.path.join(testPaths.gzipTestDir, gzipFileName),
  );

  // Test extraction
  const gzipArchive = await smartarchive.SmartArchive.fromArchiveFile(
    plugins.path.join(testPaths.gzipTestDir, gzipFileName),
  );
  const extractPath = plugins.path.join(testPaths.gzipTestDir, 'header-test');
  await plugins.smartfile.fs.ensureDir(extractPath);

  // A fallback filename is still passed to exportToFs.
  // NOTE(review): whether SmartArchive prefers the header name over this
  // argument is not visible here, so the output is located by listing the
  // directory rather than by assuming a name.
  await gzipArchive.exportToFs(extractPath, 'compressed.txt');

  // Check that a file was extracted
  const files = await plugins.smartfile.fs.listFileTree(extractPath, '**/*');
  expect(files.length).toBeGreaterThan(0);

  // Read and verify content
  const extractedFile = files[0];
  const extractedContent = await plugins.smartfile.fs.toStringSync(
    plugins.path.join(extractPath, extractedFile || 'compressed.txt'),
  );
  expect(extractedContent).toEqual(testContent);
});
// Compresses 1 MiB of repeated 'x' characters with fflate, extracts the
// archive through SmartArchive.exportToFs and compares the result (length
// first, then full content) with the input.
tap.test('should handle large gzip files', async () => {
  const ONE_MEBIBYTE = 1024 * 1024;
  const largeContent = 'x'.repeat(ONE_MEBIBYTE); // 1MB of 'x' characters
  const gzipFileName = 'large-file.txt.gz';
  const archivePath = plugins.path.join(testPaths.gzipTestDir, gzipFileName);

  // Write the compressed payload to disk
  const { gzipSync } = await import('fflate');
  const gzippedBytes = gzipSync(Buffer.from(largeContent));
  await plugins.smartfile.memory.toFs(Buffer.from(gzippedBytes), archivePath);

  // Extract it again; the output name is passed explicitly
  const archive = await smartarchive.SmartArchive.fromArchiveFile(archivePath);
  const targetDir = plugins.path.join(testPaths.gzipTestDir, 'large-extracted');
  await plugins.smartfile.fs.ensureDir(targetDir);
  await archive.exportToFs(targetDir, 'large-file.txt');

  // At least one file must have been produced
  const extractedFiles = await plugins.smartfile.fs.listFileTree(targetDir, '**/*');
  expect(extractedFiles.length).toBeGreaterThan(0);

  // Compare the first listed file with the input
  const [firstExtracted] = extractedFiles;
  const roundTripped = await plugins.smartfile.fs.toStringSync(
    plugins.path.join(targetDir, firstExtracted || 'large-file.txt'),
  );
  expect(roundTripped.length).toEqual(largeContent.length);
  expect(roundTripped).toEqual(largeContent);
});
// Downloads a published npm tarball through SmartArchive.fromArchiveUrl,
// extracts it to disk and spot-checks the result: package.json name/version,
// one TypeScript file, the license text, and up to five readable text files.
// Requires network access to registry.npmjs.org.
tap.test('should handle real-world multi-chunk gzip from URL', async () => {
  const testUrl = 'https://registry.npmjs.org/@push.rocks/smartfile/-/smartfile-11.2.7.tgz';
  const extractPath = plugins.path.join(testPaths.gzipTestDir, 'real-world-test');

  // Reads one extracted file, addressed relative to the extraction directory.
  const readExtracted = (relativePath: string) =>
    plugins.smartfile.fs.toStringSync(plugins.path.join(extractPath, relativePath));
  const isPackageJson = (candidate: string) => candidate.includes('package.json');

  // Download and extract the archive
  const remoteArchive = await smartarchive.SmartArchive.fromArchiveUrl(testUrl);
  await plugins.smartfile.fs.ensureDir(extractPath);
  await remoteArchive.exportToFs(extractPath);

  // Something must have been written
  const files = await plugins.smartfile.fs.listFileTree(extractPath, '**/*');
  expect(files.length).toBeGreaterThan(0);

  // The package manifest must be present ...
  expect(files.some(isPackageJson)).toBeTrue();

  // ... and must describe the package that was requested
  const manifestPath = files.find(isPackageJson);
  if (manifestPath) {
    const manifest = JSON.parse(await readExtracted(manifestPath));
    expect(manifest.name).toEqual('@push.rocks/smartfile');
    expect(manifest.version).toEqual('11.2.7');
  }

  // The first TypeScript file found should have content
  const tsFilePath = files.find((candidate) => candidate.endsWith('.ts'));
  if (tsFilePath) {
    const tsFileContent = await readExtracted(tsFilePath);
    expect(tsFileContent.length).toBeGreaterThan(10);
    console.log(` ✓ TypeScript file ${tsFilePath} has ${tsFileContent.length} bytes`);
  }

  // The license file, when present, should mention MIT
  const licensePath = files.find((candidate) => candidate.includes('license'));
  if (licensePath) {
    expect(await readExtracted(licensePath)).toContain('MIT');
  }

  // Read the first five text-like files to make sure none is empty
  const textExtensions = ['.json', '.md', '.ts', '.js'];
  const readableFiles = files
    .filter((candidate) => textExtensions.some((ext) => candidate.endsWith(ext)))
    .slice(0, 5);
  for (const file of readableFiles) {
    const content = await readExtracted(file);
    expect(content).toBeDefined();
    expect(content.length).toBeGreaterThan(0);
    console.log(` ✓ Successfully read ${file} (${content.length} bytes)`);
  }
});
// Same round trip as the file-based gzip tests, but without the filesystem:
// the compressed bytes are wrapped in a Readable, handed to
// SmartArchive.fromArchiveStream, and the first emitted StreamFile is read
// back into a string for comparison.
tap.test('should handle gzip extraction fully in memory', async () => {
  const testContent = 'This is test data for in-memory gzip processing\n'.repeat(100);

  // Compress in memory and expose the bytes as a stream
  const { gzipSync } = await import('fflate');
  const gzippedBytes = gzipSync(Buffer.from(testContent));
  const { Readable } = await import('stream');
  const compressedStream = Readable.from(Buffer.from(gzippedBytes));

  // Unpack through SmartArchive
  const archive = await smartarchive.SmartArchive.fromArchiveStream(compressedStream);
  const resultStream = await archive.exportToStreamOfStreamFiles();

  // Gather every StreamFile until the result stream ends
  const collected: plugins.smartfile.StreamFile[] = [];
  await new Promise<void>((resolve, reject) => {
    resultStream.on('data', (entry: plugins.smartfile.StreamFile) => {
      collected.push(entry);
    });
    resultStream.on('end', resolve);
    resultStream.on('error', reject);
  });
  expect(collected.length).toBeGreaterThan(0);

  // Drain the first file into memory
  const [firstFile] = collected;
  const parts: Buffer[] = [];
  const fileStream = await firstFile.createReadStream();
  await new Promise<void>((resolve, reject) => {
    fileStream.on('data', (part: Buffer) => parts.push(part));
    fileStream.on('end', resolve);
    fileStream.on('error', reject);
  });

  const extractedContent = Buffer.concat(parts).toString();
  expect(extractedContent).toEqual(testContent);
  console.log(` ✓ In-memory extraction successful (${extractedContent.length} bytes)`);
});
// Downloads a published npm tarball into a Buffer, unpacks it through
// SmartArchive.fromArchiveStream without writing to disk, then reads
// package.json and the first three emitted files from their streams.
// Requires network access to registry.npmjs.org.
tap.test('should handle real tgz file fully in memory', async (tools) => {
  await tools.timeout(10000); // Set 10 second timeout

  // Drains one StreamFile into a single Buffer.
  const readIntoBuffer = async (file: plugins.smartfile.StreamFile): Promise<Buffer> => {
    const parts: Buffer[] = [];
    const source = await file.createReadStream();
    await new Promise<void>((resolve, reject) => {
      source.on('data', (part: Buffer) => parts.push(part));
      source.on('end', resolve);
      source.on('error', reject);
    });
    return Buffer.concat(parts);
  };

  // Download tgz file into memory
  const response = await plugins.smartrequest.SmartRequest.create()
    .url('https://registry.npmjs.org/@push.rocks/smartfile/-/smartfile-11.2.7.tgz')
    .get();
  const tgzBuffer = Buffer.from(await response.arrayBuffer());
  console.log(` Downloaded ${tgzBuffer.length} bytes into memory`);

  // Wrap the buffer in a stream and hand it to SmartArchive
  const { Readable } = await import('stream');
  const tgzStream = Readable.from(tgzBuffer);
  const archive = await smartarchive.SmartArchive.fromArchiveStream(tgzStream);

  // Collect emitted StreamFiles
  const streamFiles: plugins.smartfile.StreamFile[] = [];
  const resultStream = await archive.exportToStreamOfStreamFiles();
  await new Promise<void>((resolve, reject) => {
    // Fallback: stop waiting after 5 seconds even if the stream never ends,
    // and continue with whatever has been collected so far.
    const fallbackTimer: NodeJS.Timeout = setTimeout(() => {
      clearTimeout(fallbackTimer);
      resolve();
    }, 5000);
    resultStream.on('data', (streamFile: plugins.smartfile.StreamFile) => {
      streamFiles.push(streamFile);
    });
    resultStream.on('end', () => {
      clearTimeout(fallbackTimer);
      resolve();
    });
    resultStream.on('error', (err) => {
      clearTimeout(fallbackTimer);
      reject(err);
    });
  });
  console.log(` Extracted ${streamFiles.length} files in memory`);

  // At minimum we should have extracted something
  expect(streamFiles.length).toBeGreaterThan(0);

  // Find and read package.json from memory
  const packageJsonFile = streamFiles.find((candidate) =>
    candidate.relativeFilePath?.includes('package.json'),
  );
  if (packageJsonFile) {
    const manifestBytes = await readIntoBuffer(packageJsonFile);
    const packageJson = JSON.parse(manifestBytes.toString());
    expect(packageJson.name).toEqual('@push.rocks/smartfile');
    expect(packageJson.version).toEqual('11.2.7');
    console.log(` ✓ Read package.json from memory: ${packageJson.name}@${packageJson.version}`);
  }

  // Read the first three files to verify integrity.
  // NOTE(review): if package.json is among them it is read a second time here;
  // confirm that StreamFile supports being read twice.
  for (const file of streamFiles.slice(0, 3)) {
    const content = await readIntoBuffer(file);
    expect(content.length).toBeGreaterThan(0);
    console.log(` ✓ Read ${file.relativeFilePath} from memory (${content.length} bytes)`);
  }
});
// Start the tap run for the tests registered above and export the value
// returned by tap.start() as this module's default export.
export default tap.start();

View File

@@ -3,6 +3,6 @@
*/
export const commitinfo = {
name: '@push.rocks/smartarchive',
version: '4.2.0',
version: '4.2.2',
description: 'A library for working with archive files, providing utilities for compressing and decompressing data.'
}

View File

@@ -26,8 +26,20 @@ export class CompressGunzipTransform extends plugins.stream.Transform {
// DecompressGunzipTransform class that extends the Node.js Transform stream to
// create a stream that decompresses GZip-compressed data using fflate's gunzip function
export class DecompressGunzipTransform extends plugins.stream.Transform {
private gunzip: any; // fflate.Gunzip instance
constructor() {
super();
// Create a streaming Gunzip decompressor
this.gunzip = new plugins.fflate.Gunzip((chunk, final) => {
// Push decompressed chunks to the output stream
this.push(Buffer.from(chunk));
if (final) {
// Signal end of stream when decompression is complete
this.push(null);
}
});
}
_transform(
@@ -35,17 +47,23 @@ export class DecompressGunzipTransform extends plugins.stream.Transform {
encoding: BufferEncoding,
callback: plugins.stream.TransformCallback,
) {
// Use fflate's gunzip function to decompress the chunk
plugins.fflate.gunzip(chunk, (err, decompressed) => {
if (err) {
// If an error occurs during decompression, pass the error to the callback
callback(err);
} else {
// If decompression is successful, push the decompressed data into the stream
this.push(decompressed);
callback();
}
});
try {
// Feed chunks to the gunzip stream
this.gunzip.push(chunk, false);
callback();
} catch (err) {
callback(err as Error);
}
}
_flush(callback: plugins.stream.TransformCallback) {
try {
// Signal end of input to gunzip
this.gunzip.push(new Uint8Array(0), true);
callback();
} catch (err) {
callback(err as Error);
}
}
}

View File

@@ -158,21 +158,34 @@ export class SmartArchive {
console.log(
`tar stream directory: ${header.name} ... skipping!`,
);
next();
stream.resume(); // Consume directory stream
stream.on('end', () => next());
return;
}
console.log(`tar stream file: ${header.name}`);
// Create a PassThrough stream to buffer the data
const passThrough = new plugins.stream.PassThrough();
const streamfile = plugins.smartfile.StreamFile.fromStream(
stream,
passThrough,
header.name,
);
// Push the streamfile immediately
streamFileIntake.push(streamfile);
stream.on('end', function () {
next(); // ready for next entry
// Pipe the tar entry stream to the passthrough
stream.pipe(passThrough);
// Move to next entry when this one ends
stream.on('end', () => {
passThrough.end();
next();
});
});
tarStream.on('finish', function () {
console.log('finished');
console.log('tar extraction finished');
// Only signal end if this is the final stream
streamFileIntake.signalEnd();
});
analyzedResultChunk.resultStream.pipe(
@@ -199,10 +212,13 @@ export class SmartArchive {
analyzedResultChunk.isArchive &&
analyzedResultChunk.decompressionStream
) {
analyzedResultChunk.resultStream
// For nested archives (like gzip containing tar)
const nestedStream = analyzedResultChunk.resultStream
.pipe(analyzedResultChunk.decompressionStream)
.pipe(createAnalyzedStream())
.pipe(createUnpackStream());
// Don't signal end here - let the nested unpacker handle it
} else {
const streamFile = plugins.smartfile.StreamFile.fromStream(
analyzedResultChunk.resultStream,