Compare commits

...

8 Commits

25 changed files with 11444 additions and 2352 deletions


@@ -1,5 +1,44 @@
# Changelog
## 2025-11-25 - 5.0.0 - BREAKING CHANGE(SmartArchive)
Refactor public API: rename factory/extraction methods, introduce typed interfaces and improved compression tools
- Renamed SmartArchive factory methods: fromArchiveUrl -> fromUrl, fromArchiveFile -> fromFile, fromArchiveStream -> fromStream; added fromBuffer helper.
- Renamed extraction APIs: exportToFs -> extractToDirectory and exportToStreamOfStreamFiles -> extractToStream; stream-based helpers updated accordingly.
- Export surface reorganized (ts/index.ts): core interfaces and errors are exported and new modules (bzip2tools, archiveanalyzer) are publicly available.
- Introduced strong TypeScript types (ts/interfaces.ts) and centralized error types (ts/errors.ts) including Bzip2Error and BZIP2_ERROR_CODES.
- Refactored format implementations and stream transforms: GzipTools/GzipCompressionTransform/GzipDecompressionTransform, ZipTools (ZipCompressionStream, ZipDecompressionTransform), TarTools improvements.
- BZIP2 implementation improvements: new bit iterator (IBitReader), clearer error handling and streaming unbzip2 transform.
- Updated tests to use the new APIs and method names.
- Breaking change: public API method names and some class/transform names have changed — this requires code updates for consumers.
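A minimal migration sketch for consumers (method names taken from the list above; the URL and target directory are illustrative):

```typescript
import { SmartArchive } from '@push.rocks/smartarchive';

// 4.x: SmartArchive.fromArchiveUrl(url) + archive.exportToFs(dir)
// 5.x:
const archive = await SmartArchive.fromUrl(
  'https://registry.npmjs.org/@push.rocks/smartfile/-/smartfile-11.2.7.tgz',
);
await archive.extractToDirectory('./extracted');
```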
## 2025-11-25 - 4.2.4 - fix(plugins)
Migrate filesystem usage to Node fs/fsPromises and upgrade smartfile to v13; add listFileTree helper and update tests
- Bumped dependency @push.rocks/smartfile to ^13.0.0 and removed unused dependency `through`
- Replaced usages of smartfile.fs and smartfile.fsStream with Node native fs and fs/promises (createReadStream/createWriteStream, mkdir({recursive:true}), stat, readFile)
- Added plugins.listFileTree helper (recursive directory lister) and used it in TarTools.packDirectory and tests
- Updated SmartArchive.exportToFs to use plugins.fs and plugins.fsPromises for directory creation and file writes
- Updated TarTools to use plugins.fs.createReadStream and plugins.fsPromises.stat when packing directories
- Converted/updated tests to a Node/Deno-friendly test file (test.node+deno.ts) and switched test helpers to use fsPromises
- Added readme.hints.md with migration notes for Smartfile v13 and architecture/dependency notes
## 2025-11-25 - 4.2.3 - fix(build)
Upgrade dev tooling: bump @git.zone/tsbuild, @git.zone/tsrun and @git.zone/tstest versions
- Bump @git.zone/tsbuild from ^2.6.6 to ^3.1.0
- Bump @git.zone/tsrun from ^1.3.3 to ^2.0.0
- Bump @git.zone/tstest from ^2.3.4 to ^3.1.3
## 2025-08-18 - 4.2.2 - fix(smartarchive)
Improve tar entry streaming handling and add in-memory gzip/tgz tests
- Fix tar entry handling: properly consume directory entries (resume stream) and wait for entry end before continuing to next header
- Wrap tar file entries with a PassThrough so extracted StreamFile instances can be consumed while the tar extractor continues
- Handle nested archives correctly by piping resultStream -> decompressionStream -> analyzer -> unpacker, avoiding premature end signals
- Add and expand tests in test/test.gzip.ts: verify package.json and TS/license files after extraction, add in-memory gzip extraction test, and add real tgz-in-memory download+extraction test
- Minor logging improvements for tar extraction flow
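A sketch of the entry-handling pattern described above, using tar-stream's extract API directly (simplified; the StreamFile wrapping is elided):

```typescript
import * as tarStream from 'tar-stream';
import { PassThrough } from 'node:stream';

const extract = tarStream.extract();
extract.on('entry', (header, entryStream, next) => {
  if (header.type === 'directory') {
    entryStream.resume(); // consume directory entries so the extractor can continue
    return next();
  }
  // Hand consumers a PassThrough so the extractor is not blocked on them
  const passThrough = new PassThrough();
  entryStream.pipe(passThrough);
  // Wait for the entry to end before moving to the next header
  entryStream.on('end', () => next());
  // ...expose passThrough to the consumer (e.g. wrapped in a StreamFile)
});
```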
## 2025-08-18 - 4.2.1 - fix(gzip)
Improve gzip streaming decompression, archive analysis and unpacking; add gzip tests

deno.lock (generated, new file, 6945 lines): diff suppressed because it is too large

dist_ts/index.d.ts (vendored, 4 lines changed)

@@ -1,4 +1,8 @@
+export * from './interfaces.js';
+export * from './errors.js';
export * from './classes.smartarchive.js';
export * from './classes.tartools.js';
export * from './classes.ziptools.js';
export * from './classes.gziptools.js';
+export * from './classes.bzip2tools.js';
+export * from './classes.archiveanalyzer.js';

dist_ts/index.js (vendored)

@@ -1,5 +1,13 @@
+// Core types and errors
+export * from './interfaces.js';
+export * from './errors.js';
+// Main archive class
export * from './classes.smartarchive.js';
+// Format-specific tools
export * from './classes.tartools.js';
export * from './classes.ziptools.js';
export * from './classes.gziptools.js';
+export * from './classes.bzip2tools.js';
+// Archive analysis
+export * from './classes.archiveanalyzer.js';
-//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiaW5kZXguanMiLCJzb3VyY2VSb290IjoiIiwic291cmNlcyI6WyIuLi90cy9pbmRleC50cyJdLCJuYW1lcyI6W10sIm1hcHBpbmdzIjoiQUFBQSxjQUFjLDJCQUEyQixDQUFDO0FBQzFDLGNBQWMsdUJBQXVCLENBQUM7QUFDdEMsY0FBYyx1QkFBdUIsQ0FBQztBQUN0QyxjQUFjLHdCQUF3QixDQUFDIn0=
+//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoiaW5kZXguanMiLCJzb3VyY2VSb290IjoiIiwic291cmNlcyI6WyIuLi90cy9pbmRleC50cyJdLCJuYW1lcyI6W10sIm1hcHBpbmdzIjoiQUFBQSx3QkFBd0I7QUFDeEIsY0FBYyxpQkFBaUIsQ0FBQztBQUNoQyxjQUFjLGFBQWEsQ0FBQztBQUU1QixxQkFBcUI7QUFDckIsY0FBYywyQkFBMkIsQ0FBQztBQUUxQyx3QkFBd0I7QUFDeEIsY0FBYyx1QkFBdUIsQ0FBQztBQUN0QyxjQUFjLHVCQUF1QixDQUFDO0FBQ3RDLGNBQWMsd0JBQXdCLENBQUM7QUFDdkMsY0FBYyx5QkFBeUIsQ0FBQztBQUV4QyxtQkFBbUI7QUFDbkIsY0FBYyw4QkFBOEIsQ0FBQyJ9

package.json

@@ -1,6 +1,6 @@
{ {
"name": "@push.rocks/smartarchive", "name": "@push.rocks/smartarchive",
"version": "4.2.1", "version": "5.0.0",
"description": "A library for working with archive files, providing utilities for compressing and decompressing data.", "description": "A library for working with archive files, providing utilities for compressing and decompressing data.",
"main": "dist_ts/index.js", "main": "dist_ts/index.js",
"typings": "dist_ts/index.d.ts", "typings": "dist_ts/index.d.ts",
@@ -22,7 +22,7 @@
"homepage": "https://code.foss.global/push.rocks/smartarchive#readme", "homepage": "https://code.foss.global/push.rocks/smartarchive#readme",
"dependencies": { "dependencies": {
"@push.rocks/smartdelay": "^3.0.5", "@push.rocks/smartdelay": "^3.0.5",
"@push.rocks/smartfile": "^11.2.7", "@push.rocks/smartfile": "^13.0.0",
"@push.rocks/smartpath": "^6.0.0", "@push.rocks/smartpath": "^6.0.0",
"@push.rocks/smartpromise": "^4.2.3", "@push.rocks/smartpromise": "^4.2.3",
"@push.rocks/smartrequest": "^4.2.2", "@push.rocks/smartrequest": "^4.2.2",
@@ -33,13 +33,12 @@
"@types/tar-stream": "^3.1.4", "@types/tar-stream": "^3.1.4",
"fflate": "^0.8.2", "fflate": "^0.8.2",
"file-type": "^21.0.0", "file-type": "^21.0.0",
"tar-stream": "^3.1.7", "tar-stream": "^3.1.7"
"through": "^2.3.8"
}, },
"devDependencies": { "devDependencies": {
"@git.zone/tsbuild": "^2.6.6", "@git.zone/tsbuild": "^3.1.0",
"@git.zone/tsrun": "^1.3.3", "@git.zone/tsrun": "^2.0.0",
"@git.zone/tstest": "^2.3.4" "@git.zone/tstest": "^3.1.3"
}, },
"private": false, "private": false,
"files": [ "files": [

pnpm-lock.yaml (generated, 3586 lines changed): diff suppressed because it is too large

readme.hints.md (new file)

@@ -1 +1,38 @@
# Smartarchive Development Hints
## Dependency Upgrades (2025-01-25)
### Completed Upgrades
- **@git.zone/tsbuild**: ^2.6.6 → ^3.1.0
- **@git.zone/tsrun**: ^1.3.3 → ^2.0.0
- **@git.zone/tstest**: ^2.3.4 → ^3.1.3
- **@push.rocks/smartfile**: ^11.2.7 → ^13.0.0
### Migration Notes
#### Smartfile v13 Migration
Smartfile v13 removed filesystem operations (`fs`, `memory`, `fsStream` namespaces). These were replaced with Node.js native `fs` and `fs/promises`:
**Replacements made:**
- `smartfile.fs.ensureDir(path)` → `fsPromises.mkdir(path, { recursive: true })`
- `smartfile.fs.stat(path)` → `fsPromises.stat(path)`
- `smartfile.fs.toReadStream(path)` → `fs.createReadStream(path)`
- `smartfile.fs.toStringSync(path)` → `fsPromises.readFile(path, 'utf8')`
- `smartfile.fs.listFileTree(dir, pattern)` → custom `listFileTree()` helper
- `smartfile.fsStream.createReadStream(path)` → `fs.createReadStream(path)`
- `smartfile.fsStream.createWriteStream(path)` → `fs.createWriteStream(path)`
- `smartfile.memory.toFs(content, path)` → `fsPromises.writeFile(path, content)`
**Still using from smartfile v13:**
- `SmartFile` class (in-memory file representation)
- `StreamFile` class (streaming file handling)
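A small before/after sketch of the mapping above (the paths are placeholders):

```typescript
import * as fs from 'node:fs';
import * as fsPromises from 'node:fs/promises';

const targetDir = './output';          // placeholder
const filePath = './output/data.txt';  // placeholder

// Before (smartfile v11):
//   await smartfile.fs.ensureDir(targetDir);
//   const text = smartfile.fs.toStringSync(filePath);
//   const stream = smartfile.fsStream.createReadStream(filePath);

// After (native Node, per the table above):
await fsPromises.mkdir(targetDir, { recursive: true });
const text = await fsPromises.readFile(filePath, 'utf8');
const stream = fs.createReadStream(filePath);
```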
### Removed Dependencies
- `through@2.3.8` - was unused in the codebase
## Architecture Notes
- Uses `fflate` for ZIP/GZIP compression (pure JS, works in browser)
- Uses `tar-stream` for TAR archive handling
- Uses `file-type` for MIME type detection
- Custom BZIP2 implementation in `ts/bzip2/` directory
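For orientation, a minimal standalone sketch of how two of these pieces compose (this is not smartarchive's internal code, just the underlying libraries used directly):

```typescript
import * as tarStream from 'tar-stream';
import { gzipSync } from 'fflate';

// Build a tiny tar in memory with tar-stream, then gzip it with fflate.
const pack = tarStream.pack();
pack.entry({ name: 'hello.txt' }, 'Hello, World!');
pack.finalize();

const chunks: Buffer[] = [];
pack.on('data', (chunk: Buffer) => chunks.push(chunk));
pack.on('end', () => {
  const tgzBytes = gzipSync(Buffer.concat(chunks)); // Uint8Array of .tar.gz bytes
  console.log(`in-memory .tar.gz is ${tgzBytes.length} bytes`);
});
```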

readme.md (411 lines changed)

@@ -1,29 +1,32 @@
# @push.rocks/smartarchive 📦
-**Powerful archive manipulation for modern Node.js applications**
+Powerful archive manipulation for modern Node.js applications.
`@push.rocks/smartarchive` is a versatile library for handling archive files with a focus on developer experience. Work with **zip**, **tar**, **gzip**, and **bzip2** formats through a unified, streaming-optimized API.
+## Issue Reporting and Security
+For reporting bugs, issues, or security vulnerabilities, please visit [community.foss.global/](https://community.foss.global/). This is the central community hub for all issue reporting. Developers who sign and comply with our contribution agreement and go through identification can also get a [code.foss.global/](https://code.foss.global/) account to submit Pull Requests directly.
## Features 🚀
-- 📁 **Multi-format support** - Handle `.zip`, `.tar`, `.tar.gz`, `.tgz`, and `.bz2` archives
-- 🌊 **Streaming-first architecture** - Process large archives without memory constraints
-- 🔄 **Unified API** - Consistent interface across different archive formats
-- 🎯 **Smart detection** - Automatically identifies archive types
-- ⚡ **High performance** - Optimized for speed with parallel processing where possible
-- 🔧 **Flexible I/O** - Work with files, URLs, and streams seamlessly
-- 📊 **Archive analysis** - Inspect contents without extraction
-- 🛠️ **Modern TypeScript** - Full type safety and excellent IDE support
+- 📁 **Multi-format support** — Handle `.zip`, `.tar`, `.tar.gz`, `.tgz`, and `.bz2` archives
+- 🌊 **Streaming-first architecture** — Process large archives without memory constraints
+- 🔄 **Unified API** — Consistent interface across different archive formats
+- 🎯 **Smart detection** — Automatically identifies archive types via magic bytes
+- ⚡ **High performance** — Built on `tar-stream` and `fflate` for speed
+- 🔧 **Flexible I/O** — Work with files, URLs, and streams seamlessly
+- 🛠️ **Modern TypeScript** — Full type safety and excellent IDE support
## Installation 📥
```bash
-# Using npm
-npm install @push.rocks/smartarchive
# Using pnpm (recommended)
pnpm add @push.rocks/smartarchive
+# Using npm
+npm install @push.rocks/smartarchive
# Using yarn
yarn add @push.rocks/smartarchive
```
@@ -37,7 +40,7 @@ import { SmartArchive } from '@push.rocks/smartarchive';
// Extract a .tar.gz archive from a URL directly to the filesystem
const archive = await SmartArchive.fromArchiveUrl(
-  'https://github.com/some/repo/archive/main.tar.gz'
+  'https://registry.npmjs.org/some-package/-/some-package-1.0.0.tgz'
);
await archive.exportToFs('./extracted');
```
@@ -52,10 +55,15 @@ const archive = await SmartArchive.fromArchiveFile('./large-archive.zip');
const streamOfFiles = await archive.exportToStreamOfStreamFiles();
// Process each file in the archive
-streamOfFiles.on('data', (fileStream) => {
-  console.log(`Processing ${fileStream.path}`);
+streamOfFiles.on('data', async (streamFile) => {
+  console.log(`Processing ${streamFile.relativeFilePath}`);
+  const readStream = await streamFile.createReadStream();
  // Handle individual file stream
});
+streamOfFiles.on('end', () => {
+  console.log('Extraction complete');
+});
```
## Core Concepts 💡
@@ -64,17 +72,18 @@ streamOfFiles.on('data', (fileStream) => {
`SmartArchive` accepts archives from three sources:
-1. **URL** - Download and process archives from the web
-2. **File** - Load archives from the local filesystem
-3. **Stream** - Process archives from any Node.js stream
+| Source | Method | Use Case |
+|--------|--------|----------|
+| **URL** | `SmartArchive.fromArchiveUrl(url)` | Download and process archives from the web |
+| **File** | `SmartArchive.fromArchiveFile(path)` | Load archives from the local filesystem |
+| **Stream** | `SmartArchive.fromArchiveStream(stream)` | Process archives from any Node.js stream |
### Export Destinations
-Extract archives to multiple destinations:
-1. **Filesystem** - Extract directly to a directory
-2. **Stream of files** - Process files individually as streams
-3. **Archive stream** - Re-stream as different format
+| Destination | Method | Use Case |
+|-------------|--------|----------|
+| **Filesystem** | `exportToFs(targetDir, fileName?)` | Extract directly to a directory |
+| **Stream of files** | `exportToStreamOfStreamFiles()` | Process files individually as `StreamFile` objects |
## Usage Examples 🔨
@@ -89,10 +98,11 @@ await zipArchive.exportToFs('./output');
// Stream ZIP contents for processing
const fileStream = await zipArchive.exportToStreamOfStreamFiles();
-fileStream.on('data', (file) => {
-  if (file.path.endsWith('.json')) {
+fileStream.on('data', async (streamFile) => {
+  if (streamFile.relativeFilePath.endsWith('.json')) {
+    const readStream = await streamFile.createReadStream();
    // Process JSON files from the archive
-    file.pipe(jsonProcessor);
  }
});
```
@@ -106,10 +116,38 @@ import { SmartArchive, TarTools } from '@push.rocks/smartarchive';
const tarGzArchive = await SmartArchive.fromArchiveFile('./archive.tar.gz');
await tarGzArchive.exportToFs('./extracted');
-// Create a TAR archive (using TarTools directly)
+// Create a TAR archive using TarTools directly
const tarTools = new TarTools();
-const packStream = await tarTools.packDirectory('./source-directory');
-packStream.pipe(createWriteStream('./output.tar'));
+const pack = await tarTools.getPackStream();
+// Add files to the pack
+await tarTools.addFileToPack(pack, {
+  fileName: 'hello.txt',
+  content: 'Hello, World!'
+});
+await tarTools.addFileToPack(pack, {
+  fileName: 'data.json',
+  content: Buffer.from(JSON.stringify({ foo: 'bar' }))
+});
+// Finalize and pipe to destination
+pack.finalize();
+pack.pipe(createWriteStream('./output.tar'));
+```
+### Pack a directory into TAR
+```typescript
+import { TarTools } from '@push.rocks/smartarchive';
+import { createWriteStream } from 'fs';
+const tarTools = new TarTools();
+// Pack an entire directory
+const pack = await tarTools.packDirectory('./src');
+pack.finalize();
+pack.pipe(createWriteStream('./source.tar'));
```
### Extracting from URLs
@@ -117,47 +155,36 @@ packStream.pipe(createWriteStream('./output.tar'));
```typescript
import { SmartArchive } from '@push.rocks/smartarchive';
-// Download and extract in one operation
-const remoteArchive = await SmartArchive.fromArchiveUrl(
-  'https://example.com/data.tar.gz'
+// Download and extract npm packages
+const npmPackage = await SmartArchive.fromArchiveUrl(
+  'https://registry.npmjs.org/@push.rocks/smartfile/-/smartfile-11.2.7.tgz'
);
+await npmPackage.exportToFs('./node_modules/@push.rocks/smartfile');
-// Extract to filesystem
-await remoteArchive.exportToFs('./local-dir');
-// Or process as stream
-const stream = await remoteArchive.exportToStreamOfStreamFiles();
+// Or process as stream for memory efficiency
+const stream = await npmPackage.exportToStreamOfStreamFiles();
+stream.on('data', async (file) => {
+  console.log(`Extracted: ${file.relativeFilePath}`);
+});
-```
-### Analyzing archive contents
-```typescript
-import { SmartArchive } from '@push.rocks/smartarchive';
-// Analyze without extracting
-const archive = await SmartArchive.fromArchiveFile('./archive.zip');
-const analyzer = archive.archiveAnalyzer;
-// Use the analyzer to inspect contents
-// (exact implementation depends on analyzer methods)
```
### Working with GZIP files
```typescript
import { SmartArchive, GzipTools } from '@push.rocks/smartarchive';
+import { createReadStream, createWriteStream } from 'fs';
-// Decompress a .gz file
+// Decompress a .gz file - provide filename since gzip doesn't store it
const gzipArchive = await SmartArchive.fromArchiveFile('./data.json.gz');
await gzipArchive.exportToFs('./decompressed', 'data.json');
-// Use GzipTools directly for streaming
+// Use GzipTools directly for streaming decompression
const gzipTools = new GzipTools();
const decompressStream = gzipTools.getDecompressionStream();
createReadStream('./compressed.gz')
  .pipe(decompressStream)
-  .pipe(createWriteStream('./decompressed'));
+  .pipe(createWriteStream('./decompressed.txt'));
```
### Working with BZIP2 files
@@ -172,115 +199,175 @@ const bzipArchive = await SmartArchive.fromArchiveUrl(
await bzipArchive.exportToFs('./extracted', 'data.txt');
```
-### Advanced streaming operations
-```typescript
-import { SmartArchive } from '@push.rocks/smartarchive';
-import { pipeline } from 'stream/promises';
-// Chain operations with streams
-const archive = await SmartArchive.fromArchiveFile('./archive.tar.gz');
-const exportStream = await archive.exportToStreamOfStreamFiles();
-// Process each file in the archive
-await pipeline(
-  exportStream,
-  async function* (source) {
-    for await (const file of source) {
-      if (file.path.endsWith('.log')) {
-        // Process log files
-        yield processLogFile(file);
-      }
-    }
-  },
-  createWriteStream('./processed-logs.txt')
-);
-```
-### Creating archives (advanced)
-```typescript
-import { SmartArchive } from '@push.rocks/smartarchive';
-import { TarTools } from '@push.rocks/smartarchive';
-// Using SmartArchive to create an archive
-const archive = new SmartArchive();
-// Add content to the archive
-archive.addedDirectories.push('./src');
-archive.addedFiles.push('./readme.md');
-archive.addedFiles.push('./package.json');
-// Export as TAR.GZ
-const tarGzStream = await archive.exportToTarGzStream();
-tarGzStream.pipe(createWriteStream('./output.tar.gz'));
-```
-### Extract and transform
-```typescript
-import { SmartArchive } from '@push.rocks/smartarchive';
-import { Transform } from 'stream';
-// Extract and transform files in one pipeline
-const archive = await SmartArchive.fromArchiveUrl(
-  'https://example.com/source-code.tar.gz'
-);
-const extractStream = await archive.exportToStreamOfStreamFiles();
-// Transform TypeScript to JavaScript during extraction
-extractStream.on('data', (fileStream) => {
-  if (fileStream.path.endsWith('.ts')) {
-    fileStream
-      .pipe(typescriptTranspiler())
-      .pipe(createWriteStream(fileStream.path.replace('.ts', '.js')));
-  } else {
-    fileStream.pipe(createWriteStream(fileStream.path));
-  }
-});
-```
+### In-memory processing (no filesystem)
+```typescript
+import { SmartArchive } from '@push.rocks/smartarchive';
+import { Readable } from 'stream';
+// Process archives entirely in memory
+const compressedBuffer = await fetchCompressedData();
+const memoryStream = Readable.from(compressedBuffer);
+const archive = await SmartArchive.fromArchiveStream(memoryStream);
+const streamFiles = await archive.exportToStreamOfStreamFiles();
+const extractedFiles: Array<{ name: string; content: Buffer }> = [];
+streamFiles.on('data', async (streamFile) => {
+  const chunks: Buffer[] = [];
+  const readStream = await streamFile.createReadStream();
+  for await (const chunk of readStream) {
+    chunks.push(chunk);
+  }
+  extractedFiles.push({
+    name: streamFile.relativeFilePath,
+    content: Buffer.concat(chunks)
+  });
+});
+await new Promise((resolve) => streamFiles.on('end', resolve));
+console.log(`Extracted ${extractedFiles.length} files in memory`);
+```
+### Nested archive handling (e.g., .tar.gz)
+The library automatically handles nested compression. A `.tar.gz` file is:
+1. First decompressed from gzip
+2. Then unpacked from tar
+This happens transparently:
+```typescript
+import { SmartArchive } from '@push.rocks/smartarchive';
+// Automatically handles gzip → tar extraction chain
+const tgzArchive = await SmartArchive.fromArchiveFile('./package.tar.gz');
+await tgzArchive.exportToFs('./extracted');
```
## API Reference 📚
### SmartArchive Class
-#### Static Methods
-- `SmartArchive.fromArchiveUrl(url: string)` - Create from URL
-- `SmartArchive.fromArchiveFile(path: string)` - Create from file
-- `SmartArchive.fromArchiveStream(stream: NodeJS.ReadableStream)` - Create from stream
+The main entry point for archive operations.
+#### Static Factory Methods
+```typescript
+// Create from URL - downloads and processes archive
+SmartArchive.fromArchiveUrl(url: string): Promise<SmartArchive>
+// Create from local file path
+SmartArchive.fromArchiveFile(path: string): Promise<SmartArchive>
+// Create from any Node.js readable stream
+SmartArchive.fromArchiveStream(stream: Readable | Duplex | Transform): Promise<SmartArchive>
+```
#### Instance Methods
-- `exportToFs(targetDir: string, fileName?: string)` - Extract to filesystem
-- `exportToStreamOfStreamFiles()` - Get a stream of file streams
-- `exportToTarGzStream()` - Export as TAR.GZ stream
-- `getArchiveStream()` - Get the raw archive stream
-#### Properties
-- `archiveAnalyzer` - Analyze archive contents
-- `tarTools` - TAR-specific operations
-- `zipTools` - ZIP-specific operations
-- `gzipTools` - GZIP-specific operations
-- `bzip2Tools` - BZIP2-specific operations
-### Specialized Tools
-Each tool class provides format-specific operations:
-- **TarTools** - Pack/unpack TAR archives
-- **ZipTools** - Handle ZIP compression
-- **GzipTools** - GZIP compression/decompression
-- **Bzip2Tools** - BZIP2 operations
+```typescript
+// Extract all files to a directory
+// fileName is optional - used for single-file archives (like .gz) that don't store filename
+exportToFs(targetDir: string, fileName?: string): Promise<void>
+// Get a stream that emits StreamFile objects for each file in the archive
+exportToStreamOfStreamFiles(): Promise<StreamIntake<StreamFile>>
+// Get the raw archive stream (useful for piping)
+getArchiveStream(): Promise<Readable>
+```
+#### Instance Properties
+```typescript
+archive.tarTools        // TarTools instance for TAR-specific operations
+archive.zipTools        // ZipTools instance for ZIP-specific operations
+archive.gzipTools       // GzipTools instance for GZIP-specific operations
+archive.bzip2Tools      // Bzip2Tools instance for BZIP2-specific operations
+archive.archiveAnalyzer // ArchiveAnalyzer for inspecting archive type
+```
+### TarTools Class
+TAR-specific operations for creating and extracting TAR archives.
+```typescript
+import { TarTools } from '@push.rocks/smartarchive';
+const tarTools = new TarTools();
+// Get a tar pack stream for creating archives
+const pack = await tarTools.getPackStream();
+// Add files to a pack stream
+await tarTools.addFileToPack(pack, {
+  fileName: 'file.txt',   // Name in archive
+  content: 'Hello World', // String, Buffer, Readable, SmartFile, or StreamFile
+  byteLength?: number,    // Optional: specify size for streams
+  filePath?: string       // Optional: path to file on disk
+});
+// Pack an entire directory
+const pack = await tarTools.packDirectory('./src');
+// Get extraction stream
+const extract = tarTools.getDecompressionStream();
+```
+### ZipTools Class
+ZIP-specific operations.
+```typescript
+import { ZipTools } from '@push.rocks/smartarchive';
+const zipTools = new ZipTools();
+// Get compression stream (for creating ZIP)
+const compressor = zipTools.getCompressionStream();
+// Get decompression stream (for extracting ZIP)
+const decompressor = zipTools.getDecompressionStream();
+```
+### GzipTools Class
+GZIP compression/decompression streams.
+```typescript
+import { GzipTools } from '@push.rocks/smartarchive';
+const gzipTools = new GzipTools();
+// Get compression stream
+const compressor = gzipTools.getCompressionStream();
+// Get decompression stream
+const decompressor = gzipTools.getDecompressionStream();
+```
+## Supported Formats 📋
+| Format | Extension(s) | Extract | Create |
+|--------|--------------|---------|--------|
+| TAR | `.tar` | ✅ | ✅ |
+| TAR.GZ / TGZ | `.tar.gz`, `.tgz` | ✅ | ⚠️ |
+| ZIP | `.zip` | ✅ | ⚠️ |
+| GZIP | `.gz` | ✅ | ✅ |
+| BZIP2 | `.bz2` | ✅ | ❌ |
+✅ Full support | ⚠️ Partial/basic support | ❌ Not supported
## Performance Tips 🏎️
-1. **Use streaming for large files** - Avoid loading entire archives into memory
-2. **Process files in parallel** - Utilize stream operations for concurrent processing
-3. **Choose the right format** - TAR.GZ for Unix systems, ZIP for cross-platform compatibility
-4. **Enable compression wisely** - Balance between file size and CPU usage
+1. **Use streaming for large files** — Avoid loading entire archives into memory with `exportToStreamOfStreamFiles()`
+2. **Provide byte lengths when known** — When adding streams to TAR, provide `byteLength` for better performance
+3. **Process files as they stream** — Don't collect all files into an array unless necessary
+4. **Choose the right format** — TAR.GZ for Unix/compression, ZIP for cross-platform compatibility
## Error Handling 🛡️
@@ -295,6 +382,8 @@ try {
  console.error('Archive file not found');
} else if (error.code === 'EACCES') {
  console.error('Permission denied');
+} else if (error.message.includes('fetch')) {
+  console.error('Network error downloading archive');
} else {
  console.error('Archive extraction failed:', error.message);
}
@@ -303,35 +392,57 @@ try {
## Real-World Use Cases 🌍
-### Backup System
-```typescript
-// Automated backup extraction
-const backup = await SmartArchive.fromArchiveFile('./backup.tar.gz');
-await backup.exportToFs('/restore/location');
-```
-### CI/CD Pipeline
+### CI/CD: Download & Extract Build Artifacts
```typescript
-// Download and extract build artifacts
const artifacts = await SmartArchive.fromArchiveUrl(
  `${CI_SERVER}/artifacts/build-${BUILD_ID}.zip`
);
await artifacts.exportToFs('./dist');
```
-### Data Processing
-```typescript
-// Process compressed datasets
-const dataset = await SmartArchive.fromArchiveUrl(
-  'https://data.source/dataset.tar.bz2'
-);
+### Backup System: Restore from Archive
+```typescript
+const backup = await SmartArchive.fromArchiveFile('./backup-2024.tar.gz');
+await backup.exportToFs('/restore/location');
+```
+### NPM Package Inspection
+```typescript
+const pkg = await SmartArchive.fromArchiveUrl(
+  'https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz'
+);
+const files = await pkg.exportToStreamOfStreamFiles();
+files.on('data', async (file) => {
+  if (file.relativeFilePath.includes('package.json')) {
+    const stream = await file.createReadStream();
+    // Read and analyze package.json
+  }
+});
+```
+### Data Pipeline: Process Compressed Datasets
+```typescript
+const dataset = await SmartArchive.fromArchiveUrl(
+  'https://data.source/dataset.tar.gz'
+);
const files = await dataset.exportToStreamOfStreamFiles();
-// Process each file in the dataset
+files.on('data', async (file) => {
+  if (file.relativeFilePath.endsWith('.csv')) {
+    const stream = await file.createReadStream();
+    // Stream CSV processing
+  }
+});
```
## License and Legal Information
This repository contains open-source code that is licensed under the MIT License. A copy of the MIT License can be found in the [license](license) file within this repository.
**Please note:** The MIT License does not grant permission to use the trade names, trademarks, service marks, or product names of the project, except as required for reasonable and customary use in describing the origin of the work and reproducing the content of the NOTICE file.
@@ -341,9 +452,9 @@ This project is owned and maintained by Task Venture Capital GmbH. The names and
### Company Information
Task Venture Capital GmbH
Registered at District court Bremen HRB 35230 HB, Germany
For any legal inquiries or if you require further information, please contact us via email at hello@task.vc.
By using this repository, you acknowledge that you have read this section, agree to comply with its terms, and understand that the licensing of the code does not imply endorsement by Task Venture Capital GmbH of any derivative works.

ts/plugins.ts

@@ -1,7 +1,33 @@
-import * as path from 'path';
+import * as path from 'node:path';
+import * as fs from 'node:fs';
+import * as fsPromises from 'node:fs/promises';
import * as smartpath from '@push.rocks/smartpath';
import * as smartfile from '@push.rocks/smartfile';
import * as smartrequest from '@push.rocks/smartrequest';
import * as smartstream from '@push.rocks/smartstream';
-export { path, smartpath, smartfile, smartrequest, smartstream };
+export { path, fs, fsPromises, smartpath, smartfile, smartrequest, smartstream };
+/**
+ * List files in a directory recursively, returning relative paths
+ */
+export async function listFileTree(dirPath: string, _pattern: string = '**/*'): Promise<string[]> {
+  const results: string[] = [];
+  async function walkDir(currentPath: string, relativePath: string = '') {
+    const entries = await fsPromises.readdir(currentPath, { withFileTypes: true });
+    for (const entry of entries) {
+      const entryRelPath = relativePath ? path.join(relativePath, entry.name) : entry.name;
+      const entryFullPath = path.join(currentPath, entry.name);
+      if (entry.isDirectory()) {
+        await walkDir(entryFullPath, entryRelPath);
+      } else if (entry.isFile()) {
+        results.push(entryRelPath);
+      }
+    }
+  }
+  await walkDir(dirPath);
+  return results;
+}
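A usage sketch for the helper above (the import path assumes it is consumed from ts/plugins.ts; note the `_pattern` argument is currently accepted but unused):

```typescript
import { listFileTree } from './ts/plugins.js'; // assumed path

const files = await listFileTree('./some-directory'); // path is illustrative
// Returns relative paths of files only, e.g. ['readme.md', 'sub/nested.txt']
for (const relativePath of files) {
  console.log(relativePath);
}
```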

test/test.gzip.node+deno.ts (new file, 401 lines)

@@ -0,0 +1,401 @@
import { tap, expect } from '@git.zone/tstest/tapbundle';
import * as plugins from './plugins.js';
import * as smartarchive from '../ts/index.js';
const testPaths = {
nogitDir: plugins.path.join(
plugins.smartpath.get.dirnameFromImportMetaUrl(import.meta.url),
'../.nogit/',
),
gzipTestDir: plugins.path.join(
plugins.smartpath.get.dirnameFromImportMetaUrl(import.meta.url),
'../.nogit/gzip-test',
),
};
tap.preTask('should prepare test directories', async () => {
await plugins.fsPromises.mkdir(testPaths.gzipTestDir, { recursive: true });
});
tap.test('should create and extract a gzip file', async () => {
// Create test data
const testContent = 'This is a test file for gzip compression and decompression.\n'.repeat(100);
const testFileName = 'test-file.txt';
const gzipFileName = 'test-file.txt.gz';
// Write the original file
await plugins.fsPromises.writeFile(
plugins.path.join(testPaths.gzipTestDir, testFileName),
testContent
);
// Create gzip compressed version using fflate directly
const fflate = await import('fflate');
const compressed = fflate.gzipSync(Buffer.from(testContent));
await plugins.fsPromises.writeFile(
plugins.path.join(testPaths.gzipTestDir, gzipFileName),
Buffer.from(compressed)
);
// Now test extraction using SmartArchive
const gzipArchive = await smartarchive.SmartArchive.fromFile(
plugins.path.join(testPaths.gzipTestDir, gzipFileName)
);
// Export to a new location
const extractPath = plugins.path.join(testPaths.gzipTestDir, 'extracted');
await plugins.fsPromises.mkdir(extractPath, { recursive: true });
// Provide a filename since gzip doesn't contain filename metadata
await gzipArchive.extractToDirectory(extractPath, { fileName: 'test-file.txt' });
// Read the extracted file
const extractedContent = await plugins.fsPromises.readFile(
plugins.path.join(extractPath, 'test-file.txt'),
'utf8'
);
// Verify the content matches
expect(extractedContent).toEqual(testContent);
});
tap.test('should handle gzip stream extraction', async () => {
// Create test data
const testContent = 'Stream test data for gzip\n'.repeat(50);
const gzipFileName = 'stream-test.txt.gz';
// Create gzip compressed version
const fflate = await import('fflate');
const compressed = fflate.gzipSync(Buffer.from(testContent));
await plugins.fsPromises.writeFile(
plugins.path.join(testPaths.gzipTestDir, gzipFileName),
Buffer.from(compressed)
);
// Create a read stream for the gzip file
const gzipStream = plugins.fs.createReadStream(
plugins.path.join(testPaths.gzipTestDir, gzipFileName)
);
// Test extraction using SmartArchive from stream
const gzipArchive = await smartarchive.SmartArchive.fromStream(gzipStream);
// Export to stream and collect the result
const streamFiles: any[] = [];
const resultStream = await gzipArchive.extractToStream();
await new Promise<void>((resolve, reject) => {
resultStream.on('data', (streamFile) => {
streamFiles.push(streamFile);
});
resultStream.on('end', resolve);
resultStream.on('error', reject);
});
// Verify we got the expected file
expect(streamFiles.length).toBeGreaterThan(0);
// Read content from the stream file
if (streamFiles[0]) {
const chunks: Buffer[] = [];
const readStream = await streamFiles[0].createReadStream();
await new Promise<void>((resolve, reject) => {
readStream.on('data', (chunk: Buffer) => chunks.push(chunk));
readStream.on('end', resolve);
readStream.on('error', reject);
});
const extractedContent = Buffer.concat(chunks).toString();
expect(extractedContent).toEqual(testContent);
}
});
tap.test('should handle gzip files with original filename in header', async () => {
// Test with a real-world gzip file that includes filename in header
const testContent = 'File with name in gzip header\n'.repeat(30);
const originalFileName = 'original-name.log';
const gzipFileName = 'compressed.gz';
// Create a proper gzip with filename header using Node's zlib
const zlib = await import('node:zlib');
const gzipBuffer = await new Promise<Buffer>((resolve, reject) => {
zlib.gzip(Buffer.from(testContent), {
level: 9,
// Note: Node's zlib doesn't support embedding filename directly,
// but we can test the extraction anyway
}, (err, result) => {
if (err) reject(err);
else resolve(result);
});
});
await plugins.fsPromises.writeFile(
plugins.path.join(testPaths.gzipTestDir, gzipFileName),
gzipBuffer
);
// Test extraction
const gzipArchive = await smartarchive.SmartArchive.fromFile(
plugins.path.join(testPaths.gzipTestDir, gzipFileName)
);
const extractPath = plugins.path.join(testPaths.gzipTestDir, 'header-test');
await plugins.fsPromises.mkdir(extractPath, { recursive: true });
// Provide a filename since gzip doesn't reliably contain filename metadata
await gzipArchive.extractToDirectory(extractPath, { fileName: 'compressed.txt' });
// Check if file was extracted (name might be derived from archive name)
const files = await plugins.listFileTree(extractPath, '**/*');
expect(files.length).toBeGreaterThan(0);
// Read and verify content
const extractedFile = files[0];
const extractedContent = await plugins.fsPromises.readFile(
plugins.path.join(extractPath, extractedFile || 'compressed.txt'),
'utf8'
);
expect(extractedContent).toEqual(testContent);
});
tap.test('should handle large gzip files', async () => {
// Create a larger test file
const largeContent = 'x'.repeat(1024 * 1024); // 1MB of 'x' characters
const gzipFileName = 'large-file.txt.gz';
// Compress the large file
const fflate = await import('fflate');
const compressed = fflate.gzipSync(Buffer.from(largeContent));
await plugins.fsPromises.writeFile(
plugins.path.join(testPaths.gzipTestDir, gzipFileName),
Buffer.from(compressed)
);
// Test extraction
const gzipArchive = await smartarchive.SmartArchive.fromFile(
plugins.path.join(testPaths.gzipTestDir, gzipFileName)
);
const extractPath = plugins.path.join(testPaths.gzipTestDir, 'large-extracted');
await plugins.fsPromises.mkdir(extractPath, { recursive: true });
// Provide a filename since gzip doesn't contain filename metadata
await gzipArchive.extractToDirectory(extractPath, { fileName: 'large-file.txt' });
// Verify the extracted content
const files = await plugins.listFileTree(extractPath, '**/*');
expect(files.length).toBeGreaterThan(0);
const extractedContent = await plugins.fsPromises.readFile(
plugins.path.join(extractPath, files[0] || 'large-file.txt'),
'utf8'
);
expect(extractedContent.length).toEqual(largeContent.length);
expect(extractedContent).toEqual(largeContent);
});
tap.test('should handle real-world multi-chunk gzip from URL', async () => {
// Test with a real tgz file that will be processed in multiple chunks
const testUrl = 'https://registry.npmjs.org/@push.rocks/smartfile/-/smartfile-11.2.7.tgz';
// Download and extract the archive
const testArchive = await smartarchive.SmartArchive.fromUrl(testUrl);
const extractPath = plugins.path.join(testPaths.gzipTestDir, 'real-world-test');
await plugins.fsPromises.mkdir(extractPath, { recursive: true });
// This will test multi-chunk decompression as the file is larger
await testArchive.extractToDirectory(extractPath);
// Verify extraction worked
const files = await plugins.listFileTree(extractPath, '**/*');
expect(files.length).toBeGreaterThan(0);
// Check for expected package structure
const hasPackageJson = files.some(f => f.includes('package.json'));
expect(hasPackageJson).toBeTrue();
// Read and verify package.json content
const packageJsonPath = files.find(f => f.includes('package.json'));
if (packageJsonPath) {
const packageJsonContent = await plugins.fsPromises.readFile(
plugins.path.join(extractPath, packageJsonPath),
'utf8'
);
const packageJson = JSON.parse(packageJsonContent);
expect(packageJson.name).toEqual('@push.rocks/smartfile');
expect(packageJson.version).toEqual('11.2.7');
}
// Read and verify a TypeScript file
const tsFilePath = files.find(f => f.endsWith('.ts'));
if (tsFilePath) {
const tsFileContent = await plugins.fsPromises.readFile(
plugins.path.join(extractPath, tsFilePath),
'utf8'
);
// TypeScript files should have content
expect(tsFileContent.length).toBeGreaterThan(10);
console.log(` ✓ TypeScript file ${tsFilePath} has ${tsFileContent.length} bytes`);
}
// Read and verify license file
const licensePath = files.find(f => f.includes('license'));
if (licensePath) {
const licenseContent = await plugins.fsPromises.readFile(
plugins.path.join(extractPath, licensePath),
'utf8'
);
expect(licenseContent).toContain('MIT');
}
// Verify we can read multiple files without corruption
const readableFiles = files.filter(f =>
f.endsWith('.json') || f.endsWith('.md') || f.endsWith('.ts') || f.endsWith('.js')
).slice(0, 5); // Test first 5 readable files
for (const file of readableFiles) {
const content = await plugins.fsPromises.readFile(
plugins.path.join(extractPath, file),
'utf8'
);
expect(content).toBeDefined();
expect(content.length).toBeGreaterThan(0);
console.log(` ✓ Successfully read ${file} (${content.length} bytes)`);
}
});
tap.test('should handle gzip extraction fully in memory', async () => {
// Create test data in memory
const testContent = 'This is test data for in-memory gzip processing\n'.repeat(100);
// Compress using fflate in memory
const fflate = await import('fflate');
const compressed = fflate.gzipSync(Buffer.from(testContent));
// Create a stream from the compressed data
const { Readable } = await import('node:stream');
const compressedStream = Readable.from(Buffer.from(compressed));
// Process through SmartArchive without touching filesystem
const gzipArchive = await smartarchive.SmartArchive.fromStream(compressedStream);
// Export to stream of stream files (in memory)
const streamFiles: plugins.smartfile.StreamFile[] = [];
const resultStream = await gzipArchive.extractToStream();
await new Promise<void>((resolve, reject) => {
resultStream.on('data', (streamFile: plugins.smartfile.StreamFile) => {
streamFiles.push(streamFile);
});
resultStream.on('end', resolve);
resultStream.on('error', reject);
});
// Verify we got a file
expect(streamFiles.length).toBeGreaterThan(0);
// Read the content from memory without filesystem
const firstFile = streamFiles[0];
const chunks: Buffer[] = [];
const readStream = await firstFile.createReadStream();
await new Promise<void>((resolve, reject) => {
readStream.on('data', (chunk: Buffer) => chunks.push(chunk));
readStream.on('end', resolve);
readStream.on('error', reject);
});
const extractedContent = Buffer.concat(chunks).toString();
expect(extractedContent).toEqual(testContent);
console.log(` ✓ In-memory extraction successful (${extractedContent.length} bytes)`);
});
tap.test('should handle real tgz file fully in memory', async (tools) => {
await tools.timeout(10000); // Set 10 second timeout
// Download tgz file into memory
const response = await plugins.smartrequest.SmartRequest.create()
.url('https://registry.npmjs.org/@push.rocks/smartfile/-/smartfile-11.2.7.tgz')
.get();
const tgzBuffer = Buffer.from(await response.arrayBuffer());
console.log(` Downloaded ${tgzBuffer.length} bytes into memory`);
// Create stream from buffer
const { Readable: Readable2 } = await import('node:stream');
const tgzStream = Readable2.from(tgzBuffer);
// Process through SmartArchive in memory
const archive = await smartarchive.SmartArchive.fromStream(tgzStream);
// Export to stream of stream files (in memory)
const streamFiles: plugins.smartfile.StreamFile[] = [];
const resultStream = await archive.extractToStream();
await new Promise<void>((resolve, reject) => {
let timeout: NodeJS.Timeout;
const cleanup = () => {
clearTimeout(timeout);
};
timeout = setTimeout(() => {
cleanup();
resolve(); // Resolve after timeout if stream doesn't end
}, 5000);
resultStream.on('data', (streamFile: plugins.smartfile.StreamFile) => {
streamFiles.push(streamFile);
});
resultStream.on('end', () => {
cleanup();
resolve();
});
resultStream.on('error', (err) => {
cleanup();
reject(err);
});
});
console.log(` Extracted ${streamFiles.length} files in memory`);
// At minimum we should have extracted something
expect(streamFiles.length).toBeGreaterThan(0);
// Find and read package.json from memory
const packageJsonFile = streamFiles.find(f => f.relativeFilePath?.includes('package.json'));
if (packageJsonFile) {
const chunks: Buffer[] = [];
const readStream = await packageJsonFile.createReadStream();
await new Promise<void>((resolve, reject) => {
readStream.on('data', (chunk: Buffer) => chunks.push(chunk));
readStream.on('end', resolve);
readStream.on('error', reject);
});
const packageJsonContent = Buffer.concat(chunks).toString();
const packageJson = JSON.parse(packageJsonContent);
expect(packageJson.name).toEqual('@push.rocks/smartfile');
expect(packageJson.version).toEqual('11.2.7');
console.log(` ✓ Read package.json from memory: ${packageJson.name}@${packageJson.version}`);
}
// Read a few more files to verify integrity
const filesToCheck = streamFiles.slice(0, 3);
for (const file of filesToCheck) {
const chunks: Buffer[] = [];
const readStream = await file.createReadStream();
await new Promise<void>((resolve, reject) => {
readStream.on('data', (chunk: Buffer) => chunks.push(chunk));
readStream.on('end', resolve);
readStream.on('error', reject);
});
const content = Buffer.concat(chunks);
expect(content.length).toBeGreaterThan(0);
console.log(` ✓ Read ${file.relativeFilePath} from memory (${content.length} bytes)`);
}
});
export default tap.start();

test/test.gzip.ts (deleted)

@@ -1,219 +0,0 @@
import { tap, expect } from '@git.zone/tstest/tapbundle';
import * as plugins from './plugins.js';
import * as smartarchive from '../ts/index.js';
const testPaths = {
nogitDir: plugins.path.join(
plugins.smartpath.get.dirnameFromImportMetaUrl(import.meta.url),
'../.nogit/',
),
gzipTestDir: plugins.path.join(
plugins.smartpath.get.dirnameFromImportMetaUrl(import.meta.url),
'../.nogit/gzip-test',
),
};
tap.preTask('should prepare test directories', async () => {
await plugins.smartfile.fs.ensureDir(testPaths.gzipTestDir);
});
tap.test('should create and extract a gzip file', async () => {
// Create test data
const testContent = 'This is a test file for gzip compression and decompression.\n'.repeat(100);
const testFileName = 'test-file.txt';
const gzipFileName = 'test-file.txt.gz';
// Write the original file
await plugins.smartfile.memory.toFs(
testContent,
plugins.path.join(testPaths.gzipTestDir, testFileName)
);
// Compress the file using gzip
const originalFile = await plugins.smartfile.fs.fileTreeToObject(
testPaths.gzipTestDir,
testFileName
);
// Create gzip compressed version using fflate directly
const fflate = await import('fflate');
const compressed = fflate.gzipSync(Buffer.from(testContent));
await plugins.smartfile.memory.toFs(
Buffer.from(compressed),
plugins.path.join(testPaths.gzipTestDir, gzipFileName)
);
// Now test extraction using SmartArchive
const gzipArchive = await smartarchive.SmartArchive.fromArchiveFile(
plugins.path.join(testPaths.gzipTestDir, gzipFileName)
);
// Export to a new location
const extractPath = plugins.path.join(testPaths.gzipTestDir, 'extracted');
await plugins.smartfile.fs.ensureDir(extractPath);
// Provide a filename since gzip doesn't contain filename metadata
await gzipArchive.exportToFs(extractPath, 'test-file.txt');
// Read the extracted file
const extractedContent = await plugins.smartfile.fs.toStringSync(
plugins.path.join(extractPath, 'test-file.txt')
);
// Verify the content matches
expect(extractedContent).toEqual(testContent);
});
tap.test('should handle gzip stream extraction', async () => {
// Create test data
const testContent = 'Stream test data for gzip\n'.repeat(50);
const gzipFileName = 'stream-test.txt.gz';
// Create gzip compressed version
const fflate = await import('fflate');
const compressed = fflate.gzipSync(Buffer.from(testContent));
await plugins.smartfile.memory.toFs(
Buffer.from(compressed),
plugins.path.join(testPaths.gzipTestDir, gzipFileName)
);
// Create a read stream for the gzip file
const gzipStream = plugins.smartfile.fsStream.createReadStream(
plugins.path.join(testPaths.gzipTestDir, gzipFileName)
);
// Test extraction using SmartArchive from stream
const gzipArchive = await smartarchive.SmartArchive.fromArchiveStream(gzipStream);
// Export to stream and collect the result
const streamFiles: any[] = [];
const resultStream = await gzipArchive.exportToStreamOfStreamFiles();
await new Promise<void>((resolve, reject) => {
resultStream.on('data', (streamFile) => {
streamFiles.push(streamFile);
});
resultStream.on('end', resolve);
resultStream.on('error', reject);
});
// Verify we got the expected file
expect(streamFiles.length).toBeGreaterThan(0);
// Read content from the stream file
if (streamFiles[0]) {
const chunks: Buffer[] = [];
const readStream = await streamFiles[0].createReadStream();
await new Promise<void>((resolve, reject) => {
readStream.on('data', (chunk: Buffer) => chunks.push(chunk));
readStream.on('end', resolve);
readStream.on('error', reject);
});
const extractedContent = Buffer.concat(chunks).toString();
expect(extractedContent).toEqual(testContent);
}
});
tap.test('should handle gzip files with original filename in header', async () => {
// Test with a real-world gzip file that includes filename in header
const testContent = 'File with name in gzip header\n'.repeat(30);
const originalFileName = 'original-name.log';
const gzipFileName = 'compressed.gz';
// Create a proper gzip with filename header using Node's zlib
const zlib = await import('zlib');
const gzipBuffer = await new Promise<Buffer>((resolve, reject) => {
zlib.gzip(Buffer.from(testContent), {
level: 9,
// Note: Node's zlib doesn't support embedding filename directly,
// but we can test the extraction anyway
}, (err, result) => {
if (err) reject(err);
else resolve(result);
});
});
await plugins.smartfile.memory.toFs(
gzipBuffer,
plugins.path.join(testPaths.gzipTestDir, gzipFileName)
);
// Test extraction
const gzipArchive = await smartarchive.SmartArchive.fromArchiveFile(
plugins.path.join(testPaths.gzipTestDir, gzipFileName)
);
const extractPath = plugins.path.join(testPaths.gzipTestDir, 'header-test');
await plugins.smartfile.fs.ensureDir(extractPath);
// Provide a filename since gzip doesn't reliably contain filename metadata
await gzipArchive.exportToFs(extractPath, 'compressed.txt');
// Check if file was extracted (name might be derived from archive name)
const files = await plugins.smartfile.fs.listFileTree(extractPath, '**/*');
expect(files.length).toBeGreaterThan(0);
// Read and verify content
const extractedFile = files[0];
const extractedContent = await plugins.smartfile.fs.toStringSync(
plugins.path.join(extractPath, extractedFile || 'compressed.txt')
);
expect(extractedContent).toEqual(testContent);
});
tap.test('should handle large gzip files', async () => {
// Create a larger test file
const largeContent = 'x'.repeat(1024 * 1024); // 1MB of 'x' characters
const gzipFileName = 'large-file.txt.gz';
// Compress the large file
const fflate = await import('fflate');
const compressed = fflate.gzipSync(Buffer.from(largeContent));
await plugins.smartfile.memory.toFs(
Buffer.from(compressed),
plugins.path.join(testPaths.gzipTestDir, gzipFileName)
);
// Test extraction
const gzipArchive = await smartarchive.SmartArchive.fromArchiveFile(
plugins.path.join(testPaths.gzipTestDir, gzipFileName)
);
const extractPath = plugins.path.join(testPaths.gzipTestDir, 'large-extracted');
await plugins.smartfile.fs.ensureDir(extractPath);
// Provide a filename since gzip doesn't contain filename metadata
await gzipArchive.exportToFs(extractPath, 'large-file.txt');
// Verify the extracted content
const files = await plugins.smartfile.fs.listFileTree(extractPath, '**/*');
expect(files.length).toBeGreaterThan(0);
const extractedContent = await plugins.smartfile.fs.toStringSync(
plugins.path.join(extractPath, files[0] || 'large-file.txt')
);
expect(extractedContent.length).toEqual(largeContent.length);
expect(extractedContent).toEqual(largeContent);
});
tap.test('should handle real-world multi-chunk gzip from URL', async () => {
// Test with a real tgz file that will be processed in multiple chunks
const testUrl = 'https://registry.npmjs.org/@push.rocks/smartfile/-/smartfile-11.2.7.tgz';
// Download and extract the archive
const testArchive = await smartarchive.SmartArchive.fromArchiveUrl(testUrl);
const extractPath = plugins.path.join(testPaths.gzipTestDir, 'real-world-test');
await plugins.smartfile.fs.ensureDir(extractPath);
// This will test multi-chunk decompression as the file is larger
await testArchive.exportToFs(extractPath);
// Verify extraction worked
const files = await plugins.smartfile.fs.listFileTree(extractPath, '**/*');
expect(files.length).toBeGreaterThan(0);
// Check for expected package structure
const hasPackageJson = files.some(f => f.includes('package.json'));
expect(hasPackageJson).toBeTrue();
});
export default tap.start();

test/test.node+deno.ts

@@ -16,7 +16,7 @@ const testPaths = {
import * as smartarchive from '../ts/index.js';
tap.preTask('should prepare .nogit dir', async () => {
-  await plugins.smartfile.fs.ensureDir(testPaths.remoteDir);
+  await plugins.fsPromises.mkdir(testPaths.remoteDir, { recursive: true });
});
tap.preTask('should prepare downloads', async (tools) => {
@@ -26,26 +26,25 @@ tap.preTask('should prepare downloads', async (tools) => {
  )
  .get();
  const downloadedFile: Buffer = Buffer.from(await response.arrayBuffer());
-  await plugins.smartfile.memory.toFs(
-    downloadedFile,
-    plugins.path.join(testPaths.nogitDir, 'test.tgz'),
-  );
+  await plugins.fsPromises.writeFile(
+    plugins.path.join(testPaths.nogitDir, 'test.tgz'),
+    downloadedFile,
+  );
});
tap.test('should extract existing files on disk', async () => {
-  const testSmartarchive = await smartarchive.SmartArchive.fromArchiveUrl(
+  const testSmartarchive = await smartarchive.SmartArchive.fromUrl(
    'https://verdaccio.lossless.digital/@pushrocks%2fwebsetup/-/websetup-2.0.14.tgz',
  );
-  await testSmartarchive.exportToFs(testPaths.nogitDir);
+  await testSmartarchive.extractToDirectory(testPaths.nogitDir);
});
tap.skip.test('should extract a b2zip', async () => {
  const dataUrl =
    'https://daten.offeneregister.de/de_companies_ocdata.jsonl.bz2';
-  const testArchive = await smartarchive.SmartArchive.fromArchiveUrl(dataUrl);
-  await testArchive.exportToFs(
+  const testArchive = await smartarchive.SmartArchive.fromUrl(dataUrl);
+  await testArchive.extractToDirectory(
    plugins.path.join(testPaths.nogitDir, 'de_companies_ocdata.jsonl'),
-    'data.jsonl',
  );
});

ts/00_commitinfo_data.ts

@@ -3,6 +3,6 @@
 */
export const commitinfo = {
  name: '@push.rocks/smartarchive',
-  version: '4.2.1',
+  version: '5.0.0',
  description: 'A library for working with archive files, providing utilities for compressing and decompressing data.'
}

ts/bzip2/bititerator.ts

@@ -1,44 +1,60 @@
-var BITMASK = [0, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff];
-// returns a function that reads bits.
-// takes a buffer iterator as input
-export function bitIterator(nextBuffer: () => Buffer) {
-  var bit = 0,
-    byte = 0;
-  var bytes = nextBuffer();
-  var f = function (n) {
-    if (n === null && bit != 0) {
+import type { IBitReader } from '../interfaces.js';
+const BITMASK = [0, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff] as const;
+/**
+ * Creates a bit reader function for BZIP2 decompression.
+ * Takes a buffer iterator as input and returns a function that reads bits.
+ */
+export function bitIterator(nextBuffer: () => Buffer): IBitReader {
+  let bit = 0;
+  let byte = 0;
+  let bytes = nextBuffer();
+  let _bytesRead = 0;
+  const reader = function (n: number | null): number | void {
+    if (n === null && bit !== 0) {
      // align to byte boundary
      bit = 0;
      byte++;
      return;
    }
-    var result = 0;
-    while (n > 0) {
+    let result = 0;
+    let remaining = n as number;
+    while (remaining > 0) {
      if (byte >= bytes.length) {
        byte = 0;
        bytes = nextBuffer();
      }
-      var left = 8 - bit;
-      if (bit === 0 && n > 0)
-        // @ts-ignore
-        f.bytesRead++;
-      if (n >= left) {
+      const left = 8 - bit;
+      if (bit === 0 && remaining > 0) {
+        _bytesRead++;
+      }
+      if (remaining >= left) {
        result <<= left;
        result |= BITMASK[left] & bytes[byte++];
        bit = 0;
-        n -= left;
+        remaining -= left;
      } else {
-        result <<= n;
-        result |=
-          (bytes[byte] & (BITMASK[n] << (8 - n - bit))) >> (8 - n - bit);
-        bit += n;
-        n = 0;
+        result <<= remaining;
+        result |= (bytes[byte] & (BITMASK[remaining] << (8 - remaining - bit))) >> (8 - remaining - bit);
+        bit += remaining;
+        remaining = 0;
      }
    }
    return result;
-  };
-  // @ts-ignore
-  f.bytesRead = 0;
-  return f;
+  } as IBitReader;
+  Object.defineProperty(reader, 'bytesRead', {
+    get: () => _bytesRead,
+    enumerable: true,
+  });
+  return reader;
}

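To illustrate the reader contract (a callable that also exposes a bytesRead getter), a minimal sketch; the import path is an assumption:

import { bitIterator } from './ts/bzip2/bititerator.js';

// Buffers are pulled lazily, one at a time, as bits are consumed.
const buffers = [Buffer.from([0b10110100, 0b01100000])];
const read = bitIterator(() => buffers.shift()!);

console.log(read(3));        // 5: the top three bits of the first byte
read(null);                  // discard the rest of the current byte
console.log(read(8));        // 96: the second byte
console.log(read.bytesRead); // 2: whole bytes consumed so far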
View File

@@ -1,23 +1,22 @@
import { Bzip2Error, BZIP2_ERROR_CODES } from '../errors.js';
import type { IBitReader, IHuffmanGroup } from '../interfaces.js';

// Re-export Bzip2Error for backward compatibility
export { Bzip2Error };

/**
 * Throw a BZIP2 error with proper error code
 */
function throwError(message: string, code: string = BZIP2_ERROR_CODES.INVALID_BLOCK_DATA): never {
  throw new Bzip2Error(message, code);
}

/**
 * BZIP2 decompression implementation
 */
export class Bzip2 {
  // CRC32 lookup table for BZIP2
  public readonly crcTable: readonly number[] = [
    0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b,
    0x1a864db2, 0x1e475005, 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61,
    0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd, 0x4c11db70, 0x48d0c6c7,
@@ -63,14 +62,24 @@ export class Bzip2 {
    0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4,
  ];

  // State arrays initialized in header()
  private byteCount!: Int32Array;
  private symToByte!: Uint8Array;
  private mtfSymbol!: Int32Array;
  private selectors!: Uint8Array;

  /**
   * Create a bit reader from a byte array
   */
  array(bytes: Uint8Array | Buffer): (n: number) => number {
    let bit = 0;
    let byte = 0;
    const BITMASK = [0, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff];
    return function (n: number): number {
      let result = 0;
      while (n > 0) {
        const left = 8 - bit;
        if (n >= left) {
          result <<= left;
          result |= BITMASK[left] & bytes[byte++];
@@ -78,234 +87,341 @@ export class Bzip2 {
          n -= left;
        } else {
          result <<= n;
          result |= (bytes[byte] & (BITMASK[n] << (8 - n - bit))) >> (8 - n - bit);
          bit += n;
          n = 0;
        }
      }
      return result;
    };
  }

  /**
   * Simple decompression from a buffer
   */
  simple(srcbuffer: Uint8Array | Buffer, stream: (byte: number) => void): void {
    const bits = this.array(srcbuffer);
    const size = this.header(bits as IBitReader);
    let ret: number | null = 0;
    const bufsize = 100000 * size;
    const buf = new Int32Array(bufsize);
    do {
      ret = this.decompress(bits as IBitReader, stream, buf, bufsize, ret);
    } while (ret !== null);
  }

  /**
   * Parse BZIP2 header and return block size
   */
  header(bits: IBitReader): number {
    this.byteCount = new Int32Array(256);
    this.symToByte = new Uint8Array(256);
    this.mtfSymbol = new Int32Array(256);
    this.selectors = new Uint8Array(0x8000);

    if (bits(8 * 3) !== 4348520) {
      throwError('No BZIP2 magic number found at start of stream', BZIP2_ERROR_CODES.NO_MAGIC_NUMBER);
    }

    const blockSize = (bits(8) as number) - 48;
    if (blockSize < 1 || blockSize > 9) {
      throwError('Invalid BZIP2 archive: block size must be 1-9', BZIP2_ERROR_CODES.INVALID_ARCHIVE);
    }
    return blockSize;
  }

  /**
   * Decompress a BZIP2 block
   */
  decompress(
    bits: IBitReader,
    stream: (byte: number) => void,
    buf: Int32Array,
    bufsize: number,
    streamCRC?: number | null
  ): number | null {
    const MAX_HUFCODE_BITS = 20;
    const MAX_SYMBOLS = 258;
    const SYMBOL_RUNA = 0;
    const SYMBOL_RUNB = 1;
    const GROUP_SIZE = 50;
    let crc = 0 ^ -1;

    // Read block header
    let headerHex = '';
    for (let i = 0; i < 6; i++) {
      headerHex += (bits(8) as number).toString(16);
    }

    // Check for end-of-stream marker
    if (headerHex === '177245385090') {
      const finalCRC = (bits(32) as number) | 0;
      if (finalCRC !== streamCRC) {
        throwError('CRC32 mismatch: stream checksum verification failed', BZIP2_ERROR_CODES.CRC_MISMATCH);
      }
      // Align stream to byte boundary
      bits(null);
      return null;
    }

    // Verify block signature (pi digits)
    if (headerHex !== '314159265359') {
      throwError('Invalid block header: expected pi signature (0x314159265359)', BZIP2_ERROR_CODES.INVALID_BLOCK_DATA);
    }

    const crcblock = (bits(32) as number) | 0;
    if (bits(1)) {
      throwError('Unsupported obsolete BZIP2 format version', BZIP2_ERROR_CODES.INVALID_ARCHIVE);
    }

    const origPtr = bits(24) as number;
    if (origPtr > bufsize) {
      throwError('Initial position larger than buffer size', BZIP2_ERROR_CODES.BUFFER_OVERFLOW);
    }

    // Read symbol map
    let symbolMapBits = bits(16) as number;
    let symTotal = 0;
    for (let i = 0; i < 16; i++) {
      if (symbolMapBits & (1 << (15 - i))) {
        const subMap = bits(16) as number;
        for (let j = 0; j < 16; j++) {
          if (subMap & (1 << (15 - j))) {
            this.symToByte[symTotal++] = 16 * i + j;
          }
        }
      }
    }

    // Read Huffman groups
    const groupCount = bits(3) as number;
    if (groupCount < 2 || groupCount > 6) {
      throwError('Invalid group count: must be between 2 and 6', BZIP2_ERROR_CODES.INVALID_HUFFMAN);
    }
    const nSelectors = bits(15) as number;
    if (nSelectors === 0) {
      throwError('Invalid selector count: cannot be zero', BZIP2_ERROR_CODES.INVALID_SELECTOR);
    }

    // Initialize MTF symbol array
    for (let i = 0; i < groupCount; i++) {
      this.mtfSymbol[i] = i;
    }

    // Read selectors using MTF decoding
    for (let i = 0; i < nSelectors; i++) {
      let j = 0;
      while (bits(1)) {
        j++;
        if (j >= groupCount) {
          throwError('Invalid MTF index: exceeds group count', BZIP2_ERROR_CODES.INVALID_HUFFMAN);
        }
      }
      const uc = this.mtfSymbol[j];
      for (let k = j - 1; k >= 0; k--) {
        this.mtfSymbol[k + 1] = this.mtfSymbol[k];
      }
      this.mtfSymbol[0] = uc;
      this.selectors[i] = uc;
    }

    // Build Huffman tables
    const symCount = symTotal + 2;
    const groups: IHuffmanGroup[] = [];
    const length = new Uint8Array(MAX_SYMBOLS);
    const temp = new Uint16Array(MAX_HUFCODE_BITS + 1);

    for (let j = 0; j < groupCount; j++) {
      let t = bits(5) as number;
      for (let i = 0; i < symCount; i++) {
        while (true) {
          if (t < 1 || t > MAX_HUFCODE_BITS) {
            throwError('Invalid Huffman code length: must be between 1 and 20', BZIP2_ERROR_CODES.INVALID_HUFFMAN);
          }
          if (!bits(1)) break;
          if (!bits(1)) t++;
          else t--;
        }
        length[i] = t;
      }

      let minLen = length[0];
      let maxLen = length[0];
      for (let i = 1; i < symCount; i++) {
        if (length[i] > maxLen) maxLen = length[i];
        else if (length[i] < minLen) minLen = length[i];
      }

      const hufGroup: IHuffmanGroup = {
        permute: new Int32Array(MAX_SYMBOLS),
        limit: new Int32Array(MAX_HUFCODE_BITS + 1),
        base: new Int32Array(MAX_HUFCODE_BITS + 1),
        minLen,
        maxLen,
      };
      groups[j] = hufGroup;

      const base = hufGroup.base;
      const limit = hufGroup.limit;

      let pp = 0;
      for (let i = minLen; i <= maxLen; i++) {
        for (let t = 0; t < symCount; t++) {
          if (length[t] === i) hufGroup.permute[pp++] = t;
        }
      }
      for (let i = minLen; i <= maxLen; i++) {
        temp[i] = 0;
        limit[i] = 0;
      }
      for (let i = 0; i < symCount; i++) {
        temp[length[i]]++;
      }
      pp = 0;
      let tt = 0;
      for (let i = minLen; i < maxLen; i++) {
        pp += temp[i];
        limit[i] = pp - 1;
        pp <<= 1;
        base[i + 1] = pp - (tt += temp[i]);
      }
      limit[maxLen] = pp + temp[maxLen] - 1;
      base[minLen] = 0;
    }

    // Initialize for decoding
    for (let i = 0; i < 256; i++) {
      this.mtfSymbol[i] = i;
      this.byteCount[i] = 0;
    }

    let runPos = 0;
    // Run lengths accumulate across consecutive RUNA/RUNB symbols,
    // so the accumulator must outlive a single loop iteration.
    let runLength = 0;
    let count = 0;
    let symCountRemaining = 0;
    let selector = 0;
    let hufGroup = groups[0];
    let base = hufGroup.base;
    let limit = hufGroup.limit;

    // Main decoding loop
    while (true) {
      if (!symCountRemaining--) {
        symCountRemaining = GROUP_SIZE - 1;
        if (selector >= nSelectors) {
          throwError('Invalid selector index: exceeds available groups', BZIP2_ERROR_CODES.INVALID_SELECTOR);
        }
        hufGroup = groups[this.selectors[selector++]];
        base = hufGroup.base;
        limit = hufGroup.limit;
      }

      let i = hufGroup.minLen;
      let j = bits(i) as number;
      while (true) {
        if (i > hufGroup.maxLen) {
          throwError('Huffman decoding error: bit length exceeds maximum allowed', BZIP2_ERROR_CODES.INVALID_HUFFMAN);
        }
        if (j <= limit[i]) break;
        i++;
        j = (j << 1) | (bits(1) as number);
      }
      j -= base[i];
      if (j < 0 || j >= MAX_SYMBOLS) {
        throwError('Symbol index out of bounds during Huffman decoding', BZIP2_ERROR_CODES.INVALID_HUFFMAN);
      }

      const nextSym = hufGroup.permute[j];
      if (nextSym === SYMBOL_RUNA || nextSym === SYMBOL_RUNB) {
        if (!runPos) {
          runPos = 1;
          runLength = 0;
        }
        if (nextSym === SYMBOL_RUNA) runLength += runPos;
        else runLength += 2 * runPos;
        runPos <<= 1;
        continue;
      }

      if (runPos) {
        runPos = 0;
        if (count + runLength > bufsize) {
          throwError('Run-length overflow: decoded run exceeds buffer capacity', BZIP2_ERROR_CODES.BUFFER_OVERFLOW);
        }
        const uc = this.symToByte[this.mtfSymbol[0]];
        this.byteCount[uc] += runLength;
        for (let t = 0; t < runLength; t++) {
          buf[count++] = uc;
        }
      }

      if (nextSym > symTotal) break;

      if (count >= bufsize) {
        throwError('Buffer overflow: decoded data exceeds buffer capacity', BZIP2_ERROR_CODES.BUFFER_OVERFLOW);
      }

      const mtfIndex = nextSym - 1;
      const uc = this.mtfSymbol[mtfIndex];
      for (let k = mtfIndex - 1; k >= 0; k--) {
        this.mtfSymbol[k + 1] = this.mtfSymbol[k];
      }
      this.mtfSymbol[0] = uc;
      const decodedByte = this.symToByte[uc];
      this.byteCount[decodedByte]++;
      buf[count++] = decodedByte;
    }

    if (origPtr < 0 || origPtr >= count) {
      throwError('Invalid original pointer: position outside decoded block', BZIP2_ERROR_CODES.INVALID_POSITION);
    }

    // Inverse BWT transform
    let j = 0;
    for (let i = 0; i < 256; i++) {
      const k = j + this.byteCount[i];
      this.byteCount[i] = j;
      j = k;
    }

    for (let i = 0; i < count; i++) {
      const uc = buf[i] & 0xff;
      buf[this.byteCount[uc]] |= i << 8;
      this.byteCount[uc]++;
    }

    // Output decoded data
    let pos = 0;
    let current = 0;
    let run = 0;
    if (count) {
      pos = buf[origPtr];
      current = pos & 0xff;
      pos >>= 8;
      run = -1;
    }

    let remaining = count;
    while (remaining) {
      remaining--;
      const previous = current;
      pos = buf[pos];
      current = pos & 0xff;
      pos >>= 8;

      let copies: number;
      let outbyte: number;
      if (run++ === 3) {
        copies = current;
        outbyte = previous;
        current = -1;
@@ -313,19 +429,21 @@ export class Bzip2 {
        copies = 1;
        outbyte = current;
      }
      while (copies--) {
        crc = ((crc << 8) ^ this.crcTable[((crc >> 24) ^ outbyte) & 0xff]) & 0xffffffff;
        stream(outbyte);
      }
      if (current !== previous) run = 0;
    }

    crc = (crc ^ -1) >>> 0;
    if ((crc | 0) !== (crcblock | 0)) {
      throwError('CRC32 mismatch: block checksum verification failed', BZIP2_ERROR_CODES.CRC_MISMATCH);
    }

    const newStreamCRC = (crc ^ (((streamCRC || 0) << 1) | ((streamCRC || 0) >>> 31))) & 0xffffffff;
    return newStreamCRC;
  }
}

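As a usage sketch for the one-shot path (file name and import path are assumptions):

import * as fs from 'fs';
import { Bzip2 } from './ts/bzip2/bzip2.js';

// Decode a small .bz2 file that fits in memory; simple() drives
// header() once, then decompress() per block until the end marker.
const compressed = fs.readFileSync('fixture.bz2');
const out: number[] = [];
new Bzip2().simple(compressed, (byte) => out.push(byte));
console.log(Buffer.from(out).toString('utf8'));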
View File

@@ -1,51 +1,53 @@
import * as plugins from '../plugins.js';
import { Bzip2Error, BZIP2_ERROR_CODES } from '../errors.js';
import type { IBitReader } from '../interfaces.js';
import { Bzip2 } from './bzip2.js';
import { bitIterator } from './bititerator.js';

/**
 * Creates a streaming BZIP2 decompression transform
 */
export function unbzip2Stream(): plugins.smartstream.SmartDuplex<Buffer, Buffer> {
  const bzip2Instance = new Bzip2();
  const bufferQueue: Buffer[] = [];
  let hasBytes = 0;
  let blockSize = 0;
  let broken = false;
  let bitReader: IBitReader | null = null;
  let streamCRC: number | null = null;

  function decompressBlock(): Buffer | undefined {
    if (!blockSize) {
      blockSize = bzip2Instance.header(bitReader!);
      streamCRC = 0;
      return undefined;
    }
    const bufsize = 100000 * blockSize;
    const buf = new Int32Array(bufsize);
    const chunk: number[] = [];
    const outputFunc = (b: number): void => {
      chunk.push(b);
    };
    streamCRC = bzip2Instance.decompress(bitReader!, outputFunc, buf, bufsize, streamCRC);
    if (streamCRC === null) {
      // Reset for next bzip2 header
      blockSize = 0;
      return undefined;
    }
    return Buffer.from(chunk);
  }

  let outlength = 0;

  const decompressAndPush = async (): Promise<Buffer | undefined> => {
    if (broken) return undefined;
    try {
      const resultChunk = decompressBlock();
      if (resultChunk) {
@@ -53,40 +55,39 @@ export function unbzip2Stream() {
      }
      return resultChunk;
    } catch (e) {
      broken = true;
      if (e instanceof Error) {
        throw new Bzip2Error(`Decompression failed: ${e.message}`, BZIP2_ERROR_CODES.INVALID_BLOCK_DATA);
      }
      throw e;
    }
  };

  return new plugins.smartstream.SmartDuplex<Buffer, Buffer>({
    objectMode: true,
    name: 'bzip2',
    highWaterMark: 1,
    writeFunction: async function (data, streamTools) {
      bufferQueue.push(data);
      hasBytes += data.length;
      if (bitReader === null) {
        bitReader = bitIterator(function () {
          return bufferQueue.shift()!;
        });
      }

      const threshold = 25000 + 100000 * blockSize || 4;
      while (!broken && hasBytes - bitReader.bytesRead + 1 >= threshold) {
        const result = await decompressAndPush();
        if (!result) {
          continue;
        }
        await streamTools.push(result);
      }
      return null;
    },
    finalFunction: async function (streamTools) {
      while (!broken && bitReader && hasBytes > bitReader.bytesRead) {
        const result = await decompressAndPush();
        if (!result) {
@@ -94,10 +95,11 @@ export function unbzip2Stream() {
        }
        await streamTools.push(result);
      }
      if (!broken && streamCRC !== null) {
        this.emit('error', new Bzip2Error('Input stream ended prematurely', BZIP2_ERROR_CODES.PREMATURE_END));
      }
      return null;
    },
  });
}

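The streaming variant is the usual entry point for large inputs; a minimal sketch, with file names and the import path assumed:

import * as fs from 'fs';
import { unbzip2Stream } from './ts/bzip2/unbzip2stream.js';

// Decompress a .bz2 file without buffering it fully in memory.
fs.createReadStream('de_companies_ocdata.jsonl.bz2')
  .pipe(unbzip2Stream())
  .pipe(fs.createWriteStream('de_companies_ocdata.jsonl'))
  .on('finish', () => console.log('done'));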
View File

@@ -1,24 +1,41 @@
import type { SmartArchive } from './classes.smartarchive.js';
import type { TSupportedMime } from './interfaces.js';
import * as plugins from './plugins.js';

/**
 * Type for decompression streams
 */
export type TDecompressionStream =
  | plugins.stream.Transform
  | plugins.stream.Duplex
  | plugins.tarStream.Extract;

/**
 * Result of archive analysis
 */
export interface IAnalyzedResult {
  fileType: plugins.fileType.FileTypeResult | undefined;
  isArchive: boolean;
  resultStream: plugins.smartstream.SmartDuplex<Buffer, Buffer>;
  decompressionStream: TDecompressionStream;
}

/**
 * Analyzes archive streams to detect format and provide decompression
 */
export class ArchiveAnalyzer {
  private smartArchiveRef: SmartArchive;

  constructor(smartArchiveRefArg: SmartArchive) {
    this.smartArchiveRef = smartArchiveRefArg;
  }

  /**
   * Check if a MIME type represents an archive format
   */
  private async mimeTypeIsArchive(mimeType: string | undefined): Promise<boolean> {
    if (!mimeType) return false;
    const archiveMimeTypes: Set<string> = new Set([
      'application/zip',
      'application/x-rar-compressed',
@@ -26,50 +43,46 @@ export class ArchiveAnalyzer {
      'application/gzip',
      'application/x-7z-compressed',
      'application/x-bzip2',
    ]);
    return archiveMimeTypes.has(mimeType);
  }

  /**
   * Get the appropriate decompression stream for a MIME type
   */
  private async getDecompressionStream(mimeTypeArg: TSupportedMime): Promise<TDecompressionStream> {
    switch (mimeTypeArg) {
      case 'application/gzip':
        return this.smartArchiveRef.gzipTools.getDecompressionStream();
      case 'application/zip':
        return this.smartArchiveRef.zipTools.getDecompressionStream();
      case 'application/x-bzip2':
        return this.smartArchiveRef.bzip2Tools.getDecompressionStream();
      case 'application/x-tar':
        return this.smartArchiveRef.tarTools.getDecompressionStream();
      default:
        // Handle unsupported formats or no decompression needed
        return plugins.smartstream.createPassThrough();
    }
  }

  /**
   * Create an analyzed stream that detects archive type and provides decompression
   * Emits a single IAnalyzedResult object
   */
  public getAnalyzedStream(): plugins.smartstream.SmartDuplex<Buffer, IAnalyzedResult> {
    let firstRun = true;
    const resultStream = plugins.smartstream.createPassThrough();

    const analyzerstream = new plugins.smartstream.SmartDuplex<Buffer, IAnalyzedResult>({
      readableObjectMode: true,
      writeFunction: async (chunkArg: Buffer, streamtools) => {
        if (firstRun) {
          firstRun = false;
          const fileType = await plugins.fileType.fileTypeFromBuffer(chunkArg);
          const decompressionStream = await this.getDecompressionStream(fileType?.mime as TSupportedMime);
          const result: IAnalyzedResult = {
            fileType,
            isArchive: await this.mimeTypeIsArchive(fileType?.mime),
@@ -81,11 +94,12 @@ export class ArchiveAnalyzer {
        await resultStream.backpressuredPush(chunkArg);
        return null;
      },
      finalFunction: async () => {
        resultStream.push(null);
        return null;
      },
    });
    return analyzerstream;
  }
}

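A sketch of driving the analyzer directly, assuming an existing archive file on disk; normally extractToStream() wires this up internally:

import * as fs from 'fs';
import { SmartArchive } from './ts/classes.smartarchive.js';

// The analyzer emits exactly one IAnalyzedResult per input stream.
const archive = new SmartArchive();
fs.createReadStream('bundle.tgz')
  .pipe(archive.archiveAnalyzer.getAnalyzedStream())
  .on('data', (result) => {
    console.log(result.fileType?.mime, 'isArchive:', result.isArchive);
  });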
View File

@@ -1,42 +1,19 @@
import * as plugins from './plugins.js';
import type { TCompressionLevel } from './interfaces.js';

/**
 * Transform stream for GZIP compression using fflate
 */
export class GzipCompressionTransform extends plugins.stream.Transform {
  private gzip: plugins.fflate.Gzip;

  constructor(level: TCompressionLevel = 6) {
    super();
    // Create a streaming Gzip compressor
    this.gzip = new plugins.fflate.Gzip({ level }, (chunk, final) => {
      this.push(Buffer.from(chunk));
      if (final) {
        this.push(null);
      }
    });
@@ -45,20 +22,59 @@ export class DecompressGunzipTransform extends plugins.stream.Transform {
  _transform(
    chunk: Buffer,
    encoding: BufferEncoding,
    callback: plugins.stream.TransformCallback
  ): void {
    try {
      this.gzip.push(chunk, false);
      callback();
    } catch (err) {
      callback(err as Error);
    }
  }

  _flush(callback: plugins.stream.TransformCallback): void {
    try {
      this.gzip.push(new Uint8Array(0), true);
      callback();
    } catch (err) {
      callback(err as Error);
    }
  }
}

/**
 * Transform stream for GZIP decompression using fflate
 */
export class GzipDecompressionTransform extends plugins.stream.Transform {
  private gunzip: plugins.fflate.Gunzip;

  constructor() {
    super();
    // Create a streaming Gunzip decompressor
    this.gunzip = new plugins.fflate.Gunzip((chunk, final) => {
      this.push(Buffer.from(chunk));
      if (final) {
        this.push(null);
      }
    });
  }

  _transform(
    chunk: Buffer,
    encoding: BufferEncoding,
    callback: plugins.stream.TransformCallback
  ): void {
    try {
      this.gunzip.push(chunk, false);
      callback();
    } catch (err) {
      callback(err as Error);
    }
  }

  _flush(callback: plugins.stream.TransformCallback): void {
    try {
      // Signal end of input to gunzip
      this.gunzip.push(new Uint8Array(0), true);
      callback();
    } catch (err) {
@@ -67,14 +83,61 @@ export class DecompressGunzipTransform extends plugins.stream.Transform {
    }
  }
}

/**
 * GZIP compression and decompression utilities
 */
export class GzipTools {
  /**
   * Get a streaming compression transform
   */
  public getCompressionStream(level?: TCompressionLevel): plugins.stream.Transform {
    return new GzipCompressionTransform(level);
  }

  /**
   * Get a streaming decompression transform
   */
  public getDecompressionStream(): plugins.stream.Transform {
    return new GzipDecompressionTransform();
  }

  /**
   * Compress data synchronously
   */
  public compressSync(data: Buffer, level?: TCompressionLevel): Buffer {
    const options = level !== undefined ? { level } : undefined;
    return Buffer.from(plugins.fflate.gzipSync(data, options));
  }

  /**
   * Decompress data synchronously
   */
  public decompressSync(data: Buffer): Buffer {
    return Buffer.from(plugins.fflate.gunzipSync(data));
  }

  /**
   * Compress data asynchronously
   */
  public async compress(data: Buffer, level?: TCompressionLevel): Promise<Buffer> {
    return new Promise((resolve, reject) => {
      const options = level !== undefined ? { level } : undefined;
      plugins.fflate.gzip(data, options as plugins.fflate.AsyncGzipOptions, (err, result) => {
        if (err) reject(err);
        else resolve(Buffer.from(result));
      });
    });
  }

  /**
   * Decompress data asynchronously
   */
  public async decompress(data: Buffer): Promise<Buffer> {
    return new Promise((resolve, reject) => {
      plugins.fflate.gunzip(data, (err, result) => {
        if (err) reject(err);
        else resolve(Buffer.from(result));
      });
    });
  }
}

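A round-trip sketch of the new sync and async helpers (to be run inside an async context; the import path is assumed):

import { GzipTools } from './ts/classes.gziptools.js';

const gzipTools = new GzipTools();
const original = Buffer.from('hello smartarchive');

// Async API with an explicit compression level...
const packed = await gzipTools.compress(original, 9);
const unpacked = await gzipTools.decompress(packed);
console.log(unpacked.equals(original)); // true

// ...and the synchronous equivalents for small payloads.
console.log(gzipTools.decompressSync(gzipTools.compressSync(original)).equals(original)); // true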
View File

@@ -1,75 +1,267 @@
import * as plugins from './plugins.js';
import type {
  IArchiveCreationOptions,
  IArchiveEntry,
  IArchiveExtractionOptions,
  IArchiveEntryInfo,
  IArchiveInfo,
  TArchiveFormat,
  TCompressionLevel,
} from './interfaces.js';
import { Bzip2Tools } from './classes.bzip2tools.js';
import { GzipTools } from './classes.gziptools.js';
import { TarTools } from './classes.tartools.js';
import { ZipTools } from './classes.ziptools.js';
import { ArchiveAnalyzer, type IAnalyzedResult } from './classes.archiveanalyzer.js';

/**
 * Main class for archive manipulation
 * Supports TAR, ZIP, GZIP, and BZIP2 formats
 */
export class SmartArchive {
  // ============================================
  // STATIC FACTORY METHODS - EXTRACTION
  // ============================================

  /**
   * Create SmartArchive from a URL
   */
  public static async fromUrl(urlArg: string): Promise<SmartArchive> {
    const smartArchiveInstance = new SmartArchive();
    smartArchiveInstance.sourceUrl = urlArg;
    return smartArchiveInstance;
  }

  /**
   * Create SmartArchive from a local file path
   */
  public static async fromFile(filePathArg: string): Promise<SmartArchive> {
    const smartArchiveInstance = new SmartArchive();
    smartArchiveInstance.sourceFilePath = filePathArg;
    return smartArchiveInstance;
  }

  /**
   * Create SmartArchive from a readable stream
   */
  public static async fromStream(
    streamArg: plugins.stream.Readable | plugins.stream.Duplex | plugins.stream.Transform
  ): Promise<SmartArchive> {
    const smartArchiveInstance = new SmartArchive();
    smartArchiveInstance.sourceStream = streamArg;
    return smartArchiveInstance;
  }

  /**
   * Create SmartArchive from an in-memory buffer
   */
  public static async fromBuffer(buffer: Buffer): Promise<SmartArchive> {
    const smartArchiveInstance = new SmartArchive();
    smartArchiveInstance.sourceStream = plugins.stream.Readable.from(buffer);
    return smartArchiveInstance;
  }

  // ============================================
  // STATIC FACTORY METHODS - CREATION
  // ============================================

  /**
   * Create a new archive from a directory
   */
  public static async fromDirectory(
    directoryPath: string,
    options: IArchiveCreationOptions
  ): Promise<SmartArchive> {
    const smartArchiveInstance = new SmartArchive();
    smartArchiveInstance.creationOptions = options;

    const tarTools = new TarTools();
    if (options.format === 'tar' || options.format === 'tar.gz' || options.format === 'tgz') {
      if (options.format === 'tar') {
        const pack = await tarTools.packDirectory(directoryPath);
        pack.finalize();
        smartArchiveInstance.archiveBuffer = await SmartArchive.streamToBuffer(pack);
      } else {
        smartArchiveInstance.archiveBuffer = await tarTools.packDirectoryToTarGz(
          directoryPath,
          options.compressionLevel
        );
      }
    } else if (options.format === 'zip') {
      const zipTools = new ZipTools();
      const fileTree = await plugins.listFileTree(directoryPath, '**/*');
      const entries: IArchiveEntry[] = [];
      for (const filePath of fileTree) {
        const absolutePath = plugins.path.join(directoryPath, filePath);
        const content = await plugins.fsPromises.readFile(absolutePath);
        entries.push({
          archivePath: filePath,
          content,
        });
      }
      smartArchiveInstance.archiveBuffer = await zipTools.createZip(entries, options.compressionLevel);
    } else {
      throw new Error(`Unsupported format for directory packing: ${options.format}`);
    }

    return smartArchiveInstance;
  }

  /**
   * Create a new archive from an array of entries
   */
  public static async fromFiles(
    files: IArchiveEntry[],
    options: IArchiveCreationOptions
  ): Promise<SmartArchive> {
    const smartArchiveInstance = new SmartArchive();
    smartArchiveInstance.creationOptions = options;

    if (options.format === 'tar' || options.format === 'tar.gz' || options.format === 'tgz') {
      const tarTools = new TarTools();
      if (options.format === 'tar') {
        smartArchiveInstance.archiveBuffer = await tarTools.packFiles(files);
      } else {
        smartArchiveInstance.archiveBuffer = await tarTools.packFilesToTarGz(files, options.compressionLevel);
      }
    } else if (options.format === 'zip') {
      const zipTools = new ZipTools();
      smartArchiveInstance.archiveBuffer = await zipTools.createZip(files, options.compressionLevel);
    } else if (options.format === 'gz') {
      if (files.length !== 1) {
        throw new Error('GZIP format only supports a single file');
      }
      const gzipTools = new GzipTools();
      let content: Buffer;
      if (typeof files[0].content === 'string') {
        content = Buffer.from(files[0].content);
      } else if (Buffer.isBuffer(files[0].content)) {
        content = files[0].content;
      } else {
        throw new Error('GZIP format requires string or Buffer content');
      }
      smartArchiveInstance.archiveBuffer = await gzipTools.compress(content, options.compressionLevel);
    } else {
      throw new Error(`Unsupported format: ${options.format}`);
    }

    return smartArchiveInstance;
  }

  /**
   * Start building an archive incrementally using a builder pattern
   */
  public static create(options: IArchiveCreationOptions): SmartArchive {
    const smartArchiveInstance = new SmartArchive();
    smartArchiveInstance.creationOptions = options;
    smartArchiveInstance.pendingEntries = [];
    return smartArchiveInstance;
  }

  /**
   * Helper to convert a stream to buffer
   */
  private static async streamToBuffer(stream: plugins.stream.Readable): Promise<Buffer> {
    const chunks: Buffer[] = [];
    return new Promise((resolve, reject) => {
      stream.on('data', (chunk) => chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk)));
      stream.on('end', () => resolve(Buffer.concat(chunks)));
      stream.on('error', reject);
    });
  }

  // ============================================
  // INSTANCE PROPERTIES
  // ============================================

  public tarTools = new TarTools();
  public zipTools = new ZipTools();
  public gzipTools = new GzipTools();
  public bzip2Tools = new Bzip2Tools(this);
  public archiveAnalyzer = new ArchiveAnalyzer(this);

  public sourceUrl?: string;
  public sourceFilePath?: string;
  public sourceStream?: plugins.stream.Readable | plugins.stream.Duplex | plugins.stream.Transform;

  private archiveBuffer?: Buffer;
  private creationOptions?: IArchiveCreationOptions;
  private pendingEntries?: IArchiveEntry[];

  constructor() {}

  // ============================================
  // BUILDER METHODS (for incremental creation)
  // ============================================

  /**
   * Add a file to the archive (builder pattern)
   */
  public addFile(archivePath: string, content: string | Buffer): this {
    if (!this.pendingEntries) {
      throw new Error('addFile can only be called on archives created with SmartArchive.create()');
    }
    this.pendingEntries.push({ archivePath, content });
    return this;
  }

  /**
   * Add a SmartFile to the archive (builder pattern)
   */
  public addSmartFile(file: plugins.smartfile.SmartFile, archivePath?: string): this {
    if (!this.pendingEntries) {
      throw new Error('addSmartFile can only be called on archives created with SmartArchive.create()');
    }
    this.pendingEntries.push({
      archivePath: archivePath || file.relative,
      content: file,
    });
    return this;
  }

  /**
   * Add a StreamFile to the archive (builder pattern)
   */
  public addStreamFile(file: plugins.smartfile.StreamFile, archivePath?: string): this {
    if (!this.pendingEntries) {
      throw new Error('addStreamFile can only be called on archives created with SmartArchive.create()');
    }
    this.pendingEntries.push({
      archivePath: archivePath || file.relativeFilePath,
      content: file,
    });
    return this;
  }

  /**
   * Build the archive from pending entries
   */
  public async build(): Promise<SmartArchive> {
    if (!this.pendingEntries || !this.creationOptions) {
      throw new Error('build can only be called on archives created with SmartArchive.create()');
    }
    const built = await SmartArchive.fromFiles(this.pendingEntries, this.creationOptions);
    this.archiveBuffer = built.archiveBuffer;
    this.pendingEntries = undefined;
    return this;
  }

  // ============================================
  // EXTRACTION METHODS
  // ============================================

  /**
   * Get the original archive stream
   */
  public async toStream(): Promise<plugins.stream.Readable> {
    if (this.archiveBuffer) {
      return plugins.stream.Readable.from(this.archiveBuffer);
    }
    if (this.sourceStream) {
      return this.sourceStream;
    }
@@ -78,127 +270,160 @@ export class SmartArchive {
        .url(this.sourceUrl)
        .get();
      const webStream = response.stream();
      return plugins.stream.Readable.fromWeb(webStream as any);
    }
    if (this.sourceFilePath) {
      return plugins.fs.createReadStream(this.sourceFilePath);
    }
    throw new Error('No archive source configured');
  }

  /**
   * Get archive as a Buffer
   */
  public async toBuffer(): Promise<Buffer> {
    if (this.archiveBuffer) {
      return this.archiveBuffer;
    }
    const stream = await this.toStream();
    return SmartArchive.streamToBuffer(stream);
  }

  /**
   * Write archive to a file
   */
  public async toFile(filePath: string): Promise<void> {
    const buffer = await this.toBuffer();
    await plugins.fsPromises.mkdir(plugins.path.dirname(filePath), { recursive: true });
    await plugins.fsPromises.writeFile(filePath, buffer);
  }

  /**
   * Extract archive to filesystem
   */
  public async extractToDirectory(
    targetDir: string,
    options?: Partial<IArchiveExtractionOptions>
  ): Promise<void> {
    const done = plugins.smartpromise.defer<void>();
    const streamFileStream = await this.extractToStream();
    streamFileStream.pipe(
      new plugins.smartstream.SmartDuplex({
        objectMode: true,
        writeFunction: async (streamFileArg: plugins.smartfile.StreamFile) => {
          const innerDone = plugins.smartpromise.defer<void>();
          const streamFile = streamFileArg;
          let relativePath = streamFile.relativeFilePath || options?.fileName || 'extracted_file';

          // Apply stripComponents if specified
          if (options?.stripComponents && options.stripComponents > 0) {
            const parts = relativePath.split('/');
            relativePath = parts.slice(options.stripComponents).join('/');
            if (!relativePath) {
              innerDone.resolve();
              return;
            }
          }

          // Apply filter if specified
          if (options?.filter) {
            const entryInfo: IArchiveEntryInfo = {
              path: relativePath,
              size: 0,
              isDirectory: false,
              isFile: true,
            };
            if (!options.filter(entryInfo)) {
              innerDone.resolve();
              return;
            }
          }

          const readStream = await streamFile.createReadStream();
          await plugins.fsPromises.mkdir(targetDir, { recursive: true });
          const writePath = plugins.path.join(targetDir, relativePath);
          await plugins.fsPromises.mkdir(plugins.path.dirname(writePath), { recursive: true });
          const writeStream = plugins.fs.createWriteStream(writePath);
          readStream.pipe(writeStream);
          writeStream.on('finish', () => {
            innerDone.resolve();
          });
          await innerDone.promise;
        },
        finalFunction: async () => {
          done.resolve();
        },
      })
    );
    return done.promise;
  }

  /**
   * Extract archive to a stream of StreamFile objects
   */
  public async extractToStream(): Promise<plugins.smartstream.StreamIntake<plugins.smartfile.StreamFile>> {
    const streamFileIntake = new plugins.smartstream.StreamIntake<plugins.smartfile.StreamFile>({
      objectMode: true,
    });

    // Guard to prevent multiple signalEnd calls
    let hasSignaledEnd = false;
    const safeSignalEnd = () => {
      if (!hasSignaledEnd) {
        hasSignaledEnd = true;
        streamFileIntake.signalEnd();
      }
    };

    const archiveStream = await this.toStream();
    const createAnalyzedStream = () => this.archiveAnalyzer.getAnalyzedStream();

    const createUnpackStream = () =>
      plugins.smartstream.createTransformFunction<IAnalyzedResult, void>(
        async (analyzedResultChunk) => {
          if (analyzedResultChunk.fileType?.mime === 'application/x-tar') {
            const tarStream = analyzedResultChunk.decompressionStream as plugins.tarStream.Extract;
            tarStream.on('entry', async (header, stream, next) => {
              if (header.type === 'directory') {
                stream.resume();
                stream.on('end', () => next());
                return;
              }

              const passThrough = new plugins.stream.PassThrough();
              const streamfile = plugins.smartfile.StreamFile.fromStream(passThrough, header.name);
              streamFileIntake.push(streamfile);
              stream.pipe(passThrough);
              stream.on('end', () => {
                passThrough.end();
                next();
              });
            });

            tarStream.on('finish', () => {
              safeSignalEnd();
            });

            analyzedResultChunk.resultStream.pipe(analyzedResultChunk.decompressionStream);
          } else if (analyzedResultChunk.fileType?.mime === 'application/zip') {
            analyzedResultChunk.resultStream
              .pipe(analyzedResultChunk.decompressionStream)
              .pipe(
                new plugins.smartstream.SmartDuplex({
                  objectMode: true,
                  writeFunction: async (streamFileArg: plugins.smartfile.StreamFile) => {
                    streamFileIntake.push(streamFileArg);
                  },
                  finalFunction: async () => {
                    safeSignalEnd();
                  },
                })
              );
          } else if (analyzedResultChunk.isArchive && analyzedResultChunk.decompressionStream) {
            // For nested archives (like gzip containing tar)
            analyzedResultChunk.resultStream
              .pipe(analyzedResultChunk.decompressionStream)
              .pipe(createAnalyzedStream())
@@ -206,18 +431,159 @@ export class SmartArchive {
          } else {
            const streamFile = plugins.smartfile.StreamFile.fromStream(
              analyzedResultChunk.resultStream,
              analyzedResultChunk.fileType?.ext
            );
            streamFileIntake.push(streamFile);
            safeSignalEnd();
          }
        },
        { objectMode: true }
      );

    archiveStream.pipe(createAnalyzedStream()).pipe(createUnpackStream());
    return streamFileIntake;
  }

  /**
   * Extract archive to an array of SmartFile objects (in-memory)
   */
  public async extractToSmartFiles(): Promise<plugins.smartfile.SmartFile[]> {
    const streamFiles = await this.extractToStream();
    const smartFiles: plugins.smartfile.SmartFile[] = [];

    return new Promise((resolve, reject) => {
      streamFiles.on('data', async (streamFile: plugins.smartfile.StreamFile) => {
        try {
          const smartFile = await streamFile.toSmartFile();
          smartFiles.push(smartFile);
        } catch (err) {
          reject(err);
        }
      });
      streamFiles.on('end', () => resolve(smartFiles));
      streamFiles.on('error', reject);
    });
  }

  /**
   * Extract a single file from the archive by path
   */
  public async extractFile(filePath: string): Promise<plugins.smartfile.SmartFile | null> {
    const streamFiles = await this.extractToStream();

    return new Promise((resolve, reject) => {
      let found = false;
      streamFiles.on('data', async (streamFile: plugins.smartfile.StreamFile) => {
        if (streamFile.relativeFilePath === filePath || streamFile.relativeFilePath?.endsWith(filePath)) {
          found = true;
          try {
            const smartFile = await streamFile.toSmartFile();
            resolve(smartFile);
          } catch (err) {
            reject(err);
          }
        }
      });
      streamFiles.on('end', () => {
        if (!found) {
          resolve(null);
        }
      });
      streamFiles.on('error', reject);
    });
  }

  // ============================================
  // ANALYSIS METHODS
  // ============================================

  /**
   * Analyze the archive and return metadata
   */
  public async analyze(): Promise<IArchiveInfo> {
    const stream = await this.toStream();
    const firstChunk = await this.readFirstChunk(stream);
    const fileType = await plugins.fileType.fileTypeFromBuffer(firstChunk);

    let format: TArchiveFormat | null = null;
    let isCompressed = false;
    let isArchive = false;

    if (fileType) {
      switch (fileType.mime) {
        case 'application/gzip':
          format = 'gz';
          isCompressed = true;
          isArchive = true;
          break;
        case 'application/zip':
          format = 'zip';
          isCompressed = true;
          isArchive = true;
          break;
        case 'application/x-tar':
          format = 'tar';
          isArchive = true;
          break;
        case 'application/x-bzip2':
          format = 'bz2';
          isCompressed = true;
          isArchive = true;
          break;
      }
    }

    return {
      format,
      isCompressed,
      isArchive,
    };
  }

  /**
   * List all entries in the archive without extracting
   */
  public async listEntries(): Promise<IArchiveEntryInfo[]> {
    const entries: IArchiveEntryInfo[] = [];
    const streamFiles = await this.extractToStream();

    return new Promise((resolve, reject) => {
      streamFiles.on('data', (streamFile: plugins.smartfile.StreamFile) => {
        entries.push({
          path: streamFile.relativeFilePath || 'unknown',
          size: 0, // Size not available without reading
          isDirectory: false,
          isFile: true,
        });
      });
      streamFiles.on('end', () => resolve(entries));
      streamFiles.on('error', reject);
    });
  }

  /**
   * Check if a specific file exists in the archive
   */
  public async hasFile(filePath: string): Promise<boolean> {
    const entries = await this.listEntries();
    return entries.some((e) => e.path === filePath || e.path.endsWith(filePath));
  }

  /**
   * Helper to read first chunk from stream
   */
  private async readFirstChunk(stream: plugins.stream.Readable): Promise<Buffer> {
    return new Promise((resolve, reject) => {
      const onData = (chunk: Buffer) => {
        stream.removeListener('data', onData);
        stream.removeListener('error', reject);
        resolve(chunk);
      };
      stream.on('data', onData);
      stream.on('error', reject);
    });
  }
}

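Taken together, the creation and extraction surfaces compose like this; a sketch assuming an async context and a writable .nogit directory:

import { SmartArchive } from '@push.rocks/smartarchive';

// Build a tar.gz incrementally, persist it, then query it again.
const archive = SmartArchive.create({ format: 'tar.gz', compressionLevel: 9 })
  .addFile('hello.txt', 'hello world')
  .addFile('data/numbers.json', Buffer.from(JSON.stringify([1, 2, 3])));
await archive.build();
await archive.toFile('.nogit/example.tar.gz');

const roundTrip = await SmartArchive.fromFile('.nogit/example.tar.gz');
console.log(await roundTrip.hasFile('hello.txt')); // true
console.log((await roundTrip.listEntries()).map((e) => e.path));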
View File

@@ -1,11 +1,14 @@
import * as plugins from './plugins.js';
import type { IArchiveEntry, TCompressionLevel } from './interfaces.js';
import { GzipTools } from './classes.gziptools.js';

/**
 * TAR archive creation and extraction utilities
 */
export class TarTools {
  /**
   * Add a file to a TAR pack stream
   */
  public async addFileToPack(
    pack: plugins.tarStream.Pack,
    optionsArg: {
@@ -13,12 +16,12 @@ export class TarTools {
      content?:
        | string
        | Buffer
        | plugins.stream.Readable
        | plugins.smartfile.SmartFile
        | plugins.smartfile.StreamFile;
      byteLength?: number;
      filePath?: string;
    }
  ): Promise<void> {
    return new Promise<void>(async (resolve, reject) => {
      let fileName: string | null = null;
@@ -26,18 +29,20 @@
      if (optionsArg.fileName) {
        fileName = optionsArg.fileName;
      } else if (optionsArg.content instanceof plugins.smartfile.SmartFile) {
        fileName = optionsArg.content.relative;
      } else if (optionsArg.content instanceof plugins.smartfile.StreamFile) {
        fileName = optionsArg.content.relativeFilePath;
      } else if (optionsArg.filePath) {
        fileName = optionsArg.filePath;
      }

      if (!fileName) {
        reject(new Error('No filename specified for TAR entry'));
        return;
      }

      // Determine content byte length
      let contentByteLength: number | undefined;
      if (optionsArg.byteLength) {
        contentByteLength = optionsArg.byteLength;
      } else if (typeof optionsArg.content === 'string') {
@@ -45,95 +50,159 @@
      } else if (Buffer.isBuffer(optionsArg.content)) {
        contentByteLength = optionsArg.content.length;
      } else if (optionsArg.content instanceof plugins.smartfile.SmartFile) {
        contentByteLength = await optionsArg.content.getSize();
      } else if (optionsArg.content instanceof plugins.smartfile.StreamFile) {
        contentByteLength = await optionsArg.content.getSize();
      } else if (optionsArg.filePath) {
        const fileStat = await plugins.fsPromises.stat(optionsArg.filePath);
        contentByteLength = fileStat.size;
      }

      // Convert all content types to a Readable stream
      let content: plugins.stream.Readable;
      if (Buffer.isBuffer(optionsArg.content)) {
        content = plugins.stream.Readable.from(optionsArg.content);
      } else if (typeof optionsArg.content === 'string') {
        content = plugins.stream.Readable.from(Buffer.from(optionsArg.content));
      } else if (optionsArg.content instanceof plugins.smartfile.SmartFile) {
        content = plugins.stream.Readable.from(optionsArg.content.contents);
      } else if (optionsArg.content instanceof plugins.smartfile.StreamFile) {
        content = await optionsArg.content.createReadStream();
      } else if (optionsArg.content instanceof plugins.stream.Readable) {
        content = optionsArg.content;
      } else if (optionsArg.filePath) {
        content = plugins.fs.createReadStream(optionsArg.filePath);
      } else {
        reject(new Error('No content or filePath specified for TAR entry'));
        return;
      }

      const entry = pack.entry(
        {
          name: fileName,
          ...(contentByteLength !== undefined ? { size: contentByteLength } : {}),
        },
        (err: Error | null) => {
          if (err) {
            reject(err);
          } else {
            resolve();
          }
        }
      );

      content.pipe(entry);
      // Note: resolve() is called in the callback above when pipe completes
    });
  }

  /**
   * Pack a directory into a TAR stream
   */
  public async packDirectory(directoryPath: string): Promise<plugins.tarStream.Pack> {
    const fileTree = await plugins.listFileTree(directoryPath, '**/*');
    const pack = await this.getPackStream();
    for (const filePath of fileTree) {
      const absolutePath = plugins.path.join(directoryPath, filePath);
      const fileStat = await plugins.fsPromises.stat(absolutePath);
      await this.addFileToPack(pack, {
        byteLength: fileStat.size,
        filePath: absolutePath,
        fileName: filePath,
        content: plugins.fs.createReadStream(absolutePath),
      });
    }
    return pack;
  }

  /**
   * Get a new TAR pack stream
   */
  public async getPackStream(): Promise<plugins.tarStream.Pack> {
    return plugins.tarStream.pack();
  }

  /**
   * Get a TAR extraction stream
   */
  public getDecompressionStream(): plugins.tarStream.Extract {
    return plugins.tarStream.extract();
  }

  /**
   * Pack files into a TAR buffer
   */
  public async packFiles(files: IArchiveEntry[]): Promise<Buffer> {
    const pack = await this.getPackStream();
    for (const file of files) {
      await this.addFileToPack(pack, {
        fileName: file.archivePath,
        content: file.content as string | Buffer | plugins.stream.Readable | plugins.smartfile.SmartFile | plugins.smartfile.StreamFile,
        byteLength: file.size,
      });
    }
    pack.finalize();
    const chunks: Buffer[] = [];
    return new Promise((resolve, reject) => {
      pack.on('data', (chunk: Buffer) => chunks.push(chunk));
      pack.on('end', () => resolve(Buffer.concat(chunks)));
      pack.on('error', reject);
    });
  }

  /**
   * Pack a directory into a TAR.GZ buffer
   */
  public async packDirectoryToTarGz(
    directoryPath: string,
    compressionLevel?: TCompressionLevel
  ): Promise<Buffer> {
    const pack = await this.packDirectory(directoryPath);
    pack.finalize();
    const gzipTools = new GzipTools();
    const gzipStream = gzipTools.getCompressionStream(compressionLevel);
    const chunks: Buffer[] = [];
    return new Promise((resolve, reject) => {
      pack
        .pipe(gzipStream)
        .on('data', (chunk: Buffer) => chunks.push(chunk))
        .on('end', () => resolve(Buffer.concat(chunks)))
        .on('error', reject);
    });
  }

  /**
   * Pack a directory into a TAR.GZ stream
   */
  public async packDirectoryToTarGzStream(
    directoryPath: string,
    compressionLevel?: TCompressionLevel
  ): Promise<plugins.stream.Readable> {
    const pack = await this.packDirectory(directoryPath);
    pack.finalize();
    const gzipTools = new GzipTools();
    const gzipStream = gzipTools.getCompressionStream(compressionLevel);
    return pack.pipe(gzipStream);
  }

  /**
   * Pack files into a TAR.GZ buffer
   */
  public async packFilesToTarGz(
    files: IArchiveEntry[],
    compressionLevel?: TCompressionLevel
  ): Promise<Buffer> {
    const tarBuffer = await this.packFiles(files);
    const gzipTools = new GzipTools();
    return gzipTools.compress(tarBuffer, compressionLevel);
  }
}
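A short sketch of the new packing helpers, assuming a surrounding async context; file names and paths are illustrative:

import * as fs from 'node:fs';
import { TarTools } from '@push.rocks/smartarchive';

const tarTools = new TarTools();

// Pack in-memory entries straight into a gzipped tar buffer.
const tgzBuffer = await tarTools.packFilesToTarGz(
  [
    { archivePath: 'hello.txt', content: 'hello world' },
    { archivePath: 'data/blob.bin', content: Buffer.from([0x00, 0x01]) },
  ],
  9, // maximum compression
);

// Stream a directory from disk as .tar.gz, e.g. into a file.
const tgzStream = await tarTools.packDirectoryToTarGzStream('./dist');
tgzStream.pipe(fs.createWriteStream('./dist.tar.gz'));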

View File

@@ -1,83 +1,209 @@
import * as plugins from './plugins.js';
import type { IArchiveEntry, TCompressionLevel } from './interfaces.js';

/**
 * Transform stream for ZIP decompression using fflate
 * Emits StreamFile objects for each file in the archive
 */
export class ZipDecompressionTransform extends plugins.smartstream.SmartDuplex<Buffer, plugins.smartfile.StreamFile> {
  private streamtools!: plugins.smartstream.IStreamTools;

  private unzipper = new plugins.fflate.Unzip(async (fileArg) => {
    let resultBuffer: Buffer;
    fileArg.ondata = async (_flateError, dat, final) => {
      resultBuffer
        ? (resultBuffer = Buffer.concat([resultBuffer, Buffer.from(dat)]))
        : (resultBuffer = Buffer.from(dat));
      if (final) {
        const streamFile = plugins.smartfile.StreamFile.fromBuffer(resultBuffer);
        streamFile.relativeFilePath = fileArg.name;
        this.streamtools.push(streamFile);
      }
    };
    fileArg.start();
  });

  constructor() {
    super({
      objectMode: true,
      writeFunction: async (chunkArg, streamtoolsArg) => {
        this.streamtools ? null : (this.streamtools = streamtoolsArg);
        this.unzipper.push(
          Buffer.isBuffer(chunkArg) ? chunkArg : Buffer.from(chunkArg as unknown as ArrayBuffer),
          false
        );
      },
      finalFunction: async () => {
        this.unzipper.push(Buffer.from(''), true);
        await plugins.smartdelay.delayFor(0);
        await this.streamtools.push(null);
      },
    });
    this.unzipper.register(plugins.fflate.UnzipInflate);
  }
}

/**
 * Streaming ZIP compression using fflate
 * Allows adding multiple entries before finalizing
 */
export class ZipCompressionStream extends plugins.stream.Duplex {
  private files: Map<string, { data: Uint8Array; options?: plugins.fflate.ZipOptions }> = new Map();
  private finalized = false;

  constructor() {
    super();
  }

  /**
   * Add a file entry to the ZIP archive
   */
  public async addEntry(
    fileName: string,
    content: Buffer | plugins.stream.Readable,
    options?: { compressionLevel?: TCompressionLevel }
  ): Promise<void> {
    if (this.finalized) {
      throw new Error('Cannot add entries to a finalized ZIP archive');
    }
    let data: Buffer;
    if (Buffer.isBuffer(content)) {
      data = content;
    } else {
      // Collect stream to buffer
      const chunks: Buffer[] = [];
      for await (const chunk of content) {
        chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
      }
      data = Buffer.concat(chunks);
    }
    this.files.set(fileName, {
      data: new Uint8Array(data),
      options: options?.compressionLevel !== undefined ? { level: options.compressionLevel } : undefined,
    });
  }

  /**
   * Finalize the ZIP archive and emit the compressed data
   */
  public async finalize(): Promise<void> {
    if (this.finalized) {
      return;
    }
    this.finalized = true;
    const filesObj: plugins.fflate.Zippable = {};
    for (const [name, { data, options }] of this.files) {
      filesObj[name] = options ? [data, options] : data;
    }
    return new Promise((resolve, reject) => {
      plugins.fflate.zip(filesObj, (err, result) => {
        if (err) {
          reject(err);
        } else {
          this.push(Buffer.from(result));
          this.push(null);
          resolve();
        }
      });
    });
  }

  _read(): void {
    // No-op: data is pushed when finalize() is called
  }

  _write(
    _chunk: Buffer,
    _encoding: BufferEncoding,
    callback: (error?: Error | null) => void
  ): void {
    // Not used for ZIP creation - use addEntry() instead
    callback(new Error('Use addEntry() to add files to the ZIP archive'));
  }
}

/**
 * ZIP compression and decompression utilities
 */
export class ZipTools {
  /**
   * Get a streaming compression object for creating ZIP archives
   */
  public getCompressionStream(): ZipCompressionStream {
    return new ZipCompressionStream();
  }

  /**
   * Get a streaming decompression transform for extracting ZIP archives
   */
  public getDecompressionStream(): ZipDecompressionTransform {
    return new ZipDecompressionTransform();
  }

  /**
   * Create a ZIP archive from an array of entries
   */
  public async createZip(entries: IArchiveEntry[], compressionLevel?: TCompressionLevel): Promise<Buffer> {
    const filesObj: plugins.fflate.Zippable = {};
    for (const entry of entries) {
      let data: Uint8Array;
      if (typeof entry.content === 'string') {
        data = new TextEncoder().encode(entry.content);
      } else if (Buffer.isBuffer(entry.content)) {
        data = new Uint8Array(entry.content);
      } else if (entry.content instanceof plugins.smartfile.SmartFile) {
        data = new Uint8Array(entry.content.contents);
      } else if (entry.content instanceof plugins.smartfile.StreamFile) {
        const buffer = await entry.content.getContentAsBuffer();
        data = new Uint8Array(buffer);
      } else {
        // Readable stream
        const chunks: Buffer[] = [];
        for await (const chunk of entry.content as plugins.stream.Readable) {
          chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
        }
        data = new Uint8Array(Buffer.concat(chunks));
      }
      if (compressionLevel !== undefined) {
        filesObj[entry.archivePath] = [data, { level: compressionLevel }];
      } else {
        filesObj[entry.archivePath] = data;
      }
    }
    return new Promise((resolve, reject) => {
      plugins.fflate.zip(filesObj, (err, result) => {
        if (err) reject(err);
        else resolve(Buffer.from(result));
      });
    });
  }

  /**
   * Extract a ZIP buffer to an array of entries
   */
  public async extractZip(data: Buffer): Promise<Array<{ path: string; content: Buffer }>> {
    return new Promise((resolve, reject) => {
      plugins.fflate.unzip(data, (err, result) => {
        if (err) {
          reject(err);
          return;
        }
        const entries: Array<{ path: string; content: Buffer }> = [];
        for (const [path, content] of Object.entries(result)) {
          entries.push({ path, content: Buffer.from(content) });
        }
        resolve(entries);
      });
    });
  }
}
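A round-trip sketch for the buffer-based ZIP helpers, assuming a surrounding async context; entry names are illustrative:

import { ZipTools } from '@push.rocks/smartarchive';

const zipTools = new ZipTools();

// Create a ZIP from in-memory entries at compression level 6.
const zipBuffer = await zipTools.createZip(
  [
    { archivePath: 'readme.md', content: '# demo' },
    { archivePath: 'assets/raw.bin', content: Buffer.from('0123') },
  ],
  6,
);

// Extract the buffer back into { path, content } pairs.
const extracted = await zipTools.extractZip(zipBuffer);
for (const entry of extracted) {
  console.log(entry.path, entry.content.length);
}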

ts/errors.ts Normal file
View File

@@ -0,0 +1,70 @@
/**
* Base error class for smartarchive
*/
export class SmartArchiveError extends Error {
public readonly code: string;
constructor(message: string, code: string) {
super(message);
this.name = 'SmartArchiveError';
this.code = code;
// Maintains proper stack trace for where error was thrown (V8)
if (Error.captureStackTrace) {
Error.captureStackTrace(this, this.constructor);
}
}
}
/**
* BZIP2-specific decompression errors
*/
export class Bzip2Error extends SmartArchiveError {
constructor(message: string, code: string = 'BZIP2_ERROR') {
super(message, code);
this.name = 'Bzip2Error';
}
}
/**
* Archive format detection errors
*/
export class ArchiveFormatError extends SmartArchiveError {
constructor(message: string) {
super(message, 'ARCHIVE_FORMAT_ERROR');
this.name = 'ArchiveFormatError';
}
}
/**
* Stream processing errors
*/
export class StreamError extends SmartArchiveError {
constructor(message: string) {
super(message, 'STREAM_ERROR');
this.name = 'StreamError';
}
}
/**
* BZIP2 error codes for programmatic error handling
*/
export const BZIP2_ERROR_CODES = {
NO_MAGIC_NUMBER: 'BZIP2_NO_MAGIC',
INVALID_ARCHIVE: 'BZIP2_INVALID_ARCHIVE',
CRC_MISMATCH: 'BZIP2_CRC_MISMATCH',
INVALID_BLOCK_DATA: 'BZIP2_INVALID_BLOCK',
BUFFER_OVERFLOW: 'BZIP2_BUFFER_OVERFLOW',
INVALID_HUFFMAN: 'BZIP2_INVALID_HUFFMAN',
INVALID_SELECTOR: 'BZIP2_INVALID_SELECTOR',
INVALID_POSITION: 'BZIP2_INVALID_POSITION',
PREMATURE_END: 'BZIP2_PREMATURE_END',
} as const;
export type TBzip2ErrorCode = typeof BZIP2_ERROR_CODES[keyof typeof BZIP2_ERROR_CODES];
/**
* Throw a BZIP2 error with a specific code
*/
export function throwBzip2Error(message: string, code: TBzip2ErrorCode): never {
throw new Bzip2Error(message, code);
}
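A sketch of programmatic error handling with these types; failingDecompress is a stand-in defined here only for illustration:

import {
  Bzip2Error,
  BZIP2_ERROR_CODES,
  SmartArchiveError,
  throwBzip2Error,
} from '@push.rocks/smartarchive';

// Stand-in for any call that may throw a Bzip2Error.
const failingDecompress = (): void => {
  throwBzip2Error('block checksum failed', BZIP2_ERROR_CODES.CRC_MISMATCH);
};

try {
  failingDecompress();
} catch (err) {
  if (err instanceof Bzip2Error && err.code === BZIP2_ERROR_CODES.CRC_MISMATCH) {
    console.warn('bzip2 CRC mismatch:', err.message);
  } else if (err instanceof SmartArchiveError) {
    // All smartarchive errors carry a stable code for branching.
    console.error(`[${err.code}] ${err.message}`);
  } else {
    throw err;
  }
}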

View File

@@ -1,4 +1,15 @@
// Core types and errors
export * from './interfaces.js';
export * from './errors.js';

// Main archive class
export * from './classes.smartarchive.js';

// Format-specific tools
export * from './classes.tartools.js';
export * from './classes.ziptools.js';
export * from './classes.gziptools.js';
export * from './classes.bzip2tools.js';

// Archive analysis
export * from './classes.archiveanalyzer.js';
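With the reorganized export surface, consumers can pull classes, errors, and types from the package root in one import; a sketch:

import {
  SmartArchive,
  TarTools,
  ZipTools,
  GzipTools,
  Bzip2Error,
  type IArchiveEntry,
  type TCompressionLevel,
} from '@push.rocks/smartarchive';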

ts/interfaces.ts Normal file
View File

@@ -0,0 +1,131 @@
import type * as stream from 'node:stream';
import type { SmartFile, StreamFile } from '@push.rocks/smartfile';
/**
* Supported archive formats
*/
export type TArchiveFormat = 'tar' | 'tar.gz' | 'tgz' | 'zip' | 'gz' | 'bz2';
/**
* Compression level (0 = no compression, 9 = maximum compression)
*/
export type TCompressionLevel = 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9;
/**
* Supported MIME types for archive detection
*/
export type TSupportedMime =
| 'application/gzip'
| 'application/zip'
| 'application/x-bzip2'
| 'application/x-tar'
| undefined;
/**
* Entry to add to an archive during creation
*/
export interface IArchiveEntry {
/** Path within the archive */
archivePath: string;
/** Content: string, Buffer, Readable stream, SmartFile, or StreamFile */
content: string | Buffer | stream.Readable | SmartFile | StreamFile;
/** Optional size hint for streams (improves performance) */
size?: number;
/** Optional file mode/permissions */
mode?: number;
/** Optional modification time */
mtime?: Date;
}
/**
* Options for creating archives
*/
export interface IArchiveCreationOptions {
/** Target archive format */
format: TArchiveFormat;
/** Compression level (0-9, default depends on format) */
compressionLevel?: TCompressionLevel;
/** Base path to strip from file paths in archive */
basePath?: string;
}
/**
* Options for extracting archives
*/
export interface IArchiveExtractionOptions {
/** Target directory for extraction */
targetDir: string;
/** Optional filename for single-file archives (gz, bz2) */
fileName?: string;
/** Number of leading path components to strip */
stripComponents?: number;
/** Filter function to select which entries to extract */
filter?: (entry: IArchiveEntryInfo) => boolean;
/** Whether to overwrite existing files */
overwrite?: boolean;
}
/**
* Information about an archive entry
*/
export interface IArchiveEntryInfo {
/** Path of the entry within the archive */
path: string;
/** Size in bytes */
size: number;
/** Whether this entry is a directory */
isDirectory: boolean;
/** Whether this entry is a file */
isFile: boolean;
/** Modification time */
mtime?: Date;
/** File mode/permissions */
mode?: number;
}
/**
* Result of archive analysis
*/
export interface IArchiveInfo {
/** Detected archive format */
format: TArchiveFormat | null;
/** Whether the archive is compressed */
isCompressed: boolean;
/** Whether this is a recognized archive format */
isArchive: boolean;
/** List of entries (if available without full extraction) */
entries?: IArchiveEntryInfo[];
}
/**
* Options for adding a file to a TAR pack stream
*/
export interface IAddFileOptions {
/** Filename within the archive */
fileName?: string;
/** File content */
content?: string | Buffer | stream.Readable | SmartFile | StreamFile;
/** Size in bytes (required for streams) */
byteLength?: number;
/** Path to file on disk (alternative to content) */
filePath?: string;
}
/**
* Bit reader interface for BZIP2 decompression
*/
export interface IBitReader {
(n: number | null): number | void;
bytesRead: number;
}
/**
* Huffman group for BZIP2 decompression
*/
export interface IHuffmanGroup {
permute: Int32Array;
limit: Int32Array;
base: Int32Array;
minLen: number;
maxLen: number;
}
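A type-level sketch showing how these interfaces compose; paths, dates, and the target directory are illustrative:

import type { IArchiveEntry, IArchiveExtractionOptions } from '@push.rocks/smartarchive';

const entries: IArchiveEntry[] = [
  {
    archivePath: 'src/index.ts',
    content: 'export const answer = 42;',
    mode: 0o644,
    mtime: new Date('2025-11-25'),
  },
];

// Extraction options with a filter that keeps only TypeScript files.
const options: IArchiveExtractionOptions = {
  targetDir: './out',
  stripComponents: 1,
  filter: (entry) => entry.isFile && entry.path.endsWith('.ts'),
};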

View File

@@ -1,8 +1,34 @@
// node native scope
import * as path from 'node:path';
import * as stream from 'node:stream';
import * as fs from 'node:fs';
import * as fsPromises from 'node:fs/promises';

export { path, stream, fs, fsPromises };

/**
 * List files in a directory recursively, returning relative paths
 */
export async function listFileTree(dirPath: string, _pattern: string = '**/*'): Promise<string[]> {
  const results: string[] = [];

  async function walkDir(currentPath: string, relativePath: string = '') {
    const entries = await fsPromises.readdir(currentPath, { withFileTypes: true });
    for (const entry of entries) {
      const entryRelPath = relativePath ? path.join(relativePath, entry.name) : entry.name;
      const entryFullPath = path.join(currentPath, entry.name);
      if (entry.isDirectory()) {
        await walkDir(entryFullPath, entryRelPath);
      } else if (entry.isFile()) {
        results.push(entryRelPath);
      }
    }
  }

  await walkDir(dirPath);
  return results;
}

// @pushrocks scope
import * as smartfile from '@push.rocks/smartfile';