Files
smartarchive/ts/classes.smartarchive.ts

864 lines
25 KiB
TypeScript
Raw Permalink Normal View History

2023-11-06 18:14:21 +01:00
import * as plugins from './plugins.js';
import type {
IArchiveEntry,
IArchiveEntryInfo,
IArchiveInfo,
TArchiveFormat,
TCompressionLevel,
TEntryFilter,
} from './interfaces.js';
2023-11-06 18:14:21 +01:00
2024-03-17 00:29:42 +01:00
import { Bzip2Tools } from './classes.bzip2tools.js';
2023-11-06 18:14:21 +01:00
import { GzipTools } from './classes.gziptools.js';
import { TarTools } from './classes.tartools.js';
2024-03-17 00:29:42 +01:00
import { ZipTools } from './classes.ziptools.js';
import { ArchiveAnalyzer, type IAnalyzedResult } from './classes.archiveanalyzer.js';
2023-11-06 18:14:21 +01:00
/**
* Pending directory entry for async resolution.
* Instances are queued by `directory()` and expanded into individual file
* entries by `resolveDirectories()` when the archive is built.
*/
interface IPendingDirectory {
// Directory on disk whose file tree is listed and read at build time
sourcePath: string;
// Optional prefix joined in front of each file's path inside the archive
archiveBase?: string;
}
/**
* Main class for archive manipulation with fluent API
* Supports TAR, ZIP, GZIP, and BZIP2 formats
*
* @example Extraction from URL
* ```typescript
* await SmartArchive.create()
* .url('https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz')
* .stripComponents(1)
* .extract('./node_modules/lodash');
* ```
*
* @example Creation (call .build() explicitly; the instance itself is not thenable)
* ```typescript
* const archive = await SmartArchive.create()
* .format('tar.gz')
* .compression(9)
* .entry('config.json', JSON.stringify(config))
* .directory('./src')
* .build();
* ```
*/
2023-11-06 18:14:21 +01:00
export class SmartArchive {
// ============================================
// STATIC ENTRY POINT
// ============================================
/**
 * Factory entry point: returns a blank SmartArchive ready for fluent configuration.
 */
public static create(): SmartArchive {
  const instance = new SmartArchive();
  return instance;
}
// ============================================
// TOOLS (public for internal use)
// ============================================
2023-11-06 18:14:21 +01:00
// Format helpers, constructed eagerly with every SmartArchive instance.
public tarTools = new TarTools();
public zipTools = new ZipTools();
public gzipTools = new GzipTools();
// Bzip2Tools and ArchiveAnalyzer are handed this SmartArchive instance at construction.
public bzip2Tools = new Bzip2Tools(this);
public archiveAnalyzer = new ArchiveAnalyzer(this);
2023-11-06 18:14:21 +01:00
// ============================================
// SOURCE STATE (extraction mode)
// ============================================
// Set by url(); fetched lazily in getSourceStream()
private sourceUrl?: string;
// Set by file(); opened lazily in getSourceStream()
private sourceFilePath?: string;
// Set by stream(), or by buffer() which wraps the buffer in a Readable
private sourceStream?: plugins.stream.Readable | plugins.stream.Duplex | plugins.stream.Transform;
// ============================================
// CREATION STATE
// ============================================
// Built archive bytes, cached by doBuild() so later output calls reuse them
private archiveBuffer?: Buffer;
// Output format chosen via format(); doBuild() throws if it is missing
private creationFormat?: TArchiveFormat;
// Level handed to the tar.gz / zip / gz writers in doBuild() (default 6)
private _compressionLevel: TCompressionLevel = 6;
// Entries queued by entry()/entries()/addSmartFile()/addStreamFile() and by directory resolution
private pendingEntries: IArchiveEntry[] = [];
// Directories queued by directory(); expanded into pendingEntries at build time
private pendingDirectories: IPendingDirectory[] = [];
// ============================================
// FLUENT STATE
// ============================================
// null until the first source or creation method fixes the mode
private _mode: 'extract' | 'create' | null = null;
// Custom predicates added via filter(); every one must accept an entry
private _filters: TEntryFilter[] = [];
// Patterns added via exclude(); any match rejects an entry
private _excludePatterns: RegExp[] = [];
// Patterns added via include(); when non-empty, at least one must match
private _includePatterns: RegExp[] = [];
// Number of leading path components extract() removes from entry paths
private _stripComponents: number = 0;
// Set by overwrite(). NOTE(review): not read anywhere in this class — extract() does not consult it
private _overwrite: boolean = false;
// Fallback output name used by extract() when an entry carries no relative path
private _fileName?: string;
constructor() {}
// ============================================
// SOURCE METHODS (set extraction mode)
// ============================================
/**
 * Use a remote archive as the extraction source.
 *
 * @param urlArg URL that is requested when a terminal method needs the data
 * @returns this instance, for chaining
 * @throws Error if the instance is already in creation mode
 */
public url(urlArg: string): this {
  this.ensureNotInCreateMode('url');
  this.sourceUrl = urlArg;
  this._mode = 'extract';
  return this;
}
/**
 * Use an archive on the local filesystem as the extraction source.
 *
 * @param pathArg path that is opened when a terminal method needs the data
 * @returns this instance, for chaining
 * @throws Error if the instance is already in creation mode
 */
public file(pathArg: string): this {
  this.ensureNotInCreateMode('file');
  this.sourceFilePath = pathArg;
  this._mode = 'extract';
  return this;
}
/**
 * Use an existing readable stream as the extraction source.
 *
 * @param streamArg stream carrying the raw archive bytes
 * @returns this instance, for chaining
 * @throws Error if the instance is already in creation mode
 */
public stream(
  streamArg: plugins.stream.Readable | plugins.stream.Duplex | plugins.stream.Transform
): this {
  this.ensureNotInCreateMode('stream');
  this.sourceStream = streamArg;
  this._mode = 'extract';
  return this;
}
/**
 * Use an in-memory buffer as the extraction source.
 * The buffer is wrapped in a Readable and stored as the source stream.
 *
 * @param bufferArg raw archive bytes
 * @returns this instance, for chaining
 * @throws Error if the instance is already in creation mode
 */
public buffer(bufferArg: Buffer): this {
  this.ensureNotInCreateMode('buffer');
  const wrapped = plugins.stream.Readable.from(bufferArg);
  this.sourceStream = wrapped;
  this._mode = 'extract';
  return this;
}
// ============================================
// FORMAT METHODS (set creation mode)
// ============================================
/**
 * Choose the output format and switch the instance into creation mode.
 *
 * @param fmt archive format to produce
 * @returns this instance, for chaining
 * @throws Error if the instance is already in extraction mode
 */
public format(fmt: TArchiveFormat): this {
  this.ensureNotInExtractMode('format');
  this.creationFormat = fmt;
  this._mode = 'create';
  return this;
}
2023-11-06 18:14:21 +01:00
/**
 * Set the compression level (0-9) used when the archive is built.
 *
 * @param level compression level stored for the build step
 * @returns this instance, for chaining
 */
public compression(level: TCompressionLevel): this {
  this._compressionLevel = level;
  return this;
}
2023-11-06 18:14:21 +01:00
// ============================================
// CONTENT METHODS (creation mode)
// ============================================
/**
 * Queue one file for inclusion in the archive.
 *
 * @param archivePath path the file gets inside the archive
 * @param content file content as string or Buffer
 * @returns this instance, for chaining
 * @throws Error if the instance is in extraction mode
 */
public entry(archivePath: string, content: string | Buffer): this {
  this.ensureNotInExtractMode('entry');
  // The first content call on a fresh instance implies creation mode.
  if (this._mode === null) {
    this._mode = 'create';
  }
  const queued = { archivePath, content };
  this.pendingEntries.push(queued);
  return this;
}
/**
 * Queue several files for inclusion in the archive.
 * Only `archivePath` and `content` of each item are copied into the queue.
 *
 * @param entriesArg list of path/content pairs
 * @returns this instance, for chaining
 * @throws Error if the instance is in extraction mode
 */
public entries(entriesArg: Array<{ archivePath: string; content: string | Buffer }>): this {
  this.ensureNotInExtractMode('entries');
  if (this._mode === null) {
    this._mode = 'create';
  }
  entriesArg.forEach(({ archivePath, content }) => {
    this.pendingEntries.push({ archivePath, content });
  });
  return this;
}
/**
 * Queue a whole directory for inclusion; its files are read when the archive is built.
 *
 * @param sourcePath directory on disk
 * @param archiveBase optional prefix for the files' paths inside the archive
 * @returns this instance, for chaining
 * @throws Error if the instance is in extraction mode
 */
public directory(sourcePath: string, archiveBase?: string): this {
  this.ensureNotInExtractMode('directory');
  if (this._mode === null) {
    this._mode = 'create';
  }
  const queued: IPendingDirectory = { sourcePath, archiveBase };
  this.pendingDirectories.push(queued);
  return this;
}
/**
 * Queue a SmartFile for inclusion in the archive.
 *
 * @param fileArg file object to add
 * @param archivePath path inside the archive; falls back to `fileArg.relative` when empty or omitted
 * @returns this instance, for chaining
 * @throws Error if the instance is in extraction mode
 */
public addSmartFile(fileArg: plugins.smartfile.SmartFile, archivePath?: string): this {
  this.ensureNotInExtractMode('addSmartFile');
  if (this._mode === null) {
    this._mode = 'create';
  }
  const targetPath = archivePath || fileArg.relative;
  this.pendingEntries.push({ archivePath: targetPath, content: fileArg });
  return this;
}
/**
 * Queue a StreamFile for inclusion in the archive.
 *
 * @param fileArg stream-backed file object to add
 * @param archivePath path inside the archive; falls back to `fileArg.relativeFilePath` when empty or omitted
 * @returns this instance, for chaining
 * @throws Error if the instance is in extraction mode
 */
public addStreamFile(fileArg: plugins.smartfile.StreamFile, archivePath?: string): this {
  this.ensureNotInExtractMode('addStreamFile');
  if (this._mode === null) {
    this._mode = 'create';
  }
  const targetPath = archivePath || fileArg.relativeFilePath;
  this.pendingEntries.push({ archivePath: targetPath, content: fileArg });
  return this;
}
// ============================================
// FILTER METHODS (both modes)
// ============================================
/**
 * Register a custom predicate; an entry is kept only if every registered predicate accepts it.
 *
 * @param predicate function deciding whether an entry is kept
 * @returns this instance, for chaining
 */
public filter(predicate: TEntryFilter): this {
  this._filters = [...this._filters, predicate];
  return this;
}
/**
 * Register an include pattern; once any are registered, an entry must match at least one.
 *
 * @param pattern RegExp, or a string that is compiled with `new RegExp(pattern)`
 * @returns this instance, for chaining
 */
public include(pattern: string | RegExp): this {
  let compiled: RegExp;
  if (typeof pattern === 'string') {
    compiled = new RegExp(pattern);
  } else {
    compiled = pattern;
  }
  this._includePatterns.push(compiled);
  return this;
}
/**
 * Register an exclude pattern; an entry matching any exclude pattern is dropped.
 *
 * @param pattern RegExp, or a string that is compiled with `new RegExp(pattern)`
 * @returns this instance, for chaining
 */
public exclude(pattern: string | RegExp): this {
  let compiled: RegExp;
  if (typeof pattern === 'string') {
    compiled = new RegExp(pattern);
  } else {
    compiled = pattern;
  }
  this._excludePatterns.push(compiled);
  return this;
}
// ============================================
// EXTRACTION OPTIONS
// ============================================
2023-11-06 18:14:21 +01:00
/**
 * Set how many leading path components are removed from entry paths during extraction.
 *
 * @param n number of components to strip
 * @returns this instance, for chaining
 */
public stripComponents(n: number): this {
  this._stripComponents = n;
  return this;
}
/**
 * Record whether existing files should be overwritten during extraction.
 *
 * @param value desired flag; defaults to true when called without an argument
 * @returns this instance, for chaining
 */
public overwrite(value: boolean = true): this {
  this._overwrite = value;
  return this;
}
/**
 * Set the output filename used for single-file archives (gz, bz2).
 *
 * @param name filename to use when an extracted entry carries no path of its own
 * @returns this instance, for chaining
 */
public fileName(name: string): this {
  this._fileName = name;
  return this;
}
// ============================================
// TERMINAL METHODS - EXTRACTION
// ============================================
/**
 * Extract the archive into a directory on disk.
 *
 * Entries are taken one at a time from `toStreamFiles()`. For each entry the
 * configured `stripComponents` and filters are applied, then its content is
 * streamed to `<targetDir>/<entry path>`, creating parent directories as needed.
 *
 * @param targetDir directory to extract into
 * @returns promise that resolves once every entry has been written
 * @throws Error if no source is configured; rejects if an entry path would
 *         resolve outside `targetDir` or if any stream / filesystem error occurs
 */
public async extract(targetDir: string): Promise<void> {
  this.ensureExtractionSource();
  const done = plugins.smartpromise.defer<void>();
  const streamFileStream = await this.toStreamFiles();
  // The filter configuration is fixed for the duration of the call; build it once.
  const filterFn = this.buildFilterFunction();
  const targetRoot = plugins.path.resolve(targetDir);

  // A skipped entry must still be consumed: toStreamFiles() only advances to the
  // next tar entry after the current entry's stream has ended, so an unread
  // entry would stall the whole extraction.
  const discard = async (skippedFile: plugins.smartfile.StreamFile): Promise<void> => {
    const unread = await skippedFile.createReadStream();
    unread.resume();
  };

  const fileWriter = new plugins.smartstream.SmartDuplex({
    objectMode: true,
    writeFunction: async (streamFile: plugins.smartfile.StreamFile) => {
      try {
        let relativePath = streamFile.relativeFilePath || this._fileName || 'extracted_file';

        // Apply stripComponents; entries that vanish entirely are skipped.
        if (this._stripComponents > 0) {
          relativePath = relativePath.split('/').slice(this._stripComponents).join('/');
          if (!relativePath) {
            await discard(streamFile);
            return;
          }
        }

        // Apply filter
        if (filterFn) {
          const entryInfo: IArchiveEntryInfo = {
            path: relativePath,
            size: 0,
            isDirectory: false,
            isFile: true,
          };
          if (!filterFn(entryInfo)) {
            await discard(streamFile);
            return;
          }
        }

        // Archive entry names are untrusted input: refuse anything that would
        // land on or above the target directory (e.g. "../../etc/passwd").
        const writePath = plugins.path.join(targetRoot, relativePath);
        const relativeToRoot = plugins.path.relative(targetRoot, writePath);
        const escapesRoot =
          relativeToRoot === '' ||
          relativeToRoot === '..' ||
          relativeToRoot.startsWith('..' + plugins.path.sep) ||
          plugins.path.isAbsolute(relativeToRoot);
        if (escapesRoot) {
          throw new Error(`Refusing to extract entry outside of target directory: ${relativePath}`);
        }

        // Recursive mkdir of the parent also creates targetDir itself.
        await plugins.fsPromises.mkdir(plugins.path.dirname(writePath), { recursive: true });
        const readStream = await streamFile.createReadStream();
        // NOTE(review): overwrite() is not consulted here; the write stream is
        // opened with default flags, so existing files are always replaced.
        const writeStream = plugins.fs.createWriteStream(writePath);
        await new Promise<void>((resolve, reject) => {
          readStream.on('error', (err) => {
            writeStream.destroy();
            reject(err);
          });
          writeStream.on('error', reject);
          writeStream.on('finish', () => resolve());
          readStream.pipe(writeStream);
        });
      } catch (err) {
        // Surface the failure to the caller instead of leaving the promise pending.
        done.reject(err);
        throw err;
      }
    },
    finalFunction: async () => {
      done.resolve();
    },
  });

  // pipe() does not forward errors, so observe both ends explicitly.
  streamFileStream.on('error', (err) => done.reject(err));
  fileWriter.on('error', (err) => done.reject(err));
  streamFileStream.pipe(fileWriter);
  return done.promise;
}
2023-11-06 18:14:21 +01:00
/**
 * Extract the archive to a stream of StreamFile objects.
 *
 * The source is piped through the archive analyzer; depending on the detected
 * type the data is handed to the tar extractor, the zip decompression stream,
 * a decompressor followed by another analysis round (e.g. gzip containing tar),
 * or emitted as a single StreamFile.
 *
 * For tar archives the next entry is only requested after the current entry's
 * stream has ended, so consumers must read (or resume) every StreamFile they
 * receive.
 *
 * Errors raised by the source, tar, or decompression streams destroy the
 * returned intake with that error, so `'error'` listeners on it are notified.
 *
 * @returns object-mode stream emitting one StreamFile per archive entry
 * @throws Error if no source is configured
 */
public async toStreamFiles(): Promise<plugins.smartstream.StreamIntake<plugins.smartfile.StreamFile>> {
  this.ensureExtractionSource();
  const streamFileIntake = new plugins.smartstream.StreamIntake<plugins.smartfile.StreamFile>({
    objectMode: true,
  });

  // Guard to prevent multiple signalEnd calls
  let hasSignaledEnd = false;
  const safeSignalEnd = () => {
    if (!hasSignaledEnd) {
      hasSignaledEnd = true;
      streamFileIntake.signalEnd();
    }
  };

  // pipe() does not propagate errors downstream; route them to the intake so
  // callers waiting on it fail instead of hanging.
  const forwardError = (err: Error) => {
    streamFileIntake.destroy(err);
  };

  const archiveStream = await this.getSourceStream();
  archiveStream.on('error', forwardError);

  const createAnalyzedStream = () => this.archiveAnalyzer.getAnalyzedStream();
  const createUnpackStream = () =>
    plugins.smartstream.createTransformFunction<IAnalyzedResult, void>(
      async (analyzedResultChunk) => {
        if (analyzedResultChunk.fileType?.mime === 'application/x-tar') {
          const tarStream = analyzedResultChunk.decompressionStream as plugins.tarStream.Extract;
          tarStream.on('entry', (header, stream, next) => {
            // Directories carry no content: drain and move on.
            if (header.type === 'directory') {
              stream.resume();
              stream.on('end', () => next());
              return;
            }
            const passThrough = new plugins.stream.PassThrough();
            const streamfile = plugins.smartfile.StreamFile.fromStream(passThrough, header.name);
            streamFileIntake.push(streamfile);
            stream.pipe(passThrough);
            stream.on('end', () => {
              passThrough.end();
              next();
            });
          });
          tarStream.on('error', forwardError);
          tarStream.on('finish', () => {
            safeSignalEnd();
          });
          analyzedResultChunk.resultStream.pipe(analyzedResultChunk.decompressionStream);
        } else if (analyzedResultChunk.fileType?.mime === 'application/zip') {
          analyzedResultChunk.resultStream
            .pipe(analyzedResultChunk.decompressionStream)
            .on('error', forwardError)
            .pipe(
              new plugins.smartstream.SmartDuplex({
                objectMode: true,
                writeFunction: async (streamFileArg: plugins.smartfile.StreamFile) => {
                  streamFileIntake.push(streamFileArg);
                },
                finalFunction: async () => {
                  safeSignalEnd();
                },
              })
            );
        } else if (analyzedResultChunk.isArchive && analyzedResultChunk.decompressionStream) {
          // For nested archives (like gzip containing tar)
          analyzedResultChunk.resultStream
            .pipe(analyzedResultChunk.decompressionStream)
            .on('error', forwardError)
            .pipe(createAnalyzedStream())
            .pipe(createUnpackStream());
        } else {
          // Not a recognised container: expose the data as one file, named
          // after the detected extension (if any).
          const streamFile = plugins.smartfile.StreamFile.fromStream(
            analyzedResultChunk.resultStream,
            analyzedResultChunk.fileType?.ext
          );
          streamFileIntake.push(streamFile);
          safeSignalEnd();
        }
      },
      { objectMode: true }
    );

  archiveStream.pipe(createAnalyzedStream()).pipe(createUnpackStream());
  return streamFileIntake;
}
/**
 * Extract the archive to an array of SmartFile objects (in-memory).
 *
 * Every entry is converted to a SmartFile; configured filters are applied
 * after conversion, using the real content length as `size`. The result
 * follows the order in which entries were emitted, regardless of which
 * conversion finishes first.
 *
 * @returns the SmartFiles that passed the filters
 * @throws Error if no source is configured; rejects on stream or conversion errors
 */
public async toSmartFiles(): Promise<plugins.smartfile.SmartFile[]> {
  this.ensureExtractionSource();
  const streamFiles = await this.toStreamFiles();
  const filterFn = this.buildFilterFunction();
  // One slot per emitted entry; null marks an entry removed by the filter.
  const pendingConversions: Promise<plugins.smartfile.SmartFile | null>[] = [];

  return new Promise<plugins.smartfile.SmartFile[]>((resolve, reject) => {
    streamFiles.on('data', (streamFile: plugins.smartfile.StreamFile) => {
      const conversion = (async () => {
        const smartFile = await streamFile.toSmartFile();
        if (filterFn) {
          const passes = filterFn({
            path: smartFile.relative,
            size: smartFile.contents.length,
            isDirectory: false,
            isFile: true,
          });
          if (!passes) return null;
        }
        return smartFile;
      })();
      // Attach a handler right away so a failure rejects the result promptly
      // and never surfaces as an unhandled rejection.
      conversion.catch(reject);
      pendingConversions.push(conversion);
    });
    streamFiles.on('end', () => {
      // Wait for all conversions to complete before resolving
      Promise.all(pendingConversions).then((converted) => {
        resolve(converted.filter((file): file is plugins.smartfile.SmartFile => file !== null));
      }, reject);
    });
    streamFiles.on('error', reject);
  });
}
/**
 * Extract a single file from the archive by path.
 *
 * An entry matches when its path equals `filePath` or ends with it. The first
 * matching entry is returned; every other entry is drained without being
 * buffered, because the tar reader only advances once the current entry's
 * stream has been consumed.
 *
 * @param filePath path (or path suffix) of the wanted entry
 * @returns the matching SmartFile, or null when the archive has no such entry
 * @throws Error if no source is configured; rejects on stream or conversion errors
 */
public async extractFile(filePath: string): Promise<plugins.smartfile.SmartFile | null> {
  this.ensureExtractionSource();
  const streamFiles = await this.toStreamFiles();
  return new Promise<plugins.smartfile.SmartFile | null>((resolve, reject) => {
    let found = false;
    streamFiles.on('data', (streamFile: plugins.smartfile.StreamFile) => {
      const entryPath = streamFile.relativeFilePath;
      const isMatch =
        !found && (entryPath === filePath || Boolean(entryPath?.endsWith(filePath)));
      if (!isMatch) {
        // Consume and discard so the following entries can be delivered.
        streamFile.createReadStream().then((unread) => unread.resume(), reject);
        return;
      }
      found = true;
      streamFile.toSmartFile().then(resolve, reject);
    });
    streamFiles.on('end', () => {
      if (!found) {
        resolve(null);
      }
    });
    streamFiles.on('error', reject);
  });
}
// ============================================
// TERMINAL METHODS - OUTPUT
// ============================================
/**
 * Finalize the archive (a no-op in extraction mode or when already built).
 *
 * @returns this instance once the build step has completed
 */
public async build(): Promise<SmartArchive> {
  return this.doBuild().then(() => this);
}
/**
 * Internal build step: turns the queued entries into `archiveBuffer`.
 *
 * Returns immediately in extraction mode or when a buffer already exists.
 * Otherwise the queued directories are resolved, filters are applied, and the
 * writer matching `creationFormat` produces the buffer.
 *
 * @throws Error when no format is set, when the format is not handled here,
 *         or when 'gz' is requested with anything but one string/Buffer entry
 */
private async doBuild(): Promise<void> {
  // Nothing to build when extracting, and never build twice.
  if (this._mode === 'extract' || this.archiveBuffer) {
    return;
  }

  this.ensureCreationFormat();
  await this.resolveDirectories();
  const entries = this.getFilteredEntries();

  switch (this.creationFormat) {
    case 'tar':
      this.archiveBuffer = await this.tarTools.packFiles(entries);
      return;
    case 'tar.gz':
    case 'tgz':
      this.archiveBuffer = await this.tarTools.packFilesToTarGz(entries, this._compressionLevel);
      return;
    case 'zip':
      this.archiveBuffer = await this.zipTools.createZip(entries, this._compressionLevel);
      return;
    case 'gz': {
      // gzip is a single-stream format: exactly one in-memory payload is accepted.
      if (entries.length !== 1) {
        throw new Error('GZIP format only supports a single file');
      }
      const payload = entries[0].content;
      let content: Buffer;
      if (typeof payload === 'string') {
        content = Buffer.from(payload);
      } else if (Buffer.isBuffer(payload)) {
        content = payload;
      } else {
        throw new Error('GZIP format requires string or Buffer content');
      }
      this.archiveBuffer = await this.gzipTools.compress(content, this._compressionLevel);
      return;
    }
    default:
      throw new Error(`Unsupported format: ${this.creationFormat}`);
  }
}
/**
 * Return the archive bytes as a Buffer.
 * In creation mode the archive is built first if needed; otherwise the
 * configured source is read completely into memory.
 *
 * @returns the complete archive content
 */
public async toBuffer(): Promise<Buffer> {
  const needsBuild = this._mode === 'create' && !this.archiveBuffer;
  if (needsBuild) {
    await this.doBuild();
  }
  if (!this.archiveBuffer) {
    // No built archive available: read the extraction source instead.
    const source = await this.getSourceStream();
    return this.streamToBuffer(source);
  }
  return this.archiveBuffer;
}
/**
 * Write the archive bytes to a file, creating the parent directory if necessary.
 *
 * @param filePath destination path of the archive file
 */
public async toFile(filePath: string): Promise<void> {
  const archiveBytes = await this.toBuffer();
  const parentDir = plugins.path.dirname(filePath);
  await plugins.fsPromises.mkdir(parentDir, { recursive: true });
  await plugins.fsPromises.writeFile(filePath, archiveBytes);
}
/**
 * Return the archive as a readable stream.
 * In creation mode the archive is built first if needed and served from the
 * cached buffer; otherwise the configured source stream is returned.
 *
 * @returns readable stream over the archive bytes
 */
public async toStream(): Promise<plugins.stream.Readable> {
  const needsBuild = this._mode === 'create' && !this.archiveBuffer;
  if (needsBuild) {
    await this.doBuild();
  }
  if (!this.archiveBuffer) {
    return this.getSourceStream();
  }
  return plugins.stream.Readable.from(this.archiveBuffer);
}
// ============================================
// TERMINAL METHODS - ANALYSIS
// ============================================
/**
 * Analyze the archive and return metadata.
 *
 * Only the first chunk of the source is inspected, so the reported format is
 * that of the outermost container (a gzip-compressed tar is reported as 'gz').
 *
 * A stream opened by this call (URL, file path, or built buffer) is destroyed
 * after sniffing, so the rest of the source is not read just to be discarded.
 * A stream supplied through stream()/buffer() is left untouched, but its first
 * chunk has been consumed.
 *
 * @returns detected format (null when unknown) plus compression/archive flags
 * @throws Error if no source is configured
 */
public async analyze(): Promise<IArchiveInfo> {
  this.ensureExtractionSource();
  const stream = await this.getSourceStream();
  let firstChunk: Buffer;
  try {
    firstChunk = await this.readFirstChunk(stream);
  } finally {
    // Never destroy a caller-owned stream; only clean up what was opened here.
    if (stream !== this.sourceStream) {
      stream.destroy();
    }
  }
  const fileType = await plugins.fileType.fileTypeFromBuffer(firstChunk);

  let format: TArchiveFormat | null = null;
  let isCompressed = false;
  let isArchive = false;
  if (fileType) {
    switch (fileType.mime) {
      case 'application/gzip':
        format = 'gz';
        isCompressed = true;
        isArchive = true;
        break;
      case 'application/zip':
        format = 'zip';
        isCompressed = true;
        isArchive = true;
        break;
      case 'application/x-tar':
        // Plain tar is a container without compression.
        format = 'tar';
        isArchive = true;
        break;
      case 'application/x-bzip2':
        format = 'bz2';
        isCompressed = true;
        isArchive = true;
        break;
    }
  }
  return {
    format,
    isCompressed,
    isArchive,
  };
}
/**
 * List all entries in the archive.
 *
 * Entry contents are drained and discarded while listing: the tar reader only
 * moves to the next entry after the current one has been consumed, so leaving
 * entries unread would stall on files larger than the stream buffers.
 * Filters configured on this instance are not applied here.
 *
 * @returns one info object per file entry; `size` is always 0 because the
 *          content is not measured
 * @throws Error if no source is configured; rejects on stream errors
 */
public async list(): Promise<IArchiveEntryInfo[]> {
  this.ensureExtractionSource();
  const entries: IArchiveEntryInfo[] = [];
  const streamFiles = await this.toStreamFiles();
  return new Promise<IArchiveEntryInfo[]>((resolve, reject) => {
    streamFiles.on('data', (streamFile: plugins.smartfile.StreamFile) => {
      entries.push({
        path: streamFile.relativeFilePath || 'unknown',
        size: 0, // Size not available without reading
        isDirectory: false,
        isFile: true,
      });
      // Consume and discard the content so the next entry can be delivered.
      streamFile.createReadStream().then((unread) => unread.resume(), reject);
    });
    streamFiles.on('end', () => resolve(entries));
    streamFiles.on('error', reject);
  });
}
/**
 * Check whether the archive contains a given file.
 * An entry counts as a hit when its path equals `filePath` or ends with it.
 *
 * @param filePath path (or path suffix) to look for
 * @returns true if at least one entry matches
 */
public async hasFile(filePath: string): Promise<boolean> {
  this.ensureExtractionSource();
  const listed = await this.list();
  for (const { path: entryPath } of listed) {
    if (entryPath === filePath || entryPath.endsWith(filePath)) {
      return true;
    }
  }
  return false;
}
// ============================================
// PRIVATE HELPERS
// ============================================
/**
 * Guard for source methods: they may not be used once creation mode is active.
 *
 * @param methodName name of the calling method, used in the error message
 * @throws Error if the instance is in creation mode
 */
private ensureNotInCreateMode(methodName: string): void {
  if (this._mode !== 'create') {
    return;
  }
  const message =
    `Cannot call .${methodName}() in creation mode. ` +
    `Use extraction methods (.url(), .file(), .stream(), .buffer()) for extraction mode.`;
  throw new Error(message);
}
/**
 * Guard for creation methods: they may not be used once extraction mode is active.
 *
 * @param methodName name of the calling method, used in the error message
 * @throws Error if the instance is in extraction mode
 */
private ensureNotInExtractMode(methodName: string): void {
  if (this._mode !== 'extract') {
    return;
  }
  const message =
    `Cannot call .${methodName}() in extraction mode. ` +
    `Use .format() for creation mode.`;
  throw new Error(message);
}
/**
 * Guard for terminal extraction methods: some source (URL, file path, stream,
 * or an already built archive buffer) must be available.
 *
 * @throws Error if none of the sources is set
 */
private ensureExtractionSource(): void {
  const hasSource = Boolean(
    this.sourceUrl || this.sourceFilePath || this.sourceStream || this.archiveBuffer
  );
  if (hasSource) {
    return;
  }
  throw new Error(
    'No source configured. Call .url(), .file(), .stream(), or .buffer() first.'
  );
}
/**
 * Guard for the build step: an output format must have been chosen.
 *
 * @throws Error if format() was never called
 */
private ensureCreationFormat(): void {
  if (this.creationFormat) {
    return;
  }
  throw new Error('No format specified. Call .format() before creating archive.');
}
/**
 * Produce a readable stream for the archive data.
 *
 * Sources are checked in a fixed order: built archive buffer, stream set via
 * stream()/buffer(), URL, then file path. The stored source stream is returned
 * as-is, so it can be consumed only once.
 *
 * @returns readable stream over the raw archive bytes
 * @throws Error if no source is configured
 */
private async getSourceStream(): Promise<plugins.stream.Readable> {
  const { archiveBuffer, sourceStream, sourceUrl, sourceFilePath } = this;

  if (archiveBuffer) {
    return plugins.stream.Readable.from(archiveBuffer);
  }
  if (sourceStream) {
    return sourceStream;
  }
  if (sourceUrl) {
    const request = plugins.smartrequest.SmartRequest.create().url(sourceUrl);
    const response = await request.get();
    // The response exposes a web stream; convert it to a Node.js Readable.
    const body = response.stream();
    return plugins.stream.Readable.fromWeb(body as any);
  }
  if (sourceFilePath) {
    return plugins.fs.createReadStream(sourceFilePath);
  }
  throw new Error('No archive source configured');
}
/**
 * Build a combined filter function from all configured filters.
 *
 * An entry is accepted when it matches at least one include pattern (if any
 * are configured), matches no exclude pattern, and passes every custom
 * predicate. The returned function reads the current filter lists each time
 * it is called.
 *
 * @returns the combined predicate, or undefined when nothing is configured
 */
private buildFilterFunction(): TEntryFilter | undefined {
  const hasFilters =
    this._filters.length > 0 ||
    this._includePatterns.length > 0 ||
    this._excludePatterns.length > 0;
  if (!hasFilters) {
    return undefined;
  }

  // RegExp#test advances lastIndex on global/sticky patterns, which would make
  // the same path alternately match and not match. Reset it before every test.
  const matches = (pattern: RegExp, value: string): boolean => {
    pattern.lastIndex = 0;
    return pattern.test(value);
  };

  return (entry: IArchiveEntryInfo) => {
    // Check include patterns (if any specified, at least one must match)
    if (this._includePatterns.length > 0) {
      const included = this._includePatterns.some((p) => matches(p, entry.path));
      if (!included) return false;
    }
    // Check exclude patterns (none must match)
    if (this._excludePatterns.some((p) => matches(p, entry.path))) {
      return false;
    }
    // Check custom filters (all must pass)
    return this._filters.every((customFilter) => customFilter(entry));
  };
}
/**
 * Resolve pending directories to entries.
 *
 * Each queued directory is listed with the glob `**\/*`, every listed file is
 * read into memory, and an entry is queued under its listed path (prefixed
 * with `archiveBase` when given). Archive paths are normalised to forward
 * slashes so archives built on Windows do not contain backslash separators.
 * The directory queue is cleared afterwards.
 */
private async resolveDirectories(): Promise<void> {
  for (const dir of this.pendingDirectories) {
    const files = await plugins.listFileTree(dir.sourcePath, '**/*');
    for (const filePath of files) {
      const joinedPath = dir.archiveBase
        ? plugins.path.join(dir.archiveBase, filePath)
        : filePath;
      // Splitting on the platform separator is a no-op on POSIX systems.
      const archivePath = joinedPath.split(plugins.path.sep).join('/');
      const absolutePath = plugins.path.join(dir.sourcePath, filePath);
      const content = await plugins.fsPromises.readFile(absolutePath);
      this.pendingEntries.push({
        archivePath,
        content,
      });
    }
  }
  this.pendingDirectories = [];
}
/**
 * Return the queued entries that pass the configured filters.
 * Without any filter the queue itself is returned. Filters only see the
 * archive path; size is reported as 0 and every entry is treated as a file.
 *
 * @returns entries to be written into the archive
 */
private getFilteredEntries(): IArchiveEntry[] {
  const accepts = this.buildFilterFunction();
  if (accepts === undefined) {
    return this.pendingEntries;
  }
  const kept: IArchiveEntry[] = [];
  for (const candidate of this.pendingEntries) {
    const info: IArchiveEntryInfo = {
      path: candidate.archivePath,
      size: 0,
      isDirectory: false,
      isFile: true,
    };
    if (accepts(info)) {
      kept.push(candidate);
    }
  }
  return kept;
}
/**
 * Read a stream to its end and return everything it emitted as one Buffer.
 *
 * @param stream readable stream to consume
 * @returns concatenation of all chunks; rejects if the stream emits an error
 */
private async streamToBuffer(stream: plugins.stream.Readable): Promise<Buffer> {
  const collected: Buffer[] = [];
  return new Promise<Buffer>((resolve, reject) => {
    stream.on('error', reject);
    stream.on('end', () => {
      resolve(Buffer.concat(collected));
    });
    stream.on('data', (piece) => {
      // Non-Buffer chunks (e.g. strings) are converted before collecting.
      const asBuffer = Buffer.isBuffer(piece) ? piece : Buffer.from(piece);
      collected.push(asBuffer);
    });
  });
}
/**
 * Read the first chunk from a stream.
 *
 * All listeners installed here are removed once the promise settles. The
 * stream is not paused or destroyed; what happens to the remaining data is
 * left to the caller.
 *
 * @param stream readable stream to sample
 * @returns the first emitted chunk as a Buffer, or an empty Buffer when the
 *          stream ends without emitting data; rejects if the stream errors first
 */
private async readFirstChunk(stream: plugins.stream.Readable): Promise<Buffer> {
  return new Promise<Buffer>((resolve, reject) => {
    const cleanup = () => {
      stream.removeListener('data', onData);
      stream.removeListener('end', onEnd);
      stream.removeListener('error', onError);
    };
    const onData = (chunk: Buffer | string) => {
      cleanup();
      resolve(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
    };
    // An empty source never emits 'data'; without this handler the promise
    // would stay pending forever.
    const onEnd = () => {
      cleanup();
      resolve(Buffer.alloc(0));
    };
    const onError = (err: Error) => {
      cleanup();
      reject(err);
    };
    stream.on('data', onData);
    stream.on('end', onEnd);
    stream.on('error', onError);
  });
}
2023-11-06 18:14:21 +01:00
}