BREAKING CHANGE(pypi,rubygems): Revise PyPI and RubyGems handling: normalize error payloads, fix .gem parsing/packing, adjust PyPI JSON API and tests, and export smartarchive plugin

This commit is contained in:
2025-11-25 15:07:59 +00:00
parent fcd95677a0
commit 6291ebf79b
10 changed files with 403 additions and 59 deletions

View File

@@ -85,7 +85,7 @@ export class RubyGemsRegistry extends BaseRegistry {
// Compact Index endpoints
if (path === '/versions' && context.method === 'GET') {
return this.handleVersionsFile();
return this.handleVersionsFile(context);
}
if (path === '/names' && context.method === 'GET') {
@@ -104,6 +104,21 @@ export class RubyGemsRegistry extends BaseRegistry {
return this.handleDownload(downloadMatch[1]);
}
// Legacy specs endpoints (Marshal format)
if (path === '/specs.4.8.gz' && context.method === 'GET') {
return this.handleSpecs(false);
}
if (path === '/latest_specs.4.8.gz' && context.method === 'GET') {
return this.handleSpecs(true);
}
// Quick gemspec endpoint: GET /quick/Marshal.4.8/{gem}-{version}.gemspec.rz
const quickMatch = path.match(/^\/quick\/Marshal\.4\.8\/(.+)\.gemspec\.rz$/);
if (quickMatch && context.method === 'GET') {
return this.handleQuickGemspec(quickMatch[1]);
}
// API v1 endpoints
if (path.startsWith('/api/v1/')) {
return this.handleApiRequest(path.substring(7), context, token);
@@ -112,7 +127,7 @@ export class RubyGemsRegistry extends BaseRegistry {
return {
status: 404,
headers: { 'Content-Type': 'application/json' },
body: Buffer.from(JSON.stringify({ message: 'Not Found' })),
body: { error: 'Not Found' },
};
}
@@ -141,20 +156,36 @@ export class RubyGemsRegistry extends BaseRegistry {
/**
* Handle /versions endpoint (Compact Index)
* Supports conditional GET with If-None-Match header
*/
private async handleVersionsFile(): Promise<IResponse> {
private async handleVersionsFile(context: IRequestContext): Promise<IResponse> {
const content = await this.storage.getRubyGemsVersions();
if (!content) {
return this.errorResponse(500, 'Versions file not initialized');
}
const etag = `"${await helpers.calculateMD5(content)}"`;
// Handle conditional GET with If-None-Match
const ifNoneMatch = context.headers['if-none-match'] || context.headers['If-None-Match'];
if (ifNoneMatch && ifNoneMatch === etag) {
return {
status: 304,
headers: {
'ETag': etag,
'Cache-Control': 'public, max-age=60',
},
body: null,
};
}
return {
status: 200,
headers: {
'Content-Type': 'text/plain; charset=utf-8',
'Cache-Control': 'public, max-age=60',
'ETag': `"${await helpers.calculateMD5(content)}"`
'ETag': etag
},
body: Buffer.from(content),
};
@@ -292,14 +323,15 @@ export class RubyGemsRegistry extends BaseRegistry {
// Try to get metadata from query params or headers first
let gemName = context.query?.name || context.headers['x-gem-name'] as string | undefined;
let version = context.query?.version || context.headers['x-gem-version'] as string | undefined;
const platform = context.query?.platform || context.headers['x-gem-platform'] as string | undefined;
let platform = context.query?.platform || context.headers['x-gem-platform'] as string | undefined;
// If not provided, try to extract from gem binary
if (!gemName || !version) {
if (!gemName || !version || !platform) {
const extracted = await helpers.extractGemMetadata(gemData);
if (extracted) {
gemName = gemName || extracted.name;
version = version || extracted.version;
platform = platform || extracted.platform;
}
}
@@ -361,11 +393,11 @@ export class RubyGemsRegistry extends BaseRegistry {
return {
status: 201,
headers: { 'Content-Type': 'application/json' },
body: Buffer.from(JSON.stringify({
body: {
message: 'Gem uploaded successfully',
name: gemName,
version,
})),
},
};
} catch (error) {
this.logger.log('error', 'Upload failed', { error: (error as Error).message });
@@ -417,10 +449,10 @@ export class RubyGemsRegistry extends BaseRegistry {
return {
status: 200,
headers: { 'Content-Type': 'application/json' },
body: Buffer.from(JSON.stringify({
body: {
success: true,
message: 'Gem yanked successfully'
})),
},
};
}
@@ -467,10 +499,10 @@ export class RubyGemsRegistry extends BaseRegistry {
return {
status: 200,
headers: { 'Content-Type': 'application/json' },
body: Buffer.from(JSON.stringify({
body: {
success: true,
message: 'Gem unyanked successfully'
})),
},
};
}
@@ -497,7 +529,7 @@ export class RubyGemsRegistry extends BaseRegistry {
'Content-Type': 'application/json',
'Cache-Control': 'public, max-age=300'
},
body: Buffer.from(JSON.stringify(response)),
body: response,
};
}
@@ -525,7 +557,7 @@ export class RubyGemsRegistry extends BaseRegistry {
return {
status: 200,
headers: { 'Content-Type': 'application/json' },
body: Buffer.from(JSON.stringify(response)),
body: response,
};
}
@@ -592,15 +624,109 @@ export class RubyGemsRegistry extends BaseRegistry {
}
}
/**
 * Serve the legacy spec index endpoints `/specs.4.8.gz` and `/latest_specs.4.8.gz`.
 *
 * Reads the stored names file, loads the metadata of every listed gem and collects
 * `[name, version, platform]` tuples for all non-yanked versions. The tuples are
 * encoded by `helpers.generateSpecsGz` and returned as an octet-stream body.
 *
 * @param latestOnly - When true, only the first entry of each gem's descending
 *   version ordering is included; otherwise every non-yanked version is listed.
 * @returns A 200 response carrying the encoded index, or a 500 error response
 *   if anything throws while building it.
 */
private async handleSpecs(latestOnly: boolean): Promise<IResponse> {
  try {
    const namesFile = await this.storage.getRubyGemsNames();

    // A missing names file simply means there are no gems: the index is empty.
    // Blank lines and the `---` separator line are not gem names.
    const gemNames = namesFile
      ? namesFile.split('\n').filter((line) => line && line !== '---')
      : [];

    const tuples: Array<[string, string, string]> = [];
    for (const gemName of gemNames) {
      const metadata = await this.storage.getRubyGemsMetadata(gemName);
      if (!metadata) {
        continue;
      }

      const active = (Object.values(metadata.versions) as IRubyGemsVersionMetadata[]).filter(
        (entry) => !entry.yanked,
      );
      // Newest first; numeric collation keeps "1.10.0" ahead of "1.9.0".
      active.sort((left, right) =>
        right.version.localeCompare(left.version, undefined, { numeric: true }),
      );

      const selected = latestOnly ? active.slice(0, 1) : active;
      for (const entry of selected) {
        // Entries without an explicit platform are listed under "ruby".
        tuples.push([gemName, entry.version, entry.platform || 'ruby']);
      }
    }

    return {
      status: 200,
      headers: {
        'Content-Type': 'application/octet-stream',
      },
      body: await helpers.generateSpecsGz(tuples),
    };
  } catch (error) {
    this.logger.log('error', 'Failed to generate specs', { error: (error as Error).message });
    return this.errorResponse(500, 'Failed to generate specs');
  }
}
/**
 * Handle the `/quick/Marshal.4.8/{gem}-{version}.gemspec.rz` endpoint.
 *
 * The path segment is parsed with `helpers.parseGemFilename` (after appending
 * `.gem`) into name, version and optional platform. The version entry is then
 * looked up in the stored gem metadata under `version` or `version-platform`,
 * and the compressed gemspec is produced by `helpers.generateGemspecRz`.
 *
 * Failures while reading storage or generating the gemspec are logged and
 * answered with a 500 error response instead of propagating as an exception,
 * matching how the specs and upload handlers report failures.
 *
 * @param gemVersionStr - Gem name and version string
 *   (e.g. "rails-7.0.0" or "rails-7.0.0-x86_64-linux")
 * @returns 200 with the compressed gemspec, 400 for an unparsable path,
 *   404 when the gem or version is unknown, 500 on unexpected errors.
 */
private async handleQuickGemspec(gemVersionStr: string): Promise<IResponse> {
  try {
    // Reuse the .gem filename parser by re-attaching the extension it expects.
    const parsed = helpers.parseGemFilename(gemVersionStr + '.gem');
    if (!parsed) {
      return this.errorResponse(400, 'Invalid gemspec path');
    }

    const metadata = await this.storage.getRubyGemsMetadata(parsed.name);
    if (!metadata) {
      return this.errorResponse(404, 'Gem not found');
    }

    // Platform-specific builds are keyed as "<version>-<platform>".
    const versionKey = parsed.platform ? `${parsed.version}-${parsed.platform}` : parsed.version;
    const versionMeta = metadata.versions[versionKey];
    if (!versionMeta) {
      return this.errorResponse(404, 'Version not found');
    }

    // Generate a minimal gemspec representation
    const gemspecData = await helpers.generateGemspecRz(parsed.name, versionMeta);

    return {
      status: 200,
      headers: {
        'Content-Type': 'application/octet-stream',
      },
      body: gemspecData,
    };
  } catch (error) {
    this.logger.log('error', 'Failed to generate gemspec', { error: (error as Error).message });
    return this.errorResponse(500, 'Failed to generate gemspec');
  }
}
/**
 * Helper: create a JSON error response.
 *
 * The payload uses the normalized `{ error, status }` shape and is passed as a
 * plain object body, like the other JSON responses of this registry (for
 * example the 404 fallback `{ error: 'Not Found' }`).
 *
 * @param status - HTTP status code, used for the response and echoed in the payload
 * @param message - Human-readable error text, placed under the `error` key
 * @returns The error response
 */
private errorResponse(status: number, message: string): IResponse {
  const error: IRubyGemsError = { error: message, status };
  return {
    status,
    headers: { 'Content-Type': 'application/json' },
    body: error,
  };
}
}

View File

@@ -3,6 +3,8 @@
* Compact Index generation, dependency formatting, etc.
*/
import * as plugins from '../plugins.js';
import type {
IRubyGemsVersion,
IRubyGemsDependency,
@@ -399,8 +401,10 @@ export async function extractGemSpec(gemData: Buffer): Promise<any | null> {
/**
 * Extract basic metadata from a gem file
 * Gem files are plain tar archives (NOT gzipped) containing:
 * - metadata.gz: gzipped YAML with gem specification
 * - data.tar.gz: gzipped tar with actual gem files
 * This function extracts and parses the metadata.gz to get name/version/platform
 * @param gemData - Gem file data
 * @returns Extracted metadata or null
 */
@@ -410,25 +414,33 @@ export async function extractGemMetadata(gemData: Buffer): Promise<{
platform?: string;
} | null> {
try {
// Gem format: outer tar.gz containing metadata.gz and data.tar.gz
// metadata.gz contains YAML with gem specification
// Step 1: Extract the plain tar archive to get metadata.gz
const smartArchive = plugins.smartarchive.SmartArchive.create();
const files = await smartArchive.buffer(gemData).toSmartFiles();
// Attempt to find YAML metadata in the gem binary
// The metadata is gzipped, but we can look for patterns in the decompressed portion
// For test gems created with our helper, the YAML is accessible after gunzip
const searchBuffer = gemData.toString('utf-8', 0, Math.min(gemData.length, 20000));
// Find metadata.gz
const metadataFile = files.find(f => f.path === 'metadata.gz' || f.relative === 'metadata.gz');
if (!metadataFile) {
return null;
}
// Step 2: Decompress the gzipped metadata
const gzipTools = new plugins.smartarchive.GzipTools();
const metadataYaml = await gzipTools.decompress(metadataFile.contentBuffer);
const yamlContent = metadataYaml.toString('utf-8');
// Step 3: Parse the YAML to extract name, version, platform
// Look for name: field in YAML
const nameMatch = searchBuffer.match(/name:\s*([^\n\r]+)/);
const nameMatch = yamlContent.match(/name:\s*([^\n\r]+)/);
// Look for version in Ruby YAML format: version: !ruby/object:Gem::Version\n version: X.X.X
const versionMatch = searchBuffer.match(/version:\s*!ruby\/object:Gem::Version[\s\S]*?version:\s*['"]?([^'"\n\r]+)/);
const versionMatch = yamlContent.match(/version:\s*!ruby\/object:Gem::Version[\s\S]*?version:\s*['"]?([^'"\n\r]+)/);
// Also try simpler version format
const simpleVersionMatch = !versionMatch ? searchBuffer.match(/^version:\s*['"]?(\d[^'"\n\r]*)/m) : null;
const simpleVersionMatch = !versionMatch ? yamlContent.match(/^version:\s*['"]?(\d[^'"\n\r]*)/m) : null;
// Look for platform
const platformMatch = searchBuffer.match(/platform:\s*([^\n\r]+)/);
const platformMatch = yamlContent.match(/platform:\s*([^\n\r]+)/);
const name = nameMatch?.[1]?.trim();
const version = versionMatch?.[1]?.trim() || simpleVersionMatch?.[1]?.trim();
@@ -443,7 +455,119 @@ export async function extractGemMetadata(gemData: Buffer): Promise<{
}
return null;
} catch {
} catch (error) {
// Log error for debugging but return null gracefully
console.error('Failed to extract gem metadata:', error);
return null;
}
}
/**
 * Generate the gzipped specs index for /specs.4.8.gz and /latest_specs.4.8.gz.
 *
 * The payload is a Ruby Marshal 4.8 stream describing an array of
 * `[name, Gem::Version, platform]` tuples, which is what RubyGems clients
 * `Marshal.load` after gunzipping these endpoints:
 *
 * - stream header `0x04 0x08`
 * - arrays as `[` + length
 * - strings as `I"` + byte length + UTF-8 bytes + one instance variable `:E => true`
 *   (marks the string as UTF-8)
 * - versions as `U` + class symbol `Gem::Version` + a one-element array holding
 *   the version string (the `marshal_dump` form of `Gem::Version`)
 * - repeated symbols as `;` + index into the symbol table
 *
 * @param specs - Array of [name, version, platform] tuples
 * @returns Gzipped Marshal data
 * @throws RangeError if a length does not fit the Marshal fixnum encoding
 */
export async function generateSpecsGz(specs: Array<[string, string, string]>): Promise<Buffer> {
  const zlib = await import('zlib');
  const { promisify } = await import('util');
  const gzip = promisify(zlib.gzip);

  const chunks: Buffer[] = [];
  // Marshal numbers symbols in order of first appearance; later uses are back-references.
  const symbolTable = new Map<string, number>();

  /** Marshal's variable-length encoding for non-negative integers. */
  const writeLong = (value: number): void => {
    if (!Number.isInteger(value) || value < 0 || value >= 2 ** 30) {
      throw new RangeError(`Cannot encode ${value} as a Marshal length`);
    }
    if (value === 0) {
      chunks.push(Buffer.from([0]));
    } else if (value < 123) {
      // Small values are stored in a single byte, offset by 5.
      chunks.push(Buffer.from([value + 5]));
    } else {
      // Larger values: byte count followed by the value in little-endian order.
      const bytes: number[] = [];
      for (let rest = value; rest > 0; rest = Math.floor(rest / 256)) {
        bytes.push(rest % 256);
      }
      chunks.push(Buffer.from([bytes.length, ...bytes]));
    }
  };

  const writeSymbol = (symbol: string): void => {
    const known = symbolTable.get(symbol);
    if (known !== undefined) {
      chunks.push(Buffer.from(';', 'latin1'));
      writeLong(known);
      return;
    }
    symbolTable.set(symbol, symbolTable.size);
    const raw = Buffer.from(symbol, 'utf-8');
    chunks.push(Buffer.from(':', 'latin1'));
    writeLong(raw.length);
    chunks.push(raw);
  };

  const writeString = (text: string): void => {
    const raw = Buffer.from(text, 'utf-8');
    chunks.push(Buffer.from('I"', 'latin1'));
    writeLong(raw.length);
    chunks.push(raw);
    writeLong(1); // one instance variable follows
    writeSymbol('E');
    chunks.push(Buffer.from('T', 'latin1')); // E => true means UTF-8
  };

  const writeArrayHeader = (length: number): void => {
    chunks.push(Buffer.from('[', 'latin1'));
    writeLong(length);
  };

  chunks.push(Buffer.from([0x04, 0x08])); // Marshal format version 4.8
  writeArrayHeader(specs.length);
  for (const [name, version, platform] of specs) {
    writeArrayHeader(3);
    writeString(name);
    chunks.push(Buffer.from('U', 'latin1'));
    writeSymbol('Gem::Version');
    writeArrayHeader(1);
    writeString(version);
    writeString(platform);
  }

  return gzip(Buffer.concat(chunks));
}
/**
 * Generate compressed gemspec for /quick/Marshal.4.8/{gem}-{version}.gemspec.rz
 *
 * The real endpoint serves a zlib-compressed Ruby Marshal dump of the gemspec.
 * This function does not produce Marshal; it emits a simplified
 * `Gem::Specification` YAML document and deflates it.
 *
 * The document is assembled line by line with explicit indentation so that the
 * nested `Gem::Version` / `Gem::Requirement` mappings stay nested. A version
 * that YAML would read as a number (e.g. `1.0`) is single-quoted, like the
 * `'0'` versions inside the requirement sections.
 *
 * Only `version` and `platform` are read from `versionMeta`; `checksum` and
 * `dependencies` are accepted but not rendered (the document always contains
 * `dependencies: []`).
 *
 * @param name - Gem name
 * @param versionMeta - Version metadata
 * @returns Zlib-compressed gemspec data
 */
export async function generateGemspecRz(
  name: string,
  versionMeta: {
    version: string;
    platform?: string;
    checksum: string;
    dependencies?: Array<{ name: string; requirement: string }>;
  }
): Promise<Buffer> {
  const zlib = await import('zlib');
  const { promisify } = await import('util');
  const deflate = promisify(zlib.deflate);

  // Plain scalars such as 1 or 1.0 would be loaded as Integer/Float, not String.
  const looksNumeric = /^\d+(\.\d+)?$/.test(versionMeta.version);
  const versionScalar = looksNumeric ? `'${versionMeta.version}'` : versionMeta.version;

  // Body of a Gem::Requirement that accepts any version (">= 0").
  const anyVersionRequirement = [
    '  requirements:',
    '  - - ">="',
    '    - !ruby/object:Gem::Version',
    "      version: '0'",
  ];

  const lines = [
    '--- !ruby/object:Gem::Specification',
    `name: ${name}`,
    'version: !ruby/object:Gem::Version',
    `  version: ${versionScalar}`,
    `platform: ${versionMeta.platform || 'ruby'}`,
    'authors: []',
    `date: ${new Date().toISOString().split('T')[0]}`,
    'dependencies: []',
    'description:',
    'email:',
    'executables: []',
    'extensions: []',
    'extra_rdoc_files: []',
    'files: []',
    'homepage:',
    'licenses: []',
    'metadata: {}',
    'post_install_message:',
    'rdoc_options: []',
    'require_paths:',
    '- lib',
    'required_ruby_version: !ruby/object:Gem::Requirement',
    ...anyVersionRequirement,
    'required_rubygems_version: !ruby/object:Gem::Requirement',
    ...anyVersionRequirement,
    'requirements: []',
    'rubygems_version: 3.0.0',
    'signing_key:',
    'specification_version: 4',
    'summary:',
    'test_files: []',
  ];

  // Use zlib deflate (not gzip) for .rz files
  return deflate(Buffer.from(lines.join('\n') + '\n', 'utf-8'));
}

View File

@@ -211,7 +211,7 @@ export interface IRubyGemsDependenciesResponse {
*/
export interface IRubyGemsError {
  /** Human-readable error message, serialized under the `error` key */
  error: string;
  /** HTTP status code */
  status?: number;
}