fix(protocol): Fix inline timing metadata parsing and enhance test coverage for performance metrics and timing edge cases

This commit is contained in:
Philipp Kunz 2025-05-26 08:22:26 +00:00
parent 845f146e91
commit 899045e6aa
7 changed files with 651 additions and 4 deletions

View File

@ -1,5 +1,12 @@
# Changelog
## 2025-05-26 - 2.2.5 - fix(protocol)
Fix inline timing metadata parsing and enhance test coverage for performance metrics and timing edge cases
- Updated the protocol parser to correctly parse inline key:value pairs while excluding prefixed formats (META:, SKIP:, TODO:, EVENT:)
- Added new tests for performance metrics, timing edge cases, and protocol timing to verify accurate timing capture and retry handling
- Expanded documentation in readme.hints.md to detail the updated timing implementation and parser fixes
## 2025-05-26 - 2.2.4 - fix(logging)
Improve performance metrics reporting and add local permissions configuration

View File

@ -265,4 +265,59 @@ Previously reported issues with these methods have been resolved:
- Other tests are not executed but still counted
- Both regular and parallel only tests supported
These fixes ensure accurate test counts and proper TAP-compliant output for all test states.
These fixes ensure accurate test counts and proper TAP-compliant output for all test states.
## Test Timing Implementation
### Timing Architecture
Test timing is captured using `@push.rocks/smarttime`'s `HrtMeasurement` class, which provides high-resolution timing:
1. **Timing Capture**:
- Each `TapTest` instance has its own `HrtMeasurement`
- Timer starts immediately before test function execution
- Timer stops after test completes (or fails/times out)
- Millisecond precision is used for reporting
2. **Protocol Integration**:
- Timing is embedded in TAP output using Protocol V2 markers
- Inline format for simple timing: `ok 1 - test name ⟦TSTEST:time:123⟧`
- Block format for complex metadata: `⟦TSTEST:META:{"time":456,"file":"test.ts"}⟧`
3. **Performance Metrics Calculation**:
- Average is calculated from sum of individual test times, not total runtime
- Slowest test detection prefers tests with >0ms duration
- Failed tests still contribute their execution time to metrics
### Edge Cases and Considerations
1. **Sub-millisecond Tests**:
- Very fast tests may report 0ms due to millisecond rounding
- Performance metrics handle this by showing "All tests completed in <1ms" when appropriate
2. **Special Test States**:
- **Skipped tests**: Report 0ms (not executed)
- **Todo tests**: Report 0ms (not executed)
- **Failed tests**: Report actual execution time before failure
- **Timeout tests**: Report time until timeout occurred
3. **Parallel Test Timing**:
- Each parallel test tracks its own execution time independently
- Parallel tests may have overlapping execution periods
- Total suite time reflects wall-clock time, not sum of test times
4. **Hook Timing**:
- `beforeEach`/`afterEach` hooks are not included in individual test times
- Only the actual test function execution is measured
5. **Retry Timing**:
- When tests retry, only the final attempt's duration is reported
- Each retry attempt emits separate `test:started` events
### Parser Fix for Timing Metadata
The protocol parser was fixed to correctly handle inline timing metadata:
- Changed condition from `!simpleMatch[1].includes(':')` to check for simple key:value pairs
- Excludes prefixed formats (META:, SKIP:, TODO:, EVENT:) while parsing simple formats like `time:250`
This ensures timing metadata is correctly extracted and displayed in test results.

View File

@ -0,0 +1,167 @@
import { tap, expect } from '../../ts_tapbundle/index.js';
// Create tests with known, distinct timing patterns to verify metrics calculation
tap.test('metric test 1 - 10ms baseline', async (tools) => {
await tools.delayFor(10);
expect(true).toBeTrue();
});
tap.test('metric test 2 - 20ms double baseline', async (tools) => {
await tools.delayFor(20);
expect(true).toBeTrue();
});
tap.test('metric test 3 - 30ms triple baseline', async (tools) => {
await tools.delayFor(30);
expect(true).toBeTrue();
});
tap.test('metric test 4 - 40ms quadruple baseline', async (tools) => {
await tools.delayFor(40);
expect(true).toBeTrue();
});
tap.test('metric test 5 - 50ms quintuple baseline', async (tools) => {
await tools.delayFor(50);
expect(true).toBeTrue();
});
// Test that should be the slowest
tap.test('metric test slowest - 200ms intentionally slow', async (tools) => {
await tools.delayFor(200);
expect(true).toBeTrue();
});
// Tests to verify edge cases in average calculation
tap.test('metric test fast 1 - minimal work', async () => {
expect(1).toEqual(1);
});
tap.test('metric test fast 2 - minimal work', async () => {
expect(2).toEqual(2);
});
tap.test('metric test fast 3 - minimal work', async () => {
expect(3).toEqual(3);
});
// Test to verify that failed tests still contribute to timing metrics
tap.test('metric test that fails - 60ms before failure', async (tools) => {
await tools.delayFor(60);
expect(true).toBeFalse(); // This will fail
});
// Describe block with timing to test aggregation
tap.describe('performance metrics in describe block', () => {
tap.test('described test 1 - 15ms', async (tools) => {
await tools.delayFor(15);
expect(true).toBeTrue();
});
tap.test('described test 2 - 25ms', async (tools) => {
await tools.delayFor(25);
expect(true).toBeTrue();
});
tap.test('described test 3 - 35ms', async (tools) => {
await tools.delayFor(35);
expect(true).toBeTrue();
});
});
// Test timing with hooks
tap.describe('performance with hooks', () => {
let hookTime = 0;
tap.beforeEach(async () => {
// Hooks shouldn't count toward test time
await new Promise(resolve => setTimeout(resolve, 10));
hookTime += 10;
});
tap.afterEach(async () => {
// Hooks shouldn't count toward test time
await new Promise(resolve => setTimeout(resolve, 10));
hookTime += 10;
});
tap.test('test with hooks 1 - should only count test time', async (tools) => {
await tools.delayFor(30);
expect(true).toBeTrue();
// Test time should be ~30ms, not 50ms (including hooks)
});
tap.test('test with hooks 2 - should only count test time', async (tools) => {
await tools.delayFor(40);
expect(true).toBeTrue();
// Test time should be ~40ms, not 60ms (including hooks)
});
});
// Parallel tests to verify timing is captured correctly
tap.describe('parallel timing verification', () => {
const startTimes: Map<string, number> = new Map();
const endTimes: Map<string, number> = new Map();
tap.testParallel('parallel metric 1 - 80ms', async (tools) => {
startTimes.set('p1', Date.now());
await tools.delayFor(80);
endTimes.set('p1', Date.now());
expect(true).toBeTrue();
});
tap.testParallel('parallel metric 2 - 90ms', async (tools) => {
startTimes.set('p2', Date.now());
await tools.delayFor(90);
endTimes.set('p2', Date.now());
expect(true).toBeTrue();
});
tap.testParallel('parallel metric 3 - 100ms', async (tools) => {
startTimes.set('p3', Date.now());
await tools.delayFor(100);
endTimes.set('p3', Date.now());
expect(true).toBeTrue();
});
tap.test('verify parallel execution', async () => {
// This test runs after parallel tests
// Verify they actually ran in parallel by checking overlapping times
if (startTimes.size === 3 && endTimes.size === 3) {
const p1Start = startTimes.get('p1')!;
const p2Start = startTimes.get('p2')!;
const p3Start = startTimes.get('p3')!;
const p1End = endTimes.get('p1')!;
const p2End = endTimes.get('p2')!;
const p3End = endTimes.get('p3')!;
// Start times should be very close (within 50ms)
expect(Math.abs(p1Start - p2Start)).toBeLessThan(50);
expect(Math.abs(p2Start - p3Start)).toBeLessThan(50);
// There should be overlap in execution
const p1Overlaps = p1Start < p2End && p1End > p2Start;
const p2Overlaps = p2Start < p3End && p2End > p3Start;
expect(p1Overlaps || p2Overlaps).toBeTrue();
} else {
// Skip verification if parallel tests didn't run yet
expect(true).toBeTrue();
}
});
});
// Test to ensure average calculation handles mixed timing correctly
tap.test('final metrics test - 5ms minimal', async (tools) => {
await tools.delayFor(5);
expect(true).toBeTrue();
console.log('\n📊 Expected Performance Metrics Summary:');
console.log('- Tests include a mix of durations from <1ms to 200ms');
console.log('- Slowest test should be "metric test slowest" at ~200ms');
console.log('- Average should be calculated from individual test times');
console.log('- Failed test should still contribute its 60ms to timing');
console.log('- Parallel tests should show their individual times (80ms, 90ms, 100ms)');
});
tap.start();

View File

@ -0,0 +1,214 @@
import { tap, expect } from '../../ts_tapbundle/index.js';
tap.test('ultra-fast test - should capture sub-millisecond timing', async () => {
// This test does almost nothing, should complete in < 1ms
const x = 1 + 1;
expect(x).toEqual(2);
});
tap.test('test with exact 1ms delay', async (tools) => {
const start = Date.now();
await tools.delayFor(1);
const elapsed = Date.now() - start;
// Should be at least 1ms but could be more due to event loop
expect(elapsed).toBeGreaterThanOrEqual(1);
});
tap.test('test with 10ms delay', async (tools) => {
await tools.delayFor(10);
expect(true).toBeTrue();
});
tap.test('test with 100ms delay', async (tools) => {
await tools.delayFor(100);
expect(true).toBeTrue();
});
tap.test('test with 250ms delay', async (tools) => {
await tools.delayFor(250);
expect(true).toBeTrue();
});
tap.test('test with 500ms delay', async (tools) => {
await tools.delayFor(500);
expect(true).toBeTrue();
});
tap.test('test with variable processing time', async (tools) => {
// Simulate variable processing
const iterations = 1000000;
let sum = 0;
for (let i = 0; i < iterations; i++) {
sum += Math.sqrt(i);
}
expect(sum).toBeGreaterThan(0);
// Add a small delay to ensure measurable time
await tools.delayFor(5);
});
tap.test('test with multiple async operations', async () => {
// Multiple promises in parallel
const results = await Promise.all([
new Promise(resolve => setTimeout(() => resolve(1), 10)),
new Promise(resolve => setTimeout(() => resolve(2), 20)),
new Promise(resolve => setTimeout(() => resolve(3), 30))
]);
expect(results).toEqual([1, 2, 3]);
// This should take at least 30ms (the longest delay)
});
tap.test('test with synchronous heavy computation', async () => {
// Heavy synchronous computation
const fibonacci = (n: number): number => {
if (n <= 1) return n;
return fibonacci(n - 1) + fibonacci(n - 2);
};
// Calculate fibonacci(30) - should take measurable time
const result = fibonacci(30);
expect(result).toEqual(832040);
});
// Test with retry to see if timing accumulates correctly
tap.retry(2).test('test with retry - fails first then passes', async (tools) => {
// Get or initialize retry count
const retryCount = tools.context.get('retryCount') || 0;
tools.context.set('retryCount', retryCount + 1);
await tools.delayFor(50);
if (retryCount === 0) {
throw new Error('First attempt fails');
}
expect(retryCount).toEqual(1);
});
// Test timeout handling
tap.timeout(100).test('test with timeout - should complete just in time', async (tools) => {
await tools.delayFor(80); // Just under the timeout
expect(true).toBeTrue();
});
// Skip test - should show 0ms
tap.skip.test('skipped test - should report 0ms', async (tools) => {
await tools.delayFor(1000); // This won't execute
expect(true).toBeTrue();
});
// Todo test - should show 0ms
tap.todo.test('todo test - should report 0ms', async (tools) => {
await tools.delayFor(1000); // This won't execute
expect(true).toBeTrue();
});
// Test with skip inside
tap.test('test that skips conditionally - should show time until skip', async (tools) => {
await tools.delayFor(25);
const shouldSkip = true;
if (shouldSkip) {
tools.skip('Skipping after 25ms');
}
// This won't execute
await tools.delayFor(1000);
expect(true).toBeTrue();
});
// Test with very precise timing
tap.test('test with precise timing measurements', async (tools) => {
const measurements: number[] = [];
for (let i = 0; i < 5; i++) {
const start = process.hrtime.bigint();
await tools.delayFor(10);
const end = process.hrtime.bigint();
const durationMs = Number(end - start) / 1_000_000;
measurements.push(durationMs);
}
// All measurements should be at least 10ms
measurements.forEach(m => {
expect(m).toBeGreaterThanOrEqual(10);
});
// But not too much more (accounting for timer precision)
measurements.forEach(m => {
expect(m).toBeLessThan(20);
});
});
// Test that intentionally has 0 actual work
tap.test('empty test - absolute minimum execution time', async () => {
// Literally nothing
});
// Test with promise that resolves immediately
tap.test('test with immediate promise resolution', async () => {
await Promise.resolve();
expect(true).toBeTrue();
});
// Test with microtask queue
tap.test('test with microtask queue processing', async () => {
let value = 0;
await Promise.resolve().then(() => {
value = 1;
return Promise.resolve();
}).then(() => {
value = 2;
return Promise.resolve();
}).then(() => {
value = 3;
});
expect(value).toEqual(3);
});
// Test to verify timing accumulation in describe blocks
tap.describe('timing in describe blocks', () => {
let startTime: number;
tap.beforeEach(async () => {
startTime = Date.now();
await new Promise(resolve => setTimeout(resolve, 5));
});
tap.afterEach(async () => {
await new Promise(resolve => setTimeout(resolve, 5));
});
tap.test('first test in describe', async (tools) => {
await tools.delayFor(10);
const elapsed = Date.now() - startTime;
expect(elapsed).toBeGreaterThanOrEqual(10);
});
tap.test('second test in describe', async (tools) => {
await tools.delayFor(20);
const elapsed = Date.now() - startTime;
expect(elapsed).toBeGreaterThanOrEqual(20);
});
});
// Parallel tests to see timing differences
tap.testParallel('parallel test 1 - 100ms', async (tools) => {
await tools.delayFor(100);
expect(true).toBeTrue();
});
tap.testParallel('parallel test 2 - 50ms', async (tools) => {
await tools.delayFor(50);
expect(true).toBeTrue();
});
tap.testParallel('parallel test 3 - 150ms', async (tools) => {
await tools.delayFor(150);
expect(true).toBeTrue();
});
tap.start();

View File

@ -0,0 +1,204 @@
import { tap, expect } from '../../ts_tapbundle/index.js';
import { ProtocolParser, ProtocolEmitter } from '../../ts_tapbundle_protocol/index.js';
// Test the protocol's ability to emit and parse timing metadata
tap.test('protocol should correctly emit timing metadata', async () => {
const emitter = new ProtocolEmitter();
const testResult = {
ok: true,
testNumber: 1,
description: 'test with timing',
metadata: {
time: 123
}
};
const lines = emitter.emitTest(testResult);
// Should have inline timing metadata
expect(lines.length).toEqual(1);
expect(lines[0]).toInclude('⟦TSTEST:time:123⟧');
});
tap.test('protocol should correctly parse timing metadata', async () => {
const parser = new ProtocolParser();
const line = 'ok 1 - test with timing ⟦TSTEST:time:456⟧';
const messages = parser.parseLine(line);
expect(messages.length).toEqual(1);
expect(messages[0].type).toEqual('test');
const content = messages[0].content as any;
expect(content.metadata).toBeDefined();
expect(content.metadata.time).toEqual(456);
});
tap.test('protocol should handle 0ms timing', async () => {
const parser = new ProtocolParser();
const line = 'ok 1 - ultra fast test ⟦TSTEST:time:0⟧';
const messages = parser.parseLine(line);
const content = messages[0].content as any;
expect(content.metadata.time).toEqual(0);
});
tap.test('protocol should handle large timing values', async () => {
const parser = new ProtocolParser();
const line = 'ok 1 - slow test ⟦TSTEST:time:999999⟧';
const messages = parser.parseLine(line);
const content = messages[0].content as any;
expect(content.metadata.time).toEqual(999999);
});
tap.test('protocol should handle timing with other metadata', async () => {
const emitter = new ProtocolEmitter();
const testResult = {
ok: true,
testNumber: 1,
description: 'complex test',
metadata: {
time: 789,
file: 'test.ts',
tags: ['slow', 'integration']
}
};
const lines = emitter.emitTest(testResult);
// Should use block metadata format for complex metadata
expect(lines.length).toBeGreaterThan(1);
expect(lines[1]).toInclude('META:');
expect(lines[1]).toInclude('"time":789');
});
tap.test('protocol should parse timing from block metadata', async () => {
const parser = new ProtocolParser();
const lines = [
'ok 1 - complex test',
'⟦TSTEST:META:{"time":321,"file":"test.ts"}⟧'
];
let testResult: any;
for (const line of lines) {
const messages = parser.parseLine(line);
if (messages.length > 0 && messages[0].type === 'test') {
testResult = messages[0].content;
}
}
expect(testResult).toBeDefined();
expect(testResult.metadata).toBeUndefined(); // Metadata comes separately in block format
});
tap.test('timing for skipped tests should be 0 or missing', async () => {
const emitter = new ProtocolEmitter();
const testResult = {
ok: true,
testNumber: 1,
description: 'skipped test',
directive: {
type: 'skip' as const,
reason: 'Not ready'
},
metadata: {
time: 0
}
};
const lines = emitter.emitTest(testResult);
expect(lines[0]).toInclude('# SKIP');
// If time is 0, it might be included or omitted
if (lines[0].includes('⟦TSTEST:')) {
expect(lines[0]).toInclude('time:0');
}
});
tap.test('protocol should handle fractional milliseconds', async () => {
const emitter = new ProtocolEmitter();
// Even though we use integers, test that protocol handles them correctly
const testResult = {
ok: true,
testNumber: 1,
description: 'precise test',
metadata: {
time: 123 // Protocol uses integers for milliseconds
}
};
const lines = emitter.emitTest(testResult);
expect(lines[0]).toInclude('time:123');
});
tap.test('protocol should handle timing in retry scenarios', async () => {
const emitter = new ProtocolEmitter();
const testResult = {
ok: true,
testNumber: 1,
description: 'retry test',
metadata: {
time: 200,
retry: 2
}
};
const lines = emitter.emitTest(testResult);
// Should include both time and retry
expect(lines[0]).toMatch(/time:200.*retry:2|retry:2.*time:200/);
});
// Test actual timing capture
tap.test('HrtMeasurement should capture accurate timing', async (tools) => {
// Import HrtMeasurement
const { HrtMeasurement } = await import('@push.rocks/smarttime');
const measurement = new HrtMeasurement();
measurement.start();
await tools.delayFor(50);
measurement.stop();
// Should be at least 50ms
expect(measurement.milliSeconds).toBeGreaterThanOrEqual(50);
// But not too much more (allow for some overhead)
expect(measurement.milliSeconds).toBeLessThan(100);
});
tap.test('multiple timing measurements should be independent', async (tools) => {
const { HrtMeasurement } = await import('@push.rocks/smarttime');
const measurement1 = new HrtMeasurement();
const measurement2 = new HrtMeasurement();
measurement1.start();
await tools.delayFor(25);
measurement2.start();
await tools.delayFor(25);
measurement1.stop();
await tools.delayFor(25);
measurement2.stop();
// measurement1 should be ~50ms (25ms + 25ms)
expect(measurement1.milliSeconds).toBeGreaterThanOrEqual(50);
expect(measurement1.milliSeconds).toBeLessThan(70);
// measurement2 should be ~50ms (25ms + 25ms)
expect(measurement2.milliSeconds).toBeGreaterThanOrEqual(50);
expect(measurement2.milliSeconds).toBeLessThan(70);
});
tap.start();

View File

@ -3,6 +3,6 @@
*/
export const commitinfo = {
name: '@git.zone/tstest',
version: '2.2.4',
version: '2.2.5',
description: 'a test utility to run tests that match test/**/*.ts'
}

View File

@ -269,8 +269,8 @@ export class ProtocolParser {
// Extract simple key:value pairs
const simpleMatch = line.match(new RegExp(`${this.escapeRegex(PROTOCOL_MARKERS.START)}([^${this.escapeRegex(PROTOCOL_MARKERS.END)}]+)${this.escapeRegex(PROTOCOL_MARKERS.END)}`));
if (simpleMatch && !simpleMatch[1].includes(':')) {
// Not a prefixed format, might be key:value pairs
if (simpleMatch && simpleMatch[1].includes(':') && !simpleMatch[1].includes('META:') && !simpleMatch[1].includes('SKIP:') && !simpleMatch[1].includes('TODO:') && !simpleMatch[1].includes('EVENT:')) {
// This is a simple key:value format (not a prefixed format)
const pairs = simpleMatch[1].split(',');
for (const pair of pairs) {
const [key, value] = pair.split(':');