fix(protocol): Fix inline timing metadata parsing and enhance test coverage for performance metrics and timing edge cases

2025-05-26 08:22:26 +00:00
parent 845f146e91
commit 899045e6aa
7 changed files with 651 additions and 4 deletions
--- a/changelog.md
+++ b/changelog.md
@@ -1,5 +1,12 @@
 # Changelog

+## 2025-05-26 - 2.2.5 - fix(protocol)
+Fix inline timing metadata parsing and enhance test coverage for performance metrics and timing edge cases
+
+- Updated the protocol parser to correctly parse inline key:value pairs while excluding prefixed formats (META:, SKIP:, TODO:, EVENT:)
+- Added new tests for performance metrics, timing edge cases, and protocol timing to verify accurate timing capture and retry handling
+- Expanded documentation in readme.hints.md to detail the updated timing implementation and parser fixes
+
 ## 2025-05-26 - 2.2.4 - fix(logging)
 Improve performance metrics reporting and add local permissions configuration

--- a/readme.hints.md
+++ b/readme.hints.md
@@ -265,4 +265,59 @@ Previously reported issues with these methods have been resolved:
   - Other tests are not executed but still counted
   - Both regular and parallel only tests supported

-These fixes ensure accurate test counts and proper TAP-compliant output for all test states.
+These fixes ensure accurate test counts and proper TAP-compliant output for all test states.
+
+## Test Timing Implementation
+
+### Timing Architecture
+
+Test timing is captured using `@push.rocks/smarttime`'s `HrtMeasurement` class, which provides high-resolution timing:
+
+1. **Timing Capture**:
+   - Each `TapTest` instance has its own `HrtMeasurement`
+   - Timer starts immediately before test function execution
+   - Timer stops after test completes (or fails/times out)
+   - Millisecond precision is used for reporting
+
+2. **Protocol Integration**:
+   - Timing is embedded in TAP output using Protocol V2 markers
+   - Inline format for simple timing: `ok 1 - test name ⟦TSTEST:time:123⟧`
+   - Block format for complex metadata: `⟦TSTEST:META:{"time":456,"file":"test.ts"}⟧`
+
+3. **Performance Metrics Calculation**:
+   - Average is calculated from sum of individual test times, not total runtime
+   - Slowest test detection prefers tests with >0ms duration
+   - Failed tests still contribute their execution time to metrics
+
+### Edge Cases and Considerations
+
+1. **Sub-millisecond Tests**:
+   - Very fast tests may report 0ms due to millisecond rounding
+   - Performance metrics handle this by showing "All tests completed in <1ms" when appropriate
+
+2. **Special Test States**:
+   - **Skipped tests**: Report 0ms (not executed)
+   - **Todo tests**: Report 0ms (not executed)
+   - **Failed tests**: Report actual execution time before failure
+   - **Timeout tests**: Report time until timeout occurred
+
+3. **Parallel Test Timing**:
+   - Each parallel test tracks its own execution time independently
+   - Parallel tests may have overlapping execution periods
+   - Total suite time reflects wall-clock time, not sum of test times
+
+4. **Hook Timing**:
+   - `beforeEach`/`afterEach` hooks are not included in individual test times
+   - Only the actual test function execution is measured
+
+5. **Retry Timing**:
+   - When tests retry, only the final attempt's duration is reported
+   - Each retry attempt emits separate `test:started` events
+
+### Parser Fix for Timing Metadata
+
+The protocol parser was fixed to correctly handle inline timing metadata:
+- Changed condition from `!simpleMatch[1].includes(':')` to check for simple key:value pairs
+- Excludes prefixed formats (META:, SKIP:, TODO:, EVENT:) while parsing simple formats like `time:250`
+
+This ensures timing metadata is correctly extracted and displayed in test results.
--- a/test/tapbundle/test.performance-metrics.ts
+++ b/test/tapbundle/test.performance-metrics.ts
@@ -0,0 +1,167 @@
+import { tap, expect } from '../../ts_tapbundle/index.js';
+
+// Create tests with known, distinct timing patterns to verify metrics calculation
+tap.test('metric test 1 - 10ms baseline', async (tools) => {
+  await tools.delayFor(10);
+  expect(true).toBeTrue();
+});
+
+tap.test('metric test 2 - 20ms double baseline', async (tools) => {
+  await tools.delayFor(20);
+  expect(true).toBeTrue();
+});
+
+tap.test('metric test 3 - 30ms triple baseline', async (tools) => {
+  await tools.delayFor(30);
+  expect(true).toBeTrue();
+});
+
+tap.test('metric test 4 - 40ms quadruple baseline', async (tools) => {
+  await tools.delayFor(40);
+  expect(true).toBeTrue();
+});
+
+tap.test('metric test 5 - 50ms quintuple baseline', async (tools) => {
+  await tools.delayFor(50);
+  expect(true).toBeTrue();
+});
+
+// Test that should be the slowest
+tap.test('metric test slowest - 200ms intentionally slow', async (tools) => {
+  await tools.delayFor(200);
+  expect(true).toBeTrue();
+});
+
+// Tests to verify edge cases in average calculation
+tap.test('metric test fast 1 - minimal work', async () => {
+  expect(1).toEqual(1);
+});
+
+tap.test('metric test fast 2 - minimal work', async () => {
+  expect(2).toEqual(2);
+});
+
+tap.test('metric test fast 3 - minimal work', async () => {
+  expect(3).toEqual(3);
+});
+
+// Test to verify that failed tests still contribute to timing metrics
+tap.test('metric test that fails - 60ms before failure', async (tools) => {
+  await tools.delayFor(60);
+  expect(true).toBeFalse(); // This will fail
+});
+
+// Describe block with timing to test aggregation
+tap.describe('performance metrics in describe block', () => {
+  tap.test('described test 1 - 15ms', async (tools) => {
+    await tools.delayFor(15);
+    expect(true).toBeTrue();
+  });
+  
+  tap.test('described test 2 - 25ms', async (tools) => {
+    await tools.delayFor(25);
+    expect(true).toBeTrue();
+  });
+  
+  tap.test('described test 3 - 35ms', async (tools) => {
+    await tools.delayFor(35);
+    expect(true).toBeTrue();
+  });
+});
+
+// Test timing with hooks
+tap.describe('performance with hooks', () => {
+  let hookTime = 0;
+  
+  tap.beforeEach(async () => {
+    // Hooks shouldn't count toward test time
+    await new Promise(resolve => setTimeout(resolve, 10));
+    hookTime += 10;
+  });
+  
+  tap.afterEach(async () => {
+    // Hooks shouldn't count toward test time
+    await new Promise(resolve => setTimeout(resolve, 10));
+    hookTime += 10;
+  });
+  
+  tap.test('test with hooks 1 - should only count test time', async (tools) => {
+    await tools.delayFor(30);
+    expect(true).toBeTrue();
+    // Test time should be ~30ms, not 50ms (including hooks)
+  });
+  
+  tap.test('test with hooks 2 - should only count test time', async (tools) => {
+    await tools.delayFor(40);
+    expect(true).toBeTrue();
+    // Test time should be ~40ms, not 60ms (including hooks)
+  });
+});
+
+// Parallel tests to verify timing is captured correctly
+tap.describe('parallel timing verification', () => {
+  const startTimes: Map<string, number> = new Map();
+  const endTimes: Map<string, number> = new Map();
+  
+  tap.testParallel('parallel metric 1 - 80ms', async (tools) => {
+    startTimes.set('p1', Date.now());
+    await tools.delayFor(80);
+    endTimes.set('p1', Date.now());
+    expect(true).toBeTrue();
+  });
+  
+  tap.testParallel('parallel metric 2 - 90ms', async (tools) => {
+    startTimes.set('p2', Date.now());
+    await tools.delayFor(90);
+    endTimes.set('p2', Date.now());
+    expect(true).toBeTrue();
+  });
+  
+  tap.testParallel('parallel metric 3 - 100ms', async (tools) => {
+    startTimes.set('p3', Date.now());
+    await tools.delayFor(100);
+    endTimes.set('p3', Date.now());
+    expect(true).toBeTrue();
+  });
+  
+  tap.test('verify parallel execution', async () => {
+    // This test runs after parallel tests
+    // Verify they actually ran in parallel by checking overlapping times
+    if (startTimes.size === 3 && endTimes.size === 3) {
+      const p1Start = startTimes.get('p1')!;
+      const p2Start = startTimes.get('p2')!;
+      const p3Start = startTimes.get('p3')!;
+      const p1End = endTimes.get('p1')!;
+      const p2End = endTimes.get('p2')!;
+      const p3End = endTimes.get('p3')!;
+      
+      // Start times should be very close (within 50ms)
+      expect(Math.abs(p1Start - p2Start)).toBeLessThan(50);
+      expect(Math.abs(p2Start - p3Start)).toBeLessThan(50);
+      
+      // There should be overlap in execution
+      const p1Overlaps = p1Start < p2End && p1End > p2Start;
+      const p2Overlaps = p2Start < p3End && p2End > p3Start;
+      
+      expect(p1Overlaps || p2Overlaps).toBeTrue();
+    } else {
+      // Skip verification if parallel tests didn't run yet
+      expect(true).toBeTrue();
+    }
+  });
+});
+
+// Test to ensure average calculation handles mixed timing correctly
+tap.test('final metrics test - 5ms minimal', async (tools) => {
+  await tools.delayFor(5);
+  expect(true).toBeTrue();
+  
+  console.log('\n📊 Expected Performance Metrics Summary:');
+  console.log('- Tests include a mix of durations from <1ms to 200ms');
+  console.log('- Slowest test should be "metric test slowest" at ~200ms');
+  console.log('- Average should be calculated from individual test times');
+  console.log('- Failed test should still contribute its 60ms to timing');
+  console.log('- Parallel tests should show their individual times (80ms, 90ms, 100ms)');
+});
+
+tap.start();
--- a/test/tapbundle/test.timing-edge-cases.ts
+++ b/test/tapbundle/test.timing-edge-cases.ts
@@ -0,0 +1,214 @@
+import { tap, expect } from '../../ts_tapbundle/index.js';
+
+tap.test('ultra-fast test - should capture sub-millisecond timing', async () => {
+  // This test does almost nothing, should complete in < 1ms
+  const x = 1 + 1;
+  expect(x).toEqual(2);
+});
+
+tap.test('test with exact 1ms delay', async (tools) => {
+  const start = Date.now();
+  await tools.delayFor(1);
+  const elapsed = Date.now() - start;
+  // Should be at least 1ms but could be more due to event loop
+  expect(elapsed).toBeGreaterThanOrEqual(1);
+});
+
+tap.test('test with 10ms delay', async (tools) => {
+  await tools.delayFor(10);
+  expect(true).toBeTrue();
+});
+
+tap.test('test with 100ms delay', async (tools) => {
+  await tools.delayFor(100);
+  expect(true).toBeTrue();
+});
+
+tap.test('test with 250ms delay', async (tools) => {
+  await tools.delayFor(250);
+  expect(true).toBeTrue();
+});
+
+tap.test('test with 500ms delay', async (tools) => {
+  await tools.delayFor(500);
+  expect(true).toBeTrue();
+});
+
+tap.test('test with variable processing time', async (tools) => {
+  // Simulate variable processing
+  const iterations = 1000000;
+  let sum = 0;
+  for (let i = 0; i < iterations; i++) {
+    sum += Math.sqrt(i);
+  }
+  expect(sum).toBeGreaterThan(0);
+  
+  // Add a small delay to ensure measurable time
+  await tools.delayFor(5);
+});
+
+tap.test('test with multiple async operations', async () => {
+  // Multiple promises in parallel
+  const results = await Promise.all([
+    new Promise(resolve => setTimeout(() => resolve(1), 10)),
+    new Promise(resolve => setTimeout(() => resolve(2), 20)),
+    new Promise(resolve => setTimeout(() => resolve(3), 30))
+  ]);
+  
+  expect(results).toEqual([1, 2, 3]);
+  // This should take at least 30ms (the longest delay)
+});
+
+tap.test('test with synchronous heavy computation', async () => {
+  // Heavy synchronous computation
+  const fibonacci = (n: number): number => {
+    if (n <= 1) return n;
+    return fibonacci(n - 1) + fibonacci(n - 2);
+  };
+  
+  // Calculate fibonacci(30) - should take measurable time
+  const result = fibonacci(30);
+  expect(result).toEqual(832040);
+});
+
+// Test with retry to see if timing accumulates correctly
+tap.retry(2).test('test with retry - fails first then passes', async (tools) => {
+  // Get or initialize retry count
+  const retryCount = tools.context.get('retryCount') || 0;
+  tools.context.set('retryCount', retryCount + 1);
+  
+  await tools.delayFor(50);
+  
+  if (retryCount === 0) {
+    throw new Error('First attempt fails');
+  }
+  
+  expect(retryCount).toEqual(1);
+});
+
+// Test timeout handling
+tap.timeout(100).test('test with timeout - should complete just in time', async (tools) => {
+  await tools.delayFor(80); // Just under the timeout
+  expect(true).toBeTrue();
+});
+
+// Skip test - should show 0ms
+tap.skip.test('skipped test - should report 0ms', async (tools) => {
+  await tools.delayFor(1000); // This won't execute
+  expect(true).toBeTrue();
+});
+
+// Todo test - should show 0ms
+tap.todo.test('todo test - should report 0ms', async (tools) => {
+  await tools.delayFor(1000); // This won't execute
+  expect(true).toBeTrue();
+});
+
+// Test with skip inside
+tap.test('test that skips conditionally - should show time until skip', async (tools) => {
+  await tools.delayFor(25);
+  
+  const shouldSkip = true;
+  if (shouldSkip) {
+    tools.skip('Skipping after 25ms');
+  }
+  
+  // This won't execute
+  await tools.delayFor(1000);
+  expect(true).toBeTrue();
+});
+
+// Test with very precise timing
+tap.test('test with precise timing measurements', async (tools) => {
+  const measurements: number[] = [];
+  
+  for (let i = 0; i < 5; i++) {
+    const start = process.hrtime.bigint();
+    await tools.delayFor(10);
+    const end = process.hrtime.bigint();
+    const durationMs = Number(end - start) / 1_000_000;
+    measurements.push(durationMs);
+  }
+  
+  // All measurements should be at least 10ms
+  measurements.forEach(m => {
+    expect(m).toBeGreaterThanOrEqual(10);
+  });
+  
+  // But not too much more (accounting for timer precision)
+  measurements.forEach(m => {
+    expect(m).toBeLessThan(20);
+  });
+});
+
+// Test that intentionally has 0 actual work
+tap.test('empty test - absolute minimum execution time', async () => {
+  // Literally nothing
+});
+
+// Test with promise that resolves immediately
+tap.test('test with immediate promise resolution', async () => {
+  await Promise.resolve();
+  expect(true).toBeTrue();
+});
+
+// Test with microtask queue
+tap.test('test with microtask queue processing', async () => {
+  let value = 0;
+  
+  await Promise.resolve().then(() => {
+    value = 1;
+    return Promise.resolve();
+  }).then(() => {
+    value = 2;
+    return Promise.resolve();
+  }).then(() => {
+    value = 3;
+  });
+  
+  expect(value).toEqual(3);
+});
+
+// Test to verify timing accumulation in describe blocks
+tap.describe('timing in describe blocks', () => {
+  let startTime: number;
+  
+  tap.beforeEach(async () => {
+    startTime = Date.now();
+    await new Promise(resolve => setTimeout(resolve, 5));
+  });
+  
+  tap.afterEach(async () => {
+    await new Promise(resolve => setTimeout(resolve, 5));
+  });
+  
+  tap.test('first test in describe', async (tools) => {
+    await tools.delayFor(10);
+    const elapsed = Date.now() - startTime;
+    expect(elapsed).toBeGreaterThanOrEqual(10);
+  });
+  
+  tap.test('second test in describe', async (tools) => {
+    await tools.delayFor(20);
+    const elapsed = Date.now() - startTime;
+    expect(elapsed).toBeGreaterThanOrEqual(20);
+  });
+});
+
+// Parallel tests to see timing differences
+tap.testParallel('parallel test 1 - 100ms', async (tools) => {
+  await tools.delayFor(100);
+  expect(true).toBeTrue();
+});
+
+tap.testParallel('parallel test 2 - 50ms', async (tools) => {
+  await tools.delayFor(50);
+  expect(true).toBeTrue();
+});
+
+tap.testParallel('parallel test 3 - 150ms', async (tools) => {
+  await tools.delayFor(150);
+  expect(true).toBeTrue();
+});
+
+tap.start();
--- a/test/tapbundle/test.timing-protocol.ts
+++ b/test/tapbundle/test.timing-protocol.ts
@@ -0,0 +1,204 @@
+import { tap, expect } from '../../ts_tapbundle/index.js';
+import { ProtocolParser, ProtocolEmitter } from '../../ts_tapbundle_protocol/index.js';
+
+// Test the protocol's ability to emit and parse timing metadata
+tap.test('protocol should correctly emit timing metadata', async () => {
+  const emitter = new ProtocolEmitter();
+  
+  const testResult = {
+    ok: true,
+    testNumber: 1,
+    description: 'test with timing',
+    metadata: {
+      time: 123
+    }
+  };
+  
+  const lines = emitter.emitTest(testResult);
+  
+  // Should have inline timing metadata
+  expect(lines.length).toEqual(1);
+  expect(lines[0]).toInclude('⟦TSTEST:time:123⟧');
+});
+
+tap.test('protocol should correctly parse timing metadata', async () => {
+  const parser = new ProtocolParser();
+  
+  const line = 'ok 1 - test with timing ⟦TSTEST:time:456⟧';
+  const messages = parser.parseLine(line);
+  
+  expect(messages.length).toEqual(1);
+  expect(messages[0].type).toEqual('test');
+  
+  const content = messages[0].content as any;
+  expect(content.metadata).toBeDefined();
+  expect(content.metadata.time).toEqual(456);
+});
+
+tap.test('protocol should handle 0ms timing', async () => {
+  const parser = new ProtocolParser();
+  
+  const line = 'ok 1 - ultra fast test ⟦TSTEST:time:0⟧';
+  const messages = parser.parseLine(line);
+  
+  const content = messages[0].content as any;
+  expect(content.metadata.time).toEqual(0);
+});
+
+tap.test('protocol should handle large timing values', async () => {
+  const parser = new ProtocolParser();
+  
+  const line = 'ok 1 - slow test ⟦TSTEST:time:999999⟧';
+  const messages = parser.parseLine(line);
+  
+  const content = messages[0].content as any;
+  expect(content.metadata.time).toEqual(999999);
+});
+
+tap.test('protocol should handle timing with other metadata', async () => {
+  const emitter = new ProtocolEmitter();
+  
+  const testResult = {
+    ok: true,
+    testNumber: 1,
+    description: 'complex test',
+    metadata: {
+      time: 789,
+      file: 'test.ts',
+      tags: ['slow', 'integration']
+    }
+  };
+  
+  const lines = emitter.emitTest(testResult);
+  
+  // Should use block metadata format for complex metadata
+  expect(lines.length).toBeGreaterThan(1);
+  expect(lines[1]).toInclude('META:');
+  expect(lines[1]).toInclude('"time":789');
+});
+
+tap.test('protocol should parse timing from block metadata', async () => {
+  const parser = new ProtocolParser();
+  
+  const lines = [
+    'ok 1 - complex test',
+    '⟦TSTEST:META:{"time":321,"file":"test.ts"}⟧'
+  ];
+  
+  let testResult: any;
+  
+  for (const line of lines) {
+    const messages = parser.parseLine(line);
+    if (messages.length > 0 && messages[0].type === 'test') {
+      testResult = messages[0].content;
+    }
+  }
+  
+  expect(testResult).toBeDefined();
+  expect(testResult.metadata).toBeUndefined(); // Metadata comes separately in block format
+});
+
+tap.test('timing for skipped tests should be 0 or missing', async () => {
+  const emitter = new ProtocolEmitter();
+  
+  const testResult = {
+    ok: true,
+    testNumber: 1,
+    description: 'skipped test',
+    directive: {
+      type: 'skip' as const,
+      reason: 'Not ready'
+    },
+    metadata: {
+      time: 0
+    }
+  };
+  
+  const lines = emitter.emitTest(testResult);
+  expect(lines[0]).toInclude('# SKIP');
+  
+  // If time is 0, it might be included or omitted
+  if (lines[0].includes('⟦TSTEST:')) {
+    expect(lines[0]).toInclude('time:0');
+  }
+});
+
+tap.test('protocol should handle fractional milliseconds', async () => {
+  const emitter = new ProtocolEmitter();
+  
+  // Even though we use integers, test that protocol handles them correctly
+  const testResult = {
+    ok: true,
+    testNumber: 1,
+    description: 'precise test',
+    metadata: {
+      time: 123 // Protocol uses integers for milliseconds
+    }
+  };
+  
+  const lines = emitter.emitTest(testResult);
+  expect(lines[0]).toInclude('time:123');
+});
+
+tap.test('protocol should handle timing in retry scenarios', async () => {
+  const emitter = new ProtocolEmitter();
+  
+  const testResult = {
+    ok: true,
+    testNumber: 1,
+    description: 'retry test',
+    metadata: {
+      time: 200,
+      retry: 2
+    }
+  };
+  
+  const lines = emitter.emitTest(testResult);
+  // Should include both time and retry
+  expect(lines[0]).toMatch(/time:200.*retry:2|retry:2.*time:200/);
+});
+
+// Test actual timing capture
+tap.test('HrtMeasurement should capture accurate timing', async (tools) => {
+  // Import HrtMeasurement
+  const { HrtMeasurement } = await import('@push.rocks/smarttime');
+  
+  const measurement = new HrtMeasurement();
+  measurement.start();
+  
+  await tools.delayFor(50);
+  
+  measurement.stop();
+  
+  // Should be at least 50ms
+  expect(measurement.milliSeconds).toBeGreaterThanOrEqual(50);
+  // But not too much more (allow for some overhead)
+  expect(measurement.milliSeconds).toBeLessThan(100);
+});
+
+tap.test('multiple timing measurements should be independent', async (tools) => {
+  const { HrtMeasurement } = await import('@push.rocks/smarttime');
+  
+  const measurement1 = new HrtMeasurement();
+  const measurement2 = new HrtMeasurement();
+  
+  measurement1.start();
+  await tools.delayFor(25);
+  
+  measurement2.start();
+  await tools.delayFor(25);
+  
+  measurement1.stop();
+  await tools.delayFor(25);
+  measurement2.stop();
+  
+  // measurement1 should be ~50ms (25ms + 25ms)
+  expect(measurement1.milliSeconds).toBeGreaterThanOrEqual(50);
+  expect(measurement1.milliSeconds).toBeLessThan(70);
+  
+  // measurement2 should be ~50ms (25ms + 25ms)  
+  expect(measurement2.milliSeconds).toBeGreaterThanOrEqual(50);
+  expect(measurement2.milliSeconds).toBeLessThan(70);
+});
+
+tap.start();
--- a/ts/00_commitinfo_data.ts
+++ b/ts/00_commitinfo_data.ts
@@ -3,6 +3,6 @@
 */
 export const commitinfo = {
  name: '@git.zone/tstest',
-  version: '2.2.4',
+  version: '2.2.5',
  description: 'a test utility to run tests that match test/**/*.ts'
 }
--- a/ts_tapbundle_protocol/protocol.parser.ts
+++ b/ts_tapbundle_protocol/protocol.parser.ts
@@ -269,8 +269,8 @@ export class ProtocolParser {

    // Extract simple key:value pairs
    const simpleMatch = line.match(new RegExp(`${this.escapeRegex(PROTOCOL_MARKERS.START)}([^${this.escapeRegex(PROTOCOL_MARKERS.END)}]+)${this.escapeRegex(PROTOCOL_MARKERS.END)}`));
-    if (simpleMatch && !simpleMatch[1].includes(':')) {
-      // Not a prefixed format, might be key:value pairs
+    if (simpleMatch && simpleMatch[1].includes(':') && !simpleMatch[1].includes('META:') && !simpleMatch[1].includes('SKIP:') && !simpleMatch[1].includes('TODO:') && !simpleMatch[1].includes('EVENT:')) {
+      // This is a simple key:value format (not a prefixed format)
      const pairs = simpleMatch[1].split(',');
      for (const pair of pairs) {
        const [key, value] = pair.split(':');