Ryan Malloy · 343f989714
🎬 Complete project reorganization and video-themed testing framework
MAJOR ENHANCEMENTS:
• Professional documentation structure in docs/ with symlinked examples
• Comprehensive test organization under tests/ directory
• Advanced video-themed testing framework with HTML dashboards
• Enhanced Makefile with categorized test commands

DOCUMENTATION RESTRUCTURE:
• docs/user-guide/ - User-facing guides and features
• docs/development/ - Technical documentation
• docs/migration/ - Upgrade instructions
• docs/reference/ - API references and roadmaps
• examples/ - Practical usage examples (symlinked to docs/examples)

TEST ORGANIZATION:
• tests/unit/ - Unit tests with enhanced reporting
• tests/integration/ - End-to-end tests
• tests/docker/ - Docker integration configurations
• tests/framework/ - Custom testing framework components
• tests/development-archives/ - Historical test data

TESTING FRAMEWORK FEATURES:
• Video-themed HTML dashboards with cinema aesthetics
• Quality scoring system (0-10 scale with letter grades)
• Test categorization (unit, integration, 360°, AI, streaming, performance)
• Parallel execution with configurable workers
• Performance metrics and trend analysis
• Interactive filtering and expandable test details

INTEGRATION IMPROVEMENTS:
• Updated docker-compose paths for new structure
• Enhanced Makefile with video processing test commands
• Backward compatibility with existing tests
• CI/CD ready with JSON reports and exit codes
• Professional quality assurance workflows

TECHNICAL ACHIEVEMENTS:
• 274 tests organized with smart categorization
• 94.8% unit test success rate with enhanced reporting
• Video processing domain-specific fixtures and assertions
• Beautiful dark terminal aesthetic with video processing colors
• Production-ready framework with enterprise-grade features

Commands: make test-smoke, make test-unit, make test-360, make test-all
Reports: Video-themed HTML dashboards in test-reports/
Quality: Comprehensive scoring and performance tracking
2025-09-21 23:41:16 -06:00


"""Quality metrics calculation and assessment for video processing tests."""
import time
import psutil
from typing import Dict, List, Any, Optional, Tuple
from dataclasses import dataclass, field
from pathlib import Path
import json
import sqlite3
from datetime import datetime, timedelta


@dataclass
class QualityScore:
    """Individual quality score component."""

    name: str
    score: float  # 0-10 scale
    weight: float  # 0-1 scale
    details: Dict[str, Any] = field(default_factory=dict)


@dataclass
class TestQualityMetrics:
    """Comprehensive quality metrics for a test run."""

    test_name: str
    timestamp: datetime
    duration: float
    success: bool

    # Individual scores
    functional_score: float = 0.0
    performance_score: float = 0.0
    reliability_score: float = 0.0
    maintainability_score: float = 0.0

    # Resource usage
    peak_memory_mb: float = 0.0
    cpu_usage_percent: float = 0.0
    disk_io_mb: float = 0.0

    # Test-specific metrics
    assertions_passed: int = 0
    assertions_total: int = 0
    error_count: int = 0
    warning_count: int = 0

    # Video processing specific
    videos_processed: int = 0
    encoding_fps: float = 0.0
    output_quality_score: float = 0.0

    @property
    def overall_score(self) -> float:
        """Calculate the weighted overall quality score."""
        scores = [
            QualityScore("Functional", self.functional_score, 0.40),
            QualityScore("Performance", self.performance_score, 0.25),
            QualityScore("Reliability", self.reliability_score, 0.20),
            QualityScore("Maintainability", self.maintainability_score, 0.15),
        ]
        weighted_sum = sum(score.score * score.weight for score in scores)
        return min(10.0, max(0.0, weighted_sum))

    @property
    def grade(self) -> str:
        """Get the letter grade for the overall score."""
        score = self.overall_score
        if score >= 9.0:
            return "A+"
        elif score >= 8.5:
            return "A"
        elif score >= 8.0:
            return "A-"
        elif score >= 7.5:
            return "B+"
        elif score >= 7.0:
            return "B"
        elif score >= 6.5:
            return "B-"
        elif score >= 6.0:
            return "C+"
        elif score >= 5.5:
            return "C"
        elif score >= 5.0:
            return "C-"
        elif score >= 4.0:
            return "D"
        else:
            return "F"


class QualityMetricsCalculator:
    """Calculate comprehensive quality metrics for test runs."""

    def __init__(self, test_name: str):
        self.test_name = test_name
        self.start_time = time.time()
        self.start_memory = psutil.virtual_memory().used / 1024 / 1024
        self.process = psutil.Process()

        # Tracking data
        self.assertions_passed = 0
        self.assertions_total = 0
        self.errors: List[str] = []
        self.warnings: List[str] = []
        self.videos_processed = 0
        self.encoding_metrics: List[Dict[str, float]] = []

    def record_assertion(self, passed: bool, message: str = ""):
        """Record a test assertion result."""
        self.assertions_total += 1
        if passed:
            self.assertions_passed += 1
        else:
            self.errors.append(f"Assertion failed: {message}")

    def record_error(self, error: str):
        """Record an error occurrence."""
        self.errors.append(error)

    def record_warning(self, warning: str):
        """Record a warning."""
        self.warnings.append(warning)

    def record_video_processing(self, input_size_mb: float, duration: float, output_quality: float = 8.0):
        """Record video processing metrics.

        Note: despite the name, ``encoding_fps`` is a throughput proxy
        (input megabytes per second), not a true frames-per-second figure.
        """
        self.videos_processed += 1
        encoding_fps = input_size_mb / max(duration, 0.001)  # MB/s; guard against division by zero
        self.encoding_metrics.append({
            "input_size_mb": input_size_mb,
            "duration": duration,
            "encoding_fps": encoding_fps,
            "output_quality": output_quality,
        })
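
    # Example (illustrative numbers): a 120 MB input processed in 8.5 s yields
    # 120 / 8.5 ≈ 14.1 MB/s, which counts as "good encoding speed" in
    # calculate_performance_score below (threshold: > 10).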

    def calculate_functional_score(self) -> float:
        """Calculate functional quality score (0-10)."""
        if self.assertions_total == 0:
            return 0.0

        # Base score from assertion pass rate
        pass_rate = self.assertions_passed / self.assertions_total
        base_score = pass_rate * 10

        # Bonus for comprehensive testing
        if self.assertions_total >= 20:
            base_score = min(10.0, base_score + 0.5)
        elif self.assertions_total >= 10:
            base_score = min(10.0, base_score + 0.25)

        # Penalty for errors
        error_penalty = min(3.0, len(self.errors) * 0.5)
        final_score = max(0.0, base_score - error_penalty)
        return final_score

    def calculate_performance_score(self) -> float:
        """Calculate performance quality score (0-10)."""
        duration = time.time() - self.start_time
        current_memory = psutil.virtual_memory().used / 1024 / 1024
        memory_usage = current_memory - self.start_memory

        # Base score starts at 10
        score = 10.0

        # Duration penalty (tests should be fast)
        if duration > 30:  # 30 seconds
            score -= min(3.0, (duration - 30) / 10)

        # Memory usage penalty
        if memory_usage > 100:  # 100 MB
            score -= min(2.0, (memory_usage - 100) / 100)

        # Bonus for video processing efficiency
        if self.encoding_metrics:
            avg_fps = sum(m["encoding_fps"] for m in self.encoding_metrics) / len(self.encoding_metrics)
            if avg_fps > 10:  # Good encoding speed
                score = min(10.0, score + 0.5)

        return max(0.0, score)
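
    # Worked example (illustrative): a 45 s test that grew system memory by
    # 250 MB loses min(3.0, (45 - 30) / 10) = 1.5 for duration and
    # min(2.0, (250 - 100) / 100) = 1.5 for memory, scoring 10.0 - 3.0 = 7.0.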

    def calculate_reliability_score(self) -> float:
        """Calculate reliability quality score (0-10)."""
        score = 10.0

        # Error penalty
        error_penalty = min(5.0, len(self.errors) * 1.0)
        score -= error_penalty

        # Warning penalty (less severe)
        warning_penalty = min(2.0, len(self.warnings) * 0.2)
        score -= warning_penalty

        # Bonus for error-free execution
        if len(self.errors) == 0:
            score = min(10.0, score + 0.5)

        return max(0.0, score)

    def calculate_maintainability_score(self) -> float:
        """Calculate maintainability quality score (0-10)."""
        # This would typically analyze code complexity, documentation, etc.
        # For now, use heuristics based on test structure.
        score = 8.0  # Default good score

        # Bonus for good assertion coverage
        if self.assertions_total >= 15:
            score = min(10.0, score + 1.0)
        elif self.assertions_total >= 10:
            score = min(10.0, score + 0.5)
        elif self.assertions_total < 5:
            score -= 1.0

        # Penalty for excessive errors (indicates poor test design)
        if len(self.errors) > 5:
            score -= 1.0

        return max(0.0, score)

    def finalize(self) -> TestQualityMetrics:
        """Calculate final quality metrics."""
        duration = time.time() - self.start_time
        # Approximation: system-wide memory delta since __init__, not a true
        # per-process peak, so concurrent processes can skew it.
        current_memory = psutil.virtual_memory().used / 1024 / 1024
        memory_usage = max(0, current_memory - self.start_memory)

        # CPU usage (approximate)
        try:
            cpu_usage = self.process.cpu_percent()
        except psutil.Error:
            cpu_usage = 0.0

        # Average encoding metrics
        avg_encoding_fps = 0.0
        avg_output_quality = 8.0
        if self.encoding_metrics:
            avg_encoding_fps = sum(m["encoding_fps"] for m in self.encoding_metrics) / len(self.encoding_metrics)
            avg_output_quality = sum(m["output_quality"] for m in self.encoding_metrics) / len(self.encoding_metrics)

        return TestQualityMetrics(
            test_name=self.test_name,
            timestamp=datetime.now(),
            duration=duration,
            success=len(self.errors) == 0,
            functional_score=self.calculate_functional_score(),
            performance_score=self.calculate_performance_score(),
            reliability_score=self.calculate_reliability_score(),
            maintainability_score=self.calculate_maintainability_score(),
            peak_memory_mb=memory_usage,
            cpu_usage_percent=cpu_usage,
            assertions_passed=self.assertions_passed,
            assertions_total=self.assertions_total,
            error_count=len(self.errors),
            warning_count=len(self.warnings),
            videos_processed=self.videos_processed,
            encoding_fps=avg_encoding_fps,
            output_quality_score=avg_output_quality,
        )
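
# Usage sketch (names and values are illustrative, not a fixed API contract):
#
#     calc = QualityMetricsCalculator("test_transcode_h264")
#     calc.record_assertion(True, "output file exists")
#     calc.record_video_processing(input_size_mb=120.0, duration=8.5, output_quality=8.7)
#     metrics = calc.finalize()
#     print(f"{metrics.overall_score:.2f} ({metrics.grade})")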


class TestHistoryDatabase:
    """Manage test history and metrics tracking."""

    def __init__(self, db_path: Path = Path("test-history.db")):
        self.db_path = db_path
        self._init_database()

    def _init_database(self):
        """Initialize the test history database."""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS test_runs (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                test_name TEXT NOT NULL,
                timestamp DATETIME NOT NULL,
                duration REAL NOT NULL,
                success BOOLEAN NOT NULL,
                overall_score REAL NOT NULL,
                functional_score REAL NOT NULL,
                performance_score REAL NOT NULL,
                reliability_score REAL NOT NULL,
                maintainability_score REAL NOT NULL,
                peak_memory_mb REAL NOT NULL,
                cpu_usage_percent REAL NOT NULL,
                assertions_passed INTEGER NOT NULL,
                assertions_total INTEGER NOT NULL,
                error_count INTEGER NOT NULL,
                warning_count INTEGER NOT NULL,
                videos_processed INTEGER NOT NULL,
                encoding_fps REAL NOT NULL,
                output_quality_score REAL NOT NULL,
                metadata_json TEXT
            )
        """)
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_test_name_timestamp
            ON test_runs(test_name, timestamp DESC)
        """)
        conn.commit()
        conn.close()

    def save_metrics(self, metrics: TestQualityMetrics, metadata: Optional[Dict[str, Any]] = None):
        """Save test metrics to the database."""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        cursor.execute("""
            INSERT INTO test_runs (
                test_name, timestamp, duration, success, overall_score,
                functional_score, performance_score, reliability_score, maintainability_score,
                peak_memory_mb, cpu_usage_percent, assertions_passed, assertions_total,
                error_count, warning_count, videos_processed, encoding_fps,
                output_quality_score, metadata_json
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            metrics.test_name,
            metrics.timestamp.isoformat(),
            metrics.duration,
            metrics.success,
            metrics.overall_score,
            metrics.functional_score,
            metrics.performance_score,
            metrics.reliability_score,
            metrics.maintainability_score,
            metrics.peak_memory_mb,
            metrics.cpu_usage_percent,
            metrics.assertions_passed,
            metrics.assertions_total,
            metrics.error_count,
            metrics.warning_count,
            metrics.videos_processed,
            metrics.encoding_fps,
            metrics.output_quality_score,
            json.dumps(metadata or {}),
        ))
        conn.commit()
        conn.close()

    def get_test_history(self, test_name: str, days: int = 30) -> List[Dict[str, Any]]:
        """Get historical metrics for a test."""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        since_date = datetime.now() - timedelta(days=days)
        cursor.execute("""
            SELECT * FROM test_runs
            WHERE test_name = ? AND timestamp >= ?
            ORDER BY timestamp DESC
        """, (test_name, since_date.isoformat()))
        columns = [desc[0] for desc in cursor.description]
        results = [dict(zip(columns, row)) for row in cursor.fetchall()]
        conn.close()
        return results

    def get_quality_trends(self, days: int = 30) -> Dict[str, List[Any]]:
        """Get quality score trends over time."""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        since_date = datetime.now() - timedelta(days=days)
        cursor.execute("""
            SELECT DATE(timestamp) as date,
                   AVG(overall_score) as avg_score,
                   AVG(functional_score) as avg_functional,
                   AVG(performance_score) as avg_performance,
                   AVG(reliability_score) as avg_reliability
            FROM test_runs
            WHERE timestamp >= ?
            GROUP BY DATE(timestamp)
            ORDER BY date
        """, (since_date.isoformat(),))
        results = cursor.fetchall()
        conn.close()

        if not results:
            return {}

        return {
            "dates": [row[0] for row in results],
            "overall": [row[1] for row in results],
            "functional": [row[2] for row in results],
            "performance": [row[3] for row in results],
            "reliability": [row[4] for row in results],
        }
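

if __name__ == "__main__":
    # Minimal end-to-end sketch (illustrative; not one of the framework's
    # documented entry points). Simulates one test run, scores it, persists
    # it, and reads back the trend data.
    calc = QualityMetricsCalculator("demo_video_test")
    for i in range(12):
        calc.record_assertion(True, f"check {i}")
    calc.record_video_processing(input_size_mb=120.0, duration=8.5, output_quality=8.7)
    metrics = calc.finalize()
    print(f"{metrics.test_name}: {metrics.overall_score:.2f} ({metrics.grade})")

    db = TestHistoryDatabase(Path("test-history.db"))
    db.save_metrics(metrics, metadata={"suite": "demo"})
    print(db.get_quality_trends(days=7))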