mirror of
https://github.com/JamesTheGiblet/BuddAI.git
synced 2026-01-08 21:58:40 +00:00
- Introduced 16 additional coverage tests in `test_additional_coverage.py` to enhance overall test coverage.
- Added 15 extended feature tests in `test_extended_features.py` to validate new functionalities.
- Implemented 27 final coverage tests in `test_final_coverage.py` to achieve a total of 100 tests.
- Created 2 fallback logic tests in `test_fallback_logic.py` to ensure proper fallback behavior based on confidence scores.
- Each test suite covers various aspects of the BuddAI system, including command handling, database interactions, and hardware detection.
104 lines
No EOL
3.7 KiB
Python
104 lines
No EOL
3.7 KiB
Python
#!/usr/bin/env python3
"""
Unit tests for BuddAI Confidence Scorer

Verifies scoring logic, penalties, and escalation flags.
"""

import unittest
import sys
import os  # NOTE(review): appears unused in this module — confirm before removing
from pathlib import Path

# Dynamic import setup: prepend the repository root to sys.path so the
# `core` package resolves no matter which directory the tests are run from.
REPO_ROOT = Path(__file__).parent.parent
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))

# Must come after the sys.path bootstrap above — `core` is a project package.
from core.buddai_confidence import ConfidenceScorer
class TestConfidenceScorer(unittest.TestCase):
    """Exercises ConfidenceScorer: score composition, warning penalties,
    pattern familiarity, and the escalation threshold."""

    def setUp(self):
        # A fresh scorer per test keeps cases independent of each other.
        self.scorer = ConfidenceScorer()

    def test_calculate_confidence_high(self):
        """Test a high confidence scenario (Success + Matches)"""
        snippet = "void setup() { Serial.begin(115200); }"
        ctx = {
            'hardware': 'ESP32',
            'learned_rules': ['Serial.begin(115200)'],
            'user_message': 'setup serial',
            'history': [],
        }
        # Validation passed with no reported issues.
        outcome = self.scorer.calculate_confidence(snippet, ctx, (True, []))
        # Expected composition: validation 40 (perfect) + patterns ~30
        # (1/1 rules matched, 1.5x boost capped at 30) + hardware 10
        # (generic fallback) + context 10 (hardware/message/rules present)
        # => roughly 90 overall.
        self.assertGreaterEqual(outcome, 80)
        self.assertFalse(self.scorer.should_escalate(outcome))

    def test_calculate_confidence_low(self):
        """Test a low confidence scenario (Validation Failure)"""
        outcome = self.scorer.calculate_confidence(
            "broken code", {'hardware': 'ESP32'}, (False, ['Syntax Error'])
        )
        # Expected composition: validation 0 (failed) + patterns 15
        # (no-rules baseline) + hardware 10 (generic) + context 3
        # (hardware only) => 28 overall.
        self.assertLess(outcome, 50)
        self.assertTrue(self.scorer.should_escalate(outcome))

    def test_should_escalate_thresholds(self):
        """Test flagging logic at specific boundaries"""
        flag = self.scorer.should_escalate
        # Default threshold is 70: escalate strictly below, not at or above.
        self.assertTrue(flag(69))
        self.assertFalse(flag(70))
        self.assertFalse(flag(71))
        # A caller-supplied threshold is honoured as well.
        self.assertTrue(flag(80, threshold=85))

    def test_validation_scoring_penalties(self):
        """Test that warnings reduce score but don't zero it"""
        # Two warnings knock 5 points each off the 40-point maximum.
        two_warnings = self.scorer._score_validation(
            (True, [{'message': 'W1'}, {'message': 'W2'}])
        )
        self.assertEqual(two_warnings, 30.0)
        # Piling on warnings floors the score at 10 rather than zeroing it.
        flooded = self.scorer._score_validation((True, [{'message': 'W'}] * 10))
        self.assertEqual(flooded, 10.0)

    def test_pattern_familiarity(self):
        """Test pattern matching logic"""
        snippet = "ledcSetup(0, 5000, 8);"
        # A learned rule that matches the code maxes out the capped score.
        hit = self.scorer._score_patterns(
            snippet, {'learned_rules': ['Use ledcSetup']}
        )
        self.assertEqual(hit, 30.0)
        # A rule that does not appear in the code earns no credit.
        miss = self.scorer._score_patterns(
            snippet, {'learned_rules': ['Use analogRead']}
        )
        self.assertEqual(miss, 0.0)
        # With no rules at all the scorer falls back to a neutral baseline.
        neutral = self.scorer._score_patterns(snippet, {})
        self.assertEqual(neutral, 15.0)
if __name__ == '__main__':
    # Allow running this test module directly (e.g. `python <file>.py`);
    # unittest.main() discovers and runs the TestCase above.
    unittest.main()