BuddAI/test_buddai_confidence.py
JamesTheGiblet 27601aa2ba Add comprehensive unit tests for BuddAI functionality
- Introduced 16 additional coverage tests in `test_additional_coverage.py` to enhance overall test coverage.
- Added 15 extended feature tests in `test_extended_features.py` to validate new functionalities.
- Implemented 27 final coverage tests in `test_final_coverage.py` to achieve a total of 100 tests.
- Created 2 fallback logic tests in `test_fallback_logic.py` to ensure proper fallback behavior based on confidence scores.
- Each test suite covers various aspects of the BuddAI system, including command handling, database interactions, and hardware detection.
2026-01-07 19:48:24 +00:00

104 lines
No EOL
3.7 KiB
Python

#!/usr/bin/env python3
"""
Unit tests for BuddAI Confidence Scorer
Verifies scoring logic, penalties, and escalation flags.
"""
import unittest
import sys
import os
from pathlib import Path
# Dynamic import setup
REPO_ROOT = Path(__file__).parent.parent
if str(REPO_ROOT) not in sys.path:
sys.path.insert(0, str(REPO_ROOT))
from core.buddai_confidence import ConfidenceScorer
class TestConfidenceScorer(unittest.TestCase):
def setUp(self):
self.scorer = ConfidenceScorer()
def test_calculate_confidence_high(self):
"""Test a high confidence scenario (Success + Matches)"""
code = "void setup() { Serial.begin(115200); }"
context = {
'hardware': 'ESP32',
'learned_rules': ['Serial.begin(115200)'],
'user_message': 'setup serial',
'history': []
}
# Success, no issues
validation_results = (True, [])
score = self.scorer.calculate_confidence(code, context, validation_results)
# Expected Score Breakdown:
# Validation: 40 (Perfect)
# Patterns: ~30 (1 match / 1 rule = 100% * 1.5 boost, capped at 30)
# Hardware: 10 (ESP32 not explicitly in code string, generic fallback)
# Context: 10 (Hardware + Message + Rules present)
# Total ~90
self.assertGreaterEqual(score, 80)
self.assertFalse(self.scorer.should_escalate(score))
def test_calculate_confidence_low(self):
"""Test a low confidence scenario (Validation Failure)"""
code = "broken code"
context = {'hardware': 'ESP32'}
validation_results = (False, ['Syntax Error'])
score = self.scorer.calculate_confidence(code, context, validation_results)
# Expected Score Breakdown:
# Validation: 0 (Failed)
# Patterns: 15 (Default baseline when no rules)
# Hardware: 10 (Generic)
# Context: 3 (Hardware only)
# Total: 28
self.assertLess(score, 50)
self.assertTrue(self.scorer.should_escalate(score))
def test_should_escalate_thresholds(self):
"""Test flagging logic at specific boundaries"""
# Default threshold is 70
self.assertTrue(self.scorer.should_escalate(69))
self.assertFalse(self.scorer.should_escalate(70))
self.assertFalse(self.scorer.should_escalate(71))
# Custom threshold
self.assertTrue(self.scorer.should_escalate(80, threshold=85))
def test_validation_scoring_penalties(self):
"""Test that warnings reduce score but don't zero it"""
# 2 warnings -> -10 points (5 per warning)
validation_results = (True, [{'message': 'W1'}, {'message': 'W2'}])
score = self.scorer._score_validation(validation_results)
self.assertEqual(score, 30.0) # 40 - 10
# Many warnings -> Min score 10
many_issues = [{'message': 'W'}] * 10
score_min = self.scorer._score_validation((True, many_issues))
self.assertEqual(score_min, 10.0)
def test_pattern_familiarity(self):
"""Test pattern matching logic"""
code = "ledcSetup(0, 5000, 8);"
# Match
context_match = {'learned_rules': ['Use ledcSetup']}
score_match = self.scorer._score_patterns(code, context_match)
self.assertEqual(score_match, 30.0) # Capped max
# No Match
context_miss = {'learned_rules': ['Use analogRead']}
score_miss = self.scorer._score_patterns(code, context_miss)
self.assertEqual(score_miss, 0.0)
# No Rules Provided (Neutral Baseline)
score_empty = self.scorer._score_patterns(code, {})
self.assertEqual(score_empty, 15.0)
if __name__ == '__main__':
unittest.main()