mirror of
https://github.com/JamesTheGiblet/BuddAI.git
synced 2026-01-08 21:58:40 +00:00
Add comprehensive unit tests for BuddAI functionality
- Introduced 16 additional coverage tests in `test_additional_coverage.py` to enhance overall test coverage. - Added 15 extended feature tests in `test_extended_features.py` to validate new functionalities. - Implemented 27 final coverage tests in `test_final_coverage.py` to achieve a total of 100 tests. - Created 2 fallback logic tests in `test_fallback_logic.py` to ensure proper fallback behavior based on confidence scores. - Each test suite covers various aspects of the BuddAI system, including command handling, database interactions, and hardware detection.
This commit is contained in:
parent
f9fd27d228
commit
27601aa2ba
34 changed files with 5022 additions and 2921 deletions
104
test_buddai_confidence.py
Normal file
104
test_buddai_confidence.py
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Unit tests for BuddAI Confidence Scorer
|
||||
Verifies scoring logic, penalties, and escalation flags.
|
||||
"""
|
||||
|
||||
import unittest
|
||||
import sys
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
# Dynamic import setup
|
||||
REPO_ROOT = Path(__file__).parent.parent
|
||||
if str(REPO_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(REPO_ROOT))
|
||||
|
||||
from core.buddai_confidence import ConfidenceScorer
|
||||
|
||||
class TestConfidenceScorer(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.scorer = ConfidenceScorer()
|
||||
|
||||
def test_calculate_confidence_high(self):
|
||||
"""Test a high confidence scenario (Success + Matches)"""
|
||||
code = "void setup() { Serial.begin(115200); }"
|
||||
context = {
|
||||
'hardware': 'ESP32',
|
||||
'learned_rules': ['Serial.begin(115200)'],
|
||||
'user_message': 'setup serial',
|
||||
'history': []
|
||||
}
|
||||
# Success, no issues
|
||||
validation_results = (True, [])
|
||||
|
||||
score = self.scorer.calculate_confidence(code, context, validation_results)
|
||||
|
||||
# Expected Score Breakdown:
|
||||
# Validation: 40 (Perfect)
|
||||
# Patterns: ~30 (1 match / 1 rule = 100% * 1.5 boost, capped at 30)
|
||||
# Hardware: 10 (ESP32 not explicitly in code string, generic fallback)
|
||||
# Context: 10 (Hardware + Message + Rules present)
|
||||
# Total ~90
|
||||
self.assertGreaterEqual(score, 80)
|
||||
self.assertFalse(self.scorer.should_escalate(score))
|
||||
|
||||
def test_calculate_confidence_low(self):
|
||||
"""Test a low confidence scenario (Validation Failure)"""
|
||||
code = "broken code"
|
||||
context = {'hardware': 'ESP32'}
|
||||
validation_results = (False, ['Syntax Error'])
|
||||
|
||||
score = self.scorer.calculate_confidence(code, context, validation_results)
|
||||
|
||||
# Expected Score Breakdown:
|
||||
# Validation: 0 (Failed)
|
||||
# Patterns: 15 (Default baseline when no rules)
|
||||
# Hardware: 10 (Generic)
|
||||
# Context: 3 (Hardware only)
|
||||
# Total: 28
|
||||
self.assertLess(score, 50)
|
||||
self.assertTrue(self.scorer.should_escalate(score))
|
||||
|
||||
def test_should_escalate_thresholds(self):
|
||||
"""Test flagging logic at specific boundaries"""
|
||||
# Default threshold is 70
|
||||
self.assertTrue(self.scorer.should_escalate(69))
|
||||
self.assertFalse(self.scorer.should_escalate(70))
|
||||
self.assertFalse(self.scorer.should_escalate(71))
|
||||
|
||||
# Custom threshold
|
||||
self.assertTrue(self.scorer.should_escalate(80, threshold=85))
|
||||
|
||||
def test_validation_scoring_penalties(self):
|
||||
"""Test that warnings reduce score but don't zero it"""
|
||||
# 2 warnings -> -10 points (5 per warning)
|
||||
validation_results = (True, [{'message': 'W1'}, {'message': 'W2'}])
|
||||
score = self.scorer._score_validation(validation_results)
|
||||
self.assertEqual(score, 30.0) # 40 - 10
|
||||
|
||||
# Many warnings -> Min score 10
|
||||
many_issues = [{'message': 'W'}] * 10
|
||||
score_min = self.scorer._score_validation((True, many_issues))
|
||||
self.assertEqual(score_min, 10.0)
|
||||
|
||||
def test_pattern_familiarity(self):
|
||||
"""Test pattern matching logic"""
|
||||
code = "ledcSetup(0, 5000, 8);"
|
||||
|
||||
# Match
|
||||
context_match = {'learned_rules': ['Use ledcSetup']}
|
||||
score_match = self.scorer._score_patterns(code, context_match)
|
||||
self.assertEqual(score_match, 30.0) # Capped max
|
||||
|
||||
# No Match
|
||||
context_miss = {'learned_rules': ['Use analogRead']}
|
||||
score_miss = self.scorer._score_patterns(code, context_miss)
|
||||
self.assertEqual(score_miss, 0.0)
|
||||
|
||||
# No Rules Provided (Neutral Baseline)
|
||||
score_empty = self.scorer._score_patterns(code, {})
|
||||
self.assertEqual(score_empty, 15.0)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue