mirror of
https://github.com/JamesTheGiblet/BuddAI.git
synced 2026-01-08 21:58:40 +00:00
Add comprehensive unit tests for BuddAI confidence scoring and fallback mechanisms
- Implemented tests for confidence scoring logic in `test_buddai_confidence.py` and `test_confidence.py`, covering high and low confidence scenarios, escalation thresholds, and validation scoring penalties.
- Created tests for fallback logging functionality in `test_fallback_logging.py`, ensuring fallback prompts are logged correctly and the `/logs` command retrieves log content.
- Developed tests for fallback prompts in `test_fallback_prompts.py`, verifying that specific prompts are used for different models based on confidence levels.
- Generated detailed test reports for multiple test runs, confirming all tests passed successfully.
This commit is contained in:
parent
27601aa2ba
commit
48716ff271
20 changed files with 2236 additions and 156 deletions
54
tests/test_confidence.py
Normal file
54
tests/test_confidence.py
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Unit tests for Confidence Scoring
|
||||
"""
|
||||
import unittest
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Put the repository root (two levels above this file) at the front of
# sys.path so the `core` package resolves when this file is run directly.
REPO_ROOT = Path(__file__).parent.parent
_root_entry = str(REPO_ROOT)
if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)
|
||||
|
||||
from core.buddai_confidence import ConfidenceScorer
|
||||
|
||||
class TestConfidence(unittest.TestCase):
    """Checks on ConfidenceScorer scoring and its escalation decision."""

    def setUp(self):
        """Build a fresh scorer for each test."""
        self.scorer = ConfidenceScorer()

    def test_confidence_high(self):
        """Code that validates cleanly should score above 70."""
        # Best case: validation passed with no issues, and the snippet
        # contains the one learned rule verbatim.
        passing_validation = (True, [])
        scenario = {
            'hardware': 'ESP32',
            'learned_rules': ['Serial.begin(115200)'],
            'user_message': 'setup serial',
        }
        sketch = "void setup() { Serial.begin(115200); }"

        score = self.scorer.calculate_confidence(
            sketch, scenario, passing_validation
        )

        self.assertGreater(score, 70, f"Score {score} should be > 70")

    def test_confidence_low(self):
        """Code that fails validation should score below 70."""
        # Failure case: validation reports a syntax error.
        failing_validation = (False, ['Syntax Error'])
        scenario = {'hardware': 'ESP32'}
        sketch = "broken code"

        score = self.scorer.calculate_confidence(
            sketch, scenario, failing_validation
        )

        self.assertLess(score, 70, f"Score {score} should be < 70")

    def test_threshold_detection(self):
        """should_escalate is true just below the threshold, false just above."""
        scorer = self.scorer

        # No threshold passed: the default cut-off of 70 applies.
        self.assertTrue(scorer.should_escalate(69))
        self.assertFalse(scorer.should_escalate(71))

        # An explicit threshold replaces the default.
        self.assertTrue(scorer.should_escalate(80, threshold=85))
|
||||
|
||||
# Run the suite when this file is executed directly rather than imported.
if __name__ == "__main__":
    unittest.main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue