Add comprehensive unit tests for BuddAI confidence scoring and fallback mechanisms

- Implemented tests for confidence scoring logic in `test_buddai_confidence.py` and `test_confidence.py`, covering high and low confidence scenarios, escalation thresholds, and validation scoring penalties.
- Created tests for fallback logging functionality in `test_fallback_logging.py`, ensuring fallback prompts are logged correctly and the `/logs` command retrieves log content.
- Developed tests for fallback prompts in `test_fallback_prompts.py`, verifying that specific prompts are used for different models based on confidence levels.
- Generated detailed test reports for multiple test runs, confirming all tests passed successfully.
2026-01-08 21:58:40 +00:00 · 2026-01-07 20:27:39 +00:00 · 2026-01-07 20:27:39 +00:00 · 48716ff271
commit 48716ff271
parent 27601aa2ba
20 changed files with 2236 additions and 156 deletions
--- a/tests/test_confidence.py
+++ b/tests/test_confidence.py
@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+"""
+Unit tests for Confidence Scoring
+"""
+import unittest
+import sys
+from pathlib import Path
+
+# Put the repo root (this file's grandparent directory) first on sys.path for the `core` import
+REPO_ROOT = Path(__file__).parent.parent
+if str(REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(REPO_ROOT))
+
+from core.buddai_confidence import ConfidenceScorer
+
+class TestConfidence(unittest.TestCase):
+    def setUp(self):
+        self.scorer = ConfidenceScorer()
+
+    def test_confidence_high(self):
+        """Known good code → should score >70%"""
+        # A perfect scenario: Valid code, matches hardware, matches rules
+        code = "void setup() { Serial.begin(115200); }"
+        context = {
+            'hardware': 'ESP32',
+            'learned_rules': ['Serial.begin(115200)'],
+            'user_message': 'setup serial'
+        }
+        validation_results = (True, []) # Valid, no issues
+        
+        score = self.scorer.calculate_confidence(code, context, validation_results)
+        self.assertGreater(score, 70, f"Score {score} should be > 70")
+
+    def test_confidence_low(self):
+        """Edge case code → should score <70%"""
+        # A failure scenario: Invalid code
+        code = "broken code"
+        context = {'hardware': 'ESP32'}
+        validation_results = (False, ['Syntax Error'])
+        
+        score = self.scorer.calculate_confidence(code, context, validation_results)
+        self.assertLess(score, 70, f"Score {score} should be < 70")
+
+    def test_threshold_detection(self):
+        """Verify escalation trigger logic"""
+        # Default threshold 70
+        self.assertTrue(self.scorer.should_escalate(69))
+        self.assertFalse(self.scorer.should_escalate(71))
+        
+        # Custom threshold
+        self.assertTrue(self.scorer.should_escalate(80, threshold=85))
+
+if __name__ == '__main__':  # run the suite only when executed as a script, not on import
+    unittest.main()