BuddAI/docs/buddai_confidence.py
JamesTheGiblet 27601aa2ba Add comprehensive unit tests for BuddAI functionality
- Introduced 16 additional coverage tests in `test_additional_coverage.py` to enhance overall test coverage.
- Added 15 extended feature tests in `test_extended_features.py` to validate new functionalities.
- Implemented 27 final coverage tests in `test_final_coverage.py` to achieve a total of 100 tests.
- Created 2 fallback logic tests in `test_fallback_logic.py` to ensure proper fallback behavior based on confidence scores.
- Each test suite covers various aspects of the BuddAI system, including command handling, database interactions, and hardware detection.
2026-01-07 19:48:24 +00:00

127 lines
No EOL
4.3 KiB
Python

import re
class ConfidenceScorer:
"""
Calculates confidence scores for generated code based on validation results,
pattern familiarity, hardware alignment, and context completeness.
"""
def calculate_confidence(self, code: str, context: dict, validation_results: tuple) -> int:
"""
Calculates a 0-100 confidence score based on multiple factors.
Args:
code (str): The generated code to evaluate.
context (dict): Context dictionary containing hardware, rules, etc.
validation_results (tuple): A tuple of (success: bool, issues: list).
Returns:
int: A confidence score between 0 and 100.
"""
score = 0.0
# 1. Validation pass rate (0-40 points)
score += self._score_validation(validation_results)
# 2. Pattern familiarity (0-30 points)
score += self._score_patterns(code, context)
# 3. Hardware match (0-20 points)
score += self._score_hardware(code, context)
# 4. Context completeness (0-10 points)
score += self._score_context(context)
return int(min(100, max(0, score)))
def should_escalate(self, confidence: int, threshold: int = 70) -> bool:
"""
Determines if the generation should be escalated or flagged for review.
Args:
confidence (int): The calculated confidence score.
threshold (int): The score below which escalation is triggered.
Returns:
bool: True if confidence is below threshold, False otherwise.
"""
return confidence < threshold
def _score_validation(self, validation_results: tuple) -> float:
"""
Calculates score based on validation results (Max 40 points).
"""
if not validation_results:
return 0.0
success, issues = validation_results
if not success:
return 0.0
# Start with full points for success
score = 40.0
# Deduct points for non-critical issues/warnings
if issues:
# Deduct 5 points per warning, but don't go below 10 if successful
penalty = len(issues) * 5.0
score = max(10.0, score - penalty)
return score
def _score_patterns(self, code: str, context: dict) -> float:
"""
Calculates score based on pattern familiarity (Max 30 points).
Checks if learned rules or preferred patterns appear in the code.
"""
learned_rules = context.get('learned_rules', [])
if not learned_rules:
# If no rules are known/provided, return a neutral baseline
return 15.0
matches = 0
code_lower = code.lower()
for rule in learned_rules:
# Heuristic: Check if key terms from the rule exist in the code.
rule_text = rule if isinstance(rule, str) else str(rule)
# Extract significant words (simple heuristic)
keywords = [w.lower() for w in re.split(r'\W+', rule_text) if len(w) > 4]
if keywords and any(k in code_lower for k in keywords):
matches += 1
if not matches:
return 0.0
# Calculate score proportional to matches, capped at 30
match_ratio = matches / len(learned_rules)
# Boost factor (1.5) allows full score even if not 100% of context rules apply
return min(30.0, match_ratio * 30.0 * 1.5)
def _score_hardware(self, code: str, context: dict) -> float:
"""
Calculates score based on hardware match (Max 20 points).
"""
target_hardware = context.get('hardware', '').lower()
code_lower = code.lower()
if not target_hardware or target_hardware == 'generic':
return 10.0
# Check for hardware alignment
if target_hardware in code_lower:
return 20.0
return 10.0
def _score_context(self, context: dict) -> float:
"""
Calculates score based on context completeness (Max 10 points).
"""
score = 0.0
if context.get('hardware'): score += 3.0
if context.get('user_message') or context.get('intent'): score += 3.0
if context.get('history') or context.get('learned_rules'): score += 4.0
return min(10.0, score)