macha-autonomous/chat.py
Lily Miller 06c9d9fb45 Fix: Remove incorrect tool_definitions parameter from _query_ollama_with_tools call
The method retrieves tool definitions internally and does not accept them as a parameter.

This was causing a TypeError in macha-chat.
2025-10-06 16:29:56 -06:00

#!/usr/bin/env python3
"""
Interactive chat interface with Macha AI agent.
Unified chat/conversation interface using tool-calling architecture.
"""
import json
import os
import subprocess
import sys
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Any, Optional

# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))

from agent import MachaAgent


class MachaChatSession:
    """Interactive chat session with Macha using tool-calling architecture"""

    def __init__(
        self,
        ollama_host: str = "http://localhost:11434",
        model: str = "gpt-oss:latest",
        state_dir: Path = Path("/var/lib/macha"),
        enable_tools: bool = True
    ):
        """Initialize chat session with Macha

        Args:
            ollama_host: Ollama API endpoint
            model: Model name to use
            state_dir: State directory for agent
            enable_tools: Whether to enable tool calling (should always be True)
        """
        self.agent = MachaAgent(
            ollama_host=ollama_host,
            model=model,
            state_dir=state_dir,
            enable_tools=enable_tools,
            use_queue=True,
            priority="INTERACTIVE"
        )
        self.conversation_history: List[Dict[str, str]] = []
        self.session_start = datetime.now().isoformat()
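
    # Usage sketch (illustrative; the question text is made up):
    #
    #     session = MachaChatSession()
    #     print(session.ask_once("Is the ollama service healthy?"))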

    def _auto_diagnose_ollama(self) -> str:
        """Automatically diagnose Ollama issues"""
        diagnostics = []
        diagnostics.append("🔍 AUTO-DIAGNOSIS: Investigating Ollama failure...\n")

        # Check if Ollama service is running
        try:
            result = subprocess.run(
                ['systemctl', 'is-active', 'ollama.service'],
                capture_output=True,
                text=True,
                timeout=5
            )
            if result.returncode == 0:
                diagnostics.append("✅ Ollama service is active")
            else:
                diagnostics.append(f"❌ Ollama service is NOT active: {result.stdout.strip()}")
                # Get service status
                status_result = subprocess.run(
                    ['systemctl', 'status', 'ollama.service', '--no-pager', '-l'],
                    capture_output=True,
                    text=True,
                    timeout=5
                )
                diagnostics.append(f"\nService status:\n```\n{status_result.stdout[-500:]}\n```")
        except Exception as e:
            diagnostics.append(f"⚠️ Could not check service status: {e}")

        # Check memory usage
        try:
            result = subprocess.run(['free', '-h'], capture_output=True, text=True, timeout=5)
            lines = result.stdout.split('\n')
            for line in lines[:3]:  # First 3 lines
                diagnostics.append(f" {line}")
        except Exception as e:
            diagnostics.append(f"⚠️ Could not check memory: {e}")

        # Check which models are loaded
        try:
            import requests
            response = requests.get(f"{self.agent.ollama_host}/api/tags", timeout=5)
            if response.status_code == 200:
                models = response.json().get('models', [])
                diagnostics.append(f"\n📦 Loaded models ({len(models)}):")
                for model in models:
                    name = model.get('name', 'unknown')
                    size = model.get('size', 0) / (1024**3)
                    is_current = "← TARGET" if name == self.agent.model else ""
                    diagnostics.append(f"{name} ({size:.1f} GB) {is_current}")
                # Check if target model is loaded
                model_names = [m.get('name') for m in models]
                if self.agent.model not in model_names:
                    diagnostics.append(f"\n❌ TARGET MODEL NOT LOADED: {self.agent.model}")
                    diagnostics.append(f" Available models: {', '.join(model_names)}")
            else:
                diagnostics.append(f"❌ Ollama API returned {response.status_code}")
        except Exception as e:
            diagnostics.append(f"⚠️ Could not query Ollama API: {e}")

        # Check recent Ollama logs
        try:
            result = subprocess.run(
                ['journalctl', '-u', 'ollama.service', '-n', '10', '--no-pager'],
                capture_output=True,
                text=True,
                timeout=5
            )
            if result.stdout:
                diagnostics.append(f"\n📋 Recent Ollama logs (last 10 lines):\n```\n{result.stdout}\n```")
        except Exception as e:
            diagnostics.append(f"⚠️ Could not check logs: {e}")

        return "\n".join(diagnostics)
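
    # Note: "requests" is imported lazily above (and in /debug below), so a
    # missing requests package surfaces as a "Could not query Ollama API"
    # diagnostic rather than an import error at module load time.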

    def process_message(self, user_message: str, verbose: bool = False) -> str:
        """Process a user message and return Macha's response

        Args:
            user_message: The user's message
            verbose: Whether to show detailed token counts

        Returns:
            Macha's response
        """
        # Add user message to history
        self.conversation_history.append({
            'role': 'user',
            'message': user_message,
            'timestamp': datetime.now().isoformat()
        })
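
        # Note: history entries use {'role', 'message', 'timestamp'} keys,
        # while the Ollama chat API expects {'role', 'content'}; the loop
        # below performs that translation.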

        # Build chat messages for tool-calling API
        messages = []

        # Query relevant knowledge based on user message
        knowledge_context = self.agent._query_relevant_knowledge(user_message, limit=3)

        # Add recent conversation history (last 15 messages to stay within context limits)
        recent_history = self.conversation_history[-15:]
        for entry in recent_history:
            content = entry['message']
            # Truncate very long messages (e.g., command outputs)
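            # (keep the head and the tail so both the command and its final
            # output survive truncation)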
            if len(content) > 3000:
                content = content[:1500] + "\n... [message truncated] ...\n" + content[-1500:]

            # Add knowledge context to last user message if available
            # ("is" compares identity, so an earlier duplicate of the same
            # message text can never match)
            if entry is recent_history[-1] and knowledge_context:
                content += knowledge_context

            messages.append({
                "role": entry['role'],
                "content": content
            })

        if verbose:
            # Estimate tokens for debugging
            total_chars = sum(len(json.dumps(m)) for m in messages)
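            # Rough rule of thumb: ~4 characters per token for English text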
            estimated_tokens = total_chars // 4
            print(f"[Context: {estimated_tokens:,} tokens, {len(messages)} messages]")

        try:
            # Use tool-aware chat API - this handles all tool calling automatically
            # Note: tool definitions are retrieved internally by _query_ollama_with_tools
            ai_response = self.agent._query_ollama_with_tools(messages)
        except Exception as e:
            error_msg = (
                f"❌ CRITICAL: Failed to communicate with Ollama inference engine\n\n"
                f"Error Type: {type(e).__name__}\n"
                f"Error Message: {str(e)}\n\n"
            )
            # Auto-diagnose the issue
            diagnostics = self._auto_diagnose_ollama()
            return error_msg + "\n" + diagnostics

        if not ai_response:
            error_msg = (
                f"❌ Empty response from Ollama inference engine\n\n"
                f"The request succeeded but returned no data. This usually means:\n"
                f" • The model ({self.agent.model}) is still loading\n"
                f" • Ollama ran out of memory during generation\n"
                f" • The prompt was too large for the context window\n\n"
            )
            # Auto-diagnose the issue
            diagnostics = self._auto_diagnose_ollama()
            return error_msg + "\n" + diagnostics

        # Add response to history
        self.conversation_history.append({
            'role': 'assistant',
            'message': ai_response,
            'timestamp': datetime.now().isoformat()
        })

        return ai_response

    def run_interactive(self):
        """Run the interactive chat session"""
        print("=" * 70)
        print("🌐 MACHA INTERACTIVE CHAT")
        print("=" * 70)
        print("Type your message and press Enter. Commands:")
        print(" /exit or /quit - End the chat session")
        print(" /clear - Clear conversation history")
        print(" /history - Show conversation history")
        print(" /debug - Show Ollama connection status")
        print("=" * 70)
        print()

        while True:
            try:
                # Get user input
                user_input = input("\n💬 YOU: ").strip()
                if not user_input:
                    continue

                # Handle special commands
                if user_input.lower() in ['/exit', '/quit']:
                    print("\n👋 Ending chat session. Goodbye!")
                    break
                elif user_input.lower() == '/clear':
                    self.conversation_history.clear()
                    print("🧹 Conversation history cleared.")
                    continue
                elif user_input.lower() == '/history':
                    print("\n" + "=" * 70)
                    print("CONVERSATION HISTORY")
                    print("=" * 70)
                    for entry in self.conversation_history:
                        role = entry['role'].upper()
                        msg = entry['message'][:100] + "..." if len(entry['message']) > 100 else entry['message']
                        print(f"{role}: {msg}")
                    print("=" * 70)
                    continue
                elif user_input.lower() == '/debug':
                    print("\n" + "=" * 70)
                    print("MACHA ARCHITECTURE & STATUS")
                    print("=" * 70)
                    print("\n🏗️ SYSTEM ARCHITECTURE:")
                    print(f" Hostname: macha.coven.systems")
                    print(f" Service: macha-autonomous.service (systemd)")
                    print(f" Working Directory: /var/lib/macha")
                    print("\n👤 EXECUTION CONTEXT:")
                    current_user = os.getenv('USER') or os.getenv('USERNAME') or 'unknown'
                    print(f" Current User: {current_user}")
                    print(f" UID: {os.getuid()}")
                    # Check if user has sudo access
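                    # ("sudo -n" runs non-interactively: it exits non-zero when
                    # a password would be required instead of prompting)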
                    try:
                        result = subprocess.run(['sudo', '-n', 'true'],
                                                capture_output=True, timeout=1)
                        if result.returncode == 0:
                            print(f" Sudo Access: ✓ Yes (passwordless)")
                        else:
                            print(f" Sudo Access: ⚠ Requires password")
                    except Exception:
                        print(f" Sudo Access: ❌ No")
                    print(f" Note: Chat runs as invoking user (you), using macha's tools")
                    print("\n🧠 INFERENCE ENGINE:")
                    print(f" Backend: Ollama")
                    print(f" Host: {self.agent.ollama_host}")
                    print(f" Model: {self.agent.model}")
                    print(f" Service: ollama.service (systemd)")
                    print(f" Queue Worker: ollama-queue-worker.service")
                    print("\n💾 DATABASE:")
                    print(f" Backend: ChromaDB")
                    print(f" State: {self.agent.state_dir}")
                    print("\n🔍 OLLAMA STATUS:")
                    # Try to query Ollama status
                    try:
                        import requests
                        # Check if Ollama is running
                        response = requests.get(f"{self.agent.ollama_host}/api/tags", timeout=5)
                        if response.status_code == 200:
                            models = response.json().get('models', [])
                            print(f" Status: ✓ Running")
                            print(f" Loaded models: {len(models)}")
                            for model in models:
                                name = model.get('name', 'unknown')
                                size = model.get('size', 0) / (1024**3)  # GB
                                is_current = "← ACTIVE" if name == self.agent.model else ""
                                print(f"{name} ({size:.1f} GB) {is_current}")
                        else:
                            print(f" Status: ❌ Error (HTTP {response.status_code})")
                    except Exception as e:
                        print(f" Status: ❌ Cannot connect: {e}")
                        print(f" Hint: Check 'systemctl status ollama.service'")
                    print("\n🛠️ TOOLS:")
                    print(f" Enabled: {self.agent.enable_tools}")
                    if self.agent.enable_tools:
                        print(f" Available tools: {len(self.agent.tools.get_tool_definitions())}")
                        print(f" Architecture: Centralized command_patterns.py")
                    print("\n💡 CONVERSATION:")
                    print(f" History: {len(self.conversation_history)} messages")
                    print(f" Session started: {self.session_start}")
                    print("=" * 70)
                    continue

                # Process the message
                print("\n🤖 MACHA: ", end='', flush=True)
                response = self.process_message(user_input, verbose=False)
                print(response)

            except KeyboardInterrupt:
                print("\n\n👋 Chat interrupted. Use /exit to quit properly.")
                continue
            except EOFError:
                print("\n\n👋 Ending chat session. Goodbye!")
                break
            except Exception as e:
                print(f"\n❌ Error: {e}")
                import traceback
                traceback.print_exc()
                continue

    def ask_once(self, question: str, verbose: bool = True) -> str:
        """Ask a single question and return the response (for macha-ask command)

        Args:
            question: The question to ask
            verbose: Whether to show detailed context information

        Returns:
            Macha's response
        """
        response = self.process_message(question, verbose=verbose)
        return response


def main():
    """Main entry point for macha-chat"""
    session = MachaChatSession()
    session.run_interactive()


def ask_main():
    """Entry point for macha-ask"""
    if len(sys.argv) < 2:
        print("Usage: macha-ask <question>", file=sys.stderr)
        sys.exit(1)

    question = " ".join(sys.argv[1:])
    session = MachaChatSession()
    response = session.ask_once(question, verbose=True)

    print("\n" + "=" * 60)
    print("MACHA:")
    print("=" * 60)
    print(response)
    print("=" * 60)
    print()


if __name__ == "__main__":
    main()
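
# Note (assumption, not confirmed by this file): the macha-chat and macha-ask
# commands are presumably wired to main() and ask_main() via console-script
# entry points in the package metadata, e.g. in pyproject.toml:
#
#     [project.scripts]
#     macha-chat = "chat:main"
#     macha-ask = "chat:ask_main"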