Initial commit: Split Macha autonomous system into separate flake
Macha is now a standalone NixOS flake that can be imported into other systems. This provides:

- Independent versioning
- Easier reusability
- Cleaner separation of concerns
- Better development workflow

Includes:

- Complete autonomous system code
- NixOS module with full configuration options
- Queue-based architecture with priority system
- Chunked map-reduce for large outputs
- ChromaDB knowledge base
- Tool calling system
- Multi-host SSH management
- Gotify notification integration

All capabilities from DESIGN.md are preserved.
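
As a rough sketch of how another system might consume the flake (the flake URL and module attribute below are placeholders, not taken from this commit), a downstream flake.nix could look like:

    {
      inputs.nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
      inputs.macha.url = "github:example/macha";

      outputs = { nixpkgs, macha, ... }: {
        nixosConfigurations.myhost = nixpkgs.lib.nixosSystem {
          system = "x86_64-linux";
          modules = [ macha.nixosModules.default ./configuration.nix ];
        };
      };
    }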
chat.py (new file, 522 lines)
#!/usr/bin/env python3
"""
Interactive chat interface with Macha AI agent.
Allows conversational interaction and directive execution.
"""

import json
import os
import subprocess
import sys
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Any

# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))

from agent import MachaAgent


class MachaChatSession:
    """Interactive chat session with Macha"""

    def __init__(self):
        self.agent = MachaAgent(use_queue=True, priority="INTERACTIVE")
        self.conversation_history: List[Dict[str, str]] = []
        self.session_start = datetime.now().isoformat()

    def _create_chat_prompt(self, user_message: str) -> str:
        """Create a prompt for the chat session"""

        # Build conversation context
        context = ""
        if self.conversation_history:
            context = "\n\nCONVERSATION HISTORY:\n"
            for entry in self.conversation_history[-10:]:  # Last 10 messages
                role = entry['role'].upper()
                msg = entry['message']
                context += f"{role}: {msg}\n"

        prompt = f"""{MachaAgent.SYSTEM_PROMPT}

TASK: INTERACTIVE CHAT SESSION

You are in an interactive chat session with the system administrator.
You can have a natural conversation and execute commands when directed.

CAPABILITIES:
- Answer questions about system status
- Explain configurations and issues
- Execute commands when explicitly asked
- Provide guidance and recommendations

COMMAND EXECUTION:
When the user asks you to run a command or perform an action that requires execution:
1. Respond with a JSON object containing the command to execute
2. Format: {{"action": "execute", "command": "the command", "explanation": "why you're running it"}}
3. After seeing the output, continue the conversation naturally

RESPONSE FORMAT:
- For normal conversation: Respond naturally in plain text
- For command execution: Respond with JSON containing action/command/explanation
- Keep responses concise but informative

RULES:
- Only execute commands when explicitly asked or when it's clearly needed
- Explain what you're about to do before executing
- Never execute destructive commands without explicit confirmation
- If unsure, ask for clarification
{context}

USER: {user_message}

MACHA:"""

        return prompt
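
    # A command-execution reply, in the format the prompt above requests,
    # looks like this (values are illustrative, not from a live session):
    #   {"action": "execute", "command": "df -h",
    #    "explanation": "Checking disk usage as requested"}
    # Replies that do not parse as JSON of this shape are treated as plain
    # chat by _parse_response() below.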

    def _execute_command(self, command: str) -> Dict[str, Any]:
        """Execute a shell command and return results"""
        try:
            result = subprocess.run(
                command,
                shell=True,
                capture_output=True,
                text=True,
                timeout=30
            )

            # Check if command failed due to permissions
            needs_sudo = False
            permission_errors = [
                'Interactive authentication required',
                'Permission denied',
                'Operation not permitted',
                'Must be root',
                'insufficient privileges',
                'authentication is required'
            ]

            if result.returncode != 0:
                error_text = (result.stderr + result.stdout).lower()
                for perm_error in permission_errors:
                    if perm_error.lower() in error_text:
                        needs_sudo = True
                        break

            # Retry with sudo if permission error detected
            if needs_sudo and not command.strip().startswith('sudo'):
                print(f"\n⚠️ Permission denied, retrying with sudo...")
                sudo_command = f"sudo {command}"
                result = subprocess.run(
                    sudo_command,
                    shell=True,
                    capture_output=True,
                    text=True,
                    timeout=30
                )

                return {
                    'success': result.returncode == 0,
                    'exit_code': result.returncode,
                    'stdout': result.stdout,
                    'stderr': result.stderr,
                    'command': sudo_command,
                    'retried_with_sudo': True
                }

            return {
                'success': result.returncode == 0,
                'exit_code': result.returncode,
                'stdout': result.stdout,
                'stderr': result.stderr,
                'command': command,
                'retried_with_sudo': False
            }
        except subprocess.TimeoutExpired:
            return {
                'success': False,
                'exit_code': -1,
                'stdout': '',
                'stderr': 'Command timed out after 30 seconds',
                'command': command,
                'retried_with_sudo': False
            }
        except Exception as e:
            return {
                'success': False,
                'exit_code': -1,
                'stdout': '',
                'stderr': str(e),
                'command': command,
                'retried_with_sudo': False
            }
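
    # Hypothetical example of the retry path above: `systemctl restart nginx`
    # exiting non-zero with "Interactive authentication required" on stderr is
    # rerun once as `sudo systemctl restart nginx`, and the returned dict then
    # carries 'retried_with_sudo': True and 'command' set to the sudo variant.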

    def _parse_response(self, response: str) -> Dict[str, Any]:
        """Parse AI response to determine if it's a command or text"""
        try:
            # Try to parse as JSON
            parsed = json.loads(response.strip())
            if isinstance(parsed, dict) and 'action' in parsed:
                return parsed
        except json.JSONDecodeError:
            pass

        # It's plain text conversation
        return {'action': 'chat', 'message': response}

    def _auto_diagnose_ollama(self) -> str:
        """Automatically diagnose Ollama issues"""
        diagnostics = []

        diagnostics.append("🔍 AUTO-DIAGNOSIS: Investigating Ollama failure...\n")

        # Check if Ollama service is running
        try:
            result = subprocess.run(
                ['systemctl', 'is-active', 'ollama.service'],
                capture_output=True,
                text=True,
                timeout=5
            )
            if result.returncode == 0:
                diagnostics.append("✅ Ollama service is active")
            else:
                diagnostics.append(f"❌ Ollama service is NOT active: {result.stdout.strip()}")
                # Get service status
                status_result = subprocess.run(
                    ['systemctl', 'status', 'ollama.service', '--no-pager', '-l'],
                    capture_output=True,
                    text=True,
                    timeout=5
                )
                diagnostics.append(f"\nService status:\n```\n{status_result.stdout[-500:]}\n```")
        except Exception as e:
            diagnostics.append(f"⚠️ Could not check service status: {e}")

        # Check memory usage
        try:
            result = subprocess.run(['free', '-h'], capture_output=True, text=True, timeout=5)
            lines = result.stdout.split('\n')
            for line in lines[:3]:  # First 3 lines
                diagnostics.append(f" {line}")
        except Exception as e:
            diagnostics.append(f"⚠️ Could not check memory: {e}")

        # Check which models are loaded
        try:
            import requests
            response = requests.get(f"{self.agent.ollama_host}/api/tags", timeout=5)
            if response.status_code == 200:
                models = response.json().get('models', [])
                diagnostics.append(f"\n📦 Loaded models ({len(models)}):")
                for model in models:
                    name = model.get('name', 'unknown')
                    size = model.get('size', 0) / (1024**3)
                    is_current = "← TARGET" if name == self.agent.model else ""
                    diagnostics.append(f" • {name} ({size:.1f} GB) {is_current}")

                # Check if target model is loaded
                model_names = [m.get('name') for m in models]
                if self.agent.model not in model_names:
                    diagnostics.append(f"\n❌ TARGET MODEL NOT LOADED: {self.agent.model}")
                    diagnostics.append(f" Available models: {', '.join(model_names)}")
            else:
                diagnostics.append(f"❌ Ollama API returned {response.status_code}")
        except Exception as e:
            diagnostics.append(f"⚠️ Could not query Ollama API: {e}")

        # Check recent Ollama logs
        try:
            result = subprocess.run(
                ['journalctl', '-u', 'ollama.service', '-n', '10', '--no-pager'],
                capture_output=True,
                text=True,
                timeout=5
            )
            if result.stdout:
                diagnostics.append(f"\n📋 Recent Ollama logs (last 10 lines):\n```\n{result.stdout}\n```")
        except Exception as e:
            diagnostics.append(f"⚠️ Could not check logs: {e}")

        return "\n".join(diagnostics)
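
    # The checks above mirror what an operator would run by hand:
    #   systemctl is-active ollama.service
    #   free -h
    #   GET <ollama_host>/api/tags
    #   journalctl -u ollama.service -n 10 --no-pager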

    def process_message(self, user_message: str) -> str:
        """Process a user message and return Macha's response"""

        # Add user message to history
        self.conversation_history.append({
            'role': 'user',
            'message': user_message,
            'timestamp': datetime.now().isoformat()
        })

        # Build chat messages for tool-calling API
        messages = []

        # Query relevant knowledge based on user message
        knowledge_context = self.agent._query_relevant_knowledge(user_message, limit=3)

        # Add recent conversation history (last 15 messages to stay within context limits).
        # With tool calling, messages grow quickly, so we limit more aggressively.
        recent_history = self.conversation_history[-15:]  # Last ~7 exchanges
        for entry in recent_history:
            content = entry['message']
            # Truncate very long messages (e.g., command outputs)
            if len(content) > 3000:
                content = content[:1500] + "\n... [message truncated] ...\n" + content[-1500:]
            # Add knowledge context to the most recent message if available
            if entry == recent_history[-1] and knowledge_context:
                content += knowledge_context
            messages.append({
                "role": entry['role'],
                "content": content
            })

        try:
            # Use tool-aware chat API
            ai_response = self.agent._query_ollama_with_tools(messages)
        except Exception as e:
            error_msg = (
                f"❌ CRITICAL: Failed to communicate with Ollama inference engine\n\n"
                f"Error Type: {type(e).__name__}\n"
                f"Error Message: {str(e)}\n\n"
            )
            # Auto-diagnose the issue
            diagnostics = self._auto_diagnose_ollama()
            return error_msg + "\n" + diagnostics

        if not ai_response:
            error_msg = (
                f"❌ Empty response from Ollama inference engine\n\n"
                f"The request succeeded but returned no data. This usually means:\n"
                f" • The model ({self.agent.model}) is still loading\n"
                f" • Ollama ran out of memory during generation\n"
                f" • The prompt was too large for the context window\n\n"
            )
            # Auto-diagnose the issue
            diagnostics = self._auto_diagnose_ollama()
            return error_msg + "\n" + diagnostics

        # Check if Ollama returned an error
        try:
            error_check = json.loads(ai_response)
            if isinstance(error_check, dict) and 'error' in error_check:
                error_msg = (
                    f"❌ Ollama API Error\n\n"
                    f"Error: {error_check.get('error', 'Unknown error')}\n"
                    f"Diagnosis: {error_check.get('diagnosis', 'No details')}\n\n"
                )
                # Auto-diagnose the issue
                diagnostics = self._auto_diagnose_ollama()
                return error_msg + "\n" + diagnostics
        except json.JSONDecodeError:
            # Not JSON, it's a normal response
            pass

        # Parse response
        parsed = self._parse_response(ai_response)

        if parsed.get('action') == 'execute':
            # AI wants to execute a command
            command = parsed.get('command', '')
            explanation = parsed.get('explanation', '')

            # Show what we're about to do
            response = f"🔧 {explanation}\n\nExecuting: `{command}`\n\n"

            # Execute the command
            result = self._execute_command(command)

            # Show if we retried with sudo
            if result.get('retried_with_sudo'):
                response += f"⚠️ Permission denied, retried as: `{result['command']}`\n\n"

            if result['success']:
                response += "✅ Command succeeded:\n"
                if result['stdout']:
                    response += f"```\n{result['stdout']}\n```"
                else:
                    response += "(no output)"
            else:
                response += f"❌ Command failed (exit code {result['exit_code']}):\n"
                if result['stderr']:
                    response += f"```\n{result['stderr']}\n```"
                elif result['stdout']:
                    response += f"```\n{result['stdout']}\n```"

            # Add command execution to history
            self.conversation_history.append({
                'role': 'macha',
                'message': response,
                'timestamp': datetime.now().isoformat(),
                'command_result': result
            })

            # Now ask AI to respond to the command output
            followup_prompt = f"""The command completed. Here's what happened:

Command: {command}
Success: {result['success']}
Output: {result['stdout'][:500] if result['stdout'] else '(none)'}
Error: {result['stderr'][:500] if result['stderr'] else '(none)'}

Please provide a brief analysis or next steps."""

            followup_response = self.agent._query_ollama(followup_prompt)

            if followup_response:
                response += f"\n\n{followup_response}"

            return response

        else:
            # Normal conversation response
            message = parsed.get('message', ai_response)

            self.conversation_history.append({
                'role': 'macha',
                'message': message,
                'timestamp': datetime.now().isoformat()
            })

            return message
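
    # Truncation example: a 10,000-character command output replayed from
    # history reaches the model as its first 1,500 characters, a
    # "[message truncated]" marker, and its final 1,500 characters.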

    def run(self):
        """Run the interactive chat session"""
        print("=" * 70)
        print("🌐 MACHA INTERACTIVE CHAT")
        print("=" * 70)
        print("Type your message and press Enter. Commands:")
        print(" /exit or /quit - End the chat session")
        print(" /clear - Clear conversation history")
        print(" /history - Show conversation history")
        print(" /debug - Show Ollama connection status")
        print("=" * 70)
        print()

        while True:
            try:
                # Get user input
                user_input = input("\n💬 YOU: ").strip()

                if not user_input:
                    continue

                # Handle special commands
                if user_input.lower() in ['/exit', '/quit']:
                    print("\n👋 Ending chat session. Goodbye!")
                    break

                elif user_input.lower() == '/clear':
                    self.conversation_history.clear()
                    print("🧹 Conversation history cleared.")
                    continue

                elif user_input.lower() == '/history':
                    print("\n" + "=" * 70)
                    print("CONVERSATION HISTORY")
                    print("=" * 70)
                    for entry in self.conversation_history:
                        role = entry['role'].upper()
                        msg = entry['message'][:100] + "..." if len(entry['message']) > 100 else entry['message']
                        print(f"{role}: {msg}")
                    print("=" * 70)
                    continue

                elif user_input.lower() == '/debug':
                    print("\n" + "=" * 70)
                    print("MACHA ARCHITECTURE & STATUS")
                    print("=" * 70)

                    print("\n🏗️ SYSTEM ARCHITECTURE:")
                    print(f" Hostname: macha.coven.systems")
                    print(f" Service: macha-autonomous.service (systemd)")
                    print(f" Working Directory: /var/lib/macha")

                    print("\n👤 EXECUTION CONTEXT:")
                    current_user = os.getenv('USER') or os.getenv('USERNAME') or 'unknown'
                    print(f" Current User: {current_user}")
                    print(f" UID: {os.getuid()}")

                    # Check if user has sudo access
                    try:
                        result = subprocess.run(['sudo', '-n', 'true'],
                                                capture_output=True, timeout=1)
                        if result.returncode == 0:
                            print(f" Sudo Access: ✓ Yes (passwordless)")
                        else:
                            print(f" Sudo Access: ⚠ Requires password")
                    except Exception:
                        print(f" Sudo Access: ❌ No")

                    print(f" Note: Chat runs as invoking user (you), not as macha-autonomous")

                    print("\n🧠 INFERENCE ENGINE:")
                    print(f" Backend: Ollama")
                    print(f" Host: {self.agent.ollama_host}")
                    print(f" Model: {self.agent.model}")
                    print(f" Service: ollama.service (systemd)")

                    print("\n💾 DATABASE:")
                    print(f" Backend: ChromaDB")
                    print(f" Host: http://localhost:8000")
                    print(f" Data: /var/lib/chromadb")
                    print(f" Service: chromadb.service (systemd)")

                    print("\n🔍 OLLAMA STATUS:")
                    # Try to query Ollama status
                    try:
                        import requests
                        # Check if Ollama is running
                        response = requests.get(f"{self.agent.ollama_host}/api/tags", timeout=5)
                        if response.status_code == 200:
                            models = response.json().get('models', [])
                            print(f" Status: ✓ Running")
                            print(f" Loaded models: {len(models)}")
                            for model in models:
                                name = model.get('name', 'unknown')
                                size = model.get('size', 0) / (1024**3)  # GB
                                is_current = "← ACTIVE" if name == self.agent.model else ""
                                print(f"   • {name} ({size:.1f} GB) {is_current}")
                        else:
                            print(f" Status: ❌ Error (HTTP {response.status_code})")
                    except Exception as e:
                        print(f" Status: ❌ Cannot connect: {e}")
                        print(f" Hint: Check 'systemctl status ollama.service'")

                    print("\n💡 CONVERSATION:")
                    print(f" History: {len(self.conversation_history)} messages")
                    print(f" Session started: {self.session_start}")

                    print("=" * 70)
                    continue

                # Process the message
                print("\n🤖 MACHA: ", end='', flush=True)
                response = self.process_message(user_input)
                print(response)

            except KeyboardInterrupt:
                print("\n\n👋 Chat interrupted. Use /exit to quit properly.")
                continue
            except EOFError:
                print("\n\n👋 Ending chat session. Goodbye!")
                break
            except Exception as e:
                print(f"\n❌ Error: {e}")
                continue


def main():
    """Main entry point"""
    session = MachaChatSession()
    session.run()


if __name__ == "__main__":
    main()
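
# Typical invocation (assuming agent.py sits alongside this file):
#   $ python3 chat.py
#   💬 YOU: /debug     (prints architecture and Ollama status)
#   💬 YOU: /exit      (ends the session)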