# macha-autonomous/executor.py

#!/usr/bin/env python3
"""
Action Executor - Safely executes proposed fixes with rollback capability
"""

import json
import shlex
import shutil
import subprocess
import time
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Any, Optional


class SafeExecutor:
    """Executes system maintenance actions with safety checks.

    An action is a dict; the keys read by this class are ``action_type``,
    ``risk_level``, ``commands``, ``proposed_action``, ``diagnosis``,
    ``reasoning`` and ``config_changes``.  Whether an action runs
    immediately, is queued for manual approval, or is blocked depends on
    ``autonomy_level`` (see ``_should_execute``).

    Files kept under ``state_dir``:
        actions.jsonl           one JSON line per executed action
        approval_queue.json     JSON list of actions awaiting approval
        approved_actions.jsonl  one JSON line per approved action and result
        suggested_patch_*.txt   config change suggestions for manual review
    """

    # Actions that are considered safe to auto-execute
    SAFE_ACTIONS = {
        "systemd_restart",  # Restart failed services
        "cleanup",  # Disk cleanup, log rotation
        "investigation",  # Read-only diagnostics
    }

    # Services that should NEVER be stopped/disabled
    PROTECTED_SERVICES = {
        "sshd",
        "systemd-networkd",
        "NetworkManager",
        "systemd-resolved",
        "dbus",
    }

    # Executables an investigation may run, mapped to the sub-command that
    # must follow the executable (None = no restriction on the first argument).
    INVESTIGATION_COMMANDS = {
        "journalctl": None,
        "systemctl": "status",
        "df": None,
        "free": None,
        "ps": None,
        "netstat": None,
        "ss": None,
    }

    # Argument prefixes that turn an otherwise read-only tool into a mutating
    # one; investigations are auto-approved, so these are refused.
    INVESTIGATION_DENIED_ARGS = {
        "journalctl": (
            "--vacuum-",
            "--rotate",
            "--flush",
            "--relinquish-var",
            "--smart-relinquish-var",
            "--setup-keys",
            "--update-catalog",
        ),
    }

    # Stand-alone shell operators.  Commands run without a shell, so these
    # would only be passed to the tool as literal arguments; refuse them with
    # a clear BLOCKED message instead.
    _SHELL_OPERATORS = frozenset({"|", "||", "&", "&&", ";", ">", ">>", "<", "<<"})

    # Where and what `nixos-rebuild` builds; override on a subclass or
    # instance to target a different checkout or host.
    NIX_FLAKE_DIR = "/home/lily/Documents/nixos-servers"
    NIX_FLAKE_REF = ".#macha"

    def __init__(
        self,
        state_dir: Path = Path("/var/lib/macha"),
        autonomy_level: str = "suggest",  # observe, suggest, auto-safe, auto-full
        dry_run: bool = False,
        agent = None  # Optional agent for learning from actions
    ):
        """Create the executor and make sure ``state_dir`` exists.

        Args:
            state_dir: Directory for the action log, approval queue and
                suggested patches. A ``str`` is accepted as well.
            autonomy_level: One of ``observe``, ``suggest``, ``auto-safe``,
                ``auto-full``.
            dry_run: When True, actions that would run are only simulated.
            agent: Optional object exposing ``reflect_and_learn(...)``; called
                after every successful action.
        """
        self.state_dir = Path(state_dir)
        self.state_dir.mkdir(parents=True, exist_ok=True)
        self.autonomy_level = autonomy_level
        self.dry_run = dry_run
        self.agent = agent
        self.action_log = self.state_dir / "actions.jsonl"
        self.approval_queue = self.state_dir / "approval_queue.json"

    def execute_action(self, action: Dict[str, Any], monitoring_context: Dict[str, Any]) -> Dict[str, Any]:
        """Execute a proposed action with appropriate safety checks.

        Returns:
            A dict with ``executed`` plus either a ``status`` of
            ``queued_for_approval`` / ``blocked`` / ``dry_run``, or the full
            execution result from ``_execute_action_impl``.
        """
        action_type = action.get("action_type", "unknown")
        risk_level = action.get("risk_level", "high")  # missing risk is treated as high

        should_execute, reason = self._should_execute(action_type, risk_level)

        if should_execute:
            if self.dry_run:
                return self._dry_run_action(action)
            return self._execute_action_impl(action, monitoring_context)

        if self.autonomy_level == "suggest":
            # Queue for approval
            self._queue_for_approval(action, monitoring_context)
            return {
                "executed": False,
                "status": "queued_for_approval",
                "reason": reason,
                "queue_file": str(self.approval_queue)
            }

        return {
            "executed": False,
            "status": "blocked",
            "reason": reason
        }

    def _should_execute(self, action_type: str, risk_level: str) -> tuple[bool, str]:
        """Decide whether an action may run now; returns ``(allowed, reason)``."""

        if self.autonomy_level == "observe":
            return False, "Autonomy level set to observe-only"

        # Auto-approve low-risk investigation actions
        if action_type == "investigation" and risk_level == "low":
            return True, "Auto-approved: Low-risk information gathering"

        if self.autonomy_level == "suggest":
            return False, "Autonomy level requires manual approval"

        if self.autonomy_level == "auto-safe":
            if action_type in self.SAFE_ACTIONS and risk_level == "low":
                return True, "Auto-executing safe action"
            return False, "Action requires higher autonomy level"

        if self.autonomy_level == "auto-full":
            if risk_level == "high":
                return False, "High risk actions always require approval"
            return True, "Auto-executing approved action"

        return False, "Unknown autonomy level"

    def _execute_action_impl(self, action: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
        """Run the handler for ``action``, log the result, and notify the agent.

        Handler exceptions are captured into ``result["error"]`` rather than
        raised. The result is appended to the action log regardless of outcome.
        """
        action_type = action.get("action_type")
        result = {
            "executed": True,
            "timestamp": datetime.now().isoformat(),
            "action": action,
            "success": False,
            "output": "",
            "error": None
        }

        handlers = {
            "systemd_restart": self._restart_services,
            "cleanup": self._perform_cleanup,
            "nix_rebuild": self._nix_rebuild,
            "config_change": self._apply_config_change,
            "investigation": self._run_investigation,
        }

        try:
            handler = handlers.get(action_type)
            if handler is None:
                result["error"] = f"Unknown action type: {action_type}"
            else:
                result.update(handler(action))
        except Exception as e:
            result["error"] = str(e)
            result["success"] = False

        # Log the action
        self._log_action(result)

        # Learn from successful operations
        if result.get("success") and self.agent:
            try:
                self.agent.reflect_and_learn(
                    situation=action.get("diagnosis", "Unknown situation"),
                    action_taken=action.get("proposed_action", "Unknown action"),
                    outcome=result.get("output", ""),
                    success=True
                )
            except Exception as e:
                # Don't fail the action if learning fails
                print(f"Note: Could not record learning: {e}")

        return result

    def _restart_services(self, action: Dict[str, Any]) -> Dict[str, Any]:
        """Restart the units named by ``systemctl restart ...`` commands.

        Commands that are not ``systemctl restart`` are ignored. Option
        tokens (starting with ``-``) are dropped; every remaining token is
        treated as a unit name and restarted individually.

        Returns:
            ``success`` is True only when at least one unit was restarted and
            no unit was blocked, failed, or timed out.
        """
        output_lines = []
        restarted = 0
        problems = 0

        for cmd in action.get("commands") or []:
            parts = cmd.split()
            if parts[:2] != ["systemctl", "restart"]:
                continue

            units = [token for token in parts[2:] if not token.startswith("-")]
            if not units:
                output_lines.append(f"✗ No service named in: {cmd}")
                problems += 1
                continue

            for service in units:
                # Safety check (substring match, so e.g. "sshd.service" is covered)
                if any(protected in service for protected in self.PROTECTED_SERVICES):
                    output_lines.append(f"BLOCKED: {service} is protected")
                    problems += 1
                    continue

                try:
                    completed = subprocess.run(
                        ["systemctl", "restart", service],
                        capture_output=True,
                        text=True,
                        timeout=30
                    )
                except subprocess.TimeoutExpired:
                    output_lines.append(f"✗ Timeout restarting {service}")
                    problems += 1
                    continue
                except OSError as e:
                    # e.g. systemctl not installed; keep the lines gathered so far
                    output_lines.append(f"✗ Failed to restart {service}: {e}")
                    problems += 1
                    continue

                if completed.returncode == 0:
                    output_lines.append(f"✓ Restarted {service}")
                    restarted += 1
                else:
                    output_lines.append(f"✗ Failed to restart {service}: {completed.stderr}")
                    problems += 1

        return {
            "success": restarted > 0 and problems == 0,
            "output": "\n".join(output_lines)
        }

    def _run_cleanup_step(self, label: str, argv: List[str], timeout: int) -> tuple[bool, str]:
        """Run one cleanup command; returns ``(ok, log line)``."""
        try:
            completed = subprocess.run(argv, capture_output=True, text=True, timeout=timeout)
        except (OSError, subprocess.SubprocessError) as e:
            return False, f"{label} failed: {e}"
        if completed.returncode != 0:
            return False, f"{label} failed: {completed.stderr}"
        return True, f"{label}: {completed.stdout}"

    def _perform_cleanup(self, action: Dict[str, Any]) -> Dict[str, Any]:
        """Vacuum the journal and, when the proposal mentions nix, the Nix store.

        Returns:
            ``success`` is True only when every step that ran exited with
            status 0.
        """
        steps = []

        # Nix store cleanup
        if "nix" in (action.get("proposed_action") or "").lower():
            steps.append(("Nix cleanup", ["nix-collect-garbage", "--delete-old"], 300))

        # Journal cleanup (keep last 7 days)
        steps.append(("Journal cleanup", ["journalctl", "--vacuum-time=7d"], 60))

        output_lines = []
        all_ok = True
        for label, argv, timeout in steps:
            ok, line = self._run_cleanup_step(label, argv, timeout)
            all_ok = all_ok and ok
            output_lines.append(line)

        return {
            "success": all_ok,
            "output": "\n".join(output_lines)
        }

    def _run_nixos_rebuild(self, mode: str, timeout: int) -> "subprocess.CompletedProcess":
        """Invoke ``nixos-rebuild <mode>`` for the configured flake."""
        return subprocess.run(
            ["nixos-rebuild", mode, "--flake", self.NIX_FLAKE_REF],
            capture_output=True,
            text=True,
            timeout=timeout,
            cwd=self.NIX_FLAKE_DIR
        )

    def _nix_rebuild(self, action: Dict[str, Any]) -> Dict[str, Any]:
        """Rebuild NixOS configuration: dry build first, then switch.

        The switch is only attempted when the dry build exits with status 0.
        """

        # This is HIGH RISK - always requires approval or full autonomy
        # And we should test first

        output_lines = []

        # First, try a dry build
        try:
            dry = self._run_nixos_rebuild("dry-build", timeout=600)
        except (OSError, subprocess.SubprocessError) as e:
            return {
                "success": False,
                "output": f"Dry build error: {e}"
            }

        if dry.returncode != 0:
            return {
                "success": False,
                "output": f"Dry build failed:\n{dry.stderr}"
            }

        output_lines.append("✓ Dry build successful")

        # Now do the actual rebuild
        try:
            switch = self._run_nixos_rebuild("switch", timeout=1200)
        except (OSError, subprocess.SubprocessError) as e:
            return {
                "success": False,
                "output": "\n".join(output_lines),
                "error": str(e)
            }

        output_lines.append(switch.stdout)
        switched = switch.returncode == 0

        return {
            "success": switched,
            "output": "\n".join(output_lines),
            "error": None if switched else switch.stderr
        }

    def _apply_config_change(self, action: Dict[str, Any]) -> Dict[str, Any]:
        """Write a suggested-patch file for a config change (never edits the config).

        Returns:
            ``success`` False when ``config_changes`` names no file; otherwise
            True with the path of the suggestion file in ``output``.
        """
        config_changes = action.get("config_changes") or {}
        file_path = config_changes.get("file")

        if not file_path:
            return {
                "success": False,
                "output": "No file specified in config_changes"
            }

        # For now, we DON'T auto-modify configs - too risky
        # Instead, we create a suggested patch file

        stamp = int(time.time())
        patch_file = self.state_dir / f"suggested_patch_{stamp}.txt"
        # Two suggestions in the same second must not overwrite each other
        suffix = 1
        while patch_file.exists():
            patch_file = self.state_dir / f"suggested_patch_{stamp}_{suffix}.txt"
            suffix += 1

        with open(patch_file, 'w', encoding="utf-8") as f:
            f.write(f"Suggested change to {file_path}:\n\n")
            f.write(str(config_changes.get("change", "No change description")))
            f.write(f"\n\nReasoning: {action.get('reasoning', 'No reasoning provided')}")

        return {
            "success": True,
            "output": f"Config change suggestion saved to {patch_file}\nThis requires manual review and application."
        }

    def _parse_investigation_command(self, cmd: Any) -> Optional[List[str]]:
        """Split ``cmd`` into an argv list, or return None when it is not allowed.

        A command is allowed when its executable is a key of
        ``INVESTIGATION_COMMANDS``, any required sub-command follows it, and it
        contains neither a denied argument nor a stand-alone shell operator.
        """
        if not isinstance(cmd, str):
            return None
        try:
            argv = shlex.split(cmd)
        except ValueError:
            # Unbalanced quotes and similar
            return None
        if not argv or argv[0] not in self.INVESTIGATION_COMMANDS:
            return None

        required = self.INVESTIGATION_COMMANDS[argv[0]]
        if required is not None and (len(argv) < 2 or argv[1] != required):
            return None

        args = argv[1:]
        if any(token in self._SHELL_OPERATORS for token in args):
            return None

        denied = self.INVESTIGATION_DENIED_ARGS.get(argv[0], ())
        if denied and any(token.startswith(denied) for token in args):
            return None

        return argv

    def _run_investigation(self, action: Dict[str, Any]) -> Dict[str, Any]:
        """Run diagnostic commands from the allowlist, without a shell.

        Commands are executed as argument lists, so shell syntax (pipes,
        ``;``, redirection, substitution) is never interpreted. Commands
        that fail the allowlist are reported as BLOCKED and skipped.
        """
        output_lines = []

        for cmd in action.get("commands") or []:
            # Only allow safe read-only commands
            argv = self._parse_investigation_command(cmd)
            if argv is None:
                output_lines.append(f"BLOCKED unsafe command: {cmd}")
                continue

            try:
                completed = subprocess.run(
                    argv,
                    capture_output=True,
                    text=True,
                    timeout=30
                )
            except Exception as e:
                output_lines.append(f"Error running {cmd}: {e}")
                continue

            output_lines.append(f"$ {cmd}")
            output_lines.append(completed.stdout)
            if completed.returncode != 0 and completed.stderr:
                # Surface why a diagnostic produced nothing useful
                output_lines.append(completed.stderr)

        return {
            "success": True,
            "output": "\n".join(output_lines)
        }

    def _dry_run_action(self, action: Dict[str, Any]) -> Dict[str, Any]:
        """Simulate action execution"""
        return {
            "executed": False,
            "status": "dry_run",
            "action": action,
            "output": "Dry run mode - no actual changes made"
        }

    def _read_queue(self) -> List[Dict[str, Any]]:
        """Load the approval queue; a missing or unparsable file yields ``[]``.

        An unparsable file is moved aside (``.corrupt`` suffix, best effort)
        so a later write does not destroy it.
        """
        if not self.approval_queue.exists():
            return []

        try:
            with open(self.approval_queue, 'r') as f:
                queue = json.load(f)
        except ValueError as e:
            print(f"Warning: approval queue {self.approval_queue} is unreadable ({e}); treating it as empty")
            corrupt = self.approval_queue.with_name(self.approval_queue.name + ".corrupt")
            try:
                self.approval_queue.replace(corrupt)
            except OSError:
                # Best effort only: a read-only caller may lack write permission
                pass
            return []

        return queue if isinstance(queue, list) else []

    def _write_queue(self, queue: List[Dict[str, Any]]) -> None:
        """Persist the queue via a temp file and rename.

        Writing in place would truncate the queue first, so a serialization
        error or crash mid-write would leave it corrupt.
        """
        tmp_path = self.approval_queue.with_name(self.approval_queue.name + ".tmp")
        with open(tmp_path, 'w') as f:
            json.dump(queue, f, indent=2)
        tmp_path.replace(self.approval_queue)

    def _queue_for_approval(self, action: Dict[str, Any], context: Dict[str, Any]):
        """Add action to approval queue unless a similar pending entry exists."""
        queue = self._read_queue()

        # Check for duplicate pending actions
        proposed_action = action.get("proposed_action", "")
        diagnosis = action.get("diagnosis", "")

        for existing in queue:
            # Skip already approved/rejected items
            if existing.get("approved") is not None:
                continue

            existing_action = existing.get("action", {})
            existing_proposed = existing_action.get("proposed_action", "")
            existing_diagnosis = existing_action.get("diagnosis", "")

            # Check if this is essentially the same issue
            # Match if diagnosis is very similar OR proposed action is very similar
            if (diagnosis and existing_diagnosis and
                    self._similarity_check(diagnosis, existing_diagnosis) > 0.7):
                print("Skipping duplicate action - similar diagnosis already queued")
                return

            if (proposed_action and existing_proposed and
                    self._similarity_check(proposed_action, existing_proposed) > 0.7):
                print("Skipping duplicate action - similar proposal already queued")
                return

        queue.append({
            "timestamp": datetime.now().isoformat(),
            "action": action,
            "context": context,
            "approved": None
        })

        self._write_queue(queue)

    def _similarity_check(self, str1: str, str2: str) -> float:
        """Return a 0.0-1.0 similarity score between two strings.

        Identical strings (ignoring case and surrounding whitespace) score
        1.0; otherwise the score is the Jaccard similarity of their word sets
        after removing common stop words.
        """
        # Normalize strings
        s1 = str1.lower().strip()
        s2 = str2.lower().strip()

        # Exact match
        if s1 == s2:
            return 1.0

        # Remove common words that don't indicate similarity
        common_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had'}
        words1 = set(s1.split()) - common_words
        words2 = set(s2.split()) - common_words

        if not words1 or not words2:
            return 0.0

        # Calculate Jaccard similarity (union is non-empty here)
        return len(words1 & words2) / len(words1 | words2)

    def _log_action(self, result: Dict[str, Any]):
        """Append an execution result to the action log as one JSON line."""
        with open(self.action_log, 'a') as f:
            f.write(json.dumps(result) + '\n')

    def get_approval_queue(self) -> List[Dict[str, Any]]:
        """Get actions awaiting approval (empty list when there is no queue)."""
        return self._read_queue()

    def approve_action(self, index: int) -> bool:
        """Approve and execute a queued action, then remove it from queue.

        Args:
            index: Position of the action in the current queue.

        Returns:
            True when the action executed successfully. False when the index
            is out of range, the action failed, or the executor is in dry-run
            mode (nothing is executed and the queue is left untouched).
        """
        queue = self._read_queue()
        if not 0 <= index < len(queue):
            return False

        action_item = queue[index]

        if self.dry_run:
            # Dry-run must never change the system, even for approved actions
            return False

        # Execute the approved action
        result = self._execute_action_impl(
            action_item.get("action", {}),
            action_item.get("context", {})
        )

        # Archive the action (success or failure)
        self._archive_action(action_item, result)

        # Remove from queue regardless of outcome.  Execution can take minutes
        # and another process may have changed the queue meanwhile, so re-read
        # it instead of writing back the snapshot taken before execution.
        current = self._read_queue()
        if index < len(current) and current[index] == action_item:
            current.pop(index)
            self._write_queue(current)
        elif action_item in current:
            current.remove(action_item)
            self._write_queue(current)

        return result.get("success", False)

    def _archive_action(self, action_item: Dict[str, Any], result: Dict[str, Any]):
        """Append an approved action and its execution result to the archive."""
        archive_file = self.state_dir / "approved_actions.jsonl"

        archive_entry = {
            "timestamp": datetime.now().isoformat(),
            "original_timestamp": action_item.get("timestamp"),
            "action": action_item.get("action"),
            "context": action_item.get("context"),
            "result": result
        }

        with open(archive_file, 'a') as f:
            f.write(json.dumps(archive_entry) + '\n')

    def reject_action(self, index: int) -> bool:
        """Reject and remove a queued action; returns False for a bad index."""
        queue = self._read_queue()
        if not 0 <= index < len(queue):
            return False

        queue.pop(index)
        self._write_queue(queue)
        return True


if __name__ == "__main__":
    # Minimal CLI for reviewing the approval queue:
    #   queue            list queued actions
    #   approve <index>  execute a queued action and remove it from the queue
    #   reject <index>   remove a queued action without executing it
    import sys

    def _parse_index(raw: str) -> int:
        """Convert a CLI argument to a queue index; exit with a message if it is not an integer."""
        try:
            return int(raw)
        except ValueError:
            print(f"Invalid queue index: {raw!r} (expected an integer)")
            sys.exit(2)

    cli_args = sys.argv[1:]
    command = cli_args[0] if cli_args else None

    if command == "queue":
        executor = SafeExecutor()
        queue = executor.get_approval_queue()
        if queue:
            print("\n" + "="*70)
            print(f"PENDING ACTIONS: {len(queue)}")
            print("="*70)
            for i, item in enumerate(queue):
                action = item.get("action", {})
                timestamp = item.get("timestamp", "unknown")
                approved = item.get("approved")

                # None = not yet decided, truthy = approved, falsy = rejected
                status = "✓ APPROVED" if approved else "⏳ PENDING" if approved is None else "✗ REJECTED"

                print(f"\n[{i}] {status} - {timestamp}")
                print("-" * 70)
                print(f"DIAGNOSIS: {action.get('diagnosis', 'N/A')}")
                print(f"\nPROPOSED ACTION: {action.get('proposed_action', 'N/A')}")
                print(f"TYPE: {action.get('action_type', 'N/A')}")
                print(f"RISK: {action.get('risk_level', 'N/A')}")

                if action.get('commands'):
                    print("\nCOMMANDS:")
                    for cmd in action['commands']:
                        print(f"  - {cmd}")

                if action.get('config_changes'):
                    print("\nCONFIG CHANGES:")
                    for key, value in action['config_changes'].items():
                        print(f"  {key}: {value}")

                print(f"\nREASONING: {action.get('reasoning', 'N/A')}")
            print("\n" + "="*70 + "\n")
        else:
            print("No pending actions")

    elif command == "approve" and len(cli_args) > 1:
        index = _parse_index(cli_args[1])
        executor = SafeExecutor()
        success = executor.approve_action(index)
        print(f"Approval {'succeeded' if success else 'failed'}")

    elif command == "reject" and len(cli_args) > 1:
        index = _parse_index(cli_args[1])
        executor = SafeExecutor()
        success = executor.reject_action(index)
        print(f"Action {'rejected and removed from queue' if success else 'rejection failed'}")

    else:
        print(f"Usage: {sys.argv[0]} queue | approve <index> | reject <index>")
        if command is not None:
            # Unknown command or missing index: signal misuse to the caller
            sys.exit(2)