Macha is now a standalone NixOS flake that can be imported into other systems.

This provides:
- Independent versioning
- Easier reusability
- Cleaner separation of concerns
- Better development workflow

Includes:
- Complete autonomous system code
- NixOS module with full configuration options
- Queue-based architecture with priority system
- Chunked map-reduce for large outputs
- ChromaDB knowledge base
- Tool calling system
- Multi-host SSH management
- Gotify notification integration

All capabilities from DESIGN.md are preserved.
538 lines
20 KiB
Python
538 lines
20 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Action Executor - Safely executes proposed fixes with rollback capability
|
|
"""
|
|
|
|
import json
import shlex
import shutil
import subprocess
import time
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Any, Optional
|
|
|
|
|
|
class SafeExecutor:
    """Executes system maintenance actions with safety checks.

    Whether a proposed action is run, queued for manual approval, or blocked
    depends on ``autonomy_level``:

    * ``observe``   - nothing is executed.
    * ``suggest``   - only low-risk investigations run; every other action is
      written to the approval queue.
    * ``auto-safe`` - additionally runs low-risk actions listed in
      ``SAFE_ACTIONS``.
    * ``auto-full`` - runs everything except high-risk actions.

    All state (action log, approval queue, archive of approved actions and
    suggested config patches) is stored under ``state_dir``.
    """

    # Actions that are considered safe to auto-execute
    SAFE_ACTIONS = {
        "systemd_restart",  # Restart failed services
        "cleanup",          # Disk cleanup, log rotation
        "investigation",    # Read-only diagnostics
    }

    # Services that should NEVER be stopped/disabled
    PROTECTED_SERVICES = {
        "sshd",
        "systemd-networkd",
        "NetworkManager",
        "systemd-resolved",
        "dbus",
    }

    # Programs an investigation may run. "systemctl" is handled separately
    # because only its "status" sub-command is allowed.
    SAFE_INVESTIGATION_COMMANDS = frozenset(
        {"journalctl", "df", "free", "ps", "netstat", "ss"}
    )

    # Stand-alone shell operators. Investigations run without a shell, so a
    # command relying on these cannot work and is rejected up front.
    _SHELL_OPERATORS = frozenset(
        {"|", "||", "&", "&&", ";", ">", ">>", "<", "2>", "2>>", "2>&1", "&>"}
    )

    # Where and what `nixos-rebuild` builds; override on a subclass or an
    # instance to point at a different checkout or host.
    NIXOS_FLAKE_DIR = "/home/lily/Documents/nixos-servers"
    NIXOS_FLAKE_REF = ".#macha"

    # Words ignored by _similarity_check because they carry no meaning.
    _STOP_WORDS = frozenset({
        'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
        'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'have',
        'has', 'had',
    })

    def __init__(
        self,
        state_dir: Path = Path("/var/lib/macha"),
        autonomy_level: str = "suggest",  # observe, suggest, auto-safe, auto-full
        dry_run: bool = False,
        agent=None  # Optional agent for learning from actions
    ):
        """Create the executor and make sure ``state_dir`` exists.

        Args:
            state_dir: Directory holding the log, queue and archive files.
                A plain string is accepted and converted to ``Path``.
            autonomy_level: One of observe, suggest, auto-safe, auto-full.
            dry_run: When true, permitted actions are simulated only.
            agent: Optional object exposing ``reflect_and_learn(...)``; it is
                called after every successful action.
        """
        # Coerce so callers passing a str do not break the `/` joins below.
        self.state_dir = Path(state_dir)
        self.state_dir.mkdir(parents=True, exist_ok=True)
        self.autonomy_level = autonomy_level
        self.dry_run = dry_run
        self.agent = agent
        self.action_log = self.state_dir / "actions.jsonl"
        self.approval_queue = self.state_dir / "approval_queue.json"

    def execute_action(self, action: Dict[str, Any], monitoring_context: Dict[str, Any]) -> Dict[str, Any]:
        """Execute a proposed action with appropriate safety checks.

        Returns a result dict. ``executed`` is False with ``status`` set to
        ``queued_for_approval``, ``blocked`` or ``dry_run`` when nothing was
        run; otherwise the dict produced by the action implementation.
        """
        action_type = action.get("action_type", "unknown")
        # A missing risk level is treated as the most dangerous one.
        risk_level = action.get("risk_level", "high")

        # Determine if we should execute
        should_execute, reason = self._should_execute(action_type, risk_level)

        if not should_execute:
            if self.autonomy_level == "suggest":
                # Queue for approval
                self._queue_for_approval(action, monitoring_context)
                return {
                    "executed": False,
                    "status": "queued_for_approval",
                    "reason": reason,
                    "queue_file": str(self.approval_queue)
                }
            return {
                "executed": False,
                "status": "blocked",
                "reason": reason
            }

        # Execute the action
        if self.dry_run:
            return self._dry_run_action(action)

        return self._execute_action_impl(action, monitoring_context)

    def _should_execute(self, action_type: str, risk_level: str) -> tuple[bool, str]:
        """Determine if an action should be auto-executed based on autonomy level.

        Returns ``(allowed, human-readable reason)``.
        """
        if self.autonomy_level == "observe":
            return False, "Autonomy level set to observe-only"

        # Auto-approve low-risk investigation actions
        if action_type == "investigation" and risk_level == "low":
            return True, "Auto-approved: Low-risk information gathering"

        if self.autonomy_level == "suggest":
            return False, "Autonomy level requires manual approval"

        if self.autonomy_level == "auto-safe":
            if action_type in self.SAFE_ACTIONS and risk_level == "low":
                return True, "Auto-executing safe action"
            return False, "Action requires higher autonomy level"

        if self.autonomy_level == "auto-full":
            if risk_level == "high":
                return False, "High risk actions always require approval"
            return True, "Auto-executing approved action"

        return False, "Unknown autonomy level"

    def _execute_action_impl(self, action: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
        """Actually execute the action, log it, and let the agent learn from it.

        Exceptions raised by the action handler are captured in the result's
        ``error`` field instead of propagating.
        """
        action_type = action.get("action_type")
        result = {
            "executed": True,
            "timestamp": datetime.now().isoformat(),
            "action": action,
            "success": False,
            "output": "",
            "error": None
        }

        handlers = {
            "systemd_restart": self._restart_services,
            "cleanup": self._perform_cleanup,
            "nix_rebuild": self._nix_rebuild,
            "config_change": self._apply_config_change,
            "investigation": self._run_investigation,
        }

        try:
            handler = handlers.get(action_type)
            if handler is None:
                result["error"] = f"Unknown action type: {action_type}"
            else:
                result.update(handler(action))
        except Exception as e:
            result["error"] = str(e)
            result["success"] = False

        # Log the action
        self._log_action(result)

        # Learn from successful operations
        if result.get("success") and self.agent:
            try:
                self.agent.reflect_and_learn(
                    situation=action.get("diagnosis", "Unknown situation"),
                    action_taken=action.get("proposed_action", "Unknown action"),
                    outcome=result.get("output", ""),
                    success=True
                )
            except Exception as e:
                # Don't fail the action if learning fails
                print(f"Note: Could not record learning: {e}")

        return result

    def _restart_services(self, action: Dict[str, Any]) -> Dict[str, Any]:
        """Restart systemd services named in ``systemctl restart ...`` commands.

        Every unit named on a command line is restarted; option tokens
        (starting with ``-``) and commands of any other shape are ignored.
        ``success`` is True only when at least one unit was restarted and no
        requested restart failed, timed out or was blocked.
        """
        output_lines = []
        restarted = 0
        all_ok = True

        for cmd in action.get("commands") or []:
            if not isinstance(cmd, str) or not cmd.startswith("systemctl restart "):
                continue

            # Everything after "systemctl restart" that is not an option.
            services = [tok for tok in cmd.split()[2:] if not tok.startswith("-")]

            for service in services:
                # Safety check
                if any(protected in service for protected in self.PROTECTED_SERVICES):
                    output_lines.append(f"BLOCKED: {service} is protected")
                    all_ok = False
                    continue

                try:
                    proc = subprocess.run(
                        ["systemctl", "restart", service],
                        capture_output=True,
                        text=True,
                        timeout=30
                    )
                except subprocess.TimeoutExpired:
                    output_lines.append(f"✗ Timeout restarting {service}")
                    all_ok = False
                    continue
                except OSError as e:
                    # e.g. systemctl is not installed / not executable
                    output_lines.append(f"✗ Failed to restart {service}: {e}")
                    all_ok = False
                    continue

                if proc.returncode == 0:
                    output_lines.append(f"✓ Restarted {service}")
                    restarted += 1
                else:
                    output_lines.append(f"✗ Failed to restart {service}: {proc.stderr}")
                    all_ok = False

        return {
            "success": restarted > 0 and all_ok,
            "output": "\n".join(output_lines)
        }

    def _run_cleanup_step(self, label: str, argv: List[str], timeout: int,
                          output_lines: List[str]) -> bool:
        """Run one cleanup command, append its report, return True on exit code 0."""
        try:
            proc = subprocess.run(argv, capture_output=True, text=True, timeout=timeout)
        except Exception as e:
            output_lines.append(f"{label} failed: {e}")
            return False

        output_lines.append(f"{label}: {proc.stdout}")
        if proc.returncode != 0:
            output_lines.append(f"{label} failed (exit {proc.returncode}): {proc.stderr}")
            return False
        return True

    def _perform_cleanup(self, action: Dict[str, Any]) -> Dict[str, Any]:
        """Perform system cleanup tasks.

        The journal is always vacuumed to the last 7 days; the Nix store is
        garbage-collected only when the proposed action mentions "nix".
        ``success`` is True only when every step that ran exited with 0.
        """
        output_lines = []
        all_ok = True

        # Nix store cleanup
        if "nix" in (action.get("proposed_action") or "").lower():
            nix_ok = self._run_cleanup_step(
                "Nix cleanup", ["nix-collect-garbage", "--delete-old"], 300, output_lines
            )
            all_ok = all_ok and nix_ok

        # Journal cleanup (keep last 7 days)
        journal_ok = self._run_cleanup_step(
            "Journal cleanup", ["journalctl", "--vacuum-time=7d"], 60, output_lines
        )
        all_ok = all_ok and journal_ok

        return {
            "success": all_ok,
            "output": "\n".join(output_lines)
        }

    def _nix_rebuild(self, action: Dict[str, Any]) -> Dict[str, Any]:
        """Rebuild NixOS configuration.

        Runs ``nixos-rebuild dry-build`` first and only proceeds to
        ``nixos-rebuild switch`` when the dry build exits with 0. Both use
        ``NIXOS_FLAKE_REF`` inside ``NIXOS_FLAKE_DIR``.
        """
        # This is HIGH RISK - always requires approval or full autonomy
        # And we should test first
        output_lines = []

        # First, try a dry build
        try:
            dry = subprocess.run(
                ["nixos-rebuild", "dry-build", "--flake", self.NIXOS_FLAKE_REF],
                capture_output=True,
                text=True,
                timeout=600,
                cwd=self.NIXOS_FLAKE_DIR
            )
        except Exception as e:
            return {
                "success": False,
                "output": f"Dry build error: {e}"
            }

        if dry.returncode != 0:
            return {
                "success": False,
                "output": f"Dry build failed:\n{dry.stderr}"
            }

        output_lines.append("✓ Dry build successful")

        # Now do the actual rebuild
        try:
            switch = subprocess.run(
                ["nixos-rebuild", "switch", "--flake", self.NIXOS_FLAKE_REF],
                capture_output=True,
                text=True,
                timeout=1200,
                cwd=self.NIXOS_FLAKE_DIR
            )
        except Exception as e:
            return {
                "success": False,
                "output": "\n".join(output_lines),
                "error": str(e)
            }

        output_lines.append(switch.stdout)
        return {
            "success": switch.returncode == 0,
            "output": "\n".join(output_lines),
            "error": switch.stderr if switch.returncode != 0 else None
        }

    def _apply_config_change(self, action: Dict[str, Any]) -> Dict[str, Any]:
        """Record a proposed configuration change as a patch suggestion file.

        The target file is never modified; a text file describing the change
        is written to ``state_dir`` for manual review.
        """
        config_changes = action.get("config_changes") or {}
        file_path = config_changes.get("file")

        if not file_path:
            return {
                "success": False,
                "output": "No file specified in config_changes"
            }

        # For now, we DON'T auto-modify configs - too risky
        # Instead, we create a suggested patch file
        patch_file = self.state_dir / f"suggested_patch_{int(time.time())}.txt"
        with open(patch_file, 'w') as f:
            f.write(f"Suggested change to {file_path}:\n\n")
            # str() so a non-string description cannot make write() fail
            f.write(str(config_changes.get("change", "No change description")))
            f.write(f"\n\nReasoning: {action.get('reasoning', 'No reasoning provided')}")

        return {
            "success": True,
            "output": f"Config change suggestion saved to {patch_file}\nThis requires manual review and application."
        }

    def _parse_safe_command(self, cmd: Any) -> Optional[List[str]]:
        """Return the argv for an allowed diagnostic command, else None.

        A command is allowed when its program is in
        ``SAFE_INVESTIGATION_COMMANDS`` or it is ``systemctl status ...``,
        and it contains no stand-alone shell operator.
        """
        if not isinstance(cmd, str):
            return None
        try:
            argv = shlex.split(cmd)
        except ValueError:
            # Unbalanced quotes and similar - refuse rather than guess.
            return None
        if not argv:
            return None
        if any(token in self._SHELL_OPERATORS for token in argv):
            return None

        program = argv[0]
        if program == "systemctl":
            return argv if len(argv) > 1 and argv[1] == "status" else None
        return argv if program in self.SAFE_INVESTIGATION_COMMANDS else None

    def _run_investigation(self, action: Dict[str, Any]) -> Dict[str, Any]:
        """Run diagnostic commands.

        Commands are proposed externally, so each one is tokenised and checked
        against an allow-list on the program name, then executed WITHOUT a
        shell. A prefix check combined with ``shell=True`` would let strings
        such as ``df; rm -rf /`` or ``ssh host`` (prefix ``ss``) through.
        Consequently pipes and redirections are not supported and are blocked.
        """
        output_lines = []

        for cmd in action.get("commands") or []:
            # Only allow safe read-only commands
            argv = self._parse_safe_command(cmd)
            if argv is None:
                output_lines.append(f"BLOCKED unsafe command: {cmd}")
                continue

            try:
                proc = subprocess.run(
                    argv,
                    capture_output=True,
                    text=True,
                    timeout=30
                )
                output_lines.append(f"$ {cmd}")
                output_lines.append(proc.stdout)
            except Exception as e:
                output_lines.append(f"Error running {cmd}: {e}")

        return {
            "success": True,
            "output": "\n".join(output_lines)
        }

    def _dry_run_action(self, action: Dict[str, Any]) -> Dict[str, Any]:
        """Simulate action execution without changing anything."""
        return {
            "executed": False,
            "status": "dry_run",
            "action": action,
            "output": "Dry run mode - no actual changes made"
        }

    def _save_queue(self, queue: List[Dict[str, Any]]) -> None:
        """Write the approval queue atomically.

        The queue is serialised to a temporary sibling file and then renamed
        over the real one, so a serialisation error or crash mid-write cannot
        leave a truncated queue behind.
        """
        tmp_path = self.approval_queue.with_name(self.approval_queue.name + ".tmp")
        try:
            with open(tmp_path, 'w') as f:
                json.dump(queue, f, indent=2)
            tmp_path.replace(self.approval_queue)
        except Exception:
            tmp_path.unlink(missing_ok=True)
            raise

    def _queue_for_approval(self, action: Dict[str, Any], context: Dict[str, Any]):
        """Add action to approval queue unless a similar one is already pending.

        Two actions count as duplicates when their diagnoses, or their
        proposed actions, have a word-overlap similarity above 0.7.
        """
        queue = self.get_approval_queue()

        # Check for duplicate pending actions
        proposed_action = action.get("proposed_action", "")
        diagnosis = action.get("diagnosis", "")

        for existing in queue:
            # Skip already approved/rejected items
            if existing.get("approved") is not None:
                continue

            existing_action = existing.get("action") or {}
            existing_proposed = existing_action.get("proposed_action", "")
            existing_diagnosis = existing_action.get("diagnosis", "")

            # Check if this is essentially the same issue
            # Match if diagnosis is very similar OR proposed action is very similar
            if (diagnosis and existing_diagnosis and
                    self._similarity_check(diagnosis, existing_diagnosis) > 0.7):
                print("Skipping duplicate action - similar diagnosis already queued")
                return

            if (proposed_action and existing_proposed and
                    self._similarity_check(proposed_action, existing_proposed) > 0.7):
                print("Skipping duplicate action - similar proposal already queued")
                return

        queue.append({
            "timestamp": datetime.now().isoformat(),
            "action": action,
            "context": context,
            "approved": None
        })

        self._save_queue(queue)

    def _similarity_check(self, str1: str, str2: str) -> float:
        """Simple similarity check between two strings.

        Returns 1.0 for a case-insensitive exact match, otherwise the Jaccard
        similarity of the two word sets after removing stop words (0.0 when
        either set ends up empty).
        """
        # Normalize strings
        s1 = str1.lower().strip()
        s2 = str2.lower().strip()

        # Exact match
        if s1 == s2:
            return 1.0

        # Check for significant word overlap, ignoring common words that
        # don't indicate similarity
        words1 = set(s1.split()) - self._STOP_WORDS
        words2 = set(s2.split()) - self._STOP_WORDS

        if not words1 or not words2:
            return 0.0

        # Calculate Jaccard similarity (union is non-empty here)
        return len(words1 & words2) / len(words1 | words2)

    def _log_action(self, result: Dict[str, Any]):
        """Append an executed action's result to the JSON-lines action log."""
        with open(self.action_log, 'a') as f:
            f.write(json.dumps(result) + '\n')

    def get_approval_queue(self) -> List[Dict[str, Any]]:
        """Get actions awaiting approval.

        Returns an empty list when the queue file does not exist. A queue
        file that is not valid JSON, or whose top-level value is not a list,
        is moved aside to ``<name>.corrupt`` and treated as empty, so one bad
        file does not make every later queue operation crash.
        """
        if not self.approval_queue.exists():
            return []

        try:
            with open(self.approval_queue, 'r') as f:
                queue = json.load(f)
            if not isinstance(queue, list):
                raise ValueError("top-level JSON value is not a list")
        except ValueError as e:  # includes json.JSONDecodeError
            quarantine = self.approval_queue.with_name(self.approval_queue.name + ".corrupt")
            self.approval_queue.replace(quarantine)
            print(f"Warning: approval queue was unreadable ({e}); moved to {quarantine}")
            return []

        return queue

    def approve_action(self, index: int) -> bool:
        """Approve and execute a queued action, then remove it from queue.

        Returns True when the action ran successfully, False when it failed
        or ``index`` is out of range. The item is archived and removed from
        the queue regardless of the outcome.
        """
        queue = self.get_approval_queue()
        if not 0 <= index < len(queue):
            return False

        action_item = queue[index]

        # Execute the approved action (tolerating entries with missing keys)
        result = self._execute_action_impl(
            action_item.get("action") or {},
            action_item.get("context") or {},
        )

        # Archive the action (success or failure)
        self._archive_action(action_item, result)

        # Remove from queue regardless of outcome
        queue.pop(index)
        self._save_queue(queue)

        return bool(result.get("success", False))

    def _archive_action(self, action_item: Dict[str, Any], result: Dict[str, Any]):
        """Archive an approved action with its execution result."""
        archive_file = self.state_dir / "approved_actions.jsonl"

        archive_entry = {
            "timestamp": datetime.now().isoformat(),
            "original_timestamp": action_item.get("timestamp"),
            "action": action_item.get("action"),
            "context": action_item.get("context"),
            "result": result
        }

        with open(archive_file, 'a') as f:
            f.write(json.dumps(archive_entry) + '\n')

    def reject_action(self, index: int) -> bool:
        """Reject and remove a queued action.

        Returns True when an item was removed, False for an invalid index.
        """
        queue = self.get_approval_queue()
        if not 0 <= index < len(queue):
            return False

        queue.pop(index)
        self._save_queue(queue)
        return True
|
|
|
|
|
|
if __name__ == "__main__":
    # Minimal command-line interface for reviewing the approval queue:
    #   queue            - list queued actions
    #   approve <index>  - execute a queued action and remove it from the queue
    #   reject <index>   - discard a queued action
    import sys

    def _index_arg(raw: str) -> int:
        """Parse a queue index, exiting with a clear message if it is not an integer."""
        try:
            return int(raw)
        except ValueError:
            print(f"Invalid queue index: {raw!r} (expected an integer)", file=sys.stderr)
            sys.exit(1)

    cli_args = sys.argv[1:]
    command = cli_args[0] if cli_args else ""

    if command == "queue":
        executor = SafeExecutor()
        queue = executor.get_approval_queue()
        if queue:
            print("\n" + "="*70)
            print(f"PENDING ACTIONS: {len(queue)}")
            print("="*70)
            for i, item in enumerate(queue):
                action = item.get("action", {})
                timestamp = item.get("timestamp", "unknown")
                approved = item.get("approved")

                # None means nobody has decided yet; any other falsy value
                # is shown as rejected.
                if approved:
                    status = "✓ APPROVED"
                elif approved is None:
                    status = "⏳ PENDING"
                else:
                    status = "✗ REJECTED"

                print(f"\n[{i}] {status} - {timestamp}")
                print("-" * 70)
                print(f"DIAGNOSIS: {action.get('diagnosis', 'N/A')}")
                print(f"\nPROPOSED ACTION: {action.get('proposed_action', 'N/A')}")
                print(f"TYPE: {action.get('action_type', 'N/A')}")
                print(f"RISK: {action.get('risk_level', 'N/A')}")

                if action.get('commands'):
                    print("\nCOMMANDS:")
                    for cmd in action['commands']:
                        print(f" - {cmd}")

                if action.get('config_changes'):
                    print("\nCONFIG CHANGES:")
                    for key, value in action['config_changes'].items():
                        print(f" {key}: {value}")

                print(f"\nREASONING: {action.get('reasoning', 'N/A')}")
                print("\n" + "="*70 + "\n")
        else:
            print("No pending actions")

    elif command == "approve" and len(cli_args) > 1:
        index = _index_arg(cli_args[1])
        executor = SafeExecutor()
        success = executor.approve_action(index)
        print(f"Approval {'succeeded' if success else 'failed'}")

    elif command == "reject" and len(cli_args) > 1:
        index = _index_arg(cli_args[1])
        executor = SafeExecutor()
        success = executor.reject_action(index)
        print(f"Action {'rejected and removed from queue' if success else 'rejection failed'}")

    else:
        # Missing or unknown command: say how to use the tool instead of
        # exiting silently.
        print(f"Usage: {sys.argv[0]} queue | approve <index> | reject <index>", file=sys.stderr)
        sys.exit(2)
|