Initial commit: Split Macha autonomous system into separate flake

Macha is now a standalone NixOS flake that can be imported into other
systems. This provides:

- Independent versioning
- Easier reusability
- Cleaner separation of concerns
- Better development workflow

Includes:
- Complete autonomous system code
- NixOS module with full configuration options
- Queue-based architecture with priority system
- Chunked map-reduce for large outputs
- ChromaDB knowledge base
- Tool calling system
- Multi-host SSH management
- Gotify notification integration

All capabilities from DESIGN.md are preserved.
This commit is contained in:
Lily Miller
2025-10-06 14:32:37 -06:00
commit 22ba493d9e
30 changed files with 10306 additions and 0 deletions

537
executor.py Normal file
View File

@@ -0,0 +1,537 @@
#!/usr/bin/env python3
"""
Action Executor - Safely executes proposed fixes with rollback capability
"""
import json
import subprocess
import shutil
from typing import Dict, List, Any, Optional
from pathlib import Path
from datetime import datetime
import time
class SafeExecutor:
"""Executes system maintenance actions with safety checks"""
# Actions that are considered safe to auto-execute
SAFE_ACTIONS = {
"systemd_restart", # Restart failed services
"cleanup", # Disk cleanup, log rotation
"investigation", # Read-only diagnostics
}
# Services that should NEVER be stopped/disabled
PROTECTED_SERVICES = {
"sshd",
"systemd-networkd",
"NetworkManager",
"systemd-resolved",
"dbus",
}
def __init__(
self,
state_dir: Path = Path("/var/lib/macha"),
autonomy_level: str = "suggest", # observe, suggest, auto-safe, auto-full
dry_run: bool = False,
agent = None # Optional agent for learning from actions
):
self.state_dir = state_dir
self.state_dir.mkdir(parents=True, exist_ok=True)
self.autonomy_level = autonomy_level
self.dry_run = dry_run
self.agent = agent
self.action_log = self.state_dir / "actions.jsonl"
self.approval_queue = self.state_dir / "approval_queue.json"
def execute_action(self, action: Dict[str, Any], monitoring_context: Dict[str, Any]) -> Dict[str, Any]:
"""Execute a proposed action with appropriate safety checks"""
action_type = action.get("action_type", "unknown")
risk_level = action.get("risk_level", "high")
# Determine if we should execute
should_execute, reason = self._should_execute(action_type, risk_level)
if not should_execute:
if self.autonomy_level == "suggest":
# Queue for approval
self._queue_for_approval(action, monitoring_context)
return {
"executed": False,
"status": "queued_for_approval",
"reason": reason,
"queue_file": str(self.approval_queue)
}
else:
return {
"executed": False,
"status": "blocked",
"reason": reason
}
# Execute the action
if self.dry_run:
return self._dry_run_action(action)
return self._execute_action_impl(action, monitoring_context)
def _should_execute(self, action_type: str, risk_level: str) -> tuple[bool, str]:
"""Determine if an action should be auto-executed based on autonomy level"""
if self.autonomy_level == "observe":
return False, "Autonomy level set to observe-only"
# Auto-approve low-risk investigation actions
if action_type == "investigation" and risk_level == "low":
return True, "Auto-approved: Low-risk information gathering"
if self.autonomy_level == "suggest":
return False, "Autonomy level requires manual approval"
if self.autonomy_level == "auto-safe":
if action_type in self.SAFE_ACTIONS and risk_level == "low":
return True, "Auto-executing safe action"
return False, "Action requires higher autonomy level"
if self.autonomy_level == "auto-full":
if risk_level == "high":
return False, "High risk actions always require approval"
return True, "Auto-executing approved action"
return False, "Unknown autonomy level"
def _execute_action_impl(self, action: Dict[str, Any], context: Dict[str, Any]) -> Dict[str, Any]:
"""Actually execute the action"""
action_type = action.get("action_type")
result = {
"executed": True,
"timestamp": datetime.now().isoformat(),
"action": action,
"success": False,
"output": "",
"error": None
}
try:
if action_type == "systemd_restart":
result.update(self._restart_services(action))
elif action_type == "cleanup":
result.update(self._perform_cleanup(action))
elif action_type == "nix_rebuild":
result.update(self._nix_rebuild(action))
elif action_type == "config_change":
result.update(self._apply_config_change(action))
elif action_type == "investigation":
result.update(self._run_investigation(action))
else:
result["error"] = f"Unknown action type: {action_type}"
except Exception as e:
result["error"] = str(e)
result["success"] = False
# Log the action
self._log_action(result)
# Learn from successful operations
if result.get("success") and self.agent:
try:
self.agent.reflect_and_learn(
situation=action.get("diagnosis", "Unknown situation"),
action_taken=action.get("proposed_action", "Unknown action"),
outcome=result.get("output", ""),
success=True
)
except Exception as e:
# Don't fail the action if learning fails
print(f"Note: Could not record learning: {e}")
return result
def _restart_services(self, action: Dict[str, Any]) -> Dict[str, Any]:
"""Restart systemd services"""
commands = action.get("commands", [])
output_lines = []
for cmd in commands:
if not cmd.startswith("systemctl restart "):
continue
service = cmd.split()[-1]
# Safety check
if any(protected in service for protected in self.PROTECTED_SERVICES):
output_lines.append(f"BLOCKED: {service} is protected")
continue
try:
result = subprocess.run(
["systemctl", "restart", service],
capture_output=True,
text=True,
timeout=30
)
if result.returncode == 0:
output_lines.append(f"✓ Restarted {service}")
else:
output_lines.append(f"✗ Failed to restart {service}: {result.stderr}")
except subprocess.TimeoutExpired:
output_lines.append(f"✗ Timeout restarting {service}")
return {
"success": len(output_lines) > 0,
"output": "\n".join(output_lines)
}
def _perform_cleanup(self, action: Dict[str, Any]) -> Dict[str, Any]:
"""Perform system cleanup tasks"""
output_lines = []
# Nix store cleanup
if "nix" in action.get("proposed_action", "").lower():
try:
result = subprocess.run(
["nix-collect-garbage", "--delete-old"],
capture_output=True,
text=True,
timeout=300
)
output_lines.append(f"Nix cleanup: {result.stdout}")
except Exception as e:
output_lines.append(f"Nix cleanup failed: {e}")
# Journal cleanup (keep last 7 days)
try:
result = subprocess.run(
["journalctl", "--vacuum-time=7d"],
capture_output=True,
text=True,
timeout=60
)
output_lines.append(f"Journal cleanup: {result.stdout}")
except Exception as e:
output_lines.append(f"Journal cleanup failed: {e}")
return {
"success": True,
"output": "\n".join(output_lines)
}
def _nix_rebuild(self, action: Dict[str, Any]) -> Dict[str, Any]:
"""Rebuild NixOS configuration"""
# This is HIGH RISK - always requires approval or full autonomy
# And we should test first
output_lines = []
# First, try a dry build
try:
result = subprocess.run(
["nixos-rebuild", "dry-build", "--flake", ".#macha"],
capture_output=True,
text=True,
timeout=600,
cwd="/home/lily/Documents/nixos-servers"
)
if result.returncode != 0:
return {
"success": False,
"output": f"Dry build failed:\n{result.stderr}"
}
output_lines.append("✓ Dry build successful")
except Exception as e:
return {
"success": False,
"output": f"Dry build error: {e}"
}
# Now do the actual rebuild
try:
result = subprocess.run(
["nixos-rebuild", "switch", "--flake", ".#macha"],
capture_output=True,
text=True,
timeout=1200,
cwd="/home/lily/Documents/nixos-servers"
)
output_lines.append(result.stdout)
return {
"success": result.returncode == 0,
"output": "\n".join(output_lines),
"error": result.stderr if result.returncode != 0 else None
}
except Exception as e:
return {
"success": False,
"output": "\n".join(output_lines),
"error": str(e)
}
def _apply_config_change(self, action: Dict[str, Any]) -> Dict[str, Any]:
"""Apply a configuration file change"""
config_changes = action.get("config_changes", {})
file_path = config_changes.get("file")
if not file_path:
return {
"success": False,
"output": "No file specified in config_changes"
}
# For now, we DON'T auto-modify configs - too risky
# Instead, we create a suggested patch file
patch_file = self.state_dir / f"suggested_patch_{int(time.time())}.txt"
with open(patch_file, 'w') as f:
f.write(f"Suggested change to {file_path}:\n\n")
f.write(config_changes.get("change", "No change description"))
f.write(f"\n\nReasoning: {action.get('reasoning', 'No reasoning provided')}")
return {
"success": True,
"output": f"Config change suggestion saved to {patch_file}\nThis requires manual review and application."
}
def _run_investigation(self, action: Dict[str, Any]) -> Dict[str, Any]:
"""Run diagnostic commands"""
commands = action.get("commands", [])
output_lines = []
for cmd in commands:
# Only allow safe read-only commands
safe_commands = ["journalctl", "systemctl status", "df", "free", "ps", "netstat", "ss"]
if not any(cmd.startswith(safe) for safe in safe_commands):
output_lines.append(f"BLOCKED unsafe command: {cmd}")
continue
try:
result = subprocess.run(
cmd,
shell=True,
capture_output=True,
text=True,
timeout=30
)
output_lines.append(f"$ {cmd}")
output_lines.append(result.stdout)
except Exception as e:
output_lines.append(f"Error running {cmd}: {e}")
return {
"success": True,
"output": "\n".join(output_lines)
}
def _dry_run_action(self, action: Dict[str, Any]) -> Dict[str, Any]:
"""Simulate action execution"""
return {
"executed": False,
"status": "dry_run",
"action": action,
"output": "Dry run mode - no actual changes made"
}
def _queue_for_approval(self, action: Dict[str, Any], context: Dict[str, Any]):
"""Add action to approval queue"""
queue = []
if self.approval_queue.exists():
with open(self.approval_queue, 'r') as f:
queue = json.load(f)
# Check for duplicate pending actions
proposed_action = action.get("proposed_action", "")
diagnosis = action.get("diagnosis", "")
for existing in queue:
# Skip already approved/rejected items
if existing.get("approved") is not None:
continue
existing_action = existing.get("action", {})
existing_proposed = existing_action.get("proposed_action", "")
existing_diagnosis = existing_action.get("diagnosis", "")
# Check if this is essentially the same issue
# Match if diagnosis is very similar OR proposed action is very similar
if (diagnosis and existing_diagnosis and
self._similarity_check(diagnosis, existing_diagnosis) > 0.7):
print(f"Skipping duplicate action - similar diagnosis already queued")
return
if (proposed_action and existing_proposed and
self._similarity_check(proposed_action, existing_proposed) > 0.7):
print(f"Skipping duplicate action - similar proposal already queued")
return
queue.append({
"timestamp": datetime.now().isoformat(),
"action": action,
"context": context,
"approved": None
})
with open(self.approval_queue, 'w') as f:
json.dump(queue, f, indent=2)
def _similarity_check(self, str1: str, str2: str) -> float:
"""Simple similarity check between two strings"""
# Normalize strings
s1 = str1.lower().strip()
s2 = str2.lower().strip()
# Exact match
if s1 == s2:
return 1.0
# Check for significant word overlap
words1 = set(s1.split())
words2 = set(s2.split())
# Remove common words that don't indicate similarity
common_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had'}
words1 = words1 - common_words
words2 = words2 - common_words
if not words1 or not words2:
return 0.0
# Calculate Jaccard similarity
intersection = len(words1 & words2)
union = len(words1 | words2)
return intersection / union if union > 0 else 0.0
def _log_action(self, result: Dict[str, Any]):
"""Log executed actions"""
with open(self.action_log, 'a') as f:
f.write(json.dumps(result) + '\n')
def get_approval_queue(self) -> List[Dict[str, Any]]:
"""Get pending actions awaiting approval"""
if not self.approval_queue.exists():
return []
with open(self.approval_queue, 'r') as f:
return json.load(f)
def approve_action(self, index: int) -> bool:
"""Approve and execute a queued action, then remove it from queue"""
queue = self.get_approval_queue()
if 0 <= index < len(queue):
action_item = queue[index]
# Execute the approved action
result = self._execute_action_impl(action_item["action"], action_item["context"])
# Archive the action (success or failure)
self._archive_action(action_item, result)
# Remove from queue regardless of outcome
queue.pop(index)
with open(self.approval_queue, 'w') as f:
json.dump(queue, f, indent=2)
return result.get("success", False)
return False
def _archive_action(self, action_item: Dict[str, Any], result: Dict[str, Any]):
"""Archive an approved action with its execution result"""
archive_file = self.state_dir / "approved_actions.jsonl"
archive_entry = {
"timestamp": datetime.now().isoformat(),
"original_timestamp": action_item.get("timestamp"),
"action": action_item.get("action"),
"context": action_item.get("context"),
"result": result
}
with open(archive_file, 'a') as f:
f.write(json.dumps(archive_entry) + '\n')
def reject_action(self, index: int) -> bool:
"""Reject and remove a queued action"""
queue = self.get_approval_queue()
if 0 <= index < len(queue):
removed_action = queue.pop(index)
with open(self.approval_queue, 'w') as f:
json.dump(queue, f, indent=2)
return True
return False
if __name__ == "__main__":
import sys
if len(sys.argv) > 1:
if sys.argv[1] == "queue":
executor = SafeExecutor()
queue = executor.get_approval_queue()
if queue:
print("\n" + "="*70)
print(f"PENDING ACTIONS: {len(queue)}")
print("="*70)
for i, item in enumerate(queue):
action = item.get("action", {})
timestamp = item.get("timestamp", "unknown")
approved = item.get("approved")
status = "✓ APPROVED" if approved else "⏳ PENDING" if approved is None else "✗ REJECTED"
print(f"\n[{i}] {status} - {timestamp}")
print("-" * 70)
print(f"DIAGNOSIS: {action.get('diagnosis', 'N/A')}")
print(f"\nPROPOSED ACTION: {action.get('proposed_action', 'N/A')}")
print(f"TYPE: {action.get('action_type', 'N/A')}")
print(f"RISK: {action.get('risk_level', 'N/A')}")
if action.get('commands'):
print(f"\nCOMMANDS:")
for cmd in action['commands']:
print(f" - {cmd}")
if action.get('config_changes'):
print(f"\nCONFIG CHANGES:")
for key, value in action['config_changes'].items():
print(f" {key}: {value}")
print(f"\nREASONING: {action.get('reasoning', 'N/A')}")
print("\n" + "="*70 + "\n")
else:
print("No pending actions")
elif sys.argv[1] == "approve" and len(sys.argv) > 2:
executor = SafeExecutor()
index = int(sys.argv[2])
success = executor.approve_action(index)
print(f"Approval {'succeeded' if success else 'failed'}")
elif sys.argv[1] == "reject" and len(sys.argv) > 2:
executor = SafeExecutor()
index = int(sys.argv[2])
success = executor.reject_action(index)
print(f"Action {'rejected and removed from queue' if success else 'rejection failed'}")