Fix: Always use explicit SSH key path for all SSH operations

CRITICAL FIX: SSH keys were not being auto-loaded, causing connection failures.

Changes:
- tools.py: SSH commands now include -i /var/lib/macha/.ssh/id_ed25519
- remote_monitor.py: Use explicit key path instead of sudo ssh
- system_discovery.py: Add explicit key path to all SSH calls
- system_prompt.txt: Document that the SSH key path is supplied automatically
- DESIGN.md: Clarify CRITICAL requirement for explicit key paths

All SSH operations now explicitly specify:
  -i /var/lib/macha/.ssh/id_ed25519 -o StrictHostKeyChecking=no

This ensures Macha can reliably connect to remote hosts without
depending on an SSH agent or automatic key discovery.
This commit is contained in:
Lily Miller
2025-10-06 15:04:51 -06:00
parent 22ba493d9e
commit ab72a98849
5 changed files with 20 additions and 9 deletions

View File

@@ -26,6 +26,8 @@ Macha is an AI-powered autonomous system administrator capable of monitoring, ma
**Macha CAN and SHOULD use SSH to manage other hosts.** **Macha CAN and SHOULD use SSH to manage other hosts.**
#### SSH Access #### SSH Access
- **CRITICAL**: Always uses an explicit SSH key path: `-i /var/lib/macha/.ssh/id_ed25519`
- All SSH commands automatically include the `-i` flag with absolute key path
- Runs as `macha` user (UID 2501) - Runs as `macha` user (UID 2501)
- Has `NOPASSWD` sudo access for administrative commands - Has `NOPASSWD` sudo access for administrative commands
- Shares SSH keys with other hosts in the infrastructure - Shares SSH keys with other hosts in the infrastructure
@@ -37,8 +39,9 @@ Macha is an AI-powered autonomous system administrator capable of monitoring, ma
ssh rhiannon systemctl status ollama ssh rhiannon systemctl status ollama
ssh alexander df -h ssh alexander df -h
``` ```
- Commands automatically prefixed with `sudo` by the tools layer - Commands automatically transformed by the tools layer
- Full command: `ssh macha@rhiannon sudo systemctl status ollama` - Full command: `ssh -i /var/lib/macha/.ssh/id_ed25519 -o StrictHostKeyChecking=no macha@rhiannon sudo systemctl status ollama`
- The SSH key path is always explicit, and remote commands are automatically prefixed with `sudo`
2. **Status checks:** 2. **Status checks:**
- Check service health on remote hosts - Check service health on remote hosts

View File

@@ -36,9 +36,11 @@ class RemoteMonitor:
(success, stdout, stderr) (success, stdout, stderr)
""" """
try: try:
# Use sudo to run SSH as root (which has the keys) # Use explicit SSH key path from macha user's home directory
ssh_key = "/var/lib/macha/.ssh/id_ed25519"
ssh_cmd = [ ssh_cmd = [
"sudo", "ssh", "ssh",
"-i", ssh_key,
"-o", "StrictHostKeyChecking=no", "-o", "StrictHostKeyChecking=no",
"-o", "ConnectTimeout=10", "-o", "ConnectTimeout=10",
self.ssh_target, self.ssh_target,

View File

@@ -65,9 +65,11 @@ class SystemDiscovery:
def detect_os_type(self, hostname: str) -> str: def detect_os_type(self, hostname: str) -> str:
"""Detect the operating system of a remote host via SSH""" """Detect the operating system of a remote host via SSH"""
try: try:
# Use explicit SSH key path
ssh_key = "/var/lib/macha/.ssh/id_ed25519"
# Try to detect OS via SSH # Try to detect OS via SSH
result = subprocess.run( result = subprocess.run(
["ssh", "-o", "ConnectTimeout=5", "-o", "StrictHostKeyChecking=no", ["ssh", "-i", ssh_key, "-o", "ConnectTimeout=5", "-o", "StrictHostKeyChecking=no",
hostname, "cat /etc/os-release"], hostname, "cat /etc/os-release"],
capture_output=True, capture_output=True,
text=True, text=True,
@@ -95,7 +97,7 @@ class SystemDiscovery:
# Try uname for other systems # Try uname for other systems
result = subprocess.run( result = subprocess.run(
["ssh", "-o", "ConnectTimeout=5", "-o", "StrictHostKeyChecking=no", ["ssh", "-i", ssh_key, "-o", "ConnectTimeout=5", "-o", "StrictHostKeyChecking=no",
hostname, "uname -s"], hostname, "uname -s"],
capture_output=True, capture_output=True,
text=True, text=True,

View File

@@ -75,6 +75,7 @@ You manage multiple hosts in the infrastructure. You have TWO tools for remote o
- You CAN and SHOULD use SSH to check other hosts - You CAN and SHOULD use SSH to check other hosts
- Examples: 'ssh rhiannon systemctl status ollama', 'ssh alexander df -h' - Examples: 'ssh rhiannon systemctl status ollama', 'ssh alexander df -h'
- Commands are automatically run with sudo as the macha user - Commands are automatically run with sudo as the macha user
- The SSH key at /var/lib/macha/.ssh/id_ed25519 is automatically passed (via -i) on every SSH command
- Use for: checking services, reading logs, gathering metrics, quick diagnostics - Use for: checking services, reading logs, gathering metrics, quick diagnostics
- Hosts available: rhiannon, alexander, UCAR-Kinston, test-vm - Hosts available: rhiannon, alexander, UCAR-Kinston, test-vm

View File

@@ -269,17 +269,20 @@ class SysadminTools:
} }
# Automatically configure SSH commands to use macha user on remote systems # Automatically configure SSH commands to use macha user on remote systems
# Transform: ssh hostname cmd -> ssh macha@hostname sudo cmd # Transform: ssh hostname cmd -> ssh -i /var/lib/macha/.ssh/id_ed25519 macha@hostname sudo cmd
if command.strip().startswith('ssh ') and '@' not in command.split()[1]: if command.strip().startswith('ssh ') and '@' not in command.split()[1]:
parts = command.split(maxsplit=2) parts = command.split(maxsplit=2)
if len(parts) >= 2: if len(parts) >= 2:
hostname = parts[1] hostname = parts[1]
remaining = ' '.join(parts[2:]) if len(parts) > 2 else '' remaining = ' '.join(parts[2:]) if len(parts) > 2 else ''
# Always use explicit SSH key path
ssh_key = "/var/lib/macha/.ssh/id_ed25519"
ssh_opts = f"-i {ssh_key} -o StrictHostKeyChecking=no"
# If there's a command to run remotely, prefix it with sudo # If there's a command to run remotely, prefix it with sudo
if remaining: if remaining:
command = f"ssh macha@{hostname} sudo {remaining}".strip() command = f"ssh {ssh_opts} macha@{hostname} sudo {remaining}".strip()
else: else:
command = f"ssh macha@{hostname}".strip() command = f"ssh {ssh_opts} macha@{hostname}".strip()
try: try:
result = subprocess.run( result = subprocess.run(