Initial commit: Split Macha autonomous system into separate flake
Macha is now a standalone NixOS flake that can be imported into other systems. This provides:
- Independent versioning
- Easier reusability
- Cleaner separation of concerns
- Better development workflow

Includes:
- Complete autonomous system code
- NixOS module with full configuration options
- Queue-based architecture with priority system (sketched below)
- Chunked map-reduce for large outputs
- ChromaDB knowledge base
- Tool calling system
- Multi-host SSH management
- Gotify notification integration

All capabilities from DESIGN.md are preserved.
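For orientation only (not part of this commit): a minimal sketch of the producer side of the queue that ollama_worker.py drains. The worker code below only shows OllamaQueue(), cleanup_old_requests(), and start_worker(); the submit/priority/wait interface used here is assumed naming, not code from this repository.

# Hypothetical producer-side sketch. OllamaQueue.submit(), wait_for_result(),
# and the numeric priority values are assumptions, not code from this commit.
from ollama_queue import OllamaQueue

queue = OllamaQueue()

# A routine background task might enqueue at low priority...
queue.submit(
    {"model": "llama3.1", "prompt": "Summarize last night's journal errors", "stream": False},
    priority=10,   # assumed convention: larger number = less urgent
)

# ...while an interactive request is queued ahead of it.
req_id = queue.submit(
    {"model": "llama3.1", "prompt": "Why is sshd failing on host nas01?"},
    priority=1,
)

# Block until ollama_worker.py has processed the request (hypothetical helper).
result = queue.wait_for_result(req_id, timeout=300)
print(result.get("response", ""))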
ollama_worker.py (new file, 111 lines)
@@ -0,0 +1,111 @@
#!/usr/bin/env python3
"""
Ollama Queue Worker - Daemon that processes queued Ollama requests
"""

import json
import sys
import requests
from pathlib import Path
from ollama_queue import OllamaQueue

class OllamaClient:
    """Simple Ollama API client for the queue worker"""

    def __init__(self, host: str = "http://localhost:11434"):
        self.host = host

    def generate(self, payload: dict) -> dict:
        """Call /api/generate"""
        response = requests.post(
            f"{self.host}/api/generate",
            json=payload,
            timeout=payload.get("timeout", 300),
            stream=False
        )
        response.raise_for_status()
        return response.json()

    def chat(self, payload: dict) -> dict:
        """Call /api/chat"""
        response = requests.post(
            f"{self.host}/api/chat",
            json=payload,
            timeout=payload.get("timeout", 300),
            stream=False
        )
        response.raise_for_status()
        return response.json()

    def chat_with_tools(self, payload: dict) -> dict:
        """Call /api/chat with tools (streaming or non-streaming)"""

        # Check if streaming is requested
        stream = payload.get("stream", False)

        response = requests.post(
            f"{self.host}/api/chat",
            json=payload,
            timeout=payload.get("timeout", 300),
            stream=stream
        )
        response.raise_for_status()

        if not stream:
            # Non-streaming: return response directly
            return response.json()

        # Streaming: accumulate response
        full_response = {"message": {"role": "assistant", "content": "", "tool_calls": []}}

        for line in response.iter_lines():
            if line:
                chunk = json.loads(line)

                if "message" in chunk:
                    msg = chunk["message"]
                    # Preserve role from first chunk
                    if "role" in msg and not full_response["message"].get("role"):
                        full_response["message"]["role"] = msg["role"]
                    if "content" in msg:
                        full_response["message"]["content"] += msg["content"]
                    if "tool_calls" in msg:
                        full_response["message"]["tool_calls"].extend(msg["tool_calls"])

                if chunk.get("done"):
                    full_response["done"] = True
                    # Copy any additional fields from final chunk
                    for key in chunk:
                        if key not in ("message", "done"):
                            full_response[key] = chunk[key]
                    break

        # Ensure role is set
        if "role" not in full_response["message"]:
            full_response["message"]["role"] = "assistant"

        return full_response

def main():
    """Main entry point for the worker"""
    print("Starting Ollama Queue Worker...")

    # Initialize queue and client
    queue = OllamaQueue()
    client = OllamaClient()

    # Cleanup old requests on startup
    queue.cleanup_old_requests(max_age_seconds=3600)

    # Start processing
    try:
        queue.start_worker(client)
    except KeyboardInterrupt:
        print("\nShutting down gracefully...")
        queue.running = False

    return 0

if __name__ == "__main__":
    sys.exit(main())
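For reference (again, not part of the committed file): the payloads this client forwards are ordinary Ollama REST payloads, and the only key the worker itself consumes is the optional "timeout", which is also passed through in the JSON body. A rough usage sketch, with placeholder model and tool names; the tool schema follows Ollama's OpenAI-style function format, so treat those details as assumptions rather than something taken from this repository:

client = OllamaClient()

# /api/generate: one-shot completion
gen = client.generate({
    "model": "llama3.1",                 # placeholder model name
    "prompt": "Say hello in one word.",
    "stream": False,
    "timeout": 120,                      # read by generate() for the HTTP timeout
})
print(gen["response"])

# /api/chat with tools, streamed and re-assembled by chat_with_tools()
reply = client.chat_with_tools({
    "model": "llama3.1",
    "stream": True,
    "messages": [{"role": "user", "content": "Check disk usage on this host."}],
    "tools": [{
        "type": "function",
        "function": {
            "name": "run_command",       # placeholder tool definition
            "description": "Run a shell command",
            "parameters": {
                "type": "object",
                "properties": {"cmd": {"type": "string"}},
                "required": ["cmd"],
            },
        },
    }],
})
print(reply["message"].get("tool_calls", []))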