Initial commit: Split Macha autonomous system into separate flake
Macha is now a standalone NixOS flake that can be imported into other systems. This provides:
- Independent versioning
- Easier reusability
- Cleaner separation of concerns
- Better development workflow

Includes:
- Complete autonomous system code
- NixOS module with full configuration options
- Queue-based architecture with priority system (sketched below)
- Chunked map-reduce for large outputs
- ChromaDB knowledge base
- Tool calling system
- Multi-host SSH management
- Gotify notification integration

All capabilities from DESIGN.md are preserved.
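For orientation only (not part of this commit): a minimal sketch of the producer side of the queue that ollama_worker.py drains. The worker code below only shows OllamaQueue(), cleanup_old_requests(), and start_worker(); the submit/priority/wait interface used here is assumed naming, not code from this repository.

# Hypothetical producer-side sketch. OllamaQueue.submit(), wait_for_result(),
# and the numeric priority values are assumptions, not code from this commit.
from ollama_queue import OllamaQueue

queue = OllamaQueue()

# A routine background task might enqueue at low priority...
queue.submit(
    {"model": "llama3.1", "prompt": "Summarize last night's journal errors", "stream": False},
    priority=10,   # assumed convention: larger number = less urgent
)

# ...while an interactive request is queued ahead of it.
req_id = queue.submit(
    {"model": "llama3.1", "prompt": "Why is sshd failing on host nas01?"},
    priority=1,
)

# Block until ollama_worker.py has processed the request (hypothetical helper).
result = queue.wait_for_result(req_id, timeout=300)
print(result.get("response", ""))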
ollama_worker.py (new file, 111 lines)
@@ -0,0 +1,111 @@
#!/usr/bin/env python3
"""
Ollama Queue Worker - Daemon that processes queued Ollama requests
"""

import json
import sys
import requests
from pathlib import Path
from ollama_queue import OllamaQueue

class OllamaClient:
    """Simple Ollama API client for the queue worker"""

    def __init__(self, host: str = "http://localhost:11434"):
        self.host = host

    def generate(self, payload: dict) -> dict:
        """Call /api/generate"""
        response = requests.post(
            f"{self.host}/api/generate",
            json=payload,
            timeout=payload.get("timeout", 300),
            stream=False
        )
        response.raise_for_status()
        return response.json()

    def chat(self, payload: dict) -> dict:
        """Call /api/chat"""
        response = requests.post(
            f"{self.host}/api/chat",
            json=payload,
            timeout=payload.get("timeout", 300),
            stream=False
        )
        response.raise_for_status()
        return response.json()

    def chat_with_tools(self, payload: dict) -> dict:
        """Call /api/chat with tools (streaming or non-streaming)"""

        # Check if streaming is requested
        stream = payload.get("stream", False)

        response = requests.post(
            f"{self.host}/api/chat",
            json=payload,
            timeout=payload.get("timeout", 300),
            stream=stream
        )
        response.raise_for_status()

        if not stream:
            # Non-streaming: return response directly
            return response.json()

        # Streaming: accumulate response
        full_response = {"message": {"role": "assistant", "content": "", "tool_calls": []}}

        for line in response.iter_lines():
            if line:
                chunk = json.loads(line)

                if "message" in chunk:
                    msg = chunk["message"]
                    # Preserve role from first chunk
                    if "role" in msg and not full_response["message"].get("role"):
                        full_response["message"]["role"] = msg["role"]
                    if "content" in msg:
                        full_response["message"]["content"] += msg["content"]
                    if "tool_calls" in msg:
                        full_response["message"]["tool_calls"].extend(msg["tool_calls"])

                if chunk.get("done"):
                    full_response["done"] = True
                    # Copy any additional fields from final chunk
                    for key in chunk:
                        if key not in ("message", "done"):
                            full_response[key] = chunk[key]
                    break

        # Ensure role is set
        if "role" not in full_response["message"]:
            full_response["message"]["role"] = "assistant"

        return full_response

def main():
    """Main entry point for the worker"""
    print("Starting Ollama Queue Worker...")

    # Initialize queue and client
    queue = OllamaQueue()
    client = OllamaClient()

    # Cleanup old requests on startup
    queue.cleanup_old_requests(max_age_seconds=3600)

    # Start processing
    try:
        queue.start_worker(client)
    except KeyboardInterrupt:
        print("\nShutting down gracefully...")
        queue.running = False

    return 0

if __name__ == "__main__":
    sys.exit(main())
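For reference (again, not part of the committed file): the payloads this client forwards are ordinary Ollama REST payloads, and the only key the worker itself consumes is the optional "timeout", which is also passed through in the JSON body. A rough usage sketch, with placeholder model and tool names; the tool schema follows Ollama's OpenAI-style function format, so treat those details as assumptions rather than something taken from this repository:

client = OllamaClient()

# /api/generate: one-shot completion
gen = client.generate({
    "model": "llama3.1",                 # placeholder model name
    "prompt": "Say hello in one word.",
    "stream": False,
    "timeout": 120,                      # read by generate() for the HTTP timeout
})
print(gen["response"])

# /api/chat with tools, streamed and re-assembled by chat_with_tools()
reply = client.chat_with_tools({
    "model": "llama3.1",
    "stream": True,
    "messages": [{"role": "user", "content": "Check disk usage on this host."}],
    "tools": [{
        "type": "function",
        "function": {
            "name": "run_command",       # placeholder tool definition
            "description": "Run a shell command",
            "parameters": {
                "type": "object",
                "properties": {"cmd": {"type": "string"}},
                "required": ["cmd"],
            },
        },
    }],
})
print(reply["message"].get("tool_calls", []))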