diff --git a/apps/knowledge/tasks.py b/apps/knowledge/tasks.py index ffac9fe..3889724 100644 --- a/apps/knowledge/tasks.py +++ b/apps/knowledge/tasks.py @@ -9,6 +9,7 @@ from pypdf import PdfReader from apps.knowledge.models import RoleRagDocument, TrainingFile + def _decode_text_bytes(raw_bytes: bytes) -> str: try: return raw_bytes.decode('utf-8') @@ -63,7 +64,7 @@ def ingest_training_file_task(self, file_uuid): for text_segment in _get_text_chunks(raw_text): response = client.post( - f"{settings.INFERENCE_URL}/v1/semantic-chunk", + settings.INFERENCE_SEMANTIC_CHUNK_ENDPOINT, json={"text": text_segment, "threshold": 95} ) response.raise_for_status() diff --git a/apps/onboarding/consumers.py b/apps/onboarding/consumers.py index ec5f49b..53d8955 100644 --- a/apps/onboarding/consumers.py +++ b/apps/onboarding/consumers.py @@ -305,7 +305,7 @@ class OnboardingConsumer(AsyncWebsocketConsumer): try: response = await client.post( - f"{settings.INFERENCE_URL}/v1/chat/completions", + settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, json={ "model": llm_config.get("model_id", "meta-llama-3.1-8b"), "messages": messages, diff --git a/apps/onboarding/mcp.py b/apps/onboarding/mcp.py index 38a1220..d04e384 100644 --- a/apps/onboarding/mcp.py +++ b/apps/onboarding/mcp.py @@ -6,6 +6,7 @@ from pgvector.django import CosineDistance from apps.knowledge.models import RoleRagDocument from apps.onboarding.models import OnboardingSession + class MCPRouter: def get_tool_definitions(self): @@ -47,7 +48,7 @@ class MCPRouter: async def _get_embedding(self, text): async with httpx.AsyncClient() as client: response = await client.post( - f"{settings.INFERENCE_URL}/v1/embeddings", + settings.INFERENCE_EMBEDDINGS_ENDPOINT, json={"input": text} ) diff --git a/apps/onboarding/viewsets.py b/apps/onboarding/viewsets.py index b380551..2f38a06 100644 --- a/apps/onboarding/viewsets.py +++ b/apps/onboarding/viewsets.py @@ -15,6 +15,7 @@ from apps.accounts.permissions import CanManageOrganization, can_manage_organiza from apps.onboarding.models import AgentConfig, AgentInteractionLog, OnboardingFlow, OnboardingSession from apps.onboarding.serializers import AgentConfigSerializer, AgentInteractionLogSerializer, OnboardingFlowSerializer, OnboardingSessionSerializer + class OnboardingFlowViewSet(ModelViewSet): queryset = OnboardingFlow.objects.all() serializer_class = OnboardingFlowSerializer @@ -445,7 +446,7 @@ class OnboardingSessionViewSet(ModelViewSet): try: with httpx.Client(timeout=60.0) as client: response = client.post( - f"{settings.INFERENCE_URL}/v1/chat/completions", + settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, json={ "model": (config.llm_config or {}).get("model_id", "meta-llama-3.1-8b"), "messages": [ @@ -482,7 +483,7 @@ class OnboardingSessionViewSet(ModelViewSet): try: with httpx.Client(timeout=60.0) as client: response = client.post( - f"{settings.INFERENCE_URL}/v1/chat/completions", + settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, json={ "model": (config.llm_config or {}).get("model_id", "meta-llama-3.1-8b"), "messages": [ diff --git a/config/settings.py b/config/settings.py index f027eb9..0a16cf7 100644 --- a/config/settings.py +++ b/config/settings.py @@ -3,8 +3,9 @@ Django settings will use prefix of DJANGO_ for environment variables. """ import os -from pathlib import Path import sys +from pathlib import Path + from dotenv import load_dotenv BASE_DIR = Path(__file__).resolve().parent.parent @@ -27,6 +28,9 @@ DJANGO_CELERY_BROKER_URL = os.getenv('DJANGO_CELERY_BROKER_URL', 'redis://localh INFERENCE_HOST = os.getenv('INFERENCE_HOST', 'localhost') INFERENCE_PORT = os.getenv('INFERENCE_PORT', '8001') INFERENCE_URL = f"http://{INFERENCE_HOST}:{INFERENCE_PORT}" +INFERENCE_SEMANTIC_CHUNK_ENDPOINT = f"{INFERENCE_URL}/v1/semantic-chunk" +INFERENCE_EMBEDDINGS_ENDPOINT = f"{INFERENCE_URL}/v1/embeddings" +INFERENCE_CHAT_COMPLETIONS_ENDPOINT = f"{INFERENCE_URL}/v1/chat/completions" INFERENCE_INGEST_TIMEOUT = float(os.getenv('INFERENCE_INGEST_TIMEOUT', '600')) STATIC_URL = os.getenv('DJANGO_STATIC_URL', '/static/')