diff --git a/apps/knowledge/tasks.py b/apps/knowledge/tasks.py index 9b65119..08fe55b 100644 --- a/apps/knowledge/tasks.py +++ b/apps/knowledge/tasks.py @@ -64,16 +64,13 @@ def ingest_training_file_task(self, file_uuid): all_documents = [] chunk_counter = 0 - timeout = Timeout(60.0) - - with Client(timeout=timeout, auth=settings.INFERENCE_AUTH) as client: - - for text_segment in _get_text_chunks(raw_text): + with Client(timeout=Timeout(settings.INFERENCE_REQUEST_TIMEOUT), auth=settings.INFERENCE_AUTH) as client: + for text_segment in _get_text_chunks(raw_text, size=settings.INGESTION_CHUNK_SIZE): response = client.post( settings.INFERENCE_SEMANTIC_CHUNK_ENDPOINT, json={ "text": text_segment, - "threshold": 95, + "threshold": settings.SEMANTIC_CHUNK_THRESHOLD, }, ) response.raise_for_status() @@ -180,14 +177,13 @@ def update_agent_prompts_from_file_task(self, role_uuid: str): ] try: - with Client(timeout=Timeout(60.0), auth=settings.INFERENCE_AUTH) as client: + with Client(timeout=Timeout(settings.INFERENCE_REQUEST_TIMEOUT), auth=settings.INFERENCE_AUTH) as client: for agent_type, user_prompt in refine_calls: if agent_type not in configs: continue response = client.post( settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, json={ - "model": "meta-llama-3.1-8b-instruct", "messages": [{"role": "user", "content": user_prompt}], "max_tokens": 600, }, diff --git a/apps/onboarding/consumers/base.py b/apps/onboarding/consumers/base.py index 221307c..86f5be6 100644 --- a/apps/onboarding/consumers/base.py +++ b/apps/onboarding/consumers/base.py @@ -78,7 +78,7 @@ class BaseOnboardingConsumer(AsyncWebsocketConsumer): ### MCP Handling ### async def orchestrate(self, message: str, config: AgentConfig, minimum_turns: int = 2, maximum_turns: int = 5, - max_tokens: int | None = None, raise_on_error: bool = False, request_timeout: int = 60.0) -> str: + max_tokens: int | None = None, raise_on_error: bool = False, request_timeout: float = settings.INFERENCE_REQUEST_TIMEOUT) -> str: """ Orchestrates a multi-turn conversation with the agent, including tool calls and reasoning steps. """ @@ -153,7 +153,7 @@ class BaseOnboardingConsumer(AsyncWebsocketConsumer): payload["stop"] = stop try: chunks: list[str] = [] - async with httpx.AsyncClient(timeout=120.0, auth=settings.INFERENCE_AUTH) as client: + async with httpx.AsyncClient(timeout=settings.INFERENCE_STREAM_TIMEOUT, auth=settings.INFERENCE_AUTH) as client: async with client.stream("POST", settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, json=payload) as response: response.raise_for_status() async for line in response.aiter_lines(): diff --git a/apps/onboarding/consumers/knowledge.py b/apps/onboarding/consumers/knowledge.py index 7d43401..086d3b7 100644 --- a/apps/onboarding/consumers/knowledge.py +++ b/apps/onboarding/consumers/knowledge.py @@ -111,7 +111,7 @@ class OnboardingKnowledgeConsumer(BaseOnboardingConsumer): payload['stop'] = stop try: chunks: list[str] = [] - async with httpx.AsyncClient(timeout=120.0, auth=settings.INFERENCE_AUTH) as client: + async with httpx.AsyncClient(timeout=settings.INFERENCE_STREAM_TIMEOUT, auth=settings.INFERENCE_AUTH) as client: async with client.stream('POST', settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, json=payload) as response: response.raise_for_status() async for line in response.aiter_lines(): diff --git a/apps/onboarding/mcp.py b/apps/onboarding/mcp.py index e9ef1ec..4536598 100644 --- a/apps/onboarding/mcp.py +++ b/apps/onboarding/mcp.py @@ -65,7 +65,7 @@ class MCPRouter: async def _get_embedding(self, text): logger.info('MCP embedding request started') - async with httpx.AsyncClient(timeout=60.0, auth=settings.INFERENCE_AUTH) as client: + async with httpx.AsyncClient(timeout=settings.INFERENCE_REQUEST_TIMEOUT, auth=settings.INFERENCE_AUTH) as client: response = await client.post( settings.INFERENCE_EMBEDDINGS_ENDPOINT, json={'input': text}, diff --git a/apps/onboarding/viewsets.py b/apps/onboarding/viewsets.py index fae9463..d644b7d 100644 --- a/apps/onboarding/viewsets.py +++ b/apps/onboarding/viewsets.py @@ -545,7 +545,7 @@ class OnboardingSessionViewSet(RequestParamMixin, ModelViewSet): prompt = OnboardingPrompts.grading_prompt(ai_fields, page_responses) try: - with httpx.Client(timeout=60.0, auth=settings.INFERENCE_AUTH) as client: + with httpx.Client(timeout=settings.INFERENCE_REQUEST_TIMEOUT, auth=settings.INFERENCE_AUTH) as client: response = client.post( settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, json={ @@ -754,7 +754,7 @@ class OnboardingSessionViewSet(RequestParamMixin, ModelViewSet): context = f"Page: {page_title}\n\n{page_body}" if page_body else page_title prompt = f"Context:\n{context}\n\nQuestion: {message}" try: - with httpx.Client(timeout=60.0, auth=settings.INFERENCE_AUTH) as client: + with httpx.Client(timeout=settings.INFERENCE_REQUEST_TIMEOUT, auth=settings.INFERENCE_AUTH) as client: response = client.post( settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, json={ @@ -784,7 +784,7 @@ class OnboardingSessionViewSet(RequestParamMixin, ModelViewSet): f"Return only the revised page body." ) try: - with httpx.Client(timeout=60.0, auth=settings.INFERENCE_AUTH) as client: + with httpx.Client(timeout=settings.INFERENCE_REQUEST_TIMEOUT, auth=settings.INFERENCE_AUTH) as client: response = client.post( settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, json={ diff --git a/config/settings.py b/config/settings.py index 38278fc..ad286f0 100644 --- a/config/settings.py +++ b/config/settings.py @@ -35,7 +35,11 @@ INFERENCE_SEMANTIC_CHUNK_ENDPOINT = f"{INFERENCE_URL}/v1/semantic-chunk" INFERENCE_EMBEDDINGS_ENDPOINT = f"{INFERENCE_URL}/v1/embeddings" INFERENCE_CHAT_COMPLETIONS_ENDPOINT = f"{INFERENCE_URL}/v1/chat/completions" INFERENCE_INGEST_TIMEOUT = float(os.getenv('INFERENCE_INGEST_TIMEOUT', '600')) +INFERENCE_REQUEST_TIMEOUT = float(os.getenv('INFERENCE_REQUEST_TIMEOUT', '60')) +INFERENCE_STREAM_TIMEOUT = float(os.getenv('INFERENCE_STREAM_TIMEOUT', '120')) EMBEDDING_DIMENSIONS = int(os.getenv('EMBEDDING_DIMENSIONS', '768')) +INGESTION_CHUNK_SIZE = int(os.getenv('INGESTION_CHUNK_SIZE', '10000')) +SEMANTIC_CHUNK_THRESHOLD = int(os.getenv('SEMANTIC_CHUNK_THRESHOLD', '95')) STATIC_URL = os.getenv('DJANGO_STATIC_URL', '/static/') MEDIA_URL = os.getenv('DJANGO_MEDIA_URL', '/media/')