From 1eada257b94fff6a9976855109786ee97c88bc60 Mon Sep 17 00:00:00 2001
From: Viswamedha Nalabotu <vxn217@student.bham.ac.uk>
Date: Sun, 22 Mar 2026 16:42:40 +0000
Subject: [PATCH] Pass inference credentials via client auth, not the URL

---
 apps/knowledge/tasks.py                | 4 ++--
 apps/onboarding/consumers/base.py      | 4 ++--
 apps/onboarding/consumers/knowledge.py | 2 +-
 apps/onboarding/mcp.py                 | 2 +-
 apps/onboarding/viewsets.py            | 6 +++---
 config/settings.py                     | 3 ++-
 6 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/apps/knowledge/tasks.py b/apps/knowledge/tasks.py
index 1494471..59ca63c 100644
--- a/apps/knowledge/tasks.py
+++ b/apps/knowledge/tasks.py
@@ -66,7 +66,7 @@ def ingest_training_file_task(self, file_uuid):
                                                            
         timeout = Timeout(60.0) 
 
-        with Client(timeout=timeout) as client:
+        with Client(timeout=timeout, auth=settings.INFERENCE_AUTH) as client:
                                                               
             for text_segment in _get_text_chunks(raw_text):
                 response = client.post(
@@ -180,7 +180,7 @@ def update_agent_prompts_from_file_task(self, role_uuid: str):
     ]
 
     try:
-        with Client(timeout=Timeout(60.0)) as client:
+        with Client(timeout=Timeout(60.0), auth=settings.INFERENCE_AUTH) as client:
             for agent_type, user_prompt in refine_calls:
                 if agent_type not in configs:
                     continue
diff --git a/apps/onboarding/consumers/base.py b/apps/onboarding/consumers/base.py
index 7f12636..6ba10f8 100644
--- a/apps/onboarding/consumers/base.py
+++ b/apps/onboarding/consumers/base.py
@@ -89,7 +89,7 @@ class BaseOnboardingConsumer(AsyncWebsocketConsumer):
             {"role": "user", "content": message}
         ]
         last_content = ""  
-        async with httpx.AsyncClient(timeout = request_timeout) as client:
+        async with httpx.AsyncClient(timeout=request_timeout, auth=settings.INFERENCE_AUTH) as client:
             for turn in range(1, maximum_turns + 1):
                 await self.send_log(LogType.THOUGHT, f"Agent reasoning (Turn {turn})...")
                 try:
@@ -157,7 +157,7 @@ class BaseOnboardingConsumer(AsyncWebsocketConsumer):
             payload["stop"] = stop
         try:
             chunks: list[str] = []
-            async with httpx.AsyncClient(timeout=120.0) as client:
+            async with httpx.AsyncClient(timeout=120.0, auth=settings.INFERENCE_AUTH) as client:
                 async with client.stream("POST", settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, json=payload) as response:
                     response.raise_for_status()
                     async for line in response.aiter_lines():
diff --git a/apps/onboarding/consumers/knowledge.py b/apps/onboarding/consumers/knowledge.py
index 85d1d21..386c43a 100644
--- a/apps/onboarding/consumers/knowledge.py
+++ b/apps/onboarding/consumers/knowledge.py
@@ -113,7 +113,7 @@ class OnboardingKnowledgeConsumer(BaseOnboardingConsumer):
             payload['stop'] = stop
         try:
             chunks: list[str] = []
-            async with httpx.AsyncClient(timeout=120.0) as client:
+            async with httpx.AsyncClient(timeout=120.0, auth=settings.INFERENCE_AUTH) as client:
                 async with client.stream('POST', settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, json=payload) as response:
                     response.raise_for_status()
                     async for line in response.aiter_lines():
diff --git a/apps/onboarding/mcp.py b/apps/onboarding/mcp.py
index 68e3c74..e9ef1ec 100644
--- a/apps/onboarding/mcp.py
+++ b/apps/onboarding/mcp.py
@@ -65,7 +65,7 @@ class MCPRouter:
 
     async def _get_embedding(self, text):
         logger.info('MCP embedding request started')
-        async with httpx.AsyncClient(timeout=60.0) as client:
+        async with httpx.AsyncClient(timeout=60.0, auth=settings.INFERENCE_AUTH) as client:
             response = await client.post(
                 settings.INFERENCE_EMBEDDINGS_ENDPOINT,
                 json={'input': text},
diff --git a/apps/onboarding/viewsets.py b/apps/onboarding/viewsets.py
index ba22807..4e0049f 100644
--- a/apps/onboarding/viewsets.py
+++ b/apps/onboarding/viewsets.py
@@ -558,7 +558,7 @@ class OnboardingSessionViewSet(RequestParamMixin, ModelViewSet):
             prompt = OnboardingPrompts.grading_prompt(ai_fields, page_responses)
 
             try:
-                with httpx.Client(timeout=60.0) as client:
+                with httpx.Client(timeout=60.0, auth=settings.INFERENCE_AUTH) as client:
                     response = client.post(
                         settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
                         json={
@@ -768,7 +768,7 @@ class OnboardingSessionViewSet(RequestParamMixin, ModelViewSet):
         context = f"Page: {page_title}\n\n{page_body}" if page_body else page_title
         prompt = f"Context:\n{context}\n\nQuestion: {message}"
         try:
-            with httpx.Client(timeout=60.0) as client:
+            with httpx.Client(timeout=60.0, auth=settings.INFERENCE_AUTH) as client:
                 model_id = (config.llm_config or {}).get('model_id', 'meta-llama-3.1-8b') if config else 'meta-llama-3.1-8b'
                 response = client.post(
                     settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
@@ -800,7 +800,7 @@ class OnboardingSessionViewSet(RequestParamMixin, ModelViewSet):
             f"Return only the revised page body."
         )
         try:
-            with httpx.Client(timeout=60.0) as client:
+            with httpx.Client(timeout=60.0, auth=settings.INFERENCE_AUTH) as client:
                 model_id = (config.llm_config or {}).get('model_id', 'meta-llama-3.1-8b') if config else 'meta-llama-3.1-8b'
                 response = client.post(
                     settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
diff --git a/config/settings.py b/config/settings.py
index 8b4c4f5..38278fc 100644
--- a/config/settings.py
+++ b/config/settings.py
@@ -29,7 +29,8 @@ INFERENCE_PORT = os.getenv('INFERENCE_PORT', '8001')
 INFERENCE_PROTOCOL = os.getenv('INFERENCE_PROTOCOL', 'http')
 INFERENCE_USERNAME = os.getenv('INFERENCE_USERNAME', 'admin')
 INFERENCE_PASSWORD = os.getenv('INFERENCE_PASSWORD', 'changeme')
-INFERENCE_URL = f"{INFERENCE_PROTOCOL}://{INFERENCE_USERNAME}:{INFERENCE_PASSWORD}@{INFERENCE_HOST}:{INFERENCE_PORT}"
+INFERENCE_URL = f"{INFERENCE_PROTOCOL}://{INFERENCE_HOST}:{INFERENCE_PORT}"
+INFERENCE_AUTH = (INFERENCE_USERNAME, INFERENCE_PASSWORD)
 INFERENCE_SEMANTIC_CHUNK_ENDPOINT = f"{INFERENCE_URL}/v1/semantic-chunk"
 INFERENCE_EMBEDDINGS_ENDPOINT = f"{INFERENCE_URL}/v1/embeddings"
 INFERENCE_CHAT_COMPLETIONS_ENDPOINT = f"{INFERENCE_URL}/v1/chat/completions"