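Separated auth: inference credentials are no longer embedded in INFERENCE_URL and are passed to the httpx clients via auth=settings.INFERENCE_AUTH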

2026-03-22 16:42:40 +00:00 · 2026-03-22 16:42:40 +00:00 · 1eada257b9
commit 1eada257b9
parent 69d707a4e0
6 changed files with 11 additions and 10 deletions
--- a/apps/knowledge/tasks.py
+++ b/apps/knowledge/tasks.py
@@ -66,7 +66,7 @@ def ingest_training_file_task(self, file_uuid):
        timeout = Timeout(60.0) 
-        with Client(timeout=timeout) as client:
+        with Client(timeout=timeout, auth=settings.INFERENCE_AUTH) as client:
            for text_segment in _get_text_chunks(raw_text):
                response = client.post(
@@ -180,7 +180,7 @@ def update_agent_prompts_from_file_task(self, role_uuid: str):
    ]
    try:
-        with Client(timeout=Timeout(60.0)) as client:
+        with Client(timeout=Timeout(60.0), auth=settings.INFERENCE_AUTH) as client:
            for agent_type, user_prompt in refine_calls:
                if agent_type not in configs:
                    continue
--- a/apps/onboarding/consumers/base.py
+++ b/apps/onboarding/consumers/base.py
@@ -89,7 +89,7 @@ class BaseOnboardingConsumer(AsyncWebsocketConsumer):
            {"role": "user", "content": message}
        ]
        last_content = ""  
-        async with httpx.AsyncClient(timeout = request_timeout) as client:
+        async with httpx.AsyncClient(timeout=request_timeout, auth=settings.INFERENCE_AUTH) as client:
            for turn in range(1, maximum_turns + 1):
                await self.send_log(LogType.THOUGHT, f"Agent reasoning (Turn {turn})...")
                try:
@@ -157,7 +157,7 @@ class BaseOnboardingConsumer(AsyncWebsocketConsumer):
            payload["stop"] = stop
        try:
            chunks: list[str] = []
-            async with httpx.AsyncClient(timeout=120.0) as client:
+            async with httpx.AsyncClient(timeout=120.0, auth=settings.INFERENCE_AUTH) as client:
                async with client.stream("POST", settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, json=payload) as response:
                    response.raise_for_status()
                    async for line in response.aiter_lines():
--- a/apps/onboarding/consumers/knowledge.py
+++ b/apps/onboarding/consumers/knowledge.py
@@ -113,7 +113,7 @@ class OnboardingKnowledgeConsumer(BaseOnboardingConsumer):
            payload['stop'] = stop
        try:
            chunks: list[str] = []
-            async with httpx.AsyncClient(timeout=120.0) as client:
+            async with httpx.AsyncClient(timeout=120.0, auth=settings.INFERENCE_AUTH) as client:
                async with client.stream('POST', settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT, json=payload) as response:
                    response.raise_for_status()
                    async for line in response.aiter_lines():
--- a/apps/onboarding/mcp.py
+++ b/apps/onboarding/mcp.py
@@ -65,7 +65,7 @@ class MCPRouter:
    async def _get_embedding(self, text):
        logger.info('MCP embedding request started')
-        async with httpx.AsyncClient(timeout=60.0) as client:
+        async with httpx.AsyncClient(timeout=60.0, auth=settings.INFERENCE_AUTH) as client:
            response = await client.post(
                settings.INFERENCE_EMBEDDINGS_ENDPOINT,
                json={'input': text},
--- a/apps/onboarding/viewsets.py
+++ b/apps/onboarding/viewsets.py
@@ -558,7 +558,7 @@ class OnboardingSessionViewSet(RequestParamMixin, ModelViewSet):
            prompt = OnboardingPrompts.grading_prompt(ai_fields, page_responses)
            try:
-                with httpx.Client(timeout=60.0) as client:
+                with httpx.Client(timeout=60.0, auth=settings.INFERENCE_AUTH) as client:
                    response = client.post(
                        settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
                        json={
@@ -768,7 +768,7 @@ class OnboardingSessionViewSet(RequestParamMixin, ModelViewSet):
        context = f"Page: {page_title}\n\n{page_body}" if page_body else page_title
        prompt = f"Context:\n{context}\n\nQuestion: {message}"
        try:
-            with httpx.Client(timeout=60.0) as client:
+            with httpx.Client(timeout=60.0, auth=settings.INFERENCE_AUTH) as client:
                model_id = (config.llm_config or {}).get('model_id', 'meta-llama-3.1-8b') if config else 'meta-llama-3.1-8b'
                response = client.post(
                    settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
@@ -800,7 +800,7 @@ class OnboardingSessionViewSet(RequestParamMixin, ModelViewSet):
            f"Return only the revised page body."
        )
        try:
-            with httpx.Client(timeout=60.0) as client:
+            with httpx.Client(timeout=60.0, auth=settings.INFERENCE_AUTH) as client:
                model_id = (config.llm_config or {}).get('model_id', 'meta-llama-3.1-8b') if config else 'meta-llama-3.1-8b'
                response = client.post(
                    settings.INFERENCE_CHAT_COMPLETIONS_ENDPOINT,
--- a/config/settings.py
+++ b/config/settings.py
@@ -29,7 +29,8 @@ INFERENCE_PORT = os.getenv('INFERENCE_PORT', '8001')
 INFERENCE_PROTOCOL = os.getenv('INFERENCE_PROTOCOL', 'http')
 INFERENCE_USERNAME = os.getenv('INFERENCE_USERNAME', 'admin')
 INFERENCE_PASSWORD = os.getenv('INFERENCE_PASSWORD', 'changeme')
-INFERENCE_URL = f"{INFERENCE_PROTOCOL}://{INFERENCE_USERNAME}:{INFERENCE_PASSWORD}@{INFERENCE_HOST}:{INFERENCE_PORT}"
+INFERENCE_URL = f"{INFERENCE_PROTOCOL}://{INFERENCE_HOST}:{INFERENCE_PORT}"
+INFERENCE_AUTH = (INFERENCE_USERNAME, INFERENCE_PASSWORD)
 INFERENCE_SEMANTIC_CHUNK_ENDPOINT = f"{INFERENCE_URL}/v1/semantic-chunk"
 INFERENCE_EMBEDDINGS_ENDPOINT = f"{INFERENCE_URL}/v1/embeddings"
 INFERENCE_CHAT_COMPLETIONS_ENDPOINT = f"{INFERENCE_URL}/v1/chat/completions"