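Update 1/model.py: stop passing id=request_id to pb_utils.InferenceResponse (request ID is kept for log tracing only)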

This commit is contained in:
cheetahadmin 2025-11-21 04:26:19 +00:00
parent 2e3e7382cc
commit 869e7c376b

@@ -2,7 +2,7 @@ import json
import torch import torch
import numpy as np import numpy as np
import triton_python_backend_utils as pb_utils import triton_python_backend_utils as pb_utils
import uuid # [추가] 랜덤 ID 생성을 위해 필요 import uuid
from typing import List, Dict, Any, Union, Tuple from typing import List, Dict, Any, Union, Tuple
from transformers import ( from transformers import (
@@ -103,8 +103,7 @@ class TritonPythonModel:
responses = [] responses = []
for request in requests: for request in requests:
# [ID 생성 로직] # [ID 생성 로직] - 로그 추적용으로 유지 (Response에는 포함 X)
# 클라이언트가 보낸 ID가 있으면 사용하고, 없으면 랜덤 UUID 생성
request_id = request.request_id() request_id = request.request_id()
if not request_id: if not request_id:
request_id = str(uuid.uuid4()) request_id = str(uuid.uuid4())
@@ -129,14 +128,13 @@ class TritonPythonModel:
# [LOGGING] Request ID 포함하여 결과 출력 # [LOGGING] Request ID 포함하여 결과 출력
self.logger.log_info(f"\n[RID: {request_id}] <<< [Output]: {output_text}") self.logger.log_info(f"\n[RID: {request_id}] <<< [Output]: {output_text}")
# 5. 응답 생성 (ID 전달) # 5. 응답 생성 (ID 전달 제거)
responses.append(self._create_response(output_text, request_id)) responses.append(self._create_response(output_text, request_id))
except Exception as e: except Exception as e:
self.logger.log_error(f"[RID: {request_id}] Error during execution: {e}") self.logger.log_error(f"[RID: {request_id}] Error during execution: {e}")
err_tensor = pb_utils.Tensor("text_output", np.array([str(e).encode('utf-8')], dtype=np.bytes_)) err_tensor = pb_utils.Tensor("text_output", np.array([str(e).encode('utf-8')], dtype=np.bytes_))
# 에러 응답에도 ID 포함 responses.append(pb_utils.InferenceResponse(output_tensors=[err_tensor]))
responses.append(pb_utils.InferenceResponse(output_tensors=[err_tensor], id=request_id))
return responses return responses
@@ -198,13 +196,14 @@ class TritonPythonModel:
) )
def _create_response(self, output_text: str, request_id: str): def _create_response(self, output_text: str, request_id: str):
"""생성된 텍스트를 Triton Response 객체로 변환 (ID 포함)""" """생성된 텍스트를 Triton Response 객체로 변환"""
# request_id는 이 함수까지 전달은 되지만, InferenceResponse에 넣지는 않음
output_tensor = pb_utils.Tensor( output_tensor = pb_utils.Tensor(
"text_output", "text_output",
np.array([output_text.encode('utf-8')], dtype=np.bytes_) np.array([output_text.encode('utf-8')], dtype=np.bytes_)
) )
# id 파라미터를 추가하여 클라이언트에게도 ID를 반환 # [중요] id=request_id 제거됨
return pb_utils.InferenceResponse(output_tensors=[output_tensor], id=request_id) return pb_utils.InferenceResponse(output_tensors=[output_tensor])
def _get_config_param(self, key: str, default: str = None) -> str: def _get_config_param(self, key: str, default: str = None) -> str:
params = self.model_config.get('parameters', {}) params = self.model_config.get('parameters', {})