Compare commits

...

4 Commits

SHA1       Message          Date
adccea5d5b Create New File  2025-04-30 01:42:24 +00:00
3fcea63de1 Update model.py  2025-04-30 01:40:12 +00:00
ae17e8df35 Create New File  2025-04-30 01:39:52 +00:00
5ee10348dd Create New File  2025-04-30 01:38:50 +00:00
3 changed files with 67 additions and 0 deletions

1/model.py Normal file (+22)

@@ -0,0 +1,22 @@
import triton_python_backend_utils as pb_utils
from transformers import AutoTokenizer, AutoModel
import torch


class TritonPythonModel:
    def initialize(self, args):
        # Load the tokenizer and encoder once, when Triton loads the model.
        self.tokenizer = AutoTokenizer.from_pretrained("../../kicon_e5large_15_v1")
        self.model = AutoModel.from_pretrained("../../kicon_e5large_15_v1")
        self.model.eval()

    def execute(self, requests):
        responses = []
        for request in requests:
            # "text" arrives as a TYPE_STRING tensor with a single element.
            input_text = pb_utils.get_input_tensor_by_name(request, "text").as_numpy()[0].decode("utf-8")
            inputs = self.tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)
            with torch.no_grad():
                outputs = self.model(**inputs)
            # Mean-pool the token embeddings; the result has shape [1, hidden_size].
            embedding = outputs.last_hidden_state.mean(dim=1).numpy()
            out_tensor = pb_utils.Tensor("embedding", embedding)
            responses.append(pb_utils.InferenceResponse(output_tensors=[out_tensor]))
        return responses
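
For reference, a minimal client sketch for querying this backend over HTTP. The server URL and the served model name kicon_e5large_15_v1 are assumptions, not taken from this diff; TYPE_STRING inputs are sent as BYTES tensors on the client side.

# Hypothetical client example; localhost:8000 and the model name are assumptions.
import numpy as np
import tritonclient.http as httpclient

client = httpclient.InferenceServerClient(url="localhost:8000")

# TYPE_STRING maps to a BYTES tensor of numpy object dtype.
text = np.array(["query: example sentence".encode("utf-8")], dtype=object)
inp = httpclient.InferInput("text", [1], "BYTES")
inp.set_data_from_numpy(text)

out = httpclient.InferRequestedOutput("embedding")
result = client.infer("kicon_e5large_15_v1", inputs=[inp], outputs=[out])
embedding = result.as_numpy("embedding")  # float32, mean-pooled sentence embedding
print(embedding.shape)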

config.pbtxt Normal file (+25)

@@ -0,0 +1,25 @@
name: "kicon_e5large_15_v1"
backend: "python"
input [
{
name: "text"
data_type: TYPE_STRING
dims: [1]
}
]
output [
{
name: "embedding"
data_type: TYPE_FP32
dims: [1024] # 임베딩 크기에 맞게 조정
}
]
max_batch_size: 0
instance_group [
{
kind: KIND_GPU
}
]
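
For context, the paths in this diff (config.pbtxt at the root, model.py under 1/) correspond to Triton's conventional model repository layout; the name of the enclosing directory below is an assumption based on the config's name field:

kicon_e5large_15_v1/
├── config.pbtxt
└── 1/
    └── model.py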

@@ -0,0 +1,20 @@
name: "kicon_e5large_15_v1"
backend: "python"
input [
{
name: "text"
data_type: TYPE_STRING
dims: [1]
}
]
output [
{
name: "embedding"
data_type: TYPE_FP32
dims: [1, 1024]
}
]
max_batch_size: 0
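
Note on the output shape: model.py returns last_hidden_state.mean(dim=1), which has shape [1, 1024], matching this config's dims: [ 1, 1024 ]. The earlier config declares dims: [ 1024 ], which would instead require squeezing the batch dimension in the backend, e.g. (a one-line sketch, not part of this diff):

embedding = outputs.last_hidden_state.mean(dim=1).squeeze(0).numpy()  # shape (1024,)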