Compare commits
1 Commits
main
...
refs/embed
| Author | SHA1 | Date |
|---|---|---|
| | e460abfb50 | |
3
01jph5579a5rafy48p3yasaw1f.Dockerfile
Normal file
3
01jph5579a5rafy48p3yasaw1f.Dockerfile
Normal file
@ -0,0 +1,3 @@
|
||||
# Serving image for the embedding model: start from the shared
# cheetah-embedding base image (which provides Triton + Python backend
# runtime) and bake the model artifacts into the image.
FROM registry.aifrica.co.kr/cheetah/cheetah-embedding:base

# Alternative minimal base, kept for reference only — the Python backend
# and its dependencies are not present in alpine.
#FROM alpine:latest

# Copy the model repository contents (config.pbtxt, 1/model.py, weights)
# into the directory the server scans for models.
COPY . /app/models/model
|
||||
22
1/model.py
22
1/model.py
@ -1,22 +0,0 @@
|
||||
import triton_python_backend_utils as pb_utils
|
||||
from transformers import AutoTokenizer, AutoModel
|
||||
import torch
|
||||
|
||||
class TritonPythonModel:
    """Triton Python-backend model that turns a single input string into a
    sentence embedding (masked mean pooling over the encoder's last hidden
    state).

    Input tensor:  "text"      — one UTF-8 encoded string.
    Output tensor: "embedding" — float32 array of shape (1, hidden_size).
    """

    def initialize(self, args):
        """Load the tokenizer and encoder once at server start.

        Parameters
        ----------
        args : dict
            Triton-supplied initialization arguments (unused here; the
            checkpoint path is hard-coded relative to the model repository
            layout — NOTE(review): consider deriving it from
            args["model_repository"] so relocation does not break loading).
        """
        self.tokenizer = AutoTokenizer.from_pretrained("../../kicon_e5large_15_v1")
        self.model = AutoModel.from_pretrained("../../kicon_e5large_15_v1")
        # Inference only: disable dropout and other train-mode behavior.
        self.model.eval()

    def execute(self, requests):
        """Handle a batch of inference requests; one response per request.

        Raises whatever the tokenizer/model raise; Triton surfaces those
        as inference errors.
        """
        responses = []
        for request in requests:
            input_text = pb_utils.get_input_tensor_by_name(request, "text").as_numpy()[0].decode("utf-8")
            inputs = self.tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)
            with torch.no_grad():
                outputs = self.model(**inputs)
                # FIX: a plain last_hidden_state.mean(dim=1) averages padded
                # positions into the embedding. Mask them out so the result
                # is the mean over real tokens only (identical to the old
                # behavior for a single unpadded sequence, correct when
                # padding is present).
                mask = inputs["attention_mask"].unsqueeze(-1).to(outputs.last_hidden_state.dtype)
                summed = (outputs.last_hidden_state * mask).sum(dim=1)
                token_counts = mask.sum(dim=1).clamp(min=1e-9)  # avoid /0 on all-pad rows
                # FIX: .cpu() before .numpy() — .numpy() raises on a CUDA
                # tensor, and the config requests KIND_GPU instances.
                embedding = (summed / token_counts).cpu().numpy()
            out_tensor = pb_utils.Tensor("embedding", embedding)
            responses.append(pb_utils.InferenceResponse(output_tensors=[out_tensor]))
        return responses
|
||||
25
config.pbtxt
25
config.pbtxt
@ -1,25 +0,0 @@
|
||||
# Triton model configuration for the Python-backend embedding model.
name: "kicon_e5large_15_v1"
backend: "python"

# One UTF-8 string per request.
input [
  {
    name: "text"
    data_type: TYPE_STRING
    dims: [1]
  }
]

output [
  {
    name: "embedding"
    data_type: TYPE_FP32
    dims: [1024] # adjust to match the model's embedding size
    # NOTE(review): model.py emits an array of shape (1, hidden_size);
    # with max_batch_size 0 the dims must describe the full shape, so
    # this likely needs to be [1, 1024] — confirm against the backend.
  }
]

# Batching is handled by the backend itself, not by Triton.
max_batch_size: 0
instance_group [
  {
    kind: KIND_GPU
  }
]
|
||||
@ -1,20 +0,0 @@
|
||||
# Triton model configuration (revised): the output dims include the
# leading size-1 dimension of the (1, hidden_size) array model.py emits.
name: "kicon_e5large_15_v1"
backend: "python"

# One UTF-8 string per request.
input [
  {
    name: "text"
    data_type: TYPE_STRING
    dims: [1]
  }
]

output [
  {
    name: "embedding"
    data_type: TYPE_FP32
    dims: [1, 1024]
  }
]

# Batching is handled by the backend itself, not by Triton.
max_batch_size: 0
|
||||
Loading…
Reference in New Issue
Block a user