Create New File
This commit is contained in:
parent
5ee10348dd
commit
ae17e8df35
22
1/model.py
Normal file
22
1/model.py
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
import triton_python_backend_utils as pb_utils
|
||||||
|
from transformers import AutoTokenizer, AutoModel
|
||||||
|
import torch
|
||||||
|
|
||||||
|
class TritonPythonModel:
    """Triton Python-backend model that turns text into sentence embeddings.

    Each request carries a ``"text"`` input tensor of UTF-8 encoded strings.
    Every string in that tensor is tokenized, run through the encoder, and
    mean-pooled over its real (non-padding) tokens. The result is returned in
    an ``"embedding"`` output tensor with one row per input string.
    """

    # Checkpoint location handed to ``from_pretrained``. It is a relative
    # path, so it is resolved against the working directory of the process
    # that loads the model.
    # NOTE(review): confirm the serving process is started from the expected
    # directory, or switch to an absolute path.
    MODEL_PATH = "../kicon_e5large_15_v1"

    def initialize(self, args):
        """Load the tokenizer and the encoder and put the encoder in eval mode.

        Args:
            args: Start-up arguments supplied by the backend; not used here.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(self.MODEL_PATH)
        self.model = AutoModel.from_pretrained(self.MODEL_PATH)
        # Inference only: disable dropout and other training-time behavior.
        self.model.eval()

    def execute(self, requests):
        """Embed the text of every request and return one response per request.

        A failure while handling one request is reported on that request's
        response as a ``TritonError`` instead of propagating, so the other
        requests in the same call still get their results.

        Args:
            requests: Iterable of inference requests, each holding a
                ``"text"`` input tensor.

        Returns:
            A list of ``pb_utils.InferenceResponse`` objects, in the same
            order as ``requests``.
        """
        responses = []
        for request in requests:
            try:
                embedding = self._embed(self._read_texts(request))
                out_tensor = pb_utils.Tensor("embedding", embedding)
                response = pb_utils.InferenceResponse(output_tensors=[out_tensor])
            except Exception as exc:  # boundary: convert to a per-request error
                response = pb_utils.InferenceResponse(
                    output_tensors=[],
                    error=pb_utils.TritonError(str(exc)),
                )
            responses.append(response)
        return responses

    @staticmethod
    def _read_texts(request):
        """Return every string stored in the request's ``"text"`` tensor.

        The tensor is flattened first so that both 1-D and 2-D layouts
        (for example ``[n]`` and ``[n, 1]``) are accepted.

        Raises:
            ValueError: If the ``"text"`` tensor is missing or empty.
        """
        tensor = pb_utils.get_input_tensor_by_name(request, "text")
        if tensor is None:
            raise ValueError('request is missing the "text" input tensor')
        values = tensor.as_numpy().reshape(-1)
        if values.size == 0:
            raise ValueError('the "text" input tensor is empty')
        return [
            value.decode("utf-8") if isinstance(value, bytes) else str(value)
            for value in values
        ]

    def _embed(self, texts):
        """Encode ``texts`` and return a NumPy array with one row per text."""
        inputs = self.tokenizer(
            texts, return_tensors="pt", truncation=True, padding=True
        )
        with torch.no_grad():
            outputs = self.model(**inputs)
            pooled = self._mean_pool(
                outputs.last_hidden_state, inputs["attention_mask"]
            )
        return pooled.numpy()

    @staticmethod
    def _mean_pool(last_hidden_state, attention_mask):
        """Average token vectors along dim 1, skipping masked-out positions.

        Args:
            last_hidden_state: Tensor indexed as (row, token, feature).
            attention_mask: Tensor indexed as (row, token); non-zero marks a
                real token, zero marks padding.

        Returns:
            Tensor indexed as (row, feature) holding the masked mean.
        """
        mask = attention_mask.unsqueeze(-1).to(last_hidden_state.dtype)
        summed = (last_hidden_state * mask).sum(dim=1)
        # Guard against division by zero for a row with no real tokens.
        counts = mask.sum(dim=1).clamp(min=1.0)
        return summed / counts
|
||||||
Loading…
Reference in New Issue
Block a user