"""Triton Python-backend model that turns input text into sentence embeddings."""

from pathlib import Path

import torch
import triton_python_backend_utils as pb_utils
from transformers import AutoModel, AutoTokenizer

# Checkpoint directory exactly as originally configured (a relative path).
_MODEL_PATH = "../kicon_e5large_15_v1"


class TritonPythonModel:
    """Embed the strings of the "text" input into the "embedding" output.

    Each request's "text" tensor is flattened, every string in it is encoded,
    and one mean-pooled vector per string is returned.
    """

    def initialize(self, args):
        """Load the tokenizer and encoder once, when Triton loads the model.

        Args:
            args: Dictionary supplied by Triton; not used by this model.
        """
        model_path = self._resolve_model_path()
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModel.from_pretrained(model_path)
        # Switch off dropout and other training-only behavior.
        self.model.eval()

    @staticmethod
    def _resolve_model_path():
        """Return the checkpoint path, tolerating a different working directory.

        The working-directory-relative path is tried first so existing
        deployments behave exactly as before. If it does not exist, the same
        relative path is resolved against this file's directory instead.
        """
        if Path(_MODEL_PATH).is_dir():
            return _MODEL_PATH
        try:
            candidate = Path(__file__).resolve().parent / _MODEL_PATH
        except NameError:
            # __file__ is not defined in every loading context.
            return _MODEL_PATH
        if candidate.is_dir():
            return str(candidate.resolve())
        # Nothing found: hand the original string on so the loader reports it.
        return _MODEL_PATH

    def execute(self, requests):
        """Compute embeddings for a list of inference requests.

        Args:
            requests: Iterable of ``pb_utils.InferenceRequest`` objects.

        Returns:
            A list with exactly one ``pb_utils.InferenceResponse`` per request,
            in request order. A request that cannot be processed yields a
            response carrying a ``pb_utils.TritonError`` instead of raising, so
            one bad request does not fail the others.
        """
        responses = []
        for request in requests:
            try:
                embedding = self._embed(self._read_texts(request))
            except Exception as exc:  # request boundary: report, do not crash
                failure = pb_utils.TritonError(f"embedding failed: {exc}")
                responses.append(
                    pb_utils.InferenceResponse(output_tensors=[], error=failure)
                )
                continue

            out_tensor = pb_utils.Tensor("embedding", embedding)
            responses.append(
                pb_utils.InferenceResponse(output_tensors=[out_tensor])
            )
        return responses

    @staticmethod
    def _read_texts(request):
        """Decode every string held in the request's "text" input tensor.

        Raises:
            ValueError: If the input is missing or contains no elements.
        """
        tensor = pb_utils.get_input_tensor_by_name(request, "text")
        if tensor is None:
            raise ValueError('request has no "text" input')

        # Flatten so that both [n] and [n, 1] shaped inputs are accepted.
        raw_items = tensor.as_numpy().reshape(-1)
        if raw_items.size == 0:
            raise ValueError('"text" input is empty')

        return [
            item.decode("utf-8") if isinstance(item, bytes) else str(item)
            for item in raw_items
        ]

    def _embed(self, texts):
        """Return one mean-pooled embedding row per text as a NumPy array."""
        inputs = self.tokenizer(
            texts, return_tensors="pt", truncation=True, padding=True
        )
        with torch.no_grad():
            outputs = self.model(**inputs)
            hidden = outputs.last_hidden_state
            # Weight by the attention mask so padding tokens do not dilute the
            # average; for a single unpadded text this equals a plain mean.
            mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
            token_counts = mask.sum(dim=1).clamp(min=1)
            pooled = (hidden * mask).sum(dim=1) / token_counts
        return pooled.numpy()