# Triton backend config for TransformerLLM.
backend: "python"
max_batch_size: 0

# Triton expects a single variable-length string input named 'text_input'.
input [
  {
    name: "text_input"
    data_type: TYPE_STRING
    dims: [ -1 ]
  }
]

# Variable-length output named 'embedding_output'.
output [
  {
    name: "embedding_output"
    data_type: TYPE_FP32  # or TYPE_FP16
    dims: [ -1, -1 ]  # [seq_len, hidden_size]
  }
]

parameters: [
  {
    key: "base_model_path",
    value: { string_value: "/cheetah/input/model/groupuser/codesage-large-v2" }
  }
]

instance_group [
  {
    kind: KIND_AUTO
    count: 1
  }
]
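
# Because `backend: "python"` is set, Triton also needs a `model.py` implementing
# `TritonPythonModel` alongside this config. The sketch below is a minimal,
# assumed example of how that file could read `base_model_path` from the
# parameters above and emit the `[seq_len, hidden_size]` FP32 tensor declared as
# `embedding_output`; the Hugging Face `transformers` loading code and the
# one-string-per-request handling are assumptions, not part of the original config.
#
# ```python
# # model.py -- minimal sketch of a Triton Python backend for this config
# import json
# import numpy as np
# import torch
# import triton_python_backend_utils as pb_utils
# from transformers import AutoModel, AutoTokenizer
#
#
# class TritonPythonModel:
#     def initialize(self, args):
#         # Parse the serialized model config and read the base_model_path parameter.
#         model_config = json.loads(args["model_config"])
#         base_model_path = model_config["parameters"]["base_model_path"]["string_value"]
#         # Assumption: the model at base_model_path loads via transformers.
#         self.tokenizer = AutoTokenizer.from_pretrained(base_model_path, trust_remote_code=True)
#         self.model = AutoModel.from_pretrained(base_model_path, trust_remote_code=True)
#         self.model.eval()
#
#     def execute(self, requests):
#         responses = []
#         for request in requests:
#             # TYPE_STRING inputs arrive as numpy arrays of bytes objects.
#             raw = pb_utils.get_input_tensor_by_name(request, "text_input").as_numpy()
#             texts = [t.decode("utf-8") if isinstance(t, bytes) else str(t) for t in raw.reshape(-1)]
#             with torch.no_grad():
#                 encoded = self.tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
#                 hidden = self.model(**encoded).last_hidden_state  # [batch, seq_len, hidden_size]
#             # Assumption: one string per request, so return the first sequence's
#             # token embeddings as the [seq_len, hidden_size] output.
#             embedding = hidden[0].cpu().numpy().astype(np.float32)
#             out_tensor = pb_utils.Tensor("embedding_output", embedding)
#             responses.append(pb_utils.InferenceResponse(output_tensors=[out_tensor]))
#         return responses
#
#     def finalize(self):
#         pass
# ```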