diff --git a/config.pbtxt b/config.pbtxt index e4cd404..65c0ee0 100644 --- a/config.pbtxt +++ b/config.pbtxt @@ -1,131 +1,75 @@ -# Triton Backend for vLLM. +# Triton Backend for Python. backend: "python" max_batch_size: 0 -# Triton should expect as input a single string -# input of variable length named 'text_input' input [ - { name: "text_input" data_type: TYPE_STRING dims: [ 1 ] - - }, { name: "max_length" data_type: TYPE_INT32 dims: [ 1 ] - - optional: true - - }, { name: "max_new_tokens" data_type: TYPE_INT32 dims: [ 1 ] - - optional: true - - }, { name: "do_sample" data_type: TYPE_BOOL dims: [ 1 ] - - optional: true - - }, { name: "top_k" data_type: TYPE_INT32 dims: [ 1 ] - - optional: true - - }, { name: "top_p" data_type: TYPE_FP32 dims: [ 1 ] - - optional: true - - }, { name: "temperature" data_type: TYPE_FP32 dims: [ 1 ] - - optional: true - - }, { name: "repetition_penalty" data_type: TYPE_FP32 dims: [ 1 ] - - optional: true - - }, { name: "stream" data_type: TYPE_BOOL dims: [ 1 ] - - optional: true - - } - ] - -# Triton should expect to respond with a single string -# output of variable length named 'text_output' output [ - { name: "text_output" data_type: TYPE_STRING dims: [ 1 ] - } - ] + parameters: [ { - key: "base_model_path", + key: "model_path", value: {string_value: "/cheetah/input/model/groupuser/TinyLlama-1.1B-Chat-v1.0"} - }, - { - key: "is_adapter_model", - value: {string_value: "false"} - }, - { - key: "adapter_model_path", - value: {string_value: ""} - }, - - { - key: "quantization", - value: {string_value: "none"} } ] + instance_group [ { kind: KIND_AUTO count: 1 } ] -