diff --git a/config.pbtxt b/config.pbtxt
new file mode 100644
index 0000000..3bbd4bd
--- /dev/null
+++ b/config.pbtxt
@@ -0,0 +1,91 @@
+# Triton backend to use
+name: "Llama-3.2-1B-Instruct"
+backend: "python"
+max_batch_size: 0
+
+# Triton should expect two fixed-shape input tensors,
+# INPUT0 (FP64) and INPUT1 (INT64), reshaped before inference
+input [
+
+  {
+    name: "INPUT0"
+    data_type: TYPE_FP64
+    dims: [ 1, 1, 1, 1 ]
+
+
+    reshape {
+      shape: [ 11, 11, 11 ]
+    }
+
+  },
+  {
+    name: "INPUT1"
+    data_type: TYPE_INT64
+    dims: [ 2, 2, 2, 2 ]
+
+
+    reshape {
+      shape: [ 22, 22, 22 ]
+    }
+
+  }
+
+]
+
+
+# Triton should respond with OUTPUT0, OUTPUT1, and the generated
+# text as a variable-length string named 'text_output'
+output [
+
+  {
+    name: "OUTPUT0"
+    data_type: TYPE_FP64
+    dims: [ 1, 100 ]
+
+    reshape {
+      shape: [ 100 ]
+    }
+
+  },
+  {
+    name: "OUTPUT1"
+    data_type: TYPE_BOOL
+    dims: [ 1 ]
+
+  },
+  {
+    name: "text_output"
+    data_type: TYPE_STRING
+    dims: [ -1 ]
+
+  }
+
+]
+
+parameters: [
+  {
+    key: "base_model_path",
+    value: {string_value: "/cheetah/input/model/groupuser/Llama-3.2-1B-Instruct"}
+  },
+  {
+    key: "is_adapter_model",
+    value: {string_value: "false"}
+  },
+  {
+    key: "adapter_model_path",
+    value: {string_value: ""}
+  },
+
+  {
+    key: "quantization",
+    value: {string_value: "none"}
+  }
+]
+
+instance_group [
+  {
+    kind: KIND_AUTO
+    count: 1
+  }
+]
+
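
The diff above adds only the model configuration; the companion model.py for the Python backend is not part of this change. Purely as an illustrative sketch, and assuming the standard triton_python_backend_utils API, such a model.py could read the parameters block and fill the 'text_output' string roughly as follows. The param() helper and the generation step are placeholders for illustration, not the actual implementation.

import json

import numpy as np
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def initialize(self, args):
        # args["model_config"] holds config.pbtxt serialized as JSON
        model_config = json.loads(args["model_config"])
        params = model_config.get("parameters", {})

        def param(key, default=""):
            # Each parameter arrives as {"string_value": "..."}
            return params.get(key, {}).get("string_value", default)

        self.base_model_path = param("base_model_path")
        self.is_adapter_model = param("is_adapter_model", "false") == "true"
        self.adapter_model_path = param("adapter_model_path")
        self.quantization = param("quantization", "none")

        # numpy dtype Triton expects for the TYPE_STRING output declared above
        out_cfg = pb_utils.get_output_config_by_name(model_config, "text_output")
        self.text_output_dtype = pb_utils.triton_string_to_numpy(out_cfg["data_type"])

    def execute(self, requests):
        responses = []
        for request in requests:
            # Placeholder generation step; a real model.py would run the
            # model loaded from base_model_path (and the adapter, if any) here.
            generated = np.array(["<generated text>"], dtype=self.text_output_dtype)
            out = pb_utils.Tensor("text_output", generated)
            responses.append(pb_utils.InferenceResponse(output_tensors=[out]))
        return responses

Because max_batch_size is 0, Triton does not form batches for this model, so execute() sees each client request individually.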