diff --git a/config.pbtxt b/config.pbtxt deleted file mode 100644 index 7b1219c..0000000 --- a/config.pbtxt +++ /dev/null @@ -1,89 +0,0 @@ -# Triton backend to use -name: "gemma-3-1b-it-finetuning" -backend: "python" -max_batch_size: 0 - -# Triton should expect as input a single string -# input of variable length named 'text_input' -input [ - { - name: "text_input" - data_type: TYPE_STRING - dims: [ -1 ] - }, - { - name: "max_length" - data_type: TYPE_INT32 - dims: [ 1 ] - optional: true - }, - { - name: "max_new_tokens" - data_type: TYPE_INT32 - dims: [ 1 ] - optional: true - }, - { - name: "do_sample" - data_type: TYPE_BOOL - dims: [ 1 ] - optional: true - }, - { - name: "top_k" - data_type: TYPE_INT32 - dims: [ 1 ] - optional: true - }, - { - name: "top_p" - data_type: TYPE_FP32 - dims: [ 1 ] - optional: true - }, - { - name: "temperature" - data_type: TYPE_FP32 - dims: [ 1 ] - optional: true - }, - { - name: "repetition_penalty" - data_type: TYPE_FP32 - dims: [ 1 ] - optional: true - }, - { - name: "stream" - data_type: TYPE_BOOL - dims: [ 1 ] - optional: true - } -] - - -# Triton should expect to respond with a single string -# output of variable length named 'text_output' -output [ - { - name: "text_output" - data_type: TYPE_STRING - dims: [ -1 ] - } -] - -parameters: [ - { - key: "enable_inference_trace", - value: {string_value: "True"} - } -] - - -instance_group [ - { - kind: KIND_AUTO, - count: 1 - } -] -