# Patch summary (explanatory text placed before the `diff --git` header).
#
# Target file: config.pbtxt, one hunk (@@ -1,21 +1,62 @@).
#
# What the hunk changes:
#   * name:     "tensorflow-fashion-mnist" -> "Llama-3.2-1B-Instruct"
#   * backend:  "tensorflow" -> "python"
#   * platform: "tensorflow_savedmodel" is removed.
#   * max_batch_size: 0 is kept unchanged (context line).
#   * input: the single "test1" entry (TYPE_UINT8, with a reshape to [ 1 ])
#     is replaced by nine entries:
#       - text_input                                (TYPE_STRING, dims [ -1 ])
#       - max_length, max_new_tokens, top_k         (TYPE_INT32,  dims [ 1 ])
#       - do_sample, stream                         (TYPE_BOOL,   dims [ 1 ])
#       - top_p, temperature, repetition_penalty    (TYPE_FP32,   dims [ 1 ])
#     The final "stream" entry reuses the pre-existing `dims: [ 1 ]` context
#     line; the old `reshape { shape: [ 1 ] }` block is deleted.
#   * output: the previously empty list gains one entry,
#     text_output (TYPE_STRING, dims [ -1 ]).
#
# NOTE(review): the patch below is stored on a single physical line. Unified
#   diffs are line-oriented, so the original line breaks have to be restored
#   before it can be applied. The visible entries account for 17 old / 58 new
#   lines; the remaining 4 per side are presumably blank context lines lost
#   in the collapse -- confirm against the original patch.
# NOTE(review): none of the added inputs sets `optional: true`; presumably
#   every request must then supply all nine tensors -- confirm this is
#   intended and matches what the Python model expects.
# NOTE(review): a "stream" input is declared but this hunk adds no
#   `model_transaction_policy`; if the model is meant to send multiple
#   responses per request, verify whether a decoupled policy is required.
diff --git a/config.pbtxt b/config.pbtxt index 30ed19c..25bf543 100644 --- a/config.pbtxt +++ b/config.pbtxt @@ -1,21 +1,62 @@ # Triton backend to use -name: "tensorflow-fashion-mnist" -backend: "tensorflow" -platform: "tensorflow_savedmodel" +name: "Llama-3.2-1B-Instruct" +backend: "python" max_batch_size: 0 input [ { - name: "test1" - data_type: TYPE_UINT8 + name: "text_input" + data_type: TYPE_STRING + dims: [ -1 ] + }, + { + name: "max_length" + data_type: TYPE_INT32 + dims: [ 1 ] + }, + { + name: "max_new_tokens" + data_type: TYPE_INT32 + dims: [ 1 ] + }, + { + name: "do_sample" + data_type: TYPE_BOOL + dims: [ 1 ] + }, + { + name: "top_k" + data_type: TYPE_INT32 + dims: [ 1 ] + }, + { + name: "top_p" + data_type: TYPE_FP32 + dims: [ 1 ] + }, + { + name: "temperature" + data_type: TYPE_FP32 + dims: [ 1 ] + }, + { + name: "repetition_penalty" + data_type: TYPE_FP32 + dims: [ 1 ] + }, + { + name: "stream" + data_type: TYPE_BOOL dims: [ 1 ] - reshape { - shape: [ 1 ] - } } ] output [ + { + name: "text_output" + data_type: TYPE_STRING + dims: [ -1 ] + } ]