# Triton Inference Server model configuration (protobuf text format) for the
# model named "Llama-3.2-1B-Instruct".
#
# Layout: one field per line. A '#' comment extends to the end of the physical
# line, so the configuration fields must never share a line with a comment.

name: "Llama-3.2-1B-Instruct"

# Triton backend to use.
# NOTE(review): the inputs below (a string prompt plus sampling-style scalars
# and a "stream" flag) look like a text-generation interface; confirm that a
# TensorFlow SavedModel is really what is deployed in this model directory.
backend: "tensorflow"
platform: "tensorflow_savedmodel"

# 0 means Triton does not add an implicit batch dimension: every `dims` entry
# below is the complete tensor shape the client must send.
max_batch_size: 0

input [
  {
    # Input text. `dims` must be integers; the previous value was a bare
    # identifier, which is a parse error. [ 1 ] matches the other inputs.
    # NOTE(review): use [ -1 ] instead if the model accepts several prompts
    # per request — confirm against the model implementation.
    name: "text_input"
    data_type: TYPE_STRING
    dims: [ 1 ]
  },
  {
    name: "max_length"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    name: "max_new_tokens"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    name: "do_sample"
    data_type: TYPE_BOOL
    dims: [ 1 ]
  },
  {
    name: "top_k"
    data_type: TYPE_INT32
    dims: [ 1 ]
  },
  {
    name: "top_p"
    data_type: TYPE_FP32
    dims: [ 1 ]
  },
  {
    name: "temperature"
    data_type: TYPE_FP32
    dims: [ 1 ]
  },
  {
    name: "repetition_penalty"
    data_type: TYPE_FP32
    dims: [ 1 ]
  },
  {
    # NOTE(review): presumably toggles streamed responses; no decoupled
    # transaction policy is declared in this file — verify if streaming is
    # expected to work.
    name: "stream"
    data_type: TYPE_BOOL
    dims: [ 1 ]
  }
]

output [
  {
    # Variable-length (-1) 1-D tensor of strings.
    name: "text_output"
    data_type: TYPE_STRING
    dims: [ -1 ]
  }
]

# A single model instance; KIND_AUTO leaves the CPU/GPU placement choice to
# Triton and the backend.
instance_group [
  {
    kind: KIND_AUTO
    count: 1
  }
]