From 4e2dd0789c9998392799dc85dd48134c955f0e75 Mon Sep 17 00:00:00 2001
From: groupuser
Date: Mon, 24 Nov 2025 02:11:41 +0000
Subject: [PATCH] =?UTF-8?q?Automatically=20created=20from=20=EB=AA=A8?=
 =?UTF-8?q?=EB=8D=B8=20=EB=B0=B0=ED=8F=AC(481:base-gemma-3-1b-it)=20by=20?=
 =?UTF-8?q?=EA=B7=B8=EB=A3=B9=EC=82=AC=EC=9A=A9=EC=9E=90(groupuser)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 config.pbtxt | 131 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 131 insertions(+)
 create mode 100644 config.pbtxt

diff --git a/config.pbtxt b/config.pbtxt
new file mode 100644
index 0000000..33fe263
--- /dev/null
+++ b/config.pbtxt
@@ -0,0 +1,131 @@
+# Triton model configuration for TransformerLLM (Python backend).
+backend: "python"
+max_batch_size: 0  # 0 = no Triton-managed batching; each dims entry below is the full tensor shape
+
+# Request inputs: 'text_input' (TYPE_STRING) is the only required tensor; every
+# other input is marked optional and may be left out. All inputs use dims [ 1 ].
+input [
+  # NOTE(review): values used for omitted optional inputs are not set here — presumably the model code supplies them; confirm.
+  {
+    name: "text_input"
+    data_type: TYPE_STRING
+    dims: [ 1 ]
+
+
+  },
+  {
+    name: "max_length"
+    data_type: TYPE_INT32
+    dims: [ 1 ]
+
+    optional: true
+
+
+  },
+  {
+    name: "max_new_tokens"
+    data_type: TYPE_INT32
+    dims: [ 1 ]
+
+    optional: true
+
+
+  },
+  {
+    name: "do_sample"
+    data_type: TYPE_BOOL
+    dims: [ 1 ]
+
+    optional: true
+
+
+  },
+  {
+    name: "top_k"
+    data_type: TYPE_INT32
+    dims: [ 1 ]
+
+    optional: true
+
+
+  },
+  {
+    name: "top_p"
+    data_type: TYPE_FP32
+    dims: [ 1 ]
+
+    optional: true
+
+
+  },
+  {
+    name: "temperature"
+    data_type: TYPE_FP32
+    dims: [ 1 ]
+
+    optional: true
+
+
+  },
+  {
+    name: "repetition_penalty"
+    data_type: TYPE_FP32
+    dims: [ 1 ]
+
+    optional: true
+
+
+  },
+  {
+    name: "stream"
+    data_type: TYPE_BOOL
+    dims: [ 1 ]
+
+    optional: true
+    # NOTE(review): if 'stream' makes the model send several responses per request, Triton also
+    # needs model_transaction_policy { decoupled: true }, which this file does not set — confirm.
+  }
+
+]
+
+
+# Triton responds with a single string tensor named 'text_output'
+# (TYPE_STRING, dims [ 1 ]).
+output [
+
+  {
+    name: "text_output"
+    data_type: TYPE_STRING
+    dims: [ 1 ]
+
+  }
+
+]
+# Free-form string key/value parameters; presumably consumed by the Python model code — verify there.
+parameters: [
+  {
+    key: "base_model_path",
+    value: {string_value: "/cheetah/input/model/groupuser/base-gemma-3-1b-it"}
+  },
+  {
+    key: "is_adapter_model",
+    value: {string_value: "false"}
+  },
+  {
+    key: "adapter_model_path",
+    value: {string_value: ""}
+  },
+
+  {
+    key: "quantization",
+    value: {string_value: "none"}
+  }
+]
+# One model instance; KIND_AUTO leaves the CPU/GPU placement decision to Triton.
+instance_group [
+  {
+    kind: KIND_AUTO
+    count: 1
+  }
+]
+