From 9f2212c522c510312d7ee5219df7712dd6862f29 Mon Sep 17 00:00:00 2001 From: groupuser Date: Wed, 13 May 2026 05:44:04 +0000 Subject: [PATCH] =?UTF-8?q?Automatically=20updated=20from=20=EB=AA=A8?= =?UTF-8?q?=EB=8D=B8=20=EB=B0=B0=ED=8F=AC(331:mistral-1L-tiny-git)=20by=20?= =?UTF-8?q?=EA=B7=B8=EB=A3=B9=EC=82=AC=EC=9A=A9=EC=9E=90(groupuser)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.pbtxt | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/config.pbtxt b/config.pbtxt index c5f4522..e2fae33 100644 --- a/config.pbtxt +++ b/config.pbtxt @@ -1,74 +1,105 @@ -# Triton backend to use +# Triton Backend for TransformerLLM. backend: "python" max_batch_size: 0 # Triton should expect as input a single string # input of variable length named 'text_input' input [ + { name: "text_input" data_type: TYPE_STRING dims: [ -1 ] + + }, { name: "max_length" data_type: TYPE_INT32 dims: [ 1 ] + optional: true + + }, { name: "max_new_tokens" data_type: TYPE_INT32 dims: [ 1 ] + optional: true + + }, { name: "do_sample" data_type: TYPE_BOOL dims: [ 1 ] + optional: true + + }, { name: "top_k" data_type: TYPE_INT32 dims: [ 1 ] + optional: true + + }, { name: "top_p" data_type: TYPE_FP32 dims: [ 1 ] + optional: true + + }, { name: "temperature" data_type: TYPE_FP32 dims: [ 1 ] + optional: true + + }, { name: "repetition_penalty" data_type: TYPE_FP32 dims: [ 1 ] + optional: true + + }, { name: "stream" data_type: TYPE_BOOL dims: [ 1 ] + optional: true + + } + ] # Triton should expect to respond with a single string # output of variable length named 'text_output' output [ + { name: "text_output" data_type: TYPE_STRING dims: [ -1 ] + } + ] parameters: [ @@ -84,6 +115,7 @@ parameters: [ key: "adapter_model_path", value: {string_value: ""} }, + { key: "quantization", value: {string_value: "none"}