Automatically updated from 모델 배포(331:mistral-1L-tiny-git) by 그룹사용자(groupuser)

2026-05-13 05:44:04 +00:00 · 2026-05-13 05:44:04 +00:00 · 9f2212c522
commit 9f2212c522
parent 0bb2560057
1 changed file with 33 additions and 1 deletion
--- a/config.pbtxt
+++ b/config.pbtxt
@@ -1,74 +1,105 @@
-# Triton backend to use
+# Triton Backend for TransformerLLM.
 backend: "python"
 max_batch_size: 0

 # Triton should expect as input a single string
 # input of variable length named 'text_input'
 input [
+
  {
    name: "text_input"
    data_type: TYPE_STRING
    dims: [ -1 ]
+
+
  },
  {
    name: "max_length"
    data_type: TYPE_INT32
    dims: [ 1 ]
+
    optional: true
+
+
  },
  {
    name: "max_new_tokens"
    data_type: TYPE_INT32
    dims: [ 1 ]
+
    optional: true
+
+
  },
  {
    name: "do_sample"
    data_type: TYPE_BOOL
    dims: [ 1 ]
+
    optional: true
+
+
  },
  {
    name: "top_k"
    data_type: TYPE_INT32
    dims: [ 1 ]
+
    optional: true
+
+
  },
  {
    name: "top_p"
    data_type: TYPE_FP32
    dims: [ 1 ]
+
    optional: true
+
+
  },
  {
    name: "temperature"
    data_type: TYPE_FP32
    dims: [ 1 ]
+
    optional: true
+
+
  },
  {
    name: "repetition_penalty"
    data_type: TYPE_FP32
    dims: [ 1 ]
+
    optional: true
+
+
  },
  {
    name: "stream"
    data_type: TYPE_BOOL
    dims: [ 1 ]
+
    optional: true
+
+
  }
+
 ]


 # Triton should expect to respond with a single string
 # output of variable length named 'text_output'
 output [
+
  {
    name: "text_output"
    data_type: TYPE_STRING
    dims: [ -1 ]
+
  }
+
 ]

 parameters: [
@@ -84,6 +115,7 @@ parameters: [
    key: "adapter_model_path",
    value: {string_value: ""}
  },
+  
  {
    key: "quantization",
    value: {string_value: "none"}