Automatically updated from 모델 배포(742:TinyLlama-1.1B) by 그룹사용자(groupuser)

2026-05-14 06:34:16 +00:00 · 2026-05-14 06:34:16 +00:00 · bdbad187e1
commit bdbad187e1
parent 1366d031dc
1 changed file with 50 additions and 5 deletions
--- a/config.pbtxt
+++ b/config.pbtxt
@@ -1,66 +1,110 @@
-# Triton Backend for vLLM.
-backend: "vllm"
-platform: "vllm"
+# Triton Backend for TransformerLLM.
+backend: "python"
 max_batch_size: 0

+# Triton expects a variable-length string input named 'text_input',
+# plus the other inputs declared below ('max_length' through 'stream').
 input [
+
  {
    name: "text_input"
    data_type: TYPE_STRING
    dims: [ 1 ]
+
+
  },
  {
    name: "max_length"
    data_type: TYPE_INT32
    dims: [ 1 ]
+
+
  },
  {
    name: "max_new_tokens"
    data_type: TYPE_INT32
    dims: [ 1 ]
+
+
  },
  {
    name: "do_sample"
    data_type: TYPE_BOOL
    dims: [ 1 ]
+
+
  },
  {
    name: "top_k"
    data_type: TYPE_INT32
    dims: [ 1 ]
+
+
  },
  {
    name: "top_p"
    data_type: TYPE_FP32
    dims: [ 1 ]
+
+
  },
  {
    name: "temperature"
    data_type: TYPE_FP32
    dims: [ 1 ]
+
+
  },
  {
    name: "repetition_penalty"
    data_type: TYPE_FP32
    dims: [ 1 ]
+
+
  },
  {
    name: "stream"
    data_type: TYPE_BOOL
    dims: [ 1 ]
+
+
  }
+
 ]

+
+# Triton should expect to respond with a single string
+# output of variable length named 'text_output'
 output [
+
  {
    name: "text_output"
    data_type: TYPE_STRING
    dims: [ 1 ]
+
  }
+
 ]

-
-
+parameters: [
+  {
+    key: "base_model_path",
+    value: {string_value: "/cheetah/input/model/groupuser/TinyLlama-1.1B-Chat-v1.0"}
+  },
+  {
+    key: "is_adapter_model",
+    value: {string_value: "false"}
+  },
+  {
+    key: "adapter_model_path",
+    value: {string_value: ""}
+  },
+  
+  {
+    key: "quantization",
+    value: {string_value: "none"}
+  }
+]

 instance_group [
  {
@@ -68,3 +112,4 @@ instance_group [
    count: 1
  }
 ]
+