# Triton model configuration (protobuf text format) for "gemma-3-4b-it".

# Triton backend to use
name: "gemma-3-4b-it"
backend: "python"

# 0 disables Triton-managed batching: the dims below describe the full
# tensor shape, with no implicit leading batch dimension.
max_batch_size: 0

# Triton should expect as input a single string
# of variable length named 'text_input'
input [
  {
    name: "text_input"
    data_type: TYPE_STRING
    # dims entries must be integers; [ 1 ] is one string element per request.
    dims: [ 1 ]
  }
]

# Triton should expect to respond with a single string
# output of variable length named 'text_output'
output [
  {
    name: "text_output"
    data_type: TYPE_STRING
    # -1 marks a variable-size dimension.
    # NOTE(review): confirm against the shape the Python model actually returns.
    dims: [ -1 ]
  }
]

# Free-form key/value strings handed to the Python backend model.
# NOTE(review): the meaning of each key is defined by the model code, which is
# not visible here; presumably "base_model_path" must be filled in before
# deployment -- confirm.
parameters: [
  {
    key: "base_model_path",
    value: { string_value: "" }
  },
  {
    key: "is_adapter_model",
    value: { string_value: "false" }
  },
  {
    key: "adapter_model_path",
    value: { string_value: "" }
  },
  {
    key: "quantization",
    value: { string_value: "none" }
  }
]

# One model instance; KIND_AUTO leaves device placement to Triton/the backend.
instance_group [
  {
    kind: KIND_AUTO
    count: 1
  }
]