# Triton Backend for vLLM.
#
# Model configuration in protobuf text format. Each field is on its own line
# because a '#' comment extends to the end of the line: with the whole file on
# one line behind the leading '#', every setting below was parsed as comment
# text and the effective configuration was empty.

# Backend that serves this model.
backend: "vllm"

# NOTE(review): "platform" is set alongside "backend"; kept as in the original.
# Confirm whether the vLLM backend needs it or whether "backend" alone suffices.
platform: "vllm"

# 0 means Triton-level batching is not used for this model.
max_batch_size: 0

# Inputs and outputs are intentionally left empty here.
# NOTE(review): presumably the backend fills these in at load time - confirm.
input [ ]
output [ ]

# A single model instance; KIND_AUTO leaves device placement unspecified here.
instance_group [
  {
    kind: KIND_AUTO
    count: 1
  }
]