diff --git a/config.pbtxt b/config.pbtxt index 2b24906..bbdcf2c 100644 --- a/config.pbtxt +++ b/config.pbtxt @@ -130,3 +130,20 @@ instance_group [ } ] + + +# "model": { +# "name": "Llama-3.2-1B-Instruct", +# "backend": "TransformerLLM", +# "tensorrtllm": { +# "workers": 1, +# "maxSeqLen": 1, +# "kvCacheType": "paged", +# "maxInputLen": 1024, +# "maxNumTokens": 0 +# }, +# "maxBatchSize": 4, +# "quantization": "int4", +# "modelInstanceGroupKind": "KIND_GPU", +# "modelInstanceGroupCount": 1 +# }