Update config.pbtxt
This commit is contained in:
parent
dd8eb6c342
commit
a1841e0300
17
config.pbtxt
17
config.pbtxt
@ -130,3 +130,20 @@ instance_group [
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# "model": {
|
||||||
|
# "name": "Llama-3.2-1B-Instruct",
|
||||||
|
# "backend": "TransformerLLM",
|
||||||
|
# "tensorrtllm": {
|
||||||
|
# "workers": 1,
|
||||||
|
# "maxSeqLen": 1,
|
||||||
|
# "kvCacheType": "paged",
|
||||||
|
# "maxInputLen": 1024,
|
||||||
|
# "maxNumTokens": 0
|
||||||
|
# },
|
||||||
|
# "maxBatchSize": 4,
|
||||||
|
# "quantization": "int4",
|
||||||
|
# "modelInstanceGroupKind": "KIND_GPU",
|
||||||
|
# "modelInstanceGroupCount": 1
|
||||||
|
# }
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user