# Triton Inference Server model configuration for the "deberta" ONNX model,
# served through the ONNX Runtime backend.
name: "deberta"
platform: "onnxruntime_onnx"

# Batching is enabled (max_batch_size > 0), so the batch dimension is implicit:
# Triton prepends it to every `dims` entry below. `dims` must therefore list
# only the per-request shape, without a leading -1 for the batch axis.
# NOTE(review): assumes the exported ONNX graph takes [batch, 512] inputs and
# produces [batch, 2] logits -- confirm against the model file.
max_batch_size: 8

input [
  {
    name: "input_ids"
    data_type: TYPE_INT64
    dims: [ 512 ]
  },
  {
    name: "attention_mask"
    data_type: TYPE_INT64
    dims: [ 512 ]
  }
]

output [
  {
    name: "logits"
    data_type: TYPE_FP32
    dims: [ 2 ]
  }
]

# One model instance, placed on GPU.
instance_group [
  {
    count: 1
    kind: KIND_GPU
  }
]

# Enable the dynamic batcher with its default settings.
dynamic_batching { }

# Ask ONNX Runtime to use the TensorRT execution accelerator on GPU,
# with precision_mode set to FP32.
optimization {
  execution_accelerators {
    gpu_execution_accelerator : [
      {
        name : "tensorrt"
        parameters { key: "precision_mode" value: "FP32" }
      }
    ]
  }
}