Spaces: Runtime error
Commit · 54ac152
1 Parent(s): d61e332
Add max sequence length to tokenizer inference
Browse files
app.py
CHANGED
|
@@ -6,6 +6,8 @@ from huggingface_hub import hf_hub_download
|
|
| 6 |
from onnxruntime import InferenceSession
|
| 7 |
from transformers import AutoModelForQuestionAnswering, AutoTokenizer
|
| 8 |
|
|
|
|
|
|
|
| 9 |
models = {
|
| 10 |
"Base model": "bert-large-uncased-whole-word-masking-finetuned-squad",
|
| 11 |
"Pruned model": "madlag/bert-large-uncased-wwm-squadv2-x2.63-f82.6-d16-hybrid-v1",
|
|
@@ -43,13 +45,19 @@ def run_normal_hf(model_name, inputs):
|
|
| 43 |
def inference(model_name, context, question):
|
| 44 |
tokenizer = AutoTokenizer.from_pretrained(models[model_name])
|
| 45 |
if model_name == "Pruned ONNX Optimized FP16":
|
| 46 |
-
inputs = dict(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
output, inference_time = run_ort_inference(model_name, inputs)
|
| 48 |
answer_start_scores, answer_end_scores = torch.tensor(output[0]), torch.tensor(
|
| 49 |
output[1]
|
| 50 |
)
|
| 51 |
else:
|
| 52 |
-
inputs = tokenizer(
|
|
|
|
|
|
|
| 53 |
output, inference_time = run_normal_hf(model_name, inputs)
|
| 54 |
answer_start_scores, answer_end_scores = output
|
| 55 |
|
|
|
|
| 6 |
from onnxruntime import InferenceSession
|
| 7 |
from transformers import AutoModelForQuestionAnswering, AutoTokenizer
|
| 8 |
|
| 9 |
+
MAX_SEQUENCE_LENGTH = 512
|
| 10 |
+
|
| 11 |
models = {
|
| 12 |
"Base model": "bert-large-uncased-whole-word-masking-finetuned-squad",
|
| 13 |
"Pruned model": "madlag/bert-large-uncased-wwm-squadv2-x2.63-f82.6-d16-hybrid-v1",
|
|
|
|
| 45 |
def inference(model_name, context, question):
|
| 46 |
tokenizer = AutoTokenizer.from_pretrained(models[model_name])
|
| 47 |
if model_name == "Pruned ONNX Optimized FP16":
|
| 48 |
+
inputs = dict(
|
| 49 |
+
tokenizer(
|
| 50 |
+
question, context, return_tensors="np", max_length=MAX_SEQUENCE_LENGTH
|
| 51 |
+
)
|
| 52 |
+
)
|
| 53 |
output, inference_time = run_ort_inference(model_name, inputs)
|
| 54 |
answer_start_scores, answer_end_scores = torch.tensor(output[0]), torch.tensor(
|
| 55 |
output[1]
|
| 56 |
)
|
| 57 |
else:
|
| 58 |
+
inputs = tokenizer(
|
| 59 |
+
question, context, return_tensors="pt", max_length=MAX_SEQUENCE_LENGTH
|
| 60 |
+
)
|
| 61 |
output, inference_time = run_normal_hf(model_name, inputs)
|
| 62 |
answer_start_scores, answer_end_scores = output
|
| 63 |
|