| import transformers | |
def tf_example(texts, model_name='M-CLIP/XLM-Roberta-Large-Vit-L-14'):
    """Embed ``texts`` via ``tf_multilingual_clip`` and print the embedding shape.

    Args:
        texts: Batch of texts handed to the tokenizer's ``batch_encode_plus``
            (the script entry point passes a list of ``str``).
        model_name: Identifier given to both ``from_pretrained`` calls, i.e.
            the text model and its tokenizer are loaded under the same name.

    Returns:
        None. The ``shape`` attribute of the model output is printed.
    """
    # Imported locally so the package is only needed when this example runs.
    from multilingual_clip import tf_multilingual_clip as tf_clip

    clip_model = tf_clip.MultiLingualCLIP.from_pretrained(model_name)
    text_tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)

    # Tokenize the whole batch at once, padded, requesting 'tf' tensors.
    encoded_batch = text_tokenizer.batch_encode_plus(
        texts,
        return_tensors='tf',
        padding=True,
    )
    text_embeddings = clip_model(encoded_batch)
    print(text_embeddings.shape)
def pt_example(texts, model_name='M-CLIP/XLM-Roberta-Large-Vit-L-14'):
    """Embed ``texts`` via ``pt_multilingual_clip`` and print the embedding shape.

    Args:
        texts: Batch of texts passed straight to the model's ``forward``
            (the script entry point passes a list of ``str``).
        model_name: Identifier given to both ``from_pretrained`` calls, i.e.
            the text model and its tokenizer are loaded under the same name.

    Returns:
        None. The ``shape`` attribute of the model output is printed.
    """
    # Imported locally so the package is only needed when this example runs.
    from multilingual_clip import pt_multilingual_clip as pt_clip

    clip_model = pt_clip.MultilingualCLIP.from_pretrained(model_name)
    text_tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)

    # Unlike the TF example, the raw texts and the tokenizer are both handed
    # to ``forward``; no tokenization happens in this function.
    text_embeddings = clip_model.forward(texts, text_tokenizer)
    print(text_embeddings.shape)
if __name__ == '__main__':
    # Sample sentences in English, Swedish, German and Russian.
    exampleTexts = [
        'Three blind horses listening to Mozart.',
        'Älgen är skogens konung!',
        'Wie leben Eisbären in der Antarktis?',
        'Вы знали, что все белые медведи левши?',
    ]

    # Only the PyTorch example runs by default. The TensorFlow example is
    # disabled; uncomment the next line to run it as well.
    # tf_example(exampleTexts)
    pt_example(exampleTexts)