from transformers import AutoModel, CLIPImageProcessor, CLIPTokenizer
import torch
import spaces  # Hugging Face Spaces helper (provides the @spaces.GPU decorator for ZeroGPU)

model_name_or_path = "BAAI/EVA-CLIP-8B"
image_size = 224
def load_model():
    # Reuse the OpenAI CLIP ViT-L/14 image processor for preprocessing.
    processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14")
    # trust_remote_code is needed to load the custom EVA-CLIP model definition from the Hub repo.
    model = AutoModel.from_pretrained(
        model_name_or_path,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True).to('cuda').eval()
    tokenizer = CLIPTokenizer.from_pretrained(model_name_or_path)
    return model, tokenizer, processor

model, tokenizer, processor = load_model()
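
# --- Usage sketch (illustrative, not from the original script) ---
# A minimal zero-shot image/text matching example with the objects returned above.
# It assumes the EVA-CLIP remote code exposes encode_image()/encode_text() (as on
# the BAAI/EVA-CLIP-8B model card) and that a local file "CLIP.png" exists; both
# are assumptions for illustration only.
from PIL import Image

image = Image.open("CLIP.png")  # hypothetical example image path
captions = ["a diagram", "a dog", "a cat"]

input_ids = tokenizer(captions, return_tensors="pt", padding=True).input_ids.to('cuda')
input_pixels = processor(images=image, return_tensors="pt").pixel_values.to('cuda')

# Run both encoders under bfloat16 autocast to match the dtype the model was loaded in.
with torch.no_grad(), torch.autocast("cuda", dtype=torch.bfloat16):
    image_features = model.encode_image(input_pixels)
    text_features = model.encode_text(input_ids)

# L2-normalise the embeddings and turn scaled similarities into per-caption probabilities.
image_features = image_features / image_features.norm(dim=-1, keepdim=True)
text_features = text_features / text_features.norm(dim=-1, keepdim=True)
label_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)
print("Label probabilities:", label_probs)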