Multi-modal RAG template (#14186)

* OpenCLIP embeddings
* GPT-4V

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
Lance Martin
2023-12-05 13:36:38 -08:00
committed by GitHub
parent 3b75d37cee
commit 66848871fc
13 changed files with 3890 additions and 7 deletions

View File

@@ -8,6 +8,9 @@ class OpenCLIPEmbeddings(BaseModel, Embeddings):
model: Any
preprocess: Any
tokenizer: Any
# Select the OpenCLIP model architecture and pretrained checkpoint; see https://github.com/mlfoundations/open_clip
model_name: str = "ViT-H-14"
checkpoint: str = "laion2b_s32b_b79k"
@root_validator()
def validate_environment(cls, values: Dict) -> Dict:
@@ -15,12 +18,11 @@ class OpenCLIPEmbeddings(BaseModel, Embeddings):
try:
import open_clip
### Smaller, less performant
# model_name = "ViT-B-32"
# checkpoint = "laion2b_s34b_b79k"
### Larger, more performant
model_name = "ViT-H-14"
checkpoint = "laion2b_s32b_b79k"
# Fall back to class defaults if not provided
model_name = values.get("model_name", cls.__fields__["model_name"].default)
checkpoint = values.get("checkpoint", cls.__fields__["checkpoint"].default)
# Load model
model, _, preprocess = open_clip.create_model_and_transforms(
model_name=model_name, pretrained=checkpoint
)