Multi-modal RAG template (#14186)

* OpenCLIP embeddings
* GPT-4V

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
2025-09-25 04:49:17 +00:00 · 2023-12-05 13:36:38 -08:00
parent 3b75d37cee
commit 66848871fc
13 changed files with 3890 additions and 7 deletions
--- a/libs/experimental/langchain_experimental/open_clip/open_clip.py
+++ b/libs/experimental/langchain_experimental/open_clip/open_clip.py
@@ -8,6 +8,9 @@ class OpenCLIPEmbeddings(BaseModel, Embeddings):
    model: Any
    preprocess: Any
    tokenizer: Any
+    # Select model: https://github.com/mlfoundations/open_clip
+    model_name: str = "ViT-H-14"
+    checkpoint: str = "laion2b_s32b_b79k"

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
@@ -15,12 +18,11 @@ class OpenCLIPEmbeddings(BaseModel, Embeddings):
        try:
            import open_clip

-            ### Smaller, less performant
-            # model_name = "ViT-B-32"
-            # checkpoint = "laion2b_s34b_b79k"
-            ### Larger, more performant
-            model_name = "ViT-H-14"
-            checkpoint = "laion2b_s32b_b79k"
+            # Fall back to class defaults if not provided
+            model_name = values.get("model_name", cls.__fields__["model_name"].default)
+            checkpoint = values.get("checkpoint", cls.__fields__["checkpoint"].default)
+
+            # Load model
            model, _, preprocess = open_clip.create_model_and_transforms(
                model_name=model_name, pretrained=checkpoint
            )