From 66b7206ab6eff30de74f50fe00128793b11a7f97 Mon Sep 17 00:00:00 2001
From: Shivendra Soni <shivendrasoni91@gmail.com>
Date: Fri, 9 Aug 2024 19:29:10 +0530
Subject: [PATCH] community: Add llm-extraction option to FireCrawl Document
 Loader (#25231)

**Description:** This minor PR adds `llm_extraction` to the FireCrawl
loader's document metadata. The feature is supported by the API and the
Python SDK, but the LangChain loader omitted it from the response.
**Twitter handle:** [scalable_pizza](https://x.com/scalablepizza)

---------

Co-authored-by: Chester Curme <chester.curme@gmail.com>
---
 .../langchain_community/document_loaders/firecrawl.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/libs/community/langchain_community/document_loaders/firecrawl.py b/libs/community/langchain_community/document_loaders/firecrawl.py
index 3ff3bb3b1e9..2577ce6eda4 100644
--- a/libs/community/langchain_community/document_loaders/firecrawl.py
+++ b/libs/community/langchain_community/document_loaders/firecrawl.py
@@ -63,7 +63,10 @@ class FireCrawlLoader(BaseLoader):
                 f"Unrecognized mode '{self.mode}'. Expected one of 'crawl', 'scrape'."
             )
         for doc in firecrawl_docs:
-            yield Document(
-                page_content=doc.get("markdown", ""),
-                metadata=doc.get("metadata", {}),
-            )
+            metadata = doc.get("metadata", {})
+            if (self.params is not None) and self.params.get(
+                "extractorOptions", {}
+            ).get("mode") == "llm-extraction":
+                metadata["llm_extraction"] = doc.get("llm_extraction")
+
+            yield Document(page_content=doc.get("markdown", ""), metadata=metadata)