From 66b7206ab6eff30de74f50fe00128793b11a7f97 Mon Sep 17 00:00:00 2001 From: Shivendra Soni Date: Fri, 9 Aug 2024 19:29:10 +0530 Subject: [PATCH] community: Add llm-extraction option to FireCrawl Document Loader (#25231) **Description:** This minor PR adds the `llm_extraction` result to the metadata of documents produced by the FireCrawl loader when `extractorOptions.mode` is `llm-extraction`. This feature is supported by the API and the Python SDK, but the LangChain loader previously omitted it from the documents it returned. **Twitter handle:** [scalable_pizza](https://x.com/scalablepizza) --------- Co-authored-by: Chester Curme --- .../langchain_community/document_loaders/firecrawl.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/libs/community/langchain_community/document_loaders/firecrawl.py b/libs/community/langchain_community/document_loaders/firecrawl.py index 3ff3bb3b1e9..2577ce6eda4 100644 --- a/libs/community/langchain_community/document_loaders/firecrawl.py +++ b/libs/community/langchain_community/document_loaders/firecrawl.py @@ -63,7 +63,10 @@ class FireCrawlLoader(BaseLoader): f"Unrecognized mode '{self.mode}'. Expected one of 'crawl', 'scrape'." ) for doc in firecrawl_docs: - yield Document( - page_content=doc.get("markdown", ""), - metadata=doc.get("metadata", {}), - ) + metadata = doc.get("metadata", {}) + if (self.params is not None) and self.params.get( + "extractorOptions", {} + ).get("mode") == "llm-extraction": + metadata["llm_extraction"] = doc.get("llm_extraction") + + yield Document(page_content=doc.get("markdown", ""), metadata=metadata)