mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-25 16:13:25 +00:00
community: Add llm-extraction option to FireCrawl Document Loader (#25231)
**Description:** This minor PR adds `llm_extraction` to the FireCrawl loader. The feature is supported by the FireCrawl API and the Python SDK, but the LangChain loader does not include it in the documents it returns. **Twitter handle:** [scalable_pizza](https://x.com/scalablepizza) --------- Co-authored-by: Chester Curme <chester.curme@gmail.com>
This commit is contained in:
parent
c81c77b465
commit
66b7206ab6
@@ -63,7 +63,10 @@ class FireCrawlLoader(BaseLoader):
|
|||||||
f"Unrecognized mode '{self.mode}'. Expected one of 'crawl', 'scrape'."
|
f"Unrecognized mode '{self.mode}'. Expected one of 'crawl', 'scrape'."
|
||||||
)
|
)
|
||||||
for doc in firecrawl_docs:
|
for doc in firecrawl_docs:
|
||||||
yield Document(
|
metadata = doc.get("metadata", {})
|
||||||
page_content=doc.get("markdown", ""),
|
if (self.params is not None) and self.params.get(
|
||||||
metadata=doc.get("metadata", {}),
|
"extractorOptions", {}
|
||||||
)
|
).get("mode") == "llm-extraction":
|
||||||
|
metadata["llm_extraction"] = doc.get("llm_extraction")
|
||||||
|
|
||||||
|
yield Document(page_content=doc.get("markdown", ""), metadata=metadata)
|
||||||
|
Loading…
Reference in New Issue
Block a user