mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-21 03:51:42 +00:00
community[patch]: Minor Improvement of extract hyperlinks tool output (#25728)
**Description:** Make each hyperlink appear only once in the extract_hyperlinks tool output. (For some websites, the output contains the same meaningless '#' hyperlink many times, which consumes context-window tokens without any benefit.) **Issue:** None **Dependencies:** None
This commit is contained in:
parent
ff0df5ea15
commit
25a6790e1a
@ -63,8 +63,9 @@ class ExtractHyperlinksTool(BaseBrowserTool):
|
|||||||
links = [urljoin(base_url, anchor.get("href", "")) for anchor in anchors]
|
links = [urljoin(base_url, anchor.get("href", "")) for anchor in anchors]
|
||||||
else:
|
else:
|
||||||
links = [anchor.get("href", "") for anchor in anchors]
|
links = [anchor.get("href", "") for anchor in anchors]
|
||||||
# Return the list of links as a JSON string
|
# Return the list of links as a JSON string. Duplicated link
|
||||||
return json.dumps(links)
|
# only appears once in the list
|
||||||
|
return json.dumps(list(set(links)))
|
||||||
|
|
||||||
def _run(
|
def _run(
|
||||||
self,
|
self,
|
||||||
|
Loading…
Reference in New Issue
Block a user