mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-23 03:19:38 +00:00
community[major], core[patch], langchain[patch], experimental[patch]: Create langchain-community (#14463)
Moved the following modules to new package langchain-community in a backwards compatible fashion: ``` mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories 
community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community ``` Moved the following to core ``` mv langchain/langchain/utils/json_schema.py core/langchain_core/utils mv langchain/langchain/utils/html.py core/langchain_core/utils mv langchain/langchain/utils/strings.py core/langchain_core/utils cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py rm langchain/langchain/utils/env.py ``` See .scripts/community_split/script_integrations.sh for all changes
This commit is contained in:
158
libs/core/tests/unit_tests/utils/test_html.py
Normal file
158
libs/core/tests/unit_tests/utils/test_html.py
Normal file
@@ -0,0 +1,158 @@
|
||||
from langchain_core.utils.html import (
|
||||
PREFIXES_TO_IGNORE,
|
||||
SUFFIXES_TO_IGNORE,
|
||||
extract_sub_links,
|
||||
find_all_links,
|
||||
)
|
||||
|
||||
|
||||
def test_find_all_links_none() -> None:
    """Markup that contains no ``href`` attribute yields an empty list."""
    markup = "<span>Hello world</span>"
    assert find_all_links(markup) == []
|
||||
|
||||
|
||||
def test_find_all_links_single() -> None:
    """A single ``href`` is found with either quote style, with or without a tag."""
    snippets = (
        "href='foobar.com'",
        'href="foobar.com"',
        '<div><a class="blah" href="foobar.com">hullo</a></div>',
    )
    found = list(map(find_all_links, snippets))
    # Every snippet carries the same single link.
    assert found == [["foobar.com"], ["foobar.com"], ["foobar.com"]]
|
||||
|
||||
|
||||
def test_find_all_links_multiple() -> None:
    """An absolute link and a root-relative link in one document are both found."""
    absolute_anchor = (
        '<div><a class="blah" href="https://foobar.com">hullo</a></div>'
    )
    relative_anchor = '<div><a class="bleh" href="/baz/cool">buhbye</a></div>'
    links = find_all_links(absolute_anchor + relative_anchor)
    # Sort before comparing: this test does not pin the order of the result.
    assert sorted(links) == ["/baz/cool", "https://foobar.com"]
|
||||
|
||||
|
||||
def test_find_all_links_ignore_suffix() -> None:
    """Links ending in an ignored suffix are dropped; all others are kept."""
    # A link that ends with one of the ignored suffixes is filtered out.
    for ending in SUFFIXES_TO_IGNORE:
        assert find_all_links(f'href="foobar{ending}"') == []

    # Don't ignore if pattern doesn't occur at end of link.
    for ending in SUFFIXES_TO_IGNORE:
        kept_link = f"foobar{ending}more"
        assert find_all_links(f'href="{kept_link}"') == [kept_link]
|
||||
|
||||
|
||||
def test_find_all_links_ignore_prefix() -> None:
    """Links starting with an ignored prefix are dropped; all others are kept."""
    # A link that begins with one of the ignored prefixes is filtered out.
    for lead in PREFIXES_TO_IGNORE:
        assert find_all_links(f'href="{lead}foobar"') == []

    # Don't ignore if pattern doesn't occur at beginning of link.
    for lead in PREFIXES_TO_IGNORE:
        # Pound signs are split on when not prefixes.
        if lead != "#":
            kept_link = f"foobar{lead}more"
            assert find_all_links(f'href="{kept_link}"') == [kept_link]
|
||||
|
||||
|
||||
def test_find_all_links_drop_fragment() -> None:
    """The ``#fragment`` part of a link is stripped from the returned link."""
    markup = 'href="foobar.com/woah#section_one"'
    assert find_all_links(markup) == ["foobar.com/woah"]
|
||||
|
||||
|
||||
def test_extract_sub_links() -> None:
    """Check which links ``extract_sub_links`` keeps for different page URLs.

    The page holds an absolute link, an off-site link, a protocol-relative
    link and a root-relative link. The expected values show the relative
    forms resolved to absolute ``https://foobar.com/...`` URLs.
    """
    page = "".join(
        [
            '<a href="https://foobar.com">one</a>',
            '<a href="http://baz.net">two</a>',
            '<a href="//foobar.com/hello">three</a>',
            '<a href="/how/are/you/doing">four</a>',
        ]
    )

    # From the site root: every foobar.com link is kept, baz.net is not.
    from_root = sorted(extract_sub_links(page, "https://foobar.com"))
    assert from_root == sorted(
        [
            "https://foobar.com",
            "https://foobar.com/hello",
            "https://foobar.com/how/are/you/doing",
        ]
    )

    # From /hello: only the link under that path is kept.
    from_hello = extract_sub_links(page, "https://foobar.com/hello")
    assert from_hello == ["https://foobar.com/hello"]

    # With prevent_outside=False all four links are returned.
    unrestricted = sorted(
        extract_sub_links(page, "https://foobar.com/hello", prevent_outside=False)
    )
    assert unrestricted == sorted(
        [
            "https://foobar.com",
            "http://baz.net",
            "https://foobar.com/hello",
            "https://foobar.com/how/are/you/doing",
        ]
    )
|
||||
|
||||
|
||||
def test_extract_sub_links_base() -> None:
    """Check ``extract_sub_links`` when ``base_url`` differs from the page URL.

    The page URL is ``https://foobar.com/hello/bill.html`` and ``base_url`` is
    the site root. The expected result keeps every ``foobar.com`` link, drops
    the ``baz.net`` link, and contains the bare ``alexis.html`` link as
    ``https://foobar.com/hello/alexis.html``.
    """
    html = (
        '<a href="https://foobar.com">one</a>'
        '<a href="http://baz.net">two</a>'
        '<a href="//foobar.com/hello">three</a>'
        '<a href="/how/are/you/doing">four</a>'
        # Well-formed anchor: the fixture previously read
        # '<a href="alexis.html"</a>', which never closed its start tag.
        '<a href="alexis.html">five</a>'
    )

    expected = sorted(
        [
            "https://foobar.com",
            "https://foobar.com/hello",
            "https://foobar.com/how/are/you/doing",
            "https://foobar.com/hello/alexis.html",
        ]
    )
    actual = sorted(
        extract_sub_links(
            html, "https://foobar.com/hello/bill.html", base_url="https://foobar.com"
        )
    )
    assert actual == expected
|
||||
|
||||
|
||||
def test_extract_sub_links_exclude() -> None:
    """Check that ``exclude_prefixes`` removes matching links from the result.

    ``prevent_outside=False`` lets the off-site ``http://baz.net`` link
    through. Of the two excluded prefixes, only ``https://foobar.com/how``
    matches a link on the page, so ``/how/are/you/doing`` is the one link
    missing from the expected result.
    """
    html = (
        '<a href="https://foobar.com">one</a>'
        '<a href="http://baz.net">two</a>'
        '<a href="//foobar.com/hello">three</a>'
        '<a href="/how/are/you/doing">four</a>'
        # Well-formed anchor: the fixture previously read
        # '<a href="alexis.html"</a>', which never closed its start tag.
        '<a href="alexis.html">five</a>'
    )

    expected = sorted(
        [
            "http://baz.net",
            "https://foobar.com",
            "https://foobar.com/hello",
            "https://foobar.com/hello/alexis.html",
        ]
    )
    actual = sorted(
        extract_sub_links(
            html,
            "https://foobar.com/hello/bill.html",
            base_url="https://foobar.com",
            prevent_outside=False,
            # "http://baz.org" is not a prefix of "http://baz.net", so that
            # link stays in the expected result.
            exclude_prefixes=("https://foobar.com/how", "http://baz.org"),
        )
    )
    assert actual == expected
|
@@ -16,6 +16,11 @@ EXPECTED_ALL = [
|
||||
"xor_args",
|
||||
"try_load_from_hub",
|
||||
"build_extra_kwargs",
|
||||
"get_from_dict_or_env",
|
||||
"get_from_env",
|
||||
"stringify_dict",
|
||||
"comma_list",
|
||||
"stringify_value",
|
||||
]
|
||||
|
||||
|
||||
|
151
libs/core/tests/unit_tests/utils/test_json_schema.py
Normal file
151
libs/core/tests/unit_tests/utils/test_json_schema.py
Normal file
@@ -0,0 +1,151 @@
|
||||
import pytest
|
||||
|
||||
from langchain_core.utils.json_schema import dereference_refs
|
||||
|
||||
|
||||
def test_dereference_refs_no_refs() -> None:
    """A schema without any ``$ref`` comes back equal to the input."""
    plain_schema = {
        "type": "object",
        "properties": {"first_name": {"type": "string"}},
    }
    assert dereference_refs(plain_schema) == plain_schema
|
||||
|
||||
|
||||
def test_dereference_refs_one_ref() -> None:
    """A single local ``$ref`` is replaced by the definition it points to.

    The ``$defs`` section is still present in the expected output.
    """
    input_schema = {
        "type": "object",
        "properties": {"first_name": {"$ref": "#/$defs/name"}},
        "$defs": {"name": {"type": "string"}},
    }
    want = {
        "type": "object",
        "properties": {"first_name": {"type": "string"}},
        "$defs": {"name": {"type": "string"}},
    }
    assert dereference_refs(input_schema) == want
|
||||
|
||||
|
||||
def test_dereference_refs_multiple_refs() -> None:
    """Two sibling ``$ref`` entries are each replaced by their own definition."""
    input_schema = {
        "type": "object",
        "properties": {
            "first_name": {"$ref": "#/$defs/name"},
            "other": {"$ref": "#/$defs/other"},
        },
        "$defs": {
            "name": {"type": "string"},
            "other": {
                "type": "object",
                "properties": {"age": "int", "height": "int"},
            },
        },
    }
    want = {
        "type": "object",
        "properties": {
            "first_name": {"type": "string"},
            "other": {
                "type": "object",
                "properties": {"age": "int", "height": "int"},
            },
        },
        "$defs": {
            "name": {"type": "string"},
            "other": {
                "type": "object",
                "properties": {"age": "int", "height": "int"},
            },
        },
    }
    assert dereference_refs(input_schema) == want
|
||||
|
||||
|
||||
def test_dereference_refs_nested_refs_skip() -> None:
    """With default arguments, nested refs resolve under ``properties`` only.

    ``info`` points at a definition that itself holds a ``$ref``. The expected
    output has that chain fully expanded under ``properties``, while the
    ``$defs`` section still carries its inner ``$ref`` unchanged.
    """
    input_schema = {
        "type": "object",
        "properties": {"info": {"$ref": "#/$defs/info"}},
        "$defs": {
            "name": {"type": "string"},
            "info": {
                "type": "object",
                "properties": {"age": "int", "name": {"$ref": "#/$defs/name"}},
            },
        },
    }
    want = {
        "type": "object",
        "properties": {
            "info": {
                "type": "object",
                "properties": {"age": "int", "name": {"type": "string"}},
            },
        },
        "$defs": {
            "name": {"type": "string"},
            # Left as a reference in the expected output.
            "info": {
                "type": "object",
                "properties": {"age": "int", "name": {"$ref": "#/$defs/name"}},
            },
        },
    }
    assert dereference_refs(input_schema) == want
|
||||
|
||||
|
||||
def test_dereference_refs_nested_refs_no_skip() -> None:
    """With ``skip_keys=()``, the nested ref inside ``$defs`` is resolved too.

    Same input as the default-arguments case, but the expected output also has
    the inner ``$ref`` under ``$defs`` replaced by its definition.
    """
    input_schema = {
        "type": "object",
        "properties": {"info": {"$ref": "#/$defs/info"}},
        "$defs": {
            "name": {"type": "string"},
            "info": {
                "type": "object",
                "properties": {"age": "int", "name": {"$ref": "#/$defs/name"}},
            },
        },
    }
    want = {
        "type": "object",
        "properties": {
            "info": {
                "type": "object",
                "properties": {"age": "int", "name": {"type": "string"}},
            },
        },
        "$defs": {
            "name": {"type": "string"},
            "info": {
                "type": "object",
                "properties": {"age": "int", "name": {"type": "string"}},
            },
        },
    }
    assert dereference_refs(input_schema, skip_keys=()) == want
|
||||
|
||||
|
||||
def test_dereference_refs_missing_ref() -> None:
    """A ``$ref`` whose target is absent from ``$defs`` raises ``KeyError``."""
    dangling_schema = {
        "type": "object",
        "properties": {"first_name": {"$ref": "#/$defs/name"}},
        "$defs": {},
    }
    with pytest.raises(KeyError):
        dereference_refs(dangling_schema)
|
||||
|
||||
|
||||
def test_dereference_refs_remote_ref() -> None:
    """A ``$ref`` that is a remote ``https://`` URL raises ``ValueError``."""
    remote_schema = {
        "type": "object",
        "properties": {"first_name": {"$ref": "https://somewhere/else/name"}},
    }
    with pytest.raises(ValueError):
        dereference_refs(remote_schema)
|
Reference in New Issue
Block a user