community[major], core[patch], langchain[patch], experimental[patch]: Create langchain-community (#14463)

Moved the following modules to the new package `langchain-community` in a backwards-compatible fashion:

```
mv langchain/langchain/adapters community/langchain_community
mv langchain/langchain/callbacks community/langchain_community/callbacks
mv langchain/langchain/chat_loaders community/langchain_community
mv langchain/langchain/chat_models community/langchain_community
mv langchain/langchain/document_loaders community/langchain_community
mv langchain/langchain/docstore community/langchain_community
mv langchain/langchain/document_transformers community/langchain_community
mv langchain/langchain/embeddings community/langchain_community
mv langchain/langchain/graphs community/langchain_community
mv langchain/langchain/llms community/langchain_community
mv langchain/langchain/memory/chat_message_histories community/langchain_community
mv langchain/langchain/retrievers community/langchain_community
mv langchain/langchain/storage community/langchain_community
mv langchain/langchain/tools community/langchain_community
mv langchain/langchain/utilities community/langchain_community
mv langchain/langchain/vectorstores community/langchain_community
mv langchain/langchain/agents/agent_toolkits community/langchain_community
mv langchain/langchain/cache.py community/langchain_community
mv langchain/langchain/adapters community/langchain_community
mv langchain/langchain/callbacks community/langchain_community/callbacks
mv langchain/langchain/chat_loaders community/langchain_community
mv langchain/langchain/chat_models community/langchain_community
mv langchain/langchain/document_loaders community/langchain_community
mv langchain/langchain/docstore community/langchain_community
mv langchain/langchain/document_transformers community/langchain_community
mv langchain/langchain/embeddings community/langchain_community
mv langchain/langchain/graphs community/langchain_community
mv langchain/langchain/llms community/langchain_community
mv langchain/langchain/memory/chat_message_histories community/langchain_community
mv langchain/langchain/retrievers community/langchain_community
mv langchain/langchain/storage community/langchain_community
mv langchain/langchain/tools community/langchain_community
mv langchain/langchain/utilities community/langchain_community
mv langchain/langchain/vectorstores community/langchain_community
mv langchain/langchain/agents/agent_toolkits community/langchain_community
mv langchain/langchain/cache.py community/langchain_community
```

Moved the following to core:
```
mv langchain/langchain/utils/json_schema.py core/langchain_core/utils
mv langchain/langchain/utils/html.py core/langchain_core/utils
mv langchain/langchain/utils/strings.py core/langchain_core/utils
cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py
rm langchain/langchain/utils/env.py
```

See .scripts/community_split/script_integrations.sh for all changes.
This commit is contained in:
Bagatur
2023-12-11 13:53:30 -08:00
committed by GitHub
parent c0f4b95aa9
commit ed58eeb9c5
2446 changed files with 171805 additions and 137118 deletions

View File

@@ -0,0 +1,158 @@
from langchain_core.utils.html import (
PREFIXES_TO_IGNORE,
SUFFIXES_TO_IGNORE,
extract_sub_links,
find_all_links,
)
def test_find_all_links_none() -> None:
    """Markup without any ``href`` is expected to yield an empty list."""
    markup = "<span>Hello world</span>"
    assert find_all_links(markup) == []
def test_find_all_links_single() -> None:
    """One ``href`` is found with either quote style, bare or inside tags."""
    snippets = (
        "href='foobar.com'",
        'href="foobar.com"',
        '<div><a class="blah" href="foobar.com">hullo</a></div>',
    )
    found = list(map(find_all_links, snippets))
    # Each of the three snippets should produce the same single link.
    assert found == [["foobar.com"] for _ in snippets]
def test_find_all_links_multiple() -> None:
    """Both ``href`` values are expected back; result order is not asserted."""
    markup = "".join(
        [
            '<div><a class="blah" href="https://foobar.com">hullo</a></div>',
            '<div><a class="bleh" href="/baz/cool">buhbye</a></div>',
        ]
    )
    wanted = ["/baz/cool", "https://foobar.com"]
    # Sort before comparing so the check does not depend on result order.
    assert sorted(find_all_links(markup)) == wanted
def test_find_all_links_ignore_suffix() -> None:
    """Check suffix filtering for every entry of ``SUFFIXES_TO_IGNORE``.

    A link ending in an ignored suffix is expected to be dropped, while the
    same text followed by further characters is expected to be returned.
    """
    ending_template = 'href="foobar{suffix}"'
    for suffix in SUFFIXES_TO_IGNORE:
        assert find_all_links(ending_template.format(suffix=suffix)) == []

    # Don't ignore if pattern doesn't occur at end of link.
    middle_template = 'href="foobar{suffix}more"'
    for suffix in SUFFIXES_TO_IGNORE:
        links = find_all_links(middle_template.format(suffix=suffix))
        assert links == [f"foobar{suffix}more"]
def test_find_all_links_ignore_prefix() -> None:
    """Check prefix filtering for every entry of ``PREFIXES_TO_IGNORE``.

    A link starting with an ignored prefix is expected to be dropped, while
    the same text appearing mid-link is expected to be returned ("#" is
    left out of the second check).
    """
    leading_template = 'href="{prefix}foobar"'
    for prefix in PREFIXES_TO_IGNORE:
        assert find_all_links(leading_template.format(prefix=prefix)) == []

    # Don't ignore if pattern doesn't occur at beginning of link.
    middle_template = 'href="foobar{prefix}more"'
    for prefix in PREFIXES_TO_IGNORE:
        # Pound signs are split on when not prefixes.
        if prefix != "#":
            links = find_all_links(middle_template.format(prefix=prefix))
            assert links == [f"foobar{prefix}more"]
def test_find_all_links_drop_fragment() -> None:
    """The ``#section_one`` fragment is expected to be cut from the link."""
    markup = 'href="foobar.com/woah#section_one"'
    assert find_all_links(markup) == ["foobar.com/woah"]
def test_extract_sub_links() -> None:
    """Exercise ``extract_sub_links`` on one page from three angles.

    The assertions expect protocol-relative and root-relative hrefs to come
    back as absolute ``https://foobar.com`` URLs, and the ``baz.net`` link to
    appear only when ``prevent_outside=False`` is passed.
    """
    page = (
        '<a href="https://foobar.com">one</a>'
        '<a href="http://baz.net">two</a>'
        '<a href="//foobar.com/hello">three</a>'
        '<a href="/how/are/you/doing">four</a>'
    )
    root_url = "https://foobar.com"
    nested_url = "https://foobar.com/hello"

    # From the site root: the three foobar.com links are expected.
    from_root = extract_sub_links(page, root_url)
    assert sorted(from_root) == sorted(
        [
            "https://foobar.com",
            "https://foobar.com/hello",
            "https://foobar.com/how/are/you/doing",
        ]
    )

    # From the nested page: only that page's URL is expected (unsorted compare).
    assert extract_sub_links(page, nested_url) == ["https://foobar.com/hello"]

    # With prevent_outside switched off, all four links are expected.
    unrestricted = extract_sub_links(page, nested_url, prevent_outside=False)
    assert sorted(unrestricted) == sorted(
        [
            "https://foobar.com",
            "http://baz.net",
            "https://foobar.com/hello",
            "https://foobar.com/how/are/you/doing",
        ]
    )
def test_extract_sub_links_base() -> None:
    """Check link extraction when ``base_url`` differs from the page URL.

    The expected list has the bare ``alexis.html`` href resolved next to the
    page (``/hello/alexis.html``) and does not contain the ``baz.net`` link.
    """
    # NOTE(review): the last anchor lacks the ">" that closes its opening
    # tag — possibly a typo; confirm before changing, as it is test input.
    page = (
        '<a href="https://foobar.com">one</a>'
        '<a href="http://baz.net">two</a>'
        '<a href="//foobar.com/hello">three</a>'
        '<a href="/how/are/you/doing">four</a>'
        '<a href="alexis.html"</a>'
    )
    page_url = "https://foobar.com/hello/bill.html"
    found = extract_sub_links(page, page_url, base_url="https://foobar.com")
    wanted = [
        "https://foobar.com",
        "https://foobar.com/hello",
        "https://foobar.com/how/are/you/doing",
        "https://foobar.com/hello/alexis.html",
    ]
    assert sorted(found) == sorted(wanted)
def test_extract_sub_links_exclude() -> None:
    """Check that ``exclude_prefixes`` removes matching links from the result.

    With ``prevent_outside=False`` the ``baz.net`` link is expected, while
    the ``/how/are/you/doing`` link is expected to be missing.
    """
    # NOTE(review): the last anchor lacks the ">" that closes its opening
    # tag — possibly a typo; confirm before changing, as it is test input.
    page = (
        '<a href="https://foobar.com">one</a>'
        '<a href="http://baz.net">two</a>'
        '<a href="//foobar.com/hello">three</a>'
        '<a href="/how/are/you/doing">four</a>'
        '<a href="alexis.html"</a>'
    )
    # NOTE(review): "http://baz.org" matches no href above (the page links to
    # baz.net), so presumably only the first prefix has an effect — confirm.
    excluded = ("https://foobar.com/how", "http://baz.org")
    found = extract_sub_links(
        page,
        "https://foobar.com/hello/bill.html",
        base_url="https://foobar.com",
        prevent_outside=False,
        exclude_prefixes=excluded,
    )
    wanted = [
        "http://baz.net",
        "https://foobar.com",
        "https://foobar.com/hello",
        "https://foobar.com/hello/alexis.html",
    ]
    assert sorted(found) == sorted(wanted)

View File

@@ -16,6 +16,11 @@ EXPECTED_ALL = [
"xor_args",
"try_load_from_hub",
"build_extra_kwargs",
"get_from_dict_or_env",
"get_from_env",
"stringify_dict",
"comma_list",
"stringify_value",
]

View File

@@ -0,0 +1,151 @@
import pytest
from langchain_core.utils.json_schema import dereference_refs
def test_dereference_refs_no_refs() -> None:
    """A schema without any ``$ref`` should come back equal to the input."""
    properties = {"first_name": {"type": "string"}}
    schema = {"type": "object", "properties": properties}
    assert dereference_refs(schema) == schema
def test_dereference_refs_one_ref() -> None:
    """A single local ``$ref`` is expected to be replaced by its definition.

    The ``$defs`` section is expected to remain in the output unchanged.
    """
    schema = {
        "type": "object",
        "properties": {"first_name": {"$ref": "#/$defs/name"}},
        "$defs": {"name": {"type": "string"}},
    }
    resolved = dereference_refs(schema)
    assert resolved == {
        "type": "object",
        "properties": {"first_name": {"type": "string"}},
        "$defs": {"name": {"type": "string"}},
    }
def test_dereference_refs_multiple_refs() -> None:
    """Two sibling ``$ref`` entries are each expected to be replaced."""
    schema = {
        "type": "object",
        "properties": {
            "first_name": {"$ref": "#/$defs/name"},
            "other": {"$ref": "#/$defs/other"},
        },
        "$defs": {
            "name": {"type": "string"},
            "other": {"type": "object", "properties": {"age": "int", "height": "int"}},
        },
    }

    # Expected output only: the inlined definition and the retained "$defs"
    # entry have identical content, so one literal describes both.
    other_definition = {
        "type": "object",
        "properties": {"age": "int", "height": "int"},
    }
    wanted = {
        "type": "object",
        "properties": {
            "first_name": {"type": "string"},
            "other": other_definition,
        },
        "$defs": {
            "name": {"type": "string"},
            "other": other_definition,
        },
    }
    assert dereference_refs(schema) == wanted
def test_dereference_refs_nested_refs_skip() -> None:
    """Check nested ``$ref`` handling with default arguments.

    The ``$ref`` reached through ``properties`` is expected to be resolved
    all the way down, while the copy stored under ``$defs`` is expected to
    keep its inner ``$ref``.
    """
    schema = {
        "type": "object",
        "properties": {"info": {"$ref": "#/$defs/info"}},
        "$defs": {
            "name": {"type": "string"},
            "info": {
                "type": "object",
                "properties": {"age": "int", "name": {"$ref": "#/$defs/name"}},
            },
        },
    }
    resolved = dereference_refs(schema)
    assert resolved == {
        "type": "object",
        "properties": {
            "info": {
                "type": "object",
                "properties": {"age": "int", "name": {"type": "string"}},
            },
        },
        "$defs": {
            "name": {"type": "string"},
            "info": {
                "type": "object",
                "properties": {"age": "int", "name": {"$ref": "#/$defs/name"}},
            },
        },
    }
def test_dereference_refs_nested_refs_no_skip() -> None:
    """Check nested ``$ref`` handling when ``skip_keys=()`` is passed.

    Both the ``properties`` entry and the copy under ``$defs`` are expected
    to have their inner ``$ref`` resolved.
    """
    schema = {
        "type": "object",
        "properties": {"info": {"$ref": "#/$defs/info"}},
        "$defs": {
            "name": {"type": "string"},
            "info": {
                "type": "object",
                "properties": {"age": "int", "name": {"$ref": "#/$defs/name"}},
            },
        },
    }

    # Expected output only: the fully resolved "info" object appears both
    # under "properties" and under "$defs".
    resolved_info = {
        "type": "object",
        "properties": {"age": "int", "name": {"type": "string"}},
    }
    wanted = {
        "type": "object",
        "properties": {"info": resolved_info},
        "$defs": {"name": {"type": "string"}, "info": resolved_info},
    }
    assert dereference_refs(schema, skip_keys=()) == wanted
def test_dereference_refs_missing_ref() -> None:
    """A ``$ref`` to an entry absent from ``$defs`` should raise ``KeyError``."""
    dangling = {"first_name": {"$ref": "#/$defs/name"}}
    schema = {"type": "object", "properties": dangling, "$defs": {}}
    with pytest.raises(KeyError):
        dereference_refs(schema)
def test_dereference_refs_remote_ref() -> None:
    """A ``$ref`` holding an ``https://`` URL should raise ``ValueError``."""
    remote = {"first_name": {"$ref": "https://somewhere/else/name"}}
    schema = {"type": "object", "properties": remote}
    with pytest.raises(ValueError):
        dereference_refs(schema)