Mirror of https://github.com/hwchase17/langchain.git (synced 2025-08-25 12:33:39 +00:00)

commit fb002faba4
Merge branch 'master' into bagatur/format_content_as
@@ -91,10 +91,10 @@ class AzureOpenAIEmbeddings(OpenAIEmbeddings):
         values["azure_ad_token"] = values.get("azure_ad_token") or os.getenv(
             "AZURE_OPENAI_AD_TOKEN"
         )
-        # Azure OpenAI embedding models allow a maximum of 16 texts
+        # Azure OpenAI embedding models allow a maximum of 2048 texts
         # at a time in each batch
         # See: https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#embeddings
-        values["chunk_size"] = min(values["chunk_size"], 16)
+        values["chunk_size"] = min(values["chunk_size"], 2048)
         try:
             import openai  # noqa: F401
         except ImportError:
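In practice, chunk_size caps how many texts are sent per embeddings request, so raising the ceiling from 16 to 2048 mainly means fewer round trips when embedding large document sets. A hedged usage sketch (import path and deployment name are assumptions, following the langchain_openai package layout); a smaller chunk_size can still be passed explicitly if a deployment enforces a tighter limit:

# Sketch only: the deployment name is a placeholder.
from langchain_openai import AzureOpenAIEmbeddings

embeddings = AzureOpenAIEmbeddings(
    azure_deployment="my-embedding-deployment",  # placeholder
    chunk_size=16,  # opt back into small batches if the service still requires it
)
vectors = embeddings.embed_documents(["first text", "second text"])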
@@ -307,10 +307,10 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
         )
         if values["openai_api_type"] in ("azure", "azure_ad", "azuread"):
             default_api_version = "2023-05-15"
-            # Azure OpenAI embedding models allow a maximum of 16 texts
-            # at a time in each batch
+            # Azure OpenAI embedding models allow a maximum of 2048
+            # texts at a time in each batch
             # See: https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#embeddings
-            values["chunk_size"] = min(values["chunk_size"], 16)
+            values["chunk_size"] = min(values["chunk_size"], 2048)
         else:
             default_api_version = ""
         values["openai_api_version"] = get_from_dict_or_env(
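The effect of the cap on request count is easy to see with a small, self-contained sketch (the batch_texts helper below is illustrative, not part of the library):

# Illustrative helper: shows how a chunk_size cap translates into request batches.
from typing import List


def batch_texts(texts: List[str], chunk_size: int) -> List[List[str]]:
    """Split texts into batches of at most chunk_size items."""
    return [texts[i : i + chunk_size] for i in range(0, len(texts), chunk_size)]


texts = [f"doc {i}" for i in range(5000)]
print(len(batch_texts(texts, 16)))    # 313 requests under the old cap
print(len(batch_texts(texts, 2048)))  # 3 requests under the new cap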
@@ -41,6 +41,19 @@ def merge_dicts(left: Dict[str, Any], *others: Dict[str, Any]) -> Dict[str, Any]
                     " but with a different type."
                 )
             elif isinstance(merged[right_k], str):
+                # TODO: Add below special handling for 'type' key in 0.3 and remove
+                # merge_lists 'type' logic.
+                #
+                # if right_k == "type":
+                #     if merged[right_k] == right_v:
+                #         continue
+                #     else:
+                #         raise ValueError(
+                #             "Unable to merge. Two different values seen for special "
+                #             f"key 'type': {merged[right_k]} and {right_v}. 'type' "
+                #             "should either occur once or have the same value across "
+                #             "all dicts."
+                #         )
                 merged[right_k] += right_v
             elif isinstance(merged[right_k], dict):
                 merged[right_k] = merge_dicts(merged[right_k], right_v)
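The commented-out block describes the rule intended for 0.3: a string 'type' value should never be concatenated during a merge, only kept when the values agree or rejected when they differ. A standalone sketch of that rule under those assumptions (this is not the library's merge_dicts):

# Standalone sketch of the proposed 'type' rule; not the library implementation.
from typing import Any, Dict


def merge_with_type_rule(left: Dict[str, Any], right: Dict[str, Any]) -> Dict[str, Any]:
    merged = dict(left)
    for k, v in right.items():
        if k not in merged or merged[k] is None:
            merged[k] = v
        elif k == "type":
            if merged[k] != v:
                raise ValueError(
                    "Unable to merge. Two different values seen for special "
                    f"key 'type': {merged[k]} and {v}."
                )
            # equal values: keep the existing one instead of concatenating
        elif isinstance(merged[k], str):
            merged[k] += v
        else:
            merged[k] = v
    return merged


assert merge_with_type_rule({"type": "tool"}, {"type": "tool"}) == {"type": "tool"}
# Without the rule, plain string concatenation would yield {"type": "tooltool"}.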
@@ -81,10 +94,10 @@ def merge_lists(left: Optional[List], *others: Optional[List]) -> Optional[List]
                         if e_left["index"] == e["index"]
                     ]
                     if to_merge:
-                        # If a top-level "type" has been set for a chunk, it should no
-                        # longer be overridden by the "type" field in future chunks.
-                        if "type" in merged[to_merge[0]] and "type" in e:
-                            e.pop("type")
+                        # TODO: Remove this once merge_dict is updated with special
+                        # handling for 'type'.
+                        if "type" in e:
+                            e = {k: v for k, v in e.items() if k != "type"}
                         merged[to_merge[0]] = merge_dicts(merged[to_merge[0]], e)
                     else:
                         merged.append(e)
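Replacing e.pop("type") with a dict comprehension keeps the caller's chunk dict unmodified, which is exactly what the new no-mutation assertions in the tests verify. A tiny sketch of the difference (the chunk literal is illustrative):

# Illustrative: building a filtered copy avoids mutating the input, unlike pop().
chunk = {"index": 0, "type": "tool_call_chunk", "args": "{"}

# In-place removal would change the caller's dict:
#     chunk.pop("type")
# A filtered copy leaves the input untouched:
filtered = {k: v for k, v in chunk.items() if k != "type"}

assert "type" in chunk         # original chunk unchanged
assert "type" not in filtered  # copy used for merging drops the special key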
@@ -1,6 +1,7 @@
 import os
 import re
 from contextlib import AbstractContextManager, nullcontext
+from copy import deepcopy
 from typing import Any, Callable, Dict, Optional, Tuple, Type, Union
 from unittest.mock import patch
 
@@ -120,9 +121,45 @@ def test_merge_dicts(
     else:
         err = nullcontext()
 
+    left_copy = deepcopy(left)
+    right_copy = deepcopy(right)
     with err:
         actual = merge_dicts(left, right)
         assert actual == expected
+    # no mutation
+    assert left == left_copy
+    assert right == right_copy
 
 
+@pytest.mark.parametrize(
+    ("left", "right", "expected"),
+    (
+        # 'type' special key handling
+        ({"type": "foo"}, {"type": "foo"}, {"type": "foo"}),
+        (
+            {"type": "foo"},
+            {"type": "bar"},
+            pytest.raises(ValueError, match="Unable to merge."),
+        ),
+    ),
+)
+@pytest.mark.xfail(reason="Refactors to make in 0.3")
+def test_merge_dicts_0_3(
+    left: dict, right: dict, expected: Union[dict, AbstractContextManager]
+) -> None:
+    if isinstance(expected, AbstractContextManager):
+        err = expected
+    else:
+        err = nullcontext()
+
+    left_copy = deepcopy(left)
+    right_copy = deepcopy(right)
+    with err:
+        actual = merge_dicts(left, right)
+        assert actual == expected
+    # no mutation
+    assert left == left_copy
+    assert right == right_copy
+
+
 @pytest.mark.parametrize(
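The deepcopy snapshots turn every case into a no-mutation check: if merge_dicts ever wrote into its arguments, the trailing equality asserts would fail. A minimal illustration of the pattern, using a deliberately mutating function rather than the real merge_dicts:

# Deliberately broken merge, used only to show what the snapshot assertions catch.
from copy import deepcopy


def mutating_merge(left: dict, right: dict) -> dict:
    left.update(right)  # writes into the caller's dict on purpose
    return left


left, right = {"a": 1}, {"b": 2}
left_copy, right_copy = deepcopy(left), deepcopy(right)
mutating_merge(left, right)
assert left != left_copy  # the snapshot comparison exposes the mutation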
@@ -302,19 +302,8 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
         values["openai_proxy"] = get_from_dict_or_env(
             values, "openai_proxy", "OPENAI_PROXY", default=""
         )
-        if values["openai_api_type"] in ("azure", "azure_ad", "azuread"):
-            default_api_version = "2023-05-15"
-            # Azure OpenAI embedding models allow a maximum of 16 texts
-            # at a time in each batch
-            # See: https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#embeddings
-            values["chunk_size"] = min(values["chunk_size"], 16)
-        else:
-            default_api_version = ""
         values["openai_api_version"] = get_from_dict_or_env(
-            values,
-            "openai_api_version",
-            "OPENAI_API_VERSION",
-            default=default_api_version,
+            values, "openai_api_version", "OPENAI_API_VERSION", default=""
         )
         # Check OPENAI_ORGANIZATION for backwards compatibility.
         values["openai_organization"] = (
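With the Azure branch removed from this validator, the API version no longer defaults to "2023-05-15" here: it comes from the openai_api_version argument, the OPENAI_API_VERSION environment variable, or an empty string. A simplified stand-in for that lookup order (not the actual get_from_dict_or_env implementation):

# Simplified stand-in showing only the lookup order used by the call above.
import os
from typing import Any, Dict, Optional


def resolve(values: Dict[str, Any], key: str, env_key: str, default: Optional[str]) -> Any:
    if values.get(key):
        return values[key]          # explicit constructor argument wins
    if env_key in os.environ:
        return os.environ[env_key]  # then the environment variable
    return default                  # otherwise the default ("" after this change)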