mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-19 21:33:51 +00:00
core[patch]: deprecate hwchase17/langchain-hub, address path traversal (#18600)
Deprecates the old langchain-hub repository. Does *not* deprecate the new hub (https://smith.langchain.com/hub). @PinkDraconian has correctly pointed out that if someone passes unsanitized user input to the `try_load_from_hub` function, that input can be used to load files from locations on GitHub other than the hwchase17/langchain-hub repository. This PR adds more path checking to that function and deprecates the functionality in favor of the hub built into LangSmith.
This commit is contained in:
parent
96cd50938a
commit
e1924b3e93
@ -9,16 +9,28 @@ from urllib.parse import urljoin
|
|||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
from langchain_core._api.deprecation import deprecated
|
||||||
|
|
||||||
DEFAULT_REF = os.environ.get("LANGCHAIN_HUB_DEFAULT_REF", "master")
|
DEFAULT_REF = os.environ.get("LANGCHAIN_HUB_DEFAULT_REF", "master")
|
||||||
|
LANGCHAINHUB_REPO = "https://raw.githubusercontent.com/hwchase17/langchain-hub/"
|
||||||
URL_BASE = os.environ.get(
|
URL_BASE = os.environ.get(
|
||||||
"LANGCHAIN_HUB_URL_BASE",
|
"LANGCHAIN_HUB_URL_BASE",
|
||||||
"https://raw.githubusercontent.com/hwchase17/langchain-hub/{ref}/",
|
LANGCHAINHUB_REPO + "{ref}/",
|
||||||
)
|
)
|
||||||
HUB_PATH_RE = re.compile(r"lc(?P<ref>@[^:]+)?://(?P<path>.*)")
|
HUB_PATH_RE = re.compile(r"lc(?P<ref>@[^:]+)?://(?P<path>.*)")
|
||||||
|
|
||||||
T = TypeVar("T")
|
T = TypeVar("T")
|
||||||
|
|
||||||
|
|
||||||
|
@deprecated(
|
||||||
|
since="0.1.30",
|
||||||
|
removal="0.2",
|
||||||
|
message=(
|
||||||
|
"Using the hwchase17/langchain-hub "
|
||||||
|
"repo for prompts is deprecated. Please use "
|
||||||
|
"https://smith.langchain.com/hub instead."
|
||||||
|
),
|
||||||
|
)
|
||||||
def try_load_from_hub(
|
def try_load_from_hub(
|
||||||
path: Union[str, Path],
|
path: Union[str, Path],
|
||||||
loader: Callable[[str], T],
|
loader: Callable[[str], T],
|
||||||
@ -43,6 +55,8 @@ def try_load_from_hub(
|
|||||||
# Instead, use PurePosixPath to ensure that forward slashes are used as the
|
# Instead, use PurePosixPath to ensure that forward slashes are used as the
|
||||||
# path separator, regardless of the operating system.
|
# path separator, regardless of the operating system.
|
||||||
full_url = urljoin(URL_BASE.format(ref=ref), PurePosixPath(remote_path).__str__())
|
full_url = urljoin(URL_BASE.format(ref=ref), PurePosixPath(remote_path).__str__())
|
||||||
|
if not full_url.startswith(LANGCHAINHUB_REPO):
|
||||||
|
raise ValueError(f"Invalid hub path: {path}")
|
||||||
|
|
||||||
r = requests.get(full_url, timeout=5)
|
r = requests.get(full_url, timeout=5)
|
||||||
if r.status_code != 200:
|
if r.status_code != 200:
|
||||||
|
21
libs/core/poetry.lock
generated
21
libs/core/poetry.lock
generated
@ -2214,6 +2214,25 @@ urllib3 = ">=1.21.1,<3"
|
|||||||
socks = ["PySocks (>=1.5.6,!=1.5.7)"]
|
socks = ["PySocks (>=1.5.6,!=1.5.7)"]
|
||||||
use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
|
use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "responses"
|
||||||
|
version = "0.25.0"
|
||||||
|
description = "A utility library for mocking out the `requests` Python library."
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "responses-0.25.0-py3-none-any.whl", hash = "sha256:2f0b9c2b6437db4b528619a77e5d565e4ec2a9532162ac1a131a83529db7be1a"},
|
||||||
|
{file = "responses-0.25.0.tar.gz", hash = "sha256:01ae6a02b4f34e39bffceb0fc6786b67a25eae919c6368d05eabc8d9576c2a66"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
pyyaml = "*"
|
||||||
|
requests = ">=2.30.0,<3.0"
|
||||||
|
urllib3 = ">=1.25.10,<3.0"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
tests = ["coverage (>=6.0.0)", "flake8", "mypy", "pytest (>=7.0.0)", "pytest-asyncio", "pytest-cov", "pytest-httpserver", "tomli", "tomli-w", "types-PyYAML", "types-requests"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rfc3339-validator"
|
name = "rfc3339-validator"
|
||||||
version = "0.1.4"
|
version = "0.1.4"
|
||||||
@ -2796,4 +2815,4 @@ extended-testing = ["jinja2"]
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = ">=3.8.1,<4.0"
|
python-versions = ">=3.8.1,<4.0"
|
||||||
content-hash = "8fe07123109b62d7210542d8aff20df6df00819e5b0f36bc12f02206c5161c43"
|
content-hash = "de97591989f083b89c7a7bc6dabba87e29e13fddc812450d5196d564b2c02ce1"
|
||||||
|
@ -58,6 +58,7 @@ pytest-watcher = "^0.3.4"
|
|||||||
pytest-asyncio = "^0.21.1"
|
pytest-asyncio = "^0.21.1"
|
||||||
grandalf = "^0.8"
|
grandalf = "^0.8"
|
||||||
pytest-profiling = "^1.7.0"
|
pytest-profiling = "^1.7.0"
|
||||||
|
responses = "^0.25.0"
|
||||||
|
|
||||||
|
|
||||||
[tool.poetry.group.test_integration]
|
[tool.poetry.group.test_integration]
|
||||||
|
@ -9,6 +9,7 @@ from urllib.parse import urljoin
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import responses
|
import responses
|
||||||
|
|
||||||
from langchain_core.utils.loading import DEFAULT_REF, URL_BASE, try_load_from_hub
|
from langchain_core.utils.loading import DEFAULT_REF, URL_BASE, try_load_from_hub
|
||||||
|
|
||||||
|
|
||||||
@ -94,3 +95,12 @@ def test_failed_request(mocked_responses: responses.RequestsMock) -> None:
|
|||||||
with pytest.raises(ValueError, match=re.compile("Could not find file at .*")):
|
with pytest.raises(ValueError, match=re.compile("Could not find file at .*")):
|
||||||
try_load_from_hub(f"lc://{path}", loader, "chains", {"json"})
|
try_load_from_hub(f"lc://{path}", loader, "chains", {"json"})
|
||||||
loader.assert_not_called()
|
loader.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
def test_path_traversal() -> None:
|
||||||
|
"""Test that a path traversal attack is prevented."""
|
||||||
|
path = "lc://chains/../../../../../../../../../it.json"
|
||||||
|
loader = Mock()
|
||||||
|
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
try_load_from_hub(path, loader, "chains", {"json"})
|
Loading…
Reference in New Issue
Block a user