mirror of
https://github.com/hwchase17/langchain.git
synced 2025-04-28 03:51:50 +00:00
core[patch]: deprecate hwchase17/langchain-hub, address path traversal (#18600)
Deprecates the old langchain-hub repository. Does *not* deprecate the new hub at https://smith.langchain.com/hub. @PinkDraconian has correctly pointed out that if someone passes unsanitized user input to the `try_load_from_hub` function, they can load files from locations on GitHub other than the hwchase17/langchain-hub repository. This PR adds more path checking to that function and deprecates the functionality in favor of the hub built into LangSmith.
This commit is contained in:
parent
96cd50938a
commit
e1924b3e93
@ -9,16 +9,28 @@ from urllib.parse import urljoin
|
||||
|
||||
import requests
|
||||
|
||||
from langchain_core._api.deprecation import deprecated
|
||||
|
||||
DEFAULT_REF = os.environ.get("LANGCHAIN_HUB_DEFAULT_REF", "master")
|
||||
LANGCHAINHUB_REPO = "https://raw.githubusercontent.com/hwchase17/langchain-hub/"
|
||||
URL_BASE = os.environ.get(
|
||||
"LANGCHAIN_HUB_URL_BASE",
|
||||
"https://raw.githubusercontent.com/hwchase17/langchain-hub/{ref}/",
|
||||
LANGCHAINHUB_REPO + "{ref}/",
|
||||
)
|
||||
HUB_PATH_RE = re.compile(r"lc(?P<ref>@[^:]+)?://(?P<path>.*)")
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
@deprecated(
|
||||
since="0.1.30",
|
||||
removal="0.2",
|
||||
message=(
|
||||
"Using the hwchase17/langchain-hub "
|
||||
"repo for prompts is deprecated. Please use "
|
||||
"https://smith.langchain.com/hub instead."
|
||||
),
|
||||
)
|
||||
def try_load_from_hub(
|
||||
path: Union[str, Path],
|
||||
loader: Callable[[str], T],
|
||||
@ -43,6 +55,8 @@ def try_load_from_hub(
|
||||
# Instead, use PurePosixPath to ensure that forward slashes are used as the
|
||||
# path separator, regardless of the operating system.
|
||||
full_url = urljoin(URL_BASE.format(ref=ref), PurePosixPath(remote_path).__str__())
|
||||
if not full_url.startswith(LANGCHAINHUB_REPO):
|
||||
raise ValueError(f"Invalid hub path: {path}")
|
||||
|
||||
r = requests.get(full_url, timeout=5)
|
||||
if r.status_code != 200:
|
||||
|
21
libs/core/poetry.lock
generated
21
libs/core/poetry.lock
generated
@ -2214,6 +2214,25 @@ urllib3 = ">=1.21.1,<3"
|
||||
socks = ["PySocks (>=1.5.6,!=1.5.7)"]
|
||||
use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
|
||||
|
||||
[[package]]
|
||||
name = "responses"
|
||||
version = "0.25.0"
|
||||
description = "A utility library for mocking out the `requests` Python library."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "responses-0.25.0-py3-none-any.whl", hash = "sha256:2f0b9c2b6437db4b528619a77e5d565e4ec2a9532162ac1a131a83529db7be1a"},
|
||||
{file = "responses-0.25.0.tar.gz", hash = "sha256:01ae6a02b4f34e39bffceb0fc6786b67a25eae919c6368d05eabc8d9576c2a66"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
pyyaml = "*"
|
||||
requests = ">=2.30.0,<3.0"
|
||||
urllib3 = ">=1.25.10,<3.0"
|
||||
|
||||
[package.extras]
|
||||
tests = ["coverage (>=6.0.0)", "flake8", "mypy", "pytest (>=7.0.0)", "pytest-asyncio", "pytest-cov", "pytest-httpserver", "tomli", "tomli-w", "types-PyYAML", "types-requests"]
|
||||
|
||||
[[package]]
|
||||
name = "rfc3339-validator"
|
||||
version = "0.1.4"
|
||||
@ -2796,4 +2815,4 @@ extended-testing = ["jinja2"]
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = ">=3.8.1,<4.0"
|
||||
content-hash = "8fe07123109b62d7210542d8aff20df6df00819e5b0f36bc12f02206c5161c43"
|
||||
content-hash = "de97591989f083b89c7a7bc6dabba87e29e13fddc812450d5196d564b2c02ce1"
|
||||
|
@ -58,6 +58,7 @@ pytest-watcher = "^0.3.4"
|
||||
pytest-asyncio = "^0.21.1"
|
||||
grandalf = "^0.8"
|
||||
pytest-profiling = "^1.7.0"
|
||||
responses = "^0.25.0"
|
||||
|
||||
|
||||
[tool.poetry.group.test_integration]
|
||||
|
@ -9,6 +9,7 @@ from urllib.parse import urljoin
|
||||
|
||||
import pytest
|
||||
import responses
|
||||
|
||||
from langchain_core.utils.loading import DEFAULT_REF, URL_BASE, try_load_from_hub
|
||||
|
||||
|
||||
@ -94,3 +95,12 @@ def test_failed_request(mocked_responses: responses.RequestsMock) -> None:
|
||||
with pytest.raises(ValueError, match=re.compile("Could not find file at .*")):
|
||||
try_load_from_hub(f"lc://{path}", loader, "chains", {"json"})
|
||||
loader.assert_not_called()
|
||||
|
||||
|
||||
def test_path_traversal() -> None:
|
||||
"""Test that a path traversal attack is prevented."""
|
||||
path = "lc://chains/../../../../../../../../../it.json"
|
||||
loader = Mock()
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
try_load_from_hub(path, loader, "chains", {"json"})
|
Loading…
Reference in New Issue
Block a user