mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-02 03:26:17 +00:00
CLI Git Improvements (#12311)
- delete repo sources like pip
- git dep fixes
- error messaging
This commit is contained in:
@@ -1,2 +1,3 @@
|
||||
DEFAULT_GIT_REPO = "https://github.com/langchain-ai/langchain.git"
|
||||
DEFAULT_GIT_SUBDIRECTORY = "templates"
|
||||
DEFAULT_GIT_REF = "langserve-templates"
|
||||
|
@@ -3,16 +3,22 @@ Manage LangServe application projects.
|
||||
"""
|
||||
|
||||
import typer
|
||||
from typing import Optional, List
|
||||
from typing import Optional, List, Tuple, Dict
|
||||
from typing_extensions import Annotated
|
||||
from pathlib import Path
|
||||
import shutil
|
||||
import subprocess
|
||||
from langchain_cli.utils.git import copy_repo, update_repo
|
||||
from langchain_cli.utils.git import (
|
||||
copy_repo,
|
||||
update_repo,
|
||||
parse_dependency_string,
|
||||
DependencySource,
|
||||
)
|
||||
from langchain_cli.utils.packages import get_package_root
|
||||
from langchain_cli.utils.events import create_events
|
||||
from langserve.packages import list_packages, get_langserve_export
|
||||
import tomli
|
||||
from collections import defaultdict
|
||||
|
||||
REPO_DIR = Path(typer.get_app_dir("langchain")) / "git_repos"
|
||||
|
||||
@@ -29,9 +35,7 @@ def new(
|
||||
] = None,
|
||||
with_poetry: Annotated[
|
||||
bool,
|
||||
typer.Option(
|
||||
"--with-poetry/--no-poetry", help="Run poetry install"
|
||||
),
|
||||
typer.Option("--with-poetry/--no-poetry", help="Run poetry install"),
|
||||
] = False,
|
||||
):
|
||||
"""
|
||||
@@ -79,9 +83,7 @@ def add(
|
||||
] = [],
|
||||
with_poetry: Annotated[
|
||||
bool,
|
||||
typer.Option(
|
||||
"--with-poetry/--no-poetry", help="Run poetry install"
|
||||
),
|
||||
typer.Option("--with-poetry/--no-poetry", help="Run poetry install"),
|
||||
] = False,
|
||||
):
|
||||
"""
|
||||
@@ -97,6 +99,7 @@ def add(
|
||||
if dependencies is None:
|
||||
dependencies = []
|
||||
|
||||
method = ""
|
||||
# cannot have both repo and dependencies
|
||||
if len(repo) != 0:
|
||||
if len(dependencies) != 0:
|
||||
@@ -128,8 +131,17 @@ def add(
|
||||
for i, dependency in enumerate(dependencies):
|
||||
# update repo
|
||||
typer.echo(f"Adding {dependency}...")
|
||||
source_path = update_repo(dependency, REPO_DIR)
|
||||
dep = parse_dependency_string(dependency)
|
||||
source_repo_path = update_repo(dep["git"], dep["ref"], REPO_DIR)
|
||||
source_path = (
|
||||
source_repo_path / dep["subdirectory"]
|
||||
if dep["subdirectory"]
|
||||
else source_repo_path
|
||||
)
|
||||
pyproject_path = source_path / "pyproject.toml"
|
||||
if not pyproject_path.exists():
|
||||
typer.echo(f"Could not find {pyproject_path}")
|
||||
continue
|
||||
langserve_export = get_langserve_export(pyproject_path)
|
||||
|
||||
# detect name conflict
|
||||
@@ -161,9 +173,7 @@ def remove(
|
||||
api_paths: Annotated[List[str], typer.Argument(help="The API paths to remove")],
|
||||
with_poetry: Annotated[
|
||||
bool,
|
||||
typer.Option(
|
||||
"--with_poetry/--no-poetry", help="Don't run poetry remove"
|
||||
),
|
||||
typer.Option("--with_poetry/--no-poetry", help="Don't run poetry remove"),
|
||||
] = False,
|
||||
):
|
||||
"""
|
||||
|
@@ -3,7 +3,11 @@ from pathlib import Path
|
||||
|
||||
import shutil
|
||||
import re
|
||||
from langchain_cli.constants import DEFAULT_GIT_REPO, DEFAULT_GIT_SUBDIRECTORY
|
||||
from langchain_cli.constants import (
|
||||
DEFAULT_GIT_REPO,
|
||||
DEFAULT_GIT_SUBDIRECTORY,
|
||||
DEFAULT_GIT_REF,
|
||||
)
|
||||
import hashlib
|
||||
from git import Repo
|
||||
|
||||
@@ -14,65 +18,49 @@ class DependencySource(TypedDict):
|
||||
subdirectory: Optional[str]
|
||||
|
||||
|
||||
def _get_main_branch(repo: Repo) -> Optional[str]:
    """
    Get the name of the main branch of a git repo.

    The ``origin`` remote is asked for its HEAD branch first. If that does not
    yield a usable name, a local ``main`` head is preferred, then ``master``.

    From https://stackoverflow.com/questions/69651536/how-to-get-master-main-branch-from-gitpython

    Args:
        repo: repository object; only ``repo.git.remote(...)`` and
            ``repo.heads`` are used.

    Returns:
        The branch name. The ``Optional`` in the annotation is kept for
        interface compatibility; every path either returns a string or raises.

    Raises:
        ValueError: if neither the remote nor the local heads identify a
            main branch.
    """
    try:
        # replace "origin" with your remote name if differs
        show_result = repo.git.remote("show", "origin")

        # The show_result contains a wall of text in the language that
        # is set by your locales, so this pattern only matches English
        # output; other locales fall through to the main/master fallback.
        # Only spaces/tabs are skipped after the colon so that an empty
        # value can never capture the *next* line of the output.
        matches = re.search(r"HEAD branch:[ \t]*(.*)", show_result)
        if matches:
            default_branch = matches.group(1).strip()
            # git prints parenthesised placeholders such as "(unknown)" or
            # "(not queried)" when it cannot name the remote HEAD; those are
            # not branch names, so treat them (and an empty value) as a miss.
            if default_branch and not default_branch.startswith("("):
                return default_branch
    except Exception:
        # Deliberate best effort: any failure while querying the remote
        # (no "origin", offline, ...) just means we use the fallback below.
        pass

    # fallback to main/master
    for candidate in ("main", "master"):
        if candidate in repo.heads:
            return candidate

    raise ValueError("Could not find main branch")
|
||||
|
||||
|
||||
# use poetry dependency string format
|
||||
def _parse_dependency_string(package_string: str) -> DependencySource:
|
||||
def parse_dependency_string(package_string: str) -> DependencySource:
|
||||
if package_string.startswith("git+"):
|
||||
# remove git+
|
||||
remaining = package_string[4:]
|
||||
# split main string from params
|
||||
gitstring, *params = remaining.split("#")
|
||||
# parse params
|
||||
params_dict = {}
|
||||
for param in params:
|
||||
if not param:
|
||||
# ignore empty entries
|
||||
continue
|
||||
if "=" in param:
|
||||
key, value = param.split("=")
|
||||
if key in params_dict:
|
||||
raise ValueError(
|
||||
f"Duplicate parameter {key} in dependency string {package_string}"
|
||||
)
|
||||
params_dict[key] = value
|
||||
else:
|
||||
if "ref" in params_dict:
|
||||
raise ValueError(
|
||||
f"Duplicate parameter ref in dependency string {package_string}"
|
||||
)
|
||||
params_dict["ref"] = param
|
||||
gitstring = package_string[4:]
|
||||
subdirectory = None
|
||||
ref = None
|
||||
# first check for #subdirectory= on the end
|
||||
if "#subdirectory=" in gitstring:
|
||||
gitstring, subdirectory = gitstring.split("#subdirectory=")
|
||||
if "#" in subdirectory or "@" in subdirectory:
|
||||
raise ValueError(
|
||||
"#subdirectory must be the last part of the dependency string"
|
||||
)
|
||||
|
||||
# find first slash after ://
|
||||
# find @ or # after that slash
|
||||
# remainder is ref
|
||||
# if no @ or #, then ref is None
|
||||
|
||||
# find first slash after ://
|
||||
if "://" not in gitstring:
|
||||
raise ValueError(
|
||||
"git+ dependencies must start with git+https:// or git+ssh://"
|
||||
)
|
||||
|
||||
_, find_slash = gitstring.split("://", 1)
|
||||
|
||||
if "/" not in find_slash:
|
||||
post_slash = find_slash
|
||||
ref = None
|
||||
else:
|
||||
_, post_slash = find_slash.split("/", 1)
|
||||
if "@" in post_slash or "#" in post_slash:
|
||||
_, ref = re.split(r"[@#]", post_slash, 1)
|
||||
|
||||
# gitstring is everything before that
|
||||
gitstring = gitstring[: -len(ref) - 1] if ref is not None else gitstring
|
||||
|
||||
return DependencySource(
|
||||
git=gitstring,
|
||||
ref=params_dict.get("ref"),
|
||||
subdirectory=params_dict.get("subdirectory"),
|
||||
ref=ref,
|
||||
subdirectory=subdirectory,
|
||||
)
|
||||
|
||||
elif package_string.startswith("https://"):
|
||||
@@ -81,12 +69,13 @@ def _parse_dependency_string(package_string: str) -> DependencySource:
|
||||
# it's a default git repo dependency
|
||||
gitstring = DEFAULT_GIT_REPO
|
||||
subdirectory = str(Path(DEFAULT_GIT_SUBDIRECTORY) / package_string)
|
||||
return DependencySource(git=gitstring, ref=None, subdirectory=subdirectory)
|
||||
return DependencySource(
|
||||
git=gitstring, ref=DEFAULT_GIT_REF, subdirectory=subdirectory
|
||||
)
|
||||
|
||||
|
||||
def _get_repo_path(dependency: DependencySource, repo_dir: Path) -> Path:
|
||||
def _get_repo_path(gitstring: str, repo_dir: Path) -> Path:
|
||||
# only based on git for now
|
||||
gitstring = dependency["git"]
|
||||
hashed = hashlib.sha256(gitstring.encode("utf-8")).hexdigest()[:8]
|
||||
|
||||
removed_protocol = gitstring.split("://")[-1]
|
||||
@@ -98,32 +87,27 @@ def _get_repo_path(dependency: DependencySource, repo_dir: Path) -> Path:
|
||||
return repo_dir / directory_name
|
||||
|
||||
|
||||
def update_repo(gitpath: str, repo_dir: Path) -> Path:
|
||||
def update_repo(gitstring: str, ref: Optional[str], repo_dir: Path) -> Path:
|
||||
# see if path already saved
|
||||
dependency = _parse_dependency_string(gitpath)
|
||||
repo_path = _get_repo_path(dependency, repo_dir)
|
||||
if not repo_path.exists():
|
||||
repo = Repo.clone_from(dependency["git"], repo_path)
|
||||
else:
|
||||
repo = Repo(repo_path)
|
||||
repo_path = _get_repo_path(gitstring, repo_dir)
|
||||
if repo_path.exists():
|
||||
shutil.rmtree(repo_path)
|
||||
|
||||
# pull it
|
||||
ref = dependency.get("ref") if dependency.get("ref") else _get_main_branch(repo)
|
||||
repo.git.checkout(ref)
|
||||
|
||||
repo.git.pull()
|
||||
|
||||
return (
|
||||
repo_path
|
||||
if dependency["subdirectory"] is None
|
||||
else repo_path / dependency["subdirectory"]
|
||||
)
|
||||
# now we have fresh dir
|
||||
Repo.clone_from(gitstring, repo_path, branch=ref, depth=1)
|
||||
return repo_path
|
||||
|
||||
|
||||
def copy_repo(
|
||||
source: Path,
|
||||
destination: Path,
|
||||
) -> None:
|
||||
"""
|
||||
Copies a repo, ignoring git folders.
|
||||
|
||||
Raises FileNotFound error if it can't find source
|
||||
"""
|
||||
|
||||
def ignore_func(_, files):
|
||||
return [f for f in files if f == ".git"]
|
||||
|
||||
|
Reference in New Issue
Block a user