mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-11 07:50:47 +00:00
multiple: pydantic 2 compatibility, v0.3 (#26443)
Signed-off-by: ChengZi <chen.zhang@zilliz.com> Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com> Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com> Co-authored-by: Dan O'Donovan <dan.odonovan@gmail.com> Co-authored-by: Tom Daniel Grande <tomdgrande@gmail.com> Co-authored-by: Grande <Tom.Daniel.Grande@statsbygg.no> Co-authored-by: Bagatur <baskaryan@gmail.com> Co-authored-by: ccurme <chester.curme@gmail.com> Co-authored-by: Harrison Chase <hw.chase.17@gmail.com> Co-authored-by: Tomaz Bratanic <bratanic.tomaz@gmail.com> Co-authored-by: ZhangShenao <15201440436@163.com> Co-authored-by: Friso H. Kingma <fhkingma@gmail.com> Co-authored-by: ChengZi <chen.zhang@zilliz.com> Co-authored-by: Nuno Campos <nuno@langchain.dev> Co-authored-by: Morgante Pell <morgantep@google.com>
This commit is contained in:
@@ -1,306 +1,54 @@
|
||||
"""Migrate LangChain to the most recent version."""
|
||||
|
||||
# Adapted from bump-pydantic
|
||||
# https://github.com/pydantic/bump-pydantic
|
||||
import difflib
|
||||
import functools
|
||||
import multiprocessing
|
||||
import os
|
||||
import time
|
||||
import traceback
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, List, Optional, Tuple, Type, TypeVar, Union
|
||||
|
||||
import libcst as cst
|
||||
import rich
|
||||
import typer
|
||||
from libcst.codemod import CodemodContext, ContextAwareTransformer
|
||||
from libcst.helpers import calculate_module_and_package
|
||||
from libcst.metadata import FullRepoManager, FullyQualifiedNameProvider, ScopeProvider
|
||||
from rich.console import Console
|
||||
from rich.progress import Progress
|
||||
from typer import Argument, Exit, Option, Typer
|
||||
from typing_extensions import ParamSpec
|
||||
|
||||
from langchain_cli.namespaces.migrate.codemods import Rule, gather_codemods
|
||||
from langchain_cli.namespaces.migrate.glob_helpers import match_glob
|
||||
|
||||
app = Typer(invoke_without_command=True, add_completion=False)
|
||||
|
||||
P = ParamSpec("P")
|
||||
T = TypeVar("T")
|
||||
|
||||
DEFAULT_IGNORES = [".venv/**"]
|
||||
from gritql import run
|
||||
|
||||
|
||||
@app.callback()
def main(
    path: Path = Argument(..., exists=True, dir_okay=True, allow_dash=False),
    disable: List[Rule] = Option(default=[], help="Disable a rule."),
    diff: bool = Option(False, help="Show diff instead of applying changes."),
    ignore: List[str] = Option(
        default=DEFAULT_IGNORES, help="Ignore a path glob pattern."
    ),
    log_file: Path = Option("log.txt", help="Log errors to this file."),
    include_ipynb: bool = Option(
        False, help="Include Jupyter Notebook files in the migration."
    ),
):
    """Migrate langchain to the most recent version."""
    # NOTE: the docstring above is kept to a single line on purpose -- Typer
    # surfaces it as the command's help text.
    #
    # Parameters:
    #   path          -- a single file, or a directory searched recursively
    #                    for ``*.py`` (and ``*.ipynb`` when requested).
    #   disable       -- rules forwarded to ``get_and_run_codemods``.
    #   diff          -- collect and print unified diffs; the confirmation
    #                    prompt is skipped in this mode.
    #   ignore        -- glob patterns; matching files are dropped.
    #   log_file      -- file that per-file error messages are appended to.
    #   include_ipynb -- also collect notebooks when ``path`` is a directory.
    #
    # Raises ``Exit`` when the user declines the prompt or no files are found,
    # and ``Exit(1)`` when any diff output was collected.
    if not diff:
        if not typer.confirm(
            "✈️ This script will help you migrate to a recent version LangChain. "
            "This migration script will attempt to replace old imports in the code "
            "with new ones.\n\n"
            "🔄 You will need to run the migration script TWICE to migrate (e.g., "
            "to update llms import from langchain, the script will first move them to "
            "corresponding imports from the community package, and on the second "
            "run will migrate from the community package to the partner package "
            "when possible). \n\n"
            "🔍 You can pre-view the changes by running with the --diff flag. \n\n"
            "🚫 You can disable specific import changes by using the --disable "
            "flag. \n\n"
            "📄 Update your pyproject.toml or requirements.txt file to "
            "reflect any imports from new packages. For example, if you see new "
            "imports from langchain_openai, langchain_anthropic or "
            "langchain_text_splitters you "
            "should add them to your dependencies! \n\n"
            '⚠️ This script is a "best-effort", and is likely to make some '
            "mistakes.\n\n"
            "🛡️ Backup your code prior to running the migration script -- it will "
            "modify your files!\n\n"
            "❓ Do you want to continue?"
        ):
            raise Exit()
    console = Console(log_time=True)
    console.log("Start langchain-cli migrate")
    # NOTE: LIBCST_PARSER_TYPE=native is required according to https://github.com/Instagram/LibCST/issues/487.
    os.environ["LIBCST_PARSER_TYPE"] = "native"

    # A single file is processed on its own; a directory is walked recursively.
    if path.is_file():
        package = path.parent
        all_files = [path]
    else:
        package = path
        all_files = sorted(package.glob("**/*.py"))
        if include_ipynb:
            all_files.extend(sorted(package.glob("**/*.ipynb")))

    filtered_files = [
        file
        for file in all_files
        if not any(match_glob(file, pattern) for pattern in ignore)
    ]
    files = [str(file.relative_to(".")) for file in filtered_files]

    if len(files) == 1:
        console.log("Found 1 file to process.")
    elif len(files) > 1:
        console.log(f"Found {len(files)} files to process.")
    else:
        console.log("No files to process.")
        raise Exit()

    providers = {FullyQualifiedNameProvider, ScopeProvider}
    metadata_manager = FullRepoManager(".", files, providers=providers)  # type: ignore[arg-type]
    metadata_manager.resolve_cache()

    scratch: Dict[str, Any] = {}
    # Reference point for the mtime comparison further down.
    start_time = time.time()

    partial_run_codemods = functools.partial(
        get_and_run_codemods, disable, metadata_manager, scratch, package, diff
    )
    count_errors = 0
    difflines: List[List[str]] = []
    # The log file is opened through a context manager so the handle is closed
    # (and buffered error text flushed) on every exit path, including the
    # ``Exit(1)`` raised at the end of this function.
    with log_file.open("a+", encoding="utf8") as log_fp, Progress(
        *Progress.get_default_columns(), transient=True
    ) as progress:
        task = progress.add_task(description="Executing codemods...", total=len(files))
        with multiprocessing.Pool() as pool:
            for error, _difflines in pool.imap_unordered(partial_run_codemods, files):
                progress.advance(task)

                if _difflines is not None:
                    difflines.append(_difflines)

                if error is not None:
                    count_errors += 1
                    # ``error`` is one string; write it as-is.
                    log_fp.write(error)

    # A file counts as modified when its mtime is later than ``start_time``.
    modified = [Path(f) for f in files if os.stat(f).st_mtime > start_time]

    if not diff:
        if modified:
            console.log(f"Refactored {len(modified)} files.")
        else:
            console.log("No files were modified.")

    for _difflines in difflines:
        color_diff(console, _difflines)

    if count_errors > 0:
        console.log(f"Found {count_errors} errors. Please check the {log_file} file.")
    else:
        console.log("Run successfully!")

    # Any collected diff entry makes the command exit with status 1.
    if difflines:
        raise Exit(1)
|
||||
def get_gritdir_path() -> Path:
    """Return the ``.grit`` directory located next to this module's file."""
    return Path(__file__).parent / ".grit"
|
||||
|
||||
|
||||
def get_and_run_codemods(
|
||||
disabled_rules: List[Rule],
|
||||
metadata_manager: FullRepoManager,
|
||||
scratch: Dict[str, Any],
|
||||
package: Path,
|
||||
diff: bool,
|
||||
filename: str,
|
||||
) -> Tuple[Union[str, None], Union[List[str], None]]:
|
||||
"""Run codemods from rules.
|
||||
def migrate(
|
||||
ctx: typer.Context,
|
||||
) -> None:
|
||||
"""Migrate langchain to the most recent version.
|
||||
|
||||
Wrapper around run_codemods to be used with multiprocessing.Pool.
|
||||
Any undocumented arguments will be passed to the Grit CLI.
|
||||
"""
|
||||
codemods = gather_codemods(disabled=disabled_rules)
|
||||
return run_codemods(codemods, metadata_manager, scratch, package, diff, filename)
|
||||
rich.print(
|
||||
"✈️ This script will help you migrate to a recent version LangChain. "
|
||||
"This migration script will attempt to replace old imports in the code "
|
||||
"with new ones.\n\n"
|
||||
"🔄 You will need to run the migration script TWICE to migrate (e.g., "
|
||||
"to update llms import from langchain, the script will first move them to "
|
||||
"corresponding imports from the community package, and on the second "
|
||||
"run will migrate from the community package to the partner package "
|
||||
"when possible). \n\n"
|
||||
"🔍 You can pre-view the changes by running with the --diff flag. \n\n"
|
||||
"🚫 You can disable specific import changes by using the --disable "
|
||||
"flag. \n\n"
|
||||
"📄 Update your pyproject.toml or requirements.txt file to "
|
||||
"reflect any imports from new packages. For example, if you see new "
|
||||
"imports from langchain_openai, langchain_anthropic or "
|
||||
"langchain_text_splitters you "
|
||||
"should them to your dependencies! \n\n"
|
||||
'⚠️ This script is a "best-effort", and is likely to make some '
|
||||
"mistakes.\n\n"
|
||||
"🛡️ Backup your code prior to running the migration script -- it will "
|
||||
"modify your files!\n\n"
|
||||
)
|
||||
rich.print("-" * 10)
|
||||
rich.print()
|
||||
|
||||
final_code = run.apply_pattern(
|
||||
"langchain_all_migrations()",
|
||||
ctx.args,
|
||||
grit_dir=get_gritdir_path(),
|
||||
)
|
||||
|
||||
def _rewrite_file(
    filename: str,
    codemods: List[Type[ContextAwareTransformer]],
    diff: bool,
    context: CodemodContext,
) -> Tuple[Union[str, None], Union[List[str], None]]:
    """Apply ``codemods`` to one source file.

    Args:
        filename: Path of the file to process.
        codemods: Transformer classes, instantiated with ``context`` and
            applied in order, each one consuming the previous one's output.
        diff: When true, return a unified diff instead of writing the file.
        context: Codemod context handed to every transformer.

    Returns:
        ``(None, diff_lines)`` when ``diff`` is true and the code changed,
        otherwise ``(None, None)``. The first element is always ``None`` here.
    """
    with Path(filename).open("r+", encoding="utf-8") as handle:
        original = handle.read()
        # Rewind right away so a later write starts at the top of the file.
        handle.seek(0)

        tree = cst.parse_module(original)
        for codemod_cls in codemods:
            tree = codemod_cls(context=context).transform_module(tree)
        rewritten = tree.code

        # Nothing to report and nothing to write when the code is unchanged.
        if rewritten == original:
            return None, None

        if diff:
            delta = difflib.unified_diff(
                original.splitlines(keepends=True),
                rewritten.splitlines(keepends=True),
                fromfile=filename,
                tofile=filename,
            )
            return None, list(delta)

        handle.write(rewritten)
        # Drop any leftover tail if the new content is shorter than the old.
        handle.truncate()
    return None, None
|
||||
|
||||
|
||||
def _rewrite_notebook(
    filename: str,
    codemods: List[Type[ContextAwareTransformer]],
    diff: bool,
    context: CodemodContext,
) -> Tuple[Optional[str], Optional[List[str]]]:
    """Try to rewrite a Jupyter Notebook file.

    Each code cell is parsed and run through ``codemods`` independently.
    Cells containing a line that starts with ``!`` or ``%`` are skipped.

    Args:
        filename: Path of the notebook; must end in ``.ipynb``.
        codemods: Transformer classes applied in order to every code cell.
        diff: When true, return the concatenated per-cell unified diffs
            instead of writing the notebook.
        context: Accepted for signature parity with ``_rewrite_file``; each
            cell is transformed with a fresh ``CodemodContext`` instead.

    Returns:
        ``(None, diff_lines)`` when ``diff`` is true and at least one cell
        changed, otherwise ``(None, None)``.

    Raises:
        ValueError: If ``filename`` does not have the ``.ipynb`` suffix.
    """
    import nbformat

    file_path = Path(filename)
    if file_path.suffix != ".ipynb":
        raise ValueError("Only Jupyter Notebook files (.ipynb) are supported.")

    with file_path.open("r", encoding="utf-8") as fp:
        notebook = nbformat.read(fp, as_version=4)

    diffs: List[str] = []
    changed = False

    for cell in notebook.cells:
        if cell.cell_type != "code":
            continue

        code = "".join(cell.source)

        # Skip code if any of the lines begin with a magic command or
        # a ! command.
        # We can try to handle later.
        if any(line.startswith(("!", "%")) for line in code.splitlines()):
            continue

        input_tree = cst.parse_module(code)

        # TODO(Team): Quick hack, need to figure out
        # how to handle this correctly.
        # This prevents the code from trying to re-insert the imports
        # for every cell in the notebook.
        local_context = CodemodContext()

        for codemod in codemods:
            transformer = codemod(context=local_context)
            input_tree = transformer.transform_module(input_tree)

        output_code = input_tree.code
        if code == output_code:
            continue

        changed = True
        cell.source = output_code.splitlines(keepends=True)
        if diff:
            diffs.extend(
                difflib.unified_diff(
                    code.splitlines(keepends=True),
                    output_code.splitlines(keepends=True),
                    fromfile=filename,
                    tofile=filename,
                )
            )

    # Mirror ``_rewrite_file``: an untouched notebook yields no diff entry
    # (an empty list would be treated as a diff by a caller that checks
    # ``is not None``) and is not rewritten on disk.
    if not changed:
        return None, None

    if diff:
        return None, diffs

    with file_path.open("w", encoding="utf-8") as fp:
        nbformat.write(notebook, fp)

    return None, None
|
||||
|
||||
|
||||
def run_codemods(
    codemods: List[Type[ContextAwareTransformer]],
    metadata_manager: FullRepoManager,
    scratch: Dict[str, Any],
    package: Path,
    diff: bool,
    filename: str,
) -> Tuple[Union[str, None], Union[List[str], None]]:
    """Run ``codemods`` against one file, reporting failures as text.

    Builds a ``CodemodContext`` for ``filename``, seeds its scratch space from
    ``scratch`` and hands off to the notebook or plain-file rewriter depending
    on the ``.ipynb`` suffix.

    Returns:
        The rewriter's ``(error, diff_lines)`` result, or
        ``(error_message, None)`` when an exception was raised.
    """
    try:
        resolved = calculate_module_and_package(str(package), filename)
        context = CodemodContext(
            metadata_manager=metadata_manager,
            filename=filename,
            full_module_name=resolved.name,
            full_package_name=resolved.package,
        )
        context.scratch.update(scratch)

        rewriter = _rewrite_notebook if filename.endswith(".ipynb") else _rewrite_file
        return rewriter(filename, codemods, diff, context)
    except cst.ParserSyntaxError as exc:
        message = (
            f"A syntax error happened on {filename}. This file cannot be "
            f"formatted.\n"
            f"{exc}"
        )
        return message, None
    except Exception:
        # Any other failure is returned as text together with its traceback.
        message = f"An error happened on {filename}.\n{traceback.format_exc()}"
        return message, None
|
||||
|
||||
|
||||
def color_diff(console: Console, lines: Iterable[str]) -> None:
    """Print each diff line to ``console`` in a style chosen by its first character.

    Trailing newlines are stripped before printing. Lines starting with ``+``
    are green, ``-`` red, ``^`` blue, and everything else (including empty
    lines) white.
    """
    style_by_prefix = {"+": "green", "-": "red", "^": "blue"}
    for raw in lines:
        text = raw.rstrip("\n")
        # ``text[:1]`` is "" for an empty line, which falls through to white.
        console.print(text, style=style_by_prefix.get(text[:1], "white"))
|
||||
raise typer.Exit(code=final_code)
|
||||
|
Reference in New Issue
Block a user