# DB-GPT/dbgpt/app/knowledge/_cli/knowledge_cli.py
#
# 236 lines
# 5.2 KiB
# Python

import functools
import logging
import os
import click
from dbgpt.configs.model_config import DATASETS_DIR
# Address used when the user supplies neither ``--address`` nor the
# ``API_ADDRESS`` environment variable.
_DEFAULT_API_ADDRESS: str = "http://127.0.0.1:5670"
# Effective API server address. It is rebound by ``knowledge_cli_group`` before
# a sub-command runs and is read by the sub-commands in this module.
API_ADDRESS: str = _DEFAULT_API_ADDRESS
# Logger registered under the name "dbgpt_cli".
logger = logging.getLogger("dbgpt_cli")
# Root of the "knowledge" command group.
#
# The address is resolved by click itself: a value given on the command line
# wins, otherwise the API_ADDRESS environment variable is used, otherwise the
# built-in default. Previously the environment fallback was triggered whenever
# the received value *equalled* the default, so an explicit
# ``--address http://127.0.0.1:5670`` was silently replaced by the environment
# variable. Letting click handle ``envvar`` removes that ambiguity.
#
# NOTE: the docstring below is shown by click as the group's help text, so it
# is kept short; maintainer notes live in comments instead.
@click.group("knowledge")
@click.option(
    "--address",
    type=str,
    default=_DEFAULT_API_ADDRESS,
    envvar="API_ADDRESS",
    required=False,
    show_default=True,
    help=(
        "Address of the Api server(If not set, try to read from environment variable: API_ADDRESS)."
    ),
)
def knowledge_cli_group(address: str):
    """Knowledge command line tool"""
    # Publish the resolved address for the sub-commands, which read the
    # module-level API_ADDRESS when they run.
    global API_ADDRESS
    API_ADDRESS = address
def add_knowledge_options(func):
    """Attach the shared ``--space_name`` option to a command callback.

    Args:
        func: The callback to decorate.

    Returns:
        A pass-through wrapper around ``func`` whose metadata is copied with
        ``functools.wraps`` and which carries the ``--space_name`` click option.
    """
    space_name_option = click.option(
        "--space_name",
        type=str,
        default="default",
        show_default=True,
        required=False,
        help="Your knowledge space name",
    )

    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)

    # Same application order as stacking the decorators: first copy the
    # metadata of ``func`` onto the wrapper, then register the option on it.
    return space_name_option(functools.wraps(func)(wrapper))
# "load" sub-command: forwards the parsed options, together with the resolved
# API address, to ``knowledge_init``.
#
# NOTE: the function docstring is displayed by click as the command help, so
# it is kept to a single line.
@knowledge_cli_group.command()
@add_knowledge_options
@click.option(
    "--vector_store_type",
    type=str,
    default="Chroma",
    show_default=True,
    required=False,
    help="Vector store type.",
)
@click.option(
    "--local_doc_path",
    type=str,
    default=DATASETS_DIR,
    show_default=True,
    required=False,
    help="Your document directory or document file path.",
)
@click.option(
    "--skip_wrong_doc",
    is_flag=True,
    type=bool,
    default=False,
    required=False,
    help="Skip wrong document.",
)
@click.option(
    "--overwrite",
    is_flag=True,
    type=bool,
    default=False,
    required=False,
    help="Overwrite existing document(they has same name).",
)
@click.option(
    "--max_workers",
    type=int,
    default=None,
    required=False,
    help="The maximum number of threads that can be used to upload document.",
)
@click.option(
    "--pre_separator",
    type=str,
    default=None,
    required=False,
    help=(
        "Preseparator, this separator is used for pre-splitting "
        "before the document is actually split by the text splitter. "
        "Preseparator are not included in the vectorized text. "
    ),
)
@click.option(
    "--separator",
    type=str,
    default=None,
    required=False,
    help="This is the document separator. Currently, only one separator is supported.",
)
@click.option(
    "--chunk_size",
    type=int,
    default=None,
    required=False,
    help="Maximum size of chunks to split.",
)
@click.option(
    "--chunk_overlap",
    type=int,
    default=None,
    required=False,
    help="Overlap in characters between chunks.",
)
def load(
    space_name: str,
    vector_store_type: str,
    local_doc_path: str,
    skip_wrong_doc: bool,
    overwrite: bool,
    max_workers: int,
    pre_separator: str,
    separator: str,
    chunk_size: int,
    chunk_overlap: int,
):
    """Load your local documents to DB-GPT"""
    # Imported at call time, so the client module is only loaded when this
    # command actually runs.
    from dbgpt.app.knowledge._cli.knowledge_client import knowledge_init

    # Positional order must match the call contract of knowledge_init.
    init_args = (
        API_ADDRESS,
        space_name,
        vector_store_type,
        local_doc_path,
        skip_wrong_doc,
        overwrite,
        max_workers,
        pre_separator,
        separator,
        chunk_size,
        chunk_overlap,
    )
    knowledge_init(*init_args)
# "delete" sub-command: hands the space name, optional document name and the
# ``-y`` confirmation flag to ``knowledge_delete``.
#
# NOTE: the function docstring is displayed by click as the command help, so
# it is kept to a single line.
@knowledge_cli_group.command()
@add_knowledge_options
@click.option(
    "--doc_name",
    type=str,
    default=None,
    required=False,
    help=(
        "The document name you want to delete. If doc_name is None, "
        "this command will delete the whole space."
    ),
)
@click.option(
    "-y",
    is_flag=True,
    type=bool,
    default=False,
    required=False,
    help="Confirm your choice",
)
def delete(space_name: str, doc_name: str, y: bool):
    """Delete your knowledge space or document in space"""
    # Imported at call time, so the client module is only loaded when this
    # command actually runs.
    from dbgpt.app.knowledge._cli.knowledge_client import knowledge_delete

    confirmed = y  # the short flag ``-y`` is passed on as ``confirm``
    knowledge_delete(API_ADDRESS, space_name, doc_name, confirm=confirmed)
# "list" sub-command: forwards paging, filtering and output-format options to
# ``knowledge_list``.
#
# NOTE(review): this function shadows the builtin ``list`` for the rest of the
# module. No explicit name is passed to ``command()``, so click takes the
# command name from the function name; renaming the function would therefore
# also require passing "list" explicitly.
# NOTE: the function docstring is displayed by click as the command help, so
# it is kept to a single line.
@knowledge_cli_group.command()
@click.option(
    "--space_name",
    type=str,
    default=None,
    show_default=True,
    required=False,
    help="Your knowledge space name. If None, list all spaces",
)
@click.option(
    "--doc_id",
    type=int,
    default=None,
    show_default=True,
    required=False,
    help=(
        "Your document id in knowledge space. If Not None, "
        "list all chunks in current document"
    ),
)
@click.option(
    "--page",
    type=int,
    default=1,
    show_default=True,
    required=False,
    help="The page for every query",
)
@click.option(
    "--page_size",
    type=int,
    default=20,
    show_default=True,
    required=False,
    help="The page size for every query",
)
@click.option(
    "--show_content",
    is_flag=True,
    type=bool,
    default=False,
    required=False,
    help="Query the document content of chunks",
)
@click.option(
    "--output",
    type=click.Choice(["text", "html", "csv", "latex", "json"]),
    default="text",
    required=False,
    help="The output format",
)
def list(
    space_name: str,
    doc_id: int,
    page: int,
    page_size: int,
    show_content: bool,
    output: str,
):
    """List knowledge space"""
    # Imported at call time, so the client module is only loaded when this
    # command actually runs.
    from dbgpt.app.knowledge._cli.knowledge_client import knowledge_list

    # Note the positional order: paging arguments come before ``doc_id``.
    query_args = (space_name, page, page_size, doc_id, show_content, output)
    knowledge_list(API_ADDRESS, *query_args)