# Mirror of https://github.com/csunny/DB-GPT.git
# Synced 2025-08-04 01:50:08 +00:00
# 236 lines, 5.2 KiB, Python
import functools
|
|
import logging
|
|
import os
|
|
|
|
import click
|
|
|
|
from dbgpt.configs.model_config import DATASETS_DIR
|
|
|
|
# Built-in fallback address of the API server.
_DEFAULT_API_ADDRESS: str = "http://127.0.0.1:5670"

# Address the subcommands in this module pass to the knowledge client.
# It starts out as the built-in default and is rebound by the
# ``knowledge`` group callback when the CLI is invoked.
API_ADDRESS: str = _DEFAULT_API_ADDRESS

# Logger for this CLI module, registered under the name "dbgpt_cli".
logger = logging.getLogger("dbgpt_cli")
|
|
|
|
|
|
@click.group("knowledge")
@click.option(
    "--address",
    type=str,
    default=_DEFAULT_API_ADDRESS,
    # Let click do the environment lookup so the precedence is always:
    # explicit --address > API_ADDRESS environment variable > default.
    envvar="API_ADDRESS",
    required=False,
    show_default=True,
    help=(
        "Address of the Api server(If not set, try to read from environment variable: API_ADDRESS)."
    ),
)
def knowledge_cli_group(address: str):
    """Knowledge command line tool"""
    # NOTE: click uses the docstring above as the group's --help text, so
    # maintainer documentation is kept in comments.
    #
    # ``address`` arrives already resolved by click (command line, then the
    # API_ADDRESS environment variable, then the built-in default). The
    # previous implementation detected "not set" by comparing the value with
    # the default string, which made an explicitly passed
    # ``--address http://127.0.0.1:5670`` lose to the environment variable.
    #
    # The resolved value is published through the module-level API_ADDRESS,
    # which the subcommands of this group read when they run.
    global API_ADDRESS
    API_ADDRESS = address
|
|
|
|
|
|
def add_knowledge_options(func):
    """Decorate ``func`` with the ``--space_name`` click option.

    ``func`` is wrapped in a pass-through function (metadata preserved with
    ``functools.wraps``) and the option is attached to that wrapper, which is
    what gets returned.

    Args:
        func: The callable that should receive a ``space_name`` argument.

    Returns:
        The wrapper around ``func`` with the ``--space_name`` option attached.
    """
    space_name_option = click.option(
        "--space_name",
        type=str,
        default="default",
        show_default=True,
        required=False,
        help="Your knowledge space name",
    )

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Pure pass-through: forward everything to the wrapped callable.
        return func(*args, **kwargs)

    return space_name_option(wrapper)
|
|
|
|
|
|
@knowledge_cli_group.command()
@add_knowledge_options
@click.option(
    "--vector_store_type",
    type=str,
    default="Chroma",
    show_default=True,
    required=False,
    help="Vector store type.",
)
@click.option(
    "--local_doc_path",
    type=str,
    default=DATASETS_DIR,
    show_default=True,
    required=False,
    help="Your document directory or document file path.",
)
@click.option(
    "--skip_wrong_doc",
    type=bool,
    is_flag=True,
    default=False,
    required=False,
    help="Skip wrong document.",
)
@click.option(
    "--overwrite",
    type=bool,
    is_flag=True,
    default=False,
    required=False,
    help="Overwrite existing document(they has same name).",
)
@click.option(
    "--max_workers",
    type=int,
    default=None,
    required=False,
    help="The maximum number of threads that can be used to upload document.",
)
@click.option(
    "--pre_separator",
    type=str,
    default=None,
    required=False,
    help="Preseparator, this separator is used for pre-splitting before the document is "
    "actually split by the text splitter. Preseparator are not included in the vectorized text. ",
)
@click.option(
    "--separator",
    type=str,
    default=None,
    required=False,
    help="This is the document separator. Currently, only one separator is supported.",
)
@click.option(
    "--chunk_size",
    type=int,
    default=None,
    required=False,
    help="Maximum size of chunks to split.",
)
@click.option(
    "--chunk_overlap",
    type=int,
    default=None,
    required=False,
    help="Overlap in characters between chunks.",
)
def load(
    space_name: str,
    vector_store_type: str,
    local_doc_path: str,
    skip_wrong_doc: bool,
    overwrite: bool,
    max_workers: int,
    pre_separator: str,
    separator: str,
    chunk_size: int,
    chunk_overlap: int,
):
    """Load your local documents to DB-GPT"""
    # NOTE: click uses the docstring above as the command's --help text, so
    # maintainer documentation is kept in comments.
    #
    # The client module is imported only when the command actually runs.
    from dbgpt.app.knowledge._cli.knowledge_client import knowledge_init

    # knowledge_init is called positionally; the three groups below are
    # unpacked in exactly the order it receives them.
    target = (space_name, vector_store_type, local_doc_path)
    upload_behaviour = (skip_wrong_doc, overwrite, max_workers)
    splitting = (pre_separator, separator, chunk_size, chunk_overlap)

    knowledge_init(API_ADDRESS, *target, *upload_behaviour, *splitting)
|
|
|
|
|
|
@knowledge_cli_group.command()
@add_knowledge_options
@click.option(
    "--doc_name",
    type=str,
    default=None,
    required=False,
    help="The document name you want to delete. If doc_name is None, this command will delete the whole space.",
)
@click.option(
    "-y",
    type=bool,
    is_flag=True,
    default=False,
    required=False,
    help="Confirm your choice",
)
def delete(space_name: str, doc_name: str, y: bool):
    """Delete your knowledge space or document in space"""
    # NOTE: click uses the docstring above as the command's --help text, so
    # maintainer documentation is kept in comments.
    #
    # The client module is imported only when the command actually runs.
    from dbgpt.app.knowledge._cli.knowledge_client import knowledge_delete

    # The ``-y`` flag is forwarded to the client as its ``confirm`` keyword.
    confirmed = y
    knowledge_delete(API_ADDRESS, space_name, doc_name, confirm=confirmed)
|
|
|
|
|
|
@knowledge_cli_group.command()
@click.option(
    "--space_name",
    type=str,
    default=None,
    show_default=True,
    required=False,
    help="Your knowledge space name. If None, list all spaces",
)
@click.option(
    "--doc_id",
    type=int,
    default=None,
    show_default=True,
    required=False,
    help="Your document id in knowledge space. If Not None, list all chunks in current document",
)
@click.option(
    "--page",
    type=int,
    default=1,
    show_default=True,
    required=False,
    help="The page for every query",
)
@click.option(
    "--page_size",
    type=int,
    default=20,
    show_default=True,
    required=False,
    help="The page size for every query",
)
@click.option(
    "--show_content",
    type=bool,
    is_flag=True,
    default=False,
    required=False,
    help="Query the document content of chunks",
)
@click.option(
    "--output",
    type=click.Choice(["text", "html", "csv", "latex", "json"]),
    default="text",
    required=False,
    help="The output format",
)
def list(
    space_name: str,
    doc_id: int,
    page: int,
    page_size: int,
    show_content: bool,
    output: str,
):
    """List knowledge space"""
    # NOTE: click uses the docstring above as the command's --help text, so
    # maintainer documentation is kept in comments.
    #
    # NOTE(review): this definition shadows the builtin ``list`` for the rest
    # of the module. The name is kept because it is the module's public name
    # and the name click derives the command from.
    #
    # The client module is imported only when the command actually runs.
    from dbgpt.app.knowledge._cli.knowledge_client import knowledge_list

    # knowledge_list is called positionally: paging comes before doc_id.
    paging = (page, page_size)
    rendering = (show_content, output)

    knowledge_list(API_ADDRESS, space_name, *paging, doc_id, *rendering)
|