From 3f6e9687ee06d0dd456de2b2a09193111a97983f Mon Sep 17 00:00:00 2001
From: FangYin Cheng
Date: Mon, 21 Aug 2023 01:35:37 +0800
Subject: [PATCH] feat: Support windows

fix: Fix install error on linux
doc: Add torch cuda FAQ
---
 docs/getting_started/faq/deploy/deploy_faq.md |  37 ++-
 docs/getting_started/install/deploy/deploy.md |   5 +
 pilot/openapi/api_v1/api_v1.py                |   1 +
 pilot/server/knowledge/api.py                 |  42 ++--
 pilot/server/llmserver.py                     |   7 +-
 requirements.txt                              |   4 +-
 scripts/examples/load_examples.bat            |   7 +-
 setup.py                                      | 210 +++++++++++++++++-
 8 files changed, 274 insertions(+), 39 deletions(-)

diff --git a/docs/getting_started/faq/deploy/deploy_faq.md b/docs/getting_started/faq/deploy/deploy_faq.md
index 42a0e6afa..4735ae2e3 100644
--- a/docs/getting_started/faq/deploy/deploy_faq.md
+++ b/docs/getting_started/faq/deploy/deploy_faq.md
@@ -45,4 +45,39 @@ print(f'Public url: {url}')
 time.sleep(60 * 60 * 24)
 ```
 
-Open `url` with your browser to see the website.
\ No newline at end of file
+Open `url` with your browser to see the website.
+
+##### Q5: (Windows) Error when executing `pip install -e .`
+
+The error log looks like the following:
+```
+× python setup.py bdist_wheel did not run successfully.
+│ exit code: 1
+╰─> [11 lines of output]
+    running bdist_wheel
+    running build
+    running build_py
+    creating build
+    creating build\lib.win-amd64-cpython-310
+    creating build\lib.win-amd64-cpython-310\cchardet
+    copying src\cchardet\version.py -> build\lib.win-amd64-cpython-310\cchardet
+    copying src\cchardet\__init__.py -> build\lib.win-amd64-cpython-310\cchardet
+    running build_ext
+    building 'cchardet._cchardet' extension
+    error: Microsoft Visual C++ 14.0 or greater is required. Get it with "Microsoft C++ Build Tools": https://visualstudio.microsoft.com/visual-cpp-build-tools/
+    [end of output]
+```
+
+Download and install the `Microsoft C++ Build Tools` from [visual-cpp-build-tools](https://visualstudio.microsoft.com/visual-cpp-build-tools/).
+
+
+
+##### Q6: `Torch not compiled with CUDA enabled`
+
+```
+2023-08-19 16:24:30 | ERROR | stderr | raise AssertionError("Torch not compiled with CUDA enabled")
+2023-08-19 16:24:30 | ERROR | stderr | AssertionError: Torch not compiled with CUDA enabled
+```
+
+1. Install the [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit-archive).
+2. Reinstall PyTorch with CUDA support by following the [start-locally](https://pytorch.org/get-started/locally/#start-locally) guide.
\ No newline at end of file
diff --git a/docs/getting_started/install/deploy/deploy.md b/docs/getting_started/install/deploy/deploy.md
index 0422a5c58..2f2880fb2 100644
--- a/docs/getting_started/install/deploy/deploy.md
+++ b/docs/getting_started/install/deploy/deploy.md
@@ -102,6 +102,11 @@ You can configure basic parameters in the .env file, for example setting LLM_MOD
 bash ./scripts/examples/load_examples.sh
 ```
 
+On the Windows platform:
+```PowerShell
+.\scripts\examples\load_examples.bat
+```
+
 1.Run db-gpt server
 
 ```bash
diff --git a/pilot/openapi/api_v1/api_v1.py b/pilot/openapi/api_v1/api_v1.py
index 7fedd5e11..1d01b31b6 100644
--- a/pilot/openapi/api_v1/api_v1.py
+++ b/pilot/openapi/api_v1/api_v1.py
@@ -292,6 +292,7 @@ async def no_stream_generator(chat):
 
 async def stream_generator(chat):
     model_response = chat.stream_call()
+    msg = "[LLM_ERROR]: llm server has no output, maybe your prompt template is wrong."
     if not CFG.NEW_SERVER_MODE:
         for chunk in model_response.iter_lines(decode_unicode=False, delimiter=b"\0"):
             if chunk:
diff --git a/pilot/server/knowledge/api.py b/pilot/server/knowledge/api.py
index 51ee7f924..e2905d181 100644
--- a/pilot/server/knowledge/api.py
+++ b/pilot/server/knowledge/api.py
@@ -1,5 +1,6 @@
 import os
 import shutil
+import tempfile
 from tempfile import NamedTemporaryFile
 
 from fastapi import APIRouter, File, UploadFile, Form
@@ -130,29 +131,28 @@ async def document_upload(
         if doc_file:
             if not os.path.exists(os.path.join(KNOWLEDGE_UPLOAD_ROOT_PATH, space_name)):
                 os.makedirs(os.path.join(KNOWLEDGE_UPLOAD_ROOT_PATH, space_name))
-            with NamedTemporaryFile(
-                dir=os.path.join(KNOWLEDGE_UPLOAD_ROOT_PATH, space_name), delete=False
-            ) as tmp:
+            # We cannot move the temp file on Windows while it is still open in a `with` block
+            tmp_fd, tmp_path = tempfile.mkstemp(
+                dir=os.path.join(KNOWLEDGE_UPLOAD_ROOT_PATH, space_name)
+            )
+            with os.fdopen(tmp_fd, "wb") as tmp:
                 tmp.write(await doc_file.read())
-                tmp_path = tmp.name
-                shutil.move(
-                    tmp_path,
-                    os.path.join(
-                        KNOWLEDGE_UPLOAD_ROOT_PATH, space_name, doc_file.filename
-                    ),
+            shutil.move(
+                tmp_path,
+                os.path.join(KNOWLEDGE_UPLOAD_ROOT_PATH, space_name, doc_file.filename),
+            )
+            request = KnowledgeDocumentRequest()
+            request.doc_name = doc_name
+            request.doc_type = doc_type
+            request.content = os.path.join(
+                KNOWLEDGE_UPLOAD_ROOT_PATH, space_name, doc_file.filename
+            )
+            return Result.succ(
+                knowledge_space_service.create_knowledge_document(
+                    space=space_name, request=request
                 )
-                request = KnowledgeDocumentRequest()
-                request.doc_name = doc_name
-                request.doc_type = doc_type
-                request.content = os.path.join(
-                    KNOWLEDGE_UPLOAD_ROOT_PATH, space_name, doc_file.filename
-                )
-                return Result.succ(
-                    knowledge_space_service.create_knowledge_document(
-                        space=space_name, request=request
-                    )
-                )
-                # return Result.succ([])
+            )
+            # return Result.succ([])
         return Result.faild(code="E000X", msg=f"doc_file is None")
     except Exception as e:
         return Result.faild(code="E000X", msg=f"document add error {e}")
diff --git a/pilot/server/llmserver.py b/pilot/server/llmserver.py
index 10cdb0299..a0c3b6cd8 100644
--- a/pilot/server/llmserver.py
+++ b/pilot/server/llmserver.py
@@ -6,6 +6,7 @@ import json
 import os
 import sys
 from typing import List
+import platform
 
 import uvicorn
 from fastapi import BackgroundTasks, FastAPI, Request
@@ -95,7 +96,11 @@ class ModelWorker:
                 # Please do not open the output in production!
                 # The gpt4all thread shares stdout with the parent process,
                 # and opening it may affect the frontend output.
-                print("output: ", output)
+                if "windows" in platform.platform().lower():
+                    # Do not print the model output on Windows: it may contain emoji, which the GBK console encoding cannot handle
+                    pass
+                else:
+                    print("output: ", output)
                 # return some model context to dgt-server
                 ret = {"text": output, "error_code": 0, "model_context": model_context}
                 yield json.dumps(ret).encode() + b"\0"
diff --git a/requirements.txt b/requirements.txt
index 55fdbadfb..b9516b038 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-torch==2.0.0
+# torch==2.0.0
 aiohttp==3.8.4
 aiosignal==1.3.1
 async-timeout==4.0.2
@@ -47,7 +47,7 @@ gradio-client==0.0.8
 # llama-index==0.5.27
 
 # TODO move bitsandbytes to optional
-bitsandbytes
+# bitsandbytes
 accelerate>=0.20.3
 
 unstructured==0.6.3
diff --git a/scripts/examples/load_examples.bat b/scripts/examples/load_examples.bat
index 2ecf1c507..bd16f46f5 100644
--- a/scripts/examples/load_examples.bat
+++ b/scripts/examples/load_examples.bat
@@ -49,13 +49,14 @@ goto printUsage
 
 :printUsage
 echo USAGE: %0 [--db-file sqlite db file] [--sql-file sql file path to run]
-echo [-d|--db-file sqlite db file path] default: %DEFAULT_DB_FILE%
-echo [-f|--sql-file sqlite file to run] default: %DEFAULT_SQL_FILE%
-echo [-h|--help] Usage message
+echo [-d^|--db-file sqlite db file path] default: %DEFAULT_DB_FILE%
+echo [-f^|--sql-file sqlite file to run] default: %DEFAULT_SQL_FILE%
+echo [-h^|--help] Usage message
 exit /b 0
 
 :argDone
+
 if "%SQL_FILE%"=="" (
     if not exist "%WORK_DIR%\pilot\data" mkdir "%WORK_DIR%\pilot\data"
     for %%f in (%WORK_DIR%\docker\examples\sqls\*_sqlite.sql) do (
diff --git a/setup.py b/setup.py
index 5136f4fb8..c37013042 100644
--- a/setup.py
+++ b/setup.py
@@ -5,12 +5,19 @@ import platform
 import subprocess
 import os
 from enum import Enum
-
+import urllib.request
+from urllib.parse import urlparse, quote
+import re
+from pip._internal.utils.appdirs import user_cache_dir
+import shutil
+import tempfile
 from setuptools import find_packages
 
-with open("README.md", "r") as fh:
+with open("README.md", mode="r", encoding="utf-8") as fh:
     long_description = fh.read()
 
+BUILD_NO_CACHE = os.getenv("BUILD_NO_CACHE", "false").lower() == "true"
+
 
 def parse_requirements(file_name: str) -> List[str]:
     with open(file_name) as f:
@@ -21,9 +28,70 @@ def parse_requirements(file_name: str) -> List[str]:
     ]
 
 
+def get_latest_version(package_name: str, index_url: str, default_version: str):
+    command = [
+        "python",
+        "-m",
+        "pip",
+        "index",
+        "versions",
+        package_name,
+        "--index-url",
+        index_url,
+    ]
+
+    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    if result.returncode != 0:
+        print("Error executing command.")
+        print(result.stderr.decode())
+        return default_version
+
+    output = result.stdout.decode()
+    lines = output.split("\n")
+    for line in lines:
+        if "Available versions:" in line:
+            available_versions = line.split(":")[1].strip()
+            latest_version = available_versions.split(",")[0].strip()
+            return latest_version
+
+    return default_version
+
+
+def encode_url(package_url: str) -> Tuple[str, str]:
+    parsed_url = urlparse(package_url)
+    encoded_path = quote(parsed_url.path)
+    safe_url = parsed_url._replace(path=encoded_path).geturl()
+    return safe_url, parsed_url.path
+
+
+def cache_package(package_url: str, package_name: str, is_windows: bool = False):
+    safe_url, parsed_url = encode_url(package_url)
+    if BUILD_NO_CACHE:
+        return safe_url
+    filename = os.path.basename(parsed_url)
+    cache_dir = os.path.join(user_cache_dir("pip"), "http", "wheels", package_name)
+    os.makedirs(cache_dir, exist_ok=True)
+
+    local_path = os.path.join(cache_dir, filename)
+    if not os.path.exists(local_path):
+        # temp_file, temp_path = tempfile.mkstemp()
+        temp_path = local_path + ".tmp"
+        if os.path.exists(temp_path):
+            os.remove(temp_path)
+        try:
+            print(f"Download {safe_url} to {local_path}")
+            urllib.request.urlretrieve(safe_url, temp_path)
+            shutil.move(temp_path, local_path)
+        finally:
+            if os.path.exists(temp_path):
+                os.remove(temp_path)
+    return f"file:///{local_path}" if is_windows else f"file://{local_path}"
+
+
 class SetupSpec:
     def __init__(self) -> None:
         self.extras: dict = {}
+        self.install_requires: List[str] = []
 
 
 setup_spec = SetupSpec()
@@ -56,22 +124,27 @@ def get_cpu_avx_support() -> Tuple[OSType, AVXType]:
     cpu_avx = AVXType.BASIC
     env_cpu_avx = AVXType.of_type(os.getenv("DBGPT_LLAMA_CPP_AVX"))
 
-    cmds = ["lscpu"]
-    if system == "Windows":
-        cmds = ["coreinfo"]
+    if "windows" in system.lower():
         os_type = OSType.WINDOWS
+        output = "avx2"
+        print("Current platform is Windows, using avx2 as the default CPU architecture")
     elif system == "Linux":
-        cmds = ["lscpu"]
         os_type = OSType.LINUX
+        result = subprocess.run(
+            ["lscpu"], stdout=subprocess.PIPE, stderr=subprocess.PIPE
+        )
+        output = result.stdout.decode()
     elif system == "Darwin":
-        cmds = ["sysctl", "-a"]
         os_type = OSType.DARWIN
+        result = subprocess.run(
+            ["sysctl", "-a"], stdout=subprocess.PIPE, stderr=subprocess.PIPE
+        )
+        output = result.stdout.decode()
     else:
         os_type = OSType.OTHER
         print("Unsupported OS to get cpu avx, use default")
         return os_type, env_cpu_avx if env_cpu_avx else cpu_avx
-    result = subprocess.run(cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    output = result.stdout.decode()
+
     if "avx512" in output.lower():
         cpu_avx = AVXType.AVX512
     elif "avx2" in output.lower():
@@ -82,15 +155,97 @@ def get_cpu_avx_support() -> Tuple[OSType, AVXType]:
     return os_type, env_cpu_avx if env_cpu_avx else cpu_avx
 
 
-def get_cuda_version() -> str:
+def get_cuda_version_from_torch():
     try:
         import torch
 
         return torch.version.cuda
+    except:
+        return None
+
+
+def get_cuda_version_from_nvcc():
+    try:
+        output = subprocess.check_output(["nvcc", "--version"])
+        version_line = [
+            line for line in output.decode("utf-8").split("\n") if "release" in line
+        ][0]
+        return version_line.split("release")[-1].strip().split(",")[0]
+    except:
+        return None
+
+
+def get_cuda_version_from_nvidia_smi():
+    try:
+        output = subprocess.check_output(["nvidia-smi"]).decode("utf-8")
+        match = re.search(r"CUDA Version:\s+(\d+\.\d+)", output)
+        if match:
+            return match.group(1)
+        else:
+            return None
+    except:
+        return None
+
+
+def get_cuda_version() -> str:
+    try:
+        cuda_version = get_cuda_version_from_torch()
+        if not cuda_version:
+            cuda_version = get_cuda_version_from_nvcc()
+        if not cuda_version:
+            cuda_version = get_cuda_version_from_nvidia_smi()
+        return cuda_version
     except Exception:
         return None
 
 
+def torch_requires(
+    torch_version: str = "2.0.0",
+    torchvision_version: str = "0.15.1",
+    torchaudio_version: str = "2.0.1",
+):
+    torch_pkgs = []
+    os_type, _ = get_cpu_avx_support()
+    if os_type == OSType.DARWIN:
+        torch_pkgs = [
+            f"torch=={torch_version}",
+            f"torchvision=={torchvision_version}",
+            f"torchaudio=={torchaudio_version}",
+        ]
+    else:
+        cuda_version = get_cuda_version()
+        if not cuda_version:
+            torch_pkgs = [
+                f"torch=={torch_version}+cpu",
+                f"torchvision=={torchvision_version}+cpu",
+                f"torchaudio=={torchaudio_version}",
+            ]
+        else:
+            supported_versions = ["11.7", "11.8"]
+            if cuda_version not in supported_versions:
+                print(
+                    f"PyTorch {torch_version} supports CUDA versions {supported_versions}; falling back to {supported_versions[-1]}"
+                )
+                cuda_version = supported_versions[-1]
+            cuda_version = "cu" + cuda_version.replace(".", "")
+            py_version = "cp310"
+            os_pkg_name = "linux_x86_64" if os_type == OSType.LINUX else "win_amd64"
+            torch_url = f"https://download.pytorch.org/whl/{cuda_version}/torch-{torch_version}+{cuda_version}-{py_version}-{py_version}-{os_pkg_name}.whl"
+            torchvision_url = f"https://download.pytorch.org/whl/{cuda_version}/torchvision-{torchvision_version}+{cuda_version}-{py_version}-{py_version}-{os_pkg_name}.whl"
+            torch_url_cached = cache_package(
+                torch_url, "torch", os_type == OSType.WINDOWS
+            )
+            torchvision_url_cached = cache_package(
+                torchvision_url, "torchvision", os_type == OSType.WINDOWS
+            )
+            torch_pkgs = [
+                f"torch @ {torch_url_cached}",
+                f"torchvision @ {torchvision_url_cached}",
+                f"torchaudio=={torchaudio_version}",
+            ]
+    setup_spec.extras["torch"] = torch_pkgs
+
+
 def llama_cpp_python_cuda_requires():
     cuda_version = get_cuda_version()
     device = "cpu"
@@ -105,12 +260,15 @@ def llama_cpp_python_cuda_requires():
             f"llama_cpp_python_cuda just support in os: {[r._value_ for r in supported_os]}"
         )
         return
+    if cpu_avx == AVXType.AVX2 or cpu_avx == AVXType.AVX512:
+        cpu_avx = AVXType.AVX
     cpu_avx = cpu_avx._value_
     base_url = "https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui"
     llama_cpp_version = "0.1.77"
     py_version = "cp310"
     os_pkg_name = "linux_x86_64" if os_type == OSType.LINUX else "win_amd64"
     extra_index_url = f"{base_url}/llama_cpp_python_cuda-{llama_cpp_version}+{device}{cpu_avx}-{py_version}-{py_version}-{os_pkg_name}.whl"
+    extra_index_url, _ = encode_url(extra_index_url)
     print(f"Install llama_cpp_python_cuda from {extra_index_url}")
     setup_spec.extras["llama_cpp"].append(f"llama_cpp_python_cuda @ {extra_index_url}")
 
@@ -124,6 +282,26 @@ def llama_cpp_requires():
         llama_cpp_python_cuda_requires()
 
 
+def quantization_requires():
+    pkgs = []
+    os_type, _ = get_cpu_avx_support()
+    if os_type != OSType.WINDOWS:
+        pkgs = ["bitsandbytes"]
+    else:
+        latest_version = get_latest_version(
+            "bitsandbytes",
+            "https://jllllll.github.io/bitsandbytes-windows-webui",
+            "0.41.1",
+        )
+        extra_index_url = f"https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-{latest_version}-py3-none-win_amd64.whl"
+        local_pkg = cache_package(
+            extra_index_url, "bitsandbytes", os_type == OSType.WINDOWS
+        )
+        pkgs = [f"bitsandbytes @ {local_pkg}"]
+    print(pkgs)
+    setup_spec.extras["quantization"] = pkgs
+
+
 def all_vector_store_requires():
     """
     pip install "db-gpt[vstore]"
@@ -149,12 +327,22 @@ def all_requires():
     setup_spec.extras["all"] = list(requires)
 
 
+def init_install_requires():
+    setup_spec.install_requires += parse_requirements("requirements.txt")
+    setup_spec.install_requires += setup_spec.extras["torch"]
+    setup_spec.install_requires += setup_spec.extras["quantization"]
+    print(f"Install requires: \n{','.join(setup_spec.install_requires)}")
+
+
+torch_requires()
 llama_cpp_requires()
+quantization_requires()
 all_vector_store_requires()
 all_datasource_requires()
 
 # must be last
 all_requires()
+init_install_requires()
 
 setuptools.setup(
     name="db-gpt",
@@ -166,7 +354,7 @@ setuptools.setup(
     " With this solution, you can be assured that there is no risk of data leakage, and your data is 100% private and secure.",
     long_description=long_description,
     long_description_content_type="text/markdown",
-    install_requires=parse_requirements("requirements.txt"),
+    install_requires=setup_spec.install_requires,
     url="https://github.com/eosphoros-ai/DB-GPT",
     license="https://opensource.org/license/mit/",
     python_requires=">=3.10",
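
With `install_requires` now assembled by `init_install_requires()` (base requirements plus the `torch` and `quantization` extras resolved at build time), the editable install from the Q5 FAQ should pick up platform-appropriate wheels on both Linux and Windows. A minimal usage sketch, assuming Python 3.10 and the CUDA 11.7/11.8 range that `torch_requires()` targets; the extra names are taken from this `setup.py`:

```bash
# Editable install: setup.py detects the OS, AVX support, and CUDA version, then
# pins torch/torchvision wheels (CPU wheels when no CUDA toolkit or driver is found).
pip install -e .

# Optionally add the llama.cpp backend; on supported CUDA systems this resolves a
# prebuilt llama_cpp_python_cuda wheel via llama_cpp_python_cuda_requires().
pip install -e ".[llama_cpp]"
```

Downloaded wheels are kept under pip's cache directory by `cache_package()`, so repeated builds avoid re-downloading; setting `BUILD_NO_CACHE=true` bypasses that cache and installs straight from the remote URL.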