From 3f6e9687ee06d0dd456de2b2a09193111a97983f Mon Sep 17 00:00:00 2001
From: FangYin Cheng
Date: Mon, 21 Aug 2023 01:35:37 +0800
Subject: [PATCH] feat: Support windows

fix: Fix install error on linux
doc: Add torch cuda FAQ
---
 docs/getting_started/faq/deploy/deploy_faq.md |  37 ++-
 docs/getting_started/install/deploy/deploy.md |   5 +
 pilot/openapi/api_v1/api_v1.py                |   1 +
 pilot/server/knowledge/api.py                 |  42 ++--
 pilot/server/llmserver.py                     |   7 +-
 requirements.txt                              |   4 +-
 scripts/examples/load_examples.bat            |   7 +-
 setup.py                                      | 210 +++++++++++++++++-
 8 files changed, 274 insertions(+), 39 deletions(-)

diff --git a/docs/getting_started/faq/deploy/deploy_faq.md b/docs/getting_started/faq/deploy/deploy_faq.md
index 42a0e6afa..4735ae2e3 100644
--- a/docs/getting_started/faq/deploy/deploy_faq.md
+++ b/docs/getting_started/faq/deploy/deploy_faq.md
@@ -45,4 +45,39 @@ print(f'Public url: {url}')
 time.sleep(60 * 60 * 24)
 ```
 
-Open `url` with your browser to see the website.
\ No newline at end of file
+Open `url` with your browser to see the website.
+
+##### Q5: (Windows) Error when executing `pip install -e .`
+
+The error log looks like the following:
+```
+× python setup.py bdist_wheel did not run successfully.
+│ exit code: 1
+╰─> [11 lines of output]
+    running bdist_wheel
+    running build
+    running build_py
+    creating build
+    creating build\lib.win-amd64-cpython-310
+    creating build\lib.win-amd64-cpython-310\cchardet
+    copying src\cchardet\version.py -> build\lib.win-amd64-cpython-310\cchardet
+    copying src\cchardet\__init__.py -> build\lib.win-amd64-cpython-310\cchardet
+    running build_ext
+    building 'cchardet._cchardet' extension
+    error: Microsoft Visual C++ 14.0 or greater is required. Get it with "Microsoft C++ Build Tools": https://visualstudio.microsoft.com/visual-cpp-build-tools/
+    [end of output]
+```
+
+Download and install the `Microsoft C++ Build Tools` from [visual-cpp-build-tools](https://visualstudio.microsoft.com/visual-cpp-build-tools/).
+
+
+
+##### Q6: `Torch not compiled with CUDA enabled`
+
+```
+2023-08-19 16:24:30 | ERROR | stderr | raise AssertionError("Torch not compiled with CUDA enabled")
+2023-08-19 16:24:30 | ERROR | stderr | AssertionError: Torch not compiled with CUDA enabled
+```
+
+1. Install the [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit-archive).
+2. Reinstall PyTorch with CUDA support by following the [start-locally](https://pytorch.org/get-started/locally/#start-locally) guide.
\ No newline at end of file
diff --git a/docs/getting_started/install/deploy/deploy.md b/docs/getting_started/install/deploy/deploy.md
index 0422a5c58..2f2880fb2 100644
--- a/docs/getting_started/install/deploy/deploy.md
+++ b/docs/getting_started/install/deploy/deploy.md
@@ -102,6 +102,11 @@ You can configure basic parameters in the .env file, for example setting LLM_MOD
 bash ./scripts/examples/load_examples.sh
 ```
 
+On the Windows platform:
+```PowerShell
+.\scripts\examples\load_examples.bat
+```
+
 1.Run db-gpt server
 
 ```bash
diff --git a/pilot/openapi/api_v1/api_v1.py b/pilot/openapi/api_v1/api_v1.py
index 7fedd5e11..1d01b31b6 100644
--- a/pilot/openapi/api_v1/api_v1.py
+++ b/pilot/openapi/api_v1/api_v1.py
@@ -292,6 +292,7 @@ async def no_stream_generator(chat):
 
 async def stream_generator(chat):
     model_response = chat.stream_call()
+    msg = "[LLM_ERROR]: llm server has no output, maybe your prompt template is wrong."
     if not CFG.NEW_SERVER_MODE:
         for chunk in model_response.iter_lines(decode_unicode=False, delimiter=b"\0"):
             if chunk:
diff --git a/pilot/server/knowledge/api.py b/pilot/server/knowledge/api.py
index 51ee7f924..e2905d181 100644
--- a/pilot/server/knowledge/api.py
+++ b/pilot/server/knowledge/api.py
@@ -1,5 +1,6 @@
 import os
 import shutil
+import tempfile
 from tempfile import NamedTemporaryFile
 
 from fastapi import APIRouter, File, UploadFile, Form
@@ -130,29 +131,28 @@ async def document_upload(
         if doc_file:
             if not os.path.exists(os.path.join(KNOWLEDGE_UPLOAD_ROOT_PATH, space_name)):
                 os.makedirs(os.path.join(KNOWLEDGE_UPLOAD_ROOT_PATH, space_name))
-            with NamedTemporaryFile(
-                dir=os.path.join(KNOWLEDGE_UPLOAD_ROOT_PATH, space_name), delete=False
-            ) as tmp:
+            # We cannot move the temp file on Windows while it is still open in a `with` block
+            tmp_fd, tmp_path = tempfile.mkstemp(
+                dir=os.path.join(KNOWLEDGE_UPLOAD_ROOT_PATH, space_name)
+            )
+            with os.fdopen(tmp_fd, "wb") as tmp:
                 tmp.write(await doc_file.read())
-                tmp_path = tmp.name
-                shutil.move(
-                    tmp_path,
-                    os.path.join(
-                        KNOWLEDGE_UPLOAD_ROOT_PATH, space_name, doc_file.filename
-                    ),
+            shutil.move(
+                tmp_path,
+                os.path.join(KNOWLEDGE_UPLOAD_ROOT_PATH, space_name, doc_file.filename),
+            )
+            request = KnowledgeDocumentRequest()
+            request.doc_name = doc_name
+            request.doc_type = doc_type
+            request.content = os.path.join(
+                KNOWLEDGE_UPLOAD_ROOT_PATH, space_name, doc_file.filename
+            )
+            return Result.succ(
+                knowledge_space_service.create_knowledge_document(
+                    space=space_name, request=request
                 )
-                request = KnowledgeDocumentRequest()
-                request.doc_name = doc_name
-                request.doc_type = doc_type
-                request.content = os.path.join(
-                    KNOWLEDGE_UPLOAD_ROOT_PATH, space_name, doc_file.filename
-                )
-                return Result.succ(
-                    knowledge_space_service.create_knowledge_document(
-                        space=space_name, request=request
-                    )
-                )
-                # return Result.succ([])
+            )
+            # return Result.succ([])
         return Result.faild(code="E000X", msg=f"doc_file is None")
     except Exception as e:
         return Result.faild(code="E000X", msg=f"document add error {e}")
diff --git a/pilot/server/llmserver.py b/pilot/server/llmserver.py
index 10cdb0299..a0c3b6cd8 100644
--- a/pilot/server/llmserver.py
+++ b/pilot/server/llmserver.py
@@ -6,6 +6,7 @@ import json
 import os
 import sys
 from typing import List
+import platform
 
 import uvicorn
 from fastapi import BackgroundTasks, FastAPI, Request
@@ -95,7 +96,11 @@ class ModelWorker:
                 # Please do not open the output in production!
                 # The gpt4all thread shares stdout with the parent process,
                 # and opening it may affect the frontend output.
-                print("output: ", output)
+                if "windows" in platform.platform().lower():
+                    # Do not print the model output on Windows: it may contain emoji, which the GBK console encoding cannot handle
+                    pass
+                else:
+                    print("output: ", output)
                 # return some model context to dgt-server
                 ret = {"text": output, "error_code": 0, "model_context": model_context}
                 yield json.dumps(ret).encode() + b"\0"
diff --git a/requirements.txt b/requirements.txt
index 55fdbadfb..b9516b038 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-torch==2.0.0
+# torch==2.0.0
 aiohttp==3.8.4
 aiosignal==1.3.1
 async-timeout==4.0.2
@@ -47,7 +47,7 @@ gradio-client==0.0.8
 # llama-index==0.5.27
 
 # TODO move bitsandbytes to optional
-bitsandbytes
+# bitsandbytes
 accelerate>=0.20.3
 
 unstructured==0.6.3
diff --git a/scripts/examples/load_examples.bat b/scripts/examples/load_examples.bat
index 2ecf1c507..bd16f46f5 100644
--- a/scripts/examples/load_examples.bat
+++ b/scripts/examples/load_examples.bat
@@ -49,13 +49,14 @@ goto printUsage
 
 :printUsage
 echo USAGE: %0 [--db-file sqlite db file] [--sql-file sql file path to run]
-echo [-d|--db-file sqlite db file path] default: %DEFAULT_DB_FILE%
-echo [-f|--sql-file sqlite file to run] default: %DEFAULT_SQL_FILE%
-echo [-h|--help] Usage message
+echo [-d^|--db-file sqlite db file path] default: %DEFAULT_DB_FILE%
+echo [-f^|--sql-file sqlite file to run] default: %DEFAULT_SQL_FILE%
+echo [-h^|--help] Usage message
 exit /b 0
 
 :argDone
+
 if "%SQL_FILE%"=="" (
     if not exist "%WORK_DIR%\pilot\data" mkdir "%WORK_DIR%\pilot\data"
     for %%f in (%WORK_DIR%\docker\examples\sqls\*_sqlite.sql) do (
diff --git a/setup.py b/setup.py
index 5136f4fb8..c37013042 100644
--- a/setup.py
+++ b/setup.py
@@ -5,12 +5,19 @@ import platform
 import subprocess
 import os
 from enum import Enum
-
+import urllib.request
+from urllib.parse import urlparse, quote
+import re
+from pip._internal.utils.appdirs import user_cache_dir
+import shutil
+import tempfile
 from setuptools import find_packages
 
-with open("README.md", "r") as fh:
+with open("README.md", mode="r", encoding="utf-8") as fh:
     long_description = fh.read()
 
+BUILD_NO_CACHE = os.getenv("BUILD_NO_CACHE", "false").lower() == "true"
+
 
 def parse_requirements(file_name: str) -> List[str]:
     with open(file_name) as f:
@@ -21,9 +28,70 @@ def parse_requirements(file_name: str) -> List[str]:
     ]
 
 
+def get_latest_version(package_name: str, index_url: str, default_version: str):
+    command = [
+        "python",
+        "-m",
+        "pip",
+        "index",
+        "versions",
+        package_name,
+        "--index-url",
+        index_url,
+    ]
+
+    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    if result.returncode != 0:
+        print("Error executing command.")
+        print(result.stderr.decode())
+        return default_version
+
+    output = result.stdout.decode()
+    lines = output.split("\n")
+    for line in lines:
+        if "Available versions:" in line:
+            available_versions = line.split(":")[1].strip()
+            latest_version = available_versions.split(",")[0].strip()
+            return latest_version
+
+    return default_version
+
+
+def encode_url(package_url: str) -> Tuple[str, str]:
+    parsed_url = urlparse(package_url)
+    encoded_path = quote(parsed_url.path)
+    safe_url = parsed_url._replace(path=encoded_path).geturl()
+    return safe_url, parsed_url.path
+
+
+def cache_package(package_url: str, package_name: str, is_windows: bool = False):
+    safe_url, parsed_url = encode_url(package_url)
+    if BUILD_NO_CACHE:
+        return safe_url
+    filename = os.path.basename(parsed_url)
+    cache_dir = os.path.join(user_cache_dir("pip"), "http", "wheels", package_name)
+    os.makedirs(cache_dir, exist_ok=True)
+
+    local_path = os.path.join(cache_dir, filename)
+    if not os.path.exists(local_path):
+        # temp_file, temp_path = tempfile.mkstemp()
+        temp_path = local_path + ".tmp"
+        if os.path.exists(temp_path):
+            os.remove(temp_path)
+        try:
+            print(f"Download {safe_url} to {local_path}")
+            urllib.request.urlretrieve(safe_url, temp_path)
+            shutil.move(temp_path, local_path)
+        finally:
+            if os.path.exists(temp_path):
+                os.remove(temp_path)
+    return f"file:///{local_path}" if is_windows else f"file://{local_path}"
+
+
 class SetupSpec:
     def __init__(self) -> None:
         self.extras: dict = {}
+        self.install_requires: List[str] = []
 
 
 setup_spec = SetupSpec()
@@ -56,22 +124,27 @@ def get_cpu_avx_support() -> Tuple[OSType, AVXType]:
     cpu_avx = AVXType.BASIC
     env_cpu_avx = AVXType.of_type(os.getenv("DBGPT_LLAMA_CPP_AVX"))
 
-    cmds = ["lscpu"]
-    if system == "Windows":
-        cmds = ["coreinfo"]
+    if "windows" in system.lower():
         os_type = OSType.WINDOWS
+        output = "avx2"
+        print("Current platform is Windows, using avx2 as the default CPU architecture")
     elif system == "Linux":
-        cmds = ["lscpu"]
         os_type = OSType.LINUX
+        result = subprocess.run(
+            ["lscpu"], stdout=subprocess.PIPE, stderr=subprocess.PIPE
+        )
+        output = result.stdout.decode()
     elif system == "Darwin":
-        cmds = ["sysctl", "-a"]
         os_type = OSType.DARWIN
+        result = subprocess.run(
+            ["sysctl", "-a"], stdout=subprocess.PIPE, stderr=subprocess.PIPE
+        )
+        output = result.stdout.decode()
     else:
         os_type = OSType.OTHER
         print("Unsupported OS to get cpu avx, use default")
         return os_type, env_cpu_avx if env_cpu_avx else cpu_avx
-    result = subprocess.run(cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    output = result.stdout.decode()
+
     if "avx512" in output.lower():
         cpu_avx = AVXType.AVX512
     elif "avx2" in output.lower():
@@ -82,15 +155,97 @@ def get_cpu_avx_support() -> Tuple[OSType, AVXType]:
     return os_type, env_cpu_avx if env_cpu_avx else cpu_avx
 
 
-def get_cuda_version() -> str:
+def get_cuda_version_from_torch():
     try:
         import torch
 
         return torch.version.cuda
+    except:
+        return None
+
+
+def get_cuda_version_from_nvcc():
+    try:
+        output = subprocess.check_output(["nvcc", "--version"])
+        version_line = [
+            line for line in output.decode("utf-8").split("\n") if "release" in line
+        ][0]
+        return version_line.split("release")[-1].strip().split(",")[0]
+    except:
+        return None
+
+
+def get_cuda_version_from_nvidia_smi():
+    try:
+        output = subprocess.check_output(["nvidia-smi"]).decode("utf-8")
+        match = re.search(r"CUDA Version:\s+(\d+\.\d+)", output)
+        if match:
+            return match.group(1)
+        else:
+            return None
+    except:
+        return None
+
+
+def get_cuda_version() -> str:
+    try:
+        cuda_version = get_cuda_version_from_torch()
+        if not cuda_version:
+            cuda_version = get_cuda_version_from_nvcc()
+        if not cuda_version:
+            cuda_version = get_cuda_version_from_nvidia_smi()
+        return cuda_version
     except Exception:
         return None
 
 
+def torch_requires(
+    torch_version: str = "2.0.0",
+    torchvision_version: str = "0.15.1",
+    torchaudio_version: str = "2.0.1",
+):
+    torch_pkgs = []
+    os_type, _ = get_cpu_avx_support()
+    if os_type == OSType.DARWIN:
+        torch_pkgs = [
+            f"torch=={torch_version}",
+            f"torchvision=={torchvision_version}",
+            f"torchaudio=={torchaudio_version}",
+        ]
+    else:
+        cuda_version = get_cuda_version()
+        if not cuda_version:
+            torch_pkgs = [
+                f"torch=={torch_version}+cpu",
+                f"torchvision=={torchvision_version}+cpu",
+                f"torchaudio=={torchaudio_version}",
+            ]
+        else:
+            supported_versions = ["11.7", "11.8"]
+            if cuda_version not in supported_versions:
+                print(
+                    f"PyTorch {torch_version} supports CUDA versions {supported_versions}; falling back to {supported_versions[-1]}"
+                )
+                cuda_version = supported_versions[-1]
+            cuda_version = "cu" + cuda_version.replace(".", "")
+            py_version = "cp310"
+            os_pkg_name = "linux_x86_64" if os_type == OSType.LINUX else "win_amd64"
+            torch_url = f"https://download.pytorch.org/whl/{cuda_version}/torch-{torch_version}+{cuda_version}-{py_version}-{py_version}-{os_pkg_name}.whl"
+            torchvision_url = f"https://download.pytorch.org/whl/{cuda_version}/torchvision-{torchvision_version}+{cuda_version}-{py_version}-{py_version}-{os_pkg_name}.whl"
+            torch_url_cached = cache_package(
+                torch_url, "torch", os_type == OSType.WINDOWS
+            )
+            torchvision_url_cached = cache_package(
+                torchvision_url, "torchvision", os_type == OSType.WINDOWS
+            )
+            torch_pkgs = [
+                f"torch @ {torch_url_cached}",
+                f"torchvision @ {torchvision_url_cached}",
+                f"torchaudio=={torchaudio_version}",
+            ]
+    setup_spec.extras["torch"] = torch_pkgs
+
+
 def llama_cpp_python_cuda_requires():
     cuda_version = get_cuda_version()
     device = "cpu"
@@ -105,12 +260,15 @@ def llama_cpp_python_cuda_requires():
             f"llama_cpp_python_cuda just support in os: {[r._value_ for r in supported_os]}"
         )
         return
+    if cpu_avx == AVXType.AVX2 or cpu_avx == AVXType.AVX512:
+        cpu_avx = AVXType.AVX
     cpu_avx = cpu_avx._value_
     base_url = "https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui"
     llama_cpp_version = "0.1.77"
     py_version = "cp310"
     os_pkg_name = "linux_x86_64" if os_type == OSType.LINUX else "win_amd64"
     extra_index_url = f"{base_url}/llama_cpp_python_cuda-{llama_cpp_version}+{device}{cpu_avx}-{py_version}-{py_version}-{os_pkg_name}.whl"
+    extra_index_url, _ = encode_url(extra_index_url)
     print(f"Install llama_cpp_python_cuda from {extra_index_url}")
     setup_spec.extras["llama_cpp"].append(f"llama_cpp_python_cuda @ {extra_index_url}")
 
@@ -124,6 +282,26 @@ def llama_cpp_requires():
         llama_cpp_python_cuda_requires()
 
 
+def quantization_requires():
+    pkgs = []
+    os_type, _ = get_cpu_avx_support()
+    if os_type != OSType.WINDOWS:
+        pkgs = ["bitsandbytes"]
+    else:
+        latest_version = get_latest_version(
+            "bitsandbytes",
+            "https://jllllll.github.io/bitsandbytes-windows-webui",
+            "0.41.1",
+        )
+        extra_index_url = f"https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-{latest_version}-py3-none-win_amd64.whl"
+        local_pkg = cache_package(
+            extra_index_url, "bitsandbytes", os_type == OSType.WINDOWS
+        )
+        pkgs = [f"bitsandbytes @ {local_pkg}"]
+    print(pkgs)
+    setup_spec.extras["quantization"] = pkgs
+
+
 def all_vector_store_requires():
     """
     pip install "db-gpt[vstore]"
@@ -149,12 +327,22 @@ def all_requires():
     setup_spec.extras["all"] = list(requires)
 
 
+def init_install_requires():
+    setup_spec.install_requires += parse_requirements("requirements.txt")
+    setup_spec.install_requires += setup_spec.extras["torch"]
+    setup_spec.install_requires += setup_spec.extras["quantization"]
+    print(f"Install requires: \n{','.join(setup_spec.install_requires)}")
+
+
+torch_requires()
 llama_cpp_requires()
+quantization_requires()
 all_vector_store_requires()
 all_datasource_requires()
 
 # must be last
 all_requires()
+init_install_requires()
 
 setuptools.setup(
     name="db-gpt",
@@ -166,7 +354,7 @@ setuptools.setup(
     " With this solution, you can be assured that there is no risk of data leakage, and your data is 100% private and secure.",
     long_description=long_description,
     long_description_content_type="text/markdown",
-    install_requires=parse_requirements("requirements.txt"),
+    install_requires=setup_spec.install_requires,
     url="https://github.com/eosphoros-ai/DB-GPT",
     license="https://opensource.org/license/mit/",
     python_requires=">=3.10",
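
With `install_requires` now assembled by `init_install_requires()` (base requirements plus the `torch` and `quantization` extras resolved at build time), the editable install from the Q5 FAQ should pick up platform-appropriate wheels on both Linux and Windows. A minimal usage sketch, assuming Python 3.10 and the CUDA 11.7/11.8 range that `torch_requires()` targets; the extra names are taken from this `setup.py`:

```bash
# Editable install: setup.py detects the OS, AVX support, and CUDA version, then
# pins torch/torchvision wheels (CPU wheels when no CUDA toolkit or driver is found).
pip install -e .

# Optionally add the llama.cpp backend; on supported CUDA systems this resolves a
# prebuilt llama_cpp_python_cuda wheel via llama_cpp_python_cuda_requires().
pip install -e ".[llama_cpp]"
```

Downloaded wheels are kept under pip's cache directory by `cache_package()`, so repeated builds avoid re-downloading; setting `BUILD_NO_CACHE=true` bypasses that cache and installs straight from the remote URL.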