diff --git a/pilot/configs/model_config.py b/pilot/configs/model_config.py
index faa93227f..da68ab332 100644
--- a/pilot/configs/model_config.py
+++ b/pilot/configs/model_config.py
@@ -21,15 +21,17 @@ LLM_MODEL_CONFIG = {
     "flan-t5-base": os.path.join(MODEL_PATH, "flan-t5-base"),
     "vicuna-13b": os.path.join(MODEL_PATH, "vicuna-13b"),
     "text2vec": os.path.join(MODEL_PATH, "text2vec-large-chinese"),
+    "text2vec-base": os.path.join(MODEL_PATH, "text2vec-base-chinese"),
     "sentence-transforms": os.path.join(MODEL_PATH, "all-MiniLM-L6-v2")
 }
 
 
-VECTOR_SEARCH_TOP_K = 3
+VECTOR_SEARCH_TOP_K = 20
 LLM_MODEL = "vicuna-13b"
 LIMIT_MODEL_CONCURRENCY = 5
 MAX_POSITION_EMBEDDINGS = 4096 
-VICUNA_MODEL_SERVER = "http://121.41.227.141:8000"
+# VICUNA_MODEL_SERVER = "http://121.41.227.141:8000"
+VICUNA_MODEL_SERVER = "http://120.79.27.110:8000"
 
 # Load model config
 ISLOAD_8BIT = True
@@ -44,4 +46,5 @@ DB_SETTINGS = {
 }
 
 VS_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "vs_store")
-KNOWLEDGE_UPLOAD_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data")
\ No newline at end of file
+KNOWLEDGE_UPLOAD_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data")
+KNOWLEDGE_CHUNK_SPLIT_SIZE = 100
diff --git a/pilot/server/webserver.py b/pilot/server/webserver.py
index 07d94b773..25940a437 100644
--- a/pilot/server/webserver.py
+++ b/pilot/server/webserver.py
@@ -499,6 +499,7 @@ def build_single_model_ui():
                         files = gr.File(label="添加文件",
                                         file_types=[".txt", ".md", ".docx", ".pdf"],
                                         file_count="multiple",
+                                        allow_flagged_uploads=True,
                                         show_label=False
                                         )
 
diff --git a/pilot/source_embedding/chn_document_splitter.py b/pilot/source_embedding/chn_document_splitter.py
index 090a6af56..10a77aeca 100644
--- a/pilot/source_embedding/chn_document_splitter.py
+++ b/pilot/source_embedding/chn_document_splitter.py
@@ -9,33 +9,17 @@ class CHNDocumentSplitter(CharacterTextSplitter):
         self.pdf = pdf
         self.sentence_size = sentence_size
 
-    # def split_text_version2(self, text: str) -> List[str]:
-    #     if self.pdf:
-    #         text = re.sub(r"\n{3,}", "\n", text)
-    #         text = re.sub('\s', ' ', text)
-    #         text = text.replace("\n\n", "")
-    #     sent_sep_pattern = re.compile('([﹒﹔﹖﹗．。！？]["’”」』]{0,2}|(?=["‘“「『]{1,2}|$))')  # del ：；
-    #     sent_list = []
-    #     for ele in sent_sep_pattern.split(text):
-    #         if sent_sep_pattern.match(ele) and sent_list:
-    #             sent_list[-1] += ele
-    #         elif ele:
-    #             sent_list.append(ele)
-    #     return sent_list
-
     def split_text(self, text: str) -> List[str]:
         if self.pdf:
             text = re.sub(r"\n{3,}", r"\n", text)
             text = re.sub('\s', " ", text)
             text = re.sub("\n\n", "", text)
 
-        text = re.sub(r'([;；.!?。！？\?])([^”’])', r"\1\n\2", text)  # 单字符断句符
-        text = re.sub(r'(\.{6})([^"’”」』])', r"\1\n\2", text)  # 英文省略号
-        text = re.sub(r'(\…{2})([^"’”」』])', r"\1\n\2", text)  # 中文省略号
+        text = re.sub(r'([;；.!?。！？\?])([^”’])', r"\1\n\2", text)
+        text = re.sub(r'(\.{6})([^"’”」』])', r"\1\n\2", text)
+        text = re.sub(r'(\…{2})([^"’”」』])', r"\1\n\2", text)
         text = re.sub(r'([;；!?。！？\?]["’”」』]{0,2})([^;；!?，。！？\?])', r'\1\n\2', text)
-        # 如果双引号前有终止符，那么双引号才是句子的终点，把分句符\n放到双引号后，注意前面的几句都小心保留了双引号
-        text = text.rstrip()  # 段尾如果有多余的\n就去掉它
-        # 很多规则中会考虑分号;，但是这里我把它忽略不计，破折号、英文双引号等同样忽略，需要的再做些简单调整即可。
+        text = text.rstrip()
         ls = [i for i in text.split("\n") if i]
         for ele in ls:
             if len(ele) > self.sentence_size:
diff --git a/pilot/source_embedding/knowledge_embedding.py b/pilot/source_embedding/knowledge_embedding.py
index 594723b6e..08d962908 100644
--- a/pilot/source_embedding/knowledge_embedding.py
+++ b/pilot/source_embedding/knowledge_embedding.py
@@ -4,13 +4,15 @@ from bs4 import BeautifulSoup
 from langchain.document_loaders import PyPDFLoader, TextLoader, markdown
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.vectorstores import Chroma
-from pilot.configs.model_config import DATASETS_DIR
+from pilot.configs.model_config import DATASETS_DIR, KNOWLEDGE_CHUNK_SPLIT_SIZE
 from pilot.source_embedding.chn_document_splitter import CHNDocumentSplitter
 from pilot.source_embedding.csv_embedding import CSVEmbedding
 from pilot.source_embedding.markdown_embedding import MarkdownEmbedding
 from pilot.source_embedding.pdf_embedding import PDFEmbedding
 import markdown
 
+from pilot.source_embedding.pdf_loader import UnstructuredPaddlePDFLoader
+
 
 class KnowledgeEmbedding:
     def __init__(self, file_path, model_name, vector_store_config, local_persist=True):
@@ -63,7 +65,7 @@ class KnowledgeEmbedding:
                 print("directly return vector store")
                 vector_store = Chroma(persist_directory=persist_dir, embedding_function=self.embeddings)
         else:
-            print(vector_name + "is new vector store, knowledge begin load...")
+            print(vector_name + " is new vector store, knowledge begin load...")
             documents = self._load_knownlege(self.file_path)
             vector_store = Chroma.from_documents(documents=documents,
                                                  embedding=self.embeddings,
@@ -88,7 +90,7 @@ class KnowledgeEmbedding:
     def _load_file(self, filename):
         if filename.lower().endswith(".md"):
             loader = TextLoader(filename)
-            text_splitter = CHNDocumentSplitter(pdf=True, sentence_size=100)
+            text_splitter = CHNDocumentSplitter(pdf=True, sentence_size=KNOWLEDGE_CHUNK_SPLIT_SIZE)
             docs = loader.load_and_split(text_splitter)
             i = 0
             for d in docs:
@@ -100,11 +102,15 @@ class KnowledgeEmbedding:
                 docs[i].page_content = docs[i].page_content.replace("\n", " ")
                 i += 1
         elif filename.lower().endswith(".pdf"):
-            loader = PyPDFLoader(filename)
-            textsplitter = CHNDocumentSplitter(pdf=True, sentence_size=100)
+            loader = UnstructuredPaddlePDFLoader(filename)
+            textsplitter = CHNDocumentSplitter(pdf=True, sentence_size=KNOWLEDGE_CHUNK_SPLIT_SIZE)
             docs = loader.load_and_split(textsplitter)
+            i = 0
+            for d in docs:
+                docs[i].page_content = d.page_content.replace("\n", " ").replace("�", "")
+                i += 1
         else:
             loader = TextLoader(filename)
-            text_splitor = CHNDocumentSplitter(sentence_size=100)
+            text_splitor = CHNDocumentSplitter(sentence_size=KNOWLEDGE_CHUNK_SPLIT_SIZE)
             docs = loader.load_and_split(text_splitor)
         return docs
\ No newline at end of file
diff --git a/pilot/source_embedding/markdown_embedding.py b/pilot/source_embedding/markdown_embedding.py
index fee9504b6..834226f75 100644
--- a/pilot/source_embedding/markdown_embedding.py
+++ b/pilot/source_embedding/markdown_embedding.py
@@ -7,6 +7,7 @@ from bs4 import BeautifulSoup
 from langchain.document_loaders import TextLoader
 from langchain.schema import Document
 import markdown
+from pilot.configs.model_config import KNOWLEDGE_CHUNK_SPLIT_SIZE
 
 from pilot.source_embedding import SourceEmbedding, register
 from pilot.source_embedding.chn_document_splitter import CHNDocumentSplitter
@@ -26,7 +27,7 @@ class MarkdownEmbedding(SourceEmbedding):
     def read(self):
         """Load from markdown path."""
         loader = TextLoader(self.file_path)
-        text_splitter = CHNDocumentSplitter(pdf=True, sentence_size=100)
+        text_splitter = CHNDocumentSplitter(pdf=True, sentence_size=KNOWLEDGE_CHUNK_SPLIT_SIZE)
         return loader.load_and_split(text_splitter)
 
     @register
diff --git a/pilot/source_embedding/pdf_embedding.py b/pilot/source_embedding/pdf_embedding.py
index bd0ae3aba..a8749695b 100644
--- a/pilot/source_embedding/pdf_embedding.py
+++ b/pilot/source_embedding/pdf_embedding.py
@@ -2,11 +2,12 @@
 # -*- coding: utf-8 -*-
 from typing import List
 
-from langchain.document_loaders import PyPDFLoader
 from langchain.schema import Document
+from pilot.configs.model_config import KNOWLEDGE_CHUNK_SPLIT_SIZE
 
 from pilot.source_embedding import SourceEmbedding, register
 from pilot.source_embedding.chn_document_splitter import CHNDocumentSplitter
+from pilot.source_embedding.pdf_loader import UnstructuredPaddlePDFLoader
 
 
 class PDFEmbedding(SourceEmbedding):
@@ -22,8 +23,8 @@ class PDFEmbedding(SourceEmbedding):
     @register
     def read(self):
         """Load from pdf path."""
-        loader = PyPDFLoader(self.file_path)
-        textsplitter = CHNDocumentSplitter(pdf=True, sentence_size=100)
+        loader = UnstructuredPaddlePDFLoader(self.file_path)
+        textsplitter = CHNDocumentSplitter(pdf=True, sentence_size=KNOWLEDGE_CHUNK_SPLIT_SIZE)
         return loader.load_and_split(textsplitter)
 
     @register
diff --git a/pilot/source_embedding/search_milvus.py b/pilot/source_embedding/search_milvus.py
index 18f93d1d3..ec0aa6813 100644
--- a/pilot/source_embedding/search_milvus.py
+++ b/pilot/source_embedding/search_milvus.py
@@ -50,7 +50,7 @@
 #
 # # text_embeddings = Text2Vectors()
 # mivuls = MilvusStore(cfg={"url": "127.0.0.1", "port": "19530", "alias": "default", "table_name": "test_k"})
-# 
+#
 # mivuls.insert(["textc","tezt2"])
 # print("success")
 # ct
diff --git a/pilot/vector_store/milvus_store.py b/pilot/vector_store/milvus_store.py
index 1f07c969e..eda0b4e38 100644
--- a/pilot/vector_store/milvus_store.py
+++ b/pilot/vector_store/milvus_store.py
@@ -1,6 +1,7 @@
-
+from langchain.embeddings import HuggingFaceEmbeddings
 from pymilvus import DataType, FieldSchema, CollectionSchema, connections, Collection
 
+from pilot.configs.model_config import LLM_MODEL_CONFIG
 from pilot.vector_store.vector_store_base import VectorStoreBase
 
 
@@ -9,7 +10,7 @@ class MilvusStore(VectorStoreBase):
         """Construct a milvus memory storage connection.
 
         Args:
-            cfg (Config): Auto-GPT global config.
+            cfg (Config): MilvusStore global config.
         """
         # self.configure(cfg)
 
@@ -71,21 +72,21 @@ class MilvusStore(VectorStoreBase):
                 self.index_params,
                 index_name="vector",
             )
+        info = self.collection.describe()
         self.collection.load()
 
-    # def add(self, data) -> str:
-    #     """Add an embedding of data into milvus.
-    #
-    #     Args:
-    #         data (str): The raw text to construct embedding index.
-    #
-    #     Returns:
-    #         str: log.
-    #     """
-    #     embedding = get_ada_embedding(data)
-    #     result = self.collection.insert([[embedding], [data]])
-    #     _text = (
-    #         "Inserting data into memory at primary key: "
-    #         f"{result.primary_keys[0]}:\n data: {data}"
-    #     )
-    #     return _text
\ No newline at end of file
+    def insert(self, text) -> str:
+        """Embed text with HuggingFaceEmbeddings and insert it into the milvus collection.
+        Args:
+            text (str | List[str]): Raw text(s) to index; a lone str is treated as one document.
+        Returns:
+            str: log line with the first returned primary key and the given data.
+        """
+        # embed_documents expects a list of texts; wrap a bare str so it stays one document.
+        texts = [text] if isinstance(text, str) else text
+        embeddings = HuggingFaceEmbeddings(model_name=LLM_MODEL_CONFIG["sentence-transforms"])
+        result = self.collection.insert([embeddings.embed_documents(texts), texts])
+        return (
+            "Inserting data into memory at primary key: "
+            f"{result.primary_keys[0]}:\n data: {text}"
+        )
\ No newline at end of file
diff --git a/tools/knowlege_init.py b/tools/knowlege_init.py
index bc827953d..e9ecad49a 100644
--- a/tools/knowlege_init.py
+++ b/tools/knowlege_init.py
@@ -41,5 +41,4 @@ if __name__ == "__main__":
     append_mode = args.append
     kv  = LocalKnowledgeInit()
     vector_store = kv.knowledge_persist(file_path=DATASETS_DIR, vector_name=vector_name, append_mode=append_mode)
-    docs = vector_store.similarity_search("小明",1)
     print("your knowledge embedding success...")
\ No newline at end of file