From bc2db7347ecef6a648651c4c2d3f41ed28719680 Mon Sep 17 00:00:00 2001 From: aries_ckt <916701291@qq.com> Date: Wed, 2 Aug 2023 15:55:16 +0800 Subject: [PATCH] test: add document embedding and URL embedding tests --- tests/unit/embedding_engine/document_test.py | 20 +++++++++++++++++++ .../{test_url_embedding.py => url_test.py} | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 tests/unit/embedding_engine/document_test.py rename tests/unit/embedding_engine/{test_url_embedding.py => url_test.py} (93%) diff --git a/tests/unit/embedding_engine/document_test.py b/tests/unit/embedding_engine/document_test.py new file mode 100644 index 000000000..54c764afd --- /dev/null +++ b/tests/unit/embedding_engine/document_test.py @@ -0,0 +1,20 @@ +from pilot import EmbeddingEngine, KnowledgeType + +embedding_model = "your_embedding_model" +vector_store_type = "Chroma" +chroma_persist_path = "your_persist_path" +vector_store_config = { + "vector_store_name": "document_test", + "vector_store_type": vector_store_type, + "chroma_persist_path": chroma_persist_path, +} + +# it can be .md,.pdf,.docx, .csv, .html +document_path = "your_path/test.md" +embedding_engine = EmbeddingEngine( + knowledge_source=document_path, + knowledge_type=KnowledgeType.DOCUMENT.value, + model_name=embedding_model, + vector_store_config=vector_store_config) +# embedding document content to vector store +embedding_engine.knowledge_embedding() \ No newline at end of file diff --git a/tests/unit/embedding_engine/test_url_embedding.py b/tests/unit/embedding_engine/url_test.py similarity index 93% rename from tests/unit/embedding_engine/test_url_embedding.py rename to tests/unit/embedding_engine/url_test.py index 30f2a36cb..ed88c4a4a 100644 --- a/tests/unit/embedding_engine/test_url_embedding.py +++ b/tests/unit/embedding_engine/url_test.py @@ -1,7 +1,7 @@ from pilot import EmbeddingEngine, KnowledgeType url = "https://db-gpt.readthedocs.io/en/latest/getting_started/getting_started.html" 
-embedding_model = "text2vec" +embedding_model = "your_embedding_model" vector_store_type = "Chroma" chroma_persist_path = "your_persist_path" vector_store_config = {