Patch summary (text before the first "diff --git" header is ignored by patch tools):
  * Adds tests/unit/embedding_engine/document_test.py, a script that builds an
    EmbeddingEngine for a local document and calls knowledge_embedding() with a
    Chroma vector store config.
  * Renames tests/unit/embedding_engine/test_url_embedding.py to url_test.py and
    replaces the hard-coded "text2vec" model name with a placeholder.
  NOTE(review): indentation of the added Python lines was reconstructed as
  4 spaces; confirm against the original commit.

diff --git a/tests/unit/embedding_engine/document_test.py b/tests/unit/embedding_engine/document_test.py
new file mode 100644
index 000000000..54c764afd
--- /dev/null
+++ b/tests/unit/embedding_engine/document_test.py
@@ -0,0 +1,20 @@
+from pilot import EmbeddingEngine, KnowledgeType
+
+embedding_model = "your_embedding_model"
+vector_store_type = "Chroma"
+chroma_persist_path = "your_persist_path"
+vector_store_config = {
+    "vector_store_name": "document_test",
+    "vector_store_type": vector_store_type,
+    "chroma_persist_path": chroma_persist_path,
+}
+
+# it can be .md,.pdf,.docx, .csv, .html
+document_path = "your_path/test.md"
+embedding_engine = EmbeddingEngine(
+    knowledge_source=document_path,
+    knowledge_type=KnowledgeType.DOCUMENT.value,
+    model_name=embedding_model,
+    vector_store_config=vector_store_config)
+# embedding document content to vector store
+embedding_engine.knowledge_embedding()
\ No newline at end of file
diff --git a/tests/unit/embedding_engine/test_url_embedding.py b/tests/unit/embedding_engine/url_test.py
similarity index 93%
rename from tests/unit/embedding_engine/test_url_embedding.py
rename to tests/unit/embedding_engine/url_test.py
index 30f2a36cb..ed88c4a4a 100644
--- a/tests/unit/embedding_engine/test_url_embedding.py
+++ b/tests/unit/embedding_engine/url_test.py
@@ -1,7 +1,7 @@
 from pilot import EmbeddingEngine, KnowledgeType
 
 url = "https://db-gpt.readthedocs.io/en/latest/getting_started/getting_started.html"
-embedding_model = "text2vec"
+embedding_model = "your_embedding_model"
 vector_store_type = "Chroma"
 chroma_persist_path = "your_persist_path"
 vector_store_config = {