fix:issue2484 (#2485)

Close #2484 
# Description
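Fix issue #2484: when a knowledge space name contains Chinese characters, `ChromaStore` now uses the hex encoding of the name's UTF-8 bytes as the Chroma collection name instead of the raw name.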

# How Has This Been Tested?
Created a knowledge space whose name contains Chinese characters.

# Snapshots:

Include snapshots for easier review.

# Checklist:

- [ ] My code follows the style guidelines of this project
- [ ] I have already rebased the commits and made the commit message
conform to the project standard.
- [ ] I have performed a self-review of my own code
- [ ] I have commented my code, particularly in hard-to-understand areas
- [ ] I have made corresponding changes to the documentation
- [ ] Any dependent changes have been merged and published in downstream
modules
This commit is contained in:
magic.chen 2025-03-19 02:42:35 -03:00 committed by GitHub
commit 0e9faf475f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -14,6 +14,7 @@ from dbgpt.storage.vector_store.base import (
VectorStoreConfig,
)
from dbgpt.storage.vector_store.filters import FilterOperator, MetadataFilters
from dbgpt.util import string_utils
from dbgpt.util.i18n_utils import _
logger = logging.getLogger(__name__)
@ -110,6 +111,11 @@ class ChromaStore(VectorStoreBase):
self.embeddings = embedding_fn
if not self.embeddings:
raise ValueError("Embeddings is None")
self._collection_name = name
if string_utils.contains_chinese(name):
bytes_str = self._collection_name.encode("utf-8")
hex_str = bytes_str.hex()
self._collection_name = hex_str
chroma_settings = Settings(
# chroma_db_impl="duckdb+parquet", => deprecated configuration of Chroma
persist_directory=self.persist_dir,
@ -121,9 +127,9 @@ class ChromaStore(VectorStoreBase):
path=self.persist_dir, settings=chroma_settings
)
collection_metadata = collection_metadata or {"hnsw:space": "cosine"}
self._collection_name = name
self._collection = self._chroma_client.get_or_create_collection(
name=name,
name=self._collection_name,
embedding_function=None,
metadata=collection_metadata,
)