mirror of
https://github.com/hwchase17/langchain.git
synced 2025-04-28 11:55:21 +00:00
Add docstrings for Clickhouse class methods (#19195)
Thank you for contributing to LangChain! - [ ] **PR title**: "package: description" - Where "package" is whichever of langchain, community, core, experimental, etc. is being modified. Use "docs: ..." for purely docs changes, "templates: ..." for template changes, "infra: ..." for CI changes. - Example: "community: add foobar LLM" - [ ] **PR message**: ***Delete this entire checklist*** and replace with - **Description:** a description of the change - **Issue:** the issue # it fixes, if applicable - **Dependencies:** any dependencies required for this change - **Twitter handle:** if your PR gets announced, and you'd like a mention, we'll gladly shout you out! - [ ] **Add tests and docs**: If you're adding a new integration, please include 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory. - [x] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. See contribution guidelines for more: https://python.langchain.com/docs/contributing/ Additional guidelines: - Make sure optional dependencies are imported within a function. - Please do not add dependencies to pyproject.toml files (even optional ones) unless they are required for unit tests. - Most PRs should not touch more than one package. - Changes should be backwards compatible. - If you are adding something to community, do not re-import it in langchain. If no one reviews your PR within a few days, please @-mention one of baskaryan, efriis, eyurtsev, hwchase17.
This commit is contained in:
parent
dc4ce82ddd
commit
24a0a4472a
1
.gitignore
vendored
1
.gitignore
vendored
@ -116,6 +116,7 @@ celerybeat.pid
|
|||||||
.env
|
.env
|
||||||
.envrc
|
.envrc
|
||||||
.venv*
|
.venv*
|
||||||
|
venv*
|
||||||
env/
|
env/
|
||||||
ENV/
|
ENV/
|
||||||
env.bak/
|
env.bak/
|
||||||
|
@ -211,12 +211,48 @@ CREATE TABLE IF NOT EXISTS {self.config.database}.{self.config.table}(
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def embeddings(self) -> Embeddings:
|
def embeddings(self) -> Embeddings:
|
||||||
|
"""Provides access to the embedding mechanism used by the Clickhouse instance.
|
||||||
|
|
||||||
|
This property allows direct access to the embedding function or model being
|
||||||
|
used by the Clickhouse instance to convert text documents into embedding vectors
|
||||||
|
for vector similarity search.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The `Embeddings` instance associated with this Clickhouse instance.
|
||||||
|
"""
|
||||||
return self.embedding_function
|
return self.embedding_function
|
||||||
|
|
||||||
def escape_str(self, value: str) -> str:
|
def escape_str(self, value: str) -> str:
|
||||||
|
"""Escape special characters in a string for Clickhouse SQL queries.
|
||||||
|
|
||||||
|
This method is used internally to prepare strings for safe insertion
|
||||||
|
into SQL queries by escaping special characters that might otherwise
|
||||||
|
interfere with the query syntax.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
value: The string to be escaped.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The escaped string, safe for insertion into SQL queries.
|
||||||
|
"""
|
||||||
return "".join(f"{self.BS}{c}" if c in self.must_escape else c for c in value)
|
return "".join(f"{self.BS}{c}" if c in self.must_escape else c for c in value)
|
||||||
|
|
||||||
def _build_insert_sql(self, transac: Iterable, column_names: Iterable[str]) -> str:
|
def _build_insert_sql(self, transac: Iterable, column_names: Iterable[str]) -> str:
|
||||||
|
"""Construct an SQL query for inserting data into the Clickhouse database.
|
||||||
|
|
||||||
|
This method formats and constructs an SQL `INSERT` query string using the
|
||||||
|
provided transaction data and column names. It is utilized internally during
|
||||||
|
the process of batch insertion of documents and their embeddings into the
|
||||||
|
database.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
transac: An iterable of tuples, each representing a row of data to be inserted.
|
||||||
|
column_names: iterable of strings representing the names of the columns
|
||||||
|
into which data will be inserted.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A string containing the constructed SQL `INSERT` query.
|
||||||
|
"""
|
||||||
ks = ",".join(column_names)
|
ks = ",".join(column_names)
|
||||||
_data = []
|
_data = []
|
||||||
for n in transac:
|
for n in transac:
|
||||||
@ -231,6 +267,17 @@ CREATE TABLE IF NOT EXISTS {self.config.database}.{self.config.table}(
|
|||||||
return i_str
|
return i_str
|
||||||
|
|
||||||
def _insert(self, transac: Iterable, column_names: Iterable[str]) -> None:
|
def _insert(self, transac: Iterable, column_names: Iterable[str]) -> None:
|
||||||
|
"""Execute an SQL query to insert data into the Clickhouse database.
|
||||||
|
|
||||||
|
This method performs the actual insertion of data into the database by
|
||||||
|
executing the SQL query constructed by `_build_insert_sql`. It's a critical
|
||||||
|
step in adding new documents and their associated data into the vector store.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
transac: An iterable of tuples, each representing a row of data to be inserted.
|
||||||
|
column_names: An iterable of strings representing the names of the columns
|
||||||
|
into which data will be inserted.
|
||||||
|
"""
|
||||||
_insert_query = self._build_insert_sql(transac, column_names)
|
_insert_query = self._build_insert_sql(transac, column_names)
|
||||||
self.client.command(_insert_query)
|
self.client.command(_insert_query)
|
||||||
|
|
||||||
@ -345,6 +392,21 @@ CREATE TABLE IF NOT EXISTS {self.config.database}.{self.config.table}(
|
|||||||
def _build_query_sql(
|
def _build_query_sql(
|
||||||
self, q_emb: List[float], topk: int, where_str: Optional[str] = None
|
self, q_emb: List[float], topk: int, where_str: Optional[str] = None
|
||||||
) -> str:
|
) -> str:
|
||||||
|
"""Construct an SQL query for performing a similarity search.
|
||||||
|
|
||||||
|
This internal method generates an SQL query for finding the top-k most similar
|
||||||
|
vectors in the database to a given query vector. It allows for optional filtering
|
||||||
|
conditions to be applied via a WHERE clause.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
q_emb: The query vector as a list of floats.
|
||||||
|
topk: The number of top similar items to retrieve.
|
||||||
|
where_str: An optional string representing additional WHERE conditions for the query.
|
||||||
|
Defaults to None.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A string containing the SQL query for the similarity search.
|
||||||
|
"""
|
||||||
q_emb_str = ",".join(map(str, q_emb))
|
q_emb_str = ",".join(map(str, q_emb))
|
||||||
if where_str:
|
if where_str:
|
||||||
where_str = f"PREWHERE {where_str}"
|
where_str = f"PREWHERE {where_str}"
|
||||||
|
Loading…
Reference in New Issue
Block a user