Mirror of https://github.com/hwchase17/langchain.git (last synced 2025-09-12 21:11:43 +00:00)
ci: Add script to check for pickle usage in community (#22863)
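Add script to check for pickle usage in community. The existing pickle call sites shown in the hunks below are marked with a trailing `# ignore[pickle]: explicit-opt-in` comment; several also gain a comment noting that the code path is reachable only when the user passes allow_dangerous_deserialization=True.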
This commit is contained in:
@@ -242,7 +242,7 @@ def _load_pickled_fn_from_hex_string(
|
||||
raise ValueError(f"Please install cloudpickle>=2.0.0. Error: {e}")
|
||||
|
||||
try:
|
||||
return cloudpickle.loads(bytes.fromhex(data))
|
||||
return cloudpickle.loads(bytes.fromhex(data)) # ignore[pickle]: explicit-opt-in
|
||||
except Exception as e:
|
||||
raise ValueError(
|
||||
f"Failed to load the pickled function from a hexadecimal string. Error: {e}"
|
||||
|
@@ -36,7 +36,9 @@ def _send_pipeline_to_device(pipeline: Any, device: int) -> Any:
|
||||
"""Send a pipeline to a device on the cluster."""
|
||||
if isinstance(pipeline, str):
|
||||
with open(pipeline, "rb") as f:
|
||||
pipeline = pickle.load(f)
|
||||
# This code path can only be triggered if the user
|
||||
# passed allow_dangerous_deserialization=True
|
||||
pipeline = pickle.load(f) # ignore[pickle]: explicit-opt-in
|
||||
|
||||
if importlib.util.find_spec("torch") is not None:
|
||||
import torch
|
||||
|
@@ -152,6 +152,8 @@ class TFIDFRetriever(BaseRetriever):
|
||||
|
||||
# Load docs and tfidf array as pickle.
|
||||
with open(path / f"{file_name}.pkl", "rb") as f:
|
||||
docs, tfidf_array = pickle.load(f)
|
||||
# This code path can only be triggered if the user
|
||||
# passed allow_dangerous_deserialization=True
|
||||
docs, tfidf_array = pickle.load(f) # ignore[pickle]: explicit-opt-in
|
||||
|
||||
return cls(vectorizer=vectorizer, docs=docs, tfidf_array=tfidf_array)
|
||||
|
@@ -456,7 +456,14 @@ class Annoy(VectorStore):
|
||||
annoy = guard_import("annoy")
|
||||
# load docstore and index_to_docstore_id
|
||||
with open(path / "index.pkl", "rb") as file:
|
||||
docstore, index_to_docstore_id, config_object = pickle.load(file)
|
||||
# Code path can only be reached if allow_dangerous_deserialization is True
|
||||
(
|
||||
docstore,
|
||||
index_to_docstore_id,
|
||||
config_object,
|
||||
) = pickle.load( # ignore[pickle]: explicit-opt-in
|
||||
file
|
||||
)
|
||||
|
||||
f = int(config_object["ANNOY"]["f"])
|
||||
metric = config_object["ANNOY"]["metric"]
|
||||
|
@@ -1093,7 +1093,13 @@ class FAISS(VectorStore):
|
||||
|
||||
# load docstore and index_to_docstore_id
|
||||
with open(path / f"{index_name}.pkl", "rb") as f:
|
||||
docstore, index_to_docstore_id = pickle.load(f)
|
||||
(
|
||||
docstore,
|
||||
index_to_docstore_id,
|
||||
) = pickle.load( # ignore[pickle]: explicit-opt-in
|
||||
f
|
||||
)
|
||||
|
||||
return cls(embeddings, index, docstore, index_to_docstore_id, **kwargs)
|
||||
|
||||
def serialize_to_bytes(self) -> bytes:
|
||||
@@ -1123,7 +1129,13 @@ class FAISS(VectorStore):
|
||||
"loading a file from an untrusted source (e.g., some random site on "
|
||||
"the internet.)."
|
||||
)
|
||||
index, docstore, index_to_docstore_id = pickle.loads(serialized)
|
||||
(
|
||||
index,
|
||||
docstore,
|
||||
index_to_docstore_id,
|
||||
) = pickle.loads( # ignore[pickle]: explicit-opt-in
|
||||
serialized
|
||||
)
|
||||
return cls(embeddings, index, docstore, index_to_docstore_id, **kwargs)
|
||||
|
||||
def _select_relevance_score_fn(self) -> Callable[[float], float]:
|
||||
|
@@ -493,7 +493,13 @@ class ScaNN(VectorStore):
|
||||
|
||||
# load docstore and index_to_docstore_id
|
||||
with open(path / "{index_name}.pkl".format(index_name=index_name), "rb") as f:
|
||||
docstore, index_to_docstore_id = pickle.load(f)
|
||||
(
|
||||
docstore,
|
||||
index_to_docstore_id,
|
||||
) = pickle.load( # ignore[pickle]: explicit-opt-in
|
||||
f
|
||||
)
|
||||
|
||||
return cls(embedding, index, docstore, index_to_docstore_id, **kwargs)
|
||||
|
||||
def _select_relevance_score_fn(self) -> Callable[[float], float]:
|
||||
|
@@ -188,7 +188,7 @@ class TileDB(VectorStore):
|
||||
pickled_metadata = doc.get("metadata")
|
||||
result_doc = Document(page_content=str(doc["text"][0]))
|
||||
if pickled_metadata is not None:
|
||||
metadata = pickle.loads(
|
||||
metadata = pickle.loads( # ignore[pickle]: explicit-opt-in
|
||||
np.array(pickled_metadata.tolist()).astype(np.uint8).tobytes()
|
||||
)
|
||||
result_doc.metadata = metadata
|
||||
|
Reference in New Issue
Block a user