mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-26 16:43:35 +00:00
experimental: Fix divide by 0 error (#25439)
Within the semantic chunker, calling `_threshold_from_clusters` can raise a divide-by-zero error when `number_of_chunks` is equal to the length of `distances`. The fix adds a check for the case where these values match, which prevents the error and allows chunking to continue.
This commit is contained in:
parent
ba167dc158
commit
66e30efa61
@ -180,7 +180,11 @@ class SemanticChunker(BaseDocumentTransformer):
|
|||||||
x = max(min(self.number_of_chunks, x1), x2)
|
x = max(min(self.number_of_chunks, x1), x2)
|
||||||
|
|
||||||
# Linear interpolation formula
|
# Linear interpolation formula
|
||||||
|
if x2 == x1:
|
||||||
|
y = y2
|
||||||
|
else:
|
||||||
y = y1 + ((y2 - y1) / (x2 - x1)) * (x - x1)
|
y = y1 + ((y2 - y1) / (x2 - x1)) * (x - x1)
|
||||||
|
|
||||||
y = min(max(y, 0), 100)
|
y = min(max(y, 0), 100)
|
||||||
|
|
||||||
return cast(float, np.percentile(distances, y))
|
return cast(float, np.percentile(distances, y))
|
||||||
|
Loading…
Reference in New Issue
Block a user