Mirror of https://github.com/hwchase17/langchain.git (synced 2026-02-21 14:43:07 +00:00)
update
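For context, the tests touched by this diff wrap a fake chat model in an in-memory token-bucket rate limiter and assert on wall-clock timings. Below is a minimal sketch of that pattern, using the langchain_core APIs referenced in the diff; the parameter values here are illustrative, not part of the commit.

import time

from langchain_core.language_models import GenericFakeChatModel
from langchain_core.rate_limiters import InMemoryRateLimiter

# A fake chat model that replays canned messages, throttled by a token bucket.
model = GenericFakeChatModel(
    messages=iter(["hello"]),
    rate_limiter=InMemoryRateLimiter(
        requests_per_second=20,     # one new token every 1/20 = 0.05 s
        check_every_n_seconds=0.1,  # how often a waiting request polls the bucket
        max_bucket_size=10,         # cap on accumulated burst tokens
    ),
)

tic = time.time()
model.invoke("foo")  # the bucket starts empty, so this waits roughly one poll interval
toc = time.time()
print(f"first invoke waited {toc - tic:.3f}s")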
@@ -1,19 +1,21 @@
 import time
+from typing import Optional
 
 from langchain_core.caches import InMemoryCache
 from langchain_core.language_models import GenericFakeChatModel
 from langchain_core.rate_limiters import InMemoryRateLimiter
-from typing import Optional
-
 
 
 def test_rate_limit_invoke() -> None:
     """Add rate limiter."""
 
     model = GenericFakeChatModel(
-        messages=iter(["hello", "world", "!"]),
+        messages=iter(["hello", "world"]),
         rate_limiter=InMemoryRateLimiter(
-            requests_per_second=200, check_every_n_seconds=0.01, max_bucket_size=10
+            requests_per_second=20,
+            check_every_n_seconds=0.1,
+            max_bucket_size=10,
+            # At 20 requests per second we see a refresh every 0.05 seconds
         ),
     )
     tic = time.time()
@@ -21,22 +23,14 @@ def test_rate_limit_invoke() -> None:
     toc = time.time()
     # Should be larger than check every n seconds since the token bucket starts
     # with 0 tokens.
-    assert 0.01 < toc - tic < 0.02
+    assert 0.10 < toc - tic < 0.15
 
     tic = time.time()
     model.invoke("foo")
     toc = time.time()
-    # The second time we call the model, we should have 1 extra token
-    # to proceed immediately.
-    assert toc - tic < 0.005
-
-    # The third time we call the model, we need to wait again for a token
-    tic = time.time()
-    model.invoke("foo")
-    toc = time.time()
-    # Should be larger than check every n seconds since the token bucket starts
-    # with 0 tokens.
-    assert 0.01 < toc - tic < 0.02
+    # Second time we check the model, we should have 1 extra token
+    # since the sleep time is 0.1 seconds
+    assert 0.00 < toc - tic < 0.10
 
 
 async def test_rate_limit_ainvoke() -> None:
@@ -45,7 +39,10 @@ async def test_rate_limit_ainvoke() -> None:
     model = GenericFakeChatModel(
         messages=iter(["hello", "world", "!"]),
         rate_limiter=InMemoryRateLimiter(
-            requests_per_second=20, check_every_n_seconds=0.1, max_bucket_size=10
+            requests_per_second=20,
+            check_every_n_seconds=0.1,
+            max_bucket_size=10,
+            # At 20 requests per second we see a refresh every 0.05 seconds
         ),
     )
     tic = time.time()
@@ -60,7 +57,7 @@ async def test_rate_limit_ainvoke() -> None:
     toc = time.time()
     # The second time we call the model, we should have 1 extra token
     # to proceed immediately.
-    assert toc - tic < 0.01
+    assert toc - tic < 0.1
 
     # The third time we call the model, we need to wait again for a token
     tic = time.time()
@@ -76,17 +73,16 @@ def test_rate_limit_batch() -> None:
     model = GenericFakeChatModel(
         messages=iter(["hello", "world", "!"]),
         rate_limiter=InMemoryRateLimiter(
-            requests_per_second=200, check_every_n_seconds=0.01, max_bucket_size=10
+            requests_per_second=20,
+            check_every_n_seconds=0.01,
+            max_bucket_size=10,
+            # At 20 requests per second we see a refresh every 0.05 seconds
         ),
     )
-    # Need 2 tokens to proceed
-    time_to_fill = 2 / 200.0
     tic = time.time()
     model.batch(["foo", "foo"])
     toc = time.time()
-    # Should be larger than check every n seconds since the token bucket starts
-    # with 0 tokens.
-    assert time_to_fill < toc - tic < time_to_fill + 0.03
+    assert 0.1 < toc - tic < 0.2
 
 
 async def test_rate_limit_abatch() -> None:
@@ -94,17 +90,16 @@ async def test_rate_limit_abatch() -> None:
     model = GenericFakeChatModel(
         messages=iter(["hello", "world", "!"]),
         rate_limiter=InMemoryRateLimiter(
-            requests_per_second=200, check_every_n_seconds=0.01, max_bucket_size=10
+            requests_per_second=20,
+            check_every_n_seconds=0.01,
+            max_bucket_size=10,
+            # At 20 requests per second we see a refresh every 0.05 seconds
         ),
     )
-    # Need 2 tokens to proceed
-    time_to_fill = 2 / 200.0
     tic = time.time()
     await model.abatch(["foo", "foo"])
     toc = time.time()
-    # Should be larger than check every n seconds since the token bucket starts
-    # with 0 tokens.
-    assert time_to_fill < toc - tic < time_to_fill + 0.03
+    assert 0.1 < toc - tic < 0.2
 
 
 def test_rate_limit_stream() -> None:
@@ -112,7 +107,10 @@ def test_rate_limit_stream() -> None:
     model = GenericFakeChatModel(
         messages=iter(["hello world", "hello world", "hello world"]),
         rate_limiter=InMemoryRateLimiter(
-            requests_per_second=200, check_every_n_seconds=0.01, max_bucket_size=10
+            requests_per_second=20,
+            check_every_n_seconds=0.1,
+            max_bucket_size=10,
+            # At 20 requests per second we see a refresh every 0.05 seconds
         ),
     )
     # Check astream
@@ -121,52 +119,54 @@ def test_rate_limit_stream() -> None:
     assert [msg.content for msg in response] == ["hello", " ", "world"]
     toc = time.time()
     # Should be larger than check every n seconds since the token bucket starts
-    assert 0.01 < toc - tic < 0.02  # Slightly smaller than check every n seconds
-
-    # Second time around we should have 1 token left
-    tic = time.time()
-    response = list(model.stream("foo"))
-    assert [msg.content for msg in response] == ["hello", " ", "world"]
-    toc = time.time()
-    # Should be larger than check every n seconds since the token bucket starts
-    assert toc - tic < 0.005  # Slightly smaller than check every n seconds
-
-    # Third time around we should have 0 tokens left
-    tic = time.time()
-    response = list(model.stream("foo"))
-    assert [msg.content for msg in response] == ["hello", " ", "world"]
-    toc = time.time()
-    # Should be larger than check every n seconds since the token bucket starts
-    assert 0.01 < toc - tic < 0.02  # Slightly smaller than check every n seconds
-
-
-async def test_rate_limit_astream() -> None:
-    """Test rate limiting astream."""
-    rate_limiter = InMemoryRateLimiter(
-        requests_per_second=20, check_every_n_seconds=0.1, max_bucket_size=10
-    )
-    model = GenericFakeChatModel(
-        messages=iter(["hello world", "hello world", "hello world"]),
-        rate_limiter=rate_limiter,
-    )
-    # Check astream
-    tic = time.time()
-    response = [chunk async for chunk in model.astream("foo")]
-    assert [msg.content for msg in response] == ["hello", " ", "world"]
-    toc = time.time()
     assert 0.1 < toc - tic < 0.2
 
     # Second time around we should have 1 token left
     tic = time.time()
-    response = [chunk async for chunk in model.astream("foo")]
+    response = list(model.stream("foo"))
     assert [msg.content for msg in response] == ["hello", " ", "world"]
     toc = time.time()
     # Should be larger than check every n seconds since the token bucket starts
-    assert toc - tic < 0.01  # Slightly smaller than check every n seconds
+    assert toc - tic < 0.1  # Slightly smaller than check every n seconds
 
     # Third time around we should have 0 tokens left
     tic = time.time()
-    response = [chunk async for chunk in model.astream("foo")]
+    response = list(model.stream("foo"))
     assert [msg.content for msg in response] == ["hello", " ", "world"]
     toc = time.time()
     assert 0.1 < toc - tic < 0.2
+
+
+async def test_rate_limit_astream() -> None:
+    """Test rate limiting astream."""
+    model = GenericFakeChatModel(
+        messages=iter(["hello world", "hello world", "hello world"]),
+        rate_limiter=InMemoryRateLimiter(
+            requests_per_second=20,
+            check_every_n_seconds=0.1,
+            max_bucket_size=10,
+            # At 20 requests per second we see a refresh every 0.05 seconds
+        ),
+    )
+    # Check astream
+    tic = time.time()
+    response = [msg async for msg in model.astream("foo")]
+    assert [msg.content for msg in response] == ["hello", " ", "world"]
+    toc = time.time()
+    # Should be larger than check every n seconds since the token bucket starts
+    assert 0.1 < toc - tic < 0.2
+
+    # Second time around we should have 1 token left
+    tic = time.time()
+    response = [msg async for msg in model.astream("foo")]
+    assert [msg.content for msg in response] == ["hello", " ", "world"]
+    toc = time.time()
+    # Should be larger than check every n seconds since the token bucket starts
+    assert toc - tic < 0.1  # Slightly smaller than check every n seconds
+
+    # Third time around we should have 0 tokens left
+    tic = time.time()
+    response = [msg async for msg in model.astream("foo")]
+    assert [msg.content for msg in response] == ["hello", " ", "world"]
+    toc = time.time()
+    assert 0.1 < toc - tic < 0.2
@@ -178,7 +178,10 @@ def test_rate_limit_skips_cache() -> None:
     model = GenericFakeChatModel(
         messages=iter(["hello", "world", "!"]),
         rate_limiter=InMemoryRateLimiter(
-            requests_per_second=100, check_every_n_seconds=0.01, max_bucket_size=1
+            requests_per_second=20,
+            check_every_n_seconds=0.1,
+            max_bucket_size=1,
+            # At 20 requests per second we see a refresh every 0.05 seconds
         ),
         cache=cache,
     )
@@ -188,7 +191,7 @@ def test_rate_limit_skips_cache() -> None:
     toc = time.time()
     # Should be larger than check every n seconds since the token bucket starts
     # with 0 tokens.
-    assert 0.01 < toc - tic < 0.02
+    assert 0.1 < toc - tic < 0.2
 
     for _ in range(2):
         # Cache hits
@@ -197,7 +200,7 @@ def test_rate_limit_skips_cache() -> None:
         toc = time.time()
         # Should be larger than check every n seconds since the token bucket starts
         # with 0 tokens.
-        assert toc - tic < 0.005
+        assert toc - tic < 0.05
 
     # Test verifies that there's only a single key
     # Test also verifies that rate_limiter information is not part of the
@@ -217,6 +220,7 @@ class SerializableModel(GenericFakeChatModel):
     def is_lc_serializable(cls) -> bool:
         return True
 
 
+SerializableModel.model_rebuild()
 
 
@@ -240,7 +244,7 @@ async def test_rate_limit_skips_cache_async() -> None:
     model = GenericFakeChatModel(
         messages=iter(["hello", "world", "!"]),
         rate_limiter=InMemoryRateLimiter(
-            requests_per_second=100, check_every_n_seconds=0.01, max_bucket_size=1
+            requests_per_second=20, check_every_n_seconds=0.1, max_bucket_size=1
         ),
         cache=cache,
     )
@@ -250,7 +254,7 @@ async def test_rate_limit_skips_cache_async() -> None:
     toc = time.time()
     # Should be larger than check every n seconds since the token bucket starts
     # with 0 tokens.
-    assert 0.01 < toc - tic < 0.02
+    assert 0.1 < toc - tic < 0.2
 
     for _ in range(2):
         # Cache hits
@@ -259,4 +263,4 @@ async def test_rate_limit_skips_cache_async() -> None:
         toc = time.time()
         # Should be larger than check every n seconds since the token bucket starts
         # with 0 tokens.
-        assert toc - tic < 0.005
+        assert toc - tic < 0.05
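The timing bounds asserted throughout the diff follow from token-bucket arithmetic: the bucket starts empty, so the first request waits roughly one check interval; at 20 requests per second a token is refreshed every 1/20 = 0.05 s, so a request issued immediately after the first usually finds a token already available; and a batch of two requests needs two tokens, about 0.1 s. A small sketch of that arithmetic (plain Python, values taken from the updated tests; the variable names are illustrative):

requests_per_second = 20
check_every_n_seconds = 0.1

token_refresh_interval = 1 / requests_per_second  # 0.05 s between new tokens

# First call: the bucket holds 0 tokens, so it sleeps until the next poll.
first_call_expected = check_every_n_seconds        # tests assert 0.10 < t < 0.15

# While the first call slept ~0.1 s, about 0.1 / 0.05 = 2 tokens accrued;
# one was spent, so the second call can usually proceed without sleeping.
second_call_expected_max = check_every_n_seconds   # tests assert t < 0.10

# A batch of two prompts consumes two tokens: 2 / 20 = 0.1 s to accumulate.
batch_of_two_expected = 2 / requests_per_second    # tests assert 0.1 < t < 0.2

# Cache hits skip the rate limiter entirely, so repeated identical prompts
# return almost immediately (tests assert t < 0.05).
print(first_call_expected, second_call_expected_max, batch_of_two_expected)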