Mirror of https://github.com/hwchase17/langchain.git (synced 2025-07-05 12:48:12 +00:00)
core[patch]: Relax rate limit unit tests in terms of timing (#25140)

Relax rate limit unit tests.

commit 28e0958ff4 (parent a2e9910268)
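The tests in this diff exercise InMemoryRateLimiter, a token bucket that starts with zero tokens: the first request always blocks for roughly one polling interval (check_every_n_seconds) while the bucket refills, and at requests_per_second=20 a new token accrues every 1/20 = 0.05 seconds. The sketch below is an editorial illustration of that timing, not part of the commit; it assumes a langchain-core version that exposes InMemoryRateLimiter from langchain_core.rate_limiters.

import time

from langchain_core.rate_limiters import InMemoryRateLimiter

limiter = InMemoryRateLimiter(
    requests_per_second=20,  # one token accrues every 1/20 = 0.05 seconds
    check_every_n_seconds=0.1,  # the bucket is polled every 0.1 seconds
    max_bucket_size=10,
)

tic = time.time()
limiter.acquire()  # bucket starts empty, so this blocks for ~one 0.1 s poll
print(f"first acquire waited {time.time() - tic:.3f}s")   # ~0.10 s

tic = time.time()
limiter.acquire()  # a spare token accrued during the first wait
print(f"second acquire waited {time.time() - tic:.3f}s")  # ~0.00 s

Asserting wide windows on such measurements (0.10 < t < 0.15 instead of the old 0.01 < t < 0.02) is what makes the tests below tolerant of scheduler jitter on loaded CI machines.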
@@ -9,9 +9,12 @@ def test_rate_limit_invoke() -> None:
     """Add rate limiter."""
 
     model = GenericFakeChatModel(
-        messages=iter(["hello", "world", "!"]),
+        messages=iter(["hello", "world"]),
         rate_limiter=InMemoryRateLimiter(
-            requests_per_second=200, check_every_n_seconds=0.01, max_bucket_size=10
+            requests_per_second=20,
+            check_every_n_seconds=0.1,
+            max_bucket_size=10,
+            # At 20 requests per second we see a refresh every 0.05 seconds
         ),
     )
     tic = time.time()
@@ -19,22 +22,14 @@ def test_rate_limit_invoke() -> None:
     toc = time.time()
     # Should be larger than check every n seconds since the token bucket starts
     # with 0 tokens.
-    assert 0.01 < toc - tic < 0.02
+    assert 0.10 < toc - tic < 0.15
 
     tic = time.time()
     model.invoke("foo")
     toc = time.time()
-    # The second time we call the model, we should have 1 extra token
-    # to proceed immediately.
-    assert toc - tic < 0.005
-
-    # The third time we call the model, we need to wait again for a token
-    tic = time.time()
-    model.invoke("foo")
-    toc = time.time()
-    # Should be larger than check every n seconds since the token bucket starts
-    # with 0 tokens.
-    assert 0.01 < toc - tic < 0.02
+    # Second time we check the model, we should have 1 extra token
+    # since the sleep time is 0.1 seconds
+    assert 0.00 < toc - tic < 0.10
 
 
 async def test_rate_limit_ainvoke() -> None:
@@ -43,7 +38,10 @@ async def test_rate_limit_ainvoke() -> None:
     model = GenericFakeChatModel(
         messages=iter(["hello", "world", "!"]),
         rate_limiter=InMemoryRateLimiter(
-            requests_per_second=20, check_every_n_seconds=0.1, max_bucket_size=10
+            requests_per_second=20,
+            check_every_n_seconds=0.1,
+            max_bucket_size=10,
+            # At 20 requests per second we see a refresh every 0.05 seconds
         ),
     )
     tic = time.time()
@@ -58,7 +56,7 @@ async def test_rate_limit_ainvoke() -> None:
     toc = time.time()
     # The second time we call the model, we should have 1 extra token
     # to proceed immediately.
-    assert toc - tic < 0.01
+    assert toc - tic < 0.1
 
     # The third time we call the model, we need to wait again for a token
     tic = time.time()
@@ -74,17 +72,16 @@ def test_rate_limit_batch() -> None:
     model = GenericFakeChatModel(
         messages=iter(["hello", "world", "!"]),
         rate_limiter=InMemoryRateLimiter(
-            requests_per_second=200, check_every_n_seconds=0.01, max_bucket_size=10
+            requests_per_second=20,
+            check_every_n_seconds=0.01,
+            max_bucket_size=10,
+            # At 20 requests per second we see a refresh every 0.05 seconds
         ),
     )
-    # Need 2 tokens to proceed
-    time_to_fill = 2 / 200.0
     tic = time.time()
     model.batch(["foo", "foo"])
     toc = time.time()
-    # Should be larger than check every n seconds since the token bucket starts
-    # with 0 tokens.
-    assert time_to_fill < toc - tic < time_to_fill + 0.03
+    assert 0.1 < toc - tic < 0.2
 
 
 async def test_rate_limit_abatch() -> None:
@@ -92,17 +89,16 @@ async def test_rate_limit_abatch() -> None:
     model = GenericFakeChatModel(
         messages=iter(["hello", "world", "!"]),
         rate_limiter=InMemoryRateLimiter(
-            requests_per_second=200, check_every_n_seconds=0.01, max_bucket_size=10
+            requests_per_second=20,
+            check_every_n_seconds=0.01,
+            max_bucket_size=10,
+            # At 20 requests per second we see a refresh every 0.05 seconds
         ),
     )
-    # Need 2 tokens to proceed
-    time_to_fill = 2 / 200.0
     tic = time.time()
     await model.abatch(["foo", "foo"])
     toc = time.time()
-    # Should be larger than check every n seconds since the token bucket starts
-    # with 0 tokens.
-    assert time_to_fill < toc - tic < time_to_fill + 0.03
+    assert 0.1 < toc - tic < 0.2
 
 
 def test_rate_limit_stream() -> None:
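The two batch hunks above drop the derived bound (time_to_fill = 2 / 200.0 plus a 30 ms allowance) in favor of a flat window. A back-of-the-envelope check, editorial and using only numbers from the diff, of where that window comes from:

# model.batch(["foo", "foo"]) issues two requests, each needing a token
requests_per_second = 20
tokens_needed = 2
time_to_fill = tokens_needed / requests_per_second
assert time_to_fill == 0.1  # the lower edge of `assert 0.1 < toc - tic < 0.2`
# The upper edge leaves a full 0.1 s of headroom, rather than assuming the
# polling thread wakes within 30 ms as the old `time_to_fill + 0.03` bound did.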
@@ -110,7 +106,10 @@ def test_rate_limit_stream() -> None:
     model = GenericFakeChatModel(
         messages=iter(["hello world", "hello world", "hello world"]),
         rate_limiter=InMemoryRateLimiter(
-            requests_per_second=200, check_every_n_seconds=0.01, max_bucket_size=10
+            requests_per_second=20,
+            check_every_n_seconds=0.1,
+            max_bucket_size=10,
+            # At 20 requests per second we see a refresh every 0.05 seconds
         ),
     )
     # Check astream
@@ -119,52 +118,54 @@ def test_rate_limit_stream() -> None:
     assert [msg.content for msg in response] == ["hello", " ", "world"]
     toc = time.time()
     # Should be larger than check every n seconds since the token bucket starts
-    assert 0.01 < toc - tic < 0.02  # Slightly smaller than check every n seconds
-
-    # Second time around we should have 1 token left
-    tic = time.time()
-    response = list(model.stream("foo"))
-    assert [msg.content for msg in response] == ["hello", " ", "world"]
-    toc = time.time()
-    # Should be larger than check every n seconds since the token bucket starts
-    assert toc - tic < 0.005  # Slightly smaller than check every n seconds
-
-    # Third time around we should have 0 tokens left
-    tic = time.time()
-    response = list(model.stream("foo"))
-    assert [msg.content for msg in response] == ["hello", " ", "world"]
-    toc = time.time()
-    # Should be larger than check every n seconds since the token bucket starts
-    assert 0.01 < toc - tic < 0.02  # Slightly smaller than check every n seconds
-
-
-async def test_rate_limit_astream() -> None:
-    """Test rate limiting astream."""
-    rate_limiter = InMemoryRateLimiter(
-        requests_per_second=20, check_every_n_seconds=0.1, max_bucket_size=10
-    )
-    model = GenericFakeChatModel(
-        messages=iter(["hello world", "hello world", "hello world"]),
-        rate_limiter=rate_limiter,
-    )
-    # Check astream
-    tic = time.time()
-    response = [chunk async for chunk in model.astream("foo")]
-    assert [msg.content for msg in response] == ["hello", " ", "world"]
-    toc = time.time()
     assert 0.1 < toc - tic < 0.2
 
     # Second time around we should have 1 token left
     tic = time.time()
-    response = [chunk async for chunk in model.astream("foo")]
+    response = list(model.stream("foo"))
     assert [msg.content for msg in response] == ["hello", " ", "world"]
     toc = time.time()
     # Should be larger than check every n seconds since the token bucket starts
-    assert toc - tic < 0.01  # Slightly smaller than check every n seconds
+    assert toc - tic < 0.1  # Slightly smaller than check every n seconds
 
     # Third time around we should have 0 tokens left
     tic = time.time()
-    response = [chunk async for chunk in model.astream("foo")]
+    response = list(model.stream("foo"))
+    assert [msg.content for msg in response] == ["hello", " ", "world"]
+    toc = time.time()
+    assert 0.1 < toc - tic < 0.2
+
+
+async def test_rate_limit_astream() -> None:
+    """Test rate limiting astream."""
+    model = GenericFakeChatModel(
+        messages=iter(["hello world", "hello world", "hello world"]),
+        rate_limiter=InMemoryRateLimiter(
+            requests_per_second=20,
+            check_every_n_seconds=0.1,
+            max_bucket_size=10,
+            # At 20 requests per second we see a refresh every 0.05 seconds
+        ),
+    )
+    # Check astream
+    tic = time.time()
+    response = [msg async for msg in model.astream("foo")]
+    assert [msg.content for msg in response] == ["hello", " ", "world"]
+    toc = time.time()
+    # Should be larger than check every n seconds since the token bucket starts
+    assert 0.1 < toc - tic < 0.2
+
+    # Second time around we should have 1 token left
+    tic = time.time()
+    response = [msg async for msg in model.astream("foo")]
+    assert [msg.content for msg in response] == ["hello", " ", "world"]
+    toc = time.time()
+    # Should be larger than check every n seconds since the token bucket starts
+    assert toc - tic < 0.1  # Slightly smaller than check every n seconds
+
+    # Third time around we should have 0 tokens left
+    tic = time.time()
+    response = [msg async for msg in model.astream("foo")]
     assert [msg.content for msg in response] == ["hello", " ", "world"]
     toc = time.time()
     assert 0.1 < toc - tic < 0.2
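In the stream/astream tests rewritten above, the limiter charges one token per stream, acquired when the stream starts, rather than one per chunk, which is why all three chunks land inside a single wait window. A minimal editorial sketch of that pattern (assuming GenericFakeChatModel is exported from langchain_core.language_models and splits each fake message into whitespace-delimited chunks, as the assertions in the diff suggest):

import time

from langchain_core.language_models import GenericFakeChatModel
from langchain_core.rate_limiters import InMemoryRateLimiter

model = GenericFakeChatModel(
    messages=iter(["hello world"]),
    rate_limiter=InMemoryRateLimiter(
        requests_per_second=20, check_every_n_seconds=0.1, max_bucket_size=10
    ),
)

tic = time.time()
chunks = [chunk.content for chunk in model.stream("foo")]
toc = time.time()
assert chunks == ["hello", " ", "world"]  # one message, streamed as three chunks
assert 0.1 < toc - tic < 0.2  # a single token's wait covers the whole stream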
@@ -176,7 +177,10 @@ def test_rate_limit_skips_cache() -> None:
     model = GenericFakeChatModel(
         messages=iter(["hello", "world", "!"]),
         rate_limiter=InMemoryRateLimiter(
-            requests_per_second=100, check_every_n_seconds=0.01, max_bucket_size=1
+            requests_per_second=20,
+            check_every_n_seconds=0.1,
+            max_bucket_size=1,
+            # At 20 requests per second we see a refresh every 0.05 seconds
         ),
         cache=cache,
     )
@@ -186,7 +190,7 @@ def test_rate_limit_skips_cache() -> None:
     toc = time.time()
     # Should be larger than check every n seconds since the token bucket starts
     # with 0 tokens.
-    assert 0.01 < toc - tic < 0.02
+    assert 0.1 < toc - tic < 0.2
 
     for _ in range(2):
         # Cache hits
@@ -195,7 +199,7 @@ def test_rate_limit_skips_cache() -> None:
         toc = time.time()
         # Should be larger than check every n seconds since the token bucket starts
         # with 0 tokens.
-        assert toc - tic < 0.005
+        assert toc - tic < 0.05
 
     # Test verifies that there's only a single key
     # Test also verifies that rate_limiter information is not part of the
@@ -236,7 +240,7 @@ async def test_rate_limit_skips_cache_async() -> None:
     model = GenericFakeChatModel(
         messages=iter(["hello", "world", "!"]),
         rate_limiter=InMemoryRateLimiter(
-            requests_per_second=100, check_every_n_seconds=0.01, max_bucket_size=1
+            requests_per_second=20, check_every_n_seconds=0.1, max_bucket_size=1
         ),
         cache=cache,
     )
@@ -246,7 +250,7 @@ async def test_rate_limit_skips_cache_async() -> None:
     toc = time.time()
     # Should be larger than check every n seconds since the token bucket starts
     # with 0 tokens.
-    assert 0.01 < toc - tic < 0.02
+    assert 0.1 < toc - tic < 0.2
 
     for _ in range(2):
         # Cache hits
@@ -255,4 +259,4 @@ async def test_rate_limit_skips_cache_async() -> None:
         toc = time.time()
         # Should be larger than check every n seconds since the token bucket starts
         # with 0 tokens.
-        assert toc - tic < 0.005
+        assert toc - tic < 0.05
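The final hunks relax the cache-interaction tests, which check that only the uncached first call pays the rate-limit wait while cache hits return quickly. A condensed editorial sketch of that pattern; InMemoryCache from langchain_core.caches stands in here for whatever cache fixture the test file actually uses:

import time

from langchain_core.caches import InMemoryCache
from langchain_core.language_models import GenericFakeChatModel
from langchain_core.rate_limiters import InMemoryRateLimiter

model = GenericFakeChatModel(
    messages=iter(["hello", "world", "!"]),
    rate_limiter=InMemoryRateLimiter(
        requests_per_second=20, check_every_n_seconds=0.1, max_bucket_size=1
    ),
    cache=InMemoryCache(),
)

tic = time.time()
model.invoke("foo")  # cache miss: blocks ~0.1 s waiting for a token
assert 0.1 < time.time() - tic < 0.2

tic = time.time()
model.invoke("foo")  # cache hit: expected to skip the rate-limit wait
assert time.time() - tic < 0.05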