mirror of
https://github.com/hwchase17/langchain.git
synced 2026-02-21 22:56:05 +00:00
81 lines
2.0 KiB
Python
Executable File
81 lines
2.0 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Refresh model profile data from models.dev.

Update the bundled model data by running:

    python scripts/refresh_data.py
"""
|
|
|
|
import json
|
|
from pathlib import Path
|
|
|
|
import httpx
|
|
|
|
# Providers retained in the bundled data file. Restricting the data to this
# subset is done to limit the data size. Entries are kept in alphabetical order.
PROVIDER_SUBSET = [
    "amazon-bedrock",
    "anthropic",
    "azure",
    "baseten",
    "cerebras",
    "cloudflare-workers-ai",
    "deepinfra",
    "deepseek",
    "fireworks-ai",
    "google",
    "google-vertex",
    "google-vertex-anthropic",
    "groq",
    "huggingface",
    "lmstudio",
    "mistral",
    "nebius",
    "nvidia",
    "openai",
    "openrouter",
    "perplexity",
    "togetherai",
    "upstage",
    "xai",
]
|
|
|
|
|
|
def main() -> None:
    """Download and save the latest model data from models.dev.

    Fetches the JSON document at ``https://models.dev/api.json``, keeps only
    the providers listed in ``PROVIDER_SUBSET``, and writes the result as
    indented, key-sorted JSON to ``langchain_model_profiles/data/models.json``
    under the directory one level above the one containing this script.
    Progress messages are printed to stdout.

    Raises:
        httpx.HTTPStatusError: If the API responds with an error status
            (raised by ``response.raise_for_status()``).
        TypeError: If the decoded response body is not a dictionary.
    """
    api_url = "https://models.dev/api.json"
    output_dir = Path(__file__).parent.parent / "langchain_model_profiles" / "data"
    output_file = output_dir / "models.json"

    print(f"Downloading data from {api_url}...")  # noqa: T201
    response = httpx.get(api_url, timeout=30)
    response.raise_for_status()

    data = response.json()

    # Basic validation
    if not isinstance(data, dict):
        msg = "Expected API response to be a dictionary"
        raise TypeError(msg)

    # Counts are taken before filtering so the log reflects the full download.
    provider_count = len(data)
    model_count = sum(len(provider.get("models", {})) for provider in data.values())

    print(f"Downloaded {provider_count} providers with {model_count} models")  # noqa: T201

    # Subset providers
    data = {k: v for k, v in data.items() if k in PROVIDER_SUBSET}
    print(f"Filtered to {len(data)} providers based on subset")  # noqa: T201

    # Ensure directory exists
    output_dir.mkdir(parents=True, exist_ok=True)

    # Write with pretty formatting for readability. The encoding is pinned so
    # the file does not depend on the platform's locale default.
    print(f"Writing to {output_file}...")  # noqa: T201
    with output_file.open("w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, sort_keys=True)

    size = output_file.stat().st_size
    print(f"✓ Successfully refreshed model data ({size:,} bytes)")  # noqa: T201
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the refresh only when executed as a script, not when imported.
    main()