Harrison/deep infra (#5403)
Co-authored-by: Yessen Kanapin <yessenzhar@gmail.com>
Co-authored-by: Yessen Kanapin <yessen@deepinfra.com>

This commit is contained in:
parent 100d6655df
commit 416c8b1da3
DeepInfra integration docs (markdown):

@@ -7,6 +7,14 @@ It is broken into two parts: installation and setup, and then references to specific DeepInfra wrappers.
 - Get your DeepInfra API key from this link [here](https://deepinfra.com/).
 - Get a DeepInfra API key and set it as an environment variable (`DEEPINFRA_API_TOKEN`).
+
+## Available Models
+
+DeepInfra provides a range of Open Source LLMs ready for deployment.
+You can list supported models [here](https://deepinfra.com/models?type=text-generation).
+google/flan\* models can be viewed [here](https://deepinfra.com/models?type=text2text-generation).
+
+You can view a list of request and response parameters [here](https://deepinfra.com/databricks/dolly-v2-12b#API).
 
 ## Wrappers
 
 ### LLM
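The added docs name the `DEEPINFRA_API_TOKEN` environment variable and point at the model catalogue. As a minimal sketch of how the two fit together with the LangChain wrapper (the token value is a placeholder; the model id is the one the example notebook uses):

```python
import os

from langchain.llms import DeepInfra

# The wrapper reads DEEPINFRA_API_TOKEN from the environment;
# setting it in-process is fine for quick experiments.
os.environ["DEEPINFRA_API_TOKEN"] = "<your-api-token>"  # placeholder, not a real token

# Any model from https://deepinfra.com/models?type=text-generation should work.
llm = DeepInfra(model_id="databricks/dolly-v2-12b")
```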
DeepInfra example notebook (JSON source; this and the next two hunks):

@@ -81,7 +81,7 @@
   "metadata": {},
   "source": [
    "## Create the DeepInfra instance\n",
-   "Make sure to deploy your model first via `deepctl deploy create -m google/flat-t5-xl` (see [here](https://github.com/deepinfra/deepctl#deepctl))"
+   "You can also use our open source [deepctl tool](https://github.com/deepinfra/deepctl#deepctl) to manage your model deployments. You can view a list of available parameters [here](https://deepinfra.com/databricks/dolly-v2-12b#API)."
   ]
  },
  {
@@ -90,7 +90,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
-   "llm = DeepInfra(model_id=\"DEPLOYED MODEL ID\")"
+   "llm = DeepInfra(model_id=\"databricks/dolly-v2-12b\")\n",
+   "llm.model_kwargs = {'temperature': 0.7, 'repetition_penalty': 1.2, 'max_new_tokens': 250, 'top_p': 0.9}"
   ]
  },
  {
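The notebook now pins a concrete model and tunes its generation parameters; `model_kwargs` are forwarded to the inference request. The same cell, restated with the standard meaning of each parameter noted (the comments are annotations, not part of the notebook):

```python
from langchain.llms import DeepInfra

llm = DeepInfra(model_id="databricks/dolly-v2-12b")

# Forwarded to the DeepInfra inference request as-is; see the model's
# API page for the full parameter list.
llm.model_kwargs = {
    "temperature": 0.7,         # sampling temperature; higher gives more varied output
    "repetition_penalty": 1.2,  # values > 1.0 discourage repeating tokens
    "max_new_tokens": 250,      # upper bound on generated tokens
    "top_p": 0.9,               # nucleus-sampling probability mass cutoff
}
```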
@@ -142,9 +143,20 @@
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
-  "outputs": [],
+  "outputs": [
+   {
+    "data": {
+     "text/plain": [
+      "\"Penguins live in the Southern hemisphere.\\nThe North pole is located in the Northern hemisphere.\\nSo, first you need to turn the penguin South.\\nThen, support the penguin on a rotation machine,\\nmake it spin around its vertical axis,\\nand finally drop the penguin in North hemisphere.\\nNow, you have a penguin in the north pole!\\n\\nStill didn't understand?\\nWell, you're a failure as a teacher.\""
+     ]
+    },
+    "execution_count": 8,
+    "metadata": {},
+    "output_type": "execute_result"
+   }
+  ],
   "source": [
-   "question = \"What NFL team won the Super Bowl in 2015?\"\n",
+   "question = \"Can penguins reach the North pole?\"\n",
    "\n",
    "llm_chain.run(question)"
   ]
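The cell calls `llm_chain.run(question)`, but the chain itself is built in earlier notebook cells that are outside this hunk. A minimal sketch of the setup it presumably relies on, following the common LangChain quickstart pattern (the prompt template here is assumed, not taken from this diff):

```python
from langchain import LLMChain, PromptTemplate
from langchain.llms import DeepInfra

# Assumed chain setup mirroring the earlier notebook cells.
llm = DeepInfra(model_id="databricks/dolly-v2-12b")

template = """Question: {question}

Answer: Let's think step by step."""
prompt = PromptTemplate(template=template, input_variables=["question"])
llm_chain = LLMChain(prompt=prompt, llm=llm)

question = "Can penguins reach the North pole?"
print(llm_chain.run(question))
```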
DeepInfra LLM wrapper (Python):

@@ -94,7 +94,8 @@ class DeepInfra(LLM):
 
         if res.status_code != 200:
             raise ValueError("Error raised by inference API")
-        text = res.json()[0]["generated_text"]
+        t = res.json()
+        text = t["results"][0]["generated_text"]
 
         if stop is not None:
             # I believe this is required since the stop tokens
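The parsing change reflects a different response shape: a JSON object carrying a `results` array, rather than a bare list of generations. A self-contained illustration with a sample payload (the payload literal is invented for demonstration, not captured from the API):

```python
# Old shape the wrapper used to expect: [{"generated_text": "..."}]
# New shape this hunk parses: {"results": [{"generated_text": "..."}]}
payload = {  # sample payload, invented for illustration
    "results": [{"generated_text": "Penguins live in the Southern hemisphere."}],
}

# Equivalent of the fixed lines in DeepInfra._call:
text = payload["results"][0]["generated_text"]
assert text == "Penguins live in the Southern hemisphere."
```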