Mirror of https://github.com/hwchase17/langchain.git (synced 2025-06-22 06:39:52 +00:00)
parent 1afdb40b48
commit f3cc9bba5b

@@ -107,7 +107,7 @@
     "# Now let's try with fallbacks to Anthropic\n",
     "with patch('openai.ChatCompletion.create', side_effect=RateLimitError()):\n",
     "    try:\n",
-    "        print(llm.invoke(\"Why did the the chicken cross the road?\"))\n",
+    "        print(llm.invoke(\"Why did the chicken cross the road?\"))\n",
     "    except:\n",
     "        print(\"Hit error\")"
    ]

@@ -109,7 +109,7 @@
     "# Now let's try with fallbacks to Anthropic\n",
     "with patch('openai.ChatCompletion.create', side_effect=RateLimitError()):\n",
     "    try:\n",
-    "        print(llm.invoke(\"Why did the the chicken cross the road?\"))\n",
+    "        print(llm.invoke(\"Why did the chicken cross the road?\"))\n",
     "    except:\n",
     "        print(\"Hit error\")"
    ]

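For context, the two hunks above touch the fallbacks notebooks. A minimal sketch of the pattern being exercised, assuming the `with_fallbacks` runnable API and the `ChatOpenAI`/`ChatAnthropic` wrappers of this LangChain era:

```python
from unittest.mock import patch

from openai.error import RateLimitError  # openai<1.0-style error class
from langchain.chat_models import ChatAnthropic, ChatOpenAI

# Fail fast on the primary model so the fallback is actually exercised.
openai_llm = ChatOpenAI(max_retries=0)
anthropic_llm = ChatAnthropic()
llm = openai_llm.with_fallbacks([anthropic_llm])

# Simulate an OpenAI rate-limit error; the Anthropic fallback should answer.
with patch("openai.ChatCompletion.create", side_effect=RateLimitError()):
    try:
        print(llm.invoke("Why did the chicken cross the road?"))
    except Exception:
        print("Hit error")
```
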
@@ -148,7 +148,7 @@
     "\n",
     "Inference speed is a challenge when running models locally (see above).\n",
     "\n",
-    "To minimize latency, it is desiable to run models locally on GPU, which ships with many consumer laptops [e.g., Apple devices](https://www.apple.com/newsroom/2022/06/apple-unveils-m2-with-breakthrough-performance-and-capabilities/).\n",
+    "To minimize latency, it is desirable to run models locally on GPU, which ships with many consumer laptops [e.g., Apple devices](https://www.apple.com/newsroom/2022/06/apple-unveils-m2-with-breakthrough-performance-and-capabilities/).\n",
     "\n",
     "And even with GPU, the available GPU memory bandwidth (as noted above) is important.\n",
     "\n",

@@ -254,7 +254,7 @@
     "\n",
     "`f16_kv`: whether the model should use half-precision for the key/value cache\n",
     "* Value: True\n",
-    "* Meaning: The model will use half-precision, which can be more memory efficient; Metal only support True."
+    "* Meaning: The model will use half-precision, which can be more memory efficient; Metal only supports True."
    ]
   },
   {

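The `f16_kv` note above comes from the local-LLM (llama.cpp on Apple Metal) guide. A hedged sketch of how these parameters are typically passed to the `LlamaCpp` wrapper; the model path is a placeholder and keyword names should be checked against the installed version:

```python
from langchain.llms import LlamaCpp

llm = LlamaCpp(
    model_path="/path/to/local-model.bin",  # placeholder path to local weights
    n_gpu_layers=1,  # offload layers to the Metal GPU
    n_batch=512,     # tokens processed per batch
    f16_kv=True,     # half-precision key/value cache; Metal only supports True
    verbose=True,    # surfaces the ggml_metal_init log lines referenced in the next hunk
)
```
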
@@ -291,7 +291,7 @@
    "id": "f56f5168",
    "metadata": {},
    "source": [
-    "The console log will show the the below to indicate Metal was enabled properly from steps above:\n",
+    "The console log will show the below to indicate Metal was enabled properly from steps above:\n",
     "```\n",
     "ggml_metal_init: allocating\n",
     "ggml_metal_init: using MPS\n",

@@ -49,7 +49,7 @@
    "metadata": {},
    "source": [
     "`BibtexLoader` has these arguments:\n",
-    "- `file_path`: the path the the `.bib` bibtex file\n",
+    "- `file_path`: the path of the `.bib` bibtex file\n",
     "- optional `max_docs`: default=None, i.e. not limit. Use it to limit number of retrieved documents.\n",
     "- optional `max_content_chars`: default=4000. Use it to limit the number of characters in a single document.\n",
     "- optional `load_extra_meta`: default=False. By default only the most important fields from the bibtex entries: `Published` (publication year), `Title`, `Authors`, `Summary`, `Journal`, `Keywords`, and `URL`. If True, it will also try to load return `entry_id`, `note`, `doi`, and `links` fields. \n",

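A minimal usage sketch for the `BibtexLoader` arguments listed above; the `.bib` path is a placeholder:

```python
from langchain.document_loaders import BibtexLoader

loader = BibtexLoader(
    "path/to/references.bib",  # placeholder file_path
    max_docs=2,                # limit the number of retrieved documents
    max_content_chars=4000,    # cap characters per document (the default)
)
docs = loader.load()
print(docs[0].metadata)  # Published, Title, Authors, Summary, Journal, Keywords, URL
```
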
@@ -55,7 +55,7 @@
    "id": "ac5c88ce",
    "metadata": {},
    "source": [
-    "Let's add some files to the the sandbox"
+    "Let's add some files to the sandbox"
    ]
   },
   {

@@ -66,7 +66,7 @@
     "\n",
     "The record manager relies on a time-based mechanism to determine what content can be cleaned up (when using `full` or `incremental` cleanup modes).\n",
     "\n",
-    "If two tasks run back-to-back, and the first task finishes before the the clock time changes, then the second task may not be able to clean up content.\n",
+    "If two tasks run back-to-back, and the first task finishes before the clock time changes, then the second task may not be able to clean up content.\n",
     "\n",
     "This is unlikely to be an issue in actual settings for the following reasons:\n",
     "\n",

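The note above refers to the indexing API's record manager. A hedged sketch of the surrounding workflow, assuming the `SQLRecordManager`/`index` names from `langchain.indexes` and an existing `docs` list and `vectorstore`:

```python
from langchain.indexes import SQLRecordManager, index

# Record manager backed by SQLite; the namespace and db_url are placeholders.
record_manager = SQLRecordManager(
    "my_docs_namespace", db_url="sqlite:///record_manager_cache.sql"
)
record_manager.create_schema()

# `cleanup="incremental"` is one of the time-based cleanup modes discussed above.
index(docs, record_manager, vectorstore, cleanup="incremental", source_id_key="source")
```
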
@@ -12,7 +12,7 @@
     "- [Memory in LLMChain](/docs/modules/memory/how_to/adding_memory.html)\n",
     "- [Custom Agents](/docs/modules/agents/how_to/custom_agent.html)\n",
     "\n",
-    "In order to add a memory to an agent we are going to the the following steps:\n",
+    "In order to add a memory to an agent we are going to perform the following steps:\n",
     "\n",
     "1. We are going to create an `LLMChain` with memory.\n",
     "2. We are going to use that `LLMChain` to create a custom Agent.\n",

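A condensed, hedged sketch of the two steps listed above, assuming a `tools` list is already defined and the agent classes of this era (`ZeroShotAgent`, `AgentExecutor`):

```python
from langchain.agents import AgentExecutor, ZeroShotAgent
from langchain.chains import LLMChain
from langchain.llms import OpenAI
from langchain.memory import ConversationBufferMemory

# 1. An LLMChain whose prompt leaves room for the conversation history.
prompt = ZeroShotAgent.create_prompt(
    tools,
    prefix="Answer the following questions as best you can. You have access to the following tools:",
    suffix="Begin!\n\n{chat_history}\nQuestion: {input}\n{agent_scratchpad}",
    input_variables=["input", "chat_history", "agent_scratchpad"],
)
memory = ConversationBufferMemory(memory_key="chat_history")
llm_chain = LLMChain(llm=OpenAI(temperature=0), prompt=prompt)

# 2. A custom agent built on that chain, with the memory attached to the executor.
agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools)
agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, memory=memory)
```
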
@@ -65,7 +65,7 @@ qa.run(query)
 
 </CodeOutputBlock>
 
-The above way allows you to really simply change the chain_type, but it doesn't provide a ton of flexibility over parameters to that chain type. If you want to control those parameters, you can load the chain directly (as you did in [this notebook](/docs/modules/chains/additional/question_answering.html)) and then pass that directly to the the RetrievalQA chain with the `combine_documents_chain` parameter. For example:
+The above way allows you to really simply change the chain_type, but it doesn't provide a ton of flexibility over parameters to that chain type. If you want to control those parameters, you can load the chain directly (as you did in [this notebook](/docs/modules/chains/additional/question_answering.html)) and then pass that directly to the RetrievalQA chain with the `combine_documents_chain` parameter. For example:
 
 
 ```python

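The hunk's context cuts off at the opening fence, so here is a hedged sketch of the pattern the sentence describes (not the notebook's verbatim example), assuming `llm`, a `docsearch` vector store, and `query` are already defined:

```python
from langchain.chains import RetrievalQA
from langchain.chains.question_answering import load_qa_chain

# Load the combine-documents chain directly so its parameters can be controlled...
qa_chain = load_qa_chain(llm, chain_type="stuff")
# ...then hand it to RetrievalQA via `combine_documents_chain`.
qa = RetrievalQA(combine_documents_chain=qa_chain, retriever=docsearch.as_retriever())
qa.run(query)
```
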
@@ -52,7 +52,7 @@ class SequentialChain(Chain):
         if set(input_variables).intersection(set(memory_keys)):
             overlapping_keys = set(input_variables) & set(memory_keys)
             raise ValueError(
-                f"The the input key(s) {''.join(overlapping_keys)} are found "
+                f"The input key(s) {''.join(overlapping_keys)} are found "
                 f"in the Memory keys ({memory_keys}) - please use input and "
                 f"memory keys that don't overlap."
             )

@@ -76,7 +76,7 @@ class LlamaContentFormatter(ContentFormatterBase):
         return self.format_request_payload(prompt=prompt, model_kwargs=model_kwargs)
 
     def format_request_payload(self, prompt: str, model_kwargs: Dict) -> bytes:
-        """Formats the request according the the chosen api"""
+        """Formats the request according to the chosen api"""
         return str.encode(prompt)
 
     def format_response_payload(self, output: bytes) -> str:

@@ -118,7 +118,7 @@ class AzureMLChatOnlineEndpoint(SimpleChatModel):
     @validator("http_client", always=True, allow_reuse=True)
     @classmethod
     def validate_client(cls, field_value: Any, values: Dict) -> AzureMLEndpointClient:
-        """Validate that api key and python package exists in environment."""
+        """Validate that api key and python package exist in environment."""
         endpoint_key = get_from_dict_or_env(
             values, "endpoint_api_key", "AZUREML_ENDPOINT_API_KEY"
         )

@@ -180,7 +180,7 @@ class LlamaContentFormatter(ContentFormatterBase):
     """Content formatter for LLaMa"""
 
     def format_request_payload(self, prompt: str, model_kwargs: Dict) -> bytes:
-        """Formats the request according the the chosen api"""
+        """Formats the request according to the chosen api"""
         prompt = ContentFormatterBase.escape_special_characters(prompt)
         request_payload = json.dumps(
             {

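For context, formatters like the one above plug into the AzureML endpoint wrappers. A hedged sketch of the wiring, with placeholder URL and key, assuming the `AzureMLOnlineEndpoint` class in the same module:

```python
from langchain.llms.azureml_endpoint import AzureMLOnlineEndpoint, LlamaContentFormatter

llm = AzureMLOnlineEndpoint(
    endpoint_url="https://<your-endpoint>.<region>.inference.ml.azure.com/score",  # placeholder
    endpoint_api_key="<your-api-key>",  # placeholder; also read from AZUREML_ENDPOINT_API_KEY
    content_formatter=LlamaContentFormatter(),  # formats the request payload as shown above
)
print(llm("Why is the sky blue?"))
```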