Mirror of https://github.com/hwchase17/langchain.git (synced 2025-06-22 23:00:00 +00:00)
parent 1afdb40b48
commit f3cc9bba5b
@@ -107,7 +107,7 @@
 "# Now let's try with fallbacks to Anthropic\n",
 "with patch('openai.ChatCompletion.create', side_effect=RateLimitError()):\n",
 " try:\n",
-" print(llm.invoke(\"Why did the the chicken cross the road?\"))\n",
+" print(llm.invoke(\"Why did the chicken cross the road?\"))\n",
 " except:\n",
 " print(\"Hit error\")"
 ]

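For reference, a minimal sketch of the kind of fallback setup the snippet above exercises, assuming `ChatOpenAI`/`ChatAnthropic` models with API keys available in the environment; the `max_retries=0` setting and model construction are illustrative assumptions, not taken from this commit:

```python
from unittest.mock import patch

from openai.error import RateLimitError  # pre-1.0 openai exception path
from langchain.chat_models import ChatAnthropic, ChatOpenAI

# Assumed setup: an OpenAI chat model with retries disabled so the simulated
# rate-limit error surfaces immediately, plus an Anthropic model as fallback.
openai_llm = ChatOpenAI(max_retries=0)
anthropic_llm = ChatAnthropic()
llm = openai_llm.with_fallbacks([anthropic_llm])

# Patch the OpenAI call so it always raises, forcing the fallback path.
with patch("openai.ChatCompletion.create", side_effect=RateLimitError()):
    try:
        print(llm.invoke("Why did the chicken cross the road?"))
    except Exception:
        print("Hit error")
```
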
@@ -109,7 +109,7 @@
 "# Now let's try with fallbacks to Anthropic\n",
 "with patch('openai.ChatCompletion.create', side_effect=RateLimitError()):\n",
 " try:\n",
-" print(llm.invoke(\"Why did the the chicken cross the road?\"))\n",
+" print(llm.invoke(\"Why did the chicken cross the road?\"))\n",
 " except:\n",
 " print(\"Hit error\")"
 ]

@@ -148,7 +148,7 @@
 "\n",
 "Inference speed is a challenge when running models locally (see above).\n",
 "\n",
-"To minimize latency, it is desiable to run models locally on GPU, which ships with many consumer laptops [e.g., Apple devices](https://www.apple.com/newsroom/2022/06/apple-unveils-m2-with-breakthrough-performance-and-capabilities/).\n",
+"To minimize latency, it is desirable to run models locally on GPU, which ships with many consumer laptops [e.g., Apple devices](https://www.apple.com/newsroom/2022/06/apple-unveils-m2-with-breakthrough-performance-and-capabilities/).\n",
 "\n",
 "And even with GPU, the available GPU memory bandwidth (as noted above) is important.\n",
 "\n",

@@ -254,7 +254,7 @@
 "\n",
 "`f16_kv`: whether the model should use half-precision for the key/value cache\n",
 "* Value: True\n",
-"* Meaning: The model will use half-precision, which can be more memory efficient; Metal only support True."
+"* Meaning: The model will use half-precision, which can be more memory efficient; Metal only supports True."
 ]
 },
 {

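For reference, a rough sketch of passing `f16_kv` when constructing a `LlamaCpp` LLM with Metal GPU offload; the model path and the `n_gpu_layers`/`n_batch` values are illustrative assumptions:

```python
from langchain.llms import LlamaCpp

# Illustrative values; adjust the path and sizes for your local model.
llm = LlamaCpp(
    model_path="/path/to/llama-2-13b-chat.ggmlv3.q4_0.bin",
    n_gpu_layers=1,   # offload layers to the Metal GPU
    n_batch=512,
    f16_kv=True,      # half-precision key/value cache; Metal only supports True
    verbose=True,
)

print(llm("Name one use of a local LLM."))
```
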
@@ -291,7 +291,7 @@
 "id": "f56f5168",
 "metadata": {},
 "source": [
-"The console log will show the the below to indicate Metal was enabled properly from steps above:\n",
+"The console log will show the below to indicate Metal was enabled properly from steps above:\n",
 "```\n",
 "ggml_metal_init: allocating\n",
 "ggml_metal_init: using MPS\n",

@@ -49,7 +49,7 @@
 "metadata": {},
 "source": [
 "`BibtexLoader` has these arguments:\n",
-"- `file_path`: the path the the `.bib` bibtex file\n",
+"- `file_path`: the path of the `.bib` bibtex file\n",
 "- optional `max_docs`: default=None, i.e. not limit. Use it to limit number of retrieved documents.\n",
 "- optional `max_content_chars`: default=4000. Use it to limit the number of characters in a single document.\n",
 "- optional `load_extra_meta`: default=False. By default only the most important fields from the bibtex entries: `Published` (publication year), `Title`, `Authors`, `Summary`, `Journal`, `Keywords`, and `URL`. If True, it will also try to load return `entry_id`, `note`, `doi`, and `links` fields. \n",

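For reference, a small usage sketch of `BibtexLoader` with the arguments listed above; the `.bib` path is hypothetical:

```python
from langchain.document_loaders import BibtexLoader

# Hypothetical bibliography file; max_docs caps how many entries are loaded.
loader = BibtexLoader("./biblio.bib", max_docs=2)
docs = loader.load()

# Each entry becomes a Document with fields such as Title in its metadata.
print(docs[0].metadata.get("Title"))
print(docs[0].page_content[:200])
```
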
@@ -55,7 +55,7 @@
 "id": "ac5c88ce",
 "metadata": {},
 "source": [
-"Let's add some files to the the sandbox"
+"Let's add some files to the sandbox"
 ]
 },
 {

@@ -66,7 +66,7 @@
 "\n",
 "The record manager relies on a time-based mechanism to determine what content can be cleaned up (when using `full` or `incremental` cleanup modes).\n",
 "\n",
-"If two tasks run back-to-back, and the first task finishes before the the clock time changes, then the second task may not be able to clean up content.\n",
+"If two tasks run back-to-back, and the first task finishes before the clock time changes, then the second task may not be able to clean up content.\n",
 "\n",
 "This is unlikely to be an issue in actual settings for the following reasons:\n",
 "\n",

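For reference, a minimal sketch of the indexing workflow that the record manager supports; the vector store choice, namespace, and SQLite URL are assumptions for illustration:

```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.indexes import SQLRecordManager, index
from langchain.schema import Document
from langchain.vectorstores import Chroma

# Assumed components: any compatible vector store works; the record manager
# tracks what was written and when, so later runs can clean up stale content.
vectorstore = Chroma(collection_name="demo", embedding_function=OpenAIEmbeddings())
record_manager = SQLRecordManager("chroma/demo", db_url="sqlite:///record_manager_cache.sql")
record_manager.create_schema()

docs = [Document(page_content="hello", metadata={"source": "a.txt"})]
index(docs, record_manager, vectorstore, cleanup="incremental", source_id_key="source")
```
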
@@ -12,7 +12,7 @@
 "- [Memory in LLMChain](/docs/modules/memory/how_to/adding_memory.html)\n",
 "- [Custom Agents](/docs/modules/agents/how_to/custom_agent.html)\n",
 "\n",
-"In order to add a memory to an agent we are going to the the following steps:\n",
+"In order to add a memory to an agent we are going to perform the following steps:\n",
 "\n",
 "1. We are going to create an `LLMChain` with memory.\n",
 "2. We are going to use that `LLMChain` to create a custom Agent.\n",

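For reference, a condensed sketch of step 1 from the list above, an `LLMChain` that carries conversation history; the prompt text and model settings are illustrative assumptions:

```python
from langchain.chains import LLMChain
from langchain.llms import OpenAI
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate

# The memory's `chat_history` key must appear as a variable in the prompt.
template = """You are a helpful assistant.

{chat_history}
Human: {human_input}
Assistant:"""

prompt = PromptTemplate(input_variables=["chat_history", "human_input"], template=template)
memory = ConversationBufferMemory(memory_key="chat_history")
llm_chain = LLMChain(llm=OpenAI(temperature=0), prompt=prompt, memory=memory)

print(llm_chain.predict(human_input="Hi there!"))
```
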
@@ -65,7 +65,7 @@ qa.run(query)
 
 </CodeOutputBlock>
 
-The above way allows you to really simply change the chain_type, but it doesn't provide a ton of flexibility over parameters to that chain type. If you want to control those parameters, you can load the chain directly (as you did in [this notebook](/docs/modules/chains/additional/question_answering.html)) and then pass that directly to the the RetrievalQA chain with the `combine_documents_chain` parameter. For example:
+The above way allows you to really simply change the chain_type, but it doesn't provide a ton of flexibility over parameters to that chain type. If you want to control those parameters, you can load the chain directly (as you did in [this notebook](/docs/modules/chains/additional/question_answering.html)) and then pass that directly to the RetrievalQA chain with the `combine_documents_chain` parameter. For example:
 
 
 ```python
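The example that follows in the file is not shown in this hunk; for reference, a rough sketch of loading a question-answering chain directly and handing it to `RetrievalQA` via `combine_documents_chain`, with an assumed FAISS store and example text:

```python
from langchain.chains import RetrievalQA
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.vectorstores import FAISS

# Illustrative corpus; any retriever works here.
docsearch = FAISS.from_texts(
    ["The president thanked Ketanji Brown Jackson."], OpenAIEmbeddings()
)

# Load the combine-documents chain directly to control its parameters,
# then pass it to RetrievalQA with the combine_documents_chain parameter.
qa_chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff")
qa = RetrievalQA(combine_documents_chain=qa_chain, retriever=docsearch.as_retriever())

print(qa.run("What did the president say about Ketanji Brown Jackson?"))
```
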
@@ -52,7 +52,7 @@ class SequentialChain(Chain):
         if set(input_variables).intersection(set(memory_keys)):
             overlapping_keys = set(input_variables) & set(memory_keys)
             raise ValueError(
-                f"The the input key(s) {''.join(overlapping_keys)} are found "
+                f"The input key(s) {''.join(overlapping_keys)} are found "
                 f"in the Memory keys ({memory_keys}) - please use input and "
                 f"memory keys that don't overlap."
             )

@@ -76,7 +76,7 @@ class LlamaContentFormatter(ContentFormatterBase):
         return self.format_request_payload(prompt=prompt, model_kwargs=model_kwargs)
 
     def format_request_payload(self, prompt: str, model_kwargs: Dict) -> bytes:
-        """Formats the request according the the chosen api"""
+        """Formats the request according to the chosen api"""
         return str.encode(prompt)
 
     def format_response_payload(self, output: bytes) -> str:

@@ -118,7 +118,7 @@ class AzureMLChatOnlineEndpoint(SimpleChatModel):
     @validator("http_client", always=True, allow_reuse=True)
     @classmethod
     def validate_client(cls, field_value: Any, values: Dict) -> AzureMLEndpointClient:
-        """Validate that api key and python package exists in environment."""
+        """Validate that api key and python package exist in environment."""
         endpoint_key = get_from_dict_or_env(
             values, "endpoint_api_key", "AZUREML_ENDPOINT_API_KEY"
         )

@@ -180,7 +180,7 @@ class LlamaContentFormatter(ContentFormatterBase):
     """Content formatter for LLaMa"""
 
     def format_request_payload(self, prompt: str, model_kwargs: Dict) -> bytes:
-        """Formats the request according the the chosen api"""
+        """Formats the request according to the chosen api"""
        prompt = ContentFormatterBase.escape_special_characters(prompt)
         request_payload = json.dumps(
             {