mirror of https://github.com/hwchase17/langchain.git
synced 2026-02-12 20:20:08 +00:00

Compare commits (95 commits): langchain- ... langchain-
| Author | SHA1 | Date |
|---|---|---|
| | ed3a5e664c | |
| | 29461b36d9 | |
| | 07e2e80fe7 | |
| | 8f95da4eb1 | |
| | 284c935b08 | |
| | c79274cb7c | |
| | a3878a3c62 | |
| | 7aae738296 | |
| | 12bcc85927 | |
| | 585f467d4a | |
| | ca9d4e4595 | |
| | e120378695 | |
| | 6f711794a7 | |
| | 2d776351af | |
| | 737a68fcdc | |
| | fa3857c9d0 | |
| | 8bf9c71673 | |
| | ecdc881328 | |
| | dced0ed3fd | |
| | 7cbf885c18 | |
| | 2bb2c9bfe8 | |
| | b1fdac726b | |
| | 1551d9750c | |
| | f00c66cc1f | |
| | 3b886cdbb2 | |
| | aba1fd0bd4 | |
| | 7b404fcd37 | |
| | 3fce78994e | |
| | dbb6b7b103 | |
| | 723b603f52 | |
| | bd1909fe05 | |
| | bbc50f65e7 | |
| | ed797e17fb | |
| | 933b35b9c5 | |
| | 317fb86fd9 | |
| | 8d566a8fe7 | |
| | b6ae7ca91d | |
| | 618e550f06 | |
| | f795ab99ec | |
| | 8977451c76 | |
| | d5b8aabb32 | |
| | 8f2c11e17b | |
| | 2df9daa7f2 | |
| | e4921239a6 | |
| | ec0ebb76f2 | |
| | a1e62070d0 | |
| | a13faab6b7 | |
| | f2ea62f632 | |
| | 861024f388 | |
| | 3f1d20964a | |
| | 7378c955db | |
| | ef1610e24a | |
| | 9eb10a9240 | |
| | a2ed796aa6 | |
| | de1fc4811d | |
| | e723882a49 | |
| | 836c791829 | |
| | a004dec119 | |
| | 2340b3154d | |
| | e4a78dfc2a | |
| | 6f95db81b7 | |
| | d6a7aaa97d | |
| | 1c4ce7b42b | |
| | 86a0720310 | |
| | 32c9c58adf | |
| | 566915d7cf | |
| | 33e22ccb19 | |
| | 7b44c3384e | |
| | 536b44a47f | |
| | ec5fae76d4 | |
| | 923e6fb321 | |
| | 06456c1dcf | |
| | d3ed9b86be | |
| | 989eec4b7b | |
| | e5d62c6ce7 | |
| | 4efc5093c1 | |
| | f175319303 | |
| | 7a95ffc775 | |
| | 5d64597490 | |
| | 6c52378992 | |
| | b5fbebb3c8 | |
| | c20f7418c7 | |
| | 6b249a0dc2 | |
| | e9abe583b2 | |
| | 8fad9214c7 | |
| | 2fb6fd7950 | |
| | 1cd4d8d101 | |
| | d4b9404fd6 | |
| | 184ea8aeb2 | |
| | ac52021097 | |
| | c616b445f2 | |
| | 628145b172 | |
| | 97a5bc7fc7 | |
| | f0226135e5 | |
| | 36ff83a0b5 | |
.github/scripts/check_diff.py (vendored, 5 lines changed)

@@ -304,9 +304,8 @@ if __name__ == "__main__":
                     f"Unknown lib: {file}. check_diff.py likely needs "
                     "an update for this new library!"
                 )
-        elif any(file.startswith(p) for p in ["docs/", "cookbook/"]):
-            if file.startswith("docs/"):
-                docs_edited = True
+        elif file.startswith("docs/") or file in ["pyproject.toml", "poetry.lock"]:  # docs or root poetry files
+            docs_edited = True
             dirs_to_run["lint"].add(".")

     dependents = dependents_graph()
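To see the behavioral change in isolation, here is a small, hypothetical restatement of the two conditions (names borrowed from the hunk; the surrounding loop and the `dirs_to_run` bookkeeping are omitted):

```python
# Hypothetical restatement of the check_diff.py routing change; not the actual script.

def old_rule(file: str) -> bool:
    # Old: docs/ and cookbook/ were grouped, and only docs/ set docs_edited.
    return any(file.startswith(p) for p in ["docs/", "cookbook/"])

def new_rule(file: str) -> bool:
    # New: docs edits and the root poetry files both mark docs as edited
    # (and, per the hunk, add the repo root to the lint set).
    return file.startswith("docs/") or file in ["pyproject.toml", "poetry.lock"]

assert new_rule("poetry.lock") and not old_rule("poetry.lock")  # newly covered
assert new_rule("docs/index.mdx") and old_rule("docs/index.mdx")
```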
.github/scripts/prep_api_docs_build.py (vendored, 6 lines added)

@@ -82,6 +82,12 @@ def main():
             and p["repo"] != "langchain-ai/langchain"
         ])

+        # Delete ones without a pyproject.toml
+        for partner in Path("langchain/libs/partners").iterdir():
+            if partner.is_dir() and not (partner / "pyproject.toml").exists():
+                print(f"Removing {partner} as it does not have a pyproject.toml")
+                shutil.rmtree(partner)
+
         print("Library sync completed successfully!")

     except Exception as e:
.github/workflows/_release.yml (vendored, 3 lines changed)

@@ -217,7 +217,8 @@ jobs:

           # Replace all dashes in the package name with underscores,
           # since that's how Python imports packages with dashes in the name.
-          IMPORT_NAME="$(echo "$PKG_NAME" | sed s/-/_/g)"
+          # also remove _official suffix
+          IMPORT_NAME="$(echo "$PKG_NAME" | sed s/-/_/g | sed s/_official//g)"

           poetry run python -c "import $IMPORT_NAME; print(dir($IMPORT_NAME))"
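The two `sed` substitutions are easy to sanity-check outside CI; a Python equivalent (the package name is just an example value for `$PKG_NAME`):

```python
# Python equivalent of: sed s/-/_/g | sed s/_official//g
pkg_name = "langchain-deepseek-official"  # example $PKG_NAME
import_name = pkg_name.replace("-", "_").replace("_official", "")
assert import_name == "langchain_deepseek"
```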
docs/cassettes/sql_qa_11.msgpack.zlib (new file, 1 line)
Binary msgpack.zlib payload omitted.

docs/cassettes/sql_qa_15.msgpack.zlib (new file, 1 line)
File diff suppressed because one or more lines are too long

docs/cassettes/sql_qa_25.msgpack.zlib (new file, 1 line)
File diff suppressed because one or more lines are too long

docs/cassettes/sql_qa_27.msgpack.zlib (new file, 1 line)
File diff suppressed because one or more lines are too long

docs/cassettes/sql_qa_31.msgpack.zlib (new file, 1 line)
File diff suppressed because one or more lines are too long

docs/cassettes/sql_qa_33.msgpack.zlib (new file, 1 line)
File diff suppressed because one or more lines are too long

docs/cassettes/sql_qa_39.msgpack.zlib (new file, 1 line)
Binary msgpack.zlib payload omitted.

docs/cassettes/sql_qa_45.msgpack.zlib (new file, 1 line)
File diff suppressed because one or more lines are too long

docs/cassettes/sql_qa_47.msgpack.zlib (new file, 1 line)
File diff suppressed because one or more lines are too long

docs/cassettes/sql_qa_55.msgpack.zlib (new file, 1 line)
File diff suppressed because one or more lines are too long

docs/cassettes/sql_qa_57.msgpack.zlib (new file, 1 line)
File diff suppressed because one or more lines are too long

docs/cassettes/sql_qa_60.msgpack.zlib (new file, 1 line)
File diff suppressed because one or more lines are too long
@@ -27,7 +27,7 @@ LangChain has lots of different types of output parsers. This is a list of outpu
 | Name | Supports Streaming | Has Format Instructions | Calls LLM | Input Type | Output Type | Description |
 |------|--------------------|-------------------------|-----------|------------|-------------|-------------|
 | [Str](https://python.langchain.com/api_reference/core/output_parsers/langchain_core.output_parsers.string.StrOutputParser.html) | ✅ | | | `str` \| `Message` | String | Parses texts from message objects. Useful for handling variable formats of message content (e.g., extracting text from content blocks). |
-| [JSON](https://python.langchain.com/api_reference/core/output_parsers/langchain_core.output_parsers.json.JSONOutputParser.html#langchain_core.output_parsers.json.JSONOutputParser) | ✅ | ✅ | | `str` \| `Message` | JSON object | Returns a JSON object as specified. You can specify a Pydantic model and it will return JSON for that model. Probably the most reliable output parser for getting structured data that does NOT use function calling. |
+| [JSON](https://python.langchain.com/api_reference/core/output_parsers/langchain_core.output_parsers.json.JsonOutputParser.html) | ✅ | ✅ | | `str` \| `Message` | JSON object | Returns a JSON object as specified. You can specify a Pydantic model and it will return JSON for that model. Probably the most reliable output parser for getting structured data that does NOT use function calling. |
 | [XML](https://python.langchain.com/api_reference/core/output_parsers/langchain_core.output_parsers.xml.XMLOutputParser.html#langchain_core.output_parsers.xml.XMLOutputParser) | ✅ | ✅ | | `str` \| `Message` | `dict` | Returns a dictionary of tags. Use when XML output is needed. Use with models that are good at writing XML (like Anthropic's). |
 | [CSV](https://python.langchain.com/api_reference/core/output_parsers/langchain_core.output_parsers.list.CommaSeparatedListOutputParser.html#langchain_core.output_parsers.list.CommaSeparatedListOutputParser) | ✅ | ✅ | | `str` \| `Message` | `List[str]` | Returns a list of comma separated values. |
 | [OutputFixing](https://python.langchain.com/api_reference/langchain/output_parsers/langchain.output_parsers.fix.OutputFixingParser.html#langchain.output_parsers.fix.OutputFixingParser) | | | ✅ | `str` \| `Message` | | Wraps another output parser. If that output parser errors, then this will pass the error message and the bad output to an LLM and ask it to fix the output. |
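For reference, a minimal sketch of the parser at the corrected `JsonOutputParser` path, next to `StrOutputParser` from the same table:

```python
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser

json_parser = JsonOutputParser()
str_parser = StrOutputParser()

# Both accept a string or a message; typically they sit at the end of a chain,
# e.g. chain = prompt | llm | json_parser
print(json_parser.parse('{"answer": 42}'))  # {'answer': 42}
```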
@@ -151,10 +151,10 @@ Many vectorstores support [the `k`](/docs/integrations/vectorstores/pinecone/#qu
 ### Metadata filtering

 While vectorstore implement a search algorithm to efficiently search over *all* the embedded documents to find the most similar ones, many also support filtering on metadata.
-This allows structured filters to reduce the size of the similarity search space. These two concepts work well together:
+Metadata filtering helps narrow down the search by applying specific conditions such as retrieving documents from a particular source or date range. These two concepts work well together:

-1. **Semantic search**: Query the unstructured data directly, often using via embedding or keyword similarity.
-2. **Metadata search**: Apply structured query to the metadata, filering specific documents.
+1. **Semantic search**: Query the unstructured data directly, often via embedding or keyword similarity.
+2. **Metadata search**: Apply structured query to the metadata, filtering specific documents.

 Vector store support for metadata filtering is typically dependent on the underlying vector store implementation.
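As a concrete illustration of combining the two searches, a hedged sketch using Chroma and a fake embedding model (the store choice, field names, and filter syntax are illustrative; filter support varies by vector store):

```python
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_core.embeddings import DeterministicFakeEmbedding

store = Chroma.from_documents(
    [
        Document(page_content="Ship checklist", metadata={"source": "handbook"}),
        Document(page_content="Ship checklist", metadata={"source": "blog"}),
    ],
    embedding=DeterministicFakeEmbedding(size=8),  # stand-in for a real embedding model
)

# 1. Semantic search over embeddings, 2. constrained by a structured metadata filter.
docs = store.similarity_search("checklist", k=1, filter={"source": "handbook"})
```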
@@ -31,7 +31,7 @@ This provides a standard way to interact with chat models, supporting important

 ### Example: chat models

-Many [model providers](/docs/concepts/chat_models/) support [tool calling](/docs/concepts/tool_calling/), a critical features for many applications (e.g., [agents](https://langchain-ai.github.io/langgraph/concepts/agentic_concepts/)), that allows a developer to request model responses that match a particular schema.
+Many [model providers](/docs/concepts/chat_models/) support [tool calling](/docs/concepts/tool_calling/), a critical feature for many applications (e.g., [agents](https://langchain-ai.github.io/langgraph/concepts/agentic_concepts/)), that allows a developer to request model responses that match a particular schema.
 The APIs for each provider differ.
 LangChain's [chat model](/docs/concepts/chat_models/) interface provides a common way to bind [tools](/docs/concepts/tools) to a model in order to support [tool calling](/docs/concepts/tool_calling/):
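A minimal sketch of that common interface (the tool and model are illustrative; any chat model with tool-calling support exposes the same `bind_tools` method):

```python
from langchain_anthropic import ChatAnthropic  # any tool-calling chat model works
from langchain_core.tools import tool


@tool
def multiply(a: int, b: int) -> int:
    """Multiply two integers."""
    return a * b


llm = ChatAnthropic(model="claude-3-5-haiku-latest")  # requires ANTHROPIC_API_KEY
llm_with_tools = llm.bind_tools([multiply])

ai_msg = llm_with_tools.invoke("What is 6 times 7?")
print(ai_msg.tool_calls)  # tool calls in a provider-agnostic format
```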
@@ -118,8 +118,8 @@ langchain-cli integration create-doc \
 These commands will create the following 3 files, which you should fill out with information about your package:

 - `docs/docs/integrations/providers/parrot_link.ipynb`
-- `docs/docs/integrations/chat/parrot_-_link.ipynb`
-- `docs/docs/integrations/vectorstores/parrot_-_link.ipynb`
+- `docs/docs/integrations/chat/parrot_link.ipynb`
+- `docs/docs/integrations/vectorstores/parrot_link.ipynb`

 ### Manually create your documentation pages (if you prefer)
@@ -120,7 +120,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Great! Now let's assemble our agent using LangGraph's prebuilt [create_react_agent](https://langchain-ai.github.io/langgraph/reference/prebuilt/#create_react_agent), which allows you to create a [tool-calling agent](https://langchain-ai.github.io/langgraph/concepts/agentic_concepts/#tool-calling-agent):"
+    "Great! Now let's assemble our agent using LangGraph's prebuilt [create_react_agent](https://langchain-ai.github.io/langgraph/reference/prebuilt/#langgraph.prebuilt.chat_agent_executor.create_react_agent), which allows you to create a [tool-calling agent](https://langchain-ai.github.io/langgraph/concepts/agentic_concepts/#tool-calling-agent):"
    ]
   },
   {
@@ -131,10 +131,10 @@
    "source": [
     "from langgraph.prebuilt import create_react_agent\n",
     "\n",
-    "# state_modifier allows you to preprocess the inputs to the model inside ReAct agent\n",
+    "# prompt allows you to preprocess the inputs to the model inside ReAct agent\n",
     "# in this case, since we're passing a prompt string, we'll just always add a SystemMessage\n",
     "# with this prompt string before any other messages sent to the model\n",
-    "agent = create_react_agent(model, tools, state_modifier=prompt)"
+    "agent = create_react_agent(model, tools, prompt=prompt)"
    ]
   },
   {
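Taken together with the rename, a minimal sketch of the new call shape (`model` and `tools` are assumed to be defined as earlier in the notebook):

```python
from langgraph.prebuilt import create_react_agent

# `prompt` (formerly `state_modifier`) may be a string, a SystemMessage,
# a callable over the agent state, or a Runnable.
agent = create_react_agent(model, tools, prompt="Respond only in Spanish.")
result = agent.invoke({"messages": [("user", "What is LangChain?")]})
```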
@@ -266,7 +266,7 @@
     "\n",
     "# highlight-start\n",
     "memory = MemorySaver()\n",
-    "agent = create_react_agent(model, tools, state_modifier=prompt, checkpointer=memory)\n",
+    "agent = create_react_agent(model, tools, prompt=prompt, checkpointer=memory)\n",
     "# highlight-end"
    ]
   },
@@ -25,52 +25,10 @@ This highlights functionality that is core to using LangChain.
 - [How to: stream runnables](/docs/how_to/streaming)
 - [How to: debug your LLM apps](/docs/how_to/debugging/)

-## LangChain Expression Language (LCEL)
-
-[LangChain Expression Language](/docs/concepts/lcel) is a way to create arbitrary custom chains. It is built on the [Runnable](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html) protocol.
-
-[**LCEL cheatsheet**](/docs/how_to/lcel_cheatsheet/): For a quick overview of how to use the main LCEL primitives.
-
-[**Migration guide**](/docs/versions/migrating_chains): For migrating legacy chain abstractions to LCEL.
-
-- [How to: chain runnables](/docs/how_to/sequence)
-- [How to: stream runnables](/docs/how_to/streaming)
-- [How to: invoke runnables in parallel](/docs/how_to/parallel/)
-- [How to: add default invocation args to runnables](/docs/how_to/binding/)
-- [How to: turn any function into a runnable](/docs/how_to/functions)
-- [How to: pass through inputs from one chain step to the next](/docs/how_to/passthrough)
-- [How to: configure runnable behavior at runtime](/docs/how_to/configure)
-- [How to: add message history (memory) to a chain](/docs/how_to/message_history)
-- [How to: route between sub-chains](/docs/how_to/routing)
-- [How to: create a dynamic (self-constructing) chain](/docs/how_to/dynamic_chain/)
-- [How to: inspect runnables](/docs/how_to/inspect)
-- [How to: add fallbacks to a runnable](/docs/how_to/fallbacks)
-- [How to: pass runtime secrets to a runnable](/docs/how_to/runnable_runtime_secrets)
-
 ## Components

 These are the core building blocks you can use when building applications.

-### Prompt templates
-
-[Prompt Templates](/docs/concepts/prompt_templates) are responsible for formatting user input into a format that can be passed to a language model.
-
-- [How to: use few shot examples](/docs/how_to/few_shot_examples)
-- [How to: use few shot examples in chat models](/docs/how_to/few_shot_examples_chat/)
-- [How to: partially format prompt templates](/docs/how_to/prompts_partial)
-- [How to: compose prompts together](/docs/how_to/prompts_composition)
-
-### Example selectors
-
-[Example Selectors](/docs/concepts/example_selectors) are responsible for selecting the correct few shot examples to pass to the prompt.
-
-- [How to: use example selectors](/docs/how_to/example_selectors)
-- [How to: select examples by length](/docs/how_to/example_selectors_length_based)
-- [How to: select examples by semantic similarity](/docs/how_to/example_selectors_similarity)
-- [How to: select examples by semantic ngram overlap](/docs/how_to/example_selectors_ngram)
-- [How to: select examples by maximal marginal relevance](/docs/how_to/example_selectors_mmr)
-- [How to: select examples from LangSmith few-shot datasets](/docs/how_to/example_selectors_langsmith/)
-
 ### Chat models

 [Chat Models](/docs/concepts/chat_models) are newer forms of language models that take messages in and output a message.
@@ -101,6 +59,26 @@ See [supported integrations](/docs/integrations/chat/) for details on getting st
 - [How to: filter messages](/docs/how_to/filter_messages/)
 - [How to: merge consecutive messages of the same type](/docs/how_to/merge_message_runs/)

+### Prompt templates
+
+[Prompt Templates](/docs/concepts/prompt_templates) are responsible for formatting user input into a format that can be passed to a language model.
+
+- [How to: use few shot examples](/docs/how_to/few_shot_examples)
+- [How to: use few shot examples in chat models](/docs/how_to/few_shot_examples_chat/)
+- [How to: partially format prompt templates](/docs/how_to/prompts_partial)
+- [How to: compose prompts together](/docs/how_to/prompts_composition)
+
+### Example selectors
+
+[Example Selectors](/docs/concepts/example_selectors) are responsible for selecting the correct few shot examples to pass to the prompt.
+
+- [How to: use example selectors](/docs/how_to/example_selectors)
+- [How to: select examples by length](/docs/how_to/example_selectors_length_based)
+- [How to: select examples by semantic similarity](/docs/how_to/example_selectors_similarity)
+- [How to: select examples by semantic ngram overlap](/docs/how_to/example_selectors_ngram)
+- [How to: select examples by maximal marginal relevance](/docs/how_to/example_selectors_mmr)
+- [How to: select examples from LangSmith few-shot datasets](/docs/how_to/example_selectors_langsmith/)
+
 ### LLMs

 What LangChain calls [LLMs](/docs/concepts/text_llms) are older forms of language models that take a string in and output a string.
@@ -329,6 +307,36 @@ large volumes of text. For a high-level tutorial, check out [this guide](/docs/t
 - [How to: summarize text through parallelization](/docs/how_to/summarize_map_reduce)
 - [How to: summarize text through iterative refinement](/docs/how_to/summarize_refine)

+## LangChain Expression Language (LCEL)
+
+:::note Should I use LCEL?
+
+LCEL is an orchestration solution. See our
+[concepts page](/docs/concepts/lcel/#should-i-use-lcel) for recommendations on when to
+use LCEL.
+
+:::
+
+[LangChain Expression Language](/docs/concepts/lcel) is a way to create arbitrary custom chains. It is built on the [Runnable](https://python.langchain.com/api_reference/core/runnables/langchain_core.runnables.base.Runnable.html) protocol.
+
+[**LCEL cheatsheet**](/docs/how_to/lcel_cheatsheet/): For a quick overview of how to use the main LCEL primitives.
+
+[**Migration guide**](/docs/versions/migrating_chains): For migrating legacy chain abstractions to LCEL.
+
+- [How to: chain runnables](/docs/how_to/sequence)
+- [How to: stream runnables](/docs/how_to/streaming)
+- [How to: invoke runnables in parallel](/docs/how_to/parallel/)
+- [How to: add default invocation args to runnables](/docs/how_to/binding/)
+- [How to: turn any function into a runnable](/docs/how_to/functions)
+- [How to: pass through inputs from one chain step to the next](/docs/how_to/passthrough)
+- [How to: configure runnable behavior at runtime](/docs/how_to/configure)
+- [How to: add message history (memory) to a chain](/docs/how_to/message_history)
+- [How to: route between sub-chains](/docs/how_to/routing)
+- [How to: create a dynamic (self-constructing) chain](/docs/how_to/dynamic_chain/)
+- [How to: inspect runnables](/docs/how_to/inspect)
+- [How to: add fallbacks to a runnable](/docs/how_to/fallbacks)
+- [How to: pass runtime secrets to a runnable](/docs/how_to/runnable_runtime_secrets)
+
 ## [LangGraph](https://langchain-ai.github.io/langgraph)

 LangGraph is an extension of LangChain aimed at
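A small sketch of the Runnable composition this section describes; `FakeListChatModel` stands in for a real chat model purely so the example runs offline:

```python
from langchain_core.language_models import FakeListChatModel
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

llm = FakeListChatModel(responses=["Otters hold hands while sleeping."])

# The | operator composes Runnables into a single chain.
chain = ChatPromptTemplate.from_template("Tell me a fact about {topic}.") | llm | StrOutputParser()
print(chain.invoke({"topic": "otters"}))
```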
@@ -32,11 +32,11 @@
     "\n",
     "Here we focus on how to move from legacy LangChain agents to more flexible [LangGraph](https://langchain-ai.github.io/langgraph/) agents.\n",
     "LangChain agents (the [AgentExecutor](https://python.langchain.com/api_reference/langchain/agents/langchain.agents.agent.AgentExecutor.html#langchain.agents.agent.AgentExecutor) in particular) have multiple configuration parameters.\n",
-    "In this notebook we will show how those parameters map to the LangGraph react agent executor using the [create_react_agent](https://langchain-ai.github.io/langgraph/reference/prebuilt/#create_react_agent) prebuilt helper method.\n",
+    "In this notebook we will show how those parameters map to the LangGraph react agent executor using the [create_react_agent](https://langchain-ai.github.io/langgraph/reference/prebuilt/#langgraph.prebuilt.chat_agent_executor.create_react_agent) prebuilt helper method.\n",
     "\n",
     "\n",
     ":::note\n",
-    "In LangGraph, the graph replaces LangChain's agent executor. It manages the agent's cycles and tracks the scratchpad as messages within its state. The LangChain \"agent\" corresponds to the state_modifier and LLM you've provided.\n",
+    "In LangGraph, the graph replaces LangChain's agent executor. It manages the agent's cycles and tracks the scratchpad as messages within its state. The LangChain \"agent\" corresponds to the prompt and LLM you've provided.\n",
     ":::\n",
     "\n",
     "\n",
@@ -164,7 +164,7 @@
    "id": "94205f3b-fd2b-4fd7-af69-0a3fc313dc88",
    "metadata": {},
    "source": [
-    "LangGraph's [react agent executor](https://langchain-ai.github.io/langgraph/reference/prebuilt/#create_react_agent) manages a state that is defined by a list of messages. It will continue to process the list until there are no tool calls in the agent's output. To kick it off, we input a list of messages. The output will contain the entire state of the graph-- in this case, the conversation history.\n",
+    "LangGraph's [react agent executor](https://langchain-ai.github.io/langgraph/reference/prebuilt/#langgraph.prebuilt.chat_agent_executor.create_react_agent) manages a state that is defined by a list of messages. It will continue to process the list until there are no tool calls in the agent's output. To kick it off, we input a list of messages. The output will contain the entire state of the graph-- in this case, the conversation history.\n",
    "\n"
   ]
  },
@@ -240,11 +240,12 @@
    "\n",
    "With legacy LangChain agents you have to pass in a prompt template. You can use this to control the agent.\n",
    "\n",
-   "With LangGraph [react agent executor](https://langchain-ai.github.io/langgraph/reference/prebuilt/#create_react_agent), by default there is no prompt. You can achieve similar control over the agent in a few ways:\n",
+   "With LangGraph [react agent executor](https://langchain-ai.github.io/langgraph/reference/prebuilt/#langgraph.prebuilt.chat_agent_executor.create_react_agent), by default there is no prompt. You can achieve similar control over the agent in a few ways:\n",
    "\n",
    "1. Pass in a system message as input\n",
    "2. Initialize the agent with a system message\n",
-   "3. Initialize the agent with a function to transform messages before passing to the model.\n",
+   "3. Initialize the agent with a function to transform messages in the graph state before passing to the model.\n",
+   "4. Initialize the agent with a [Runnable](/docs/concepts/lcel) to transform messages in the graph state before passing to the model. This includes passing prompt templates as well.\n",
    "\n",
    "Let's take a look at all of these below. We will pass in custom instructions to get the agent to respond in Spanish.\n",
    "\n",
@@ -291,9 +292,9 @@
    "id": "bd5f5500-5ae4-4000-a9fd-8c5a2cc6404d",
    "metadata": {},
    "source": [
-   "Now, let's pass a custom system message to [react agent executor](https://langchain-ai.github.io/langgraph/reference/prebuilt/#create_react_agent).\n",
+   "Now, let's pass a custom system message to [react agent executor](https://langchain-ai.github.io/langgraph/reference/prebuilt/#langgraph.prebuilt.chat_agent_executor.create_react_agent).\n",
    "\n",
-   "LangGraph's prebuilt `create_react_agent` does not take a prompt template directly as a parameter, but instead takes a [`state_modifier`](https://langchain-ai.github.io/langgraph/reference/prebuilt/#create_react_agent) parameter. This modifies the graph state before the llm is called, and can be one of four values:\n",
+   "LangGraph's prebuilt `create_react_agent` does not take a prompt template directly as a parameter, but instead takes a [`prompt`](https://langchain-ai.github.io/langgraph/reference/prebuilt/#langgraph.prebuilt.chat_agent_executor.create_react_agent) parameter. This modifies the graph state before the llm is called, and can be one of four values:\n",
    "\n",
    "- A `SystemMessage`, which is added to the beginning of the list of messages.\n",
    "- A `string`, which is converted to a `SystemMessage` and added to the beginning of the list of messages.\n",
@@ -317,9 +318,7 @@
    "# This could also be a SystemMessage object\n",
    "# system_message = SystemMessage(content=\"You are a helpful assistant. Respond only in Spanish.\")\n",
    "\n",
-   "langgraph_agent_executor = create_react_agent(\n",
-   "    model, tools, state_modifier=system_message\n",
-   ")\n",
+   "langgraph_agent_executor = create_react_agent(model, tools, prompt=system_message)\n",
    "\n",
    "\n",
    "messages = langgraph_agent_executor.invoke({\"messages\": [(\"user\", query)]})"
@@ -330,8 +329,8 @@
    "id": "fc6059fd-0df7-4b6f-a84c-b5874e983638",
    "metadata": {},
    "source": [
-   "We can also pass in an arbitrary function. This function should take in a list of messages and output a list of messages.\n",
-   "We can do all types of arbitrary formatting of messages here. In this case, let's just add a SystemMessage to the start of the list of messages."
+   "We can also pass in an arbitrary function or a runnable. This function/runnable should take in a the graph state and output a list of messages.\n",
+   "We can do all types of arbitrary formatting of messages here. In this case, let's add a SystemMessage to the start of the list of messages and append another user message at the end."
   ]
  },
  {
@@ -349,6 +348,7 @@
    }
   ],
   "source": [
+   "from langchain_core.messages import HumanMessage, SystemMessage\n",
    "from langgraph.prebuilt import create_react_agent\n",
    "from langgraph.prebuilt.chat_agent_executor import AgentState\n",
    "\n",
@@ -356,19 +356,20 @@
    "    [\n",
    "        (\"system\", \"You are a helpful assistant. Respond only in Spanish.\"),\n",
    "        (\"placeholder\", \"{messages}\"),\n",
+   "        (\"user\", \"Also say 'Pandamonium!' after the answer.\"),\n",
    "    ]\n",
    ")\n",
    "\n",
    "\n",
-   "def _modify_state_messages(state: AgentState):\n",
-   "    return prompt.invoke({\"messages\": state[\"messages\"]}).to_messages() + [\n",
-   "        (\"user\", \"Also say 'Pandamonium!' after the answer.\")\n",
-   "    ]\n",
+   "# alternatively, this can be passed as a function, e.g.\n",
+   "# def prompt(state: AgentState):\n",
+   "#     return (\n",
+   "#         [SystemMessage(content=\"You are a helpful assistant. Respond only in Spanish.\")] +\n",
+   "#         state[\"messages\"] +\n",
+   "#         [HumanMessage(content=\"Also say 'Pandamonium!' after the answer.\")]\n",
+   "#     )\n",
    "\n",
    "\n",
-   "langgraph_agent_executor = create_react_agent(\n",
-   "    model, tools, state_modifier=_modify_state_messages\n",
-   ")\n",
+   "langgraph_agent_executor = create_react_agent(model, tools, prompt=prompt)\n",
    "\n",
    "\n",
    "messages = langgraph_agent_executor.invoke({\"messages\": [(\"human\", query)]})\n",
@@ -516,7 +517,7 @@
    "\n",
    "memory = MemorySaver()\n",
    "langgraph_agent_executor = create_react_agent(\n",
-   "    model, tools, state_modifier=system_message, checkpointer=memory\n",
+   "    model, tools, prompt=system_message, checkpointer=memory\n",
    ")\n",
    "\n",
    "config = {\"configurable\": {\"thread_id\": \"test-thread\"}}\n",
@@ -643,14 +644,7 @@
    "    ]\n",
    ")\n",
    "\n",
    "\n",
-   "def _modify_state_messages(state: AgentState):\n",
-   "    return prompt.invoke({\"messages\": state[\"messages\"]}).to_messages()\n",
-   "\n",
-   "\n",
-   "langgraph_agent_executor = create_react_agent(\n",
-   "    model, tools, state_modifier=_modify_state_messages\n",
-   ")\n",
+   "langgraph_agent_executor = create_react_agent(model, tools, prompt=prompt)\n",
    "\n",
    "for step in langgraph_agent_executor.stream(\n",
    "    {\"messages\": [(\"human\", query)]}, stream_mode=\"updates\"\n",
@@ -697,7 +691,7 @@
    "source": [
    "### In LangGraph\n",
    "\n",
-   "By default the [react agent executor](https://langchain-ai.github.io/langgraph/reference/prebuilt/#create_react_agent) in LangGraph appends all messages to the central state. Therefore, it is easy to see any intermediate steps by just looking at the full state."
+   "By default the [react agent executor](https://langchain-ai.github.io/langgraph/reference/prebuilt/#langgraph.prebuilt.chat_agent_executor.create_react_agent) in LangGraph appends all messages to the central state. Therefore, it is easy to see any intermediate steps by just looking at the full state."
   ]
  },
  {
@@ -1244,7 +1238,7 @@
    "source": [
    "### In LangGraph\n",
    "\n",
-   "We can use the [`state_modifier`](https://langchain-ai.github.io/langgraph/reference/prebuilt/#create_react_agent) just as before when passing in [prompt templates](#prompt-templates)."
+   "We can use the [`prompt`](https://langchain-ai.github.io/langgraph/reference/prebuilt/#langgraph.prebuilt.chat_agent_executor.create_react_agent) just as before when passing in [prompt templates](#prompt-templates)."
   ]
  },
  {
@@ -1299,7 +1293,7 @@
    "\n",
    "\n",
    "langgraph_agent_executor = create_react_agent(\n",
-   "    model, tools, state_modifier=_modify_state_messages\n",
+   "    model, tools, prompt=_modify_state_messages\n",
    ")\n",
    "\n",
    "try:\n",
@@ -228,7 +228,7 @@
    "# highlight-next-line\n",
    "def retrieve(state: State, config: RunnableConfig):\n",
    "    # highlight-next-line\n",
-   "    retrieved_docs = configurable_retriever.invoke(state[\"question\"])\n",
+   "    retrieved_docs = configurable_retriever.invoke(state[\"question\"], config)\n",
    "    return {\"context\": retrieved_docs}\n",
    "\n",
    "\n",
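The point of that one-line change is config propagation: forwarding the node's `RunnableConfig` into the retriever keeps callbacks, tags, and configurable fields attached to the surrounding run. A hedged sketch of the pattern (names mirror the hunk; `configurable_retriever` and `State` come from the notebook):

```python
from langchain_core.runnables import RunnableConfig

def retrieve(state: State, config: RunnableConfig):
    # Passing `config` through ties this invoke to the parent run's
    # callbacks and configurable fields instead of starting a fresh trace.
    retrieved_docs = configurable_retriever.invoke(state["question"], config)
    return {"context": retrieved_docs}
```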
@@ -315,6 +315,163 @@
     "ai_msg.tool_calls"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "301d372f-4dec-43e6-b58c-eee25633e1a6",
+   "metadata": {},
+   "source": [
+    "## Citations\n",
+    "\n",
+    "Anthropic supports a [citations](https://docs.anthropic.com/en/docs/build-with-claude/citations) feature that lets Claude attach context to its answers based on source documents supplied by the user. When [document content blocks](https://docs.anthropic.com/en/docs/build-with-claude/citations#document-types) with `\"citations\": {\"enabled\": True}` are included in a query, Claude may generate citations in its response.\n",
+    "\n",
+    "### Simple example\n",
+    "\n",
+    "In this example we pass a [plain text document](https://docs.anthropic.com/en/docs/build-with-claude/citations#plain-text-documents). In the background, Claude [automatically chunks](https://docs.anthropic.com/en/docs/build-with-claude/citations#plain-text-documents) the input text into sentences, which are used when generating citations."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "e5370e6e-5a9a-4546-848b-5f5bf313c3e7",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[{'text': 'Based on the document, ', 'type': 'text'},\n",
+       " {'text': 'the grass is green',\n",
+       "  'type': 'text',\n",
+       "  'citations': [{'type': 'char_location',\n",
+       "    'cited_text': 'The grass is green. ',\n",
+       "    'document_index': 0,\n",
+       "    'document_title': 'My Document',\n",
+       "    'start_char_index': 0,\n",
+       "    'end_char_index': 20}]},\n",
+       " {'text': ', and ', 'type': 'text'},\n",
+       " {'text': 'the sky is blue',\n",
+       "  'type': 'text',\n",
+       "  'citations': [{'type': 'char_location',\n",
+       "    'cited_text': 'The sky is blue.',\n",
+       "    'document_index': 0,\n",
+       "    'document_title': 'My Document',\n",
+       "    'start_char_index': 20,\n",
+       "    'end_char_index': 36}]},\n",
+       " {'text': '.', 'type': 'text'}]"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from langchain_anthropic import ChatAnthropic\n",
+    "\n",
+    "llm = ChatAnthropic(model=\"claude-3-5-haiku-latest\")\n",
+    "\n",
+    "messages = [\n",
+    "    {\n",
+    "        \"role\": \"user\",\n",
+    "        \"content\": [\n",
+    "            {\n",
+    "                \"type\": \"document\",\n",
+    "                \"source\": {\n",
+    "                    \"type\": \"text\",\n",
+    "                    \"media_type\": \"text/plain\",\n",
+    "                    \"data\": \"The grass is green. The sky is blue.\",\n",
+    "                },\n",
+    "                \"title\": \"My Document\",\n",
+    "                \"context\": \"This is a trustworthy document.\",\n",
+    "                \"citations\": {\"enabled\": True},\n",
+    "            },\n",
+    "            {\"type\": \"text\", \"text\": \"What color is the grass and sky?\"},\n",
+    "        ],\n",
+    "    }\n",
+    "]\n",
+    "response = llm.invoke(messages)\n",
+    "response.content"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "69956596-0e6c-492b-934d-c08ed3c9de9a",
+   "metadata": {},
+   "source": [
+    "### Using with text splitters\n",
+    "\n",
+    "Anthropic also lets you specify your own splits using [custom document](https://docs.anthropic.com/en/docs/build-with-claude/citations#custom-content-documents) types. LangChain [text splitters](/docs/concepts/text_splitters/) can be used to generate meaningful splits for this purpose. See the below example, where we split the LangChain README (a markdown document) and pass it to Claude as context:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "04cc2841-7987-47a5-906c-09ea7fa28323",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[{'text': \"You can find LangChain's tutorials at https://python.langchain.com/docs/tutorials/\\n\\nThe tutorials section is recommended for those looking to build something specific or who prefer a hands-on learning approach. It's considered the best place to get started with LangChain.\",\n",
+       "  'type': 'text',\n",
+       "  'citations': [{'type': 'content_block_location',\n",
+       "    'cited_text': \"[Tutorials](https://python.langchain.com/docs/tutorials/):If you're looking to build something specific orare more of a hands-on learner, check out ourtutorials. This is the best place to get started.\",\n",
+       "    'document_index': 0,\n",
+       "    'document_title': None,\n",
+       "    'start_block_index': 243,\n",
+       "    'end_block_index': 248}]}]"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import requests\n",
+    "from langchain_anthropic import ChatAnthropic\n",
+    "from langchain_text_splitters import MarkdownTextSplitter\n",
+    "\n",
+    "\n",
+    "def format_to_anthropic_documents(documents: list[str]):\n",
+    "    return {\n",
+    "        \"type\": \"document\",\n",
+    "        \"source\": {\n",
+    "            \"type\": \"content\",\n",
+    "            \"content\": [{\"type\": \"text\", \"text\": document} for document in documents],\n",
+    "        },\n",
+    "        \"citations\": {\"enabled\": True},\n",
+    "    }\n",
+    "\n",
+    "\n",
+    "# Pull readme\n",
+    "get_response = requests.get(\n",
+    "    \"https://raw.githubusercontent.com/langchain-ai/langchain/master/README.md\"\n",
+    ")\n",
+    "readme = get_response.text\n",
+    "\n",
+    "# Split into chunks\n",
+    "splitter = MarkdownTextSplitter(\n",
+    "    chunk_overlap=0,\n",
+    "    chunk_size=50,\n",
+    ")\n",
+    "documents = splitter.split_text(readme)\n",
+    "\n",
+    "# Construct message\n",
+    "message = {\n",
+    "    \"role\": \"user\",\n",
+    "    \"content\": [\n",
+    "        format_to_anthropic_documents(documents),\n",
+    "        {\"type\": \"text\", \"text\": \"Give me a link to LangChain's tutorials.\"},\n",
+    "    ],\n",
+    "}\n",
+    "\n",
+    "# Query LLM\n",
+    "llm = ChatAnthropic(model=\"claude-3-5-haiku-latest\")\n",
+    "response = llm.invoke([message])\n",
+    "\n",
+    "response.content"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3",
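To post-process a response like the ones shown above, the citation annotations can be pulled out of `response.content`; a small helper sketch (the function name is our own):

```python
def extract_citations(content: list) -> list:
    """Collect (cited_text, start, end) triples from citation blocks."""
    found = []
    for block in content:
        for c in block.get("citations") or []:
            found.append(
                (c["cited_text"], c.get("start_char_index"), c.get("end_char_index"))
            )
    return found

# extract_citations(response.content)
# -> [('The grass is green. ', 0, 20), ('The sky is blue.', 20, 36)]
```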
@@ -342,7 +499,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.5"
+   "version": "3.10.4"
   }
  },
  "nbformat": 4,
docs/docs/integrations/chat/deepseek.ipynb (new file, 231 lines)

@@ -0,0 +1,231 @@
{
 "cells": [
  {
   "cell_type": "raw",
   "id": "afaf8039",
   "metadata": {},
   "source": [
    "---\n",
    "sidebar_label: DeepSeek\n",
    "---"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e49f1e0d",
   "metadata": {},
   "source": [
    "# ChatDeepSeek\n",
    "\n",
    "\n",
    "This will help you getting started with DeepSeek's hosted [chat models](/docs/concepts/chat_models). For detailed documentation of all ChatDeepSeek features and configurations head to the [API reference](https://python.langchain.com/api_reference/deepseek/chat_models/langchain_deepseek.chat_models.ChatDeepSeek.html).\n",
    "\n",
    ":::tip\n",
    "\n",
    "DeepSeek's models are open source and can be run locally (e.g. in [Ollama](./ollama.ipynb)) or on other inference providers (e.g. [Fireworks](./fireworks.ipynb), [Together](./together.ipynb)) as well.\n",
    "\n",
    ":::\n",
    "\n",
    "## Overview\n",
    "### Integration details\n",
    "\n",
    "| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/docs/integrations/chat/deepseek) | Package downloads | Package latest |\n",
    "| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n",
    "| [ChatDeepSeek](https://python.langchain.com/api_reference/deepseek/chat_models/langchain_deepseek.chat_models.ChatDeepSeek.html) | [langchain-deepseek-official](https://python.langchain.com/api_reference/deepseek/) | ❌ | beta | ✅ |  |  |\n",
    "\n",
    "### Model features\n",
    "| [Tool calling](/docs/how_to/tool_calling) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
    "| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n",
    "| ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | \n",
    "\n",
    "## Setup\n",
    "\n",
    "To access DeepSeek models you'll need to create a/an DeepSeek account, get an API key, and install the `langchain-deepseek-official` integration package.\n",
    "\n",
    "### Credentials\n",
    "\n",
    "Head to [DeepSeek's API Key page](https://platform.deepseek.com/api_keys) to sign up to DeepSeek and generate an API key. Once you've done this set the `DEEPSEEK_API_KEY` environment variable:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "433e8d2b-9519-4b49-b2c4-7ab65b046c94",
   "metadata": {},
   "outputs": [],
   "source": [
    "import getpass\n",
    "import os\n",
    "\n",
    "if not os.getenv(\"DEEPSEEK_API_KEY\"):\n",
    "    os.environ[\"DEEPSEEK_API_KEY\"] = getpass.getpass(\"Enter your DeepSeek API key: \")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "72ee0c4b-9764-423a-9dbf-95129e185210",
   "metadata": {},
   "source": [
    "If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a15d341e-3e26-4ca3-830b-5aab30ed66de",
   "metadata": {},
   "outputs": [],
   "source": [
    "# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
    "# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0730d6a1-c893-4840-9817-5e5251676d5d",
   "metadata": {},
   "source": [
    "### Installation\n",
    "\n",
    "The LangChain DeepSeek integration lives in the `langchain-deepseek-official` package:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "652d6238-1f87-422a-b135-f5abbb8652fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install -qU langchain-deepseek-official"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a38cde65-254d-4219-a441-068766c0d4b5",
   "metadata": {},
   "source": [
    "## Instantiation\n",
    "\n",
    "Now we can instantiate our model object and generate chat completions:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cb09c344-1836-4e0c-acf8-11d13ac1dbae",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_deepseek import ChatDeepSeek\n",
    "\n",
    "llm = ChatDeepSeek(\n",
    "    model=\"deepseek-chat\",\n",
    "    temperature=0,\n",
    "    max_tokens=None,\n",
    "    timeout=None,\n",
    "    max_retries=2,\n",
    "    # other params...\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2b4f3e15",
   "metadata": {},
   "source": [
    "## Invocation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "62e0dbc3",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "messages = [\n",
    "    (\n",
    "        \"system\",\n",
    "        \"You are a helpful assistant that translates English to French. Translate the user sentence.\",\n",
    "    ),\n",
    "    (\"human\", \"I love programming.\"),\n",
    "]\n",
    "ai_msg = llm.invoke(messages)\n",
    "ai_msg.content"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "18e2bfc0-7e78-4528-a73f-499ac150dca8",
   "metadata": {},
   "source": [
    "## Chaining\n",
    "\n",
    "We can [chain](/docs/how_to/sequence/) our model with a prompt template like so:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e197d1d7-a070-4c96-9f8a-a0e86d046e0b",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_core.prompts import ChatPromptTemplate\n",
    "\n",
    "prompt = ChatPromptTemplate(\n",
    "    [\n",
    "        (\n",
    "            \"system\",\n",
    "            \"You are a helpful assistant that translates {input_language} to {output_language}.\",\n",
    "        ),\n",
    "        (\"human\", \"{input}\"),\n",
    "    ]\n",
    ")\n",
    "\n",
    "chain = prompt | llm\n",
    "chain.invoke(\n",
    "    {\n",
    "        \"input_language\": \"English\",\n",
    "        \"output_language\": \"German\",\n",
    "        \"input\": \"I love programming.\",\n",
    "    }\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3",
   "metadata": {},
   "source": [
    "## API reference\n",
    "\n",
    "For detailed documentation of all ChatDeepSeek features and configurations head to the [API Reference](https://python.langchain.com/api_reference/deepseek/chat_models/langchain_deepseek.chat_models.ChatDeepSeek.html)."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
@@ -17,7 +17,7 @@
    "source": [
    "# ChatGroq\n",
    "\n",
-   "This will help you getting started with Groq [chat models](../../concepts/chat_models.mdx). For detailed documentation of all ChatGroq features and configurations head to the [API reference](https://python.langchain.com/api_reference/groq/chat_models/langchain_groq.chat_models.ChatGroq.html). For a list of all Groq models, visit this [link](https://console.groq.com/docs/models).\n",
+   "This will help you getting started with Groq [chat models](../../concepts/chat_models.mdx). For detailed documentation of all ChatGroq features and configurations head to the [API reference](https://python.langchain.com/api_reference/groq/chat_models/langchain_groq.chat_models.ChatGroq.html). For a list of all Groq models, visit this [link](https://console.groq.com/docs/models?utm_source=langchain).\n",
    "\n",
    "## Overview\n",
    "### Integration details\n",
@@ -37,7 +37,7 @@
    "\n",
    "### Credentials\n",
    "\n",
-   "Head to the [Groq console](https://console.groq.com/keys) to sign up to Groq and generate an API key. Once you've done this set the GROQ_API_KEY environment variable:"
+   "Head to the [Groq console](https://console.groq.com/login?utm_source=langchain&utm_content=chat_page) to sign up to Groq and generate an API key. Once you've done this set the GROQ_API_KEY environment variable:"
   ]
  },
  {
@@ -35,25 +35,24 @@
    "\n",
    "## Setup\n",
    "\n",
-   "Before using the chat model, you must go through the three steps below.\n",
+   "Before using the chat model, you must go through the four steps below.\n",
    "\n",
    "1. Creating [NAVER Cloud Platform](https://www.ncloud.com/) account \n",
    "2. Apply to use [CLOVA Studio](https://www.ncloud.com/product/aiService/clovaStudio)\n",
-   "3. Find API Keys after creating CLOVA Studio Test App or Service App (See [here](https://guide.ncloud-docs.com/docs/en/clovastudio-playground01#테스트앱생성).)\n",
+   "3. Create a CLOVA Studio Test App or Service App of a model to use (See [here](https://guide.ncloud-docs.com/docs/en/clovastudio-playground01#테스트앱생성).)\n",
+   "4. Issue a Test or Service API key (See [here](https://api.ncloud-docs.com/docs/ai-naver-clovastudio-summary#API%ED%82%A4).)\n",
    "\n",
    "### Credentials\n",
    "\n",
-   "CLOVA Studio requires 2 keys (`NCP_CLOVASTUDIO_API_KEY` and `NCP_APIGW_API_KEY`).\n",
-   " - `NCP_CLOVASTUDIO_API_KEY` is issued per Test App or Service App\n",
-   " - `NCP_APIGW_API_KEY` is issued per account, could be optional depending on the region you are using\n",
-   "\n",
-   "The two API Keys could be found by clicking `App Request Status` > `Service App, Test App List` > `‘Details’ button for each app` in [CLOVA Studio](https://clovastudio.ncloud.com/studio-application/service-app)\n",
+   "Set the `NCP_CLOVASTUDIO_API_KEY` environment variable with your API key.\n",
+   " - Note that if you are using a legacy API Key (that doesn't start with `nv-*` prefix), you might need to get an additional API Key by clicking `App Request Status` > `Service App, Test App List` > `‘Details’ button for each app` in [CLOVA Studio](https://clovastudio.ncloud.com/studio-application/service-app) and set it as `NCP_APIGW_API_KEY`.\n",
    "\n",
    "You can add them to your environment variables as below:\n",
    "\n",
    "``` bash\n",
    "export NCP_CLOVASTUDIO_API_KEY=\"your-api-key-here\"\n",
-   "export NCP_APIGW_API_KEY=\"your-api-key-here\"\n",
+   "# Uncomment below to use a legacy API key\n",
+   "# export NCP_APIGW_API_KEY=\"your-api-key-here\"\n",
    "```"
   ]
  },
@@ -71,10 +70,11 @@
    "    os.environ[\"NCP_CLOVASTUDIO_API_KEY\"] = getpass.getpass(\n",
    "        \"Enter your NCP CLOVA Studio API Key: \"\n",
    "    )\n",
-   "if not os.getenv(\"NCP_APIGW_API_KEY\"):\n",
-   "    os.environ[\"NCP_APIGW_API_KEY\"] = getpass.getpass(\n",
-   "        \"Enter your NCP API Gateway API key: \"\n",
-   "    )"
+   "# Uncomment below to use a legacy API key\n",
+   "# if not os.getenv(\"NCP_APIGW_API_KEY\"):\n",
+   "#     os.environ[\"NCP_APIGW_API_KEY\"] = getpass.getpass(\n",
+   "#         \"Enter your NCP API Gateway API key: \"\n",
+   "#     )"
   ]
  },
 {
@@ -340,7 +340,7 @@
    "\n",
    "When going live with production-level application using CLOVA Studio, you should apply for and use Service App. (See [here](https://guide.ncloud-docs.com/docs/en/clovastudio-playground01#서비스앱신청).)\n",
    "\n",
-   "For a Service App, a corresponding `NCP_CLOVASTUDIO_API_KEY` is issued and can only be called with it."
+   "For a Service App, you should use a corresponding Service API key and can only be called with it."
   ]
  },
 {
@@ -353,7 +353,7 @@
    "# Update environment variables\n",
    "\n",
    "os.environ[\"NCP_CLOVASTUDIO_API_KEY\"] = getpass.getpass(\n",
-   "    \"Enter NCP CLOVA Studio API Key for Service App: \"\n",
+   "    \"Enter NCP CLOVA Studio Service API Key: \"\n",
    ")"
   ]
  },
@@ -43,7 +43,7 @@
    "### Model features\n",
    "| [Tool calling](/docs/how_to/tool_calling) | [Structured output](/docs/how_to/structured_output/) | JSON mode | Image input | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
    "| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n",
-   "| ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ | \n",
+   "| ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | \n",
    "\n",
    "## Setup\n",
    "\n",
278
docs/docs/integrations/chat/pipeshift.ipynb
Normal file
@@ -0,0 +1,278 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "afaf8039",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Pipeshift\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e49f1e0d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ChatPipeshift\n",
|
||||
"\n",
|
||||
"This will help you getting started with Pipeshift [chat models](/docs/concepts/chat_models/). For detailed documentation of all ChatPipeshift features and configurations head to the [API reference](https://dashboard.pipeshift.com/docs).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | JS support | Package downloads | Package latest |\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| [ChatPipeshift](https://dashboard.pipeshift.com/docs) | [langchain-pipeshift](https://pypi.org/project/langchain-pipeshift/) | ❌ | -| ❌ |  |  |\n",
|
||||
"\n",
|
||||
"### Model features\n",
|
||||
"| [Tool calling](/docs/how_to/tool_calling) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
|
||||
"| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ✅ | - | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access Pipeshift models you'll need to create an account on Pipeshift, get an API key, and install the `langchain-pipeshift` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Head to [Pipeshift](https://dashboard.pipeshift.com) to sign up to Pipeshift and generate an API key. Once you've done this set the PIPESHIFT_API_KEY environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "433e8d2b-9519-4b49-b2c4-7ab65b046c94",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"PIPESHIFT_API_KEY\"):\n",
|
||||
" os.environ[\"PIPESHIFT_API_KEY\"] = getpass.getpass(\"Enter your Pipeshift API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "72ee0c4b-9764-423a-9dbf-95129e185210",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "a15d341e-3e26-4ca3-830b-5aab30ed66de",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0730d6a1-c893-4840-9817-5e5251676d5d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain Pipeshift integration lives in the `langchain-pipeshift` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "652d6238-1f87-422a-b135-f5abbb8652fc",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install -qU langchain-pipeshift"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a38cde65-254d-4219-a441-068766c0d4b5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "cb09c344-1836-4e0c-acf8-11d13ac1dbae",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_pipeshift import ChatPipeshift\n",
|
||||
"\n",
|
||||
"llm = ChatPipeshift(\n",
|
||||
" model=\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
|
||||
" temperature=0,\n",
|
||||
" max_tokens=512,\n",
|
||||
" # other params...\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2b4f3e15",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Invocation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "62e0dbc3",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content='Here is the translation:\\n\\nJe suis amoureux du programme. \\n\\nHowever, a more common translation would be:\\n\\nJ\\'aime programmer.\\n\\nNote that \"Je suis amoureux\" typically implies romantic love, whereas \"J\\'aime\" is a more casual way to express affection or enjoyment for an activity, in this case, programming.', additional_kwargs={}, response_metadata={}, id='run-5cad8e5c-d089-44a8-8dcd-22736cde7d7b-0')"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"messages = [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"You are a helpful assistant that translates English to French. Translate the user sentence.\",\n",
|
||||
" ),\n",
|
||||
" (\"human\", \"I love programming.\"),\n",
|
||||
"]\n",
|
||||
"ai_msg = llm.invoke(messages)\n",
|
||||
"ai_msg"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "d86145b3-bfef-46e8-b227-4dda5c9c2705",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Here is the translation:\n",
|
||||
"\n",
|
||||
"Je suis amoureux du programme. \n",
|
||||
"\n",
|
||||
"However, a more common translation would be:\n",
|
||||
"\n",
|
||||
"J'aime programmer.\n",
|
||||
"\n",
|
||||
"Note that \"Je suis amoureux\" typically implies romantic love, whereas \"J'aime\" is a more casual way to express affection or enjoyment for an activity, in this case, programming.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(ai_msg.content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "18e2bfc0-7e78-4528-a73f-499ac150dca8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Chaining\n",
|
||||
"\n",
|
||||
"We can [chain](/docs/how_to/sequence/) our model with a prompt template like so:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "e197d1d7-a070-4c96-9f8a-a0e86d046e0b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"Das ist schön! Du liebst Programmieren! (That's great! You love programming!)\\n\\nWould you like to know the German translation of a specific programming-related term or phrase, or would you like me to help you with something else?\", additional_kwargs={}, response_metadata={}, id='run-8a4b7d56-23d9-43a7-8fb2-e05f556d94bd-0')"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"\n",
|
||||
"prompt = ChatPromptTemplate(\n",
|
||||
" [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"You are a helpful assistant that translates {input_language} to {output_language}.\",\n",
|
||||
" ),\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain = prompt | llm\n",
|
||||
"chain.invoke(\n",
|
||||
" {\n",
|
||||
" \"input_language\": \"English\",\n",
|
||||
" \"output_language\": \"German\",\n",
|
||||
" \"input\": \"I love programming.\",\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all ChatPipeshift features and configurations head to the API reference: https://dashboard.pipeshift.com/docs"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -19,7 +19,7 @@
|
||||
"source": [
|
||||
"# ChatSambaNovaCloud\n",
|
||||
"\n",
|
||||
"This will help you getting started with SambaNovaCloud [chat models](/docs/concepts/chat_models). For detailed documentation of all ChatSambaNovaCloud features and configurations head to the [API reference](https://python.langchain.com/api_reference/community/chat_models/langchain_community.chat_models.sambanova.ChatSambaNovaCloud.html).\n",
|
||||
"This will help you getting started with SambaNovaCloud [chat models](/docs/concepts/chat_models/). For detailed documentation of all ChatSambaNovaCloud features and configurations head to the [API reference](https://python.langchain.com/api_reference/sambanova/chat_models/langchain_sambanova.ChatSambaNovaCloud.html).\n",
|
||||
"\n",
|
||||
"**[SambaNova](https://sambanova.ai/)'s** [SambaNova Cloud](https://cloud.sambanova.ai/) is a platform for performing inference with open-source models\n",
|
||||
"\n",
|
||||
@@ -28,21 +28,20 @@
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | JS support | Package downloads | Package latest |\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| [ChatSambaNovaCloud](https://python.langchain.com/api_reference/community/chat_models/langchain_community.chat_models.sambanova.ChatSambaNovaCloud.html) | [langchain-community](https://python.langchain.com/api_reference/community/index.html) | ❌ | ❌ | ❌ |  |  |\n",
|
||||
"| [ChatSambaNovaCloud](https://python.langchain.com/api_reference/sambanova/chat_models/langchain_sambanova.ChatSambaNovaCloud.html) | [langchain-community](https://python.langchain.com/api_reference/community/index.html) | ❌ | ❌ | ❌ |  |  |\n",
|
||||
"\n",
|
||||
"### Model features\n",
|
||||
"\n",
|
||||
"| [Tool calling](/docs/how_to/tool_calling) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
|
||||
"| [Tool calling](/docs/how_to/tool_calling) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](//docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
|
||||
"| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | \n",
|
||||
"| ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access ChatSambaNovaCloud models you will need to create a [SambaNovaCloud](https://cloud.sambanova.ai/) account, get an API key, install the `langchain_community` integration package, and install the `SSEClient` Package.\n",
|
||||
"To access ChatSambaNovaCloud models you will need to create a [SambaNovaCloud](https://cloud.sambanova.ai/) account, get an API key, install the `langchain_sambanova` integration package.\n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"pip install langchain-community\n",
|
||||
"pip install sseclient-py\n",
|
||||
"pip install langchain-sambanova\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
@@ -82,8 +81,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -92,7 +91,7 @@
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain __SambaNovaCloud__ integration lives in the `langchain_community` package:"
|
||||
"The LangChain __SambaNovaCloud__ integration lives in the `langchain_sambanova` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -101,8 +100,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-community\n",
|
||||
"%pip install -qu sseclient-py"
|
||||
"%pip install -qU langchain-sambanova"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -120,13 +118,12 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.chat_models.sambanova import ChatSambaNovaCloud\n",
|
||||
"from langchain_sambanova import ChatSambaNovaCloud\n",
|
||||
"\n",
|
||||
"llm = ChatSambaNovaCloud(\n",
|
||||
" model=\"Meta-Llama-3.1-70B-Instruct\",\n",
|
||||
" model=\"Meta-Llama-3.3-70B-Instruct\",\n",
|
||||
" max_tokens=1024,\n",
|
||||
" temperature=0.7,\n",
|
||||
" top_k=1,\n",
|
||||
" top_p=0.01,\n",
|
||||
")"
|
||||
]
|
||||
@@ -158,7 +155,8 @@
|
||||
"messages = [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"You are a helpful assistant that translates English to French. Translate the user sentence.\",\n",
|
||||
" \"You are a helpful assistant that translates English to French. \"\n",
|
||||
" \"Translate the user sentence.\",\n",
|
||||
" ),\n",
|
||||
" (\"human\", \"I love programming.\"),\n",
|
||||
"]\n",
|
||||
@@ -215,7 +213,8 @@
|
||||
" [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"You are a helpful assistant that translates {input_language} to {output_language}.\",\n",
|
||||
" \"You are a helpful assistant that translates {input_language} \"\n",
|
||||
" \"to {output_language}.\",\n",
|
||||
" ),\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" ]\n",
|
||||
@@ -412,7 +411,8 @@
|
||||
"llm_with_tools = llm.bind_tools(tools=tools)\n",
|
||||
"messages = [\n",
|
||||
" HumanMessage(\n",
|
||||
" content=\"I need to schedule a meeting for two weeks from today. Can you tell me the exact date of the meeting?\"\n",
|
||||
" content=\"I need to schedule a meeting for two weeks from today. \"\n",
|
||||
" \"Can you tell me the exact date of the meeting?\"\n",
|
||||
" )\n",
|
||||
"]"
|
||||
]
|
||||
@@ -499,7 +499,6 @@
|
||||
" model=\"Llama-3.2-11B-Vision-Instruct\",\n",
|
||||
" max_tokens=1024,\n",
|
||||
" temperature=0.7,\n",
|
||||
" top_k=1,\n",
|
||||
" top_p=0.01,\n",
|
||||
")"
|
||||
]
|
||||
@@ -546,7 +545,7 @@
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all ChatSambaNovaCloud features and configurations head to the API reference: https://python.langchain.com/api_reference/community/chat_models/langchain_community.chat_models.sambanova.ChatSambaNovaCloud.html"
|
||||
"For detailed documentation of all ChatSambaNovaCloud features and configurations head to the API reference: https://python.langchain.com/api_reference/sambanova/chat_models/langchain_sambanova.ChatSambaNovaCloud.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
"source": [
|
||||
"# ChatSambaStudio\n",
|
||||
"\n",
|
||||
"This will help you getting started with SambaStudio [chat models](/docs/concepts/chat_models). For detailed documentation of all ChatStudio features and configurations head to the [API reference](https://python.langchain.com/api_reference/community/chat_models/langchain_community.chat_models.sambanova.ChatSambaStudio.html).\n",
|
||||
"This will help you getting started with SambaStudio [chat models](/docs/concepts/chat_models). For detailed documentation of all ChatStudio features and configurations head to the [API reference](https://python.langchain.com/api_reference/sambanova/chat_models/langchain_sambanova.chat_models.sambanova.ChatSambaStudio.html).\n",
|
||||
"\n",
|
||||
"**[SambaNova](https://sambanova.ai/)'s** [SambaStudio](https://docs.sambanova.ai/sambastudio/latest/sambastudio-intro.html) SambaStudio is a rich, GUI-based platform that provides the functionality to train, deploy, and manage models in SambaNova [DataScale](https://sambanova.ai/products/datascale) systems.\n",
|
||||
"\n",
|
||||
@@ -28,21 +28,20 @@
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | JS support | Package downloads | Package latest |\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| [ChatSambaStudio](https://python.langchain.com/api_reference/community/chat_models/langchain_community.chat_models.sambanova.ChatSambaStudio.html) | [langchain-community](https://python.langchain.com/api_reference/community/index.html) | ❌ | ❌ | ❌ |  |  |\n",
|
||||
"| [ChatSambaStudio](https://python.langchain.com/api_reference/sambanova/chat_models/langchain_sambanova.chat_models.sambanova.ChatSambaStudio.html) | [langchain-community](https://python.langchain.com/api_reference/community/index.html) | ❌ | ❌ | ❌ |  |  |\n",
|
||||
"\n",
|
||||
"### Model features\n",
|
||||
"\n",
|
||||
"| [Tool calling](/docs/how_to/tool_calling) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
|
||||
"| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | \n",
|
||||
"| ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | \n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access ChatSambaStudio models you will need to [deploy an endpoint](https://docs.sambanova.ai/sambastudio/latest/language-models.html) in your SambaStudio platform, install the `langchain_community` integration package, and install the `SSEClient` Package.\n",
|
||||
"To access ChatSambaStudio models you will need to [deploy an endpoint](https://docs.sambanova.ai/sambastudio/latest/language-models.html) in your SambaStudio platform, install the `langchain_sambanova` integration package.\n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"pip install langchain-community\n",
|
||||
"pip install sseclient-py\n",
|
||||
"pip install langchain-sambanova\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
@@ -50,7 +49,7 @@
|
||||
"Get the URL and API Key from your SambaStudio deployed endpoint and add them to your environment variables:\n",
|
||||
"\n",
|
||||
"``` bash\n",
|
||||
"export SAMBASTUDIO_URL=\"your-api-key-here\"\n",
|
||||
"export SAMBASTUDIO_URL=\"sambastudio-url-key-here\"\n",
|
||||
"export SAMBASTUDIO_API_KEY=\"your-api-key-here\"\n",
|
||||
"```"
|
||||
]
|
||||
@@ -85,8 +84,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -95,7 +94,7 @@
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain __SambaStudio__ integration lives in the `langchain_community` package:"
|
||||
"The LangChain __SambaStudio__ integration lives in the `langchain_sambanova` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -104,8 +103,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install -qU langchain-community\n",
|
||||
"%pip install -qu sseclient-py"
|
||||
"%pip install -qU langchain-sambanova"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -123,13 +121,12 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.chat_models.sambanova import ChatSambaStudio\n",
|
||||
"from langchain_sambanova import ChatSambaStudio\n",
|
||||
"\n",
|
||||
"llm = ChatSambaStudio(\n",
|
||||
" model=\"Meta-Llama-3-70B-Instruct-4096\", # set if using a Bundle endpoint\n",
|
||||
" max_tokens=1024,\n",
|
||||
" temperature=0.7,\n",
|
||||
" top_k=1,\n",
|
||||
" top_p=0.01,\n",
|
||||
" do_sample=True,\n",
|
||||
" process_prompt=\"True\", # set if using a Bundle endpoint\n",
|
||||
@@ -163,7 +160,8 @@
|
||||
"messages = [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"You are a helpful assistant that translates English to French. Translate the user sentence.\",\n",
|
||||
" \"You are a helpful assistant that translates English to French.\"\n",
|
||||
" \"Translate the user sentence.\",\n",
|
||||
" ),\n",
|
||||
" (\"human\", \"I love programming.\"),\n",
|
||||
"]\n",
|
||||
@@ -220,7 +218,8 @@
|
||||
" [\n",
|
||||
" (\n",
|
||||
" \"system\",\n",
|
||||
" \"You are a helpful assistant that translates {input_language} to {output_language}.\",\n",
|
||||
" \"You are a helpful assistant that translates {input_language} \"\n",
|
||||
" \"to {output_language}.\",\n",
|
||||
" ),\n",
|
||||
" (\"human\", \"{input}\"),\n",
|
||||
" ]\n",
|
||||
@@ -364,7 +363,7 @@
|
||||
"source": [
|
||||
"from datetime import datetime\n",
|
||||
"\n",
|
||||
"from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage\n",
|
||||
"from langchain_core.messages import HumanMessage, ToolMessage\n",
|
||||
"from langchain_core.tools import tool\n",
|
||||
"\n",
|
||||
"\n",
|
||||
@@ -408,7 +407,8 @@
|
||||
"llm_with_tools = llm.bind_tools(tools=tools)\n",
|
||||
"messages = [\n",
|
||||
" HumanMessage(\n",
|
||||
" content=\"I need to schedule a meeting for two weeks from today. Can you tell me the exact date of the meeting?\"\n",
|
||||
" content=\"I need to schedule a meeting for two weeks from today. \"\n",
|
||||
" \"Can you tell me the exact date of the meeting?\"\n",
|
||||
" )\n",
|
||||
"]"
|
||||
]
|
||||
@@ -483,7 +483,7 @@
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all ChatSambaStudio features and configurations head to the API reference: https://python.langchain.com/api_reference/community/chat_models/langchain_community.chat_models.sambanova.ChatSambaStudio.html"
|
||||
"For detailed documentation of all ChatSambaStudio features and configurations head to the API reference: https://python.langchain.com/api_reference/sambanova/chat_models/langchain_sambanova.sambanova.chat_models.ChatSambaStudio.html"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -385,7 +385,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First we prepare an example table with non-default schema, and populate it with some arbitary data."
|
||||
"First we prepare an example table with non-default schema, and populate it with some arbitrary data."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -70,9 +70,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Austin airport has 98 outgoing routes.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain_aws import ChatBedrockConverse\n",
|
||||
"from langchain_aws.chains import create_neptune_opencypher_qa_chain\n",
|
||||
@@ -83,13 +91,161 @@
|
||||
" temperature=0,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"chain = create_neptune_opencypher_qa_chain(\n",
|
||||
" llm=llm,\n",
|
||||
" graph=graph,\n",
|
||||
")\n",
|
||||
"chain = create_neptune_opencypher_qa_chain(llm=llm, graph=graph)\n",
|
||||
"\n",
|
||||
"result = chain.invoke(\n",
|
||||
" {\"query\": \"How many outgoing routes does the Austin airport have?\"}\n",
|
||||
"result = chain.invoke(\"How many outgoing routes does the Austin airport have?\")\n",
|
||||
"print(result[\"result\"].content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Adding Message History\n",
|
||||
"\n",
|
||||
"The Neptune openCypher QA chain has the ability to be wrapped by [`RunnableWithMessageHistory`](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.history.RunnableWithMessageHistory.html#langchain_core.runnables.history.RunnableWithMessageHistory). This adds message history to the chain, allowing us to create a chatbot that retains conversation state across multiple invocations.\n",
|
||||
"\n",
|
||||
"To start, we need a way to store and load the message history. For this purpose, each thread will be created as an instance of [`InMemoryChatMessageHistory`](https://python.langchain.com/api_reference/core/chat_history/langchain_core.chat_history.InMemoryChatMessageHistory.html), and stored into a dictionary for repeated access.\n",
|
||||
"\n",
|
||||
"(Also see: https://python.langchain.com/docs/versions/migrating_memory/chat_history/#chatmessagehistory)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.chat_history import InMemoryChatMessageHistory\n",
|
||||
"\n",
|
||||
"chats_by_session_id = {}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_chat_history(session_id: str) -> InMemoryChatMessageHistory:\n",
|
||||
" chat_history = chats_by_session_id.get(session_id)\n",
|
||||
" if chat_history is None:\n",
|
||||
" chat_history = InMemoryChatMessageHistory()\n",
|
||||
" chats_by_session_id[session_id] = chat_history\n",
|
||||
" return chat_history"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, the QA chain and message history storage can be used to create the new `RunnableWithMessageHistory`. Note that we must set `query` as the input key to match the format expected by the base chain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.runnables.history import RunnableWithMessageHistory\n",
|
||||
"\n",
|
||||
"runnable_with_history = RunnableWithMessageHistory(\n",
|
||||
" chain,\n",
|
||||
" get_chat_history,\n",
|
||||
" input_messages_key=\"query\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Before invoking the chain, a unique `session_id` needs to be generated for the conversation that the new `InMemoryChatMessageHistory` will remember."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import uuid\n",
|
||||
"\n",
|
||||
"session_id = uuid.uuid4()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Finally, invoke the message history enabled chain with the `session_id`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"You can fly directly to 98 destinations from Austin airport.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = runnable_with_history.invoke(\n",
|
||||
" {\"query\": \"How many destinations can I fly to directly from Austin airport?\"},\n",
|
||||
" config={\"configurable\": {\"session_id\": session_id}},\n",
|
||||
")\n",
|
||||
"print(result[\"result\"].content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As the chain continues to be invoked with the same `session_id`, responses will be returned in the context of previous queries in the conversation.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"You can fly directly to 4 destinations in Europe from Austin airport.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = runnable_with_history.invoke(\n",
|
||||
" {\"query\": \"Out of those destinations, how many are in Europe?\"},\n",
|
||||
" config={\"configurable\": {\"session_id\": session_id}},\n",
|
||||
")\n",
|
||||
"print(result[\"result\"].content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The four European destinations you can fly to directly from Austin airport are:\n",
|
||||
"- AMS (Amsterdam Airport Schiphol)\n",
|
||||
"- FRA (Frankfurt am Main)\n",
|
||||
"- LGW (London Gatwick)\n",
|
||||
"- LHR (London Heathrow)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"result = runnable_with_history.invoke(\n",
|
||||
" {\"query\": \"Give me the codes and names of those airports.\"},\n",
|
||||
" config={\"configurable\": {\"session_id\": session_id}},\n",
|
||||
")\n",
|
||||
"print(result[\"result\"].content)"
|
||||
]
|
||||
@@ -97,7 +253,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -111,7 +267,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.10.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -48,7 +48,7 @@
|
||||
"\n",
|
||||
"Seed the W3C organizational data, W3C org ontology plus some instances. \n",
|
||||
" \n",
|
||||
"You will need an S3 bucket in the same region and account. Set `STAGE_BUCKET`as the name of that bucket."
|
||||
"You will need an S3 bucket in the same region and account as the Neptune cluster. Set `STAGE_BUCKET`as the name of that bucket."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -60,11 +60,6 @@
|
||||
"STAGE_BUCKET = \"<bucket-name>\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": ""
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -89,7 +84,50 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Bulk-load the org ttl - both ontology and instances"
|
||||
"We will use the `%load` magic command from the `graph-notebook` package to insert the W3C data into the Neptune graph. Before running `%load`, use `%%graph_notebook_config` to set the graph connection parameters."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install --upgrade --quiet graph-notebook"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%load_ext graph_notebook.magics"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%graph_notebook_config\n",
|
||||
"{\n",
|
||||
" \"host\": \"<neptune-endpoint>\",\n",
|
||||
" \"neptune_service\": \"neptune-db\",\n",
|
||||
" \"port\": 8182,\n",
|
||||
" \"auth_mode\": \"<[DEFAULT|IAM]>\",\n",
|
||||
" \"load_from_s3_arn\": \"<neptune-cluster-load-role-arn>\",\n",
|
||||
" \"ssl\": true,\n",
|
||||
" \"aws_region\": \"<region>\"\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Bulk-load the org ttl - both ontology and instances."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -246,7 +284,9 @@
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": "### Create the Neptune Database RDF Graph"
|
||||
"source": [
|
||||
"### Create the Neptune Database RDF Graph"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@@ -297,7 +337,7 @@
|
||||
" examples=EXAMPLES,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"result = chain.invoke({\"query\": \"How many organizations are in the graph?\"})\n",
|
||||
"result = chain.invoke(\"How many organizations are in the graph?\")\n",
|
||||
"print(result[\"result\"].content)"
|
||||
]
|
||||
},
|
||||
@@ -305,7 +345,6 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Extra questions\n",
|
||||
"Here are a few more prompts to try on the graph data that was ingested.\n"
|
||||
]
|
||||
},
|
||||
@@ -315,7 +354,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain.invoke({\"query\": \"Are there any mergers or acquisitions?\"})"
|
||||
"result = chain.invoke(\"Are there any mergers or acquisitions?\")\n",
|
||||
"print(result[\"result\"].content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -324,7 +364,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain.invoke({\"query\": \"Find organizations.\"})"
|
||||
"result = chain.invoke(\"Find organizations.\")\n",
|
||||
"print(result[\"result\"].content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -333,7 +374,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain.invoke({\"query\": \"Find sites of MegaSystems or MegaFinancial.\"})"
|
||||
"result = chain.invoke(\"Find sites of MegaSystems or MegaFinancial.\")\n",
|
||||
"print(result[\"result\"].content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -342,7 +384,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain.invoke({\"query\": \"Find a member who is a manager of one or more members.\"})"
|
||||
"result = chain.invoke(\"Find a member who is a manager of one or more members.\")\n",
|
||||
"print(result[\"result\"].content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -351,7 +394,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain.invoke({\"query\": \"Find five members and their managers.\"})"
|
||||
"result = chain.invoke(\"Find five members and their managers.\")\n",
|
||||
"print(result[\"result\"].content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -360,17 +404,128 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"chain.invoke(\n",
|
||||
" {\n",
|
||||
" \"query\": \"Find org units or suborganizations of The Mega Group. What are the sites of those units?\"\n",
|
||||
" }\n",
|
||||
"result = chain.invoke(\n",
|
||||
" \"Find org units or suborganizations of The Mega Group. What are the sites of those units?\"\n",
|
||||
")\n",
|
||||
"print(result[\"result\"].content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Adding Message History\n",
|
||||
"\n",
|
||||
"The Neptune SPARQL QA chain has the ability to be wrapped by [`RunnableWithMessageHistory`](https://python.langchain.com/v0.2/api_reference/core/runnables/langchain_core.runnables.history.RunnableWithMessageHistory.html#langchain_core.runnables.history.RunnableWithMessageHistory). This adds message history to the chain, allowing us to create a chatbot that retains conversation state across multiple invocations.\n",
|
||||
"\n",
|
||||
"To start, we need a way to store and load the message history. For this purpose, each thread will be created as an instance of [`InMemoryChatMessageHistory`](https://python.langchain.com/api_reference/core/chat_history/langchain_core.chat_history.InMemoryChatMessageHistory.html), and stored into a dictionary for repeated access.\n",
|
||||
"\n",
|
||||
"(Also see: https://python.langchain.com/docs/versions/migrating_memory/chat_history/#chatmessagehistory)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.chat_history import InMemoryChatMessageHistory\n",
|
||||
"\n",
|
||||
"chats_by_session_id = {}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_chat_history(session_id: str) -> InMemoryChatMessageHistory:\n",
|
||||
" chat_history = chats_by_session_id.get(session_id)\n",
|
||||
" if chat_history is None:\n",
|
||||
" chat_history = InMemoryChatMessageHistory()\n",
|
||||
" chats_by_session_id[session_id] = chat_history\n",
|
||||
" return chat_history"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, the QA chain and message history storage can be used to create the new `RunnableWithMessageHistory`. Note that we must set `query` as the input key to match the format expected by the base chain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_core.runnables.history import RunnableWithMessageHistory\n",
|
||||
"\n",
|
||||
"runnable_with_history = RunnableWithMessageHistory(\n",
|
||||
" chain,\n",
|
||||
" get_chat_history,\n",
|
||||
" input_messages_key=\"query\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Before invoking the chain, a unique `session_id` needs to be generated for the conversation that the new `InMemoryChatMessageHistory` will remember.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import uuid\n",
|
||||
"\n",
|
||||
"session_id = uuid.uuid4()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Finally, invoke the message history enabled chain with the `session_id`.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"result = runnable_with_history.invoke(\n",
|
||||
" {\"query\": \"How many org units or suborganizations does the The Mega Group have?\"},\n",
|
||||
" config={\"configurable\": {\"session_id\": session_id}},\n",
|
||||
")\n",
|
||||
"print(result[\"result\"].content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As the chain continues to be invoked with the same `session_id`, responses will be returned in the context of previous queries in the conversation.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"result = runnable_with_history.invoke(\n",
|
||||
" {\"query\": \"List the sites for each of the units.\"},\n",
|
||||
" config={\"configurable\": {\"session_id\": session_id}},\n",
|
||||
")\n",
|
||||
"print(result[\"result\"].content)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -384,7 +539,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.10.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
201
docs/docs/integrations/llms/pipeshift.ipynb
Normal file
@@ -0,0 +1,201 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"id": "67db2992",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"sidebar_label: Pipeshift\n",
|
||||
"---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9597802c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Pipeshift\n",
|
||||
"\n",
|
||||
"This will help you get started with Pipeshift completion models (LLMs) using LangChain. For detailed documentation on `Pipeshift` features and configuration options, please refer to the [API reference](https://dashboard.pipeshift.com/docs).\n",
|
||||
"\n",
|
||||
"## Overview\n",
|
||||
"### Integration details\n",
|
||||
"\n",
|
||||
"| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/docs/integrations/llms/pipeshift) | Package downloads | Package latest |\n",
|
||||
"| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n",
|
||||
"| [Pipeshift](https://dashboard.pipeshift.com/docs) | [langchain-pipeshift](https://pypi.org/project/langchain-pipeshift/) | ❌ | - | ❌ |  |  |\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"To access Pipeshift models you'll need to create a Pipeshift account, get an API key, and install the `langchain-pipeshift` integration package.\n",
|
||||
"\n",
|
||||
"### Credentials\n",
|
||||
"\n",
|
||||
"Head to [Pipeshift](https://dashboard.pipeshift.com) to sign up to Pipeshift and generate an API key. Once you've done this set the PIPESHIFT_API_KEY environment variable:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "bc51e756",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"if not os.getenv(\"PIPESHIFT_API_KEY\"):\n",
|
||||
" os.environ[\"PIPESHIFT_API_KEY\"] = getpass.getpass(\"Enter your Pipeshift API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4b6e1ca6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "196c2b41",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "809c6577",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Installation\n",
|
||||
"\n",
|
||||
"The LangChain Pipeshift integration lives in the `langchain-pipeshift` package:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "59c710c4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pip install -qU langchain-pipeshift"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0a760037",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiation\n",
|
||||
"\n",
|
||||
"Now we can instantiate our model object and generate chat completions:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "a0562a13",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_pipeshift import Pipeshift\n",
|
||||
"\n",
|
||||
"llm = Pipeshift(\n",
|
||||
" model=\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
|
||||
" temperature=0,\n",
|
||||
" max_tokens=512,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0ee90032",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Invocation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "035dea0f",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\xa0specializes in digital transformation and innovation for industry leaders. We leverage AI and IoT technology to provide data-driven insights, predictive analysis and decision support systems to our clients.\\nAbout our company:\\nOur company is built around the passion of creating positive impact through AI innovation. Pipeshift brings together top talent in AI, data science, software engineering, and business consultancy to deliver tailored solutions that drive growth and improvement for our clients across various industries.\\nOur Mission and Values:\\nOur mission is to revolutionize industries by harnessing the power of emerging technologies, like AI and IoT, to unlock new potential and drive progress. Our values are built around collaboration, innovation, integrity, customer value, and continuous learning.\\nJob Title: Software Engineer (Mobile App Development)\\nAbout the role:\\nAs a Software Engineer (Mobile App Development) at Pipeshift, you will be at the forefront of cutting-edge mobile app development. Your responsibilities will include designing, developing, and implementing scalable and reliable mobile apps for various platforms, such as Android and iOS.\\nResponsibilities\\nDesign and develop mobile apps for various platforms using Java, Kotlin, Swift, or React Native\\nWork collaboratively with cross-functional teams to gather requirements, plan and prioritize projects\\nImplement UI/UX design principles to deliver user-friendly and visually appealing apps\\nWrite clean, maintainable, and efficient code, adhering to best coding practices and coding standards\\nTest and debug mobile apps to ensure high-quality, reliable, and stable performance\\nCollaborate with the QA team to ensure all mobile apps meet or exceed quality and performance expectations\\nStay up-to-date with industry trends, new technologies, and platform updates to enhance app development skills\\nParticipate in code reviews to ensure code quality and adherence to coding standards\\nSupport the DevOps team in continuous integration and delivery of mobile apps\\nParticipate in the design and maintenance of technical documentation, coding standards and guidelines\\nConduct peer mentoring and training to promote knowledge sharing and growth\\nExperience and requirements\\n2+ years of experience in mobile app development, or relevant technology experience with strong passion for mobile development.\\nProficient in one or more mobile app development frameworks: React Native, iOS (Swift), Android (Kotlin/Java), Flutter, etc.\\nStrong understanding of OOP (object-oriented programming) principles, design patterns, and data structures.\\nExperience with Agile methodologies, version control systems (e.g., Git), and CI/CD pipelines (e.g., Jenkins, Travis CI, CircleCI).\\nGood coding skills in languages such as Java, Kotlin, Swift,'"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"input_text = \"Pipeshift is an AI company that \"\n",
|
||||
"\n",
|
||||
"completion = llm.invoke(input_text)\n",
|
||||
"completion"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "add38532",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Chaining\n",
|
||||
"\n",
|
||||
"We can also [chain](/docs/how_to/sequence/) our llm with a prompt template"
|
||||
]
|
||||
},
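Since the notebook does not include a chaining cell, here is a minimal sketch of what such a chain might look like; the prompt wording and the `{topic}` variable are illustrative assumptions, not part of the original notebook:

```python
# Minimal chaining sketch (assumes PIPESHIFT_API_KEY is set as above).
from langchain_core.prompts import PromptTemplate
from langchain_pipeshift import Pipeshift

llm = Pipeshift(
    model="meta-llama/Meta-Llama-3.1-8B-Instruct",
    temperature=0,
    max_tokens=512,
)

# The pipe operator composes the prompt and the LLM into one runnable.
prompt = PromptTemplate.from_template("Explain {topic} in one paragraph.")
chain = prompt | llm

print(chain.invoke({"topic": "fine-tuning"}))
```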
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e9bdfcef",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## API reference\n",
|
||||
"\n",
|
||||
"For detailed documentation of all `Pipeshift` features and configurations head to the API reference: https://dashboard.pipeshift.com/docs "
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -92,7 +92,7 @@ from langchain_aws import SagemakerEndpoint
|
||||
|
||||
See a [usage example](/docs/integrations/text_embedding/bedrock).
|
||||
```python
|
||||
from langchain_community.embeddings import BedrockEmbeddings
|
||||
from langchain_aws import BedrockEmbeddings
|
||||
```
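As a hedged follow-up, here is a minimal usage sketch of the class imported above; the `model_id` is an assumption and should be replaced with a Bedrock embedding model enabled in your AWS account:

```python
from langchain_aws import BedrockEmbeddings

# model_id is illustrative; pick one enabled in your account/region.
embeddings = BedrockEmbeddings(model_id="amazon.titan-embed-text-v1")

# embed_query returns the embedding vector as a list of floats.
vector = embeddings.embed_query("What is the meaning of life?")
print(len(vector))
```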
|
||||
|
||||
### SageMaker Endpoint
|
||||
|
||||
@@ -46,3 +46,14 @@ See a [usage example](/docs/integrations/retrievers/dappier).
|
||||
```python
|
||||
from langchain_dappier import DappierRetriever
|
||||
```
|
||||
|
||||
## Tool
|
||||
|
||||
See a [usage example](/docs/integrations/tools/dappier).
|
||||
|
||||
```python
|
||||
from langchain_dappier import (
|
||||
DappierRealTimeSearchTool,
|
||||
DappierAIRecommendationTool
|
||||
)
|
||||
```
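A hedged invocation sketch for the tools imported above, assuming the standard LangChain tool interface and that `DAPPIER_API_KEY` is set in the environment; see the linked usage example for the exact parameters:

```python
from langchain_dappier import DappierRealTimeSearchTool

# Assumes DAPPIER_API_KEY is set in the environment.
tool = DappierRealTimeSearchTool()

# Standard LangChain tool invocation; the input schema is assumed here.
result = tool.invoke({"query": "What is the latest news about AI?"})
print(result)
```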
|
||||
|
||||
48
docs/docs/integrations/providers/deepseek.ipynb
Normal file
@@ -0,0 +1,48 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# DeepSeek\n",
|
||||
"\n",
|
||||
"[DeepSeek](https://www.deepseek.com/) is a Chinese artificial intelligence company that develops LLMs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"id": "y8ku6X96sebl"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_deepseek import ChatDeepSeek"
|
||||
]
|
||||
}
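A hedged usage sketch for the import in the cell above; the model name `deepseek-chat` and the `DEEPSEEK_API_KEY` environment variable are assumptions based on DeepSeek's standard setup, not shown on this page:

```python
import getpass
import os

from langchain_deepseek import ChatDeepSeek

# Assumes the API key is provided via DEEPSEEK_API_KEY.
if not os.getenv("DEEPSEEK_API_KEY"):
    os.environ["DEEPSEEK_API_KEY"] = getpass.getpass("Enter your DeepSeek API key: ")

llm = ChatDeepSeek(model="deepseek-chat")
print(llm.invoke("Say hello in one sentence.").content)
```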
|
||||
],
|
||||
"metadata": {
|
||||
"colab": {
|
||||
"provenance": []
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
}
|
||||
@@ -21,7 +21,7 @@ Install the integration package:
|
||||
pip install langchain-groq
|
||||
```
|
||||
|
||||
Request an [API key](https://wow.groq.com) and set it as an environment variable:
|
||||
Request an [API key](https://console.groq.com/login?utm_source=langchain&utm_content=provider_page) and set it as an environment variable:
|
||||
|
||||
```bash
|
||||
export GROQ_API_KEY=gsk_...
|
||||
|
||||
@@ -10,7 +10,8 @@ Please refer to [NCP User Guide](https://guide.ncloud-docs.com/docs/clovastudio-
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
- Get both CLOVA Studio API Key and API Gateway Key by [creating your app](https://guide.ncloud-docs.com/docs/en/clovastudio-playground01#create-test-app) and set them as environment variables respectively (`NCP_CLOVASTUDIO_API_KEY`, `NCP_APIGW_API_KEY`).
|
||||
- Get a CLOVA Studio API Key by [issuing it](https://api.ncloud-docs.com/docs/ai-naver-clovastudio-summary#API%ED%82%A4) and set it as an environment variable (`NCP_CLOVASTUDIO_API_KEY`).
|
||||
- If you are using a legacy API Key (that doesn't start with `nv-*` prefix), you might need to get an additional API Key by [creating your app](https://guide.ncloud-docs.com/docs/en/clovastudio-playground01#create-test-app) and set it as `NCP_APIGW_API_KEY`.
|
||||
- Install the integration Python package with:
|
||||
|
||||
```bash
|
||||
|
||||
115
docs/docs/integrations/providers/payman-tool.mdx
Normal file
@@ -0,0 +1,115 @@
|
||||
|
||||
# PaymanAI
|
||||
|
||||
PaymanAI provides functionality to send and receive payments (fiat and crypto) on behalf of an AI Agent. To get started:
|
||||
|
||||
1. **Sign up** at app.paymanai.com to create an AI Agent and obtain your **API Key**.
|
||||
2. **Set** environment variables (`PAYMAN_API_SECRET` for your API Key, `PAYMAN_ENVIRONMENT` for sandbox or production).
|
||||
|
||||
This notebook gives a quick overview of integrating PaymanAI into LangChain as a tool. For complete reference, see the API documentation.
|
||||
|
||||
## Overview
|
||||
|
||||
The PaymanAI integration is part of the `langchain-community` package (or your own custom package). It allows you to:
|
||||
|
||||
- Send payments (`send_payment`) to crypto addresses or bank accounts.
|
||||
- Search for payees (`search_payees`).
|
||||
- Add new payees (`add_payee`).
|
||||
- Request money from customers with a hosted checkout link (`ask_for_money`).
|
||||
- Check agent or customer balances (`get_balance`).
|
||||
|
||||
These can be wrapped as **LangChain Tools** for an LLM-based agent to call them automatically.
|
||||
|
||||
### Integration details
|
||||
|
||||
| Class | Package | Serializable | JS support | Package latest |
|
||||
| :--- | :--- | :---: | :---: | :--- |
|
||||
| PaymanAI | `langchain_community` | ❌ | ❌ | [PyPI Version] |
|
||||
|
||||
If you're simply calling the PaymanAI SDK, you can do it directly or via the **Tool** interface in LangChain.
|
||||
|
||||
## Setup
|
||||
|
||||
1. **Install** the `langchain-community` (or equivalent) package:
|
||||
|
||||
```bash
|
||||
pip install --quiet -U langchain-community
|
||||
```
|
||||
|
||||
2. **Install** the PaymanAI SDK:
|
||||
```bash
|
||||
pip install paymanai
|
||||
```
|
||||
|
||||
3. **Set** environment variables:
|
||||
```bash
|
||||
export PAYMAN_API_SECRET="YOUR_SECRET_KEY"
|
||||
export PAYMAN_ENVIRONMENT="sandbox"
|
||||
```
|
||||
|
||||
Your `PAYMAN_API_SECRET` should be the secret key from app.paymanai.com. The `PAYMAN_ENVIRONMENT` can be `sandbox` or `production` depending on your usage.
|
||||
|
||||
## Instantiation
|
||||
|
||||
Here is an example of instantiating a PaymanAI tool. If you have multiple Payman methods, you can create multiple tools.
|
||||
|
||||
```python
|
||||
from langchain_community.tools.langchain_payman_tool.tool import PaymanAI
|
||||
|
||||
# Instantiate the PaymanAI tool (example)
|
||||
tool = PaymanAI(
|
||||
name="send_payment",
|
||||
description="Send a payment to a specified payee.",
|
||||
)
|
||||
```
|
||||
|
||||
## Invocation
|
||||
|
||||
### Invoke directly with args
|
||||
|
||||
You can call `tool.invoke(...)` and pass a dictionary matching the tool's expected fields. For example:
|
||||
|
||||
```python
|
||||
response = tool.invoke({
|
||||
"amount_decimal": 10.00,
|
||||
"payment_destination_id": "abc123",
|
||||
"customer_id": "cust_001",
|
||||
"memo": "Payment for invoice #XYZ"
|
||||
})
|
||||
```
|
||||
|
||||
### Invoke with ToolCall
|
||||
|
||||
When used inside an AI workflow, the LLM might produce a `ToolCall` dict. You can simulate it as follows:
|
||||
|
||||
```python
|
||||
model_generated_tool_call = {
|
||||
"args": {
|
||||
"amount_decimal": 10.00,
|
||||
"payment_destination_id": "abc123"
|
||||
},
|
||||
"id": "1",
|
||||
"name": tool.name,
|
||||
"type": "tool_call",
|
||||
}
|
||||
tool.invoke(model_generated_tool_call)
|
||||
```
|
||||
|
||||
## Using the Tool in a Chain or Agent
|
||||
|
||||
You can bind a PaymanAI tool to a LangChain agent or chain that supports tool-calling.
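For example, here is a hedged sketch of binding the tool to a tool-calling chat model; `ChatOpenAI` stands in for any model that supports tool calling and is an assumption, not part of the PaymanAI docs:

```python
from langchain_openai import ChatOpenAI

# Any tool-calling chat model works; ChatOpenAI is illustrative only.
llm = ChatOpenAI(model="gpt-4o-mini")
llm_with_tools = llm.bind_tools([tool])  # `tool` is the PaymanAI tool above

ai_msg = llm_with_tools.invoke("Send $10.00 to payee abc123 for invoice #XYZ.")
print(ai_msg.tool_calls)  # structured tool calls produced by the model
```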
|
||||
|
||||
## Quick Start Summary

1. **Sign up** at app.paymanai.com to get your **API Key**.
2. **Install** dependencies:
   ```bash
   pip install paymanai langchain-community
   ```
3. **Export** environment variables:
   ```bash
   export PAYMAN_API_SECRET="YOUR_SECRET_KEY"
   export PAYMAN_ENVIRONMENT="sandbox"
   ```
4. **Instantiate** a PaymanAI tool, passing your desired name/description.
5. **Call** the tool with `.invoke(...)` or integrate it into a chain or agent.
51
docs/docs/integrations/providers/pipeshift.md
Normal file
@@ -0,0 +1,51 @@
# Pipeshift

> [Pipeshift](https://pipeshift.com) is a fine-tuning and inference platform for open-source LLMs.

- Bring your own datasets, fine-tune multiple LLMs, start inferencing in one click, and watch them scale to millions.

## Installation and Setup

- Install the Pipeshift integration package.

```bash
pip install langchain-pipeshift
```

- Get your Pipeshift API key by signing up at [Pipeshift](https://pipeshift.com).

### Authentication

You can authenticate with your Pipeshift API key in either of the following ways:

1. Adding the API key to the environment variable `PIPESHIFT_API_KEY`.

```python
import os

os.environ["PIPESHIFT_API_KEY"] = "<your_api_key>"
```

2. Passing `api_key` directly to the Pipeshift LLM or chat module:

```python
from langchain_pipeshift import ChatPipeshift, Pipeshift

llm = Pipeshift(api_key="<your_api_key>", model="meta-llama/Meta-Llama-3.1-8B-Instruct", max_tokens=512)
# or
chat = ChatPipeshift(api_key="<your_api_key>", model="meta-llama/Meta-Llama-3.1-8B-Instruct", max_tokens=512)
```
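Once authenticated, invocation follows the standard LangChain runnable interface. A minimal sketch reusing the `llm` and `chat` objects constructed above (the prompt text is illustrative):

```python
# Completion-style call with the LLM interface; returns a string
print(llm.invoke("Briefly explain LoRA fine-tuning."))

# Chat-style call; returns an AIMessage whose text is in `.content`
print(chat.invoke("Briefly explain LoRA fine-tuning.").content)
```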
## Chat models

See an [example](/docs/integrations/chat/pipeshift).

```python
from langchain_pipeshift import ChatPipeshift
```

## LLMs

See an [example](/docs/integrations/llms/pipeshift).

```python
from langchain_pipeshift import Pipeshift
```
147
docs/docs/integrations/providers/sambanova.ipynb
Normal file
@@ -0,0 +1,147 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# SambaNova\n",
"\n",
"Customers are turning to [SambaNova](https://sambanova.ai/) to quickly deploy state-of-the-art AI capabilities to gain competitive advantage. Our purpose-built enterprise-scale AI platform is the technology backbone for the next generation of AI computing. We power the foundation models that unlock the valuable business insights trapped in data.\n",
"\n",
"Designed for AI, the SambaNova RDU was built with a revolutionary dataflow architecture. This design makes the RDU significantly more efficient for these workloads than GPUs, as it eliminates the redundant calls to memory that are an inherent limitation of how GPUs function. This built-in efficiency is one of the features that makes the RDU capable of much higher performance than GPUs in a fraction of the footprint.\n",
"\n",
"On top of our architecture, we have developed platforms that allow companies and developers to take full advantage of RDU processors and open-source models."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### SambaNovaCloud\n",
"\n",
"SambaNova's [SambaNova Cloud](https://cloud.sambanova.ai/) is a platform for performing inference with open-source models.\n",
"\n",
"You can obtain a free SambaNovaCloud API key [here](https://cloud.sambanova.ai/)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### SambaStudio\n",
"\n",
"SambaNova's [SambaStudio](https://docs.sambanova.ai/sambastudio/latest/sambastudio-intro.html) is a rich, GUI-based platform that provides the functionality to train, deploy, and manage models in SambaNova DataScale systems."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Installation and Setup\n",
"\n",
"Install the integration package:\n",
"\n",
"```bash\n",
"pip install langchain-sambanova\n",
"```\n",
"\n",
"Set your API key as an environment variable:\n",
"\n",
"If you are a SambaNovaCloud user:\n",
"\n",
"```bash\n",
"export SAMBANOVA_API_KEY=\"your-sambanova-cloud-api-key-here\"\n",
"```\n",
"\n",
"or, if you are a SambaStudio user:\n",
"\n",
"```bash\n",
"export SAMBASTUDIO_API_KEY=\"your-sambastudio-api-key-here\"\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Chat models"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "y8ku6X96sebl"
},
"outputs": [],
"source": [
"from langchain_sambanova import ChatSambaNovaCloud\n",
"\n",
"llm = ChatSambaNovaCloud(model=\"Meta-Llama-3.3-70B-Instruct\", temperature=0.7)\n",
"llm.invoke(\"Tell me a joke about artificial intelligence.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_sambanova import ChatSambaStudio\n",
"\n",
"llm = ChatSambaStudio(model=\"Meta-Llama-3.3-70B-Instruct\", temperature=0.7)\n",
"llm.invoke(\"Tell me a joke about artificial intelligence.\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Embedding Models"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_sambanova import SambaStudioEmbeddings\n",
"\n",
"embeddings = SambaStudioEmbeddings(model=\"e5-mistral-7b-instruct\")\n",
"embeddings.embed_query(\"What is the meaning of life?\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"API Reference: [langchain-sambanova](https://python.langchain.com/api_reference/sambanova/index.html)"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
105
docs/docs/integrations/providers/tilores.ipynb
Normal file
@@ -0,0 +1,105 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Tilores\n",
"\n",
"[Tilores](https://tilores.io) is a platform that provides advanced entity resolution solutions for data integration and management. Using cutting-edge algorithms, machine learning, and a user-friendly interface, Tilores helps organizations match, resolve, and consolidate data from disparate sources, ensuring high-quality, consistent information."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Installation and Setup"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "y8ku6X96sebl"
},
"outputs": [],
"source": [
"%pip install --upgrade tilores-langchain"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To access Tilores, you need to [create and configure an instance](https://app.tilores.io). If you prefer to test out Tilores first, you can use the [read-only demo credentials](https://github.com/tilotech/identity-rag-customer-insights-chatbot?tab=readme-ov-file#1-configure-customer-data-access)."
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"from tilores import TiloresAPI\n",
"\n",
"os.environ[\"TILORES_API_URL\"] = \"<api-url>\"\n",
"os.environ[\"TILORES_TOKEN_URL\"] = \"<token-url>\"\n",
"os.environ[\"TILORES_CLIENT_ID\"] = \"<client-id>\"\n",
"os.environ[\"TILORES_CLIENT_SECRET\"] = \"<client-secret>\"\n",
"\n",
"tilores = TiloresAPI.from_environ()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Please refer to the [Tilores documentation](https://docs.tilotech.io/tilores/publicsaaswalkthrough/) on how to create your own instance."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Toolkits\n",
"\n",
"You can use the [`TiloresTools`](/docs/integrations/tools/tilores) to query data from Tilores:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from tilores_langchain import TiloresTools"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "langchain",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.8"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
@@ -25,14 +25,6 @@
"\n",
"This will help you getting started with the Dappier [retriever](https://python.langchain.com/docs/concepts/retrievers/). For detailed documentation of all DappierRetriever features and configurations head to the [API reference](https://python.langchain.com/en/latest/retrievers/langchain_dappier.retrievers.Dappier.DappierRetriever.html).\n",
"\n",
"### Integration details\n",
"\n",
"Bring-your-own data (i.e., index and search a custom corpus of documents):\n",
"\n",
"| Retriever | Self-host | Cloud offering | Package |\n",
"| :--- | :--- | :---: | :---: |\n",
"[DappierRetriever](https://python.langchain.com/en/latest/retrievers/langchain_dappier.retrievers.Dappier.DappierRetriever.html) | ❌ | ❌ | langchain-dappier |\n",
"\n",
"### Setup\n",
"\n",
"Install ``langchain-dappier`` and set environment variable ``DAPPIER_API_KEY``.\n",
@@ -16,7 +16,7 @@
"metadata": {},
"outputs": [],
"source": [
"%pip install --upgrade --quiet langchain sentence_transformers"
"%pip install --upgrade --quiet langchain langchain-huggingface sentence_transformers"
]
},
{
@@ -32,15 +32,13 @@
"\n",
"1. Creating [NAVER Cloud Platform](https://www.ncloud.com/) account \n",
"2. Apply to use [CLOVA Studio](https://www.ncloud.com/product/aiService/clovaStudio)\n",
"3. Find API Keys after creating CLOVA Studio Test App or Service App (See [here](https://guide.ncloud-docs.com/docs/en/clovastudio-playground01#테스트앱생성).)\n",
"3. Create a CLOVA Studio Test App or Service App of a model to use (See [here](https://guide.ncloud-docs.com/docs/clovastudio-explorer03#%ED%85%8C%EC%8A%A4%ED%8A%B8%EC%95%B1%EC%83%9D%EC%84%B1).)\n",
"4. Issue a Test or Service API key (See [here](https://api.ncloud-docs.com/docs/ai-naver-clovastudio-summary#API%ED%82%A4).)\n",
"\n",
"### Credentials\n",
"\n",
"CLOVA Studio requires 3 keys (`NCP_CLOVASTUDIO_API_KEY`, `NCP_APIGW_API_KEY` and `NCP_CLOVASTUDIO_APP_ID`) for embeddings.\n",
"- `NCP_CLOVASTUDIO_API_KEY` and `NCP_CLOVASTUDIO_APP_ID` is issued per serviceApp or testApp\n",
"- `NCP_APIGW_API_KEY` is issued per account\n",
"\n",
"The two API Keys could be found by clicking `App Request Status` > `Service App, Test App List` > `‘Details’ button for each app` in [CLOVA Studio](https://clovastudio.ncloud.com/studio-application/service-app)."
"Set the `NCP_CLOVASTUDIO_API_KEY` environment variable with your API key.\n",
" - Note that if you are using a legacy API Key (that doesn't start with `nv-*` prefix), you might need two additional keys to be set as environment variables (`NCP_APIGW_API_KEY` and `NCP_CLOVASTUDIO_APP_ID`. They could be found by clicking `App Request Status` > `Service App, Test App List` > `Details` button for each app in [CLOVA Studio](https://clovastudio.ncloud.com/studio-application/service-app)."
]
},
{
@@ -56,9 +54,15 @@
"if not os.getenv(\"NCP_CLOVASTUDIO_API_KEY\"):\n",
"    os.environ[\"NCP_CLOVASTUDIO_API_KEY\"] = getpass.getpass(\n",
"        \"Enter NCP CLOVA Studio API Key: \"\n",
"    )\n",
"if not os.getenv(\"NCP_APIGW_API_KEY\"):\n",
"    os.environ[\"NCP_APIGW_API_KEY\"] = getpass.getpass(\"Enter NCP API Gateway API Key: \")"
"    )"
]
},
{
"cell_type": "markdown",
"id": "b31fc062",
"metadata": {},
"source": [
"Uncomment below to use a legacy API key:"
]
},
{
@@ -68,7 +72,9 @@
"metadata": {},
"outputs": [],
"source": [
"os.environ[\"NCP_CLOVASTUDIO_APP_ID\"] = input(\"Enter NCP CLOVA Studio App ID: \")"
"# if not os.getenv(\"NCP_APIGW_API_KEY\"):\n",
"#     os.environ[\"NCP_APIGW_API_KEY\"] = getpass.getpass(\"Enter NCP API Gateway API Key: \")\n",
"# os.environ[\"NCP_CLOVASTUDIO_APP_ID\"] = input(\"Enter NCP CLOVA Studio App ID: \")"
]
},
{
@@ -118,8 +124,7 @@
"from langchain_community.embeddings import ClovaXEmbeddings\n",
"\n",
"embeddings = ClovaXEmbeddings(\n",
"    model=\"clir-emb-dolphin\", # set with the model name of corresponding app id. Default is `clir-emb-dolphin`\n",
"    # app_id=\"...\" # set if you prefer to pass app id directly instead of using environment variables\n",
"    model=\"clir-emb-dolphin\" # set with the model name of corresponding app id. Default is `clir-emb-dolphin`\n",
")"
]
},
@@ -251,7 +256,7 @@
"\n",
"When going live with production-level application using CLOVA Studio, you should apply for and use Service App. (See [here](https://guide.ncloud-docs.com/docs/en/clovastudio-playground01#서비스앱신청).)\n",
"\n",
"For a Service App, corresponding `NCP_CLOVASTUDIO_API_KEY` and `NCP_CLOVASTUDIO_APP_ID` are issued and can only be called with them."
"For a Service App, you should use a corresponding Service API key and can only be called with it."
]
},
{
@@ -266,6 +271,7 @@
"os.environ[\"NCP_CLOVASTUDIO_API_KEY\"] = getpass.getpass(\n",
"    \"Enter NCP CLOVA Studio API Key for Service App: \"\n",
")\n",
"# Uncomment below to use a legacy API key:\n",
"os.environ[\"NCP_CLOVASTUDIO_APP_ID\"] = input(\"Enter NCP CLOVA Studio Service App ID: \")"
]
},
@@ -279,7 +285,6 @@
"embeddings = ClovaXEmbeddings(\n",
"    service_app=True,\n",
"    model=\"clir-emb-dolphin\", # set with the model name of corresponding app id of your Service App\n",
"    # app_id=\"...\" # set if you prefer to pass app id directly instead of using environment variables\n",
")"
]
},
@@ -1,137 +1,255 @@
{
"cells": [
{
"cell_type": "raw",
"id": "afaf8039",
"metadata": {
"vscode": {
"languageId": "raw"
}
},
"source": [
"---\n",
"sidebar_label: SambaStudio\n",
"---"
]
},
{
"cell_type": "markdown",
"id": "9a3d6f34",
"metadata": {},
"source": [
"# SambaNova\n",
"# SambaStudioEmbeddings\n",
"\n",
"This will help you get started with SambaNova's SambaStudio embedding models using LangChain. For detailed documentation on `SambaStudioEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/sambanova/embeddings/langchain_sambanova.embeddings.SambaStudioEmbeddings.html).\n",
"\n",
"**[SambaNova](https://sambanova.ai/)'s** [SambaStudio](https://sambanova.ai/technology/full-stack-ai-platform) is a platform for running your own open-source models.\n",
"\n",
"This example goes over how to use LangChain to interact with SambaNova embedding models"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## SambaStudio\n",
"## Overview\n",
"### Integration details\n",
"\n",
"**SambaStudio** allows you to train, run batch inference jobs, and deploy online inference endpoints to run open source models that you fine tuned yourself."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"A SambaStudio environment is required to deploy a model. Get more information at [sambanova.ai/products/enterprise-ai-platform-sambanova-suite](https://sambanova.ai/products/enterprise-ai-platform-sambanova-suite)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Register your environment variables:"
"| Provider | Package |\n",
"|:--------:|:-------:|\n",
"| [SambaNova](/docs/integrations/providers/sambanova/) | [langchain-sambanova](https://python.langchain.com/api_reference/langchain_sambanova/embeddings/langchain_sambanova.embeddings.SambaStudioEmbeddings.html) |\n",
"\n",
"## Setup\n",
"\n",
"To access SambaStudio models you will need to [deploy an endpoint](https://docs.sambanova.ai/sambastudio/latest/language-models.html) in your SambaStudio platform and install the `langchain_sambanova` integration package.\n",
"\n",
"```bash\n",
"pip install langchain-sambanova\n",
"```\n",
"\n",
"### Credentials\n",
"\n",
"Get the URL and API Key from your SambaStudio deployed endpoint and add them to your environment variables:\n",
"\n",
"```bash\n",
"export SAMBASTUDIO_URL=\"sambastudio-url-key-here\"\n",
"export SAMBASTUDIO_API_KEY=\"your-api-key-here\"\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "36521c2a",
"metadata": {},
"outputs": [],
"source": [
"import getpass\n",
"import os\n",
"\n",
"sambastudio_base_url = \"<Your SambaStudio environment URL>\"\n",
"sambastudio_base_uri = \"<Your SambaStudio environment URI>\"\n",
"sambastudio_project_id = \"<Your SambaStudio project id>\"\n",
"sambastudio_endpoint_id = \"<Your SambaStudio endpoint id>\"\n",
"sambastudio_api_key = \"<Your SambaStudio endpoint API key>\"\n",
"\n",
"# Set the environment variables\n",
"os.environ[\"SAMBASTUDIO_EMBEDDINGS_BASE_URL\"] = sambastudio_base_url\n",
"os.environ[\"SAMBASTUDIO_EMBEDDINGS_BASE_URI\"] = sambastudio_base_uri\n",
"os.environ[\"SAMBASTUDIO_EMBEDDINGS_PROJECT_ID\"] = sambastudio_project_id\n",
"os.environ[\"SAMBASTUDIO_EMBEDDINGS_ENDPOINT_ID\"] = sambastudio_endpoint_id\n",
"os.environ[\"SAMBASTUDIO_EMBEDDINGS_API_KEY\"] = sambastudio_api_key"
"if not os.getenv(\"SAMBASTUDIO_API_KEY\"):\n",
"    os.environ[\"SAMBASTUDIO_API_KEY\"] = getpass.getpass(\n",
"        \"Enter your SambaNova API key: \"\n",
"    )"
]
},
{
"cell_type": "markdown",
"id": "c84fb993",
"metadata": {},
"source": [
"Call SambaStudio hosted embeddings directly from LangChain!"
"If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "39a4953b",
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.embeddings.sambanova import SambaStudioEmbeddings\n",
"\n",
"embeddings = SambaStudioEmbeddings()\n",
"\n",
"text = \"Hello, this is a test\"\n",
"result = embeddings.embed_query(text)\n",
"print(result)\n",
"\n",
"texts = [\"Hello, this is a test\", \"Hello, this is another test\"]\n",
"results = embeddings.embed_documents(texts)\n",
"print(results)"
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
"# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")"
]
},
{
"cell_type": "markdown",
"id": "d9664366",
"metadata": {},
"source": [
"You can manually pass the endpoint parameters and manually set the batch size you have in your SambaStudio embeddings endpoint"
"### Installation\n",
"\n",
"The LangChain SambaNova integration lives in the `langchain-sambanova` package:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "64853226",
"metadata": {},
"outputs": [],
"source": [
"%pip install -qU langchain-sambanova"
]
},
{
"cell_type": "markdown",
"id": "45dd1724",
"metadata": {},
"source": [
"## Instantiation\n",
"\n",
"Now we can instantiate our model object and generate embeddings:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9ea7a09b",
"metadata": {},
"outputs": [],
"source": [
"from langchain_sambanova import SambaStudioEmbeddings\n",
"\n",
"embeddings = SambaStudioEmbeddings(\n",
"    sambastudio_embeddings_base_url=sambastudio_base_url,\n",
"    sambastudio_embeddings_base_uri=sambastudio_base_uri,\n",
"    sambastudio_embeddings_project_id=sambastudio_project_id,\n",
"    sambastudio_embeddings_endpoint_id=sambastudio_endpoint_id,\n",
"    sambastudio_embeddings_api_key=sambastudio_api_key,\n",
"    batch_size=32, # set depending on the deployed endpoint configuration\n",
"    model=\"e5-mistral-7b-instruct\",\n",
")"
]
},
{
"cell_type": "markdown",
"id": "77d271b6",
"metadata": {},
"source": [
"Or You can use an embedding model expert included in your deployed CoE"
"## Indexing and Retrieval\n",
"\n",
"Embedding models are often used in retrieval-augmented generation (RAG) flows, both as part of indexing data as well as later retrieving it. For more detailed instructions, please see our [RAG tutorials](/docs/tutorials/).\n",
"\n",
"Below, see how to index and retrieve data using the `embeddings` object we initialized above. In this example, we will index and retrieve a sample document in the `InMemoryVectorStore`."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d817716b",
"metadata": {},
"outputs": [],
"source": [
"embeddings = SambaStudioEmbeddings(\n",
"    batch_size=1,\n",
"    model_kwargs={\n",
"        \"select_expert\": \"e5-mistral-7b-instruct\",\n",
"    },\n",
")"
"# Create a vector store with a sample text\n",
"from langchain_core.vectorstores import InMemoryVectorStore\n",
"\n",
"text = \"LangChain is the framework for building context-aware reasoning applications\"\n",
"\n",
"vectorstore = InMemoryVectorStore.from_texts(\n",
"    [text],\n",
"    embedding=embeddings,\n",
")\n",
"\n",
"# Use the vectorstore as a retriever\n",
"retriever = vectorstore.as_retriever()\n",
"\n",
"# Retrieve the most similar text\n",
"retrieved_documents = retriever.invoke(\"What is LangChain?\")\n",
"\n",
"# show the retrieved document's content\n",
"retrieved_documents[0].page_content"
]
},
{
"cell_type": "markdown",
"id": "e02b9855",
"metadata": {},
"source": [
"## Direct Usage\n",
"\n",
"Under the hood, the vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings for the text(s) used in `from_texts` and retrieval `invoke` operations, respectively.\n",
"\n",
"You can directly call these methods to get embeddings for your own use cases.\n",
"\n",
"### Embed single texts\n",
"\n",
"You can embed single texts or documents with `embed_query`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0d2befcd",
"metadata": {},
"outputs": [],
"source": [
"single_vector = embeddings.embed_query(text)\n",
"print(str(single_vector)[:100]) # Show the first 100 characters of the vector"
]
},
{
"cell_type": "markdown",
"id": "1b5a7d03",
"metadata": {},
"source": [
"### Embed multiple texts\n",
"\n",
"You can embed multiple texts with `embed_documents`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2f4d6e97",
"metadata": {},
"outputs": [],
"source": [
"text2 = (\n",
"    \"LangGraph is a library for building stateful, multi-actor applications with LLMs\"\n",
")\n",
"two_vectors = embeddings.embed_documents([text, text2])\n",
"for vector in two_vectors:\n",
"    print(str(vector)[:100]) # Show the first 100 characters of the vector"
]
},
{
"cell_type": "markdown",
"id": "98785c12",
"metadata": {},
"source": [
"## API Reference\n",
"\n",
"For detailed documentation on `SambaStudioEmbeddings` features and configuration options, please refer to the [API reference](https://python.langchain.com/api_reference/langchain_sambanova/embeddings/langchain_sambanova.embeddings.SambaStudioEmbeddings.html).\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python"
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 5
}
504
docs/docs/integrations/tools/dappier.ipynb
Normal file
@@ -0,0 +1,504 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "BJB3NSYqUWl4"
},
"source": [
"# Dappier\n",
"\n",
"[Dappier](https://dappier.com) connects any LLM or your Agentic AI to real-time, rights-cleared, proprietary data from trusted sources, making your AI an expert in anything. Our specialized models include Real-Time Web Search, News, Sports, Financial Stock Market Data, Crypto Data, and exclusive content from premium publishers. Explore a wide range of data models in our marketplace at [marketplace.dappier.com](https://marketplace.dappier.com).\n",
"\n",
"[Dappier](https://dappier.com) delivers enriched, prompt-ready, and contextually relevant data strings, optimized for seamless integration with LangChain. Whether you're building conversational AI, recommendation engines, or intelligent search, Dappier's LLM-agnostic RAG models ensure your AI has access to verified, up-to-date data—without the complexity of building and managing your own retrieval pipeline."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "MAbkvI8pUb7R"
},
"source": [
"# Dappier Tool\n",
"\n",
"This will help you get started with the Dappier [tool](https://python.langchain.com/docs/concepts/tools/). For detailed documentation of all DappierRealTimeSearchTool features and configurations head to the [API reference](https://python.langchain.com/en/latest/tools/langchain_dappier.tools.Dappier.DappierRealTimeSearchTool.html).\n",
"\n",
"## Overview\n",
"\n",
"The DappierRealTimeSearchTool and DappierAIRecommendationTool empower AI applications with real-time data and AI-driven insights. The former provides access to up-to-date information across news, weather, travel, and financial markets, while the latter supercharges applications with factual, premium content from diverse domains like News, Finance, and Sports, all powered by Dappier's pre-trained RAG models and natural language APIs."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ouA6p-E-aF34"
},
"source": [
"### Setup\n",
"\n",
"This tool lives in the `langchain-dappier` package."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "tNhKB0BUaneq"
},
"outputs": [],
"source": [
"%pip install -qU langchain-dappier"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "oBLVZpnoYshG"
},
"source": [
"### Credentials\n",
"\n",
"We also need to set our Dappier API credentials, which can be generated at the [Dappier site](https://platform.dappier.com/profile/api-keys)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "UrmBR_JyY9I6"
},
"outputs": [],
"source": [
"import getpass\n",
"import os\n",
"\n",
"if not os.environ.get(\"DAPPIER_API_KEY\"):\n",
"    os.environ[\"DAPPIER_API_KEY\"] = getpass.getpass(\"Dappier API key:\\n\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "x_3712yIXTGc"
},
"source": [
"If you want to get automated tracing from individual queries, you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"id": "S1Tuwpq-XVaX"
},
"outputs": [],
"source": [
"# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n",
"# os.environ[\"LANGSMITH_TRACING\"] = \"true\""
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "IgPgQ12wcA4i"
},
"source": [
"## DappierRealTimeSearchTool\n",
"\n",
"Access real-time Google search results, including the latest news, weather, travel, and deals, along with up-to-date financial news, stock prices, and trades from polygon.io, all powered by AI insights to keep you informed."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "W8QBSmDvbL69"
},
"source": [
"### Instantiation\n",
"\n",
"- ai_model_id: str\n",
"    The AI model ID to use for the query. The AI model ID always starts\n",
"    with the prefix \"am_\".\n",
"\n",
"    Defaults to \"am_01j06ytn18ejftedz6dyhz2b15\".\n",
"\n",
"    Multiple AI model IDs are available, which can be found at:\n",
"    https://marketplace.dappier.com/marketplace"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"id": "tw1edqrLbiJ4"
},
"outputs": [],
"source": [
"from langchain_dappier import DappierRealTimeSearchTool\n",
"\n",
"tool = DappierRealTimeSearchTool(\n",
"    # ai_model_id=\"...\", # overwrite default ai_model_id\n",
"    # name=\"...\", # overwrite default tool name\n",
"    # description=\"...\", # overwrite default tool description\n",
"    # args_schema=..., # overwrite default args_schema: BaseModel\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "nTatJ6F8b0sV"
},
"source": [
"### Invocation\n",
"\n",
"#### [Invoke directly with args](/docs/concepts/tools)\n",
"\n",
"The `DappierRealTimeSearchTool` takes a single \"query\" argument, which should be a natural language query:"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 53
},
"id": "ASCcnvUCdIvz",
"outputId": "91538fac-f515-4a8e-adb6-0a7aa42f704c"
},
"outputs": [
{
"data": {
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
},
"text/plain": [
"\"At the last Wimbledon in 2024, Carlos Alcaraz won the title by defeating Novak Djokovic. This victory marked Alcaraz's fourth Grand Slam title at just 21 years old! 🎉🏆🎾\""
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tool.invoke({\"query\": \"What happened at the last wimbledon\"})"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Jcbi44TRdL3D"
},
"source": [
"### [Invoke with ToolCall](/docs/concepts/tools)\n",
"\n",
"We can also invoke the tool with a model-generated ToolCall, in which case a ToolMessage will be returned:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "FCTpjujpdQst",
"outputId": "e184c25b-0089-4896-fbb4-1fbe09ea2f6b"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Euro 2024 is being hosted by Germany! 🇩🇪 The tournament runs from June 14 to July 14, 2024, featuring 24 teams competing across various cities like Berlin and Munich. It's going to be an exciting summer of football! ⚽️🏆\n"
]
}
],
"source": [
"# This is usually generated by a model, but we'll create a tool call directly for demo purposes.\n",
"model_generated_tool_call = {\n",
"    \"args\": {\"query\": \"euro 2024 host nation\"},\n",
"    \"id\": \"1\",\n",
"    \"name\": \"dappier\",\n",
"    \"type\": \"tool_call\",\n",
"}\n",
"tool_msg = tool.invoke(model_generated_tool_call)\n",
"\n",
"# The content is a JSON string of results\n",
"print(tool_msg.content[:400])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "PTBiq_2JdfjK"
},
"source": [
"### Chaining\n",
"\n",
"We can use our tool in a chain by first binding it to a [tool-calling model](/docs/how_to/tool_calling/) and then calling it:\n",
"\n",
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
"\n",
"<ChatModelTabs customVarName=\"llm\" />\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"id": "_XImV9NtdoJq"
},
"outputs": [],
"source": [
"# | output: false\n",
"# | echo: false\n",
"\n",
"# !pip install -qU langchain langchain-openai\n",
"from langchain.chat_models import init_chat_model\n",
"\n",
"llm = init_chat_model(model=\"gpt-4o\", model_provider=\"openai\", temperature=0)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "i5P5DgJOdwPI",
"outputId": "70e14f71-637e-422d-80ac-62e93b3686a9"
},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\"Barbora Krejčíková won the women's singles title at Wimbledon 2024, defeating Jasmine Paolini in the final with a score of 6–2, 2–6, 6–4. This victory marked her first Wimbledon singles title and her second major singles title overall! 🎉🏆🎾\", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 69, 'prompt_tokens': 222, 'total_tokens': 291, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_4691090a87', 'finish_reason': 'stop', 'logprobs': None}, id='run-87a385dd-103b-4344-a3be-2d6fd1dcfdf5-0', usage_metadata={'input_tokens': 222, 'output_tokens': 69, 'total_tokens': 291, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import datetime\n",
"\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_core.runnables import RunnableConfig, chain\n",
"\n",
"today = datetime.datetime.today().strftime(\"%D\")\n",
"prompt = ChatPromptTemplate(\n",
"    [\n",
"        (\"system\", f\"You are a helpful assistant. The date today is {today}.\"),\n",
"        (\"human\", \"{user_input}\"),\n",
"        (\"placeholder\", \"{messages}\"),\n",
"    ]\n",
")\n",
"\n",
"# specifying tool_choice will force the model to call this tool.\n",
"llm_with_tools = llm.bind_tools([tool])\n",
"\n",
"llm_chain = prompt | llm_with_tools\n",
"\n",
"\n",
"@chain\n",
"def tool_chain(user_input: str, config: RunnableConfig):\n",
"    input_ = {\"user_input\": user_input}\n",
"    ai_msg = llm_chain.invoke(input_, config=config)\n",
"    tool_msgs = tool.batch(ai_msg.tool_calls, config=config)\n",
"    return llm_chain.invoke({**input_, \"messages\": [ai_msg, *tool_msgs]}, config=config)\n",
"\n",
"\n",
"tool_chain.invoke(\"who won the last womens singles wimbledon\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "TycbUKZsfRQy"
},
"source": [
"## DappierAIRecommendationTool\n",
"\n",
"Supercharge your AI applications with Dappier's pre-trained RAG models and natural language APIs, delivering factual and up-to-date responses from premium content providers across verticals like News, Finance, Sports, Weather, and more."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "x1XfwHzHfvUN"
},
"source": [
"### Instantiation\n",
"\n",
"- data_model_id: str \n",
"    The data model ID to use for recommendations. Data model IDs always start with the prefix \"dm_\". Defaults to \"dm_01j0pb465keqmatq9k83dthx34\". \n",
"    Multiple data model IDs are available, which can be found at [Dappier marketplace](https://marketplace.dappier.com/marketplace). \n",
"\n",
"- similarity_top_k: int \n",
"    The number of top documents to retrieve based on similarity. Defaults to \"9\". \n",
"\n",
"- ref: Optional[str]\n",
"    The site domain where AI recommendations should be displayed. Defaults to \"None\". \n",
"\n",
"- num_articles_ref: int\n",
"    The minimum number of articles to return from the specified reference domain (\"ref\"). The remaining articles will come from other sites in the RAG model. Defaults to \"0\". \n",
"\n",
"- search_algorithm: Literal[\"most_recent\", \"semantic\", \"most_recent_semantic\", \"trending\"]\n",
"    The search algorithm to use for retrieving articles. Defaults to \"most_recent\". "
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"id": "-t9rS-TBhNss"
},
"outputs": [],
"source": [
"from langchain_dappier import DappierAIRecommendationTool\n",
"\n",
"tool = DappierAIRecommendationTool(\n",
"    data_model_id=\"dm_01j0pb465keqmatq9k83dthx34\",\n",
"    similarity_top_k=3,\n",
"    ref=\"sportsnaut.com\",\n",
"    num_articles_ref=2,\n",
"    search_algorithm=\"most_recent\",\n",
"    # name=\"...\", # overwrite default tool name\n",
"    # description=\"...\", # overwrite default tool description\n",
"    # args_schema=..., # overwrite default args_schema: BaseModel\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ad3anWusg9BI"
},
"source": [
"### Invocation\n",
"\n",
"#### [Invoke directly with args](/docs/concepts/tools)\n",
"\n",
"The `DappierAIRecommendationTool` takes a single \"query\" argument, which should be a natural language query:"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "UQ08UkTMhI17",
"outputId": "5fd145b8-a547-4caa-ba06-ab0bfac3b104"
},
"outputs": [
{
"data": {
"text/plain": [
"[{'author': 'Matt Weaver',\n",
"  'image_url': 'https://images.dappier.com/dm_01j0pb465keqmatq9k83dthx34/Screenshot_20250117_021643_Gallery_.jpg?width=428&height=321',\n",
"  'pubdate': 'Fri, 17 Jan 2025 08:04:03 +0000',\n",
"  'source_url': 'https://sportsnaut.com/chili-bowl-thursday-bell-column/',\n",
"  'summary': \"The article highlights the thrilling unpredictability of the Chili Bowl Midget Nationals, focusing on the dramatic shifts in fortune for drivers like Christopher Bell, Tanner Thorson, and Karter Sarff during Thursday's events. Key moments included Sarff's unfortunate pull-off and a last-lap crash that allowed Ryan Bernal to capitalize and improve his standing, showcasing the chaotic nature of the race and the importance of strategy and luck.\\n\\nAs the competition intensifies leading up to Championship Saturday, Bell faces the challenge of racing from a Last Chance Race, reflecting on the excitement and difficulties of the sport. The article emphasizes the emotional highs and lows experienced by racers, with insights from Bell and Bernal on the unpredictable nature of racing. Overall, it captures the camaraderie and passion that define the Chili Bowl, illustrating how each moment contributes to the event's narrative.\",\n",
"  'title': 'Thursday proves why every lap of Chili Bowl is so consequential'},\n",
" {'author': 'Matt Higgins',\n",
"  'image_url': 'https://images.dappier.com/dm_01j0pb465keqmatq9k83dthx34/Pete-Alonso-24524027_.jpg?width=428&height=321',\n",
"  'pubdate': 'Fri, 17 Jan 2025 02:48:42 +0000',\n",
"  'source_url': 'https://sportsnaut.com/new-york-mets-news-pete-alonso-rejected-last-ditch-contract-offer/',\n",
"  'summary': \"The New York Mets are likely parting ways with star first baseman Pete Alonso after failing to finalize a contract agreement. Alonso rejected a last-minute three-year offer worth between $68 and $70 million, leading the Mets to redirect funds towards acquiring a top reliever. With Alonso's free-agent options dwindling, speculation arises about his potential signing with another team for the 2025 season, while the Mets plan to shift Mark Vientos to first base.\\n\\nIn a strategic move, the Mets are also considering a trade for Toronto Blue Jays' star first baseman Vladimir Guerrero Jr. This potential acquisition aims to enhance the Mets' competitiveness as they reshape their roster. Guerrero's impressive offensive stats make him a valuable target, and discussions are in the early stages. Fans and analysts are keenly watching the situation, as a trade involving such a prominent player could significantly impact both teams.\",\n",
"  'title': 'MLB insiders reveal New York Mets’ last-ditch contract offer that Pete Alonso rejected'},\n",
" {'author': 'Jim Cerny',\n",
"  'image_url': 'https://images.dappier.com/dm_01j0pb465keqmatq9k83dthx34/NHL-New-York-Rangers-at-Utah-25204492_.jpg?width=428&height=321',\n",
"  'pubdate': 'Fri, 17 Jan 2025 05:10:39 +0000',\n",
"  'source_url': 'https://www.foreverblueshirts.com/new-york-rangers-news/stirring-5-3-comeback-win-utah-close-road-trip/',\n",
"  'summary': \"The New York Rangers achieved a thrilling 5-3 comeback victory against the Utah Hockey Club, showcasing their resilience after a prior overtime loss. The Rangers scored three unanswered goals in the third period, with key contributions from Reilly Smith, Chris Kreider, and Artemi Panarin, who sealed the win with an empty-net goal. This victory marked their first win of the season when trailing after two periods and capped off a successful road trip, improving their record to 21-20-3.\\n\\nIgor Shesterkin's strong performance in goal, along with Arthur Kaliyev's first goal for the team, helped the Rangers overcome an early deficit. The game featured multiple lead changes, highlighting the competitive nature of both teams. As the Rangers prepare for their next game against the Columbus Blue Jackets, they aim to close the gap in the playoff race, with the Blue Jackets currently holding a five-point lead in the Eastern Conference standings.\",\n",
"  'title': 'Rangers score 3 times in 3rd period for stirring 5-3 comeback win against Utah to close road trip'}]"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tool.invoke({\"query\": \"latest sports news\"})"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "dzorKILbiOyy"
},
"source": [
"### [Invoke with ToolCall](/docs/concepts/tools)\n",
"\n",
"We can also invoke the tool with a model-generated ToolCall, in which case a ToolMessage will be returned:"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "wUu-awo0iP3P",
"outputId": "af1a9679-06ae-4432-a49f-769330c1e32f"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{\"author\": \"Matt Johnson\", \"image_url\": \"https://images.dappier.com/dm_01j0pb465keqmatq9k83dthx34/MLB-New-York-Mets-at-Colorado-Rockies-23948644_.jpg?width=428&height=321\", \"pubdate\": \"Fri, 17 Jan 2025 13:31:02 +0000\", \"source_url\": \"https://sportsnaut.com/new-york-mets-rumors-vladimir-guerrero-jr-news/\", \"summary\": \"The New York Mets are refocusing their strategy after failing to extend a contra\n"
]
}
],
"source": [
"# This is usually generated by a model, but we'll create a tool call directly for demo purposes.\n",
"model_generated_tool_call = {\n",
"    \"args\": {\"query\": \"top 3 news articles\"},\n",
"    \"id\": \"1\",\n",
"    \"name\": \"dappier\",\n",
"    \"type\": \"tool_call\",\n",
"}\n",
"tool_msg = tool.invoke(model_generated_tool_call)\n",
"\n",
"# The content is a JSON string of results\n",
"print(tool_msg.content[:400])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "quFltDcDd2T8"
},
"source": [
"## API reference\n",
"\n",
"For detailed documentation of all DappierRealTimeSearchTool features and configurations head to the [API reference](https://python.langchain.com/api_reference/community/tools/langchain_dappier.tools.dappier.tool.DappierRealTimeSearchTool.html)"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
@@ -149,7 +149,7 @@
"agent = create_react_agent(\n",
"    llm,\n",
"    tools,\n",
"    state_modifier=\"You are a helpful assistant. Make sure to use tool for information.\",\n",
"    prompt=\"You are a helpful assistant. Make sure to use tool for information.\",\n",
")\n",
"agent.invoke({\"messages\": [{\"role\": \"user\", \"content\": \"36939 * 8922.4\"}]})"
]
@@ -200,6 +200,37 @@
"8. **Delete File**- deletes a file from the repository."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Include release tools\n",
"\n",
"By default, the toolkit does not include release-related tools. You can include them by setting `include_release_tools=True` when initializing the toolkit:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"toolkit = GitHubToolkit.from_github_api_wrapper(github, include_release_tools=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Setting `include_release_tools=True` will include the following tools:\n",
"\n",
"* **Get Latest Release**- fetches the latest release from the repository.\n",
"\n",
"* **Get Releases**- fetches the latest 5 releases from the repository.\n",
"\n",
"* **Get Release**- fetches a specific release from the repository by tag name, e.g. `v1.0.0`.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -321,7 +352,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
"version": "3.13.1"
}
},
"nbformat": 4,
189
docs/docs/integrations/tools/payman-tool.ipynb
Normal file
@@ -0,0 +1,189 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"# PaymanAI\n",
"\n",
"PaymanAI provides functionality to send and receive payments (fiat and crypto) on behalf of an AI Agent. To get started:\n",
"\n",
"1. **Sign up** at app.paymanai.com to create an AI Agent and obtain your **API Key**.\n",
"2. **Set** environment variables (`PAYMAN_API_SECRET` for your API Key, `PAYMAN_ENVIRONMENT` for sandbox or production).\n",
"\n",
"This notebook gives a quick overview of integrating PaymanAI into LangChain as a tool. For a complete reference, see the API documentation.\n",
"\n",
"## Overview\n",
"\n",
"The PaymanAI integration is part of the `langchain-community` (or your custom) package. It allows you to:\n",
"\n",
"- Send payments (`send_payment`) to crypto addresses or bank accounts.\n",
"- Search for payees (`search_payees`).\n",
"- Add new payees (`add_payee`).\n",
"- Request money from customers with a hosted checkout link (`ask_for_money`).\n",
"- Check agent or customer balances (`get_balance`).\n",
"\n",
"These can be wrapped as **LangChain Tools** so that an LLM-based agent can call them automatically.\n",
"\n",
"### Integration details\n",
"\n",
"| Class | Package | Serializable | JS support | Package latest |\n",
"| :--- | :--- | :---: | :---: | :--- |\n",
"| PaymanAI | `langchain_community` | ❌ | ❌ | [PyPI Version] |\n",
"\n",
"If you're simply calling the PaymanAI SDK, you can do it directly or via the **Tool** interface in LangChain.\n",
"\n",
"## Setup\n",
"\n",
"1. **Install** the `langchain-community` (or equivalent) package:\n",
"\n",
"```bash\n",
"pip install --quiet -U langchain-community\n",
"```\n",
"\n",
"2. **Install** the PaymanAI SDK:\n",
"```bash\n",
"pip install paymanai\n",
"```\n",
"\n",
"3. **Set** environment variables:\n",
"```bash\n",
"export PAYMAN_API_SECRET=\"YOUR_SECRET_KEY\"\n",
"export PAYMAN_ENVIRONMENT=\"sandbox\"\n",
"```\n",
"\n",
"Your `PAYMAN_API_SECRET` should be the secret key from app.paymanai.com. The `PAYMAN_ENVIRONMENT` can be `sandbox` or `production`, depending on your usage.\n",
"\n",
"## Instantiation\n",
"\n",
"Here is an example of instantiating a PaymanAI tool. If you have multiple Payman methods, you can create multiple tools.\n",
"\n",
"```python\n",
"from langchain_community.tools.langchain_payman_tool.tool import PaymanAI\n",
"\n",
"# Instantiate the PaymanAI tool (example)\n",
"tool = PaymanAI(\n",
"    name=\"send_payment\",\n",
"    description=\"Send a payment to a specified payee.\",\n",
")\n",
"```\n",
"\n",
"## Invocation\n",
"\n",
"### Invoke directly with args\n",
"\n",
"You can call `tool.invoke(...)` and pass a dictionary matching the tool's expected fields. For example:\n",
"\n",
"```python\n",
"response = tool.invoke({\n",
"    \"amount_decimal\": 10.00,\n",
"    \"payment_destination_id\": \"abc123\",\n",
"    \"customer_id\": \"cust_001\",\n",
"    \"memo\": \"Payment for invoice #XYZ\"\n",
"})\n",
"```\n",
"\n",
"### Invoke with ToolCall\n",
"\n",
"When used inside an AI workflow, the LLM might produce a `ToolCall` dict. You can simulate it as follows:\n",
"\n",
"```python\n",
"model_generated_tool_call = {\n",
"    \"args\": {\n",
"        \"amount_decimal\": 10.00,\n",
"        \"payment_destination_id\": \"abc123\"\n",
"    },\n",
"    \"id\": \"1\",\n",
"    \"name\": tool.name,\n",
"    \"type\": \"tool_call\",\n",
"}\n",
"tool.invoke(model_generated_tool_call)\n",
"```\n",
"\n",
"## Using the Tool in a Chain or Agent\n",
"\n",
"You can bind a PaymanAI tool to a LangChain agent or chain that supports tool-calling.\n",
"\n",
"## Quick Start Summary\n",
"\n",
"1. **Sign up** at app.paymanai.com to get your **API Key**.\n",
"2. **Install** dependencies:\n",
"   ```bash\n",
"   pip install paymanai langchain-community\n",
"   ```\n",
"3. **Export** environment variables:\n",
"   ```bash\n",
"   export PAYMAN_API_SECRET=\"YOUR_SECRET_KEY\"\n",
"   export PAYMAN_ENVIRONMENT=\"sandbox\"\n",
"   ```\n",
"4. **Instantiate** a PaymanAI tool, passing your desired name/description.\n",
"5. **Call** the tool with `.invoke(...)` or integrate it into a chain or agent."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## API reference\n",
"\n",
"You can find full API documentation for PaymanAI at:\n",
"\n",
"- [Langchain-Payman Python reference](https://pypi.org/project/langchain-payman-tool/)\n",
"- [Payman Docs](https://docs.paymanai.com)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Chaining\n",
"\n",
"```python\n",
"from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_core.runnables import RunnableConfig, chain\n",
"from langchain.chat_models import init_chat_model\n",
"\n",
"# Assume we've imported your PaymanAITool or multiple Payman AI Tools\n",
"payman_tool = PaymanAITool(name=\"send_payment\")\n",
"\n",
"# Build a prompt\n",
"prompt = ChatPromptTemplate([\n",
"    (\"system\", \"You are a helpful AI that can send payments if asked.\"),\n",
"    (\"human\", \"{user_input}\"),\n",
"    (\"placeholder\", \"{messages}\"),\n",
"])\n",
"\n",
"llm = init_chat_model(model=\"gpt-4\", model_provider=\"openai\")\n",
"llm_with_tools = llm.bind_tools([payman_tool], tool_choice=payman_tool.name)\n",
"\n",
"llm_chain = prompt | llm_with_tools\n",
"\n",
"@chain\n",
"def tool_chain(user_input: str, config: RunnableConfig):\n",
"    input_ = {\"user_input\": user_input}\n",
"    ai_msg = llm_chain.invoke(input_, config=config)\n",
"    tool_msgs = payman_tool.batch(ai_msg.tool_calls, config=config)\n",
"    return llm_chain.invoke({**input_, \"messages\": [ai_msg, *tool_msgs]}, config=config)\n",
"\n",
"# Example usage:\n",
"response = tool_chain.invoke(\"Send $10 to payee123.\")\n",
"print(response)\n",
"```\n",
"\n",
"## API reference\n",
"\n",
"You can find full API documentation for PaymanAI at:\n",
"\n",
"- [Python reference](https://python.langchain.com/v0.2/api_reference/community/tools/langchain_community.tools.langchain_payman_tool.tool.PaymanAI.html)\n",
"- (Any other relevant references or doc links)"
]
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@@ -258,7 +258,7 @@
    "{api_spec}\n",
    "\"\"\".format(api_spec=api_spec)\n",
    "\n",
    "agent_executor = create_react_agent(llm, tools, state_modifier=system_message)"
    "agent_executor = create_react_agent(llm, tools, prompt=system_message)"
   ]
  },
  {

@@ -290,9 +290,7 @@
   "source": [
    "from langgraph.prebuilt import create_react_agent\n",
    "\n",
    "agent_executor = create_react_agent(\n",
    "    llm, toolkit.get_tools(), state_modifier=system_message\n",
    ")"
    "agent_executor = create_react_agent(llm, toolkit.get_tools(), prompt=system_message)"
   ]
  },
  {

350
docs/docs/integrations/tools/tilores.ipynb
Normal file
@@ -0,0 +1,350 @@
{
 "cells": [
  {
   "cell_type": "raw",
   "id": "10238e62-3465-4973-9279-606cbb7ccf16",
   "metadata": {},
   "source": [
    "---\n",
    "sidebar_label: Tilores\n",
    "---"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a6f91f20",
   "metadata": {},
   "source": [
    "# Tilores\n",
    "\n",
    "This notebook covers how to get started with the [Tilores](/docs/integrations/providers/tilores) tools.\n",
    "For a more complex example you can check out our [customer insights chatbot example](https://github.com/tilotech/identity-rag-customer-insights-chatbot).\n",
    "\n",
    "## Overview\n",
    "\n",
    "### Integration details\n",
    "\n",
    "| Class | Package | Serializable | JS support | Package latest |\n",
    "| :--- | :--- | :---: | :---: | :---: |\n",
    "| TiloresTools | [tilores-langchain](https://pypi.org/project/tilores-langchain/) | ❌ | ❌ |  |\n",
    "\n",
    "## Setup\n",
    "\n",
    "The integration requires the following packages:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "id": "f85b4089",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "%pip install --quiet -U tilores-langchain langchain"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b15e9266",
   "metadata": {},
   "source": [
    "### Credentials\n",
    "\n",
    "To access Tilores, you need to [create and configure an instance](https://app.tilores.io). If you prefer to test out Tilores first, you can use the [read-only demo credentials](https://github.com/tilotech/identity-rag-customer-insights-chatbot?tab=readme-ov-file#1-configure-customer-data-access)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "id": "e0b178a2-8816-40ca-b57c-ccdd86dde9c9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "os.environ[\"TILORES_API_URL\"] = \"<api-url>\"\n",
    "os.environ[\"TILORES_TOKEN_URL\"] = \"<token-url>\"\n",
    "os.environ[\"TILORES_CLIENT_ID\"] = \"<client-id>\"\n",
    "os.environ[\"TILORES_CLIENT_SECRET\"] = \"<client-secret>\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1c97218f-f366-479d-8bf7-fe9f2f6df73f",
   "metadata": {},
   "source": [
    "## Instantiation\n",
    "\n",
    "Here we show how to instantiate an instance of the Tilores tools:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "id": "8b3ddfe9-ca79-494c-a7ab-1f56d9407a64",
   "metadata": {},
   "outputs": [],
   "source": [
    "from tilores import TiloresAPI\n",
    "from tilores_langchain import TiloresTools\n",
    "\n",
    "tilores = TiloresAPI.from_environ()\n",
    "tilores_tools = TiloresTools(tilores)\n",
    "search_tool = tilores_tools.search_tool()\n",
    "edge_tool = tilores_tools.edge_tool()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "74147a1a",
   "metadata": {},
   "source": [
    "## Invocation\n",
    "\n",
    "The parameters for the `tilores_search` tool are dependent on the [configured schema](https://docs.tilotech.io/tilores/schema/) within Tilores. The following examples will use the schema for the demo instance with generated data.\n",
    "\n",
    "### [Invoke directly with args](/docs/concepts/tools)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "010aea95",
   "metadata": {},
   "source": [
    "The following example searches for a person called Sophie Müller in Berlin. The Tilores data contains multiple such persons and returns their known email addresses and phone numbers."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "65310a8b-eb0c-4d9e-a618-4f4abe2414fc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of entities: 3\n",
      "Number of records: 3\n",
      "Email Addresses: ['s.mueller@newcompany.de', 'sophie.mueller@email.de']\n",
      "Phone Numbers: ['30987654', '30987654', '30987654']\n",
      "Number of records: 5\n",
      "Email Addresses: ['mueller.sophie@uni-berlin.de', 'sophie.m@newshipping.de', 's.mueller@newfinance.de']\n",
      "Phone Numbers: ['30135792', '30135792']\n",
      "Number of records: 2\n",
      "Email Addresses: ['s.mueller@company.de']\n",
      "Phone Numbers: ['30123456', '30123456']\n"
     ]
    }
   ],
   "source": [
    "result = search_tool.invoke(\n",
    "    {\n",
    "        \"searchParams\": {\n",
    "            \"name\": \"Sophie Müller\",\n",
    "            \"city\": \"Berlin\",\n",
    "        },\n",
    "        \"recordFieldsToQuery\": {\n",
    "            \"email\": True,\n",
    "            \"phone\": True,\n",
    "        },\n",
    "    }\n",
    ")\n",
    "print(\"Number of entities:\", len(result[\"data\"][\"search\"][\"entities\"]))\n",
    "for entity in result[\"data\"][\"search\"][\"entities\"]:\n",
    "    print(\"Number of records:\", len(entity[\"records\"]))\n",
    "    print(\n",
    "        \"Email Addresses:\",\n",
    "        [record[\"email\"] for record in entity[\"records\"] if record.get(\"email\")],\n",
    "    )\n",
    "    print(\n",
    "        \"Phone Numbers:\",\n",
    "        [record[\"phone\"] for record in entity[\"records\"] if record.get(\"phone\")],\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ab5c77ef",
   "metadata": {},
   "source": [
    "If we're interested in how the records from the first entity are related, we can use the `edge_tool`. Note that the Tilores entity resolution engine figured out the relation between those records automatically. Please refer to the [edge documentation](https://docs.tilotech.io/tilores/rules/#edges) for more details."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "430e425c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of edges: 7\n",
      "Edges: ['e1f2g3h4-i5j6-k7l8-m9n0-o1p2q3r4s5t6:f2g3h4i5-j6k7-l8m9-n0o1-p2q3r4s5t6u7:L1', 'e1f2g3h4-i5j6-k7l8-m9n0-o1p2q3r4s5t6:g3h4i5j6-k7l8-m9n0-o1p2-q3r4s5t6u7v8:L4', 'e1f2g3h4-i5j6-k7l8-m9n0-o1p2q3r4s5t6:f2g3h4i5-j6k7-l8m9-n0o1-p2q3r4s5t6u7:L2', 'f2g3h4i5-j6k7-l8m9-n0o1-p2q3r4s5t6u7:g3h4i5j6-k7l8-m9n0-o1p2-q3r4s5t6u7v8:L1', 'f2g3h4i5-j6k7-l8m9-n0o1-p2q3r4s5t6u7:g3h4i5j6-k7l8-m9n0-o1p2-q3r4s5t6u7v8:L4', 'e1f2g3h4-i5j6-k7l8-m9n0-o1p2q3r4s5t6:g3h4i5j6-k7l8-m9n0-o1p2-q3r4s5t6u7v8:L1', 'e1f2g3h4-i5j6-k7l8-m9n0-o1p2q3r4s5t6:f2g3h4i5-j6k7-l8m9-n0o1-p2q3r4s5t6u7:L4']\n"
     ]
    }
   ],
   "source": [
    "edge_result = edge_tool.invoke(\n",
    "    {\"entityID\": result[\"data\"][\"search\"][\"entities\"][0][\"id\"]}\n",
    ")\n",
    "edges = edge_result[\"data\"][\"entity\"][\"entity\"][\"edges\"]\n",
    "print(\"Number of edges:\", len(edges))\n",
    "print(\"Edges:\", edges)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d6e73897",
   "metadata": {},
   "source": [
    "### [Invoke with ToolCall](/docs/concepts/tools)\n",
    "\n",
    "We can also invoke the tool with a model-generated ToolCall, in which case a ToolMessage will be returned:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "f90e33a7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ToolMessage(content='{\"data\": {\"search\": {\"entities\": [{\"id\": \"9601cf3b-e85f-46ab-aaa8-ffb8b46f1c5b\", \"hits\": {\"c3d4e5f6-g7h8-i9j0-k1l2-m3n4o5p6q7r8\": [\"L1\"]}, \"records\": [{\"email\": \"\", \"phone\": \"30123456\"}, {\"email\": \"s.mueller@company.de\", \"phone\": \"30123456\"}]}, {\"id\": \"03da2e11-0aa2-4d17-8aaa-7b32c52decd9\", \"hits\": {\"e1f2g3h4-i5j6-k7l8-m9n0-o1p2q3r4s5t6\": [\"L1\"], \"g3h4i5j6-k7l8-m9n0-o1p2-q3r4s5t6u7v8\": [\"L1\"]}, \"records\": [{\"email\": \"s.mueller@newcompany.de\", \"phone\": \"30987654\"}, {\"email\": \"\", \"phone\": \"30987654\"}, {\"email\": \"sophie.mueller@email.de\", \"phone\": \"30987654\"}]}, {\"id\": \"4d896fb5-0d08-4212-a043-b5deb0347106\", \"hits\": {\"j6k7l8m9-n0o1-p2q3-r4s5-t6u7v8w9x0y1\": [\"L1\"], \"l8m9n0o1-p2q3-r4s5-t6u7-v8w9x0y1z2a3\": [\"L1\"], \"m9n0o1p2-q3r4-s5t6-u7v8-w9x0y1z2a3b4\": [\"L1\"], \"n0o1p2q3-r4s5-t6u7-v8w9-x0y1z2a3b4c5\": [\"L1\"]}, \"records\": [{\"email\": \"mueller.sophie@uni-berlin.de\", \"phone\": \"\"}, {\"email\": \"sophie.m@newshipping.de\", \"phone\": \"\"}, {\"email\": \"\", \"phone\": \"30135792\"}, {\"email\": \"\", \"phone\": \"\"}, {\"email\": \"s.mueller@newfinance.de\", \"phone\": \"30135792\"}]}]}}}', name='tilores_search', tool_call_id='1')"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# This is usually generated by a model, but we'll create a tool call directly for demo purposes.\n",
    "model_generated_tool_call = {\n",
    "    \"args\": {\n",
    "        \"searchParams\": {\n",
    "            \"name\": \"Sophie Müller\",\n",
    "            \"city\": \"Berlin\",\n",
    "        },\n",
    "        \"recordFieldsToQuery\": {\n",
    "            \"email\": True,\n",
    "            \"phone\": True,\n",
    "        },\n",
    "    },\n",
    "    \"id\": \"1\",\n",
    "    \"name\": search_tool.name,\n",
    "    \"type\": \"tool_call\",\n",
    "}\n",
    "search_tool.invoke(model_generated_tool_call)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "659f9fbd-6fcf-445f-aa8c-72d8e60154bd",
   "metadata": {},
   "source": [
    "## Chaining\n",
    "\n",
    "We can use our tool in a chain by first binding it to a [tool-calling model](/docs/how_to/tool_calling/) and then calling it:\n",
    "\n",
    "import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
    "\n",
    "<ChatModelTabs customVarName=\"llm\" />\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "af3123ad-7a02-40e5-b58e-7d56e23e5830",
   "metadata": {},
   "outputs": [],
   "source": [
    "# | output: false\n",
    "# | echo: false\n",
    "\n",
    "# !pip install -qU langchain langchain-openai\n",
    "from langchain.chat_models import init_chat_model\n",
    "\n",
    "llm = init_chat_model(model=\"gpt-4o\", model_provider=\"openai\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fdbf35b5-3aaf-4947-9ec6-48c21533fb95",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_core.prompts import ChatPromptTemplate\n",
    "from langchain_core.runnables import RunnableConfig, chain\n",
    "\n",
    "prompt = ChatPromptTemplate(\n",
    "    [\n",
    "        (\"system\", \"You are a helpful assistant.\"),\n",
    "        (\"human\", \"{user_input}\"),\n",
    "        (\"placeholder\", \"{messages}\"),\n",
    "    ]\n",
    ")\n",
    "\n",
    "# specifying tool_choice will force the model to call this tool.\n",
    "llm_with_tools = llm.bind_tools([search_tool], tool_choice=search_tool.name)\n",
    "\n",
    "llm_chain = prompt | llm_with_tools\n",
    "\n",
    "\n",
    "@chain\n",
    "def tool_chain(user_input: str, config: RunnableConfig):\n",
    "    input_ = {\"user_input\": user_input}\n",
    "    ai_msg = llm_chain.invoke(input_, config=config)\n",
    "    tool_msgs = search_tool.batch(ai_msg.tool_calls, config=config)\n",
    "    return llm_chain.invoke({**input_, \"messages\": [ai_msg, *tool_msgs]}, config=config)\n",
    "\n",
    "\n",
    "tool_chain.invoke(\"Tell me the email addresses from Sophie Müller from Berlin.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4ac8146c",
   "metadata": {},
   "source": [
    "## API reference\n",
    "\n",
    "For detailed documentation of all Tilores features and configurations head to the official documentation: https://docs.tilotech.io/tilores/"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "langchain",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
@@ -49,7 +49,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install --upgrade --quiet langchain-core"
    "pip install --upgrade --quiet langchain-core"
   ]
  },
  {

@@ -718,7 +718,7 @@
    "from langchain_core.messages import HumanMessage\n",
    "from langgraph.prebuilt import create_react_agent\n",
    "\n",
    "agent_executor = create_react_agent(llm, tools, state_modifier=system_message)"
    "agent_executor = create_react_agent(llm, tools, prompt=system_message)"
   ]
  },
  {
@@ -1119,7 +1119,7 @@
    "\n",
    "tools.append(retriever_tool)\n",
    "\n",
    "agent = create_react_agent(llm, tools, state_modifier=system)"
    "agent = create_react_agent(llm, tools, prompt=system)"
   ]
  },
  {

@@ -363,7 +363,7 @@
    "\n",
    "This example is shown here explicitly to make it easier for users to compare the legacy implementation vs. the corresponding langgraph implementation.\n",
    "\n",
    "This example shows how to add memory to the [pre-built react agent](https://langchain-ai.github.io/langgraph/reference/prebuilt/#create_react_agent) in langgraph.\n",
    "This example shows how to add memory to the [pre-built react agent](https://langchain-ai.github.io/langgraph/reference/prebuilt/#langgraph.prebuilt.chat_agent_executor.create_react_agent) in langgraph.\n",
    "\n",
    "For more details, please see the [how to add memory to the prebuilt ReAct agent](https://langchain-ai.github.io/langgraph/how-tos/create-react-agent-memory/) guide in langgraph.\n",
    "\n",

@@ -500,7 +500,7 @@
    "\n",
    "\n",
    "# highlight-start\n",
    "def state_modifier(state) -> list[BaseMessage]:\n",
    "def prompt(state) -> list[BaseMessage]:\n",
    "    \"\"\"Given the agent state, return a list of messages for the chat model.\"\"\"\n",
    "    # We're using the message processor defined above.\n",
    "    return trim_messages(\n",
@@ -528,7 +528,7 @@
    "    tools=[get_user_age],\n",
    "    checkpointer=memory,\n",
    "    # highlight-next-line\n",
    "    state_modifier=state_modifier,\n",
    "    prompt=prompt,\n",
    ")\n",
    "\n",
    "# The thread id is a unique key that identifies\n",

@@ -375,7 +375,7 @@
   "id": "4f1aa06c-69b0-4f86-94bc-6be588c9a778",
   "metadata": {},
   "source": [
    "Our agent graph is going to be very similar to a simple [ReAct agent](https://langchain-ai.github.io/langgraph/reference/prebuilt/#create_react_agent). The only important modification is adding a node to load memories BEFORE calling the agent for the first time."
    "Our agent graph is going to be very similar to a simple [ReAct agent](https://langchain-ai.github.io/langgraph/reference/prebuilt/#langgraph.prebuilt.chat_agent_executor.create_react_agent). The only important modification is adding a node to load memories BEFORE calling the agent for the first time."
   ]
  },
  {

@@ -133,6 +133,14 @@
    {
      "source": "/docs/integrations/retrievers/weaviate-hybrid(/?)",
      "destination": "/docs/integrations/vectorstores/weaviate/#search-mechanism"
    },
    {
      "source": "/api_reference/mongodb/:path(.*/?)*",
      "destination": "https://langchain-mongodb.readthedocs.io/en/latest/langchain_mongodb/api_docs.html"
    },
    {
      "source": "/api_reference/tests/:path(.*/?)*",
      "destination": "/api_reference/standard_tests/:path"
    }
  ]
}

@@ -19,7 +19,7 @@
    "\n",
    "- TODO: Make sure API reference link is correct.\n",
    "\n",
    "This will help you get started with __ModuleName__ [chat models](/docs/concepts/chat_models). For detailed documentation of all Chat__ModuleName__ features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/chat_models/__module_name__.chat_models.Chat__ModuleName__.html).\n",
    "This will help you get started with __ModuleName__ [chat models](/docs/concepts/chat_models). For detailed documentation of all Chat__ModuleName__ features and configurations head to the [API reference](https://python.langchain.com/api_reference/__package_name_short_snake__/chat_models/__module_name__.chat_models.Chat__ModuleName__.html).\n",
    "\n",
    "- TODO: Add any other relevant links, like information about models, prices, context windows, etc. See https://python.langchain.com/docs/integrations/chat/openai/ for an example.\n",
    "\n",
@@ -32,7 +32,7 @@
    "\n",
    "| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/docs/integrations/chat/__package_name_short_snake__) | Package downloads | Package latest |\n",
    "| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n",
    "| [Chat__ModuleName__](https://api.python.langchain.com/en/latest/chat_models/__module_name__.chat_models.Chat__ModuleName__.html) | [__package_name__](https://api.python.langchain.com/en/latest/__package_name_short_snake___api_reference.html) | ✅/❌ | beta/❌ | ✅/❌ |  |  |\n",
    "| [Chat__ModuleName__](https://python.langchain.com/api_reference/__package_name_short_snake__/chat_models/__module_name__.chat_models.Chat__ModuleName__.html) | [__package_name__](https://python.langchain.com/api_reference/__package_name_short_snake__/) | ✅/❌ | beta/❌ | ✅/❌ |  |  |\n",
    "\n",
    "### Model features\n",
    "| [Tool calling](/docs/how_to/tool_calling) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
@@ -234,7 +234,7 @@
   "source": [
    "## API reference\n",
    "\n",
    "For detailed documentation of all Chat__ModuleName__ features and configurations head to the API reference: https://api.python.langchain.com/en/latest/chat_models/__module_name__.chat_models.Chat__ModuleName__.html"
    "For detailed documentation of all Chat__ModuleName__ features and configurations head to the [API reference](https://python.langchain.com/api_reference/__package_name_short_snake__/chat_models/__module_name__.chat_models.Chat__ModuleName__.html)"
   ]
  }
 ],

@@ -60,12 +60,14 @@ oracle-ads>=2.9.1,<3
oracledb>=2.2.0,<3
pandas>=2.0.1,<3
pdfminer-six>=20221105,<20240706
pdfplumber>=0.11
pgvector>=0.1.6,<0.2
playwright>=1.48.0,<2
praw>=7.7.1,<8
premai>=0.3.25,<0.4
psychicapi>=0.8.0,<0.9
pydantic>=2.7.4,<3
pytesseract>=0.3.13
py-trello>=0.19.0,<0.20
pyjwt>=2.8.0,<3
pymupdf>=1.22.3,<2

@@ -16,7 +16,10 @@ from langchain_community.tools.github.prompt import (
    GET_FILES_FROM_DIRECTORY_PROMPT,
    GET_ISSUE_PROMPT,
    GET_ISSUES_PROMPT,
    GET_LATEST_RELEASE_PROMPT,
    GET_PR_PROMPT,
    GET_RELEASE_PROMPT,
    GET_RELEASES_PROMPT,
    LIST_BRANCHES_IN_REPO_PROMPT,
    LIST_PRS_PROMPT,
    LIST_PULL_REQUEST_FILES,
@@ -152,6 +155,15 @@ class SearchIssuesAndPRs(BaseModel):
    )


class TagName(BaseModel):
    """Schema for operations that require a tag name as input."""

    tag_name: str = Field(
        ...,
        description="The tag name of the release, e.g. `v1.0.0`.",
    )


class GitHubToolkit(BaseToolkit):
    """GitHub Toolkit.

@@ -218,6 +230,25 @@ class GitHubToolkit(BaseToolkit):
            Search code
            Create review request

        Include release tools:
            By default, the toolkit does not include release-related tools.
            You can include them by setting ``include_release_tools=True`` when
            initializing the toolkit:

            .. code-block:: python

                toolkit = GitHubToolkit.from_github_api_wrapper(
                    github, include_release_tools=True
                )

            Setting ``include_release_tools=True`` will include the following tools:

            .. code-block:: none

                Get latest release
                Get releases
                Get release

    Use within an agent:
        .. code-block:: python

@@ -268,12 +299,14 @@ class GitHubToolkit(BaseToolkit):

    @classmethod
    def from_github_api_wrapper(
        cls, github_api_wrapper: GitHubAPIWrapper
        cls, github_api_wrapper: GitHubAPIWrapper, include_release_tools: bool = False
    ) -> "GitHubToolkit":
        """Create a GitHubToolkit from a GitHubAPIWrapper.

        Args:
            github_api_wrapper: GitHubAPIWrapper. The GitHub API wrapper.
            include_release_tools: bool. Whether to include release-related tools.
                Defaults to False.

        Returns:
            GitHubToolkit. The GitHub toolkit.
@@ -406,6 +439,29 @@ class GitHubToolkit(BaseToolkit):
                "args_schema": CreateReviewRequest,
            },
        ]

        release_operations: List[Dict] = [
            {
                "mode": "get_latest_release",
                "name": "Get latest release",
                "description": GET_LATEST_RELEASE_PROMPT,
                "args_schema": NoInput,
            },
            {
                "mode": "get_releases",
                "name": "Get releases",
                "description": GET_RELEASES_PROMPT,
                "args_schema": NoInput,
            },
            {
                "mode": "get_release",
                "name": "Get release",
                "description": GET_RELEASE_PROMPT,
                "args_schema": TagName,
            },
        ]

        operations = operations + (release_operations if include_release_tools else [])
        tools = [
            GitHubAction(
                name=action["name"],

@@ -370,7 +370,7 @@ class MiniMaxChat(BaseChatModel):
    }

    _client: Any = None
    model: str = "abab6.5-chat"
    model: str = "abab6.5s-chat"
    """Model name to use."""
    max_tokens: int = 256
    """Denotes the number of tokens to predict per generation."""
@@ -381,7 +381,7 @@ class MiniMaxChat(BaseChatModel):
    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Holds any model parameters valid for `create` call not explicitly specified."""
    minimax_api_host: str = Field(
        default="https://api.minimax.chat/v1/text/chatcompletion_v2", alias="base_url"
        default="https://api.minimaxi.chat/v1/text/chatcompletion_v2", alias="base_url"
    )
    minimax_group_id: Optional[str] = Field(default=None, alias="group_id")
    """[DEPRECATED, keeping it for backward compatibility] Group Id"""
@@ -511,7 +511,13 @@ class MiniMaxChat(BaseChatModel):
        with httpx.Client(headers=headers, timeout=60) as client:
            response = client.post(self.minimax_api_host, json=payload)
            response.raise_for_status()

            final_response = response.json()
            if (
                "base_resp" in final_response
                and "status_msg" in final_response["base_resp"]
                and final_response["base_resp"]["status_msg"] == "invalid api key"
            ):
                raise Exception("Invalid API Key Provided")
            return self._create_chat_result(response.json())

    def _stream(

@@ -15,6 +15,7 @@ from typing import (
)

import httpx
from httpx_sse import SSEError
from langchain_core.callbacks import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
@@ -35,7 +36,13 @@ from langchain_core.messages import (
)
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
from langchain_core.utils import convert_to_secret_str, get_from_env
from pydantic import AliasChoices, ConfigDict, Field, SecretStr, model_validator
from pydantic import (
    AliasChoices,
    ConfigDict,
    Field,
    SecretStr,
    model_validator,
)
from typing_extensions import Self

_DEFAULT_BASE_URL = "https://clovastudio.stream.ntruss.com"
@@ -47,16 +54,13 @@ def _convert_chunk_to_message_chunk(
    sse: Any, default_class: Type[BaseMessageChunk]
) -> BaseMessageChunk:
    sse_data = sse.json()
    if sse.event == "result":
        response_metadata = _sse_data_to_response_metadata(sse_data)
        return AIMessageChunk(content="", response_metadata=response_metadata)

    message = sse_data.get("message")
    role = message.get("role")
    content = message.get("content") or ""

    if sse.event == "result":
        response_metadata = {}
        if "stopReason" in sse_data:
            response_metadata["stopReason"] = sse_data["stopReason"]
        return AIMessageChunk(content="", response_metadata=response_metadata)

    if role == "user" or default_class == HumanMessageChunk:
        return HumanMessageChunk(content=content)
    elif role == "assistant" or default_class == AIMessageChunk:
@@ -69,6 +73,21 @@
        return default_class(content=content)  # type: ignore[call-arg]


def _sse_data_to_response_metadata(sse_data: Dict) -> Dict[str, Any]:
    response_metadata = {}
    if "stopReason" in sse_data:
        response_metadata["stop_reason"] = sse_data["stopReason"]
    if "inputLength" in sse_data:
        response_metadata["input_length"] = sse_data["inputLength"]
    if "outputLength" in sse_data:
        response_metadata["output_length"] = sse_data["outputLength"]
    if "seed" in sse_data:
        response_metadata["seed"] = sse_data["seed"]
    if "aiFilter" in sse_data:
        response_metadata["ai_filter"] = sse_data["aiFilter"]
    return response_metadata


def _convert_message_to_naver_chat_message(
    message: BaseMessage,
) -> Dict:
@@ -130,6 +149,8 @@ async def _aiter_sse(
        event_data = sse.json()
        if sse.event == "signal" and event_data.get("data", {}) == "[DONE]":
            return
        if sse.event == "error":
            raise SSEError(message=sse.data)
        yield sse


@@ -240,10 +261,15 @@ class ChatClovaX(BaseChatModel):

    @property
    def lc_secrets(self) -> Dict[str, str]:
        return {
            "ncp_clovastudio_api_key": "NCP_CLOVASTUDIO_API_KEY",
            "ncp_apigw_api_key": "NCP_APIGW_API_KEY",
        }
        if not self._is_new_api_key():
            return {
                "ncp_clovastudio_api_key": "NCP_CLOVASTUDIO_API_KEY",
            }
        else:
            return {
                "ncp_clovastudio_api_key": "NCP_CLOVASTUDIO_API_KEY",
                "ncp_apigw_api_key": "NCP_APIGW_API_KEY",
            }

    @property
    def _llm_type(self) -> str:
@@ -285,10 +311,8 @@ class ChatClovaX(BaseChatModel):
                get_from_env("ncp_clovastudio_api_key", "NCP_CLOVASTUDIO_API_KEY")
            )

        if not self.ncp_apigw_api_key:
            self.ncp_apigw_api_key = convert_to_secret_str(
                get_from_env("ncp_apigw_api_key", "NCP_APIGW_API_KEY", "")
            )
        if not self._is_new_api_key():
            self._init_fields_on_old_api_key()

        if not self.base_url:
            self.base_url = get_from_env(
@@ -311,6 +335,18 @@ class ChatClovaX(BaseChatModel):

        return self

    def _is_new_api_key(self) -> bool:
        if self.ncp_clovastudio_api_key:
            return self.ncp_clovastudio_api_key.get_secret_value().startswith("nv-")
        else:
            return False

    def _init_fields_on_old_api_key(self) -> None:
        if not self.ncp_apigw_api_key:
            self.ncp_apigw_api_key = convert_to_secret_str(
                get_from_env("ncp_apigw_api_key", "NCP_APIGW_API_KEY", "")
            )

    def default_headers(self) -> Dict[str, Any]:
        headers = {
            "Content-Type": "application/json",
@@ -322,16 +358,22 @@ class ChatClovaX(BaseChatModel):
            if self.ncp_clovastudio_api_key
            else None
        )
        if clovastudio_api_key:
            headers["X-NCP-CLOVASTUDIO-API-KEY"] = clovastudio_api_key

        apigw_api_key = (
            self.ncp_apigw_api_key.get_secret_value()
            if self.ncp_apigw_api_key
            else None
        )
        if apigw_api_key:
            headers["X-NCP-APIGW-API-KEY"] = apigw_api_key
        if self._is_new_api_key():
            ### headers on new api key
            headers["Authorization"] = f"Bearer {clovastudio_api_key}"
        else:
            ### headers on old api key
            if clovastudio_api_key:
                headers["X-NCP-CLOVASTUDIO-API-KEY"] = clovastudio_api_key

            apigw_api_key = (
                self.ncp_apigw_api_key.get_secret_value()
                if self.ncp_apigw_api_key
                else None
            )
            if apigw_api_key:
                headers["X-NCP-APIGW-API-KEY"] = apigw_api_key

        return headers

@@ -348,7 +390,6 @@ class ChatClovaX(BaseChatModel):
    def _completion_with_retry(self, **kwargs: Any) -> Any:
        from httpx_sse import (
            ServerSentEvent,
            SSEError,
            connect_sse,
        )

@@ -223,15 +223,21 @@ class ChatPerplexity(BaseChatModel):
        stream_resp = self.client.chat.completions.create(
            messages=message_dicts, stream=True, **params
        )
        first_chunk = True
        for chunk in stream_resp:
            if not isinstance(chunk, dict):
                chunk = chunk.dict()
            if len(chunk["choices"]) == 0:
                continue
            choice = chunk["choices"][0]
            citations = chunk.get("citations", [])

            chunk = self._convert_delta_to_message_chunk(
                choice["delta"], default_chunk_class
            )
            if first_chunk:
                chunk.additional_kwargs |= {"citations": citations}
                first_chunk = False
            finish_reason = choice.get("finish_reason")
            generation_info = (
                dict(finish_reason=finish_reason) if finish_reason is not None else None

@@ -16,6 +16,7 @@ from typing import (
)

import requests
from langchain_core._api.deprecation import deprecated
from langchain_core.callbacks import (
    CallbackManagerForLLMRun,
)
@@ -107,6 +108,11 @@ def _is_pydantic_class(obj: Any) -> bool:
    return isinstance(obj, type) and is_basemodel_subclass(obj)


@deprecated(
    since="0.3.16",
    removal="1.0",
    alternative_import="langchain_sambanova.ChatSambaNovaCloud",
)
class ChatSambaNovaCloud(BaseChatModel):
    """
    SambaNova Cloud chat model.
@@ -952,6 +958,11 @@ class ChatSambaNovaCloud(BaseChatModel):
            yield chunk


@deprecated(
    since="0.3.16",
    removal="1.0",
    alternative_import="langchain_sambanova.ChatSambaStudio",
)
class ChatSambaStudio(BaseChatModel):
    """
    SambaStudio chat model.

@@ -1,4 +1,6 @@
from typing import Iterator, List, Optional
from __future__ import annotations

from typing import TYPE_CHECKING, Iterator, List, Optional

from langchain_core.documents import Document

@@ -8,6 +10,9 @@ from langchain_community.document_loaders.parsers import (
    AzureAIDocumentIntelligenceParser,
)

if TYPE_CHECKING:
    from azure.core.credentials import TokenCredential


class AzureAIDocumentIntelligenceLoader(BaseLoader):
    """Load a PDF with Azure Document Intelligence."""
@@ -15,7 +20,7 @@ class AzureAIDocumentIntelligenceLoader(BaseLoader):
    def __init__(
        self,
        api_endpoint: str,
        api_key: str,
        api_key: Optional[str] = None,
        file_path: Optional[str] = None,
        url_path: Optional[str] = None,
        bytes_source: Optional[bytes] = None,
@@ -24,6 +29,7 @@ class AzureAIDocumentIntelligenceLoader(BaseLoader):
        mode: str = "markdown",
        *,
        analysis_features: Optional[List[str]] = None,
        azure_credential: Optional["TokenCredential"] = None,
    ) -> None:
        """
        Initialize the object for file processing with Azure Document Intelligence
@@ -63,6 +69,9 @@ class AzureAIDocumentIntelligenceLoader(BaseLoader):
            List of optional analysis features, each feature should be passed
            as a str that conforms to the enum `DocumentAnalysisFeature` in
            `azure-ai-documentintelligence` package. Default value is None.
        azure_credential: Optional[TokenCredential]
            The credentials to use for DocumentIntelligenceClient construction, when
            using credentials other than api_key (like AD).

        Examples:
        ---------
@@ -79,6 +88,15 @@ class AzureAIDocumentIntelligenceLoader(BaseLoader):
        assert (
            file_path is not None or url_path is not None or bytes_source is not None
        ), "file_path, url_path or bytes_source must be provided"

        assert (
            api_key is not None or azure_credential is not None
        ), "Either api_key or azure_credential must be provided."

        assert (
            api_key is None or azure_credential is None
        ), "Only one of api_key or azure_credential should be provided."

        self.file_path = file_path
        self.url_path = url_path
        self.bytes_source = bytes_source
@@ -90,6 +108,7 @@ class AzureAIDocumentIntelligenceLoader(BaseLoader):
            api_model=api_model,
            mode=mode,
            analysis_features=analysis_features,
            azure_credential=azure_credential,
        )

    def lazy_load(

@@ -17,6 +17,12 @@ if TYPE_CHECKING:
    from langchain_community.document_loaders.parsers.html import (
        BS4HTMLParser,
    )
    from langchain_community.document_loaders.parsers.images import (
        BaseImageBlobParser,
        LLMImageBlobParser,
        RapidOCRBlobParser,
        TesseractBlobParser,
    )
    from langchain_community.document_loaders.parsers.language import (
        LanguageParser,
    )
@@ -35,15 +41,19 @@ if TYPE_CHECKING:
_module_lookup = {
    "AzureAIDocumentIntelligenceParser": "langchain_community.document_loaders.parsers.doc_intelligence",  # noqa: E501
    "BS4HTMLParser": "langchain_community.document_loaders.parsers.html",
    "BaseImageBlobParser": "langchain_community.document_loaders.parsers.images",
    "DocAIParser": "langchain_community.document_loaders.parsers.docai",
    "GrobidParser": "langchain_community.document_loaders.parsers.grobid",
    "LanguageParser": "langchain_community.document_loaders.parsers.language",
    "LLMImageBlobParser": "langchain_community.document_loaders.parsers.images",
    "OpenAIWhisperParser": "langchain_community.document_loaders.parsers.audio",
    "PDFMinerParser": "langchain_community.document_loaders.parsers.pdf",
    "PDFPlumberParser": "langchain_community.document_loaders.parsers.pdf",
    "PyMuPDFParser": "langchain_community.document_loaders.parsers.pdf",
    "PyPDFParser": "langchain_community.document_loaders.parsers.pdf",
    "PyPDFium2Parser": "langchain_community.document_loaders.parsers.pdf",
    "RapidOCRBlobParser": "langchain_community.document_loaders.parsers.images",
    "TesseractBlobParser": "langchain_community.document_loaders.parsers.images",
    "VsdxParser": "langchain_community.document_loaders.parsers.vsdx",
}

@@ -57,15 +67,19 @@ def __getattr__(name: str) -> Any:

__all__ = [
    "AzureAIDocumentIntelligenceParser",
    "BaseImageBlobParser",
    "BS4HTMLParser",
    "DocAIParser",
    "GrobidParser",
    "LanguageParser",
    "LLMImageBlobParser",
    "OpenAIWhisperParser",
    "PDFMinerParser",
    "PDFPlumberParser",
    "PyMuPDFParser",
    "PyPDFParser",
    "PyPDFium2Parser",
    "RapidOCRBlobParser",
    "TesseractBlobParser",
    "VsdxParser",
]

@@ -1,11 +1,16 @@
from __future__ import annotations

import logging
from typing import Any, Iterator, List, Optional
from typing import TYPE_CHECKING, Any, Iterator, List, Optional

from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseBlobParser
from langchain_community.document_loaders.blob_loaders import Blob

if TYPE_CHECKING:
    from azure.core.credentials import TokenCredential

logger = logging.getLogger(__name__)


@@ -16,17 +21,27 @@ class AzureAIDocumentIntelligenceParser(BaseBlobParser):
    def __init__(
        self,
        api_endpoint: str,
        api_key: str,
        api_key: Optional[str] = None,
        api_version: Optional[str] = None,
        api_model: str = "prebuilt-layout",
        mode: str = "markdown",
        analysis_features: Optional[List[str]] = None,
        azure_credential: Optional["TokenCredential"] = None,
    ):
        from azure.ai.documentintelligence import DocumentIntelligenceClient
        from azure.ai.documentintelligence.models import DocumentAnalysisFeature
        from azure.core.credentials import AzureKeyCredential

        kwargs = {}

        if api_key is None and azure_credential is None:
            raise ValueError("Either api_key or azure_credential must be provided.")

        if api_key and azure_credential:
            raise ValueError(
                "Only one of api_key or azure_credential should be provided."
            )

        if api_version is not None:
            kwargs["api_version"] = api_version

@@ -49,7 +64,7 @@ class AzureAIDocumentIntelligenceParser(BaseBlobParser):

        self.client = DocumentIntelligenceClient(
            endpoint=api_endpoint,
            credential=AzureKeyCredential(api_key),
            credential=azure_credential or AzureKeyCredential(api_key),
            headers={"x-ms-useragent": "langchain-parser/1.0.0"},
            features=analysis_features,
            **kwargs,

@@ -0,0 +1,220 @@
import base64
import io
import logging
from abc import abstractmethod
from typing import TYPE_CHECKING, Iterable, Iterator

import numpy
import numpy as np
from langchain_core.documents import Document
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import HumanMessage

from langchain_community.document_loaders.base import BaseBlobParser
from langchain_community.document_loaders.blob_loaders import Blob

if TYPE_CHECKING:
    from PIL.Image import Image

logger = logging.getLogger(__name__)


class BaseImageBlobParser(BaseBlobParser):
    """Abstract base class for parsing image blobs into text."""

    @abstractmethod
    def _analyze_image(self, img: "Image") -> str:
        """Abstract method to analyze an image and extract textual content.

        Args:
            img: The image to be analyzed.

        Returns:
            The extracted text content.
        """

    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
        """Lazily parse a blob and yields Documents containing the parsed content.

        Args:
            blob (Blob): The blob to be parsed.

        Yields:
            Document:
                A document containing the parsed content and metadata.
        """
        try:
            from PIL import Image as Img

            with blob.as_bytes_io() as buf:
                if blob.mimetype == "application/x-npy":
                    img = Img.fromarray(numpy.load(buf))
                else:
                    img = Img.open(buf)
                content = self._analyze_image(img)
                logger.debug("Image text: %s", content.replace("\n", "\\n"))
                yield Document(
                    page_content=content,
                    metadata={**blob.metadata, **{"source": blob.source}},
                )
        except ImportError:
            raise ImportError(
                "`Pillow` package not found, please install it with "
                "`pip install Pillow`"
            )


class RapidOCRBlobParser(BaseImageBlobParser):
    """Parser for extracting text from images using the RapidOCR library.

    Attributes:
        ocr:
            The RapidOCR instance for performing OCR.
    """

    def __init__(
        self,
    ) -> None:
        """
        Initializes the RapidOCRBlobParser.
        """
        super().__init__()
        self.ocr = None

    def _analyze_image(self, img: "Image") -> str:
        """
        Analyzes an image and extracts text using RapidOCR.

        Args:
            img (Image):
                The image to be analyzed.

        Returns:
            str:
                The extracted text content.
        """
        if not self.ocr:
            try:
                from rapidocr_onnxruntime import RapidOCR

                self.ocr = RapidOCR()
            except ImportError:
                raise ImportError(
                    "`rapidocr-onnxruntime` package not found, please install it with "
                    "`pip install rapidocr-onnxruntime`"
                )
        ocr_result, _ = self.ocr(np.array(img))  # type: ignore
        content = ""
        if ocr_result:
            content = ("\n".join([text[1] for text in ocr_result])).strip()
        return content


class TesseractBlobParser(BaseImageBlobParser):
    """Parse for extracting text from images using the Tesseract OCR library."""

    def __init__(
        self,
        *,
        langs: Iterable[str] = ("eng",),
    ):
        """Initialize the TesseractBlobParser.

        Args:
            langs (list[str]):
                The languages to use for OCR.
        """
        super().__init__()
        self.langs = list(langs)

    def _analyze_image(self, img: "Image") -> str:
        """Analyze an image and extracts text using Tesseract OCR.

        Args:
            img: The image to be analyzed.

        Returns:
            str: The extracted text content.
        """
        try:
            import pytesseract
        except ImportError:
            raise ImportError(
                "`pytesseract` package not found, please install it with "
                "`pip install pytesseract`"
            )
        return pytesseract.image_to_string(img, lang="+".join(self.langs)).strip()


_PROMPT_IMAGES_TO_DESCRIPTION: str = (
    "You are an assistant tasked with summarizing images for retrieval. "
    "1. These summaries will be embedded and used to retrieve the raw image. "
    "Give a concise summary of the image that is well optimized for retrieval\n"
    "2. extract all the text from the image. "
    "Do not exclude any content from the page.\n"
    "Format answer in markdown without explanatory text "
    "and without markdown delimiter ``` at the beginning. "
)


class LLMImageBlobParser(BaseImageBlobParser):
    """Parser for analyzing images using a language model (LLM).

    Attributes:
        model (BaseChatModel):
            The language model to use for analysis.
        prompt (str):
            The prompt to provide to the language model.
    """

    def __init__(
        self,
        *,
        model: BaseChatModel,
        prompt: str = _PROMPT_IMAGES_TO_DESCRIPTION,
    ):
        """Initializes the LLMImageBlobParser.

        Args:
            model (BaseChatModel):
                The language model to use for analysis.
            prompt (str):
                The prompt to provide to the language model.
        """
        super().__init__()
        self.model = model
        self.prompt = prompt

    def _analyze_image(self, img: "Image") -> str:
        """Analyze an image using the provided language model.

        Args:
            img: The image to be analyzed.

        Returns:
            The extracted textual content.
        """
        image_bytes = io.BytesIO()
        img.save(image_bytes, format="PNG")
        img_base64 = base64.b64encode(image_bytes.getvalue()).decode("utf-8")
        msg = self.model.invoke(
            [
                HumanMessage(
                    content=[
                        {
                            "type": "text",
                            "text": self.prompt.format(format=format),
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{img_base64}"
                            },
                        },
                    ]
                )
            ]
        )
        result = msg.content
        assert isinstance(result, str)
        return result
@@ -2,12 +2,18 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import html
|
||||
import io
|
||||
import logging
|
||||
import threading
|
||||
import warnings
|
||||
from datetime import datetime
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Iterable,
|
||||
Iterator,
|
||||
Literal,
|
||||
Mapping,
|
||||
Optional,
|
||||
Sequence,
|
||||
@@ -15,16 +21,21 @@ from typing import (
|
||||
)
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import numpy
|
||||
import numpy as np
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langchain_community.document_loaders.base import BaseBlobParser
|
||||
from langchain_community.document_loaders.blob_loaders import Blob
|
||||
from langchain_community.document_loaders.parsers.images import (
|
||||
BaseImageBlobParser,
|
||||
RapidOCRBlobParser,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import fitz
|
||||
import pdfminer
|
||||
import pdfplumber
|
||||
import pymupdf
|
||||
import pypdf
|
||||
import pypdfium2
|
||||
from textractor.data.text_linearization_config import TextLinearizationConfig
|
||||
@@ -78,6 +89,156 @@ def extract_from_images_with_rapidocr(
|
||||
return text
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_FORMAT_IMAGE_STR = "\n\n{image_text}\n\n"
|
||||
_JOIN_IMAGES = "\n"
|
||||
_JOIN_TABLES = "\n"
|
||||
_DEFAULT_PAGES_DELIMITER = "\n\f"
|
||||
|
||||
_STD_METADATA_KEYS = {"source", "total_pages", "creationdate", "creator", "producer"}
|
||||
|
||||
|
||||
def _format_inner_image(blob: Blob, content: str, format: str) -> str:
|
||||
"""Format the content of the image with the source of the blob.
|
||||
|
||||
blob: The blob containing the image.
|
||||
format::
|
||||
The format for the parsed output.
|
||||
- "text" = return the content as is
|
||||
- "markdown-img" = wrap the content into an image markdown link, w/ link
|
||||
pointing to (`![body)(#)`]
|
||||
- "html-img" = wrap the content as the `alt` text of an tag and link to
|
||||
(`<img alt="{body}" src="#"/>`)
|
||||
"""
|
||||
if content:
|
||||
source = blob.source or "#"
|
||||
if format == "markdown-img":
|
||||
content = content.replace("]", r"\\]")
|
||||
content = f""
|
||||
elif format == "html-img":
|
||||
content = (
|
||||
f'<img alt="{html.escape(content, quote=True)} ' f'src="{source}" />'
|
||||
)
|
||||
return content
|
||||
|
||||
|
||||
def _validate_metadata(metadata: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Validate that the metadata has all the standard keys and the page is an integer.
|
||||
|
||||
The standard keys are:
|
||||
- source
|
||||
- total_page
|
||||
- creationdate
|
||||
- creator
|
||||
- producer
|
||||
|
||||
Validate that page is an integer if it is present.
|
||||
"""
|
||||
if not _STD_METADATA_KEYS.issubset(metadata.keys()):
|
||||
raise ValueError("The PDF parser must valorize the standard metadata.")
|
||||
if not isinstance(metadata.get("page", 0), int):
|
||||
raise ValueError("The PDF metadata page must be a integer.")
|
||||
return metadata
|
||||
|
||||
|
||||
def _purge_metadata(metadata: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Purge metadata from unwanted keys and normalize key names.
|
||||
|
||||
Args:
|
||||
metadata: The original metadata dictionary.
|
||||
|
||||
Returns:
|
||||
The cleaned and normalized the key format of metadata dictionary.
|
||||
"""
|
||||
new_metadata: dict[str, Any] = {}
|
||||
map_key = {
|
||||
"page_count": "total_pages",
|
||||
"file_path": "source",
|
||||
}
|
||||
for k, v in metadata.items():
|
||||
if type(v) not in [str, int]:
|
||||
v = str(v)
|
||||
if k.startswith("/"):
|
||||
k = k[1:]
|
||||
k = k.lower()
|
||||
if k in ["creationdate", "moddate"]:
|
||||
try:
|
||||
new_metadata[k] = datetime.strptime(
|
||||
v.replace("'", ""), "D:%Y%m%d%H%M%S%z"
|
||||
).isoformat("T")
|
||||
except ValueError:
|
||||
new_metadata[k] = v
|
||||
elif k in map_key:
|
||||
# Normalize key with others PDF parser
|
||||
new_metadata[map_key[k]] = v
|
||||
new_metadata[k] = v
|
||||
elif isinstance(v, str):
|
||||
new_metadata[k] = v.strip()
|
||||
elif isinstance(v, int):
|
||||
new_metadata[k] = v
|
||||
return new_metadata
|
||||
|
||||
|
||||
_PARAGRAPH_DELIMITER = [
|
||||
"\n\n\n",
|
||||
"\n\n",
|
||||
] # To insert images or table in the middle of the page.
|
||||
|
||||
|
||||
def _merge_text_and_extras(extras: list[str], text_from_page: str) -> str:
|
||||
"""Insert extras such as image/table in a text between two paragraphs if possible,
|
||||
else at the end of the text.
|
||||
|
||||
Args:
|
||||
extras: List of extra content (images/tables) to insert.
|
||||
text_from_page: The text content from the page.
|
||||
|
||||
Returns:
|
||||
The merged text with extras inserted.
|
||||
"""
|
||||
|
||||
def _recurs_merge_text_and_extras(
|
||||
extras: list[str], text_from_page: str, recurs: bool
|
||||
) -> Optional[str]:
|
||||
if extras:
|
||||
for delim in _PARAGRAPH_DELIMITER:
|
||||
pos = text_from_page.rfind(delim)
|
||||
if pos != -1:
|
||||
# search penultimate, to bypass an error in footer
|
||||
previous_text = None
|
||||
if recurs:
|
||||
previous_text = _recurs_merge_text_and_extras(
|
||||
extras, text_from_page[:pos], False
|
||||
)
|
||||
if previous_text:
|
||||
all_text = previous_text + text_from_page[pos:]
|
||||
else:
|
||||
all_extras = ""
|
||||
str_extras = "\n\n".join(filter(lambda x: x, extras))
|
||||
if str_extras:
|
||||
all_extras = delim + str_extras
|
||||
all_text = (
|
||||
text_from_page[:pos] + all_extras + text_from_page[pos:]
|
||||
)
|
||||
break
|
||||
else:
|
||||
all_text = None
|
||||
else:
|
||||
all_text = text_from_page
|
||||
return all_text
|
||||
|
||||
all_text = _recurs_merge_text_and_extras(extras, text_from_page, True)
|
||||
if not all_text:
|
||||
all_extras = ""
|
||||
str_extras = "\n\n".join(filter(lambda x: x, extras))
|
||||
if str_extras:
|
||||
all_extras = _PARAGRAPH_DELIMITER[-1] + str_extras
|
||||
all_text = text_from_page + all_extras
|
||||
|
||||
return all_text
|
||||
|
||||
|
||||
class PyPDFParser(BaseBlobParser):
|
||||
"""Load `PDF` using `pypdf`"""
|
||||
|
||||
@@ -105,9 +266,7 @@ class PyPDFParser(BaseBlobParser):
|
||||
)
|
||||
|
||||
def _extract_text_from_page(page: pypdf.PageObject) -> str:
|
||||
"""
|
||||
Extract text from image given the version of pypdf.
|
||||
"""
|
||||
"""Extract text from image given the version of pypdf."""
|
||||
if pypdf.__version__.startswith("3"):
|
||||
return page.extract_text()
|
||||
else:
|
||||
@@ -152,6 +311,12 @@ class PyPDFParser(BaseBlobParser):
|
||||
)
|
||||
elif xObject[obj]["/Filter"][1:] in _PDF_FILTER_WITH_LOSS:
|
||||
images.append(xObject[obj].get_data())
|
||||
elif (
|
||||
isinstance(xObject[obj]["/Filter"], list)
|
||||
and xObject[obj]["/Filter"]
|
||||
and xObject[obj]["/Filter"][0][1:] in _PDF_FILTER_WITH_LOSS
|
||||
):
|
||||
images.append(xObject[obj].get_data())
|
||||
else:
|
||||
warnings.warn("Unknown PDF Filter!")
|
||||
return extract_from_images_with_rapidocr(images)
@@ -275,92 +440,363 @@ class PDFMinerParser(BaseBlobParser):


class PyMuPDFParser(BaseBlobParser):
    """Parse `PDF` using `PyMuPDF`."""
    """Parse a blob from a PDF using `PyMuPDF` library.

    This class provides methods to parse a blob from a PDF document, supporting various
    configurations such as handling password-protected PDFs, extracting images, and
    defining extraction mode.
    It integrates the 'PyMuPDF' library for PDF processing and offers synchronous blob
    parsing.

    Examples:
        Setup:

        .. code-block:: bash

            pip install -U langchain-community pymupdf

        Load a blob from a PDF file:

        .. code-block:: python

            from langchain_core.documents.base import Blob

            blob = Blob.from_path("./example_data/layout-parser-paper.pdf")

        Instantiate the parser:

        .. code-block:: python

            from langchain_community.document_loaders.parsers import PyMuPDFParser

            parser = PyMuPDFParser(
                # password = None,
                mode = "single",
                pages_delimiter = "\n\f",
                # extract_images = True,
                # images_parser = TesseractBlobParser(),
                # extract_tables="markdown",
                # extract_tables_settings=None,
                # text_kwargs=None,
            )

        Lazily parse the blob:

        .. code-block:: python

            docs = []
            docs_lazy = parser.lazy_parse(blob)

            for doc in docs_lazy:
                docs.append(doc)
            print(docs[0].page_content[:100])
            print(docs[0].metadata)
    """

    # PyMuPDF is not thread safe.
    # See https://pymupdf.readthedocs.io/en/latest/recipes-multiprocessing.html
    _lock = threading.Lock()

    def __init__(
        self,
        text_kwargs: Optional[Mapping[str, Any]] = None,
        text_kwargs: Optional[dict[str, Any]] = None,
        extract_images: bool = False,
        *,
        password: Optional[str] = None,
        mode: Literal["single", "page"] = "page",
        pages_delimiter: str = _DEFAULT_PAGES_DELIMITER,
        images_parser: Optional[BaseImageBlobParser] = None,
        images_inner_format: Literal["text", "markdown-img", "html-img"] = "text",
        extract_tables: Union[Literal["csv", "markdown", "html"], None] = None,
        extract_tables_settings: Optional[dict[str, Any]] = None,
    ) -> None:
        """Initialize the parser.
        """Initialize a parser based on PyMuPDF.

        Args:
            text_kwargs: Keyword arguments to pass to ``fitz.Page.get_text()``.
            password: Optional password for opening encrypted PDFs.
            mode: The extraction mode, either "single" for the entire document or "page"
                for page-wise extraction.
            pages_delimiter: A string delimiter to separate pages in single-mode
                extraction.
            extract_images: Whether to extract images from the PDF.
            images_parser: Optional image blob parser.
            images_inner_format: The format for the parsed output.
                - "text" = return the content as is
                - "markdown-img" = wrap the content into an image markdown link, w/ link
                pointing to (`![body)(#)`]
                - "html-img" = wrap the content as the `alt` text of an tag and link to
                (`<img alt="{body}" src="#"/>`)
            extract_tables: Whether to extract tables in a specific format, such as
                "csv", "markdown", or "html".
            extract_tables_settings: Optional dictionary of settings for customizing
                table extraction.

        Returns:
            This method does not directly return data. Use the `parse` or `lazy_parse`
            methods to retrieve parsed documents with content and metadata.

        Raises:
            ValueError: If the mode is not "single" or "page".
            ValueError: If the extract_tables format is not "markdown", "html",
                or "csv".
        """
        super().__init__()
        if mode not in ["single", "page"]:
            raise ValueError("mode must be single or page")
        if extract_tables and extract_tables not in ["markdown", "html", "csv"]:
            raise ValueError("mode must be markdown")

        self.mode = mode
        self.pages_delimiter = pages_delimiter
        self.password = password
        self.text_kwargs = text_kwargs or {}
        if extract_images and not images_parser:
            images_parser = RapidOCRBlobParser()
        self.extract_images = extract_images
        self.images_inner_format = images_inner_format
        self.images_parser = images_parser
        self.extract_tables = extract_tables
        self.extract_tables_settings = extract_tables_settings

    def lazy_parse(self, blob: Blob) -> Iterator[Document]:  # type: ignore[valid-type]
        """Lazily parse the blob."""

        import fitz

        with blob.as_bytes_io() as file_path:  # type: ignore[attr-defined]
            if blob.data is None:  # type: ignore[attr-defined]
                doc = fitz.open(file_path)
            else:
                doc = fitz.open(stream=file_path, filetype="pdf")

            yield from [
                Document(
                    page_content=self._get_page_content(doc, page, blob),
                    metadata=self._extract_metadata(doc, page, blob),
                )
                for page in doc
            ]

    def _get_page_content(self, doc: fitz.Document, page: fitz.Page, blob: Blob) -> str:
        """
        Get the text of the page using PyMuPDF and RapidOCR and issue a warning
        if it is empty.
        """
        content = page.get_text(**self.text_kwargs) + self._extract_images_from_page(
            doc, page
        return self._lazy_parse(
            blob,
        )

        if not content:
            warnings.warn(
                f"Warning: Empty content on page "
                f"{page.number} of document {blob.source}"
    def _lazy_parse(
        self,
        blob: Blob,
        # text-kwargs is present for backwards compatibility.
        # Users should not use it directly.
        text_kwargs: Optional[dict[str, Any]] = None,
    ) -> Iterator[Document]:  # type: ignore[valid-type]
        """Lazily parse the blob.
        Insert image, if possible, between two paragraphs.
        In this way, a paragraph can be continued on the next page.

        Args:
            blob: The blob to parse.
            text_kwargs: Optional keyword arguments to pass to the `get_text` method.
                If provided at run time, it will override the default text_kwargs.

        Raises:
            ImportError: If the `pypdf` package is not found.

        Yield:
            An iterator over the parsed documents.
        """
        try:
            import pymupdf

            text_kwargs = text_kwargs or self.text_kwargs
            if not self.extract_tables_settings:
                from pymupdf.table import (
                    DEFAULT_JOIN_TOLERANCE,
                    DEFAULT_MIN_WORDS_HORIZONTAL,
                    DEFAULT_MIN_WORDS_VERTICAL,
                    DEFAULT_SNAP_TOLERANCE,
                )

                self.extract_tables_settings = {
                    # See https://pymupdf.readthedocs.io/en/latest/page.html#Page.find_tables
                    "clip": None,
                    "vertical_strategy": "lines",
                    "horizontal_strategy": "lines",
                    "vertical_lines": None,
                    "horizontal_lines": None,
                    "snap_tolerance": DEFAULT_SNAP_TOLERANCE,
                    "snap_x_tolerance": None,
                    "snap_y_tolerance": None,
                    "join_tolerance": DEFAULT_JOIN_TOLERANCE,
                    "join_x_tolerance": None,
                    "join_y_tolerance": None,
                    "edge_min_length": 3,
                    "min_words_vertical": DEFAULT_MIN_WORDS_VERTICAL,
                    "min_words_horizontal": DEFAULT_MIN_WORDS_HORIZONTAL,
                    "intersection_tolerance": 3,
                    "intersection_x_tolerance": None,
                    "intersection_y_tolerance": None,
                    "text_tolerance": 3,
                    "text_x_tolerance": 3,
                    "text_y_tolerance": 3,
                    "strategy": None,  # offer abbreviation
                    "add_lines": None,  # optional user-specified lines
                }
        except ImportError:
            raise ImportError(
                "pymupdf package not found, please install it "
                "with `pip install pymupdf`"
            )

        return content
        with PyMuPDFParser._lock:
            with blob.as_bytes_io() as file_path:  # type: ignore[attr-defined]
                if blob.data is None:  # type: ignore[attr-defined]
                    doc = pymupdf.open(file_path)
                else:
                    doc = pymupdf.open(stream=file_path, filetype="pdf")
                if doc.is_encrypted:
                    doc.authenticate(self.password)
                doc_metadata = self._extract_metadata(doc, blob)
                full_content = []
                for page in doc:
                    all_text = self._get_page_content(doc, page, text_kwargs).strip()
                    if self.mode == "page":
                        yield Document(
                            page_content=all_text,
                            metadata=_validate_metadata(
                                doc_metadata | {"page": page.number}
                            ),
                        )
                    else:
                        full_content.append(all_text)

    def _extract_metadata(
        self, doc: fitz.Document, page: fitz.Page, blob: Blob
    ) -> dict:
        """Extract metadata from the document and page."""
        return dict(
            {
                "source": blob.source,  # type: ignore[attr-defined]
                "file_path": blob.source,  # type: ignore[attr-defined]
                "page": page.number,
                "total_pages": len(doc),
            },
            **{
                k: doc.metadata[k]
                for k in doc.metadata
                if isinstance(doc.metadata[k], (str, int))
            },
                if self.mode == "single":
                    yield Document(
                        page_content=self.pages_delimiter.join(full_content),
                        metadata=_validate_metadata(doc_metadata),
                    )

    def _get_page_content(
        self,
        doc: pymupdf.Document,
        page: pymupdf.Page,
        text_kwargs: dict[str, Any],
    ) -> str:
        """Get the text of the page using PyMuPDF and RapidOCR and issue a warning
        if it is empty.

        Args:
            doc: The PyMuPDF document object.
            page: The PyMuPDF page object.
            blob: The blob being parsed.

        Returns:
            str: The text content of the page.
        """
        text_from_page = page.get_text(**{**self.text_kwargs, **text_kwargs})
        images_from_page = self._extract_images_from_page(doc, page)
        tables_from_page = self._extract_tables_from_page(page)
        extras = []
        if images_from_page:
            extras.append(images_from_page)
        if tables_from_page:
            extras.append(tables_from_page)
        all_text = _merge_text_and_extras(extras, text_from_page)

        return all_text

    def _extract_metadata(self, doc: pymupdf.Document, blob: Blob) -> dict:
        """Extract metadata from the document and page.

        Args:
            doc: The PyMuPDF document object.
            blob: The blob being parsed.

        Returns:
            dict: The extracted metadata.
        """
        return _purge_metadata(
            dict(
                {
                    "producer": "PyMuPDF",
                    "creator": "PyMuPDF",
                    "creationdate": "",
                    "source": blob.source,  # type: ignore[attr-defined]
                    "file_path": blob.source,  # type: ignore[attr-defined]
                    "total_pages": len(doc),
                },
                **{
                    k: doc.metadata[k]
                    for k in doc.metadata
                    if isinstance(doc.metadata[k], (str, int))
                },
            )
        )

    def _extract_images_from_page(self, doc: fitz.Document, page: fitz.Page) -> str:
        """Extract images from page and get the text with RapidOCR."""
        if not self.extract_images:
    def _extract_images_from_page(
        self, doc: pymupdf.Document, page: pymupdf.Page
    ) -> str:
        """Extract images from a PDF page and get the text using images_to_text.

        Args:
            doc: The PyMuPDF document object.
            page: The PyMuPDF page object.

        Returns:
            str: The extracted text from the images on the page.
        """
        if not self.images_parser:
            return ""
        import fitz
        import pymupdf

        img_list = page.get_images()
        imgs = []
        images = []
        for img in img_list:
            xref = img[0]
            pix = fitz.Pixmap(doc, xref)
            imgs.append(
                np.frombuffer(pix.samples, dtype=np.uint8).reshape(
            if self.images_parser:
                xref = img[0]
                pix = pymupdf.Pixmap(doc, xref)
                image = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
                    pix.height, pix.width, -1
                )
            )
        return extract_from_images_with_rapidocr(imgs)
                image_bytes = io.BytesIO()
                numpy.save(image_bytes, image)
                blob = Blob.from_data(
                    image_bytes.getvalue(), mime_type="application/x-npy"
                )
                image_text = next(self.images_parser.lazy_parse(blob)).page_content

                images.append(
                    _format_inner_image(blob, image_text, self.images_inner_format)
                )
        return _FORMAT_IMAGE_STR.format(
            image_text=_JOIN_IMAGES.join(filter(None, images))
        )
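
The handoff above serializes each pixmap into an in-memory ``.npy`` buffer so that any ``BaseImageBlobParser`` can consume it without touching disk. A minimal sketch of the same round trip, standalone and without PyMuPDF:

.. code-block:: python

    import io

    import numpy as np
    from langchain_core.documents.base import Blob

    # A tiny 2x2 RGB array standing in for Pixmap.samples.
    image = np.arange(12, dtype=np.uint8).reshape(2, 2, 3)

    # Producer side: serialize the array into an in-memory .npy buffer.
    buffer = io.BytesIO()
    np.save(buffer, image)
    blob = Blob.from_data(buffer.getvalue(), mime_type="application/x-npy")

    # Consumer side: an image parser can recover the exact array.
    restored = np.load(io.BytesIO(blob.as_bytes()))
    assert (restored == image).all()
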

    def _extract_tables_from_page(self, page: pymupdf.Page) -> str:
        """Extract tables from a PDF page.

        Args:
            page: The PyMuPDF page object.

        Returns:
            str: The extracted tables in the specified format.
        """
        if self.extract_tables is None:
            return ""
        import pymupdf

        tables_list = list(
            pymupdf.table.find_tables(page, **self.extract_tables_settings)
        )
        if tables_list:
            if self.extract_tables == "markdown":
                return _JOIN_TABLES.join([table.to_markdown() for table in tables_list])
            elif self.extract_tables == "html":
                return _JOIN_TABLES.join(
                    [
                        table.to_pandas().to_html(
                            header=False,
                            index=False,
                            bold_rows=False,
                        )
                        for table in tables_list
                    ]
                )
            elif self.extract_tables == "csv":
                return _JOIN_TABLES.join(
                    [
                        table.to_pandas().to_csv(
                            header=False,
                            index=False,
                        )
                        for table in tables_list
                    ]
                )
            else:
                raise ValueError(
                    f"extract_tables {self.extract_tables} not implemented"
                )
        return ""


class PyPDFium2Parser(BaseBlobParser):

@@ -12,6 +12,7 @@ from typing import (
    Any,
    BinaryIO,
    Iterator,
    Literal,
    Mapping,
    Optional,
    Sequence,
@@ -27,7 +28,9 @@ from langchain_core.utils import get_from_dict_or_env
from langchain_community.document_loaders.base import BaseLoader
from langchain_community.document_loaders.blob_loaders import Blob
from langchain_community.document_loaders.dedoc import DedocBaseLoader
from langchain_community.document_loaders.parsers.images import BaseImageBlobParser
from langchain_community.document_loaders.parsers.pdf import (
    _DEFAULT_PAGES_DELIMITER,
    AmazonTextractPDFParser,
    DocumentIntelligenceParser,
    PDFMinerParser,
@@ -113,7 +116,8 @@ class BasePDFLoader(BaseLoader, ABC):
        if "~" in self.file_path:
            self.file_path = os.path.expanduser(self.file_path)

        # If the file is a web path or S3, download it to a temporary file, and use that
        # If the file is a web path or S3, download it to a temporary file,
        # and use that. It's better to use a BlobLoader.
        if not os.path.isfile(self.file_path) and self._is_valid_url(self.file_path):
            self.temp_dir = tempfile.TemporaryDirectory()
            _, suffix = os.path.splitext(self.file_path)
@@ -180,8 +184,7 @@ class OnlinePDFLoader(BasePDFLoader):


class PyPDFLoader(BasePDFLoader):
    """
    PyPDFLoader document loader integration
    """PyPDFLoader document loader integration

    Setup:
        Install ``langchain-community``.
@@ -429,44 +432,139 @@ class PDFMinerPDFasHTMLLoader(BasePDFLoader):


class PyMuPDFLoader(BasePDFLoader):
    """Load `PDF` files using `PyMuPDF`."""
    """Load and parse a PDF file using 'PyMuPDF' library.

    This class provides methods to load and parse PDF documents, supporting various
    configurations such as handling password-protected files, extracting tables,
    extracting images, and defining extraction mode. It integrates the `PyMuPDF`
    library for PDF processing and offers both synchronous and asynchronous document
    loading.

    Examples:
        Setup:

        .. code-block:: bash

            pip install -U langchain-community pymupdf

        Instantiate the loader:

        .. code-block:: python

            from langchain_community.document_loaders import PyMuPDFLoader

            loader = PyMuPDFLoader(
                file_path = "./example_data/layout-parser-paper.pdf",
                # headers = None
                # password = None,
                mode = "single",
                pages_delimiter = "\n\f",
                # extract_images = True,
                # images_parser = TesseractBlobParser(),
                # extract_tables = "markdown",
                # extract_tables_settings = None,
            )

        Lazy load documents:

        .. code-block:: python

            docs = []
            docs_lazy = loader.lazy_load()

            for doc in docs_lazy:
                docs.append(doc)
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        Load documents asynchronously:

        .. code-block:: python

            docs = await loader.aload()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)
    """

    def __init__(
        self,
        file_path: Union[str, PurePath],
        *,
        headers: Optional[dict] = None,
        password: Optional[str] = None,
        mode: Literal["single", "page"] = "page",
        pages_delimiter: str = _DEFAULT_PAGES_DELIMITER,
        extract_images: bool = False,
        images_parser: Optional[BaseImageBlobParser] = None,
        images_inner_format: Literal["text", "markdown-img", "html-img"] = "text",
        extract_tables: Union[Literal["csv", "markdown", "html"], None] = None,
        headers: Optional[dict] = None,
        extract_tables_settings: Optional[dict[str, Any]] = None,
        **kwargs: Any,
    ) -> None:
        """Initialize with a file path."""
        try:
            import fitz  # noqa:F401
        except ImportError:
            raise ImportError(
                "`PyMuPDF` package not found, please install it with "
                "`pip install pymupdf`"
            )
        """Initialize with a file path.

        Args:
            file_path: The path to the PDF file to be loaded.
            headers: Optional headers to use for GET request to download a file from a
                web path.
            password: Optional password for opening encrypted PDFs.
            mode: The extraction mode, either "single" for the entire document or "page"
                for page-wise extraction.
            pages_delimiter: A string delimiter to separate pages in single-mode
                extraction.
            extract_images: Whether to extract images from the PDF.
            images_parser: Optional image blob parser.
            images_inner_format: The format for the parsed output.
                - "text" = return the content as is
                - "markdown-img" = wrap the content into an image markdown link, w/ link
                pointing to (`![body)(#)`]
                - "html-img" = wrap the content as the `alt` text of an tag and link to
                (`<img alt="{body}" src="#"/>`)
            extract_tables: Whether to extract tables in a specific format, such as
                "csv", "markdown", or "html".
            extract_tables_settings: Optional dictionary of settings for customizing
                table extraction.
            **kwargs: Additional keyword arguments for customizing text extraction
                behavior.

        Returns:
            This method does not directly return data. Use the `load`, `lazy_load`, or
            `aload` methods to retrieve parsed documents with content and metadata.

        Raises:
            ValueError: If the `mode` argument is not one of "single" or "page".
        """
        if mode not in ["single", "page"]:
            raise ValueError("mode must be single or page")
        super().__init__(file_path, headers=headers)
        self.extract_images = extract_images
        self.text_kwargs = kwargs
        self.parser = PyMuPDFParser(
            password=password,
            mode=mode,
            pages_delimiter=pages_delimiter,
            text_kwargs=kwargs,
            extract_images=extract_images,
            images_parser=images_parser,
            images_inner_format=images_inner_format,
            extract_tables=extract_tables,
            extract_tables_settings=extract_tables_settings,
        )

    def _lazy_load(self, **kwargs: Any) -> Iterator[Document]:
        """Lazy load given path as pages or single document (see `mode`).
        Insert image, if possible, between two paragraphs.
        In this way, a paragraph can be continued on the next page.
        """
        if kwargs:
            logger.warning(
                f"Received runtime arguments {kwargs}. Passing runtime args to `load`"
                f" is deprecated. Please pass arguments during initialization instead."
            )

        text_kwargs = {**self.text_kwargs, **kwargs}
        parser = PyMuPDFParser(
            text_kwargs=text_kwargs, extract_images=self.extract_images
        )
        parser = self.parser
        if self.web_path:
            blob = Blob.from_data(open(self.file_path, "rb").read(), path=self.web_path)  # type: ignore[attr-defined]
        else:
            blob = Blob.from_path(self.file_path)  # type: ignore[attr-defined]
        yield from parser.lazy_parse(blob)
        yield from parser._lazy_parse(blob, text_kwargs=kwargs)

    def load(self, **kwargs: Any) -> list[Document]:
        return list(self._lazy_load(**kwargs))
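
Runtime text kwargs are kept only for backwards compatibility here; the two calls below should be equivalent, but only the first avoids the deprecation warning (``sort`` is just an illustrative ``page.get_text()`` option):

.. code-block:: python

    from langchain_community.document_loaders import PyMuPDFLoader

    # Preferred: configure text extraction at construction time
    # (extra keyword arguments are forwarded to page.get_text()).
    loader = PyMuPDFLoader("./example_data/layout-parser-paper.pdf", sort=True)
    docs = loader.load()

    # Still accepted for backwards compatibility, but logs the warning above:
    legacy_loader = PyMuPDFLoader("./example_data/layout-parser-paper.pdf")
    docs = legacy_loader.load(sort=True)
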

@@ -772,8 +870,8 @@ class AmazonTextractPDFLoader(BasePDFLoader):
    ) -> Iterator[Document]:
        """Lazy load documents"""
        # the self.file_path is local, but the blob has to include
        # the S3 location if the file originated from S3 for multi-page documents
        # raises ValueError when multi-page and not on S3"""
        # the S3 location if the file originated from S3 for multipage documents
        # raises ValueError when multipage and not on S3"""

        if self.web_path and self._is_s3_url(self.web_path):
            blob = Blob(path=self.web_path)  # type: ignore[call-arg] # type: ignore[misc]
@@ -818,8 +916,7 @@ class AmazonTextractPDFLoader(BasePDFLoader):


class DedocPDFLoader(DedocBaseLoader):
    """
    DedocPDFLoader document loader integration to load PDF files using `dedoc`.
    """DedocPDFLoader document loader integration to load PDF files using `dedoc`.
    The file loader can automatically detect the correctness of a textual layer in the
    PDF document.
    Note that `__init__` method supports parameters that differ from ones of
@@ -925,8 +1022,7 @@ class DocumentIntelligenceLoader(BasePDFLoader):
        model: str = "prebuilt-document",
        headers: Optional[dict] = None,
    ) -> None:
        """
        Initialize the object for file processing with Azure Document Intelligence
        """Initialize the object for file processing with Azure Document Intelligence
        (formerly Form Recognizer).

        This constructor initializes a DocumentIntelligenceParser object to be used
@@ -968,11 +1064,10 @@ class DocumentIntelligenceLoader(BasePDFLoader):


class ZeroxPDFLoader(BasePDFLoader):
    """
    Document loader utilizing Zerox library:
    """Document loader utilizing Zerox library:
    https://github.com/getomni-ai/zerox

    Zerox converts PDF document to serties of images (page-wise) and
    Zerox converts PDF document to series of images (page-wise) and
    uses vision-capable LLM model to generate Markdown representation.

    Zerox utilizes anyc operations. Therefore when using this loader
@@ -991,9 +1086,8 @@ class ZeroxPDFLoader(BasePDFLoader):
        **zerox_kwargs: Any,
    ) -> None:
        super().__init__(file_path=file_path)
        """
        Initialize the parser with arguments to be passed to the zerox function.
        Make sure to set necessary environmnet variables such as API key, endpoint, etc.
        """Initialize the parser with arguments to be passed to the zerox function.
        Make sure to set necessary environment variables such as API key, endpoint, etc.
        Check zerox documentation for list of necessary environment variables for
        any given model.

@@ -1014,13 +1108,7 @@ class ZeroxPDFLoader(BasePDFLoader):
        self.model = model

    def lazy_load(self) -> Iterator[Document]:
        """
        Loads documnts from pdf utilizing zerox library:
        https://github.com/getomni-ai/zerox

        Returns:
            Iterator[Document]: An iterator over parsed Document instances.
        """
        """Lazily load pages."""
        import asyncio

        from pyzerox import zerox

@@ -53,7 +53,8 @@ def _metadata_extractor(
class RecursiveUrlLoader(BaseLoader):
    """Recursively load all child links from a root URL.

    **Security Note**: This loader is a crawler that will start crawling
    **Security Note**:
        This loader is a crawler that will start crawling
        at a given URL and then expand to crawl child links recursively.

        Web crawlers should generally NOT be deployed with network access
@@ -154,36 +155,36 @@ class RecursiveUrlLoader(BaseLoader):
    content. To parse this HTML into a more human/LLM-friendly format you can pass
    in a custom ``extractor`` method:

    .. code-block:: python
        .. code-block:: python

        # This example uses `beautifulsoup4` and `lxml`
        import re
        from bs4 import BeautifulSoup
            # This example uses `beautifulsoup4` and `lxml`
            import re
            from bs4 import BeautifulSoup

        def bs4_extractor(html: str) -> str:
            soup = BeautifulSoup(html, "lxml")
            return re.sub(r"\n\n+", "\n\n", soup.text).strip()
            def bs4_extractor(html: str) -> str:
                soup = BeautifulSoup(html, "lxml")
                return re.sub(r"\\n\\n+", "\\n\\n", soup.text).strip()

        loader = RecursiveUrlLoader(
            "https://docs.python.org/3.9/",
            extractor=bs4_extractor,
        )
        print(loader.load()[0].page_content[:200])
            loader = RecursiveUrlLoader(
                "https://docs.python.org/3.9/",
                extractor=bs4_extractor,
            )
            print(loader.load()[0].page_content[:200])


    .. code-block:: python
        .. code-block:: python

        3.9.19 Documentation
            3.9.19 Documentation

        Download
        Download these documents
        Docs by version
            Download
            Download these documents
            Docs by version

        Python 3.13 (in development)
        Python 3.12 (stable)
        Python 3.11 (security-fixes)
        Python 3.10 (security-fixes)
        Python 3.9 (securit
            Python 3.13 (in development)
            Python 3.12 (stable)
            Python 3.11 (security-fixes)
            Python 3.10 (security-fixes)
            Python 3.9 (securit

    Metadata extraction:
        Similarly to content extraction, you can specify a metadata extraction function

@@ -52,14 +52,14 @@ class GoogleApiClient:

    @model_validator(mode="before")
    @classmethod
    def validate_channel_or_videoIds_is_set(cls, values: Dict[str, Any]) -> Any:
    def validate_channel_or_videoIds_is_set(cls, values: Any) -> Any:
        """Validate that either folder_id or document_ids is set, but not both."""

        if not values.get("credentials_path") and not values.get(
        if not values.kwargs.get("credentials_path") and not values.kwargs.get(
            "service_account_path"
        ):
            raise ValueError("Must specify either channel_name or video_ids")
        return values
        return values.kwargs

    def _load_credentials(self) -> Any:
        """Load credentials."""

@@ -15,6 +15,7 @@ from pydantic import (
from typing_extensions import Self

_DEFAULT_BASE_URL = "https://clovastudio.apigw.ntruss.com"
_DEFAULT_BASE_URL_ON_NEW_API_KEY = "https://clovastudio.stream.ntruss.com"

logger = logging.getLogger(__name__)

@@ -91,20 +92,28 @@ class ClovaXEmbeddings(BaseModel, Embeddings):

    @property
    def lc_secrets(self) -> Dict[str, str]:
        return {
            "ncp_clovastudio_api_key": "NCP_CLOVASTUDIO_API_KEY",
            "ncp_apigw_api_key": "NCP_APIGW_API_KEY",
        }
        if not self._is_new_api_key():
            return {
                "ncp_clovastudio_api_key": "NCP_CLOVASTUDIO_API_KEY",
            }
        else:
            return {
                "ncp_clovastudio_api_key": "NCP_CLOVASTUDIO_API_KEY",
                "ncp_apigw_api_key": "NCP_APIGW_API_KEY",
            }

    @property
    def _api_url(self) -> str:
        """GET embedding api url"""
        app_type = "serviceapp" if self.service_app else "testapp"
        model_name = self.model_name if self.model_name != "bge-m3" else "v2"
        return (
            f"{self.base_url}/{app_type}"
            f"/v1/api-tools/embedding/{model_name}/{self.app_id}"
        )
        if self._is_new_api_key():
            return f"{self.base_url}/{app_type}" f"/v1/api-tools/embedding/{model_name}"
        else:
            return (
                f"{self.base_url}/{app_type}"
                f"/v1/api-tools/embedding/{model_name}/{self.app_id}"
            )

    @model_validator(mode="after")
    def validate_model_after(self) -> Self:
@@ -113,18 +122,13 @@ class ClovaXEmbeddings(BaseModel, Embeddings):
            get_from_env("ncp_clovastudio_api_key", "NCP_CLOVASTUDIO_API_KEY")
        )

        if not self.ncp_apigw_api_key:
            self.ncp_apigw_api_key = convert_to_secret_str(
                get_from_env("ncp_apigw_api_key", "NCP_APIGW_API_KEY", "")
            )
        if self._is_new_api_key():
            self._init_fields_on_new_api_key()
        else:
            self._init_fields_on_old_api_key()

        if not self.base_url:
            self.base_url = get_from_env(
                "base_url", "NCP_CLOVASTUDIO_API_BASE_URL", _DEFAULT_BASE_URL
            )

        if not self.app_id:
            self.app_id = get_from_env("app_id", "NCP_CLOVASTUDIO_APP_ID")
            raise ValueError("base_url dose not exist.")

        if not self.client:
            self.client = httpx.Client(
@@ -133,7 +137,7 @@ class ClovaXEmbeddings(BaseModel, Embeddings):
                timeout=self.timeout,
            )

        if not self.async_client:
        if not self.async_client and self.base_url:
            self.async_client = httpx.AsyncClient(
                base_url=self.base_url,
                headers=self.default_headers(),
@@ -142,6 +146,32 @@ class ClovaXEmbeddings(BaseModel, Embeddings):

        return self

    def _is_new_api_key(self) -> bool:
        if self.ncp_clovastudio_api_key:
            return self.ncp_clovastudio_api_key.get_secret_value().startswith("nv-")
        else:
            return False

    def _init_fields_on_new_api_key(self) -> None:
        if not self.base_url:
            self.base_url = get_from_env(
                "base_url",
                "NCP_CLOVASTUDIO_API_BASE_URL",
                _DEFAULT_BASE_URL_ON_NEW_API_KEY,
            )

    def _init_fields_on_old_api_key(self) -> None:
        if not self.ncp_apigw_api_key:
            self.ncp_apigw_api_key = convert_to_secret_str(
                get_from_env("ncp_apigw_api_key", "NCP_APIGW_API_KEY", "")
            )
        if not self.base_url:
            self.base_url = get_from_env(
                "base_url", "NCP_CLOVASTUDIO_API_BASE_URL", _DEFAULT_BASE_URL
            )
        if not self.app_id:
            self.app_id = get_from_env("app_id", "NCP_CLOVASTUDIO_APP_ID")

    def default_headers(self) -> Dict[str, Any]:
        headers = {
            "Content-Type": "application/json",
@@ -153,16 +183,22 @@ class ClovaXEmbeddings(BaseModel, Embeddings):
            if self.ncp_clovastudio_api_key
            else None
        )
        if clovastudio_api_key:
            headers["X-NCP-CLOVASTUDIO-API-KEY"] = clovastudio_api_key

        apigw_api_key = (
            self.ncp_apigw_api_key.get_secret_value()
            if self.ncp_apigw_api_key
            else None
        )
        if apigw_api_key:
            headers["X-NCP-APIGW-API-KEY"] = apigw_api_key
        if self._is_new_api_key():
            ### headers on new api key
            headers["Authorization"] = f"Bearer {clovastudio_api_key}"
        else:
            ### headers on old api key
            if clovastudio_api_key:
                headers["X-NCP-CLOVASTUDIO-API-KEY"] = clovastudio_api_key

            apigw_api_key = (
                self.ncp_apigw_api_key.get_secret_value()
                if self.ncp_apigw_api_key
                else None
            )
            if apigw_api_key:
                headers["X-NCP-APIGW-API-KEY"] = apigw_api_key

        return headers
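
The key routing above hinges on the ``nv-`` prefix: new CLOVA Studio keys are sent as a ``Bearer`` token, while legacy keys keep the two ``X-NCP-*`` headers. A small sketch of that decision (key values are fake):

.. code-block:: python

    def pick_auth_headers(clovastudio_api_key: str, apigw_api_key: str = "") -> dict:
        """Illustrative mirror of the branch above, not the class method itself."""
        headers = {"Content-Type": "application/json"}
        if clovastudio_api_key.startswith("nv-"):  # new-style key
            headers["Authorization"] = f"Bearer {clovastudio_api_key}"
        else:  # legacy key pair
            headers["X-NCP-CLOVASTUDIO-API-KEY"] = clovastudio_api_key
            if apigw_api_key:
                headers["X-NCP-APIGW-API-KEY"] = apigw_api_key
        return headers

    assert "Authorization" in pick_auth_headers("nv-fake-key")
    assert "X-NCP-CLOVASTUDIO-API-KEY" in pick_auth_headers("legacy-fake-key", "gw")
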

@@ -175,7 +211,7 @@ class ClovaXEmbeddings(BaseModel, Embeddings):

    async def _aembed_text(self, text: str) -> List[float]:
        payload = {"text": text}
        async_client = cast(httpx.AsyncClient, self.client)
        async_client = cast(httpx.AsyncClient, self.async_client)
        response = await async_client.post(url=self._api_url, json=payload)
        await _araise_on_error(response)
        return response.json()["result"]["embedding"]

@@ -2,11 +2,17 @@ import json
from typing import Dict, Generator, List, Optional

import requests
from langchain_core._api.deprecation import deprecated
from langchain_core.embeddings import Embeddings
from langchain_core.utils import get_from_dict_or_env, pre_init
from pydantic import BaseModel, ConfigDict


@deprecated(
    since="0.3.16",
    removal="1.0",
    alternative_import="langchain_sambanova.SambaStudioEmbeddings",
)
class SambaStudioEmbeddings(BaseModel, Embeddings):
    """SambaNova embedding models.


@@ -169,7 +169,16 @@ class HuggingFacePipeline(BaseLLM):
            ) from e

        if tokenizer.pad_token is None:
            tokenizer.pad_token_id = model.config.eos_token_id
            if model.config.pad_token_id is not None:
                tokenizer.pad_token_id = model.config.pad_token_id
            elif model.config.eos_token_id is not None and isinstance(
                model.config.eos_token_id, int
            ):
                tokenizer.pad_token_id = model.config.eos_token_id
            elif tokenizer.eos_token_id is not None:
                tokenizer.pad_token_id = tokenizer.eos_token_id
            else:
                tokenizer.add_special_tokens({"pad_token": "[PAD]"})

        if (
            (
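
The new fallback chain prefers an explicit ``pad_token_id`` from the model config, then an integer ``eos_token_id`` from the config, then the tokenizer's own EOS id, and only as a last resort adds a literal ``[PAD]`` token. A condensed, duck-typed restatement of that precedence (illustrative only):

.. code-block:: python

    def resolve_pad_token_id(model_config, tokenizer):
        """Condensed restatement of the fallback chain above."""
        if model_config.pad_token_id is not None:
            return model_config.pad_token_id
        if isinstance(model_config.eos_token_id, int):
            # eos_token_id may also be a list; only a plain int is usable here.
            return model_config.eos_token_id
        if tokenizer.eos_token_id is not None:
            return tokenizer.eos_token_id
        # Last resort: grow the vocabulary with a dedicated padding token.
        tokenizer.add_special_tokens({"pad_token": "[PAD]"})
        return tokenizer.pad_token_id
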

@@ -5,6 +5,7 @@ import json
from abc import abstractmethod
from typing import Any, Dict, Generic, Iterator, List, Mapping, Optional, TypeVar, Union

from langchain_core._api.deprecation import deprecated
from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.utils import pre_init
@@ -124,6 +125,11 @@ class LLMContentHandler(ContentHandlerBase[str, str]):
    """Content handler for LLM class."""


@deprecated(
    since="0.3.16",
    removal="1.0",
    alternative_import="langchain_aws.llms.SagemakerEndpoint",
)
class SagemakerEndpoint(LLM):
    """Sagemaker Inference Endpoint models.


@@ -1,7 +1,20 @@
from typing import TYPE_CHECKING, Any, Dict, Generator, List, Mapping, Optional, Union
from __future__ import annotations

from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    Generator,
    Iterator,
    List,
    Mapping,
    Optional,
    Union,
)

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk

if TYPE_CHECKING:
    from xinference.client import RESTfulChatModelHandle, RESTfulGenerateModelHandle
@@ -73,6 +86,26 @@ class Xinference(LLM):
            generate_config={"max_tokens": 1024, "stream": True},
        )

    Example:

        .. code-block:: python

            from langchain_community.llms import Xinference
            from langchain.prompts import PromptTemplate

            llm = Xinference(
                server_url="http://0.0.0.0:9997",
                model_uid={model_uid},  # replace model_uid with the model UID return from launching the model
                stream=True
            )
            prompt = PromptTemplate(
                input=['country'],
                template="Q: where can we visit in the capital of {country}? A:"
            )
            chain = prompt | llm
            chain.stream(input={'country': 'France'})


    To view all the supported builtin models, run:

    .. code-block:: bash
@@ -216,3 +249,59 @@ class Xinference(LLM):
                token=token, verbose=self.verbose, log_probs=log_probs
            )
            yield token

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        generate_config = kwargs.get("generate_config", {})
        generate_config = {**self.model_kwargs, **generate_config}
        if stop:
            generate_config["stop"] = stop
        for stream_resp in self._create_generate_stream(prompt, generate_config):
            if stream_resp:
                chunk = self._stream_response_to_generation_chunk(stream_resp)
                if run_manager:
                    run_manager.on_llm_new_token(
                        chunk.text,
                        verbose=self.verbose,
                    )
                yield chunk

    def _create_generate_stream(
        self, prompt: str, generate_config: Optional[Dict[str, List[str]]] = None
    ) -> Iterator[str]:
        if self.client is None:
            raise ValueError("Client is not initialized!")
        model = self.client.get_model(self.model_uid)
        yield from model.generate(prompt=prompt, generate_config=generate_config)

    @staticmethod
    def _stream_response_to_generation_chunk(
        stream_response: str,
    ) -> GenerationChunk:
        """Convert a stream response to a generation chunk."""
        token = ""
        if isinstance(stream_response, dict):
            choices = stream_response.get("choices", [])
            if choices:
                choice = choices[0]
                if isinstance(choice, dict):
                    token = choice.get("text", "")

                    return GenerationChunk(
                        text=token,
                        generation_info=dict(
                            finish_reason=choice.get("finish_reason", None),
                            logprobs=choice.get("logprobs", None),
                        ),
                    )
                else:
                    raise TypeError("choice type error!")
            else:
                return GenerationChunk(text=token)
        else:
            raise TypeError("stream_response type error!")

@@ -1,5 +1,6 @@
from typing import Any, Dict, List, Optional

from langchain_core._api.deprecation import deprecated
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
@@ -18,6 +19,11 @@ class RetrievalConfig(BaseModel, extra="allow"):  # type: ignore[call-arg]
    vectorSearchConfiguration: VectorSearchConfig


@deprecated(
    since="0.3.16",
    removal="1.0",
    alternative_import="langchain_aws.AmazonKnowledgeBasesRetriever",
)
class AmazonKnowledgeBasesRetriever(BaseRetriever):
    """Amazon Bedrock Knowledge Bases retriever.


@@ -11,6 +11,7 @@ from typing import (
    Union,
)

from langchain_core._api.deprecation import deprecated
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
@@ -323,6 +324,11 @@ KENDRA_CONFIDENCE_MAPPING = {
}


@deprecated(
    since="0.3.16",
    removal="1.0",
    alternative_import="langchain_aws.AmazonKendraRetriever",
)
class AmazonKendraRetriever(BaseRetriever):
    """`Amazon Kendra Index` retriever.


@@ -98,3 +98,12 @@ This tool will create a new branch in the repository. **VERY IMPORTANT**: You mu

GET_FILES_FROM_DIRECTORY_PROMPT = """
This tool will fetch a list of all files in a specified directory. **VERY IMPORTANT**: You must specify the path of the directory as a string input parameter."""

GET_LATEST_RELEASE_PROMPT = """
This tool will fetch the latest release of the repository. No input parameters are required."""

GET_RELEASES_PROMPT = """
This tool will fetch the latest 5 releases of the repository. No input parameters are required."""

GET_RELEASE_PROMPT = """
This tool will fetch a specific release of the repository. **VERY IMPORTANT**: You must specify the tag name of the release as a string input parameter."""

@@ -813,6 +813,56 @@ class GitHubAPIWrapper(BaseModel):
        except Exception as e:
            return f"Failed to create a review request with error {e}"

    def get_latest_release(self) -> str:
        """
        Fetches the latest release of the repository.

        Returns:
            str: The latest release
        """
        release = self.github_repo_instance.get_latest_release()
        return (
            f"Latest title: {release.title} "
            f"tag: {release.tag_name} "
            f"body: {release.body}"
        )

    def get_releases(self) -> str:
        """
        Fetches all releases of the repository.

        Returns:
            str: The releases
        """
        releases = self.github_repo_instance.get_releases()
        max_results = min(5, releases.totalCount)
        results = [f"Top {max_results} results:"]
        for release in releases[:max_results]:
            results.append(
                f"Title: {release.title}, "
                f"Tag: {release.tag_name}, "
                f"Body: {release.body}"
            )

        return "\n".join(results)

    def get_release(self, tag_name: str) -> str:
        """
        Fetches a specific release of the repository.

        Parameters:
            tag_name(str): The tag name of the release

        Returns:
            str: The release
        """
        release = self.github_repo_instance.get_release(tag_name)
        return (
            f"Release: {release.title} "
            f"tag: {release.tag_name} "
            f"body: {release.body}"
        )

    def run(self, mode: str, query: str) -> str:
        if mode == "get_issue":
            return json.dumps(self.get_issue(int(query)))
@@ -854,5 +904,11 @@ class GitHubAPIWrapper(BaseModel):
            return self.search_code(query)
        elif mode == "create_review_request":
            return self.create_review_request(query)
        elif mode == "get_latest_release":
            return self.get_latest_release()
        elif mode == "get_releases":
            return self.get_releases()
        elif mode == "get_release":
            return self.get_release(query)
        else:
            raise ValueError("Invalid mode" + mode)

@@ -17,6 +17,8 @@ class GitLabAPIWrapper(BaseModel):

    gitlab: Any = None  #: :meta private:
    gitlab_repo_instance: Any = None  #: :meta private:
    gitlab_url: Optional[str] = None
    """The url of the GitLab instance."""
    gitlab_repository: Optional[str] = None
    """The name of the GitLab repository, in the form {username}/{repo-name}."""
    gitlab_personal_access_token: Optional[str] = None
@@ -76,6 +78,7 @@ class GitLabAPIWrapper(BaseModel):

        values["gitlab"] = g
        values["gitlab_repo_instance"] = g.projects.get(gitlab_repository)
        values["gitlab_url"] = gitlab_url
        values["gitlab_repository"] = gitlab_repository
        values["gitlab_personal_access_token"] = gitlab_personal_access_token
        values["gitlab_branch"] = gitlab_branch

@@ -45,6 +45,7 @@ class GoogleScholarAPIWrapper(BaseModel):
    hl: str = "en"
    lr: str = "lang_en"
    serp_api_key: Optional[str] = None
    google_scholar_engine: Any = None

    model_config = ConfigDict(
        extra="forbid",
@@ -55,7 +56,7 @@ class GoogleScholarAPIWrapper(BaseModel):
    def validate_environment(cls, values: Dict) -> Any:
        """Validate that api key and python package exists in environment."""
        serp_api_key = get_from_dict_or_env(values, "serp_api_key", "SERP_API_KEY")
        values["SERP_API_KEY"] = serp_api_key
        values["serp_api_key"] = serp_api_key

        try:
            from serpapi import GoogleScholarSearch

@@ -356,7 +356,7 @@ class AzureCosmosDBNoSqlVectorSearch(VectorStore):
            raise ValueError("No document ids provided to delete.")

        for document_id in ids:
            self._container.delete_item(document_id)
            self.delete_document_by_id(document_id)
        return True

    def delete_document_by_id(self, document_id: Optional[str] = None) -> None:

@@ -683,7 +683,8 @@ class AzureSearch(VectorStore):
        self, query: str, *, k: int = 4, **kwargs: Any
    ) -> List[Tuple[Document, float]]:
        """Run similarity search with distance."""
        search_type = kwargs.get("search_type", self.search_type)
        # Extract search_type from kwargs, defaulting to self.search_type
        search_type = kwargs.pop("search_type", self.search_type)
        if search_type == "similarity":
            return self.vector_search_with_score(query, k=k, **kwargs)
        elif search_type == "hybrid":
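
The switch from ``kwargs.get`` to ``kwargs.pop`` matters because the remaining ``kwargs`` are forwarded to the underlying search call, which has no ``search_type`` parameter. In miniature:

.. code-block:: python

    def downstream(query: str, k: int = 4, **search_kwargs) -> list:
        """Stands in for vector_search_with_score; no search_type parameter."""
        assert "search_type" not in search_kwargs
        return []

    kwargs = {"search_type": "similarity"}
    search_type = kwargs.pop("search_type", "similarity")  # removes the key
    downstream("q", k=4, **kwargs)  # ok: search_type no longer leaks through

    # With kwargs.get(...) the key would stay in kwargs, so the assertion
    # above would fail (or a stricter signature would raise TypeError).
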

@@ -226,6 +226,22 @@ class SupabaseVectorStore(VectorStore):
        postgrest_filter: Optional[str] = None,
        score_threshold: Optional[float] = None,
    ) -> List[Tuple[Document, float]]:
        # Convert MongoDB-style filter to PostgreSQL syntax if needed
        if filter:
            for key, value in filter.items():
                if isinstance(value, dict) and "$in" in value:
                    # Extract the list of values for the $in operator
                    in_values = value["$in"]
                    # Create a PostgreSQL IN clause
                    values_str = ",".join(f"'{str(v)}'" for v in in_values)
                    new_filter = f"metadata->>{key} IN ({values_str})"

                    # Combine with existing postgrest_filter if present
                    if postgrest_filter:
                        postgrest_filter = f"({postgrest_filter}) and ({new_filter})"
                    else:
                        postgrest_filter = new_filter

        match_documents_params = self.match_args(query, filter)
        query_builder = self._client.rpc(self.query_name, match_documents_params)
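
The new branch rewrites a MongoDB-style ``$in`` clause into a PostgREST filter string. For an illustrative metadata key and values:

.. code-block:: python

    mongo_filter = {"category": {"$in": ["news", "blog"]}}

    # What the loop above derives from that filter:
    in_values = mongo_filter["category"]["$in"]
    values_str = ",".join(f"'{str(v)}'" for v in in_values)
    new_filter = f"metadata->>category IN ({values_str})"
    assert new_filter == "metadata->>category IN ('news','blog')"
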

libs/community/poetry.lock — generated file, 1368 lines changed (diff suppressed because it is too large)
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "langchain-community"
version = "0.3.14"
version = "0.3.16"
description = "Community contributed LangChain integrations."
authors = []
license = "MIT"
@@ -33,8 +33,8 @@ ignore-words-list = "momento,collison,ned,foor,reworkd,parth,whats,aapply,mysogy

[tool.poetry.dependencies]
python = ">=3.9,<4.0"
langchain-core = "^0.3.29"
langchain = "^0.3.14"
langchain-core = "^0.3.32"
langchain = "^0.3.16"
SQLAlchemy = ">=1.4,<3"
requests = "^2"
PyYAML = ">=5.3"
@@ -42,7 +42,7 @@ aiohttp = "^3.8.3"
tenacity = ">=8.1.0,!=8.4.0,<10"
dataclasses-json = ">= 0.5.7, < 0.7"
pydantic-settings = "^2.4.0"
langsmith = ">=0.1.125,<0.3"
langsmith = ">=0.1.125,<0.4"
httpx-sse = "^0.4.0"
[[tool.poetry.dependencies.numpy]]
version = ">=1.22.4,<2"

@@ -1,71 +1,131 @@
"""Test ChatNaver chat model."""
"""Test ChatClovaX chat model."""

from langchain_core.messages import AIMessage, AIMessageChunk
import pytest
from httpx_sse import SSEError
from langchain_core.messages import (
    AIMessage,
    AIMessageChunk,
)

from langchain_community.chat_models import ChatClovaX


def test_stream() -> None:
    """Test streaming tokens from ChatClovaX."""
    llm = ChatClovaX()
    llm = ChatClovaX(include_ai_filters=True)

    for token in llm.stream("I'm Clova"):
        assert isinstance(token, AIMessageChunk)
        assert isinstance(token.content, str)
        if token.response_metadata:
            assert "input_length" in token.response_metadata
            assert "output_length" in token.response_metadata
            assert "stop_reason" in token.response_metadata
            assert "ai_filter" in token.response_metadata


async def test_astream() -> None:
    """Test streaming tokens from ChatClovaX."""
    llm = ChatClovaX()
    llm = ChatClovaX(include_ai_filters=True)

    async for token in llm.astream("I'm Clova"):
        assert isinstance(token, AIMessageChunk)
        assert isinstance(token.content, str)
        if token.response_metadata:
            assert "input_length" in token.response_metadata
            assert "output_length" in token.response_metadata
            assert "stop_reason" in token.response_metadata
            assert "ai_filter" in token.response_metadata


async def test_abatch() -> None:
    """Test streaming tokens from ChatClovaX."""
    llm = ChatClovaX()
    llm = ChatClovaX(include_ai_filters=True)

    result = await llm.abatch(["I'm Clova", "I'm not Clova"])
    for token in result:
        assert isinstance(token, AIMessage)
        assert isinstance(token.content, str)
        if token.response_metadata:
            assert "input_length" in token.response_metadata
            assert "output_length" in token.response_metadata
            assert "stop_reason" in token.response_metadata
            assert "ai_filter" in token.response_metadata


async def test_abatch_tags() -> None:
    """Test batch tokens from ChatClovaX."""
    llm = ChatClovaX()
    llm = ChatClovaX(include_ai_filters=True)

    result = await llm.abatch(["I'm Clova", "I'm not Clova"], config={"tags": ["foo"]})
    for token in result:
        assert isinstance(token, AIMessage)
        assert isinstance(token.content, str)
        if token.response_metadata:
            assert "input_length" in token.response_metadata
            assert "output_length" in token.response_metadata
            assert "stop_reason" in token.response_metadata
            assert "ai_filter" in token.response_metadata


def test_batch() -> None:
    """Test batch tokens from ChatClovaX."""
    llm = ChatClovaX()
    llm = ChatClovaX(include_ai_filters=True)

    result = llm.batch(["I'm Clova", "I'm not Clova"])
    for token in result:
        assert isinstance(token, AIMessage)
        assert isinstance(token.content, str)
        if token.response_metadata:
            assert "input_length" in token.response_metadata
            assert "output_length" in token.response_metadata
            assert "stop_reason" in token.response_metadata
            assert "ai_filter" in token.response_metadata


async def test_ainvoke() -> None:
    """Test invoke tokens from ChatClovaX."""
    llm = ChatClovaX()
    llm = ChatClovaX(include_ai_filters=True)

    result = await llm.ainvoke("I'm Clova", config={"tags": ["foo"]})
    assert isinstance(result, AIMessage)
    assert isinstance(result.content, str)
    if result.response_metadata:
        assert "input_length" in result.response_metadata
        assert "output_length" in result.response_metadata
        assert "stop_reason" in result.response_metadata
        assert "ai_filter" in result.response_metadata


def test_invoke() -> None:
    """Test invoke tokens from ChatClovaX."""
    llm = ChatClovaX()
    llm = ChatClovaX(include_ai_filters=True)

    result = llm.invoke("I'm Clova", config=dict(tags=["foo"]))
    assert isinstance(result, AIMessage)
    assert isinstance(result.content, str)
    if result.response_metadata:
        assert "input_length" in result.response_metadata
        assert "output_length" in result.response_metadata
        assert "stop_reason" in result.response_metadata
        assert "ai_filter" in result.response_metadata


def test_stream_error_event() -> None:
    """Test streaming error event from ChatClovaX."""
    llm = ChatClovaX()
    prompt = "What is the best way to reduce my carbon footprint?"

    with pytest.raises(SSEError):
        for _ in llm.stream(prompt * 1000):
            pass


async def test_astream_error_event() -> None:
    """Test streaming error event from ChatClovaX."""
    llm = ChatClovaX()
    prompt = "What is the best way to reduce my carbon footprint?"

    with pytest.raises(SSEError):
        async for _ in llm.astream(prompt * 1000):
            pass

@@ -0,0 +1,60 @@
import re
from pathlib import Path
from typing import Any, Type

import pytest
from langchain_core.documents.base import Blob
from langchain_core.language_models import FakeMessagesListChatModel
from langchain_core.messages import ChatMessage

from langchain_community.document_loaders.parsers.images import (
    LLMImageBlobParser,
    RapidOCRBlobParser,
    TesseractBlobParser,
)

path_base = Path(__file__).parent.parent.parent
building_image = Blob.from_path(path_base / "examples/building.jpg")
text_image = Blob.from_path(path_base / "examples/text.png")
page_image = Blob.from_path(path_base / "examples/page.png")


@pytest.mark.parametrize(
    "blob,body",
    [
        (building_image, ""),
        (text_image, r"(?ms).*MAKE.*TEXT.*STAND.*OUT.*FROM.*BACKGROUNDS.*"),
    ],
)
@pytest.mark.parametrize(
    "blob_loader,kw",
    [
        (RapidOCRBlobParser, {}),
        (TesseractBlobParser, {}),
        (
            LLMImageBlobParser,
            {
                "model": FakeMessagesListChatModel(
                    responses=[
                        ChatMessage(
                            id="ai1",
                            role="system",
                            content="A building. MAKE TEXT STAND OUT FROM BACKGROUNDS",
                        ),
                    ]
                )
            },
        ),
    ],
)
def test_image_parser_with_differents_files(
    blob_loader: Type,
    kw: dict[str, Any],
    blob: Blob,
    body: str,
) -> None:
    if blob_loader == LLMImageBlobParser and "building" in str(blob.path):
        body = ".*building.*"
    documents = list(blob_loader(**kw).lazy_parse(blob))
    assert len(documents) == 1
    assert re.compile(body).match(documents[0].page_content)

@@ -1,18 +1,26 @@
"""Tests for the various PDF parsers."""

import re
from pathlib import Path
from typing import Iterator
from typing import TYPE_CHECKING, Iterator

import pytest

import langchain_community.document_loaders.parsers as pdf_parsers
from langchain_community.document_loaders.base import BaseBlobParser
from langchain_community.document_loaders.blob_loaders import Blob
from langchain_community.document_loaders.parsers.pdf import (
from langchain_community.document_loaders.parsers import (
    BaseImageBlobParser,
    PDFMinerParser,
    PDFPlumberParser,
    PyMuPDFParser,
    PyPDFium2Parser,
    PyPDFParser,
)

if TYPE_CHECKING:
    from PIL.Image import Image


# PDFs to test parsers on.
HELLO_PDF = Path(__file__).parent.parent.parent / "examples" / "hello.pdf"

@@ -20,6 +28,12 @@ LAYOUT_PARSER_PAPER_PDF = (
    Path(__file__).parent.parent.parent / "examples" / "layout-parser-paper.pdf"
)

LAYOUT_PARSER_PAPER_PASSWORD_PDF = (
    Path(__file__).parent.parent.parent
    / "examples"
    / "layout-parser-paper-password.pdf"
)

DUPLICATE_CHARS = (
    Path(__file__).parent.parent.parent / "examples" / "duplicate-chars.pdf"
)
@@ -41,7 +55,7 @@ def _assert_with_parser(parser: BaseBlobParser, splits_by_page: bool = True) ->
    assert isinstance(page_content, str)
    # The different parsers return different amount of whitespace, so using
    # startswith instead of equals.
    assert docs[0].page_content.startswith("Hello world!")
    assert re.findall(r"Hello\s+world!", docs[0].page_content)

    blob = Blob.from_path(LAYOUT_PARSER_PAPER_PDF)
    doc_generator = parser.lazy_parse(blob)
@@ -84,11 +98,6 @@ def _assert_with_duplicate_parser(parser: BaseBlobParser, dedupe: bool = False)
    assert "11000000 SSeerriieess" == docs[0].page_content.split("\n")[0]


def test_pymupdf_loader() -> None:
    """Test PyMuPDF loader."""
    _assert_with_parser(PyMuPDFParser())


def test_pypdf_parser() -> None:
    """Test PyPDF parser."""
    _assert_with_parser(PyPDFParser())
@@ -123,11 +132,210 @@ def test_extract_images_text_from_pdf_pdfminerparser() -> None:
    _assert_with_parser(PDFMinerParser(extract_images=True))


def test_extract_images_text_from_pdf_pymupdfparser() -> None:
    """Test extract image from pdf and recognize text with rapid ocr - PyMuPDFParser"""
    _assert_with_parser(PyMuPDFParser(extract_images=True))


def test_extract_images_text_from_pdf_pypdfium2parser() -> None:
    """Test extract image from pdf and recognize text with rapid ocr - PyPDFium2Parser"""  # noqa: E501
    _assert_with_parser(PyPDFium2Parser(extract_images=True))


class EmptyImageBlobParser(BaseImageBlobParser):
    def _analyze_image(self, img: "Image") -> str:
        return "Hello world"


@pytest.mark.parametrize(
    "mode,image_parser",
    [("single", EmptyImageBlobParser()), ("page", None)],
)
@pytest.mark.parametrize(
    "parser_factory,params",
    [
        ("PyMuPDFParser", {}),
    ],
)
@pytest.mark.requires("pillow")
def test_mode_and_extract_images_variations(
    parser_factory: str,
    params: dict,
    mode: str,
    image_parser: BaseImageBlobParser,
) -> None:
    _test_matrix(
        parser_factory,
        params,
        mode,
        image_parser,
        images_inner_format="text",
    )


@pytest.mark.parametrize(
    "images_inner_format",
    ["text", "markdown-img", "html-img"],
)
@pytest.mark.parametrize(
    "parser_factory,params",
    [
        ("PyMuPDFParser", {}),
    ],
)
@pytest.mark.requires("pillow")
def test_mode_and_image_formats_variations(
    parser_factory: str,
    params: dict,
    images_inner_format: str,
) -> None:
    mode = "single"
    image_parser = EmptyImageBlobParser()

    _test_matrix(
        parser_factory,
        params,
        mode,
        image_parser,
        images_inner_format,
    )


def _test_matrix(
    parser_factory: str,
    params: dict,
    mode: str,
    image_parser: BaseImageBlobParser,
    images_inner_format: str,
) -> None:
    """Apply the same test for all *standard* PDF parsers.

    - Try with mode `single` and `page`
    - Try with image_parser `None` or others
    """

    def _std_assert_with_parser(parser: BaseBlobParser) -> None:
        """Standard tests to verify that the given parser works.

        Args:
            parser (BaseBlobParser): The parser to test.
        """
        blob = Blob.from_path(LAYOUT_PARSER_PAPER_PDF)
        doc_generator = parser.lazy_parse(blob)
        docs = list(doc_generator)
        metadata = docs[0].metadata
        assert metadata["source"] == str(LAYOUT_PARSER_PAPER_PDF)
        assert "creationdate" in metadata
        assert "creator" in metadata
        assert "producer" in metadata
        assert "total_pages" in metadata
        if len(docs) > 1:
            assert metadata["page"] == 0
        if hasattr(parser, "extract_images") and parser.extract_images:
            images = []
            for doc in docs:
                _HTML_image = (
                    r"<img\s+[^>]*"
                    r'src="([^"]+)"(?:\s+alt="([^"]*)")?(?:\s+'
                    r'title="([^"]*)")?[^>]*>'
                )
                _markdown_image = r"!\[([^\]]*)\]\(([^)\s]+)(?:\s+\"([^\"]+)\")?\)"
                match = re.findall(_markdown_image, doc.page_content)
                if match:
                    images.extend(match)
            assert len(images) >= 1

        if hasattr(parser, "password"):
            old_password = parser.password
            parser.password = "password"
            blob = Blob.from_path(LAYOUT_PARSER_PAPER_PASSWORD_PDF)
            doc_generator = parser.lazy_parse(blob)
            docs = list(doc_generator)
            assert len(docs)
            parser.password = old_password

    parser_class = getattr(pdf_parsers, parser_factory)

    parser = parser_class(
        mode=mode,
        images_parser=image_parser,
        images_inner_format=images_inner_format,
        **params,
    )
    _assert_with_parser(parser, splits_by_page=(mode == "page"))
    _std_assert_with_parser(parser)
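
Usage note: outside the test harness, the matrix above reduces to constructing a parser with the three knobs it sweeps. A minimal sketch, assuming a local layout-parser-paper.pdf and the pymupdf and pillow dependencies installed; the keyword names are taken verbatim from the diff.

from langchain_community.document_loaders.blob_loaders import Blob
from langchain_community.document_loaders.parsers import PyMuPDFParser

# mode="single" concatenates the whole PDF into one Document;
# mode="page" emits one Document per page instead.
parser = PyMuPDFParser(
    mode="single",
    images_parser=None,          # or a BaseImageBlobParser to transcribe images
    images_inner_format="text",  # or "markdown-img" / "html-img"
)
docs = list(parser.lazy_parse(Blob.from_path("layout-parser-paper.pdf")))
assert len(docs) == 1
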
@pytest.mark.parametrize(
    "mode",
    ["single", "page"],
)
@pytest.mark.parametrize(
    "extract_tables",
    ["markdown", "html", "csv", None],
)
@pytest.mark.parametrize(
    "parser_factory,params",
    [
        ("PyMuPDFParser", {}),
    ],
)
def test_parser_with_table(
    parser_factory: str,
    params: dict,
    mode: str,
    extract_tables: str,
) -> None:
    from PIL.Image import Image

    from langchain_community.document_loaders.parsers.images import BaseImageBlobParser

    def _std_assert_with_parser(parser: BaseBlobParser) -> None:
        """Standard tests to verify that the given parser works.

        Args:
            parser (BaseBlobParser): The parser to test.
        """
        blob = Blob.from_path(LAYOUT_PARSER_PAPER_PDF)
        doc_generator = parser.lazy_parse(blob)
        docs = list(doc_generator)
        tables = []
        for doc in docs:
            if extract_tables == "markdown":
                pattern = (
                    r"(?s)("
                    r"(?:(?:[^\n]*\|)\n)"
                    r"(?:\|(?:\s?:?---*:?\s?\|)+)\n"
                    r"(?:(?:[^\n]*\|)\n)+"
                    r")"
                )
            elif extract_tables == "html":
                pattern = r"(?s)(<table[^>]*>(?:.*?)<\/table>)"
            elif extract_tables == "csv":
                pattern = (
                    r"((?:(?:"
                    r'(?:"(?:[^"]*(?:""[^"]*)*)"'
                    r"|[^\n,]*),){2,}"
                    r"(?:"
                    r'(?:"(?:[^"]*(?:""[^"]*)*)"'
                    r"|[^\n]*))\n){2,})"
                )
            else:
                pattern = None
            if pattern:
                matches = re.findall(pattern, doc.page_content)
                if matches:
                    tables.extend(matches)
        if extract_tables:
            assert len(tables) >= 1
        else:
            assert not len(tables)

    class EmptyImageBlobParser(BaseImageBlobParser):
        def _analyze_image(self, img: Image) -> str:
            return ""

    parser_class = getattr(pdf_parsers, parser_factory)

    parser = parser_class(
        mode=mode,
        extract_tables=extract_tables,
        images_parser=EmptyImageBlobParser(),
        **params,
    )
    _std_assert_with_parser(parser)
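
Usage note: a minimal sketch of the table extraction being tested, under the same assumptions as the previous sketch; the substring check below is only a crude stand-in for the regexes above.

from langchain_community.document_loaders.blob_loaders import Blob
from langchain_community.document_loaders.parsers import PyMuPDFParser

# Detected tables are serialized into page_content in the requested
# format ("markdown", "html" or "csv"); extract_tables=None disables this.
parser = PyMuPDFParser(mode="page", extract_tables="markdown")
for doc in parser.lazy_parse(Blob.from_path("layout-parser-paper.pdf")):
    if "---" in doc.page_content:  # crude markdown table-separator marker
        print(doc.metadata["page"], "may contain a table")
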
@@ -4,12 +4,12 @@ from typing import Sequence, Union

import pytest

import langchain_community.document_loaders as pdf_loaders
from langchain_community.document_loaders import (
    AmazonTextractPDFLoader,
    MathpixPDFLoader,
    PDFMinerLoader,
    PDFMinerPDFasHTMLLoader,
    PyMuPDFLoader,
    PyPDFium2Loader,
    UnstructuredPDFLoader,
)
@@ -100,30 +100,6 @@ def test_pypdfium2_loader() -> None:
    assert len(docs) == 16


def test_pymupdf_loader() -> None:
    """Test PyMuPDF loader."""
    file_path = Path(__file__).parent.parent / "examples/hello.pdf"
    loader = PyMuPDFLoader(file_path)

    docs = loader.load()
    assert len(docs) == 1

    file_path = Path(__file__).parent.parent / "examples/layout-parser-paper.pdf"
    loader = PyMuPDFLoader(file_path)

    docs = loader.load()
    assert len(docs) == 16
    assert loader.web_path is None

    web_path = "https://people.sc.fsu.edu/~jpeterson/hello_world.pdf"
    loader = PyMuPDFLoader(web_path)

    docs = loader.load()
    assert loader.web_path == web_path
    assert loader.file_path != web_path
    assert len(docs) == 1


@pytest.mark.skipif(
    not os.environ.get("MATHPIX_API_KEY"), reason="Mathpix API key not found"
)
@@ -230,3 +206,51 @@ def test_amazontextract_loader_failures() -> None:
    loader = AmazonTextractPDFLoader(two_page_pdf)
    with pytest.raises(ValueError):
        loader.load()


@pytest.mark.parametrize(
    "parser_factory,params",
    [
        ("PyMuPDFLoader", {}),
    ],
)
def test_standard_parameters(
    parser_factory: str,
    params: dict,
) -> None:
    loader_class = getattr(pdf_loaders, parser_factory)

    file_path = Path(__file__).parent.parent / "examples/hello.pdf"
    loader = loader_class(file_path)
    docs = loader.load()
    assert len(docs) == 1

    file_path = Path(__file__).parent.parent / "examples/layout-parser-paper.pdf"
    loader = loader_class(
        file_path,
        mode="page",
        page_delimiter="---",
        images_parser=None,
        images_inner_format="text",
        password=None,
        extract_tables=None,
        extract_tables_settings=None,
    )
    docs = loader.load()
    assert len(docs) == 16
    assert loader.web_path is None

    web_path = "https://people.sc.fsu.edu/~jpeterson/hello_world.pdf"
    loader = loader_class(web_path)
    docs = loader.load()
    assert loader.web_path == web_path
    assert loader.file_path != web_path
    assert len(docs) == 1
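
Usage note: the same standard parameters drive the loader directly. A minimal sketch, assuming a local hello.pdf; the URL behaviour mirrors the assertions above.

from langchain_community.document_loaders import PyMuPDFLoader

# A local path and a URL are both accepted; for a URL the PDF is fetched
# first and loader.web_path records the original location.
loader = PyMuPDFLoader("hello.pdf", mode="page")
docs = loader.load()
print(len(docs), docs[0].metadata["source"])
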
def test_pymupdf_deprecated_kwards() -> None:
    from langchain_community.document_loaders import PyMuPDFLoader

    file_path = Path(__file__).parent.parent / "examples/hello.pdf"
    loader = PyMuPDFLoader(file_path=file_path)
    loader.load(sort=True)
@@ -4,7 +4,7 @@ from langchain_community.embeddings import ClovaXEmbeddings


def test_embedding_documents() -> None:
    """Test cohere embeddings."""
    """Test ClovaX embeddings."""
    documents = ["foo bar"]
    embedding = ClovaXEmbeddings()
    output = embedding.embed_documents(documents)
@@ -13,7 +13,7 @@ def test_embedding_documents() -> None:


async def test_aembedding_documents() -> None:
    """Test cohere embeddings."""
    """Test ClovaX embeddings."""
    documents = ["foo bar"]
    embedding = ClovaXEmbeddings()
    output = await embedding.aembed_documents(documents)
@@ -22,7 +22,7 @@ async def test_aembedding_documents() -> None:


def test_embedding_query() -> None:
    """Test cohere embeddings."""
    """Test ClovaX embeddings."""
    document = "foo bar"
    embedding = ClovaXEmbeddings()
    output = embedding.embed_query(document)
@@ -30,7 +30,7 @@ def test_embedding_query() -> None:


async def test_aembedding_query() -> None:
    """Test cohere embeddings."""
    """Test ClovaX embeddings."""
    document = "foo bar"
    embedding = ClovaXEmbeddings()
    output = await embedding.aembed_query(document)
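
Usage note: these integration tests presume live credentials. A minimal sketch of the same four calls outside pytest, assuming CLOVA Studio credentials are already configured in the environment as the integration requires.

from langchain_community.embeddings import ClovaXEmbeddings

embeddings = ClovaXEmbeddings()  # credentials picked up from the environment
query_vector = embeddings.embed_query("foo bar")
doc_vectors = embeddings.embed_documents(["foo bar"])
print(len(query_vector), len(doc_vectors))
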
BIN  libs/community/tests/integration_tests/examples/building.jpg  (new file; binary, 54 KiB, not shown)
Some files were not shown because too many files have changed in this diff