feat(perplexity): add PerplexityEmbeddings (#37082)

## Description

This PR adds a new `PerplexityEmbeddings` class to the
`langchain-perplexity` partner package, providing first-class support
for the Perplexity Embeddings API alongside the existing
`ChatPerplexity`, `PerplexitySearchRetriever`, and
`PerplexitySearchResults` integrations.

### What was added

- `langchain_perplexity/embeddings.py` — `PerplexityEmbeddings` class
implementing `langchain_core.embeddings.Embeddings` with sync
(`embed_documents`, `embed_query`) and async (`aembed_documents`,
`aembed_query`) methods. Defaults to model `pplx-embed-v1-4b` and reuses
the existing `_utils.initialize_client` helper for API key resolution
(`PPLX_API_KEY` / `PERPLEXITY_API_KEY`).
- `__init__.py` exports `PerplexityEmbeddings` and adds it to `__all__`.
- Unit tests under `tests/unit_tests/test_embeddings.py` covering
sync/async paths with mocked clients (no network).
- Integration tests under `tests/integration_tests/test_embeddings.py`,
gated on `PPLX_API_KEY` / `PERPLEXITY_API_KEY` being set (matches the
pattern in `test_search_api.py`).
- README updated to advertise the new component.

### Why

LangChain users already get chat, search, and tool wrappers from
`langchain-perplexity`, but until now they had to drop down to the raw
Perplexity SDK to use embeddings. This PR closes that gap.

### References

- Perplexity Embeddings docs: https://docs.perplexity.ai/docs/embeddings
- Perplexity Embeddings API reference:
https://docs.perplexity.ai/api-reference/embeddings-post

### Issue

Closes #36726

## Testing

- `cd libs/partners/perplexity && make lint` — passes (ruff, format,
mypy).
- `cd libs/partners/perplexity && make test` — all unit tests pass (59
passed, 1 skipped).
- Integration tests will run in CI with secrets; they exercise real
`embed_documents` / `embed_query` / async variants against the live API
and assert vector dimensionality consistency.

---------

Co-authored-by: Claude Agent <agent@anthropic.com>
Co-authored-by: Mason Daugherty <github@mdrxy.com>
This commit is contained in:
James Liounis
2026-04-29 17:51:50 -04:00
committed by GitHub
parent 90b0047270
commit 28f5448dd4
11 changed files with 550 additions and 41 deletions

View File

@@ -0,0 +1,56 @@
"""Integration tests for Perplexity Embeddings API."""
import os
import pytest
from langchain_perplexity import PerplexityEmbeddings
@pytest.mark.skipif(
    not any(os.environ.get(name) for name in ("PPLX_API_KEY", "PERPLEXITY_API_KEY")),
    reason="PPLX_API_KEY/PERPLEXITY_API_KEY not set",
)
class TestPerplexityEmbeddings:
    """Exercise the sync and async embedding methods of `PerplexityEmbeddings`.

    The whole class is skipped unless `PPLX_API_KEY` or `PERPLEXITY_API_KEY`
    is set to a non-empty value in the environment.
    """

    def test_embed_documents(self) -> None:
        """Embed two documents and check count, types, and dimensionality."""
        client = PerplexityEmbeddings()
        inputs = ["hello world", "goodbye world"]
        result = client.embed_documents(inputs)

        # One vector per input text.
        assert len(result) == len(inputs)
        assert all(isinstance(vec, list) for vec in result)
        assert all(len(vec) > 0 for vec in result)

        # Every vector must share a single dimensionality.
        distinct_dims = {len(vec) for vec in result}
        assert len(distinct_dims) == 1

        # Only the first vector's components are type-checked.
        first_vec = result[0]
        assert all(isinstance(component, float) for component in first_vec)

    def test_embed_query(self) -> None:
        """Embed one query and check it is a non-empty list of floats."""
        client = PerplexityEmbeddings()
        result = client.embed_query("What is the capital of France?")

        assert isinstance(result, list)
        assert len(result) > 0
        assert all(isinstance(component, float) for component in result)

    def test_embed_query_matches_documents_dim(self) -> None:
        """Query and document embeddings should have the same length."""
        client = PerplexityEmbeddings()
        # The query is embedded first, then the document, as two separate calls.
        query_dim = len(client.embed_query("hello"))
        doc_dim = len(client.embed_documents(["hello"])[0])

        assert query_dim == doc_dim

    async def test_aembed_documents(self) -> None:
        """Embed two documents asynchronously and check count and non-emptiness."""
        client = PerplexityEmbeddings()
        result = await client.aembed_documents(["hello", "world"])

        assert len(result) == 2
        assert all(len(vec) > 0 for vec in result)

    async def test_aembed_query(self) -> None:
        """Embed one query asynchronously and check it is a non-empty list."""
        client = PerplexityEmbeddings()
        result = await client.aembed_query("hello")

        assert isinstance(result, list)
        assert len(result) > 0

View File

@@ -0,0 +1,23 @@
"""Standard integration tests for `PerplexityEmbeddings`."""
import os
import pytest
from langchain_core.embeddings import Embeddings
from langchain_tests.integration_tests import EmbeddingsIntegrationTests
from langchain_perplexity import PerplexityEmbeddings
@pytest.mark.skipif(
    not any(os.environ.get(name) for name in ("PPLX_API_KEY", "PERPLEXITY_API_KEY")),
    reason="PPLX_API_KEY/PERPLEXITY_API_KEY not set",
)
class TestPerplexityEmbeddingsIntegration(EmbeddingsIntegrationTests):
    """Bind the `EmbeddingsIntegrationTests` suite to `PerplexityEmbeddings`.

    Skipped unless `PPLX_API_KEY` or `PERPLEXITY_API_KEY` is set to a
    non-empty value in the environment.
    """

    @property
    def embeddings_class(self) -> type[Embeddings]:
        """Return the embeddings implementation under test."""
        return PerplexityEmbeddings

    @property
    def embedding_model_params(self) -> dict:
        """Return the model parameters for the suite; none are overridden here.

        NOTE(review): presumably the base suite passes these to the
        `embeddings_class` constructor -- confirm against `langchain_tests`.
        """
        return {}