Update the nlpcloud connector after some changes on the NLP Cloud API (#9586)

- Description: remove some deprecated text generation parameters and update the embeddings documentation,
- Tag maintainer: @rlancemartin
Bagatur 2023-08-23 11:35:08 -07:00 committed by GitHub
commit a40c12bb88
2 changed files with 9 additions and 20 deletions

View File

@@ -9,13 +9,9 @@
     "\n",
     "NLP Cloud is an artificial intelligence platform that allows you to use the most advanced AI engines, and even train your own engines with your own data. \n",
     "\n",
-    "The [embeddings](https://docs.nlpcloud.com/#embeddings) endpoint offers several models:\n",
+    "The [embeddings](https://docs.nlpcloud.com/#embeddings) endpoint offers the following model:\n",
     "\n",
-    "* `paraphrase-multilingual-mpnet-base-v2`: Paraphrase Multilingual MPNet Base V2 is a very fast model based on Sentence Transformers that is perfectly suited for embeddings extraction in more than 50 languages (see the full list here).\n",
-    "\n",
-    "* `gpt-j`: GPT-J returns advanced embeddings. It might return better results than Sentence Transformers based models (see above) but it is also much slower.\n",
-    "\n",
-    "* `dolphin`: Dolphin returns advanced embeddings. It might return better results than Sentence Transformers based models (see above) but it is also much slower. It natively understands the following languages: Bulgarian, Catalan, Chinese, Croatian, Czech, Danish, Dutch, English, French, German, Hungarian, Italian, Japanese, Polish, Portuguese, Romanian, Russian, Serbian, Slovenian, Spanish, Swedish, and Ukrainian."
+    "* `paraphrase-multilingual-mpnet-base-v2`: Paraphrase Multilingual MPNet Base V2 is a very fast model based on Sentence Transformers that is perfectly suited for embeddings extraction in more than 50 languages (see the full list here)."
    ]
   },
   {
@@ -84,7 +80,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3.11.2 64-bit",
    "language": "python",
    "name": "python3"
   },
@@ -98,7 +94,12 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.16"
+   "version": "3.11.2"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
+   }
   }
  },
  "nbformat": 4,

View File

@@ -28,8 +28,6 @@ class NLPCloud(LLM):
     """Language to use (multilingual addon)"""
     temperature: float = 0.7
     """What sampling temperature to use."""
-    min_length: int = 1
-    """The minimum number of tokens to generate in the completion."""
     max_length: int = 256
     """The maximum number of tokens to generate in the completion."""
     length_no_input: bool = True
@@ -46,14 +44,8 @@ class NLPCloud(LLM):
     """The number of highest probability tokens to keep for top-k filtering."""
     repetition_penalty: float = 1.0
     """Penalizes repeated tokens. 1.0 means no penalty."""
-    length_penalty: float = 1.0
-    """Exponential penalty to the length."""
-    do_sample: bool = True
-    """Whether to use sampling (True) or greedy decoding."""
     num_beams: int = 1
     """Number of beams for beam search."""
-    early_stopping: bool = False
-    """Whether to stop beam search at num_beams sentences."""
     num_return_sequences: int = 1
     """How many completions to generate for each prompt."""

@@ -91,7 +83,6 @@ class NLPCloud(LLM):
         """Get the default parameters for calling NLPCloud API."""
         return {
             "temperature": self.temperature,
-            "min_length": self.min_length,
             "max_length": self.max_length,
             "length_no_input": self.length_no_input,
             "remove_input": self.remove_input,
@@ -100,10 +91,7 @@ class NLPCloud(LLM):
             "top_p": self.top_p,
             "top_k": self.top_k,
             "repetition_penalty": self.repetition_penalty,
-            "length_penalty": self.length_penalty,
-            "do_sample": self.do_sample,
             "num_beams": self.num_beams,
-            "early_stopping": self.early_stopping,
             "num_return_sequences": self.num_return_sequences,
         }

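
For reference, a minimal sketch of calling the trimmed-down connector after this change, passing only parameters that remain on the class (`min_length`, `length_penalty`, `do_sample`, and `early_stopping` are gone). The `NLPCLOUD_API_KEY` environment variable and the prompt are assumptions for illustration, not part of this diff:

```python
import os

from langchain.llms import NLPCloud

# Assumed: the connector reads the NLP Cloud API token from this environment
# variable.
os.environ["NLPCLOUD_API_KEY"] = "your-nlpcloud-token"

# Only parameters that survive this change are passed; these names and their
# defaults appear in the class definition above.
llm = NLPCloud(
    temperature=0.7,
    max_length=256,
    repetition_penalty=1.0,
    num_beams=1,
    num_return_sequences=1,
)

# Standard LangChain LLM call: the prompt string goes in, a completion comes back.
print(llm("Summarize what the NLP Cloud API offers in one sentence."))
```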