From 1df345b5eb8b4cce57744351f09a23c265802196 Mon Sep 17 00:00:00 2001 From: josc146 Date: Tue, 25 Jul 2023 20:30:43 +0800 Subject: [PATCH] improve embeddings API results --- README.md | 3 +++ README_JA.md | 6 +++++- README_ZH.md | 2 ++ backend-python/utils/rwkv.py | 2 +- 4 files changed, 11 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4f076b1..f440e2e 100644 --- a/README.md +++ b/README.md @@ -91,6 +91,9 @@ body.json: ## Embeddings API Example +Note: v1.4.0 has improved the quality of the embeddings API. The generated results are not compatible +with previous versions. If you are using the embeddings API to generate knowledge bases or similar, please regenerate them. + If you are using langchain, just use `OpenAIEmbeddings(openai_api_base="http://127.0.0.1:8000", openai_api_key="sk-")` ```python diff --git a/README_JA.md b/README_JA.md index 1a2ad30..4b85fe6 100644 --- a/README_JA.md +++ b/README_JA.md @@ -91,7 +91,11 @@ body.json: ## 埋め込み API の例 -LangChain を使用している場合は、`OpenAIEmbeddings(openai_api_base="http://127.0.0.1:8000", openai_api_key="sk-")`を使用してください +Note: v1.4.0 has improved the quality of the embeddings API. The generated results are not compatible +with previous versions. If you are using the embeddings API to generate knowledge bases or similar, please regenerate them. 
+ +LangChain を使用している場合は、`OpenAIEmbeddings(openai_api_base="http://127.0.0.1:8000", openai_api_key="sk-")` +を使用してください ```python import numpy as np diff --git a/README_ZH.md b/README_ZH.md index 16f3e5a..7cebabb 100644 --- a/README_ZH.md +++ b/README_ZH.md @@ -89,6 +89,8 @@ body.json: ## Embeddings API 示例 +注意: 1.4.0 版本对embeddings API质量进行了改善,生成结果与之前的版本不兼容,如果你正在使用此API生成知识库等,请重新生成 + 如果你在用langchain, 直接使用 `OpenAIEmbeddings(openai_api_base="http://127.0.0.1:8000", openai_api_key="sk-")` ```python diff --git a/backend-python/utils/rwkv.py b/backend-python/utils/rwkv.py index 83a4c0b..955fa19 100644 --- a/backend-python/utils/rwkv.py +++ b/backend-python/utils/rwkv.py @@ -69,7 +69,7 @@ class AbstractRWKV(ABC): self.model_state = None self.model_tokens = [] _, token_len = self.run_rnn(self.fix_tokens(self.pipeline.encode(input))) - embedding = self.model_state[-5].tolist() + embedding = self.model_state[-11].tolist() embedding = (embedding / np.linalg.norm(embedding)).tolist() return embedding, token_len