Automatically add EOS via Tokenizer, integrate Sentence Transformers (#1)
- Automatically add EOS via Tokenizer, integrate Sentence Transformers (fd17b9cd89d6cc5b416d4b66ea25da0bea7f2bb0)
- Remove eod_id line from README (7bd6fbe3c54b9ec2b4b1cc3a052720a76fcf0d90)
This commit is contained in:
parent
9b8853c96a
commit
807d9e22a8
3
.gitattributes
vendored
3
.gitattributes
vendored
@@ -45,5 +45,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|||||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
|
||||||
|
|||||||
10
1_Pooling/config.json
Normal file
10
1_Pooling/config.json
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
{
|
||||||
|
"word_embedding_dimension": 4096,
|
||||||
|
"pooling_mode_cls_token": false,
|
||||||
|
"pooling_mode_mean_tokens": false,
|
||||||
|
"pooling_mode_max_tokens": false,
|
||||||
|
"pooling_mode_mean_sqrt_len_tokens": false,
|
||||||
|
"pooling_mode_weightedmean_tokens": false,
|
||||||
|
"pooling_mode_lasttoken": true,
|
||||||
|
"include_prompt": true
|
||||||
|
}
|
||||||
65
README.md
65
README.md
@@ -2,7 +2,11 @@
|
|||||||
license: apache-2.0
|
license: apache-2.0
|
||||||
base_model:
|
base_model:
|
||||||
- Qwen/Qwen3-8B-Base
|
- Qwen/Qwen3-8B-Base
|
||||||
library_name: transformers
|
tags:
|
||||||
|
- transformers
|
||||||
|
- sentence-transformers
|
||||||
|
- sentence-similarity
|
||||||
|
- feature-extraction
|
||||||
---
|
---
|
||||||
# Qwen3-Embedding-8B
|
# Qwen3-Embedding-8B
|
||||||
|
|
||||||
@@ -53,6 +57,47 @@ With Transformers versions earlier than 4.51.0, you may encounter the following
|
|||||||
KeyError: 'qwen3'
|
KeyError: 'qwen3'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Sentence Transformers Usage
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Requires transformers>=4.51.0
|
||||||
|
|
||||||
|
from sentence_transformers import SentenceTransformer
|
||||||
|
|
||||||
|
# Load the model
|
||||||
|
model = SentenceTransformer("Qwen/Qwen3-Embedding-8B")
|
||||||
|
|
||||||
|
# We recommend enabling flash_attention_2 for better acceleration and memory saving,
|
||||||
|
# together with setting `padding_side` to "left":
|
||||||
|
# model = SentenceTransformer(
|
||||||
|
# "Qwen/Qwen3-Embedding-8B",
|
||||||
|
# model_kwargs={"attn_implementation": "flash_attention_2", "device_map": "auto"},
|
||||||
|
# tokenizer_kwargs={"padding_side": "left"},
|
||||||
|
# )
|
||||||
|
|
||||||
|
# The queries and documents to embed
|
||||||
|
queries = [
|
||||||
|
"What is the capital of China?",
|
||||||
|
"Explain gravity",
|
||||||
|
]
|
||||||
|
documents = [
|
||||||
|
"The capital of China is Beijing.",
|
||||||
|
"Gravity is a force that attracts two bodies towards each other. It gives weight to physical objects and is responsible for the movement of planets around the sun.",
|
||||||
|
]
|
||||||
|
|
||||||
|
# Encode the queries and documents. Note that queries benefit from using a prompt.
|
||||||
|
# Here we use the prompt called "query" stored under `model.prompts`, but you can
|
||||||
|
# also pass your own prompt via the `prompt` argument
|
||||||
|
query_embeddings = model.encode(queries, prompt_name="query")
|
||||||
|
document_embeddings = model.encode(documents)
|
||||||
|
|
||||||
|
# Compute the (cosine) similarity between the query and document embeddings
|
||||||
|
similarity = model.similarity(query_embeddings, document_embeddings)
|
||||||
|
print(similarity)
|
||||||
|
# tensor([[0.7493, 0.0751],
|
||||||
|
# [0.0880, 0.6318]])
|
||||||
|
```
|
||||||
|
|
||||||
### Transformers Usage
|
### Transformers Usage
|
||||||
|
|
||||||
```python
|
```python
|
||||||
@@ -79,14 +124,6 @@ def last_token_pool(last_hidden_states: Tensor,
|
|||||||
def get_detailed_instruct(task_description: str, query: str) -> str:
|
def get_detailed_instruct(task_description: str, query: str) -> str:
|
||||||
return f'Instruct: {task_description}\nQuery:{query}'
|
return f'Instruct: {task_description}\nQuery:{query}'
|
||||||
|
|
||||||
def tokenize(tokenizer, input_texts, eod_id, max_length):
|
|
||||||
batch_dict = tokenizer(input_texts, padding=False, truncation=True, max_length=max_length-2)
|
|
||||||
for seq, att in zip(batch_dict["input_ids"], batch_dict["attention_mask"]):
|
|
||||||
seq.append(eod_id)
|
|
||||||
att.append(1)
|
|
||||||
batch_dict = tokenizer.pad(batch_dict, padding=True, return_tensors="pt")
|
|
||||||
return batch_dict
|
|
||||||
|
|
||||||
# Each query must come with a one-sentence instruction that describes the task
|
# Each query must come with a one-sentence instruction that describes the task
|
||||||
task = 'Given a web search query, retrieve relevant passages that answer the query'
|
task = 'Given a web search query, retrieve relevant passages that answer the query'
|
||||||
|
|
||||||
@@ -107,11 +144,16 @@ model = AutoModel.from_pretrained('Qwen/Qwen3-Embedding-8B')
|
|||||||
# We recommend enabling flash_attention_2 for better acceleration and memory saving.
|
# We recommend enabling flash_attention_2 for better acceleration and memory saving.
|
||||||
# model = AutoModel.from_pretrained('Qwen/Qwen3-Embedding-8B', attn_implementation="flash_attention_2", torch_dtype=torch.float16).cuda()
|
# model = AutoModel.from_pretrained('Qwen/Qwen3-Embedding-8B', attn_implementation="flash_attention_2", torch_dtype=torch.float16).cuda()
|
||||||
|
|
||||||
eod_id = tokenizer.convert_tokens_to_ids("<|endoftext|>")
|
|
||||||
max_length = 8192
|
max_length = 8192
|
||||||
|
|
||||||
# Tokenize the input texts
|
# Tokenize the input texts
|
||||||
batch_dict = tokenize(tokenizer, input_texts, eod_id, max_length)
|
batch_dict = tokenizer(
|
||||||
|
input_texts,
|
||||||
|
padding=True,
|
||||||
|
truncation=True,
|
||||||
|
max_length=max_length,
|
||||||
|
return_tensors="pt",
|
||||||
|
)
|
||||||
batch_dict.to(model.device)
|
batch_dict.to(model.device)
|
||||||
outputs = model(**batch_dict)
|
outputs = model(**batch_dict)
|
||||||
embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
|
embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
|
||||||
@@ -120,6 +162,7 @@ embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_ma
|
|||||||
embeddings = F.normalize(embeddings, p=2, dim=1)
|
embeddings = F.normalize(embeddings, p=2, dim=1)
|
||||||
scores = (embeddings[:2] @ embeddings[2:].T)
|
scores = (embeddings[:2] @ embeddings[2:].T)
|
||||||
print(scores.tolist())
|
print(scores.tolist())
|
||||||
|
# [[0.7493016123771667, 0.0750647559762001], [0.08795969933271408, 0.6318399906158447]]
|
||||||
```
|
```
|
||||||
📌 **Tip**: We recommend that developers customize the `instruct` according to their specific scenarios, tasks, and languages. Our tests have shown that in most retrieval scenarios, not using an `instruct` on the query side can lead to a drop in retrieval performance of approximately 1% to 5%.
|
📌 **Tip**: We recommend that developers customize the `instruct` according to their specific scenarios, tasks, and languages. Our tests have shown that in most retrieval scenarios, not using an `instruct` on the query side can lead to a drop in retrieval performance of approximately 1% to 5%.
|
||||||
|
|
||||||
|
|||||||
8
config_sentence_transformers.json
Normal file
8
config_sentence_transformers.json
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"prompts": {
|
||||||
|
"query": "Instruct: Given a web search query, retrieve relevant passages that answer the query\nQuery:",
|
||||||
|
"document": ""
|
||||||
|
},
|
||||||
|
"default_prompt_name": null,
|
||||||
|
"similarity_fn_name": "cosine"
|
||||||
|
}
|
||||||
20
modules.json
Normal file
20
modules.json
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"idx": 0,
|
||||||
|
"name": "0",
|
||||||
|
"path": "",
|
||||||
|
"type": "sentence_transformers.models.Transformer"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"idx": 1,
|
||||||
|
"name": "1",
|
||||||
|
"path": "1_Pooling",
|
||||||
|
"type": "sentence_transformers.models.Pooling"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"idx": 2,
|
||||||
|
"name": "2",
|
||||||
|
"path": "2_Normalize",
|
||||||
|
"type": "sentence_transformers.models.Normalize"
|
||||||
|
}
|
||||||
|
]
|
||||||
BIN
tokenizer.json
(Stored with Git LFS)
BIN
tokenizer.json
(Stored with Git LFS)
Binary file not shown.
Loading…
Reference in New Issue
Block a user