Upload 2 files

This commit is contained in:
Cherrytest 2025-10-18 05:06:24 +00:00
parent 1ac4095e16
commit 8948c8174b
2 changed files with 57 additions and 2330 deletions

View File

@ -21,6 +21,15 @@
"rstrip": false,
"normalized": false
},
{
"id": 2,
"special": true,
"content": "[SPACE]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": true
},
{
"id": 255,
"special": true,
@ -1055,199 +1064,69 @@
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 6561,
"content": "[START_SPEECH]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 6562,
"content": "[STOP_SPEECH]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 6563,
"content": "[EXAGGERATION]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "Sequence",
"normalizers": [
{
"type": "Replace",
"pattern": {
"Regex": "\\s+"
"pattern": { "String": " " },
"content": "[SPACE]"
}
]
},
"content": " "
"pre_tokenizer": {
"type": "Whitespace"
},
"pre_tokenizer": null,
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "[EXAGGERATION]",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "[START]",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "[STOP]",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "[START_SPEECH]",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "[START_SPEECH]",
"type_id": 0
}
}
{ "SpecialToken": { "id": "EXAGGERATION", "type_id": 0 } },
{ "SpecialToken": { "id": "BOS", "type_id": 0 } },
{ "Sequence": { "id": "A", "type_id": 0 } },
{ "SpecialToken": { "id": "EOS", "type_id": 0 } },
{ "SpecialToken": { "id": "START_SPEECH", "type_id": 0 } },
{ "SpecialToken": { "id": "START_SPEECH", "type_id": 0 } }
],
"pair": [
{
"SpecialToken": {
"id": "[EXAGGERATION]",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "[START]",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "[STOP]",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "[START_SPEECH]",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "[START_SPEECH]",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "[EXAGGERATION]",
"type_id": 1
}
},
{
"SpecialToken": {
"id": "[START]",
"type_id": 1
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
},
{
"SpecialToken": {
"id": "[STOP]",
"type_id": 1
}
},
{
"SpecialToken": {
"id": "[START_SPEECH]",
"type_id": 1
}
},
{
"SpecialToken": {
"id": "[START_SPEECH]",
"type_id": 1
}
}
{ "SpecialToken": { "id": "EXAGGERATION", "type_id": 0 } },
{ "SpecialToken": { "id": "BOS", "type_id": 0 } },
{ "Sequence": { "id": "A", "type_id": 0 } },
{ "SpecialToken": { "id": "EOS", "type_id": 0 } },
{ "SpecialToken": { "id": "START_SPEECH", "type_id": 0 } },
{ "SpecialToken": { "id": "START_SPEECH", "type_id": 0 } },
{ "SpecialToken": { "id": "EXAGGERATION", "type_id": 1 } },
{ "SpecialToken": { "id": "BOS", "type_id": 1 } },
{ "Sequence": { "id": "B", "type_id": 1 } },
{ "SpecialToken": { "id": "EOS", "type_id": 1 } },
{ "SpecialToken": { "id": "START_SPEECH", "type_id": 1 } },
{ "SpecialToken": { "id": "START_SPEECH", "type_id": 1 } }
],
"special_tokens": {
"[START]": {
"id": "[START]",
"ids": [
255
],
"tokens": [
"[START]"
]
"BOS": {
"id": "BOS",
"ids": [255],
"tokens": ["<s>"]
},
"[STOP]": {
"id": "[STOP]",
"ids": [
0
],
"tokens": [
"[STOP]"
]
"EOS": {
"id": "EOS",
"ids": [0],
"tokens": ["</s>"]
},
"[EXAGGERATION]": {
"id": "[EXAGGERATION]",
"ids": [
6563
],
"tokens": [
"[EXAGGERATION]"
]
"EXAGGERATION": {
"id": "EXAGGERATION",
"ids": [6563],
"tokens": ["<EXAGGERATION>"]
},
"[START_SPEECH]": {
"id": "[START_SPEECH]",
"ids": [
6561
],
"tokens": [
"[START_SPEECH]"
]
"START_SPEECH": {
"id": "START_SPEECH",
"ids": [6561],
"tokens": ["<START_SPEECH>"]
}
}
},
"decoder": {
"type": "Fuse"
},
"decoder": null,
"model": {
"type": "BPE",
"dropout": null,
@ -3607,109 +3486,7 @@
"tch": 2348,
"sch": 2349,
"🙊": 2350,
"🤭": 2351,
"€": 2352,
"أ": 2353,
"إ": 2354,
"ئ": 2355,
"آ": 2356,
"ؤ": 2357,
"ﻻ": 2358,
"ﺃ": 2359,
"ę": 2360,
"ą": 2361,
"ż": 2362,
"ś": 2363,
"ć": 2364,
"ń": 2365,
"ź": 2366,
"Ś": 2367,
"Ź": 2368,
"Ż": 2369,
"Ć": 2370,
"Š": 2371,
"Ő": 2372,
"й": 2373,
"ё": 2374,
"Й": 2375,
"Ё": 2376,
"が": 2377,
"で": 2378,
"じ": 2379,
"だ": 2380,
"ど": 2381,
"ば": 2382,
"げ": 2383,
"ご": 2384,
"ぶ": 2385,
"ぎ": 2386,
"": 2387,
"": 2388,
"": 2389,
"": 2390,
"": 2391,
"": 2392,
"": 2393,
" ": 2394,
"ά": 2395,
"ό": 2396,
"ί": 2397,
"έ": 2398,
"ή": 2399,
"ύ": 2400,
"ώ": 2401,
"Έ": 2402,
"Ό": 2403,
"Ή": 2404,
"ž": 2405,
"š": 2406,
"ū": 2407,
"ş": 2408,
"Ō": 2409,
"ī": 2410,
"č": 2411,
"ř": 2412,
"ă": 2413,
"이": 2414,
"기": 2415,
"요": 2416,
"에": 2417,
"다": 2418,
"을": 2419,
"은": 2420,
"서": 2421,
"니": 2422,
"어": 2423,
"ě": 2424,
"ů": 2425,
"Č": 2426,
"ň": 2427,
"ď": 2428,
"ť": 2429,
"♭": 2430,
"ľ": 2431,
"ĺ": 2432,
"ğ": 2433,
"İ": 2434,
"Ş": 2435,
"ड़": 2436,
"ढ़": 2437,
"ज़": 2438,
"फ़": 2439,
"ख़": 2440,
"क़": 2441,
"ग़": 2442,
"Ά": 2443,
"ϊ": 2444,
"Ί": 2445,
"Ύ": 2446,
"Ώ": 2447,
"ΐ": 2448,
"ϋ": 2449,
"ũ": 2450,
"ụ": 2451,
"ọ": 2452,
"ạ": 2453
"🤭": 2351
},
"merges": [
"t h",

File diff suppressed because it is too large Load Diff