From 8bd4b2270f04acb379c707feb6397eb185be1ab4 Mon Sep 17 00:00:00 2001 From: Cherrytest Date: Wed, 29 Oct 2025 03:31:39 +0000 Subject: [PATCH] Upload tokenizer.json --- tokenizer.json | 106 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 104 insertions(+), 2 deletions(-) diff --git a/tokenizer.json b/tokenizer.json index 5998042..2182956 100644 --- a/tokenizer.json +++ b/tokenizer.json @@ -3486,8 +3486,110 @@ "tch": 2348, "sch": 2349, "🙊": 2350, - "🤭": 2351 - }, + "🤭": 2351, + "€": 2352, + "أ": 2353, + "إ": 2354, + "ئ": 2355, + "آ": 2356, + "ؤ": 2357, + "ﻻ": 2358, + "ﺃ": 2359, + "ę": 2360, + "ą": 2361, + "ż": 2362, + "ś": 2363, + "ć": 2364, + "ń": 2365, + "ź": 2366, + "Ś": 2367, + "Ź": 2368, + "Ż": 2369, + "Ć": 2370, + "Š": 2371, + "Ő": 2372, + "й": 2373, + "ё": 2374, + "Й": 2375, + "Ё": 2376, + "が": 2377, + "で": 2378, + "じ": 2379, + "だ": 2380, + "ど": 2381, + "ば": 2382, + "げ": 2383, + "ご": 2384, + "ぶ": 2385, + "ぎ": 2386, + ",": 2387, + "(": 2388, + ":": 2389, + ";": 2390, + "?": 2391, + "!": 2392, + "#": 2393, + " )": 2394, + "ά": 2395, + "ό": 2396, + "ί": 2397, + "έ": 2398, + "ή": 2399, + "ύ": 2400, + "ώ": 2401, + "Έ": 2402, + "Ό": 2403, + "Ή": 2404, + "ž": 2405, + "š": 2406, + "ū": 2407, + "ş": 2408, + "Ō": 2409, + "ī": 2410, + "č": 2411, + "ř": 2412, + "ă": 2413, + "이": 2414, + "기": 2415, + "요": 2416, + "에": 2417, + "다": 2418, + "을": 2419, + "은": 2420, + "서": 2421, + "니": 2422, + "어": 2423, + "ě": 2424, + "ů": 2425, + "Č": 2426, + "ň": 2427, + "ď": 2428, + "ť": 2429, + "♭": 2430, + "ľ": 2431, + "ĺ": 2432, + "ğ": 2433, + "İ": 2434, + "Ş": 2435, + "ड़": 2436, + "ढ़": 2437, + "ज़": 2438, + "फ़": 2439, + "ख़": 2440, + "क़": 2441, + "ग़": 2442, + "Ά": 2443, + "ϊ": 2444, + "Ί": 2445, + "Ύ": 2446, + "Ώ": 2447, + "ΐ": 2448, + "ϋ": 2449, + "ũ": 2450, + "ụ": 2451, + "ọ": 2452, + "ạ": 2453 + }, "merges": [ "t h", "i n",