diff --git a/.gitattributes b/.gitattributes index 15ba2c6..b473454 100644 --- a/.gitattributes +++ b/.gitattributes @@ -45,3 +45,49 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.wasm filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text + +model-00006-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00004-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00008-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00002-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00001-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00040-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00003-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00005-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00007-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +merges.txt filter=lfs diff=lfs merge=lfs -text +model-00015-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00018-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00020-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00022-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00010-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00041-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00009-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00012-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00014-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00017-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00011-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00013-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00021-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00023-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text 
+model-00016-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00039-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00019-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00024-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00036-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00037-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model.safetensors.index.json filter=lfs diff=lfs merge=lfs -text +model-00026-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00031-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00034-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00028-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00033-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00025-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00027-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +vocab.json filter=lfs diff=lfs merge=lfs -text +model-00029-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text +model-00038-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00035-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00030-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text +model-00032-of-00041.safetensors filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/README.md b/README.md index a3121e2..3f184c7 100644 --- a/README.md +++ b/README.md @@ -1,48 +1,420 @@ --- -license: Apache License 2.0 -tags: [] +license: MIT License +library_name: transformers +pipeline_tag: text-generation +tags: +- vLLM +- AWQ +base_model: + - MiniMax/MiniMax-M2 +base_model_relation: quantized -#model-type: -##如 gpt、phi、llama、chatglm、baichuan 等 -#- gpt - -#domain: -##如 nlp、cv、audio、multi-modal -#- nlp - -#language: -##语言代码列表 
https://help.aliyun.com/document_detail/215387.html?spm=a2c4g.11186623.0.0.9f8d7467kni6Aa -#- cn - -#metrics: -##如 CIDEr、Blue、ROUGE 等 -#- CIDEr - -#tags: -##各种自定义,包括 pretrained、fine-tuned、instruction-tuned、RL-tuned 等训练方法和其他 -#- pretrained - -#tools: -##如 vllm、fastchat、llamacpp、AdaSeq 等 -#- vllm --- -### 当前模型的贡献者未提供更加详细的模型介绍。模型文件和权重,可浏览“模型文件”页面获取。 -#### 您可以通过如下git clone命令,或者ModelScope SDK来下载模型 +# MiniMax-M2-AWQ +Base model: [MiniMax/MiniMax-M2](https://www.modelscope.cn/models/MiniMax/MiniMax-M2) -SDK下载 +### 【Dependencies / Installation】 +As of **2025-10-28**, create a fresh Python environment and run: ```bash -#安装ModelScope -pip install modelscope -``` -```python -#SDK模型下载 -from modelscope import snapshot_download -model_dir = snapshot_download('tclf90/MiniMax-M2-AWQ') -``` -Git下载 -``` -#Git模型下载 -git clone https://www.modelscope.cn/tclf90/MiniMax-M2-AWQ.git +pip install -U pip +pip install vllm --pre --extra-index-url https://wheels.vllm.ai/nightly ``` -

如果您是本模型的贡献者,我们邀请您根据模型贡献文档,及时完善模型卡片内容。

\ No newline at end of file +
+ +testing environment + +``` +Package Version +---------------------------------- -------------------------------- +aiohappyeyeballs 2.6.1 +aiohttp 3.13.1 +aiosignal 1.4.0 +annotated-doc 0.0.3 +annotated-types 0.7.0 +anthropic 0.71.0 +anyio 4.11.0 +apache-tvm-ffi 0.1.0b15 +astor 0.8.1 +attrs 25.4.0 +blake3 1.0.8 +cachetools 6.2.1 +cbor2 5.7.1 +certifi 2025.10.5 +cffi 2.0.0 +charset-normalizer 3.4.4 +click 8.2.1 +cloudpickle 3.1.1 +compressed-tensors 0.12.2 +cuda-bindings 13.0.3 +cuda-pathfinder 1.3.1 +cuda-python 13.0.3 +cupy-cuda12x 13.6.0 +depyf 0.20.0 +dill 0.4.0 +diskcache 5.6.3 +distro 1.9.0 +dnspython 2.8.0 +docstring_parser 0.17.0 +einops 0.8.1 +email-validator 2.3.0 +fastapi 0.120.1 +fastapi-cli 0.0.14 +fastapi-cloud-cli 0.3.1 +fastrlock 0.8.3 +filelock 3.20.0 +flashinfer-python 0.4.1 +frozenlist 1.8.0 +fsspec 2025.9.0 +gguf 0.17.1 +h11 0.16.0 +hf-xet 1.2.0 +httpcore 1.0.9 +httptools 0.7.1 +httpx 0.28.1 +huggingface-hub 0.36.0 +idna 3.11 +importlib_metadata 8.7.0 +iniconfig 2.3.0 +interegular 0.3.3 +Jinja2 3.1.6 +jiter 0.11.1 +jsonschema 4.25.1 +jsonschema-specifications 2025.9.1 +lark 1.2.2 +llguidance 0.7.30 +llvmlite 0.44.0 +lm-format-enforcer 0.11.3 +loguru 0.7.3 +markdown-it-py 4.0.0 +MarkupSafe 3.0.3 +mdurl 0.1.2 +mistral_common 1.8.5 +mpmath 1.3.0 +msgpack 1.1.2 +msgspec 0.19.0 +multidict 6.7.0 +networkx 3.5 +ninja 1.13.0 +numba 0.61.2 +numpy 2.2.6 +nvidia-cublas-cu12 12.8.4.1 +nvidia-cuda-cupti-cu12 12.8.90 +nvidia-cuda-nvrtc-cu12 12.8.93 +nvidia-cuda-runtime-cu12 12.8.90 +nvidia-cudnn-cu12 9.10.2.21 +nvidia-cudnn-frontend 1.15.0 +nvidia-cufft-cu12 11.3.3.83 +nvidia-cufile-cu12 1.13.1.3 +nvidia-curand-cu12 10.3.9.90 +nvidia-cusolver-cu12 11.7.3.90 +nvidia-cusparse-cu12 12.5.8.93 +nvidia-cusparselt-cu12 0.7.1 +nvidia-cutlass-dsl 4.3.0.dev0 +nvidia-ml-py 13.580.82 +nvidia-nccl-cu12 2.27.5 +nvidia-nvjitlink-cu12 12.8.93 +nvidia-nvshmem-cu12 3.3.20 +nvidia-nvtx-cu12 12.8.90 +openai 2.6.1 +openai-harmony 0.0.4 +opencv-python-headless 4.12.0.88 
+opentelemetry-api 1.38.0 +opentelemetry-sdk 1.38.0 +opentelemetry-semantic-conventions 0.59b0 +outlines_core 0.2.11 +packaging 25.0 +partial-json-parser 0.2.1.1.post6 +pillow 12.0.0 +pip 25.3 +pluggy 1.6.0 +prometheus_client 0.23.1 +prometheus-fastapi-instrumentator 7.1.0 +propcache 0.4.1 +protobuf 6.33.0 +psutil 7.1.2 +py-cpuinfo 9.0.0 +pybase64 1.4.2 +pycountry 24.6.1 +pycparser 2.23 +pydantic 2.12.3 +pydantic_core 2.41.4 +pydantic-extra-types 2.10.6 +Pygments 2.19.2 +pytest 8.4.2 +python-dotenv 1.2.1 +python-json-logger 4.0.0 +python-multipart 0.0.20 +PyYAML 6.0.3 +pyzmq 27.1.0 +ray 2.50.1 +referencing 0.37.0 +regex 2025.10.23 +requests 2.32.5 +rich 14.2.0 +rich-toolkit 0.15.1 +rignore 0.7.1 +rpds-py 0.28.0 +safetensors 0.6.2 +scipy 1.16.2 +sentencepiece 0.2.1 +sentry-sdk 3.0.0a7 +setproctitle 1.3.7 +setuptools 79.0.1 +shellingham 1.5.4 +six 1.17.0 +sniffio 1.3.1 +soundfile 0.13.1 +soxr 1.0.0 +starlette 0.48.0 +sympy 1.14.0 +tabulate 0.9.0 +tiktoken 0.12.0 +tokenizers 0.22.1 +torch 2.9.0 +torchaudio 2.9.0 +torchvision 0.24.0 +tqdm 4.67.1 +transformers 4.57.1 +triton 3.5.0 +triton_kernels 1.0.0 +typer 0.20.0 +typing_extensions 4.15.0 +typing-inspection 0.4.2 +urllib3 2.5.0 +uv 0.9.5 +uvicorn 0.38.0 +uvloop 0.22.1 +vllm 0.11.1rc4.dev38+g69f064062.cu129 +watchfiles 1.1.1 +websockets 15.0.1 +xgrammar 0.1.25 +yarl 1.22.0 +zipp 3.23.0 +``` + +
+ +### 【vLLM Startup Command】 +Note: When launching with TP=8, include `--enable-expert-parallel`; +otherwise the expert tensors wouldn’t be evenly sharded across GPU devices. + +``` +CONTEXT_LENGTH=32768 +vllm serve \ + tclf90/MiniMax-M2-AWQ \ + --served-model-name MY_MODEL \ + --enable-auto-tool-choice \ + --tool-call-parser hermes \ + --reasoning-parser deepseek_r1 \ + --swap-space 16 \ + --max-num-seqs 32 \ + --max-model-len $CONTEXT_LENGTH \ + --gpu-memory-utilization 0.9 \ + --tensor-parallel-size 8 \ + --enable-expert-parallel \ + --trust-remote-code \ + --disable-log-requests \ + --host 0.0.0.0 \ + --port 8000 +``` + +### 【Logs】 +``` +2025-10-28 +1. Initial commit +``` + +### 【Model Files】 +| File Size | Last Updated | +|-----------|--------------| +| `113GiB` | `2025-10-28` | + +### 【Model Download】 +```python +from huggingface_hub import snapshot_download +snapshot_download('tclf90/MiniMax-M2-AWQ', cache_dir="your_local_path") +``` + +### 【Overview】 + + + + + + + + + + + + + + + + + + +
+ +
+ + Homepage + + + Agent + + + API + + + MCP + +
+
+ + Hugging Face + + + GitHub + + + ModelScope + + + License + + + WeChat + +
+ +# Meet MiniMax-M2 + +Today, we release and open source MiniMax-M2, a **Mini** model built for **Max** coding & agentic workflows. + +**MiniMax-M2** redefines efficiency for agents. It's a compact, fast, and cost-effective MoE model (230 billion total parameters with 10 billion active parameters) built for elite performance in coding and agentic tasks, all while maintaining powerful general intelligence. With just 10 billion activated parameters, MiniMax-M2 provides the sophisticated, end-to-end tool use performance expected from today's leading models, but in a streamlined form factor that makes deployment and scaling easier than ever. + +

+ +

+ +--- + +## Highlights + +**Superior Intelligence**. According to benchmarks from Artificial Analysis, MiniMax-M2 demonstrates highly competitive general intelligence across mathematics, science, instruction following, coding, and agentic tool use. **Its composite score ranks #1 among open-source models globally**. + +**Advanced Coding**. Engineered for end-to-end developer workflows, MiniMax-M2 excels at multi-file edits, coding-run-fix loops, and test-validated repairs. Strong performance on Terminal-Bench and (Multi-)SWE-Bench–style tasks demonstrates practical effectiveness in terminals, IDEs, and CI across languages. + +**Agent Performance**. MiniMax-M2 plans and executes complex, long-horizon toolchains across shell, browser, retrieval, and code runners. In BrowseComp-style evaluations, it consistently locates hard-to-surface sources, keeps its evidence traceable, and gracefully recovers from flaky steps. + +**Efficient Design**. With 10 billion activated parameters (230 billion in total), MiniMax-M2 delivers lower latency, lower cost, and higher throughput for interactive agents and batched sampling—perfectly aligned with the shift toward highly deployable models that still shine on coding and agentic tasks. + +--- + +## Coding & Agentic Benchmarks + +These comprehensive evaluations test real-world end-to-end coding and agentic tool use: editing real repos, executing commands, browsing the web, and delivering functional solutions. Performance on this suite correlates with day-to-day developer experience in terminals, IDEs, and CI. 
+ +| **Benchmark** | **MiniMax-M2** | **Claude Sonnet 4** | **Claude Sonnet 4.5** | **Gemini 2.5 Pro** | **GPT-5 (thinking)** | **GLM-4.6** | **Kimi K2 0905** | **DeepSeek-V3.2** | +|-----------|------------|-----------------|-------------------|-----------------|------------------|---------|---------------|----------------| +| **SWE-bench Verified** | 69.4 | 72.7 * | 77.2 * | 63.8 * | 74.9 * | 68 * | 69.2 * | 67.8 * | +| **Multi-SWE-Bench** | 36.2 | 35.7 * | 44.3 | / | / | 30 | 33.5 | 30.6 | +| **SWE-bench Multilingual** | 56.5 | 56.9 * | 68 | / | / | 53.8 | 55.9 * | 57.9 * | +| **Terminal-Bench** | 46.3 | 36.4 * | 50 * | 25.3 * | 43.8 * | 40.5 * | 44.5 * | 37.7 * | +| **ArtifactsBench** | 66.8 | 57.3* | 61.5 | 57.7* | 73* | 59.8 | 54.2 | 55.8 | +| **BrowseComp** | 44 | 12.2 | 19.6 | 9.9 | 54.9* | 45.1* | 14.1 | 40.1* | +| **BrowseComp-zh** | 48.5 | 29.1 | 40.8 | 32.2 | 65 | 49.5 | 28.8 | 47.9* | +| **GAIA (text only)** | 75.7 | 68.3 | 71.2 | 60.2 | 76.4 | 71.9 | 60.2 | 63.5 | +| **xbench-DeepSearch** | 72 | 64.6 | 66 | 56 | 77.8 | 70 | 61 | 71 | +| **HLE (w/ tools)** | 31.8 | 20.3 | 24.5 | 28.4 * | 35.2 * | 30.4 * | 26.9 * | 27.2 * | +| **τ²-Bench** | 77.2 | 65.5* | 84.7* | 59.2 | 80.1* | 75.9* | 70.3 | 66.7 | +| **FinSearchComp-global** | 65.5 | 42 | 60.8 | 42.6* | 63.9* | 29.2 | 29.5* | 26.2 | +| **AgentCompany** | 36 | 37 | 41 | 39.3* | / | 35 | 30 | 34 | + +>Notes: Data points marked with an asterisk (*) are taken directly from the model's official tech report or blog. All other metrics were obtained using the evaluation methods described below. +>- SWE-bench Verified: We use the same scaffold as [R2E-Gym](https://arxiv.org/pdf/2504.07164) (Jain et al. 2025) on top of OpenHands to test with agents on SWE tasks. All scores are validated on our internal infrastructure with 128k context length, 100 max steps, and no test-time scaling. All git-related content is removed to ensure agent sees only the code at the issue point. 
+>- Multi-SWE-Bench & SWE-bench Multilingual: All scores are averaged across 8 runs using the [claude-code](https://github.com/anthropics/claude-code) CLI (300 max steps) as the evaluation scaffold. +>- Terminal-Bench: All scores are evaluated with the official claude-code from the original [Terminal-Bench](https://www.tbench.ai/) repository (commit `94bf692`), averaged over 8 runs to report the mean pass rate. +>- ArtifactsBench: All scores are computed by averaging three runs with the official implementation of [ArtifactsBench](https://github.com/Tencent-Hunyuan/ArtifactsBenchmark), using the stable Gemini-2.5-Pro as the judge model. +>- BrowseComp & BrowseComp-zh & GAIA (text only) & xbench-DeepSearch: All scores reported use the same agent framework as [WebExplorer](https://arxiv.org/pdf/2509.06501) (Liu et al. 2025), with minor adjustments to the tool descriptions. We use the 103-sample text-only GAIA validation subset following [WebExplorer](https://arxiv.org/pdf/2509.06501) (Liu et al. 2025). +>- HLE (w/ tools): All reported scores are obtained using search tools and a Python tool. The search tools employ the same agent framework as [WebExplorer](https://arxiv.org/pdf/2509.06501) (Liu et al. 2025), and the Python tool runs in a Jupyter environment. We use the text-only HLE subset. +>- τ²-Bench: All scores reported use "extended thinking with tool use", and employ GPT-4.1 as the user simulator. +>- FinSearchComp-global: Official results are reported for GPT-5-Thinking, Gemini 2.5 Pro, and Kimi-K2. Other models are evaluated using the open-source [FinSearchComp](https://arxiv.org/pdf/2509.13160) (Hu et al. 2025) framework using both search and Python tools, launched simultaneously for consistency. +>- AgentCompany: All scores reported use the OpenHands 0.42 agent framework. 
+ +--- + +## Intelligence Benchmarks + +We align with **Artificial Analysis**, which aggregates challenging benchmarks using a consistent methodology to reflect a model’s broader **intelligence profile** across math, science, instruction following, coding, and agentic tool use. + +| **Metric (AA)** | **MiniMax-M2** | **Claude Sonnet 4** | **Claude Sonnet 4.5** | **Gemini 2.5 Pro** | **GPT-5 (thinking)** | **GLM-4.6** | **Kimi K2 0905** | **DeepSeek-V3.2** | +|-----------------|----------------|---------------------|------------------------|---------------------|----------------------|-------------|------------------|-------------------| +| AIME25 | 78 | 74 | 88 | 88 | 94 | 86 | 57 | 88 | +| MMLU-Pro | 82 | 84 | 88 | 86 | 87 | 83 | 82 | 85 | +| GPQA-Diamond | 78 | 78 | 83 | 84 | 85 | 78 | 77 | 80 | +| HLE (w/o tools) | 12.5 | 9.6 | 17.3 | 21.1 | 26.5 | 13.3 | 6.3 | 13.8 | +| LiveCodeBench (LCB) | 83 | 66 | 71 | 80 | 85 | 70 | 61 | 79 | +| SciCode | 36 | 40 | 45 | 43 | 43 | 38 | 31 | 38 | +| IFBench | 72 | 55 | 57 | 49 | 73 | 43 | 42 | 54 | +| AA-LCR | 61 | 65 | 66 | 66 | 76 | 54 | 52 | 69 | +| τ²-Bench-Telecom | 87 | 65 | 78 | 54 | 85 | 71 | 73 | 34 | +| Terminal-Bench-Hard | 24 | 30 | 33 | 25 | 31 | 23 | 23 | 29 | +| **AA Intelligence** | 61 | 57 | 63 | 60 | 69 | 56 | 50 | 57 | + +>AA: All scores of MiniMax-M2 aligned with Artificial Analysis Intelligence Benchmarking Methodology (https://artificialanalysis.ai/methodology/intelligence-benchmarking). All scores of other models reported from https://artificialanalysis.ai/. + +--- + +## Why activation size matters + +By maintaining activations around **10B** , the plan → act → verify loop in the agentic workflow is streamlined, improving responsiveness and reducing compute overhead: + +- **Faster feedback cycles** in compile-run-test and browse-retrieve-cite chains. + +- **More concurrent runs** on the same budget for regression suites and multi-seed explorations. 
+ +- **Simpler capacity planning** with smaller per-request memory and steadier tail latency. + +In short: **10B activations = responsive agent loops + better unit economics**. + +## At a glance + +If you need frontier-style coding and agents without frontier-scale costs, **MiniMax-M2** hits the sweet spot: fast inference speeds, robust tool-use capabilities, and a deployment-friendly footprint. + +We look forward to your feedback and to collaborating with developers and researchers to bring the future of intelligent collaboration one step closer. + +## How to Use + +- Our product **MiniMax Agent**, built on MiniMax-M2, is now **publicly available and free** for a limited time: https://agent.minimaxi.io/ + +- The MiniMax-M2 API is now live on the **MiniMax Open Platform** and is **free** for a limited time: https://platform.minimax.io/docs/guides/text-generation + +- The MiniMax-M2 model weights are now **open-source**, allowing for local deployment and use: https://huggingface.co/MiniMaxAI/MiniMax-M2. + +## Local Deployment Guide + +Download the model from HuggingFace repository: https://huggingface.co/MiniMaxAI/MiniMax-M2. We recommend using the following inference frameworks (listed alphabetically) to serve the model: + +### SGLang + +We recommend using [SGLang](https://docs.sglang.ai/) to serve MiniMax-M2. SGLang provides solid day-0 support for MiniMax-M2 model. Please refer to our [SGLang Deployment Guide](https://huggingface.co/MiniMaxAI/MiniMax-M2/blob/main/docs/sglang_deploy_guide.md) for more details, and thanks so much for our collaboration with the SGLang team. + +### vLLM + +We recommend using [vLLM](https://docs.vllm.ai/en/stable/) to serve MiniMax-M2. vLLM provides efficient day-0 support of MiniMax-M2 model, check https://docs.vllm.ai/projects/recipes/en/latest/MiniMax/MiniMax-M2.html for latest deployment guide. We also provide our [vLLM Deployment Guide](https://huggingface.co/MiniMaxAI/MiniMax-M2/blob/main/docs/vllm_deploy_guide.md). 
+ +### Inference Parameters +We recommend using the following parameters for best performance: `temperature=1.0`, `top_p = 0.95`, `top_k = 40`. + +**IMPORTANT:** MiniMax-M2 is an interleaved thinking model. Therefore, when using it, it is important to retain the thinking content from the assistant's turns within the historical messages. In the model's output content, we use the `<think>...</think>` format to wrap the assistant's thinking content. When using the model, you must ensure that the historical content is passed back in its original format. Do not remove the `<think>...</think>` part, otherwise, the model's performance will be negatively affected. + +## Tool Calling Guide + +Please refer to our [Tool Calling Guide](https://huggingface.co/MiniMaxAI/MiniMax-M2/blob/main/docs/tool_calling_guide.md). + +# Contact Us + +Contact us at [model@minimax.io](mailto:model@minimax.io). diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..8ed2385 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,159 @@ +{# ----------‑‑‑ special token variables ‑‑‑---------- #} +{%- set toolcall_begin_token = '' -%} +{%- set toolcall_end_token = '' -%} +{#- Tool Rendering Functions ============================================== -#} +{%- macro render_tool_namespace(namespace_name, tool_list) -%} +{%- for tool in tool_list -%} +{{ tool.function | tojson(ensure_ascii=False) }} +{% endfor -%} +{%- endmacro -%} +{%- macro visible_text(content) -%} + {%- if content is string -%} + {{ content }} + {%- elif content is iterable and content is not mapping -%} + {%- for item in content -%} + {%- if item is mapping and item.type == 'text' -%} + {{- item.text }} + {%- elif item is string -%} + {{- item }} + {%- endif -%} + {%- endfor -%} + {%- else -%} + {{- content }} + {%- endif -%} +{%- endmacro -%} +{#- System Message Construction ============================================ -#} +{%- macro build_system_message(system_message) -%} + {%- if system_message and system_message.content -%} + 
{{- visible_text(system_message.content) }} + {%- else -%} + {%- if model_identity is not defined -%} + {%- set model_identity = "You are a helpful assistant." -%} + {%- endif -%} + {{- model_identity }} + {%- endif -%} + + {#- Handle current_date -#} + {%- if system_message and system_message.current_date -%} + {{- '\n' ~ 'Current date: ' + system_message.current_date }} + {%- endif -%} + {#- Handle current_location -#} + {%- if system_message and system_message.current_location -%} + {{- '\n' ~ 'Current location: ' + system_message.current_location }} + {%- endif -%} +{%- endmacro -%} +{#- Main Template Logic ================================================= -#} +{#- Extract system message (only first message if it's system) -#} +{%- set system_message = none -%} +{%- set conversation_messages = messages -%} +{%- if messages and messages[0].role == "system" -%} + {%- set system_message = messages[0] -%} + {%- set conversation_messages = messages[1:] -%} +{%- endif -%} +{#- Get the last user message turn, for interleved thinking -#} +{%- set ns = namespace(last_user_index=-1) %} +{% for m in conversation_messages %} + {%- if m.role == 'user' %} + {% set ns.last_user_index = loop.index0 -%} + {%- endif %} +{%- endfor %} +{#- Render system message -#} +{{- ']~!b[' ~ ']~b]system' ~ '\n' }} +{{- build_system_message(system_message) }} +{#- Render tools if available -#} +{%- if tools -%} + {{- '\n\n' ~ '# Tools' ~ '\n' ~ 'You may call one or more tools to assist with the user query.\nHere are the tools available in JSONSchema format:' ~ '\n' }} + {{- '\n' ~ '' ~ '\n' }} + {{- render_tool_namespace("functions", tools) }} + {{- '' ~ '\n\n' }} +{{- 'When making tool calls, use XML format to invoke tools and pass parameters:' ~ '\n' }} +{{- '\n' ~ toolcall_begin_token }} + +param-value-1 +param-value-2 +... 
+ +{{- '\n' ~ toolcall_end_token }} +{%- endif -%} +{{- '[e~[\n' }} + +{#- Render messages -#} +{%- set last_tool_call = namespace(name=none) -%} +{%- for message in conversation_messages -%} + {%- if message.role == 'assistant' -%} + {#- Only render reasoning_content if no user message follows -#} + {{- ']~b]ai' ~ '\n' }} + + {%- set reasoning_content = '' %} + {%- set content = visible_text(message.content) %} + {%- if message.reasoning_content is string %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in content %} + {%- set reasoning_content = content.split('')[0].strip('\n').split('')[-1].strip('\n') %} + {%- set content = content.split('')[-1].strip('\n') %} + {%- endif %} + {%- endif %} + {%- if reasoning_content and loop.index0 > ns.last_user_index -%} + {{- '' ~ '\n' ~ reasoning_content ~ '\n' ~ '' ~ '\n\n' }} + {%- endif -%} + {%- if content -%} + {{- content }} + {%- endif -%} + {%- if message.tool_calls -%} + {{- '\n' ~ toolcall_begin_token ~ '\n' }} + + {%- for tool_call in message.tool_calls -%} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '' }} + {% set _args = tool_call.arguments %} + {%- for k, v in _args.items() %} + {{- '' }} + {{- v | tojson(ensure_ascii=False) if v is not string else v }} + {{- '' }} + {% endfor %} + {{- '' ~ '\n' }} + {%- endfor -%} + + {{- toolcall_end_token}} + {%- set last_tool_call.name = message.tool_calls[-1].name -%} + {%- else -%} + {%- set last_tool_call.name = none -%} + {%- endif -%} + {{- '[e~[' ~ '\n' }} + + {%- elif message.role == 'tool' -%} + {%- if last_tool_call.name is none -%} + {{- raise_exception("Message has tool role, but there was no previous assistant message with a tool call!") }} + {%- endif -%} + {%- if loop.first or (conversation_messages[loop.index0 - 1].role != 'tool') -%} + {{- ']~b]tool' }} + {%- endif -%} + {%- if message.content is string -%} + {{- '\n' }} + {{- message.content }} + {{- '' }} + {%- else 
-%} + {%- for tr in message.content -%} + {{- '\n' }} + {{- tr.output if tr.output is defined else (tr.text if tr.type == 'text' and tr.text is defined else tr) }} + {{- '\n' }} + {%- endfor -%} + {%- endif -%} + {%- if loop.last or (conversation_messages[loop.index0 + 1].role != 'tool') -%} + {{- '[e~[\n' -}} + {%- endif -%} + + {%- elif message.role == 'user' -%} + {{- ']~b]user' ~ '\n' }} + {{- visible_text(message.content) }} + {{- '[e~[' ~ '\n' }} + {%- endif -%} +{%- endfor -%} + +{#- Generation prompt -#} +{%- if add_generation_prompt -%} +{{- ']~b]ai' ~ '\n' ~ '' ~ '\n' }} +{%- endif -%} diff --git a/config.json b/config.json new file mode 100644 index 0000000..70534b2 --- /dev/null +++ b/config.json @@ -0,0 +1,117 @@ +{ + "name_or_path": "tclf90/MiniMax-M2-AWQ", + "architectures": [ + "MiniMaxM2ForCausalLM" + ], + "attention_dropout": 0.0, + "attn_type_list": [ + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1 + ], + "bos_token_id": null, + "eos_token_id": null, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 1536, + "layernorm_full_attention_beta": 1.0, + "layernorm_linear_attention_beta": 1.0, + "layernorm_mlp_beta": 1.0, + "max_position_embeddings": 196608, + "mlp_intermediate_size": 8192, + "model_type": "mixtral", + "mtp_transformer_layers": 1, + "num_attention_heads": 48, + "num_experts_per_tok": 8, + "num_hidden_layers": 62, + "num_key_value_heads": 8, + "num_local_experts": 256, + "num_mtp_modules": 3, + "output_router_logits": false, + "qk_norm_type": "per_layer", + "quantization_config": { + "quant_method": "awq", + "bits": 4, + "group_size": 128, + "version": "gemm", + "zero_point": true + }, + "rms_norm_eps": 1e-06, + 
"rope_theta": 5000000, + "rotary_dim": 64, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "scoring_func": "sigmoid", + "shared_intermediate_size": 0, + "shared_moe_mode": "sigmoid", + "sliding_window": null, + "tie_word_embeddings": false, + "transformers_version": "4.46.1", + "use_cache": true, + "use_mtp": true, + "use_qk_norm": true, + "use_routing_bias": true, + "vocab_size": 200064, + "torch_dtype": "float16" +} \ No newline at end of file diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/docs/function_call_guide.md b/docs/function_call_guide.md new file mode 100644 index 0000000..ffed775 --- /dev/null +++ b/docs/function_call_guide.md @@ -0,0 +1,482 @@ +# MiniMax-M2 Function Call Guide + +## Introduction + +The MiniMax-M2 model supports function calling capabilities, enabling the model to identify when external functions need to be called and output function call parameters in a structured format. This document provides detailed instructions on how to use the function calling features of MiniMax-M2. + +## Basic Example + +The following Python script implements a weather query function call example based on the OpenAI SDK: + +```python +from openai import OpenAI +import json + +client = OpenAI(base_url="http://localhost:8000/v1", api_key="dummy") + +def get_weather(location: str, unit: str): + return f"Getting the weather for {location} in {unit}..." 
+ +tool_functions = {"get_weather": get_weather} + +tools = [{ + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": {"type": "string", "description": "City and state, e.g., 'San Francisco, CA'"}, + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]} + }, + "required": ["location", "unit"] + } + } +}] + +response = client.chat.completions.create( + model=client.models.list().data[0].id, + messages=[{"role": "user", "content": "What's the weather like in San Francisco? use celsius."}], + tools=tools, + tool_choice="auto" +) + +print(response) + +tool_call = response.choices[0].message.tool_calls[0].function +print(f"Function called: {tool_call.name}") +print(f"Arguments: {tool_call.arguments}") +print(f"Result: {get_weather(**json.loads(tool_call.arguments))}") +``` + +**Output Example:** +``` +Function called: get_weather +Arguments: {"location": "San Francisco, CA", "unit": "celsius"} +Result: Getting the weather for San Francisco, CA in celsius... +``` + +## Manually Parsing Model Output + +If you cannot use the built-in parser of inference engines that support MiniMax-M2, or need to use other inference frameworks (such as transformers, TGI, etc.), you can manually parse the model's raw output using the following method. This approach requires you to parse the XML tag format of the model output yourself. 
+ +### Example Using Transformers + +Here is a complete example using the transformers library: + +```python +from transformers import AutoTokenizer + +def get_default_tools(): + return [ + { + "name": "get_current_weather", + "description": "Get the latest weather for a location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "A certain city, such as Beijing, Shanghai" + } + }, + "required": ["location"] + } + } + ] + +# Load model and tokenizer +model_id = "MiniMaxAI/MiniMax-M2" +tokenizer = AutoTokenizer.from_pretrained(model_id) +prompt = "What's the weather like in Shanghai today?" +messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": prompt}, +] + +# Enable function calling tools +tools = get_default_tools() + +# Apply chat template and include tool definitions +text = tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + tools=tools +) + +# Send request (using any inference service) +import requests +payload = { + "model": "MiniMaxAI/MiniMax-M2", + "prompt": text, + "max_tokens": 4096 +} +response = requests.post( + "http://localhost:8000/v1/completions", + headers={"Content-Type": "application/json"}, + json=payload, + stream=False, +) + +# Model output needs manual parsing +raw_output = response.json()["choices"][0]["text"] +print("Raw output:", raw_output) + +# Use the parsing function below to process the output +function_calls = parse_tool_calls(raw_output, tools) +``` + +## 🛠️ Function Call Definition + +### Function Structure + +Function calls need to define the `tools` field in the request body. 
Each function consists of the following parts: + +```json +{ + "tools": [ + { + "name": "search_web", + "description": "Search function.", + "parameters": { + "properties": { + "query_list": { + "description": "Keywords for search, list should contain 1 element.", + "items": { "type": "string" }, + "type": "array" + }, + "query_tag": { + "description": "Category of query", + "items": { "type": "string" }, + "type": "array" + } + }, + "required": [ "query_list", "query_tag" ], + "type": "object" + } + } + ] +} +``` + +**Field Descriptions:** +- `name`: Function name +- `description`: Function description +- `parameters`: Function parameter definition + - `properties`: Parameter property definition, where key is the parameter name and value contains detailed parameter description + - `required`: List of required parameters + - `type`: Parameter type (usually "object") + +### Internal Processing Format + +When processing within the MiniMax-M2 model, function definitions are converted to a special format and concatenated to the input text. Here is a complete example: + +``` +]~!b[]~b]system +You are a helpful assistant. + +# Tools +You may call one or more tools to assist with the user query. +Here are the tools available in JSONSchema format: + +<tools> +<tool>{"name": "search_web", "description": "Search function.", "parameters": {"type": "object", "properties": {"query_list": {"type": "array", "items": {"type": "string"}, "description": "Keywords for search, list should contain 1 element."}, "query_tag": {"type": "array", "items": {"type": "string"}, "description": "Category of query"}}, "required": ["query_list", "query_tag"]}}</tool> +</tools> + +When making tool calls, use XML format to invoke tools and pass parameters: + +<minimax:tool_call> +<invoke name="tool-name-1"> +<parameter name="param-key-1">param-value-1</parameter> +<parameter name="param-key-2">param-value-2</parameter> +... 
+</invoke> +</minimax:tool_call>[e~[ +]~b]user +When were the latest announcements from OpenAI and Gemini?[e~[ +]~b]ai +<think> +``` + +**Format Description:** + +- `]~!b[]~b]system`: System message start marker +- `[e~[`: Message end marker +- `]~b]user`: User message start marker +- `]~b]ai`: Assistant message start marker +- `]~b]tool`: Tool result message start marker +- `<tools>...</tools>`: Tool definition area, each tool is wrapped with `<tool>` tag, content is JSON Schema +- `<minimax:tool_call>...</minimax:tool_call>`: Tool call area +- `<think>`: Thinking process marker during generation (optional) + +### Model Output Format + +MiniMax-M2 uses structured XML tag format: + +```xml +<minimax:tool_call> +<invoke name="search_web"> +<parameter name="query_tag">["technology", "events"]</parameter> +<parameter name="query_list">["\"OpenAI\" \"latest\" \"release\""]</parameter> +</invoke> +<invoke name="search_web"> +<parameter name="query_tag">["technology", "events"]</parameter> +<parameter name="query_list">["\"Gemini\" \"latest\" \"release\""]</parameter> +</invoke> +</minimax:tool_call> +``` + +Each function call uses the `<invoke name="function_name">` tag, and parameters use the `<parameter name="parameter_name">` tag wrapper. + +## Manually Parsing Function Call Results + +### Parsing Function Calls + +MiniMax-M2 uses structured XML tags, which require a different parsing approach. The core function is as follows: + +```python +import re +import json +from typing import Any, Optional, List, Dict + + +def extract_name(name_str: str) -> str: + """Extract name from quoted string""" + name_str = name_str.strip() + if name_str.startswith('"') and name_str.endswith('"'): + return name_str[1:-1] + elif name_str.startswith("'") and name_str.endswith("'"): + return name_str[1:-1] + return name_str + + +def convert_param_value(value: str, param_type: str) -> Any: + """Convert parameter value based on parameter type""" + if value.lower() == "null": + return None + + param_type = param_type.lower() + + if param_type in ["string", "str", "text"]: + return value + elif param_type in ["integer", "int"]: + try: + return int(value) + except (ValueError, TypeError): + return value + elif param_type in ["number", "float"]: + try: + val = float(value) + return val if val != int(val) else int(val) + except (ValueError, TypeError): + return value + elif param_type in ["boolean", "bool"]: + return 
value.lower() in ["true", "1"] + elif param_type in ["object", "array"]: + try: + return json.loads(value) + except json.JSONDecodeError: + return value + else: + # Try JSON parsing, return string if failed + try: + return json.loads(value) + except json.JSONDecodeError: + return value + + +def parse_tool_calls(model_output: str, tools: Optional[List[Dict]] = None) -> List[Dict]: + """ + Extract all tool calls from model output + + Args: + model_output: Complete output text from the model + tools: Tool definition list for getting parameter type information, format can be: + - [{"name": "...", "parameters": {...}}] + - [{"type": "function", "function": {"name": "...", "parameters": {...}}}] + + Returns: + Parsed tool call list, each element contains name and arguments fields + + Example: + >>> tools = [{ + ... "name": "get_weather", + ... "parameters": { + ... "type": "object", + ... "properties": { + ... "location": {"type": "string"}, + ... "unit": {"type": "string"} + ... } + ... } + ... }] + >>> output = ''' + ... + ... San Francisco + ... celsius + ... + ... 
''' + >>> result = parse_tool_calls(output, tools) + >>> print(result) + [{'name': 'get_weather', 'arguments': {'location': 'San Francisco', 'unit': 'celsius'}}] + """ + # Quick check if tool call marker is present + if "<minimax:tool_call>" not in model_output: + return [] + + tool_calls = [] + + try: + # Match all <minimax:tool_call> blocks + tool_call_regex = re.compile(r"<minimax:tool_call>(.*?)</minimax:tool_call>", re.DOTALL) + invoke_regex = re.compile(r"<invoke name=(.*?)</invoke>", re.DOTALL) + parameter_regex = re.compile(r"<parameter name=(.*?)</parameter>", re.DOTALL) + + # Iterate through all tool_call blocks + for tool_call_match in tool_call_regex.findall(model_output): + # Iterate through all invokes in this block + for invoke_match in invoke_regex.findall(tool_call_match): + # Extract function name + name_match = re.search(r'^([^>]+)', invoke_match) + if not name_match: + continue + + function_name = extract_name(name_match.group(1)) + + # Get parameter configuration + param_config = {} + if tools: + for tool in tools: + tool_name = tool.get("name") or tool.get("function", {}).get("name") + if tool_name == function_name: + params = tool.get("parameters") or tool.get("function", {}).get("parameters") + if isinstance(params, dict) and "properties" in params: + param_config = params["properties"] + break + + # Extract parameters + param_dict = {} + for match in parameter_regex.findall(invoke_match): + param_match = re.search(r'^([^>]+)>(.*)', match, re.DOTALL) + if param_match: + param_name = extract_name(param_match.group(1)) + param_value = param_match.group(2).strip() + + # Remove leading and trailing newlines + if param_value.startswith('\n'): + param_value = param_value[1:] + if param_value.endswith('\n'): + param_value = param_value[:-1] + + # Get parameter type and convert + param_type = "string" + if param_name in param_config: + if isinstance(param_config[param_name], dict) and "type" in param_config[param_name]: + param_type = param_config[param_name]["type"] + + param_dict[param_name] = convert_param_value(param_value, param_type) + + tool_calls.append({ + "name": 
function_name, + "arguments": param_dict + }) + + except Exception as e: + print(f"Failed to parse tool calls: {e}") + return [] + + return tool_calls +``` + +**Usage Example:** + +```python +# Define tools +tools = [ + { + "name": "get_weather", + "parameters": { + "type": "object", + "properties": { + "location": {"type": "string"}, + "unit": {"type": "string"} + }, + "required": ["location", "unit"] + } + } +] + +# Model output +model_output = """Let me help you query the weather. + + +San Francisco +celsius + +""" + +# Parse tool calls +tool_calls = parse_tool_calls(model_output, tools) + +# Output results +for call in tool_calls: + print(f"Function called: {call['name']}") + print(f"Arguments: {call['arguments']}") + # Output: Function called: get_weather + # Arguments: {'location': 'San Francisco', 'unit': 'celsius'} +``` + +### Executing Function Calls + +After parsing is complete, you can execute the corresponding function and construct the return result: + +```python +def execute_function_call(function_name: str, arguments: dict): + """Execute function call and return result""" + if function_name == "get_weather": + location = arguments.get("location", "Unknown location") + unit = arguments.get("unit", "celsius") + # Build function execution result + return { + "role": "tool", + "content": [ + { + "name": function_name, + "type": "text", + "text": json.dumps({ + "location": location, + "temperature": "25", + "unit": unit, + "weather": "Sunny" + }, ensure_ascii=False) + } + ] + } + elif function_name == "search_web": + query_list = arguments.get("query_list", []) + query_tag = arguments.get("query_tag", []) + # Simulate search results + return { + "role": "tool", + "content": [ + { + "name": function_name, + "type": "text", + "text": f"Search keywords: {query_list}, Category: {query_tag}\nSearch results: Relevant information found" + } + ] + } + + return None +``` + +### Returning Function Execution Results to the Model + +After successfully parsing 
function calls, you should add the function execution results to the conversation history so that the model can access and utilize this information in subsequent interactions. Refer to chat_template.jinja for concatenation format. + +## References + +- [MiniMax-M2 Model Repository](https://github.com/MiniMax-AI/MiniMax-M2) +- [vLLM Project Homepage](https://github.com/vllm-project/vllm) +- [OpenAI Python SDK](https://github.com/openai/openai-python) diff --git a/docs/function_call_guide_cn.md b/docs/function_call_guide_cn.md new file mode 100644 index 0000000..db8d799 --- /dev/null +++ b/docs/function_call_guide_cn.md @@ -0,0 +1,482 @@ +# MiniMax-M2 函数调用(Function Call)功能指南 + +## 简介 + +MiniMax-M2 模型支持函数调用功能,使模型能够识别何时需要调用外部函数,并以结构化格式输出函数调用参数。本文档详细介绍了如何使用 MiniMax-M2 的函数调用功能。 + +## 基础示例 + +以下 Python 脚本基于 OpenAI SDK 实现了一个天气查询函数的调用示例: + +```python +from openai import OpenAI +import json + +client = OpenAI(base_url="http://localhost:8000/v1", api_key="dummy") + +def get_weather(location: str, unit: str): + return f"Getting the weather for {location} in {unit}..." + +tool_functions = {"get_weather": get_weather} + +tools = [{ + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": {"type": "string", "description": "City and state, e.g., 'San Francisco, CA'"}, + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]} + }, + "required": ["location", "unit"] + } + } +}] + +response = client.chat.completions.create( + model=client.models.list().data[0].id, + messages=[{"role": "user", "content": "What's the weather like in San Francisco? 
use celsius."}], + tools=tools, + tool_choice="auto" +) + +print(response) + +tool_call = response.choices[0].message.tool_calls[0].function +print(f"Function called: {tool_call.name}") +print(f"Arguments: {tool_call.arguments}") +print(f"Result: {get_weather(**json.loads(tool_call.arguments))}") +``` + +**输出示例:** +``` +Function called: get_weather +Arguments: {"location": "San Francisco, CA", "unit": "celsius"} +Result: Getting the weather for San Francisco, CA in celsius... +``` + +## 手动解析模型输出 + +如果您无法使用已支持 MiniMax-M2 的推理引擎的内置解析器,或者需要使用其他推理框架(如 transformers、TGI 等),可以使用以下方法手动解析模型的原始输出。这种方法需要您自己解析模型输出的 XML 标签格式。 + +### 使用 Transformers 的示例 + +以下是使用 transformers 库的完整示例: + +```python +from transformers import AutoTokenizer + +def get_default_tools(): + return [ + { + "name": "get_current_weather", + "description": "Get the latest weather for a location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "A certain city, such as Beijing, Shanghai" + } + }, + "required": ["location"] + } + } + ] + +# 加载分词器 +tokenizer = AutoTokenizer.from_pretrained("MiniMaxAI/MiniMax-M2") +prompt = "What's the weather like in Shanghai today?" 
+messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": prompt}, +] + +# 启用函数调用工具 +tools = get_default_tools() + +# 应用聊天模板,并加入工具定义 +text = tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + tools=tools +) + +# 发送请求(这里使用任何推理服务) +import requests +payload = { + "model": "MiniMaxAI/MiniMax-M2", + "prompt": text, + "max_tokens": 4096 +} +response = requests.post( + "http://localhost:8000/v1/completions", + headers={"Content-Type": "application/json"}, + json=payload, + stream=False, +) + +# 模型输出需要手动解析 +raw_output = response.json()["choices"][0]["text"] +print("原始输出:", raw_output) + +# 使用下面的解析函数处理输出 +function_calls = parse_tool_calls(raw_output, tools) +``` + +## 🛠️ 函数调用的定义 + +### 函数结构体 + +函数调用需要在请求体中定义 `tools` 字段,每个函数由以下部分组成: + +```json +{ + "tools": [ + { + "name": "search_web", + "description": "搜索函数。", + "parameters": { + "properties": { + "query_list": { + "description": "进行搜索的关键词,列表元素个数为1。", + "items": { "type": "string" }, + "type": "array" + }, + "query_tag": { + "description": "query的分类", + "items": { "type": "string" }, + "type": "array" + } + }, + "required": [ "query_list", "query_tag" ], + "type": "object" + } + } + ] +} +``` + +**字段说明:** +- `name`: 函数名称 +- `description`: 函数功能描述 +- `parameters`: 函数参数定义 + - `properties`: 参数属性定义,key 是参数名,value 包含参数的详细描述 + - `required`: 必填参数列表 + - `type`: 参数类型(通常为 "object") + +### 模型内部处理格式 + +在 MiniMax-M2 模型内部处理时,函数定义会被转换为特殊格式并拼接到输入文本中。以下是一个完整的示例: + +``` +]~!b[]~b]system +You are a helpful assistant. + +# Tools +You may call one or more tools to assist with the user query. 
+Here are the tools available in JSONSchema format: + + +{"name": "search_web", "description": "搜索函数。", "parameters": {"type": "object", "properties": {"query_list": {"type": "array", "items": {"type": "string"}, "description": "进行搜索的关键词,列表元素个数为1。"}, "query_tag": {"type": "array", "items": {"type": "string"}, "description": "query的分类"}}, "required": ["query_list", "query_tag"]}} + + +When making tool calls, use XML format to invoke tools and pass parameters: + + + +param-value-1 +param-value-2 +... + +[e~[ +]~b]user +OpenAI 和 Gemini 的最近一次发布会都是什么时候?[e~[ +]~b]ai + +``` + +**格式说明:** + +- `]~!b[]~b]system`: System 消息开始标记 +- `[e~[`: 消息结束标记 +- `]~b]user`: User 消息开始标记 +- `]~b]ai`: Assistant 消息开始标记 +- `]~b]tool`: Tool 结果消息开始标记 +- `...`: 工具定义区域,每个工具用 `` 标签包裹,内容为 JSON Schema +- `...`: 工具调用区域 +- ``: 生成时的思考过程标记(可选) + +### 模型输出格式 + +MiniMax-M2使用结构化的 XML 标签格式: + +```xml + + +["technology", "events"] +["\"OpenAI\" \"latest\" \"release\""] + + +["technology", "events"] +["\"Gemini\" \"latest\" \"release\""] + + +``` + +每个函数调用使用 `` 标签,参数使用 `` 标签包裹。 + +## 手动解析函数调用结果 + +### 解析函数调用 + +MiniMax-M2使用结构化的 XML 标签,需要不同的解析方式。核心函数如下: + +```python +import re +import json +from typing import Any, Optional, List, Dict + + +def extract_name(name_str: str) -> str: + """从引号包裹的字符串中提取名称""" + name_str = name_str.strip() + if name_str.startswith('"') and name_str.endswith('"'): + return name_str[1:-1] + elif name_str.startswith("'") and name_str.endswith("'"): + return name_str[1:-1] + return name_str + + +def convert_param_value(value: str, param_type: str) -> Any: + """根据参数类型转换参数值""" + if value.lower() == "null": + return None + + param_type = param_type.lower() + + if param_type in ["string", "str", "text"]: + return value + elif param_type in ["integer", "int"]: + try: + return int(value) + except (ValueError, TypeError): + return value + elif param_type in ["number", "float"]: + try: + val = float(value) + return val if val != int(val) else int(val) + except (ValueError, TypeError): + return value 
+ elif param_type in ["boolean", "bool"]: + return value.lower() in ["true", "1"] + elif param_type in ["object", "array"]: + try: + return json.loads(value) + except json.JSONDecodeError: + return value + else: + # 尝试 JSON 解析,失败则返回字符串 + try: + return json.loads(value) + except json.JSONDecodeError: + return value + + +def parse_tool_calls(model_output: str, tools: Optional[List[Dict]] = None) -> List[Dict]: + """ + 从模型输出中提取所有工具调用 + + Args: + model_output: 模型的完整输出文本 + tools: 工具定义列表,用于获取参数类型信息,格式可以是: + - [{"name": "...", "parameters": {...}}] + - [{"type": "function", "function": {"name": "...", "parameters": {...}}}] + + Returns: + 解析后的工具调用列表,每个元素包含 name 和 arguments 字段 + + Example: + >>> tools = [{ + ... "name": "get_weather", + ... "parameters": { + ... "type": "object", + ... "properties": { + ... "location": {"type": "string"}, + ... "unit": {"type": "string"} + ... } + ... } + ... }] + >>> output = '''<minimax:tool_call> + ... <invoke name="get_weather"> + ... <parameter name="location">San Francisco</parameter> + ... <parameter name="unit">celsius</parameter> + ... </invoke> + ... </minimax:tool_call>''' + >>> result = parse_tool_calls(output, tools) + >>> print(result) + [{'name': 'get_weather', 'arguments': {'location': 'San Francisco', 'unit': 'celsius'}}] + """ + # 快速检查是否包含工具调用标记 + if "<minimax:tool_call>" not in model_output: + return [] + + tool_calls = [] + + try: + # 匹配所有 <minimax:tool_call> 块 + tool_call_regex = re.compile(r"<minimax:tool_call>(.*?)</minimax:tool_call>", re.DOTALL) + invoke_regex = re.compile(r"<invoke name=(.*?)</invoke>", re.DOTALL) + parameter_regex = re.compile(r"<parameter name=(.*?)</parameter>", re.DOTALL) + + # 遍历所有 tool_call 块 + for tool_call_match in tool_call_regex.findall(model_output): + # 遍历该块中的所有 invoke + for invoke_match in invoke_regex.findall(tool_call_match): + # 提取函数名 + name_match = re.search(r'^([^>]+)', invoke_match) + if not name_match: + continue + + function_name = extract_name(name_match.group(1)) + + # 获取参数配置 + param_config = {} + if tools: + for tool in tools: + tool_name = tool.get("name") or tool.get("function", {}).get("name") + if tool_name == function_name: + params = tool.get("parameters") or tool.get("function", {}).get("parameters") + if isinstance(params, dict) and "properties" in 
params: + param_config = params["properties"] + break + + # 提取参数 + param_dict = {} + for match in parameter_regex.findall(invoke_match): + param_match = re.search(r'^([^>]+)>(.*)', match, re.DOTALL) + if param_match: + param_name = extract_name(param_match.group(1)) + param_value = param_match.group(2).strip() + + # 去除首尾的换行符 + if param_value.startswith('\n'): + param_value = param_value[1:] + if param_value.endswith('\n'): + param_value = param_value[:-1] + + # 获取参数类型并转换 + param_type = "string" + if param_name in param_config: + if isinstance(param_config[param_name], dict) and "type" in param_config[param_name]: + param_type = param_config[param_name]["type"] + + param_dict[param_name] = convert_param_value(param_value, param_type) + + tool_calls.append({ + "name": function_name, + "arguments": param_dict + }) + + except Exception as e: + print(f"解析工具调用失败: {e}") + return [] + + return tool_calls +``` + +**使用示例:** + +```python +# 定义工具 +tools = [ + { + "name": "get_weather", + "parameters": { + "type": "object", + "properties": { + "location": {"type": "string"}, + "unit": {"type": "string"} + }, + "required": ["location", "unit"] + } + } +] + +# 模型输出 +model_output = """我来帮你查询天气。 + + +San Francisco +celsius + +""" + +# 解析工具调用 +tool_calls = parse_tool_calls(model_output, tools) + +# 输出结果 +for call in tool_calls: + print(f"调用函数: {call['name']}") + print(f"参数: {call['arguments']}") + # 输出: 调用函数: get_weather + # 参数: {'location': 'San Francisco', 'unit': 'celsius'} +``` + +### 执行函数调用 + +解析完成后,您可以执行对应的函数并构建返回结果: + +```python +def execute_function_call(function_name: str, arguments: dict): + """执行函数调用并返回结果""" + if function_name == "get_weather": + location = arguments.get("location", "未知位置") + unit = arguments.get("unit", "celsius") + # 构建函数执行结果 + return { + "role": "tool", + "content": [ + { + "name": function_name, + "type": "text", + "text": json.dumps({ + "location": location, + "temperature": "25", + "unit": unit, + "weather": "晴朗" + }, ensure_ascii=False) + } + ] + } 
+ elif function_name == "search_web": + query_list = arguments.get("query_list", []) + query_tag = arguments.get("query_tag", []) + # 模拟搜索结果 + return { + "role": "tool", + "content": [ + { + "name": function_name, + "type": "text", + "text": f"搜索关键词: {query_list}, 分类: {query_tag}\n搜索结果: 相关信息已找到" + } + ] + } + + return None +``` + +### 将函数执行结果返回给模型 + +成功解析函数调用后,您应将函数执行结果添加到对话历史中,以便模型在后续交互中能够访问和利用这些信息,拼接格式参考chat_template.jinja + +## 参考资料 + +- [MiniMax-M2 模型仓库](https://github.com/MiniMax-AI/MiniMax-M2) +- [vLLM 项目主页](https://github.com/vllm-project/vllm) +- [OpenAI Python SDK](https://github.com/openai/openai-python) \ No newline at end of file diff --git a/docs/vllm_deploy_guide.md b/docs/vllm_deploy_guide.md new file mode 100644 index 0000000..a6d3cf6 --- /dev/null +++ b/docs/vllm_deploy_guide.md @@ -0,0 +1,88 @@ +# MiniMax M2 Model vLLM Deployment Guide + +We recommend using [vLLM](https://docs.vllm.ai/en/stable/) to deploy the [MiniMax-M2](https://huggingface.co/MiniMaxAI/MiniMax-M2) model. vLLM is a high-performance inference engine with excellent serving throughput, efficient and intelligent memory management, powerful batch request processing capabilities, and deeply optimized underlying performance. We recommend reviewing vLLM's official documentation to check hardware compatibility before deployment. + +## System Requirements + +- OS: Linux + +- Python: 3.9 - 3.12 + +- GPU: + + - compute capability 7.0 or higher + + - Memory requirements: 220 GB for weights, 60 GB per 1M context tokens + +The following are recommended configurations; actual requirements should be adjusted based on your use case: + +- 4x 96GB GPUs: Supports context input of up to 400K tokens. + +- 8x 144GB GPUs: Supports context input of up to 3M tokens. + +## Deployment with Python + +It is recommended to use a virtual environment (such as venv, conda, or uv) to avoid dependency conflicts. 
We recommend installing vLLM in a fresh Python environment: + +```bash +# Until a vLLM release with MiniMax-M2 support is published, install the nightly build: +uv pip install -U vllm \ + --torch-backend=auto \ + --extra-index-url https://wheels.vllm.ai/nightly +# Once it has been released, install the stable version instead: +uv pip install "vllm" --torch-backend=auto +``` + +Run the following command to start the vLLM server. vLLM will automatically download and cache the MiniMax-M2 model from Hugging Face. + +4-GPU deployment command: + +```bash +SAFETENSORS_FAST_GPU=1 VLLM_USE_V1=0 vllm serve \ + MiniMaxAI/MiniMax-M2 \ + --trust-remote-code \ + --enable-expert-parallel --tensor-parallel-size 4 \ + --enable-auto-tool-choice --tool-call-parser minimax_m2 \ + --reasoning-parser minimax_m2 +``` + +## Testing Deployment + +After startup, you can test the vLLM OpenAI-compatible API with the following command: + +```bash +curl http://localhost:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "MiniMaxAI/MiniMax-M2", + "messages": [ + {"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}]}, + {"role": "user", "content": [{"type": "text", "text": "Who won the world series in 2020?"}]} + ] + }' +``` + +## Common Issues + +### Hugging Face Network Issues + +If you cannot reach Hugging Face, point `HF_ENDPOINT` at a mirror before pulling the model. + +```bash +export HF_ENDPOINT=https://hf-mirror.com +``` + +### MiniMax-M2 model is not currently supported + +This error means the installed vLLM version is too old. Please upgrade to the latest version. + +## Getting Support + +If you encounter any issues while deploying the MiniMax model: + +- Contact our technical support team through official channels such as email at api@minimaxi.com + +- Submit an issue on our [GitHub](https://github.com/MiniMax-AI) repository + +We continuously optimize the deployment experience for our models. Feedback is welcome! 
+ diff --git a/docs/vllm_deploy_guide_cn.md b/docs/vllm_deploy_guide_cn.md new file mode 100644 index 0000000..f41a06d --- /dev/null +++ b/docs/vllm_deploy_guide_cn.md @@ -0,0 +1,85 @@ +# MiniMax M2 模型 vLLM 部署指南 + +我们推荐使用 [vLLM](https://docs.vllm.ai/en/stable/) 来部署 [MiniMax-M2](https://huggingface.co/MiniMaxAI/MiniMax-M2) 模型。vLLM 是一个高性能的推理引擎,其具有卓越的服务吞吐、高效智能的内存管理机制、强大的批量请求处理能力、深度优化的底层性能等特性。我们建议在部署之前查看 vLLM 的官方文档以检查硬件兼容性。 + +## 环境要求 + +- OS:Linux + +- Python:3.9 - 3.12 + +- GPU: + + - compute capability 7.0 or higher + + - 显存需求:权重需要 220 GB,每 1M 上下文 token 需要 60 GB + +以下为推荐配置,实际需求请根据业务场景调整: + +- 96G x4 GPU:支持 40 万 token 的上下文输入。 + +- 144G x8 GPU:支持长达 300 万 token 的上下文输入。 + +## 使用 Python 部署 + +建议使用虚拟环境(如 venv、conda、uv)以避免依赖冲突。建议在全新的 Python 环境中安装 vLLM: +```bash +# 在支持 MiniMax-M2 的正式版本发布之前,请安装 nightly 构建: +uv pip install -U vllm \ + --torch-backend=auto \ + --extra-index-url https://wheels.vllm.ai/nightly +# 正式版本发布之后,改用以下命令安装稳定版: +uv pip install "vllm" --torch-backend=auto +``` + +运行如下命令启动 vLLM 服务器,vLLM 会自动从 Huggingface 下载并缓存 MiniMax-M2 模型。 + +4 卡部署命令: + +```bash +SAFETENSORS_FAST_GPU=1 VLLM_USE_V1=0 vllm serve \ + MiniMaxAI/MiniMax-M2 \ + --trust-remote-code \ + --enable-expert-parallel --tensor-parallel-size 4 \ + --enable-auto-tool-choice --tool-call-parser minimax_m2 \ + --reasoning-parser minimax_m2 +``` + +## 测试部署 + +启动后,可以通过如下命令测试 vLLM OpenAI 兼容接口: + +```bash +curl http://localhost:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "MiniMaxAI/MiniMax-M2", + "messages": [ + {"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}]}, + {"role": "user", "content": [{"type": "text", "text": "Who won the world series in 2020?"}]} + ] + }' +``` + +## 常见问题 + +### Huggingface 网络问题 + +如果无法访问 Hugging Face,可以先将 `HF_ENDPOINT` 指向镜像站点,再进行拉取。 + +```bash +export HF_ENDPOINT=https://hf-mirror.com +``` + +### MiniMax-M2 model is not currently supported + +出现该报错说明当前 vLLM 版本过旧,请升级到最新版本。 + +## 获取支持 + +如果在部署 MiniMax 模型过程中遇到任何问题: + +- 通过邮箱 api@minimaxi.com 
等官方渠道联系我们的技术支持团队 + +- 在我们的 [GitHub](https://github.com/MiniMax-AI) 仓库提交 Issue +我们会持续优化模型的部署体验,欢迎反馈! diff --git a/figures/Bench.png b/figures/Bench.png new file mode 100644 index 0000000..7e3bfc3 Binary files /dev/null and b/figures/Bench.png differ diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..8aaeb4c --- /dev/null +++ b/generation_config.json @@ -0,0 +1,7 @@ +{ + "do_sample": true, + "temperature": 1.0, + "top_p": 0.95, + "top_k": 40, + "transformers_version": "4.46.1" +} diff --git a/merges.txt b/merges.txt new file mode 100644 index 0000000..6a86280 --- /dev/null +++ b/merges.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca47ea60cf5bd48832586adc264f439d0ea4254176b24a95f85eaa9750e2b5f9 +size 2414077 diff --git a/model-00001-of-00041.safetensors b/model-00001-of-00041.safetensors new file mode 100644 index 0000000..f6f5473 --- /dev/null +++ b/model-00001-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1bb5393a7818d3160927d830676cbfb166032ec18fd92d13b3f27c4fead0987 +size 2998507472 diff --git a/model-00002-of-00041.safetensors b/model-00002-of-00041.safetensors new file mode 100644 index 0000000..786fd69 --- /dev/null +++ b/model-00002-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0548c74136e47f31d99f1ec36e2f7ba26af4607dba634f0d53aad4d2542d26e +size 3000140360 diff --git a/model-00003-of-00041.safetensors b/model-00003-of-00041.safetensors new file mode 100644 index 0000000..e429523 --- /dev/null +++ b/model-00003-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:501907424df6366c8347ab5563c87be221fc943425f9b63beda480e5cefcd6e2 +size 2999224328 diff --git a/model-00004-of-00041.safetensors b/model-00004-of-00041.safetensors new file mode 100644 index 0000000..4a6645e --- /dev/null +++ b/model-00004-of-00041.safetensors @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:9ad6ba2e48b9f3694285125fc8f775477aa5d9f8c6f2fb722622d0b77483efbd +size 3000139080 diff --git a/model-00005-of-00041.safetensors b/model-00005-of-00041.safetensors new file mode 100644 index 0000000..6c7fb83 --- /dev/null +++ b/model-00005-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbad54e6e5b5e458182b0ae33394a7bff87f02be2709913d7d0e3bd6f9464338 +size 2999224664 diff --git a/model-00006-of-00041.safetensors b/model-00006-of-00041.safetensors new file mode 100644 index 0000000..e94db01 --- /dev/null +++ b/model-00006-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:559f18c233a7a3b23270cb5078cdb2a0d459a4096006bc86bcdaca31a611476c +size 2999223312 diff --git a/model-00007-of-00041.safetensors b/model-00007-of-00041.safetensors new file mode 100644 index 0000000..cd4fd5d --- /dev/null +++ b/model-00007-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bff913bf65ab332dfb0ea6eaf76cc4c7b57389c6df7beb80e631cca298d3e1cd +size 3000141216 diff --git a/model-00008-of-00041.safetensors b/model-00008-of-00041.safetensors new file mode 100644 index 0000000..b7fc4c3 --- /dev/null +++ b/model-00008-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:353643fc6a9a90c7e2b098d3f272e7f312fc5a3d367a5255bd9fd2b0bea52a8c +size 2999226896 diff --git a/model-00009-of-00041.safetensors b/model-00009-of-00041.safetensors new file mode 100644 index 0000000..1ba934b --- /dev/null +++ b/model-00009-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b8c84b2ee3746464971bf11e3ef07ba26ae0b8845882602c5ca2cbad2e09ca1 +size 3000144016 diff --git a/model-00010-of-00041.safetensors b/model-00010-of-00041.safetensors new file mode 100644 index 0000000..12e3ef0 --- /dev/null +++ b/model-00010-of-00041.safetensors @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:8a2a546c16ae0aefe42d03d3812473b1baf3286d7b7a8b12082731a9c083ea0c +size 2999227968 diff --git a/model-00011-of-00041.safetensors b/model-00011-of-00041.safetensors new file mode 100644 index 0000000..be4c481 --- /dev/null +++ b/model-00011-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:850ee336ff659ef59ddb41f78c0f9336eaf000318caa274aaf37f65df5d96e22 +size 3000142744 diff --git a/model-00012-of-00041.safetensors b/model-00012-of-00041.safetensors new file mode 100644 index 0000000..7c86a5b --- /dev/null +++ b/model-00012-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cfa62d7f9831a94b54b03646c9175ab2c4fc2e57579a0ae8894d2fff24cc728 +size 2999228304 diff --git a/model-00013-of-00041.safetensors b/model-00013-of-00041.safetensors new file mode 100644 index 0000000..c9d05b2 --- /dev/null +++ b/model-00013-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8addc7bd6faf539b78af92ba2aeafa46579172b3ace929ea8ced778cc621a283 +size 2999226960 diff --git a/model-00014-of-00041.safetensors b/model-00014-of-00041.safetensors new file mode 100644 index 0000000..2a6601a --- /dev/null +++ b/model-00014-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a75f9d474eb11b4139d269b43bd606ac56af76a44038f340edd40aee80a6422 +size 3000144048 diff --git a/model-00015-of-00041.safetensors b/model-00015-of-00041.safetensors new file mode 100644 index 0000000..901ed5f --- /dev/null +++ b/model-00015-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24fba6f5010a7d2aff61906259532594faddf86cce972a90af6971f7fd96232b +size 2999226896 diff --git a/model-00016-of-00041.safetensors b/model-00016-of-00041.safetensors new file mode 100644 index 0000000..63ceee1 --- /dev/null +++ b/model-00016-of-00041.safetensors @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:c529e29ff7f8fb7b71e1b48a607fb44162de3e52a6fb4978d97c88c1dcc66320 +size 3000144016 diff --git a/model-00017-of-00041.safetensors b/model-00017-of-00041.safetensors new file mode 100644 index 0000000..79a347d --- /dev/null +++ b/model-00017-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa08176464a10afe3188caf0fa40e95377be3c1391dc0dad148e8570f5d06775 +size 2999227968 diff --git a/model-00018-of-00041.safetensors b/model-00018-of-00041.safetensors new file mode 100644 index 0000000..47c9e7d --- /dev/null +++ b/model-00018-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c484c513b19332876b8a9c60d9c048d35bd7f901dee85626b0e4df3026b2b3b +size 3000142744 diff --git a/model-00019-of-00041.safetensors b/model-00019-of-00041.safetensors new file mode 100644 index 0000000..07df1f0 --- /dev/null +++ b/model-00019-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5547279b84b99df76481050b1fa52a620746c6eb37ef7a57fd19a8234ef31588 +size 2999228296 diff --git a/model-00020-of-00041.safetensors b/model-00020-of-00041.safetensors new file mode 100644 index 0000000..364d45a --- /dev/null +++ b/model-00020-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:454ea254158f87dd834aca66f10ba68f6831bbdea3ea0373318f4e205a0464c8 +size 2999226960 diff --git a/model-00021-of-00041.safetensors b/model-00021-of-00041.safetensors new file mode 100644 index 0000000..af862a0 --- /dev/null +++ b/model-00021-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f5049087df124a4a8ce46f4f353d9b8f13d1b4246c92839af2802094ccd3fc2 +size 3000144048 diff --git a/model-00022-of-00041.safetensors b/model-00022-of-00041.safetensors new file mode 100644 index 0000000..9e00d90 --- /dev/null +++ b/model-00022-of-00041.safetensors @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:86b9c55f0d66ca2b3dd7511fce71e8ad78e300bfc0e3b6c765a2c13a5b7e85ae +size 2999226896 diff --git a/model-00023-of-00041.safetensors b/model-00023-of-00041.safetensors new file mode 100644 index 0000000..2ca38b2 --- /dev/null +++ b/model-00023-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52bca9a623326262d11f48db4f5bc8a831a22afe1a3f66951d1886693bc997cf +size 3000144016 diff --git a/model-00024-of-00041.safetensors b/model-00024-of-00041.safetensors new file mode 100644 index 0000000..4e7dcc7 --- /dev/null +++ b/model-00024-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee2fba75273a76e66a15bca99aa8a6caa1a7fe5997552af3aea3881a7b247371 +size 2999227960 diff --git a/model-00025-of-00041.safetensors b/model-00025-of-00041.safetensors new file mode 100644 index 0000000..2217838 --- /dev/null +++ b/model-00025-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d85eae43d95143d3ac1670620b2d9000969feaa3e2d3ae71e1e2cc4f56488f7 +size 3000142752 diff --git a/model-00026-of-00041.safetensors b/model-00026-of-00041.safetensors new file mode 100644 index 0000000..e5c0aa5 --- /dev/null +++ b/model-00026-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dc328f4523c6dabe4260465cb8b83f51a53df9bdf4092065c98c379fb4d0159 +size 2999228296 diff --git a/model-00027-of-00041.safetensors b/model-00027-of-00041.safetensors new file mode 100644 index 0000000..96ed091 --- /dev/null +++ b/model-00027-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71ffffc321d85127ac937a9d2691f454c2ed227d5dc98813f06e2dd4ef9ec9ba +size 2999226960 diff --git a/model-00028-of-00041.safetensors b/model-00028-of-00041.safetensors new file mode 100644 index 0000000..9dd72d0 --- /dev/null +++ b/model-00028-of-00041.safetensors @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:b94552e470d43d60ea9e4a287b290e0953e706ed1efaa4080ca9cf29bb6f352b +size 3000144048 diff --git a/model-00029-of-00041.safetensors b/model-00029-of-00041.safetensors new file mode 100644 index 0000000..5cee92f --- /dev/null +++ b/model-00029-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b9f60673c6998ce3bdf735e50a9eecb216dd6e53c8435df9856ef181731dd57 +size 2999226904 diff --git a/model-00030-of-00041.safetensors b/model-00030-of-00041.safetensors new file mode 100644 index 0000000..8c2bfd9 --- /dev/null +++ b/model-00030-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd88be612cafe8676970c19dc1fb4108257f4f6275b4a9bdf62b66ce39e347de +size 3000144016 diff --git a/model-00031-of-00041.safetensors b/model-00031-of-00041.safetensors new file mode 100644 index 0000000..85f1322 --- /dev/null +++ b/model-00031-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6421bbf26c6e545775655d1d6ab8bc0e121e21208679aa5d8b872b4328bebc7 +size 2999227960 diff --git a/model-00032-of-00041.safetensors b/model-00032-of-00041.safetensors new file mode 100644 index 0000000..4f95c14 --- /dev/null +++ b/model-00032-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bf4bbb86bd0a5d0b7bac5c4b446f95fa76570a1f8de139d2788a6fe091eb404 +size 3000142752 diff --git a/model-00033-of-00041.safetensors b/model-00033-of-00041.safetensors new file mode 100644 index 0000000..e5ab950 --- /dev/null +++ b/model-00033-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db001ccad59b360a7647447dce5ba106589bf945f710fb1a6470b88ebb86292b +size 2999228296 diff --git a/model-00034-of-00041.safetensors b/model-00034-of-00041.safetensors new file mode 100644 index 0000000..1a43f6e --- /dev/null +++ b/model-00034-of-00041.safetensors @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:1e9ed188a1f7e537fab26921d8102f1ac83e59160ab4559fdb8b0031fbd65303 +size 2999226960 diff --git a/model-00035-of-00041.safetensors b/model-00035-of-00041.safetensors new file mode 100644 index 0000000..7d234e0 --- /dev/null +++ b/model-00035-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adedc007dc2f50cb0f19da74182367e2b5188a7daae2ce213d454689e04ff7b1 +size 3000144048 diff --git a/model-00036-of-00041.safetensors b/model-00036-of-00041.safetensors new file mode 100644 index 0000000..bad5f25 --- /dev/null +++ b/model-00036-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bb2ba0df492bfd2ea184aa2351b2a6d943c13e38d06f170b987b9a35bb03cf4 +size 2999226904 diff --git a/model-00037-of-00041.safetensors b/model-00037-of-00041.safetensors new file mode 100644 index 0000000..9fa9ba9 --- /dev/null +++ b/model-00037-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37b8ce4f224804a4d99453f497fcae6f86c3b8b927f43d379f5304cbb7892c7b +size 3000144016 diff --git a/model-00038-of-00041.safetensors b/model-00038-of-00041.safetensors new file mode 100644 index 0000000..b1fb23a --- /dev/null +++ b/model-00038-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:945a99a0a4b17dedefb0219cff05838457faee9c4fe38e6c5b39a31eedb3e320 +size 2999227960 diff --git a/model-00039-of-00041.safetensors b/model-00039-of-00041.safetensors new file mode 100644 index 0000000..84003ae --- /dev/null +++ b/model-00039-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7060bb27d64dc254ccc451bb0167792f982c9fb3d16ca5bdaae5dbd7153cfceb +size 3000142752 diff --git a/model-00040-of-00041.safetensors b/model-00040-of-00041.safetensors new file mode 100644 index 0000000..94eeafb --- /dev/null +++ b/model-00040-of-00041.safetensors @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:0c2bddb39d77fe9b52719bd5e4404da0709846b34c941cb012544fde683d453a +size 2606931696 diff --git a/model-00041-of-00041.safetensors b/model-00041-of-00041.safetensors new file mode 100644 index 0000000..0f410c9 --- /dev/null +++ b/model-00041-of-00041.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9a14e18c736c0fadff1985dd1826447c2e593564d68ab10b844177325c854e5 +size 1229199576 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..442f135 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9734599c8e3e00dbb5612bfe264edae70e2f994696b54ad57f70a1faf7beb251 +size 14057058 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..2c67e9d --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:757622126525aeeb131756849d93298070ff3f0319c455ec8c5bb0f6b1cebbe8 +size 9730160 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..ff8e2eb --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,495 @@ +{ + "added_tokens_decoder": { + "200000": { + "content": "]!p~[", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200001": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200002": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200003": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200004": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200005": { + "content": "", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200006": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200007": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200008": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200009": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200010": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200011": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200012": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200013": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200014": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200015": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200016": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200017": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200018": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200019": { + "content": "]~b]", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "200020": { + "content": "[e~[", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200021": { + "content": "]!d~[", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200022": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200023": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200024": { + "content": "]<]speech[>[", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200025": { + "content": "]<]image[>[", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200026": { + "content": "]<]video[>[", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200027": { + "content": "]<]start of speech[>[", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200028": { + "content": "]<]end of speech[>[", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200029": { + "content": "]<]start of image[>[", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200030": { + "content": "]<]end of image[>[", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200031": { + "content": "]<]start of video[>[", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200032": { + "content": "]<]end of video[>[", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "200033": { + "content": "]<]vision pad[>[", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200034": { + "content": "]~!b[", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200035": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200036": { + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "200037": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200038": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200039": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200040": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200041": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200042": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200043": { + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "200044": { + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "200045": { + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "200046": { + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "200047": 
{ + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "200048": { + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "200049": { + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + "200050": { + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "200051": { + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "200052": { + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + }, + "200053": { + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "]<]speech[>[", + "]<]image[>[", + "]<]video[>[", + "]<]start of speech[>[", + "]<]end of speech[>[", + "]<]start of image[>[", + "]<]end of image[>[", + "]<]start of video[>[", + "]<]end of video[>[", + "]<]vision pad[>[", + "]~!b[", + "", + "", + "", + "", + "", + "", + "", + "", + "[e~[", + "]!d~[", + "]!p~[", + "]~b]", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" + ], + "add_prefix_space": false, + "bos_token": "]~!b[", + "clean_up_tokenization_spaces": false, + "eos_token": "[e~[", + "model_max_length": 40960000, + "tokenizer_class": "GPT2Tokenizer", + "unk_token": "]!d~[" +} diff --git a/vocab.json b/vocab.json new file mode 100644 index 0000000..49a3e05 --- /dev/null +++ b/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b44c066b5dc34c800c4e3ecbd85f3e95ce3bfdbf8a5fe30223e005175103578a +size 4705413