config.json · 31 lines · 729 B · JSON
{
  "architectures": [
    "Qwen3ForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 151643,
  "eos_token_id": 151645,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 5120,
  "initializer_range": 0.02,
  "intermediate_size": 25600,
  "max_position_embeddings": 40960,
  "max_window_layers": 64,
  "model_type": "qwen3",
  "num_attention_heads": 64,
  "num_hidden_layers": 64,
  "num_key_value_heads": 8,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 1000000,
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.51.3",
  "use_cache": false,
  "use_sliding_window": false,
  "vocab_size": 151936
}
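This is a standard Hugging Face transformers model config. Below is a minimal sketch (not part of the repo) of reading it with AutoConfig, assuming the JSON above is saved as config.json in the current directory and that a transformers version with qwen3 support is installed (the file records transformers_version 4.51.3).

# Minimal sketch: load the config above from ./config.json and inspect a few fields.
# Assumes transformers >= 4.51 so that model_type "qwen3" is recognized.
from transformers import AutoConfig

config = AutoConfig.from_pretrained(".")   # reads ./config.json

print(config.model_type)           # "qwen3"
print(config.num_hidden_layers)    # 64
print(config.hidden_size)          # 5120

# Grouped-query attention: 64 query heads share 8 key/value heads,
# i.e. 8 query heads per KV head.
print(config.num_attention_heads // config.num_key_value_heads)  # 8

The num_key_value_heads of 8 against num_attention_heads of 64 indicates grouped-query attention; with head_dim fixed at 128, the query projection is 64 × 128 = 8192 wide rather than being derived from the 5120 hidden size.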
