config.json · JSON · 30 lines · 726 B
{
  "architectures": [
    "Qwen3ForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 151643,
  "eos_token_id": 151645,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 40960,
  "max_window_layers": 28,
  "model_type": "qwen3",
  "num_attention_heads": 16,
  "num_hidden_layers": 28,
  "num_key_value_heads": 8,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 1000000,
  "sliding_window": null,
  "tie_word_embeddings": true,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.51.0",
  "use_cache": true,
  "use_sliding_window": false,
  "vocab_size": 151936
}
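
As a minimal sketch, assuming the file above is saved locally as config.json (the path is an assumption, not part of the file), the settings can be inspected with the Python standard library and a few geometry values derived from them:

import json

# Assumed local path to the config shown above; adjust as needed.
CONFIG_PATH = "config.json"

with open(CONFIG_PATH, "r", encoding="utf-8") as f:
    cfg = json.load(f)

# Attention geometry read directly from the config:
# 16 query heads share 8 key/value heads (grouped-query attention),
# each with an explicit head_dim of 128.
q_heads = cfg["num_attention_heads"]   # 16
kv_heads = cfg["num_key_value_heads"]  # 8
head_dim = cfg["head_dim"]             # 128

print("GQA group size:", q_heads // kv_heads)              # 2 query heads per KV head
print("attention width:", q_heads * head_dim)              # 2048, set by head_dim, not hidden_size
print("hidden size:", cfg["hidden_size"])                  # 1024
print("layers:", cfg["num_hidden_layers"])                 # 28
print("context length:", cfg["max_position_embeddings"])   # 40960

If the transformers library is installed (the file records transformers_version 4.51.0), the same file can instead be loaded by pointing AutoConfig.from_pretrained at the directory containing it.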
