22 lines
		
	
	
		
			678 B
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			22 lines
		
	
	
		
			678 B
		
	
	
	
		
			Python
		
	
	
	
	
	
from opencompass.models import HuggingFaceCausalLM

# OpenCompass model configuration: Llama-2 70B (base, HuggingFace weights).
# A single entry evaluated with the HuggingFace causal-LM backend; the model
# is placed across all visible GPUs via `device_map='auto'`.
models = [
    dict(
        type=HuggingFaceCausalLM,
        abbr='llama-2-70b-hf',
        # HF hub id for both weights and tokenizer.
        path='meta-llama/Llama-2-70b-hf',
        tokenizer_path='meta-llama/Llama-2-70b-hf',
        tokenizer_kwargs=dict(
            # Left-side padding/truncation so generation starts right after
            # the prompt and the most recent context is kept when truncating.
            padding_side='left',
            truncation_side='left',
            # Use the slow (SentencePiece) tokenizer rather than the fast one.
            use_fast=False,
        ),
        max_out_len=100,   # max tokens to generate per sample
        max_seq_len=2048,  # max total sequence length used for evaluation
        batch_size=8,
        model_kwargs=dict(device_map='auto'),  # shard weights across GPUs
        # If False, inference runs sample-by-sample in a for-loop instead of
        # padded batches.
        batch_padding=False,
        run_cfg=dict(num_gpus=8, num_procs=1),
    )
]