Compare commits
	
		
			7 Commits
		
	
	
		
			c854ab92a6
			...
			11aaeb6912
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					11aaeb6912 | ||
| 
						 | 
					770dc84e80 | ||
| 
						 | 
					e0d94b6bf4 | ||
| 66cd1a0c34 | |||
| 
						 | 
					a14d48dbab | ||
| 
						 | 
					9cbeacecf1 | ||
| 
						 | 
					5498471bbd | 
							
								
								
									
										75
									
								
								.gitattributes
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										75
									
								
								.gitattributes
									
									
									
									
										vendored
									
									
								
							@ -1,6 +1,10 @@
 | 
				
			|||||||
*.7z filter=lfs diff=lfs merge=lfs -text
 | 
					*.7z filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
*.arrow filter=lfs diff=lfs merge=lfs -text
 | 
					*.arrow filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
*.bin filter=lfs diff=lfs merge=lfs -text
 | 
					*.bin filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					<<<<<<< HEAD
 | 
				
			||||||
 | 
					*.bin.* filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					=======
 | 
				
			||||||
 | 
					>>>>>>> c854ab92a6520ab82f6a6874e0b88699b9b0b967
 | 
				
			||||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
 | 
					*.bz2 filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
 | 
					*.ckpt filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
*.ftz filter=lfs diff=lfs merge=lfs -text
 | 
					*.ftz filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
@ -17,6 +21,73 @@
 | 
				
			|||||||
*.ot filter=lfs diff=lfs merge=lfs -text
 | 
					*.ot filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
*.parquet filter=lfs diff=lfs merge=lfs -text
 | 
					*.parquet filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
*.pb filter=lfs diff=lfs merge=lfs -text
 | 
					*.pb filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					<<<<<<< HEAD
 | 
				
			||||||
 | 
					*.pt filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.pth filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.rar filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.tar.* filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.tflite filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.tgz filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.xz filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.zip filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.zstandard filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.tfevents* filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.db* filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.ark* filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.safetensors filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.ckpt filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.jpg filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.png filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.jpeg filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.bmp filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.gif filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.webp filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.mp3 filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.wav filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.wma filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.aac filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.ogg filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.m4a filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.m3u8 filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.amr filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.audio filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.avi filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.flv filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.mp4 filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.mpg filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.asf filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.mov filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.mpeg filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.3gp filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.wmv filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.rmvb filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.rm filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.ts filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.mkv filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.flash filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.vob filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.pdf filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.ost filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.pst filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.doc filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.docx filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.txt filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.ppt filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.pptx filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.xls filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.xlsx filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.vsd filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.vsdx filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.jsonl filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					*.json filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					dataset_infos.json ignore
 | 
				
			||||||
 | 
					*.csv filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					=======
 | 
				
			||||||
 | 
					>>>>>>> c854ab92a6520ab82f6a6874e0b88699b9b0b967
 | 
				
			||||||
*.pickle filter=lfs diff=lfs merge=lfs -text
 | 
					*.pickle filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
*.pkl filter=lfs diff=lfs merge=lfs -text
 | 
					*.pkl filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
*.pt filter=lfs diff=lfs merge=lfs -text
 | 
					*.pt filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
@ -33,4 +104,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 | 
				
			|||||||
*.zip filter=lfs diff=lfs merge=lfs -text
 | 
					*.zip filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
*.zst filter=lfs diff=lfs merge=lfs -text
 | 
					*.zst filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
 | 
					*tfevents* filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					<<<<<<< HEAD
 | 
				
			||||||
*.EncryptBy4pd filter=lfs diff=lfs merge=lfs -text
 | 
					*.EncryptBy4pd filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					=======
 | 
				
			||||||
 | 
					*.EncryptBy4pd filter=lfs diff=lfs merge=lfs -text
 | 
				
			||||||
 | 
					>>>>>>> c854ab92a6520ab82f6a6874e0b88699b9b0b967
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										39
									
								
								README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								README.md
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,39 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					---
 | 
				
			||||||
 | 
					displayName: SAMSum Corpus
 | 
				
			||||||
 | 
					labelTypes:
 | 
				
			||||||
 | 
					- Classification
 | 
				
			||||||
 | 
					license:
 | 
				
			||||||
 | 
					- CC BY-NC-ND 4.0
 | 
				
			||||||
 | 
					mediaTypes:
 | 
				
			||||||
 | 
					- Text
 | 
				
			||||||
 | 
					paperUrl: https://arxiv.org/pdf/1911.12237v2.pdf
 | 
				
			||||||
 | 
					publishDate: "2019"
 | 
				
			||||||
 | 
					publishUrl: https://github.com/huggingface/datasets/tree/master/datasets/samsum
 | 
				
			||||||
 | 
					publisher:
 | 
				
			||||||
 | 
					- Samsung R&D Institute Poland
 | 
				
			||||||
 | 
					tags:
 | 
				
			||||||
 | 
					- Text
 | 
				
			||||||
 | 
					taskTypes:
 | 
				
			||||||
 | 
					- Text Summarization/Simplification
 | 
				
			||||||
 | 
					- Federated Learning
 | 
				
			||||||
 | 
					- Abstractive Text Summarization
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					---
 | 
				
			||||||
 | 
					# 数据集介绍
 | 
				
			||||||
 | 
					  ## 简介
 | 
				
			||||||
 | 
					   SAMSum 数据集包含约 16k 段附有摘要的、类似即时通讯(messenger)聊天的对话。这些对话由精通英语的语言学家创作并记录。语言学家被要求创作与他们日常所写内容相似的对话,以反映其现实生活中即时通讯对话的主题比例。对话的风格和语域多种多样——可以是非正式的、半正式的或正式的,并且可能包含俚语、表情符号和拼写错误。随后,这些对话被标注了摘要。摘要应以第三人称简明扼要地概括对话中人们所谈论的内容。 SAMSum 数据集由波兰三星研发研究所制作,并以研究为目的发布(非商业许可:CC BY-NC-ND 4.0)。 
 | 
				
			||||||
 | 
					  ## 引文
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					"@article{gliwa2019samsum,
 | 
				
			||||||
 | 
					title={SAMSum corpus: A human-annotated dialogue dataset for abstractive summarization},
 | 
				
			||||||
 | 
					author={Gliwa, Bogdan and Mochol, Iwona and Biesek, Maciej and Wawer, Aleksander},
 | 
				
			||||||
 | 
					journal={arXiv preprint arXiv:1911.12237},
 | 
				
			||||||
 | 
					year={2019}
 | 
				
			||||||
 | 
					}"
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					## Download dataset
 | 
				
			||||||
 | 
					:modelscope-code[]{type="git"}
 | 
				
			||||||
							
								
								
									
										1
									
								
								dataset-samsum-corpus
									
									
									
									
									
										Submodule
									
								
							
							
								
								
								
								
								
								
							
						
						
									
										1
									
								
								dataset-samsum-corpus
									
									
									
									
									
										Submodule
									
								
							@ -0,0 +1 @@
 | 
				
			|||||||
 | 
					Subproject commit 770dc84e80bf4cd9de9445bac37ace3879389cb9
 | 
				
			||||||
							
								
								
									
										
											BIN
										
									
								
								dataset_info.json
									 (Stored with Git LFS)
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								dataset_info.json
									 (Stored with Git LFS)
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								dataset_infos.json
									 (Stored with Git LFS)
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								dataset_infos.json
									 (Stored with Git LFS)
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										18
									
								
								metafile.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								metafile.yaml
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,18 @@
 | 
				
			|||||||
 | 
					displayName: SAMSum Corpus
 | 
				
			||||||
 | 
					labelTypes:
 | 
				
			||||||
 | 
					- Classification
 | 
				
			||||||
 | 
					license:
 | 
				
			||||||
 | 
					- CC BY-NC-ND 4.0
 | 
				
			||||||
 | 
					mediaTypes:
 | 
				
			||||||
 | 
					- Text
 | 
				
			||||||
 | 
					paperUrl: https://arxiv.org/pdf/1911.12237v2.pdf
 | 
				
			||||||
 | 
					publishDate: "2019"
 | 
				
			||||||
 | 
					publishUrl: https://github.com/huggingface/datasets/tree/master/datasets/samsum
 | 
				
			||||||
 | 
					publisher:
 | 
				
			||||||
 | 
					- Samsung R&D Institute Poland
 | 
				
			||||||
 | 
					tags:
 | 
				
			||||||
 | 
					- Text
 | 
				
			||||||
 | 
					taskTypes:
 | 
				
			||||||
 | 
					- Text Summarization/Simplification
 | 
				
			||||||
 | 
					- Federated Learning
 | 
				
			||||||
 | 
					- Abstractive Text Summarization
 | 
				
			||||||
							
								
								
									
										9
									
								
								quickstart.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								quickstart.md
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,9 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					## SDK usage
 | 
				
			||||||
 | 
					```python
 | 
				
			||||||
 | 
					from modelscope.msdatasets import MsDataset
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					MsDataset.load("OpenDataLab/SAMSum_Corpus")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Note: If the SDK is not available, please use git to download the dataset.
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
							
								
								
									
										
											BIN
										
									
								
								raw/corpus.7z
									 (Stored with Git LFS)
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								raw/corpus.7z
									 (Stored with Git LFS)
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								sample/other/test.json
									 (Stored with Git LFS)
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								sample/other/test.json
									 (Stored with Git LFS)
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								sample/other/train.json
									 (Stored with Git LFS)
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								sample/other/train.json
									 (Stored with Git LFS)
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								sample/other/val.json
									 (Stored with Git LFS)
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								sample/other/val.json
									 (Stored with Git LFS)
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
		Loading…
	
		Reference in New Issue
	
	Block a user