Compare commits
7 Commits
c854ab92a6
...
11aaeb6912
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
11aaeb6912 | ||
|
|
770dc84e80 | ||
|
|
e0d94b6bf4 | ||
| 66cd1a0c34 | |||
|
|
a14d48dbab | ||
|
|
9cbeacecf1 | ||
|
|
5498471bbd |
77
.gitattributes
vendored
77
.gitattributes
vendored
@ -1,6 +1,10 @@
|
|||||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
<<<<<<< HEAD
|
||||||
|
*.bin.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
=======
|
||||||
|
>>>>>>> c854ab92a6520ab82f6a6874e0b88699b9b0b967
|
||||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||||
@ -17,6 +21,73 @@
|
|||||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||||
|
<<<<<<< HEAD
|
||||||
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.db* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ark* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text
|
||||||
|
**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.jpg filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.png filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.jpeg filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bmp filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gif filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.webp filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mp3 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.wav filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.wma filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.aac filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ogg filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.m4a filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.m3u8 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.amr filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.audio filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.avi filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.flv filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mp4 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mpg filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.asf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mov filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mpeg filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.3gp filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.wmv filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rmvb filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ts filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mkv filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.flash filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.vob filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pdf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ost filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pst filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.doc filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.docx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.txt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ppt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pptx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xls filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xlsx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.vsd filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.vsdx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.jsonl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.json filter=lfs diff=lfs merge=lfs -text
|
||||||
|
dataset_infos.json ignore
|
||||||
|
*.csv filter=lfs diff=lfs merge=lfs -text
|
||||||
|
=======
|
||||||
|
>>>>>>> c854ab92a6520ab82f6a6874e0b88699b9b0b967
|
||||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
@ -33,4 +104,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|||||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
*.EncryptBy4pd filter=lfs diff=lfs merge=lfs -text
|
<<<<<<< HEAD
|
||||||
|
*.EncryptBy4pd filter=lfs diff=lfs merge=lfs -text
|
||||||
|
=======
|
||||||
|
*.EncryptBy4pd filter=lfs diff=lfs merge=lfs -text
|
||||||
|
>>>>>>> c854ab92a6520ab82f6a6874e0b88699b9b0b967
|
||||||
|
|||||||
39
README.md
Normal file
39
README.md
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
|
||||||
|
---
|
||||||
|
displayName: SAMSum Corpus
|
||||||
|
labelTypes:
|
||||||
|
- Classification
|
||||||
|
license:
|
||||||
|
- CC BY-NC-ND 4.0
|
||||||
|
mediaTypes:
|
||||||
|
- Text
|
||||||
|
paperUrl: https://arxiv.org/pdf/1911.12237v2.pdf
|
||||||
|
publishDate: "2019"
|
||||||
|
publishUrl: https://github.com/huggingface/datasets/tree/master/datasets/samsum
|
||||||
|
publisher:
|
||||||
|
- Samsung R&D Institute Poland
|
||||||
|
tags:
|
||||||
|
- Text
|
||||||
|
taskTypes:
|
||||||
|
- Text Summarization/Simplication
|
||||||
|
- Federated Learning
|
||||||
|
- Abstractive Text Summarization
|
||||||
|
|
||||||
|
---
|
||||||
|
# 数据集介绍
|
||||||
|
## 简介
|
||||||
|
SAMSum 数据集包含约 16k 条带有摘要的、类似即时通讯(messenger)聊天的对话。对话由精通英语的语言学家创建并记录。语言学家被要求创建与他们日常所写内容相似的对话,以反映其现实生活中即时通讯对话的主题比例。对话的风格和语域多种多样——可以是非正式的、半正式的或正式的,并且可能包含俚语、表情符号和拼写错误。随后,这些对话被标注了摘要。摘要应当以第三人称简明扼要地概括人们在对话中谈论的内容。SAMSum 数据集由波兰三星研发研究所(Samsung R&D Institute Poland)整理,并以研究为目的发布(非商业许可:CC BY-NC-ND 4.0)。
|
||||||
|
## 引文
|
||||||
|
|
||||||
|
```
|
||||||
|
@article{gliwa2019samsum,
|
||||||
|
title={SAMSum corpus: A human-annotated dialogue dataset for abstractive summarization},
|
||||||
|
author={Gliwa, Bogdan and Mochol, Iwona and Biesek, Maciej and Wawer, Aleksander},
|
||||||
|
journal={arXiv preprint arXiv:1911.12237},
|
||||||
|
year={2019}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Download dataset
|
||||||
|
:modelscope-code[]{type="git"}
|
||||||
1
dataset-samsum-corpus
Submodule
1
dataset-samsum-corpus
Submodule
@ -0,0 +1 @@
|
|||||||
|
Subproject commit 770dc84e80bf4cd9de9445bac37ace3879389cb9
|
||||||
BIN
dataset_info.json
(Stored with Git LFS)
Normal file
BIN
dataset_info.json
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
dataset_infos.json
(Stored with Git LFS)
Normal file
BIN
dataset_infos.json
(Stored with Git LFS)
Normal file
Binary file not shown.
18
metafile.yaml
Normal file
18
metafile.yaml
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
displayName: SAMSum Corpus
|
||||||
|
labelTypes:
|
||||||
|
- Classification
|
||||||
|
license:
|
||||||
|
- CC BY-NC-ND 4.0
|
||||||
|
mediaTypes:
|
||||||
|
- Text
|
||||||
|
paperUrl: https://arxiv.org/pdf/1911.12237v2.pdf
|
||||||
|
publishDate: "2019"
|
||||||
|
publishUrl: https://github.com/huggingface/datasets/tree/master/datasets/samsum
|
||||||
|
publisher:
|
||||||
|
- Samsung R&D Institute Poland
|
||||||
|
tags:
|
||||||
|
- Text
|
||||||
|
taskTypes:
|
||||||
|
- Text Summarization/Simplication
|
||||||
|
- Federated Learning
|
||||||
|
- Abstractive Text Summarization
|
||||||
9
quickstart.md
Normal file
9
quickstart.md
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
|
||||||
|
## SDK usage
|
||||||
|
```python
|
||||||
|
from modelscope.msdatasets import MsDataset
|
||||||
|
|
||||||
|
MsDataset.load("OpenDataLab/SAMSum_Corpus")
|
||||||
|
|
||||||
|
# Note: If the SDK is not available, please use git to download the dataset.
|
||||||
|
```
|
||||||
BIN
raw/corpus.7z
(Stored with Git LFS)
Normal file
BIN
raw/corpus.7z
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
sample/other/test.json
(Stored with Git LFS)
Normal file
BIN
sample/other/test.json
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
sample/other/train.json
(Stored with Git LFS)
Normal file
BIN
sample/other/train.json
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
sample/other/val.json
(Stored with Git LFS)
Normal file
BIN
sample/other/val.json
(Stored with Git LFS)
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user