Update files from the datasets library (from 1.8.0)
Release notes: https://github.com/huggingface/datasets/releases/tag/1.8.0
This commit is contained in:
parent
7b6ddf6458
commit
0c5d6316a3
@ -1,4 +1,6 @@
|
|||||||
---
|
---
|
||||||
|
languages:
|
||||||
|
- en
|
||||||
paperswithcode_id: imdb-movie-reviews
|
paperswithcode_id: imdb-movie-reviews
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
{"plain_text": {"description": "Large Movie Review Dataset.\nThis is a dataset for binary sentiment classification containing substantially more data than previous benchmark datasets. We provide a set of 25,000 highly polar movie reviews for training, and 25,000 for testing. There is additional unlabeled data for use as well.", "citation": "@InProceedings{maas-EtAl:2011:ACL-HLT2011,\n author = {Maas, Andrew L. and Daly, Raymond E. and Pham, Peter T. and Huang, Dan and Ng, Andrew Y. and Potts, Christopher},\n title = {Learning Word Vectors for Sentiment Analysis},\n booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies},\n month = {June},\n year = {2011},\n address = {Portland, Oregon, USA},\n publisher = {Association for Computational Linguistics},\n pages = {142--150},\n url = {http://www.aclweb.org/anthology/P11-1015}\n}\n", "homepage": "http://ai.stanford.edu/~amaas/data/sentiment/", "license": "", "features": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"num_classes": 2, "names": ["neg", "pos"], "names_file": null, "id": null, "_type": "ClassLabel"}}, "supervised_keys": null, "builder_name": "imdb", "config_name": "plain_text", "version": {"version_str": "1.0.0", "description": "", "datasets_version_to_prepare": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 32660064, "num_examples": 25000, "dataset_name": "imdb"}, "train": {"name": "train", "num_bytes": 33442202, "num_examples": 25000, "dataset_name": "imdb"}, "unsupervised": {"name": "unsupervised", "num_bytes": 67125548, "num_examples": 50000, "dataset_name": "imdb"}}, "download_checksums": {"http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz": {"num_bytes": 84125825, "checksum": "c40f74a18d3b61f90feba1e17730e0d38e8b97c05fde7008942e91923d1658fe"}}, "download_size": 84125825, "dataset_size": 133227814, "size_in_bytes": 217353639}}
|
{"plain_text": {"description": "Large Movie Review Dataset.\nThis is a dataset for binary sentiment classification containing substantially more data than previous benchmark datasets. We provide a set of 25,000 highly polar movie reviews for training, and 25,000 for testing. There is additional unlabeled data for use as well.", "citation": "@InProceedings{maas-EtAl:2011:ACL-HLT2011,\n author = {Maas, Andrew L. and Daly, Raymond E. and Pham, Peter T. and Huang, Dan and Ng, Andrew Y. and Potts, Christopher},\n title = {Learning Word Vectors for Sentiment Analysis},\n booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies},\n month = {June},\n year = {2011},\n address = {Portland, Oregon, USA},\n publisher = {Association for Computational Linguistics},\n pages = {142--150},\n url = {http://www.aclweb.org/anthology/P11-1015}\n}\n", "homepage": "http://ai.stanford.edu/~amaas/data/sentiment/", "license": "", "features": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"num_classes": 2, "names": ["neg", "pos"], "names_file": null, "id": null, "_type": "ClassLabel"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "text-classification", "text_column": "text", "label_column": "label", "labels": ["neg", "pos"]}], "builder_name": "imdb", "config_name": "plain_text", "version": {"version_str": "1.0.0", "description": "", "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 33432835, "num_examples": 25000, "dataset_name": "imdb"}, "test": {"name": "test", "num_bytes": 32650697, "num_examples": 25000, "dataset_name": "imdb"}, "unsupervised": {"name": "unsupervised", "num_bytes": 67106814, "num_examples": 50000, "dataset_name": "imdb"}}, "download_checksums": {"http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz": {"num_bytes": 84125825, "checksum": "c40f74a18d3b61f90feba1e17730e0d38e8b97c05fde7008942e91923d1658fe"}}, "download_size": 84125825, "post_processing_size": null, "dataset_size": 133190346, "size_in_bytes": 217316171}}
|
||||||
2
imdb.py
2
imdb.py
@ -20,6 +20,7 @@
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
import datasets
|
import datasets
|
||||||
|
from datasets.tasks import TextClassification
|
||||||
|
|
||||||
|
|
||||||
_DESCRIPTION = """\
|
_DESCRIPTION = """\
|
||||||
@ -78,6 +79,7 @@ class Imdb(datasets.GeneratorBasedBuilder):
|
|||||||
supervised_keys=None,
|
supervised_keys=None,
|
||||||
homepage="http://ai.stanford.edu/~amaas/data/sentiment/",
|
homepage="http://ai.stanford.edu/~amaas/data/sentiment/",
|
||||||
citation=_CITATION,
|
citation=_CITATION,
|
||||||
|
task_templates=[TextClassification(text_column="text", label_column="label")],
|
||||||
)
|
)
|
||||||
|
|
||||||
def _vocab_text_gen(self, archive):
|
def _vocab_text_gen(self, archive):
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user