common_voice

مراجع:

ab

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/ab')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
القسم | عدد الأمثلة
'invalidated' 8
'other' 752
'test' 9
'train' 22
'validated' 31
'validation' 0
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

ar

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/ar')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
القسم | عدد الأمثلة
'invalidated' 6333
'other' 18283
'test' 7622
'train' 14227
'validated' 43291
'validation' 7517
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

as

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/as')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
القسم | عدد الأمثلة
'invalidated' 31
'other' 0
'test' 110
'train' 270
'validated' 504
'validation' 124
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

br

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/br')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
القسم | عدد الأمثلة
'invalidated' 623
'other' 10912
'test' 2087
'train' 2780
'validated' 8560
'validation' 1997
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

ca

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/ca')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
القسم | عدد الأمثلة
'invalidated' 18846
'other' 64446
'test' 15724
'train' 285584
'validated' 416701
'validation' 15724
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

cnh

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/cnh')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
القسم | عدد الأمثلة
'invalidated' 433
'other' 2934
'test' 752
'train' 807
'validated' 2432
'validation' 756
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

cs

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/cs')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
القسم | عدد الأمثلة
'invalidated' 685
'other' 7475
'test' 4144
'train' 5655
'validated' 30431
'validation' 4118
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

cv

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/cv')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
القسم | عدد الأمثلة
'invalidated' 1282
'other' 6927
'test' 788
'train' 931
'validated' 3496
'validation' 818
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

cy

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/cy')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
القسم | عدد الأمثلة
'invalidated' 3648
'other' 17919
'test' 4820
'train' 6839
'validated' 72984
'validation' 4776
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

de

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/de')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
القسم | عدد الأمثلة
'invalidated' 32789
'other' 10095
'test' 15588
'train' 246525
'validated' 565186
'validation' 15588
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

dv

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/dv')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
القسم | عدد الأمثلة
'invalidated' 840
'other' 0
'test' 2202
'train' 2680
'validated' 11866
'validation' 2077
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

el

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/el')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
القسم | عدد الأمثلة
'invalidated' 185
'other' 5659
'test' 1522
'train' 2316
'validated' 5996
'validation' 1401
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

en

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/en')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
القسم | عدد الأمثلة
'invalidated' 189562
'other' 169895
'test' 16164
'train' 564337
'validated' 1224864
'validation' 16164
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

eo

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/eo')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
القسم | عدد الأمثلة
'invalidated' 4736
'other' 2946
'test' 8969
'train' 19587
'validated' 58094
'validation' 8987
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

es

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/es')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
القسم | عدد الأمثلة
'invalidated' 40640
'other' 144791
'test' 15089
'train' 161813
'validated' 236314
'validation' 15089
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

et

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/et')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
القسم | عدد الأمثلة
'invalidated' 3557
'other' 569
'test' 2509
'train' 2966
'validated' 10683
'validation' 2507
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

eu

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/eu')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
القسم | عدد الأمثلة
'invalidated' 5387
'other' 23570
'test' 5172
'train' 7505
'validated' 63009
'validation' 5172
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

fa

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/fa')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
القسم | عدد الأمثلة
'invalidated' 11698
'other' 22510
'test' 5213
'train' 7593
'validated' 251659
'validation' 5213
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

fi

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/fi')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
القسم | عدد الأمثلة
'invalidated' 59
'other' 149
'test' 428
'train' 460
'validated' 1305
'validation' 415
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

fr

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/fr')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
القسم | عدد الأمثلة
'invalidated' 40351
'other' 3222
'test' 15763
'train' 298982
'validated' 461004
'validation' 15763
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

fy-NL

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/fy-NL')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
القسم | عدد الأمثلة
'invalidated' 1031
'other' 21569
'test' 3020
'train' 3927
'validated' 10495
'validation' 2790
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

ga-IE

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/ga-IE')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 409
'other' 2130
'test' 506
'train' 541
'validated' 3352
'validation' 497
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

hi

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/hi')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 60
'other' 139
'test' 127
'train' 157
'validated' 419
'validation' 135
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

hsb

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/hsb')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 227
'other' 62
'test' 387
'train' 808
'validated' 1367
'validation' 172
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

hu

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/hu')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 169
'other' 295
'test' 1649
'train' 3348
'validated' 6457
'validation' 1434
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

ia

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/ia')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 192
'other' 1095
'test' 899
'train' 3477
'validated' 5978
'validation' 1601
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

id

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/id')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 470
'other' 6782
'test' 1844
'train' 2130
'validated' 8696
'validation' 1835
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

it

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/it')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 12189
'other' 14549
'test' 12928
'train' 58015
'validated' 102579
'validation' 12928
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

ja

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/ja')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 504
'other' 885
'test' 632
'train' 722
'validated' 3072
'validation' 586
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

ka

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/ka')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 139
'other' 44
'test' 656
'train' 1058
'validated' 2275
'validation' 527
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

kab

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/kab')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 18134
'other' 88021
'test' 14622
'train' 120530
'validated' 573718
'validation' 14622
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

ky

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/ky')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 926
'other' 7223
'test' 1503
'train' 1955
'validated' 9236
'validation' 1511
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

lg

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/lg')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 290
'other' 3110
'test' 584
'train' 1250
'validated' 2220
'validation' 384
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

lt

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/lt')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 102
'other' 1629
'test' 466
'train' 931
'validated' 1644
'validation' 244
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

lv

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/lv')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 143
'other' 1560
'test' 1882
'train' 2552
'validated' 6444
'validation' 2002
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

mn

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/mn')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 667
'other' 3272
'test' 1862
'train' 2183
'validated' 7487
'validation' 1837
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

mt

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/mt')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 314
'other' 5714
'test' 1617
'train' 2036
'validated' 5747
'validation' 1516
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

nl

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/nl')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 3308
'other' 27
'test' 5708
'train' 9460
'validated' 52488
'validation' 4938
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

or

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/or')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 62
'other' 4302
'test' 98
'train' 388
'validated' 615
'validation' 129
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

pa-IN

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/pa-IN')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 43
'other' 1411
'test' 116
'train' 211
'validated' 371
'validation' 44
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

pl

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/pl')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 4601
'other' 12848
'test' 5153
'train' 7468
'validated' 90791
'validation' 5153
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

pt

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/pt')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 1740
'other' 8390
'test' 4641
'train' 6514
'validated' 41584
'validation' 4592
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

rm-sursilv

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/rm-sursilv')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 639
'other' 2102
'test' 1194
'train' 1384
'validated' 3783
'validation' 1205
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

rm-vallader

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/rm-vallader')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 374
'other' 727
'test' 378
'train' 574
'validated' 1316
'validation' 357
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

ro

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/ro')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 485
'other' 1945
'test' 1778
'train' 3399
'validated' 6039
'validation' 858
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

ru

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/ru')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 3056
'other' 10247
'test' 8007
'train' 15481
'validated' 74256
'validation' 7963
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

rw

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/rw')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 206790
'other' 22923
'test' 15724
'train' 515197
'validated' 832929
'validation' 15032
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

sah

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/sah')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 66
'other' 1275
'test' 757
'train' 1442
'validated' 2606
'validation' 405
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

sl

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/sl')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 92
'other' 2502
'test' 881
'train' 2038
'validated' 4669
'validation' 556
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

sv-SE

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/sv-SE')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 462
'other' 3043
'test' 2027
'train' 2331
'validated' 12552
'validation' 2019
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

ta

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/ta')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 594
'other' 7428
'test' 1781
'train' 2009
'validated' 12652
'validation' 1779
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

th

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/th')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 467
'other' 2671
'test' 2188
'train' 2917
'validated' 7028
'validation' 1922
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

tr

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/tr')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 1726
'other' 325
'test' 1647
'train' 1831
'validated' 18685
'validation' 1647
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

tt

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/tt')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 287
'other' 1798
'test' 4485
'train' 11211
'validated' 25781
'validation' 2127
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

uk

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/uk')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 1255
'other' 8161
'test' 3235
'train' 4035
'validated' 22337
'validation' 3236
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

vi

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/vi')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 78
'other' 870
'test' 198
'train' 221
'validated' 619
'validation' 200
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

vot

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/vot')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 6
'other' 411
'test' 0
'train' 3
'validated' 3
'validation' 0
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

zh-CN

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/zh-CN')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 5305
'other' 8948
'test' 8760
'train' 18541
'validated' 36405
'validation' 8743
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

zh-HK

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/zh-HK')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 2999
'other' 38830
'test' 5172
'train' 7506
'validated' 41835
'validation' 5172
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}

zh-TW

استخدم الأمر التالي لتحميل مجموعة البيانات هذه في TFDS:

ds = tfds.load('huggingface:common_voice/zh-TW')
  • وصف :
Common Voice is Mozilla's initiative to help teach machines how real people speak.
The dataset currently consists of 7,335 validated hours of speech in 60 languages, but we’re always adding more voices and languages.
ينقسم أمثلة
'invalidated' 3584
'other' 22477
'test' 2895
'train' 3507
'validated' 61232
'validation' 2895
  • سمات :
{
    "client_id": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "path": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "audio": {
        "sampling_rate": 48000,
        "mono": true,
        "decode": true,
        "id": null,
        "_type": "Audio"
    },
    "sentence": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "up_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "down_votes": {
        "dtype": "int64",
        "id": null,
        "_type": "Value"
    },
    "age": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "gender": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "accent": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "locale": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    },
    "segment": {
        "dtype": "string",
        "id": null,
        "_type": "Value"
    }
}