مراجع:
برای بارگذاری این مجموعه داده در TFDS از دستور زیر استفاده کنید:
ds = tfds.load('huggingface:swda')
- توضیحات :
The Switchboard Dialog Act Corpus (SwDA) extends the Switchboard-1 Telephone Speech Corpus, Release 2 with
turn/utterance-level dialog-act tags. The tags summarize syntactic, semantic, and pragmatic information about the
associated turn. The SwDA project was undertaken at UC Boulder in the late 1990s.
The SwDA is not inherently linked to the Penn Treebank 3 parses of Switchboard, and it is far from straightforward to
align the two resources. In addition, the SwDA is not distributed with the Switchboard's tables of metadata about the
conversations and their participants.
- مجوز : Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License
- نسخه : 0.0.0
- تقسیم ها :
تقسیم | نمونه‌ها |
---|---|
'test' | 4514 |
'train' | 213543 |
'validation' | 56729 |
- ویژگی ها :
{
"swda_filename": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"ptb_basename": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"conversation_no": {
"dtype": "int64",
"id": null,
"_type": "Value"
},
"transcript_index": {
"dtype": "int64",
"id": null,
"_type": "Value"
},
"act_tag": {
"num_classes": 217,
"names": [
"b^m^r",
"qw^r^t",
"aa^h",
"br^m",
"fa^r",
"aa,ar",
"sd^e(^q)^r",
"^2",
"sd;qy^d",
"oo",
"bk^m",
"aa^t",
"cc^t",
"qy^d^c",
"qo^t",
"ng^m",
"qw^h",
"qo^r",
"aa",
"qy^d^t",
"qrr^d",
"br^r",
"fx",
"sd,qy^g",
"ny^e",
"^h^t",
"fc^m",
"qw(^q)",
"co",
"o^t",
"b^m^t",
"qr^d",
"qw^g",
"ad(^q)",
"qy(^q)",
"na^r",
"am^r",
"qr^t",
"ad^c",
"qw^c",
"bh^r",
"h^t",
"ft^m",
"ba^r",
"qw^d^t",
"%",
"t3",
"nn",
"bd",
"h^m",
"h^r",
"sd^r",
"qh^m",
"^q^t",
"sv^2",
"ft",
"ar^m",
"qy^h",
"sd^e^m",
"qh^r",
"cc",
"fp^m",
"ad",
"qo",
"na^m^t",
"fo^c",
"qy",
"sv^e^r",
"aap",
"no",
"aa^2",
"sv(^q)",
"sv^e",
"nd",
"\"",
"bf^2",
"bk",
"fp",
"nn^r^t",
"fa^c",
"ny^t",
"ny^c^r",
"qw",
"qy^t",
"b",
"fo",
"qw^r",
"am",
"bf^t",
"^2^t",
"b^2",
"x",
"fc",
"qr",
"no^t",
"bk^t",
"bd^r",
"bf",
"^2^g",
"qh^c",
"ny^c",
"sd^e^r",
"br",
"fe",
"by",
"^2^r",
"fc^r",
"b^m",
"sd,sv",
"fa^t",
"sv^m",
"qrr",
"^h^r",
"na",
"fp^r",
"o",
"h,sd",
"t1^t",
"nn^r",
"cc^r",
"sv^c",
"co^t",
"qy^r",
"sv^r",
"qy^d^h",
"sd",
"nn^e",
"ny^r",
"b^t",
"ba^m",
"ar",
"bf^r",
"sv",
"bh^m",
"qy^g^t",
"qo^d^c",
"qo^d",
"nd^t",
"aa^r",
"sd^2",
"sv;sd",
"qy^c^r",
"qw^m",
"qy^g^r",
"no^r",
"qh(^q)",
"sd;sv",
"bf(^q)",
"+",
"qy^2",
"qw^d",
"qy^g",
"qh^g",
"nn^t",
"ad^r",
"oo^t",
"co^c",
"ng",
"^q",
"qw^d^c",
"qrr^t",
"^h",
"aap^r",
"bc^r",
"sd^m",
"bk^r",
"qy^g^c",
"qr(^q)",
"ng^t",
"arp",
"h",
"bh",
"sd^c",
"^g",
"o^r",
"qy^c",
"sd^e",
"fw",
"ar^r",
"qy^m",
"bc",
"sv^t",
"aap^m",
"sd;no",
"ng^r",
"bf^g",
"sd^e^t",
"o^c",
"b^r",
"b^m^g",
"ba",
"t1",
"qy^d(^q)",
"nn^m",
"ny",
"ba,fe",
"aa^m",
"qh",
"na^m",
"oo(^q)",
"qw^t",
"na^t",
"qh^h",
"qy^d^m",
"ny^m",
"fa",
"qy^d",
"fc^t",
"sd(^q)",
"qy^d^r",
"bf^m",
"sd(^q)^t",
"ft^t",
"^q^r",
"sd^t",
"sd(^q)^r",
"ad^t"
],
"names_file": null,
"id": null,
"_type": "ClassLabel"
},
"damsl_act_tag": {
"num_classes": 43,
"names": [
"ad",
"qo",
"qy",
"arp_nd",
"sd",
"h",
"bh",
"no",
"^2",
"^g",
"ar",
"aa",
"sv",
"bk",
"fp",
"qw",
"b",
"ba",
"t1",
"oo_co_cc",
"+",
"ny",
"qw^d",
"x",
"qh",
"fc",
"fo_o_fw_\"_by_bc",
"aap_am",
"%",
"bf",
"t3",
"nn",
"bd",
"ng",
"^q",
"br",
"qy^d",
"fa",
"^h",
"b^m",
"ft",
"qrr",
"na"
],
"names_file": null,
"id": null,
"_type": "ClassLabel"
},
"caller": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"utterance_index": {
"dtype": "int64",
"id": null,
"_type": "Value"
},
"subutterance_index": {
"dtype": "int64",
"id": null,
"_type": "Value"
},
"text": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"pos": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"trees": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"ptb_treenumbers": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"talk_day": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"length": {
"dtype": "int64",
"id": null,
"_type": "Value"
},
"topic_description": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"prompt": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"from_caller": {
"dtype": "int64",
"id": null,
"_type": "Value"
},
"from_caller_sex": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"from_caller_education": {
"dtype": "int64",
"id": null,
"_type": "Value"
},
"from_caller_birth_year": {
"dtype": "int64",
"id": null,
"_type": "Value"
},
"from_caller_dialect_area": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"to_caller": {
"dtype": "int64",
"id": null,
"_type": "Value"
},
"to_caller_sex": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"to_caller_education": {
"dtype": "int64",
"id": null,
"_type": "Value"
},
"to_caller_birth_year": {
"dtype": "int64",
"id": null,
"_type": "Value"
},
"to_caller_dialect_area": {
"dtype": "string",
"id": null,
"_type": "Value"
}
}