参考文献:
v2.2
次のコマンドを使用して、このデータセットを TFDS にロードします。
ds = tfds.load('huggingface:multi_woz_v22/v2.2')
- 説明:
Multi-Domain Wizard-of-Oz dataset (MultiWOZ), a fully-labeled collection of human-human written conversations spanning over multiple domains and topics.
MultiWOZ 2.1 (Eric et al., 2019) identified and fixed many erroneous annotations and user utterances in the original version, resulting in an
improved version of the dataset. MultiWOZ 2.2 is yet another improved version of this dataset, which identifies and fixes dialogue state annotation errors
across 17.3% of the utterances on top of MultiWOZ 2.1 and redefines the ontology by disallowing vocabularies of slots with a large number of possible values
(e.g., restaurant name, time of booking) and introducing standardized slot span annotations for these slots.
- ライセンス: Apache ライセンス 2.0
- バージョン: 2.2.0
- 分割:
スプリット | 例の数 |
---|---|
'test' | 1000 |
'train' | 8437 |
'validation' | 1000 |
- 特徴:
{
"dialogue_id": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"services": {
"feature": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"length": -1,
"id": null,
"_type": "Sequence"
},
"turns": {
"feature": {
"turn_id": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"speaker": {
"num_classes": 2,
"names": [
"USER",
"SYSTEM"
],
"id": null,
"_type": "ClassLabel"
},
"utterance": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"frames": {
"feature": {
"service": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"state": {
"active_intent": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"requested_slots": {
"feature": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"length": -1,
"id": null,
"_type": "Sequence"
},
"slots_values": {
"feature": {
"slots_values_name": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"slots_values_list": {
"feature": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"length": -1,
"id": null,
"_type": "Sequence"
}
},
"length": -1,
"id": null,
"_type": "Sequence"
}
},
"slots": {
"feature": {
"slot": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"value": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"start": {
"dtype": "int32",
"id": null,
"_type": "Value"
},
"exclusive_end": {
"dtype": "int32",
"id": null,
"_type": "Value"
},
"copy_from": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"copy_from_value": {
"feature": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"length": -1,
"id": null,
"_type": "Sequence"
}
},
"length": -1,
"id": null,
"_type": "Sequence"
}
},
"length": -1,
"id": null,
"_type": "Sequence"
},
"dialogue_acts": {
"dialog_act": {
"feature": {
"act_type": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"act_slots": {
"feature": {
"slot_name": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"slot_value": {
"dtype": "string",
"id": null,
"_type": "Value"
}
},
"length": -1,
"id": null,
"_type": "Sequence"
}
},
"length": -1,
"id": null,
"_type": "Sequence"
},
"span_info": {
"feature": {
"act_type": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"act_slot_name": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"act_slot_value": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"span_start": {
"dtype": "int32",
"id": null,
"_type": "Value"
},
"span_end": {
"dtype": "int32",
"id": null,
"_type": "Value"
}
},
"length": -1,
"id": null,
"_type": "Sequence"
}
}
},
"length": -1,
"id": null,
"_type": "Sequence"
}
}
v2.2_active_only
次のコマンドを使用して、このデータセットを TFDS にロードします。
ds = tfds.load('huggingface:multi_woz_v22/v2.2_active_only')
- 説明:
Multi-Domain Wizard-of-Oz dataset (MultiWOZ), a fully-labeled collection of human-human written conversations spanning over multiple domains and topics.
MultiWOZ 2.1 (Eric et al., 2019) identified and fixed many erroneous annotations and user utterances in the original version, resulting in an
improved version of the dataset. MultiWOZ 2.2 is yet another improved version of this dataset, which identifies and fixes dialogue state annotation errors
across 17.3% of the utterances on top of MultiWOZ 2.1 and redefines the ontology by disallowing vocabularies of slots with a large number of possible values
(e.g., restaurant name, time of booking) and introducing standardized slot span annotations for these slots.
- ライセンス: Apache ライセンス 2.0
- バージョン: 2.2.0
- 分割:
スプリット | 例の数 |
---|---|
'test' | 1000 |
'train' | 8437 |
'validation' | 1000 |
- 特徴:
{
"dialogue_id": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"services": {
"feature": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"length": -1,
"id": null,
"_type": "Sequence"
},
"turns": {
"feature": {
"turn_id": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"speaker": {
"num_classes": 2,
"names": [
"USER",
"SYSTEM"
],
"id": null,
"_type": "ClassLabel"
},
"utterance": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"frames": {
"feature": {
"service": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"state": {
"active_intent": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"requested_slots": {
"feature": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"length": -1,
"id": null,
"_type": "Sequence"
},
"slots_values": {
"feature": {
"slots_values_name": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"slots_values_list": {
"feature": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"length": -1,
"id": null,
"_type": "Sequence"
}
},
"length": -1,
"id": null,
"_type": "Sequence"
}
},
"slots": {
"feature": {
"slot": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"value": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"start": {
"dtype": "int32",
"id": null,
"_type": "Value"
},
"exclusive_end": {
"dtype": "int32",
"id": null,
"_type": "Value"
},
"copy_from": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"copy_from_value": {
"feature": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"length": -1,
"id": null,
"_type": "Sequence"
}
},
"length": -1,
"id": null,
"_type": "Sequence"
}
},
"length": -1,
"id": null,
"_type": "Sequence"
},
"dialogue_acts": {
"dialog_act": {
"feature": {
"act_type": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"act_slots": {
"feature": {
"slot_name": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"slot_value": {
"dtype": "string",
"id": null,
"_type": "Value"
}
},
"length": -1,
"id": null,
"_type": "Sequence"
}
},
"length": -1,
"id": null,
"_type": "Sequence"
},
"span_info": {
"feature": {
"act_type": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"act_slot_name": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"act_slot_value": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"span_start": {
"dtype": "int32",
"id": null,
"_type": "Value"
},
"span_end": {
"dtype": "int32",
"id": null,
"_type": "Value"
}
},
"length": -1,
"id": null,
"_type": "Sequence"
}
}
},
"length": -1,
"id": null,
"_type": "Sequence"
}
}