Referencias:
v2.2
Utilice el siguiente comando para cargar este conjunto de datos en TFDS:
ds = tfds.load('huggingface:multi_woz_v22/v2.2')
- Descripción :
Multi-Domain Wizard-of-Oz dataset (MultiWOZ), a fully-labeled collection of human-human written conversations spanning over multiple domains and topics.
MultiWOZ 2.1 (Eric et al., 2019) identified and fixed many erroneous annotations and user utterances in the original version, resulting in an
improved version of the dataset. MultiWOZ 2.2 is yet another improved version of this dataset, which identifies and fixes dialogue state annotation errors
across 17.3% of the utterances on top of MultiWOZ 2.1 and redefines the ontology by disallowing vocabularies of slots with a large number of possible values
(e.g., restaurant name, time of booking) and introducing standardized slot span annotations for these slots.
- Licencia : Licencia Apache 2.0
- Versión : 2.2.0
- Divisiones :
División | Ejemplos |
---|---|
'test' | 1000 |
'train' | 8437 |
'validation' | 1000 |
- Características :
{
"dialogue_id": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"services": {
"feature": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"length": -1,
"id": null,
"_type": "Sequence"
},
"turns": {
"feature": {
"turn_id": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"speaker": {
"num_classes": 2,
"names": [
"USER",
"SYSTEM"
],
"id": null,
"_type": "ClassLabel"
},
"utterance": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"frames": {
"feature": {
"service": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"state": {
"active_intent": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"requested_slots": {
"feature": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"length": -1,
"id": null,
"_type": "Sequence"
},
"slots_values": {
"feature": {
"slots_values_name": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"slots_values_list": {
"feature": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"length": -1,
"id": null,
"_type": "Sequence"
}
},
"length": -1,
"id": null,
"_type": "Sequence"
}
},
"slots": {
"feature": {
"slot": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"value": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"start": {
"dtype": "int32",
"id": null,
"_type": "Value"
},
"exclusive_end": {
"dtype": "int32",
"id": null,
"_type": "Value"
},
"copy_from": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"copy_from_value": {
"feature": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"length": -1,
"id": null,
"_type": "Sequence"
}
},
"length": -1,
"id": null,
"_type": "Sequence"
}
},
"length": -1,
"id": null,
"_type": "Sequence"
},
"dialogue_acts": {
"dialog_act": {
"feature": {
"act_type": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"act_slots": {
"feature": {
"slot_name": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"slot_value": {
"dtype": "string",
"id": null,
"_type": "Value"
}
},
"length": -1,
"id": null,
"_type": "Sequence"
}
},
"length": -1,
"id": null,
"_type": "Sequence"
},
"span_info": {
"feature": {
"act_type": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"act_slot_name": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"act_slot_value": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"span_start": {
"dtype": "int32",
"id": null,
"_type": "Value"
},
"span_end": {
"dtype": "int32",
"id": null,
"_type": "Value"
}
},
"length": -1,
"id": null,
"_type": "Sequence"
}
}
},
"length": -1,
"id": null,
"_type": "Sequence"
}
}
v2.2_active_only
Utilice el siguiente comando para cargar este conjunto de datos en TFDS:
ds = tfds.load('huggingface:multi_woz_v22/v2.2_active_only')
- Descripción :
Multi-Domain Wizard-of-Oz dataset (MultiWOZ), a fully-labeled collection of human-human written conversations spanning over multiple domains and topics.
MultiWOZ 2.1 (Eric et al., 2019) identified and fixed many erroneous annotations and user utterances in the original version, resulting in an
improved version of the dataset. MultiWOZ 2.2 is yet another improved version of this dataset, which identifies and fixes dialogue state annotation errors
across 17.3% of the utterances on top of MultiWOZ 2.1 and redefines the ontology by disallowing vocabularies of slots with a large number of possible values
(e.g., restaurant name, time of booking) and introducing standardized slot span annotations for these slots.
- Licencia : Licencia Apache 2.0
- Versión : 2.2.0
- Divisiones :
División | Ejemplos |
---|---|
'test' | 1000 |
'train' | 8437 |
'validation' | 1000 |
- Características :
{
"dialogue_id": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"services": {
"feature": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"length": -1,
"id": null,
"_type": "Sequence"
},
"turns": {
"feature": {
"turn_id": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"speaker": {
"num_classes": 2,
"names": [
"USER",
"SYSTEM"
],
"id": null,
"_type": "ClassLabel"
},
"utterance": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"frames": {
"feature": {
"service": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"state": {
"active_intent": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"requested_slots": {
"feature": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"length": -1,
"id": null,
"_type": "Sequence"
},
"slots_values": {
"feature": {
"slots_values_name": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"slots_values_list": {
"feature": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"length": -1,
"id": null,
"_type": "Sequence"
}
},
"length": -1,
"id": null,
"_type": "Sequence"
}
},
"slots": {
"feature": {
"slot": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"value": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"start": {
"dtype": "int32",
"id": null,
"_type": "Value"
},
"exclusive_end": {
"dtype": "int32",
"id": null,
"_type": "Value"
},
"copy_from": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"copy_from_value": {
"feature": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"length": -1,
"id": null,
"_type": "Sequence"
}
},
"length": -1,
"id": null,
"_type": "Sequence"
}
},
"length": -1,
"id": null,
"_type": "Sequence"
},
"dialogue_acts": {
"dialog_act": {
"feature": {
"act_type": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"act_slots": {
"feature": {
"slot_name": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"slot_value": {
"dtype": "string",
"id": null,
"_type": "Value"
}
},
"length": -1,
"id": null,
"_type": "Sequence"
}
},
"length": -1,
"id": null,
"_type": "Sequence"
},
"span_info": {
"feature": {
"act_type": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"act_slot_name": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"act_slot_value": {
"dtype": "string",
"id": null,
"_type": "Value"
},
"span_start": {
"dtype": "int32",
"id": null,
"_type": "Value"
},
"span_end": {
"dtype": "int32",
"id": null,
"_type": "Value"
}
},
"length": -1,
"id": null,
"_type": "Sequence"
}
}
},
"length": -1,
"id": null,
"_type": "Sequence"
}
}