Download

We provide two versions of the UnCommonSense dataset: a smaller one and a larger one.

Smaller dataset (up to the top-k negations per concept)
Link: download
Data size:
  • #negations: 6.2 million
  • #subjects: 8k everyday concepts
Details:
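Format: JSON Lines (https://jsonlines.org/), i.e. one JSON object per line. The samples below are pretty-printed across multiple lines for readability.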
Samples:
{
    "subject": "gorilla",
    "predicate": "HasProperty",
    "object": "territorial",
    "tail_phrase": "be territorial",
    "score": 0.23,
    "strict_siblings":
        [
            {
                "wild animal": ["tiger", "lion", "monkey", "chimpanzee"]
            },

            {
                "species": ["wombat", "tarsier", "gibbon"]
            }
        ]
}

{
    "subject": "tabbouleh",
    "predicate": "ReceivesAction",
    "object": "baked",
    "tail_phrase": "be baked",
    "score": 0.17,
    "strict_siblings":
        [
            {
                "food": ["loaf", "samosa", "flatbread"]
            },

            {
                "side dish": ["casserole", "pasta"]
            }
        ]
}
Larger dataset (complete final set of negations)
Data size:
  • #negations: 13.6 million
  • #subjects: 8k everyday concepts
Details:
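Format: JSON Lines (https://jsonlines.org/), i.e. one JSON object per line. The samples below are pretty-printed across multiple lines for readability.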
Samples:
{
    "subject": "gorilla",
    "predicate": "HasA",
    "object": "tail",
    "tail_phrase": "have tail",
    "score": 0.15,
    "strict_siblings":
        [
            "monkey",
            "lemur"
        ],
    "relaxed_siblings":
        [
            {
                "subject": "tiger",
                "predicate": "HasA",
                "object": "long tail"
            },

            {
                "subject": "baboon",
                "predicate": "HasA",
                "object": "long tail"
            },

            {
                "subject": "cheetah",
                "predicate": "HasA",
                "object": "long muscular tail"
            }
        ],
    "siblings":
        [
            {
                "wild animal": ["baboon", "monkey", "lemur", "cheetah", "tiger"]
            }

        ]
}

{
    "subject": "tabbouleh",
    "predicate": "ReceivesAction",
    "object": "baked",
    "tail_phrase": "be baked",
    "score": 0.7,
    "strict_siblings":
        [
            "flatbread",
            "samosa",
            "pasta",
            "casserole",
            "loaf",
            "enchilada"
        ],
    "relaxed_siblings":
        [
            {
                "subject": "eggplant",
                "predicate": "ReceivesAction",
                "object": "baked in the oven"
            },

            {
                "subject": "chutney",
                "predicate": "ReceivesAction",
                "object": "cooked"
            },

            {
                "subject": "polenta",
                "predicate": "ReceivesAction",
                "object": "cooked"
            },

            {
                "subject": "kohlrabi",
                "predicate": "ReceivesAction",
                "object": "cooked"
            },

            {
                "subject": "couscous",
                "predicate": "ReceivesAction",
                "object": "cooked"
            }
        ],
    "siblings":
        [
            {
                "side dish": ["loaf","pasta", "casserole", "couscous", "enchilada", "eggplant", "polenta", "kohlrabi", "chutney", "flatbread", "samosa"]
            }
        ]
}