Evaluation
EvALL log
{"files":{"EXIST2022_test_GOLD_EvALL_Task1_en.tsv":{"description":"The file is correctly parser without errors or warnings.\\nFile name: EXIST2022_test_GOLD_EvALL_Task1_en.tsv.","errors":{},"gold":true,"name":"EXIST2022_test_GOLD_EvALL_Task1_en.tsv","status":"OK"},"task1_SINAI-TL_1.tsv_en.tsv":{"description":"The file is correctly parser without errors or warnings.\\nFile name: task1_SINAI-TL_1.tsv_en.tsv.","errors":{},"gold":false,"name":"task1_SINAI-TL_1.tsv_en.tsv","status":"OK"}},"metrics":{"Accuracy":{"acronym":"Acc","description":"Coming soon!","name":"Accuracy","results":{"average_per_test_case":0.823076923076923,"test_cases":[{"average":0.823076923076923,"name":"EXIST2022"}]},"status":"OK"},"CrossEntropy":{"acronym":"CE","description":"Coming soon!\\nThe evaluation FAIL.","name":"Cross Entropy","preconditions":{"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION":{"description":" The selected metric cannot be evaluated as the formats of the gold and predictions are not valid for this evaluation context.\\nThe metric name is: Cross Entropy.\\nTest case(s) name: EXIST2022.","name":"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION","status":"FAIL","test_cases":["EXIST2022"]}},"results":{"average_per_test_case":null,"test_cases":[]},"status":"FAIL"},"DCG":{"acronym":"DCG","description":"Coming soon!\\nThe evaluation FAIL.","name":"Discounted Cumulative Gain","preconditions":{"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION":{"description":" The selected metric cannot be evaluated as the formats of the gold and predictions are not valid for this evaluation context.\\nThe metric name is: Discounted Cumulative Gain.\\nTest case(s) name: EXIST2022.","name":"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION","status":"FAIL","test_cases":["EXIST2022"]}},"results":{"average_per_test_case":null,"test_cases":[]},"status":"FAIL"},"FMeasure":{"acronym":"F1","description":"Coming 
soon!","name":"F-Measure","results":{"average_per_test_case":0.8199987959785684,"test_cases":[{"average":0.8199987959785684,"classes":{"non-sexist":0.8435374149659863,"sexist":0.7964601769911505},"name":"EXIST2022"}]},"status":"OK"},"ICM":{"acronym":"ICM","description":"Coming soon!","name":"Information Contrast model","results":{"average_per_test_case":0.44654032337082566,"test_cases":[{"average":0.44654032337082566,"name":"EXIST2022"}]},"status":"OK"},"ICMNorm":{"acronym":"ICM-Norm","description":"Coming soon!","name":"Normalized Information Contrast Model","results":{"average_per_test_case":0.7282266999643587,"test_cases":[{"average":0.7282266999643587,"name":"EXIST2022"}]},"status":"OK"},"ICMSoft":{"acronym":"ICM-Soft","description":"Coming soon!\\nThe evaluation FAIL.","name":"Information Contrast Model Soft","preconditions":{"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION":{"description":" The selected metric cannot be evaluated as the formats of the gold and predictions are not valid for this evaluation context.\\nThe metric name is: Information Contrast Model Soft.\\nTest case(s) name: EXIST2022.","name":"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION","status":"FAIL","test_cases":["EXIST2022"]}},"results":{"average_per_test_case":null,"test_cases":[]},"status":"FAIL"},"ICMSoftNorm":{"acronym":"ICM-Soft-Norm","description":"Coming soon!\\nThe evaluation FAIL.","name":"Normalized Information Contrast Model Soft","preconditions":{"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION":{"description":" The selected metric cannot be evaluated as the formats of the gold and predictions are not valid for this evaluation context.\\nThe metric name is: Normalized Information Contrast Model Soft.\\nTest case(s) name: 
EXIST2022.","name":"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION","status":"FAIL","test_cases":["EXIST2022"]}},"results":{"average_per_test_case":null,"test_cases":[]},"status":"FAIL"},"Kappa":{"acronym":"Kappa","description":"Coming soon!","name":"Cohen's Kappa","results":{"average_per_test_case":0.6406519942912942,"test_cases":[{"average":0.6406519942912942,"name":"EXIST2022"}]},"status":"OK"},"MainReciprocalRank":{"acronym":"UNKNOWN","description":"UNKNOWN\\nThe evaluation FAIL.","errors":[{"description":"The selected metric does not exist.\\nThe metric name is: MainReciprocalRank.","name":"METRIC_UNKONW_METRIC_ERROR"}],"name":"MainReciprocalRank","status":"FAIL"},"MeanAveragePrecision":{"acronym":"UNKNOWN","description":"UNKNOWN\\nThe evaluation FAIL.","errors":[{"description":"The selected metric does not exist.\\nThe metric name is: MeanAveragePrecision.","name":"METRIC_UNKONW_METRIC_ERROR"}],"name":"MeanAveragePrecision","status":"FAIL"},"Precision":{"acronym":"Pr","description":"Coming soon!","name":"Precision","results":{"average_per_test_case":0.8179093796126493,"test_cases":[{"average":0.8179093796126493,"classes":{"non-sexist":0.8763250883392226,"sexist":0.759493670886076},"name":"EXIST2022"}]},"status":"OK"},"PrecisionAtK":{"acronym":"P@k","description":"Coming soon!\\nThe evaluation FAIL.","name":"Precision at k","preconditions":{"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION":{"description":" The selected metric cannot be evaluated as the formats of the gold and predictions are not valid for this evaluation context.\\nThe metric name is: Precision at k.\\nTest case(s) name: EXIST2022.","name":"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION","status":"FAIL","test_cases":["EXIST2022"]}},"results":{"average_per_test_case":null,"test_cases":[]},"status":"FAIL"},"RPrecision":{"acronym":"RPre.","description":"Coming soon!\\nThe evaluation FAIL.","name":"R 
Precision","preconditions":{"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION":{"description":" The selected metric cannot be evaluated as the formats of the gold and predictions are not valid for this evaluation context.\\nThe metric name is: R Precision.\\nTest case(s) name: EXIST2022.","name":"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION","status":"FAIL","test_cases":["EXIST2022"]}},"results":{"average_per_test_case":null,"test_cases":[]},"status":"FAIL"},"Recall":{"acronym":"Re","description":"Coming soon!","name":"Recall","results":{"average_per_test_case":0.825162028211971,"test_cases":[{"average":0.825162028211971,"classes":{"non-sexist":0.8131147540983606,"sexist":0.8372093023255814},"name":"EXIST2022"}]},"status":"OK"},"SystemPrecision":{"acronym":"SP","description":"Coming soon!\\nThe evaluation FAIL.","name":"System Precision","preconditions":{"METRIC_PRECONDITION_DIFFERENT_ITEMS_IN_GOLD_AND_PRED_ERROR":{"description":"The selected metric cannot be evaluated because the gold and predictions have the same number of items.\\nThe metric name is: System Precision.\\nTest case(s) name: EXIST2022.","name":"METRIC_PRECONDITION_DIFFERENT_ITEMS_IN_GOLD_AND_PRED_ERROR","status":"FAIL","test_cases":["EXIST2022"]}},"results":{"average_per_test_case":null,"test_cases":[]},"status":"FAIL"},"nDCG":{"acronym":"nDCG","description":"Coming soon!\\nThe evaluation FAIL.","name":"Normalized Discounted Cumulative Gain","preconditions":{"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION":{"description":" The selected metric cannot be evaluated as the formats of the gold and predictions are not valid for this evaluation context.\\nThe metric name is: Normalized Discounted Cumulative Gain.\\nTest case(s) name: EXIST2022.","name":"METRIC_PRECONDITION_NOT_VALID_FORMAT_FOR_CONTEXT_EVALUATION","status":"FAIL","test_cases":["EXIST2022"]}},"results":{"average_per_test_case":null,"test_cases":[]},"status":"FAIL"}}}
ICM Norm
0.73
System
SINAI-TL_1
Results split
All
Precisión
0.82
Recall
0.83
F1
0.82
Accuracy
0.82
ICM
0.45
Kappa
0.64