Evaluation
Evall log
{"files":{"EXIST2025_T1_2_en_es_hard_test_gold.json":{"description":"The file is correctly parser without errors or warnings.\\nFile name: EXIST2025_T1_2_en_es_hard_test_gold.json.","errors":{},"gold":true,"name":"EXIST2025_T1_2_en_es_hard_test_gold.json","status":"OK"},"task1_2_hard_CodeHerGuard_2.json":{"description":"The file is correctly parser without errors or warnings.\\nFile name: task1_2_hard_CodeHerGuard_2.json.","errors":{},"gold":false,"name":"task1_2_hard_CodeHerGuard_2.json","status":"OK"}},"metrics":{"Accuracy":{"acronym":"Acc","description":"Coming soon!\\nThe evaluation WARNING.","name":"Accuracy","preconditions":{"METRIC_PRECONDITION_HIERARCHY_NOT_VALID_FOR_METRIC":{"description":"The hierarchy is provided for the evaluation but this metric does not allow to use it. Hierarchy is ignored.\\nThe metric name is: Accuracy.\\nTest case(s) name: EXIST2025.","name":"METRIC_PRECONDITION_HIERARCHY_NOT_VALID_FOR_METRIC","status":"WARNING","test_cases":["EXIST2025"]}},"results":{"average_per_test_case":0.7318401937046005,"test_cases":[{"average":0.7318401937046005,"name":"EXIST2025"}]},"status":"WARNING"},"FMeasure":{"acronym":"F1","description":"Coming soon!\\nThe evaluation WARNING.","name":"F-Measure","preconditions":{"METRIC_PRECONDITION_HIERARCHY_NOT_VALID_FOR_METRIC":{"description":"The hierarchy is provided for the evaluation but this metric does not allow to use it. Hierarchy is ignored.\\nThe metric name is: F-Measure.\\nTest case(s) name: EXIST2025.","name":"METRIC_PRECONDITION_HIERARCHY_NOT_VALID_FOR_METRIC","status":"WARNING","test_cases":["EXIST2025"]}},"results":{"average_per_test_case":0.4055723149619994,"test_cases":[{"average":0.4055723149619994,"classes":{"DIRECT":0.6197757390417941,"JUDGEMENTAL":0.1346801346801347,"NO":0.7639372822299652,"REPORTED":0.1038961038961039},"name":"EXIST2025"}]},"status":"WARNING"},"ICM":{"acronym":"ICM","description":"Coming soon!","name":"Information Contrast model","results":{"average_per_test_case":-0.002686113454755264,"test_cases":[{"average":-0.002686113454755264,"name":"EXIST2025"}]},"status":"OK"},"ICMNorm":{"acronym":"ICM-Norm","description":"Coming soon!","name":"Normalized Information Contrast Model","results":{"average_per_test_case":0.4991266344951854,"test_cases":[{"average":0.4991266344951854,"name":"EXIST2025"}]},"status":"OK"},"Kappa":{"acronym":"Kappa","description":"Coming soon!\\nThe evaluation WARNING.","name":"Cohen's Kappa","preconditions":{"METRIC_PRECONDITION_HIERARCHY_NOT_VALID_FOR_METRIC":{"description":"The hierarchy is provided for the evaluation but this metric does not allow to use it. Hierarchy is ignored.\\nThe metric name is: Cohen's Kappa.\\nTest case(s) name: EXIST2025.","name":"METRIC_PRECONDITION_HIERARCHY_NOT_VALID_FOR_METRIC","status":"WARNING","test_cases":["EXIST2025"]}},"results":{"average_per_test_case":0.5046822081504805,"test_cases":[{"average":0.5046822081504805,"name":"EXIST2025"}]},"status":"WARNING"},"Precision":{"acronym":"Pr","description":"Coming soon!\\nThe evaluation WARNING.","name":"Precision","preconditions":{"METRIC_PRECONDITION_HIERARCHY_NOT_VALID_FOR_METRIC":{"description":"The hierarchy is provided for the evaluation but this metric does not allow to use it. Hierarchy is ignored.\\nThe metric name is: Precision.\\nTest case(s) name: EXIST2025.","name":"METRIC_PRECONDITION_HIERARCHY_NOT_VALID_FOR_METRIC","status":"WARNING","test_cases":["EXIST2025"]}},"results":{"average_per_test_case":0.3957562246340558,"test_cases":[{"average":0.3957562246340558,"classes":{"DIRECT":0.5196581196581197,"JUDGEMENTAL":0.1388888888888889,"NO":0.6664133738601824,"REPORTED":0.25806451612903225},"name":"EXIST2025"}]},"status":"WARNING"},"Recall":{"acronym":"Re","description":"Coming soon!\\nThe evaluation WARNING.","name":"Recall","preconditions":{"METRIC_PRECONDITION_HIERARCHY_NOT_VALID_FOR_METRIC":{"description":"The hierarchy is provided for the evaluation but this metric does not allow to use it. Hierarchy is ignored.\\nThe metric name is: Recall.\\nTest case(s) name: EXIST2025.","name":"METRIC_PRECONDITION_HIERARCHY_NOT_VALID_FOR_METRIC","status":"WARNING","test_cases":["EXIST2025"]}},"results":{"average_per_test_case":0.4645835828788277,"test_cases":[{"average":0.4645835828788277,"classes":{"DIRECT":0.7676767676767676,"JUDGEMENTAL":0.13071895424836602,"NO":0.8948979591836734,"REPORTED":0.06504065040650407},"name":"EXIST2025"}]},"status":"WARNING"},"SystemPrecision":{"acronym":"SP","description":"Coming soon!\\nThe evaluation WARNING.","name":"System Precision","preconditions":{"METRIC_PRECONDITION_HIERARCHY_NOT_VALID_FOR_METRIC":{"description":"The hierarchy is provided for the evaluation but this metric does not allow to use it. Hierarchy is ignored.\\nThe metric name is: System Precision.\\nTest case(s) name: EXIST2025.","name":"METRIC_PRECONDITION_HIERARCHY_NOT_VALID_FOR_METRIC","status":"WARNING","test_cases":["EXIST2025"]}},"results":{"average_per_test_case":0.5823699421965318,"test_cases":[{"average":0.5823699421965318,"name":"EXIST2025"}]},"status":"WARNING"}}}
ICM Norm
0.50
metric_params
{"FMeasure":{"alfa_param":0.5,"custom":false},"ICM":{"alpha_1":2,"alpha_2":2,"beta":3,"custom":false}}
Sistema
EXist2025_hybridSexism
Partición resultados
All
Precisión
0.40
Recall
0.46
F1
0.41
Accuracy
0.73
ICM
0.00
SystemPrecision
0.58
Kappa
0.50

