Results - Health Misinformation 2022¶
citius.base¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
citius.base graded.usefulness ndcg all 0.6855
citius.base binary.useful-correct P_10 all 0.5600
citius.base binary.incorrect P_10 all 0.2243
citius.base binary.useful-correct ndcg all 0.5948
citius.base graded.harmful-only compatibility all 0.2148
citius.base graded.helpful-only compatibility all 0.2559
citius.r1¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
citius.r1 graded.usefulness ndcg all 0.5955
citius.r1 binary.useful-correct P_10 all 0.5000
citius.r1 binary.incorrect P_10 all 0.1838
citius.r1 binary.useful-correct ndcg all 0.5341
citius.r1 graded.harmful-only compatibility all 0.1533
citius.r1 graded.helpful-only compatibility all 0.1836
citius.r2¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
citius.r2 graded.usefulness ndcg all 0.5965
citius.r2 binary.useful-correct P_10 all 0.4978
citius.r2 binary.incorrect P_10 all 0.1676
citius.r2 binary.useful-correct ndcg all 0.5335
citius.r2 graded.harmful-only compatibility all 0.1457
citius.r2 graded.helpful-only compatibility all 0.1841
citius.r3¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
citius.r3 graded.usefulness ndcg all 0.6652
citius.r3 binary.useful-correct P_10 all 0.4956
citius.r3 binary.incorrect P_10 all 0.1757
citius.r3 binary.useful-correct ndcg all 0.5912
citius.r3 graded.harmful-only compatibility all 0.1463
citius.r3 graded.helpful-only compatibility all 0.2427
citius.r4¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
citius.r4 graded.usefulness ndcg all 0.6802
citius.r4 binary.useful-correct P_10 all 0.5622
citius.r4 binary.incorrect P_10 all 0.1973
citius.r4 binary.useful-correct ndcg all 0.6009
citius.r4 graded.harmful-only compatibility all 0.1775
citius.r4 graded.helpful-only compatibility all 0.2607
citius.r5¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
citius.r5 graded.usefulness ndcg all 0.6861
citius.r5 binary.useful-correct P_10 all 0.5467
citius.r5 binary.incorrect P_10 all 0.2297
citius.r5 binary.useful-correct ndcg all 0.5897
citius.r5 graded.harmful-only compatibility all 0.2016
citius.r5 graded.helpful-only compatibility all 0.2579
citius.r6¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
citius.r6 graded.usefulness ndcg all 0.6806
citius.r6 binary.useful-correct P_10 all 0.5711
citius.r6 binary.incorrect P_10 all 0.1973
citius.r6 binary.useful-correct ndcg all 0.6013
citius.r6 graded.harmful-only compatibility all 0.1801
citius.r6 graded.helpful-only compatibility all 0.2610
webis-uniqa-ax-lin¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
webis-uniqa-ax-lin graded.usefulness ndcg all 0.5575
webis-uniqa-ax-lin binary.useful-correct P_10 all 0.3089
webis-uniqa-ax-lin binary.incorrect P_10 all 0.1297
webis-uniqa-ax-lin binary.useful-correct ndcg all 0.4955
webis-uniqa-ax-lin graded.harmful-only compatibility all 0.1168
webis-uniqa-ax-lin graded.helpful-only compatibility all 0.1458
webis-uniqa-ax-pol¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
webis-uniqa-ax-pol graded.usefulness ndcg all 0.5842
webis-uniqa-ax-pol binary.useful-correct P_10 all 0.3556
webis-uniqa-ax-pol binary.incorrect P_10 all 0.1514
webis-uniqa-ax-pol binary.useful-correct ndcg all 0.5215
webis-uniqa-ax-pol graded.harmful-only compatibility all 0.1372
webis-uniqa-ax-pol graded.helpful-only compatibility all 0.1848
webis-uniqa-ax-com¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
webis-uniqa-ax-com graded.usefulness ndcg all 0.6555
webis-uniqa-ax-com binary.useful-correct P_10 all 0.5222
webis-uniqa-ax-com binary.incorrect P_10 all 0.2297
webis-uniqa-ax-com binary.useful-correct ndcg all 0.5779
webis-uniqa-ax-com graded.harmful-only compatibility all 0.1739
webis-uniqa-ax-com graded.helpful-only compatibility all 0.2589
webis-longck-ax-lin¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
webis-longck-ax-lin graded.usefulness ndcg all 0.4910
webis-longck-ax-lin binary.useful-correct P_10 all 0.2689
webis-longck-ax-lin binary.incorrect P_10 all 0.0919
webis-longck-ax-lin binary.useful-correct ndcg all 0.4330
webis-longck-ax-lin graded.harmful-only compatibility all 0.0668
webis-longck-ax-lin graded.helpful-only compatibility all 0.1122
webis-longck-ax-pol¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
webis-longck-ax-pol graded.usefulness ndcg all 0.5373
webis-longck-ax-pol binary.useful-correct P_10 all 0.3200
webis-longck-ax-pol binary.incorrect P_10 all 0.1108
webis-longck-ax-pol binary.useful-correct ndcg all 0.4750
webis-longck-ax-pol graded.harmful-only compatibility all 0.0851
webis-longck-ax-pol graded.helpful-only compatibility all 0.1473
webis-longck-ax-com¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
webis-longck-ax-com graded.usefulness ndcg all 0.6610
webis-longck-ax-com binary.useful-correct P_10 all 0.5489
webis-longck-ax-com binary.incorrect P_10 all 0.1811
webis-longck-ax-com binary.useful-correct ndcg all 0.5808
webis-longck-ax-com graded.harmful-only compatibility all 0.1450
webis-longck-ax-com graded.helpful-only compatibility all 0.2733
webis-longck-uniqa-ax-pol¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
webis-longck-uniqa-ax-pol graded.usefulness ndcg all 0.5683
webis-longck-uniqa-ax-pol binary.useful-correct P_10 all 0.3756
webis-longck-uniqa-ax-pol binary.incorrect P_10 all 0.0973
webis-longck-uniqa-ax-pol binary.useful-correct ndcg all 0.5148
webis-longck-uniqa-ax-pol graded.harmful-only compatibility all 0.0830
webis-longck-uniqa-ax-pol graded.helpful-only compatibility all 0.1734
webis-longck-uniqa-ax-com¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
webis-longck-uniqa-ax-com graded.usefulness ndcg all 0.6530
webis-longck-uniqa-ax-com binary.useful-correct P_10 all 0.4822
webis-longck-uniqa-ax-com binary.incorrect P_10 all 0.2297
webis-longck-uniqa-ax-com binary.useful-correct ndcg all 0.5715
webis-longck-uniqa-ax-com graded.harmful-only compatibility all 0.1721
webis-longck-uniqa-ax-com graded.helpful-only compatibility all 0.2501
webis-longck-uniqa-pol¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
webis-longck-uniqa-pol graded.usefulness ndcg all 0.5692
webis-longck-uniqa-pol binary.useful-correct P_10 all 0.3844
webis-longck-uniqa-pol binary.incorrect P_10 all 0.1000
webis-longck-uniqa-pol binary.useful-correct ndcg all 0.5173
webis-longck-uniqa-pol graded.harmful-only compatibility all 0.0798
webis-longck-uniqa-pol graded.helpful-only compatibility all 0.1748
webis-longck-uniqa-ax-lin¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
webis-longck-uniqa-ax-lin graded.usefulness ndcg all 0.5244
webis-longck-uniqa-ax-lin binary.useful-correct P_10 all 0.3400
webis-longck-uniqa-ax-lin binary.incorrect P_10 all 0.0784
webis-longck-uniqa-ax-lin binary.useful-correct ndcg all 0.4781
webis-longck-uniqa-ax-lin graded.harmful-only compatibility all 0.0685
webis-longck-uniqa-ax-lin graded.helpful-only compatibility all 0.1421
WatS-MT5-MT5¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
WatS-MT5-MT5 graded.usefulness ndcg all 0.7277
WatS-MT5-MT5 binary.useful-correct P_10 all 0.5378
WatS-MT5-MT5 binary.incorrect P_10 all 0.2081
WatS-MT5-MT5 binary.useful-correct ndcg all 0.6439
WatS-MT5-MT5 graded.harmful-only compatibility all 0.1943
WatS-MT5-MT5 graded.helpful-only compatibility all 0.2464
WatS-Bigbird2_75-MT5¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
WatS-Bigbird2_75-MT5 graded.usefulness ndcg all 0.6989
WatS-Bigbird2_75-MT5 binary.useful-correct P_10 all 0.4956
WatS-Bigbird2_75-MT5 binary.incorrect P_10 all 0.2270
WatS-Bigbird2_75-MT5 binary.useful-correct ndcg all 0.6161
WatS-Bigbird2_75-MT5 graded.harmful-only compatibility all 0.2092
WatS-Bigbird2_75-MT5 graded.helpful-only compatibility all 0.2166
WatS-Bigbird2_75-MT5-TA2¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
WatS-Bigbird2_75-MT5-TA2 graded.usefulness ndcg all 0.7018
WatS-Bigbird2_75-MT5-TA2 binary.useful-correct P_10 all 0.5556
WatS-Bigbird2_75-MT5-TA2 binary.incorrect P_10 all 0.1649
WatS-Bigbird2_75-MT5-TA2 binary.useful-correct ndcg all 0.6374
WatS-Bigbird2_75-MT5-TA2 graded.harmful-only compatibility all 0.1530
WatS-Bigbird2_75-MT5-TA2 graded.helpful-only compatibility all 0.2415
WatS-BM25-Query¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
WatS-BM25-Query graded.usefulness ndcg all 0.5398
WatS-BM25-Query binary.useful-correct P_10 all 0.3756
WatS-BM25-Query binary.incorrect P_10 all 0.1622
WatS-BM25-Query binary.useful-correct ndcg all 0.4686
WatS-BM25-Query graded.harmful-only compatibility all 0.1403
WatS-BM25-Query graded.helpful-only compatibility all 0.1711
WatS-BM25-Question¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
WatS-BM25-Question graded.usefulness ndcg all 0.6385
WatS-BM25-Question binary.useful-correct P_10 all 0.3756
WatS-BM25-Question binary.incorrect P_10 all 0.1973
WatS-BM25-Question binary.useful-correct ndcg all 0.5527
WatS-BM25-Question graded.harmful-only compatibility all 0.1487
WatS-BM25-Question graded.helpful-only compatibility all 0.1928
WatS-Trust¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
WatS-Trust graded.usefulness ndcg all 0.5155
WatS-Trust binary.useful-correct P_10 all 0.4089
WatS-Trust binary.incorrect P_10 all 0.1811
WatS-Trust binary.useful-correct ndcg all 0.4419
WatS-Trust graded.harmful-only compatibility all 0.1422
WatS-Trust graded.helpful-only compatibility all 0.2050
WatS-Trust-L1¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
WatS-Trust-L1 graded.usefulness ndcg all 0.4322
WatS-Trust-L1 binary.useful-correct P_10 all 0.4022
WatS-Trust-L1 binary.incorrect P_10 all 0.1865
WatS-Trust-L1 binary.useful-correct ndcg all 0.3668
WatS-Trust-L1 graded.harmful-only compatibility all 0.1528
WatS-Trust-L1 graded.helpful-only compatibility all 0.1867
WatS-Trust-MT5¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
WatS-Trust-MT5 graded.usefulness ndcg all 0.7268
WatS-Trust-MT5 binary.useful-correct P_10 all 0.5444
WatS-Trust-MT5 binary.incorrect P_10 all 0.2405
WatS-Trust-MT5 binary.useful-correct ndcg all 0.6337
WatS-Trust-MT5 graded.harmful-only compatibility all 0.1883
WatS-Trust-MT5 graded.helpful-only compatibility all 0.2446
WatS-Trust-MT5-L1¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
WatS-Trust-MT5-L1 graded.usefulness ndcg all 0.7293
WatS-Trust-MT5-L1 binary.useful-correct P_10 all 0.5600
WatS-Trust-MT5-L1 binary.incorrect P_10 all 0.2027
WatS-Trust-MT5-L1 binary.useful-correct ndcg all 0.6470
WatS-Trust-MT5-L1 graded.harmful-only compatibility all 0.1773
WatS-Trust-MT5-L1 graded.helpful-only compatibility all 0.2534
WatS-Bigbird2_75-MT5-TA1¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
WatS-Bigbird2_75-MT5-TA1 graded.usefulness ndcg all 0.7068
WatS-Bigbird2_75-MT5-TA1 binary.useful-correct P_10 all 0.5378
WatS-Bigbird2_75-MT5-TA1 binary.incorrect P_10 all 0.1946
WatS-Bigbird2_75-MT5-TA1 binary.useful-correct ndcg all 0.6318
WatS-Bigbird2_75-MT5-TA1 graded.harmful-only compatibility all 0.1711
WatS-Bigbird2_75-MT5-TA1 graded.helpful-only compatibility all 0.2440
hm22_ref.vera_mdt5¶
Metadata
| Participants
| Input
| Summary
| Appendix
summary (trec_eval)
hm22_ref.vera_mdt5 graded.usefulness ndcg all 0.7428
hm22_ref.vera_mdt5 binary.useful-correct P_10 all 0.6622
hm22_ref.vera_mdt5 binary.incorrect P_10 all 0.1351
hm22_ref.vera_mdt5 binary.useful-correct ndcg all 0.7059
hm22_ref.vera_mdt5 graded.harmful-only compatibility all 0.1308
hm22_ref.vera_mdt5 graded.helpful-only compatibility all 0.3391
hm22_ref.vera_mt5¶
Metadata
| Participants
| Input
| Summary
| Appendix
summary (trec_eval)
hm22_ref.vera_mt5 graded.usefulness ndcg all 0.7403
hm22_ref.vera_mt5 binary.useful-correct P_10 all 0.6711
hm22_ref.vera_mt5 binary.incorrect P_10 all 0.1270
hm22_ref.vera_mt5 binary.useful-correct ndcg all 0.7061
hm22_ref.vera_mt5 graded.harmful-only compatibility all 0.1168
hm22_ref.vera_mt5 graded.helpful-only compatibility all 0.3386
hm22_ref_comb.vera_mdt5¶
Metadata
| Participants
| Input
| Summary
| Appendix
summary (trec_eval)
hm22_ref_comb.vera_mdt5 graded.usefulness ndcg all 0.7454
hm22_ref_comb.vera_mdt5 binary.useful-correct P_10 all 0.6889
hm22_ref_comb.vera_mdt5 binary.incorrect P_10 all 0.1135
hm22_ref_comb.vera_mdt5 binary.useful-correct ndcg all 0.7108
hm22_ref_comb.vera_mdt5 graded.harmful-only compatibility all 0.1064
hm22_ref_comb.vera_mdt5 graded.helpful-only compatibility all 0.3447
hm22_ref_comb.vera_mt5¶
Metadata
| Participants
| Input
| Summary
| Appendix
summary (trec_eval)
hm22_ref_comb.vera_mt5 graded.usefulness ndcg all 0.7431
hm22_ref_comb.vera_mt5 binary.useful-correct P_10 all 0.6956
hm22_ref_comb.vera_mt5 binary.incorrect P_10 all 0.1027
hm22_ref_comb.vera_mt5 binary.useful-correct ndcg all 0.7117
hm22_ref_comb.vera_mt5 graded.harmful-only compatibility all 0.0894
hm22_ref_comb.vera_mt5 graded.helpful-only compatibility all 0.3460
hm22_ref.mdt5¶
Metadata
| Participants
| Input
| Summary
| Appendix
summary (trec_eval)
hm22_ref.mdt5 graded.usefulness ndcg all 0.7505
hm22_ref.mdt5 binary.useful-correct P_10 all 0.6600
hm22_ref.mdt5 binary.incorrect P_10 all 0.1432
hm22_ref.mdt5 binary.useful-correct ndcg all 0.6952
hm22_ref.mdt5 graded.harmful-only compatibility all 0.1467
hm22_ref.mdt5 graded.helpful-only compatibility all 0.3216
hm22_ref.mt5¶
Metadata
| Participants
| Input
| Summary
| Appendix
summary (trec_eval)
hm22_ref.mt5 graded.usefulness ndcg all 0.7490
hm22_ref.mt5 binary.useful-correct P_10 all 0.6511
hm22_ref.mt5 binary.incorrect P_10 all 0.1216
hm22_ref.mt5 binary.useful-correct ndcg all 0.6997
hm22_ref.mt5 graded.harmful-only compatibility all 0.1264
hm22_ref.mt5 graded.helpful-only compatibility all 0.3276
hm22_ref_comb.mdt5¶
Metadata
| Participants
| Input
| Summary
| Appendix
summary (trec_eval)
hm22_ref_comb.mdt5 graded.usefulness ndcg all 0.7505
hm22_ref_comb.mdt5 binary.useful-correct P_10 all 0.6622
hm22_ref_comb.mdt5 binary.incorrect P_10 all 0.1405
hm22_ref_comb.mdt5 binary.useful-correct ndcg all 0.6946
hm22_ref_comb.mdt5 graded.harmful-only compatibility all 0.1398
hm22_ref_comb.mdt5 graded.helpful-only compatibility all 0.3209
hm22_ref_comb.mt5¶
Metadata
| Participants
| Input
| Summary
| Appendix
summary (trec_eval)
hm22_ref_comb.mt5 graded.usefulness ndcg all 0.7490
hm22_ref_comb.mt5 binary.useful-correct P_10 all 0.6578
hm22_ref_comb.mt5 binary.incorrect P_10 all 0.1108
hm22_ref_comb.mt5 binary.useful-correct ndcg all 0.7006
hm22_ref_comb.mt5 graded.harmful-only compatibility all 0.1166
hm22_ref_comb.mt5 graded.helpful-only compatibility all 0.3282
hm22.vera_mdt5¶
Metadata
| Participants
| Input
| Summary
| Appendix
summary (trec_eval)
hm22.vera_mdt5 graded.usefulness ndcg all 0.7353
hm22.vera_mdt5 binary.useful-correct P_10 all 0.6311
hm22.vera_mdt5 binary.incorrect P_10 all 0.1378
hm22.vera_mdt5 binary.useful-correct ndcg all 0.6982
hm22.vera_mdt5 graded.harmful-only compatibility all 0.1238
hm22.vera_mdt5 graded.helpful-only compatibility all 0.3272
hm22.vera_mt5¶
Metadata
| Participants
| Input
| Summary
| Appendix
summary (trec_eval)
hm22.vera_mt5 graded.usefulness ndcg all 0.7338
hm22.vera_mt5 binary.useful-correct P_10 all 0.6400
hm22.vera_mt5 binary.incorrect P_10 all 0.1459
hm22.vera_mt5 binary.useful-correct ndcg all 0.6977
hm22.vera_mt5 graded.harmful-only compatibility all 0.1159
hm22.vera_mt5 graded.helpful-only compatibility all 0.3214
hm22.mdt5¶
Metadata
| Participants
| Input
| Summary
| Appendix
summary (trec_eval)
hm22.mdt5 graded.usefulness ndcg all 0.7455
hm22.mdt5 binary.useful-correct P_10 all 0.5822
hm22.mdt5 binary.incorrect P_10 all 0.2135
hm22.mdt5 binary.useful-correct ndcg all 0.6576
hm22.mdt5 graded.harmful-only compatibility all 0.1894
hm22.mdt5 graded.helpful-only compatibility all 0.2780
hm22.mt5¶
Metadata
| Participants
| Input
| Summary
| Appendix
summary (trec_eval)
hm22.mt5 graded.usefulness ndcg all 0.7464
hm22.mt5 binary.useful-correct P_10 all 0.5800
hm22.mt5 binary.incorrect P_10 all 0.2162
hm22.mt5 binary.useful-correct ndcg all 0.6601
hm22.mt5 graded.harmful-only compatibility all 0.1942
hm22.mt5 graded.helpful-only compatibility all 0.2838
hm22_ref.vera¶
Metadata
| Participants
| Input
| Summary
| Appendix
summary (trec_eval)
hm22_ref.vera graded.usefulness ndcg all 0.6477
hm22_ref.vera binary.useful-correct P_10 all 0.5956
hm22_ref.vera binary.incorrect P_10 all 0.1108
hm22_ref.vera binary.useful-correct ndcg all 0.6466
hm22_ref.vera graded.harmful-only compatibility all 0.0971
hm22_ref.vera graded.helpful-only compatibility all 0.2836
hm22.vera¶
Metadata
| Participants
| Input
| Summary
| Appendix
summary (trec_eval)
hm22.vera graded.usefulness ndcg all 0.6358
hm22.vera binary.useful-correct P_10 all 0.5956
hm22.vera binary.incorrect P_10 all 0.1108
hm22.vera binary.useful-correct ndcg all 0.6346
hm22.vera graded.harmful-only compatibility all 0.0868
hm22.vera graded.helpful-only compatibility all 0.2697
hm22_ref_neg.vera¶
Metadata
| Participants
| Input
| Summary
| Appendix
summary (trec_eval)
hm22_ref_neg.vera graded.usefulness ndcg all 0.5807
hm22_ref_neg.vera binary.useful-correct P_10 all 0.2489
hm22_ref_neg.vera binary.incorrect P_10 all 0.3216
hm22_ref_neg.vera binary.useful-correct ndcg all 0.4580
hm22_ref_neg.vera graded.harmful-only compatibility all 0.2793
hm22_ref_neg.vera graded.helpful-only compatibility all 0.1121
hm22_ref_neg.mdt5¶
Metadata
| Participants
| Input
| Summary
| Appendix
summary (trec_eval)
hm22_ref_neg.mdt5 graded.usefulness ndcg all 0.7312
hm22_ref_neg.mdt5 binary.useful-correct P_10 all 0.5089
hm22_ref_neg.mdt5 binary.incorrect P_10 all 0.2838
hm22_ref_neg.mdt5 binary.useful-correct ndcg all 0.6301
hm22_ref_neg.mdt5 graded.harmful-only compatibility all 0.2447
hm22_ref_neg.mdt5 graded.helpful-only compatibility all 0.2388
hm22_ref_neg.mt5¶
Metadata
| Participants
| Input
| Summary
| Appendix
summary (trec_eval)
hm22_ref_neg.mt5 graded.usefulness ndcg all 0.7281
hm22_ref_neg.mt5 binary.useful-correct P_10 all 0.4800
hm22_ref_neg.mt5 binary.incorrect P_10 all 0.2703
hm22_ref_neg.mt5 binary.useful-correct ndcg all 0.6218
hm22_ref_neg.mt5 graded.harmful-only compatibility all 0.2704
hm22_ref_neg.mt5 graded.helpful-only compatibility all 0.2299
hm22_ref_neg.vera_mdt5¶
Metadata
| Participants
| Input
| Summary
| Appendix
summary (trec_eval)
hm22_ref_neg.vera_mdt5 graded.usefulness ndcg all 0.6886
hm22_ref_neg.vera_mdt5 binary.useful-correct P_10 all 0.3356
hm22_ref_neg.vera_mdt5 binary.incorrect P_10 all 0.3135
hm22_ref_neg.vera_mdt5 binary.useful-correct ndcg all 0.5639
hm22_ref_neg.vera_mdt5 graded.harmful-only compatibility all 0.2988
hm22_ref_neg.vera_mdt5 graded.helpful-only compatibility all 0.1723
hm22_ref_neg.vera_mt5¶
Metadata
| Participants
| Input
| Summary
| Appendix
summary (trec_eval)
hm22_ref_neg.vera_mt5 graded.usefulness ndcg all 0.6855
hm22_ref_neg.vera_mt5 binary.useful-correct P_10 all 0.3267
hm22_ref_neg.vera_mt5 binary.incorrect P_10 all 0.3135
hm22_ref_neg.vera_mt5 binary.useful-correct ndcg all 0.5600
hm22_ref_neg.vera_mt5 graded.harmful-only compatibility all 0.2870
hm22_ref_neg.vera_mt5 graded.helpful-only compatibility all 0.1669
bm25¶
Metadata
| Participants
| Input
| Summary
| Appendix
summary (trec_eval)
bm25 graded.usefulness ndcg all 0.6385
bm25 binary.useful-correct P_10 all 0.3756
bm25 binary.incorrect P_10 all 0.1973
bm25 binary.useful-correct ndcg all 0.5527
bm25 graded.harmful-only compatibility all 0.1487
bm25 graded.helpful-only compatibility all 0.1928
WatS-Manual¶
Metadata
| Participants
| Proceedings
| Input
| Summary
| Appendix
summary (trec_eval)
WatS-Manual graded.usefulness ndcg all 0.7263
WatS-Manual binary.useful-correct P_10 all 0.6244
WatS-Manual binary.incorrect P_10 all 0.1162
WatS-Manual binary.useful-correct ndcg all 0.6656
WatS-Manual graded.harmful-only compatibility all 0.1398
WatS-Manual graded.helpful-only compatibility all 0.2843