Skip to content

Results - Health Misinformation 2021

watrrfall

Metadata | Participants| Input| Summary| Appendix

summary (trec_eval)
watrrfall   graded.usefulness   ndcg    all 0.3858
watrrfall   binary.useful-correct   P_10    all 0.1382
watrrfall   binary.incorrect    P_10    all 0.1781
watrrfall   binary.useful-correct   ndcg    all 0.2676
watrrfall   binary.useful-credible  ndcg    all 0.3215
watrrfall   binary.useful-correct-credible  ndcg    all 0.2298
watrrfall   graded.harmful-only compatibility   all 0.0960
watrrfall   graded.helpful-only compatibility   all 0.0540

watrrfg

Metadata | Participants| Input| Summary| Appendix

summary (trec_eval)
watrrfg graded.usefulness   ndcg    all 0.3804
watrrfg binary.useful-correct   P_10    all 0.1235
watrrfg binary.incorrect    P_10    all 0.1125
watrrfg binary.useful-correct   ndcg    all 0.2758
watrrfg binary.useful-credible  ndcg    all 0.3021
watrrfg binary.useful-correct-credible  ndcg    all 0.2308
watrrfg graded.harmful-only compatibility   all 0.0633
watrrfg graded.helpful-only compatibility   all 0.0507

watrrfnp

Metadata | Participants| Input| Summary| Appendix

summary (trec_eval)
watrrfnp    graded.usefulness   ndcg    all 0.3499
watrrfnp    binary.useful-correct   P_10    all 0.1441
watrrfnp    binary.incorrect    P_10    all 0.1344
watrrfnp    binary.useful-correct   ndcg    all 0.2484
watrrfnp    binary.useful-credible  ndcg    all 0.2866
watrrfnp    binary.useful-correct-credible  ndcg    all 0.2108
watrrfnp    graded.harmful-only compatibility   all 0.0718
watrrfnp    graded.helpful-only compatibility   all 0.0547

watbm25

Metadata | Participants| Input| Summary| Appendix

summary (trec_eval)
watbm25 graded.usefulness   ndcg    all 0.4913
watbm25 binary.useful-correct   P_10    all 0.2735
watbm25 binary.incorrect    P_10    all 0.2437
watbm25 binary.useful-correct   ndcg    all 0.3591
watbm25 binary.useful-credible  ndcg    all 0.3997
watbm25 binary.useful-correct-credible  ndcg    all 0.3086
watbm25 graded.harmful-only compatibility   all 0.1247
watbm25 graded.helpful-only compatibility   all 0.0993

watbm25p

Metadata | Participants| Input| Summary| Appendix

summary (trec_eval)
watbm25p    graded.usefulness   ndcg    all 0.3695
watbm25p    binary.useful-correct   P_10    all 0.2147
watbm25p    binary.incorrect    P_10    all 0.1500
watbm25p    binary.useful-correct   ndcg    all 0.2625
watbm25p    binary.useful-credible  ndcg    all 0.3172
watbm25p    binary.useful-correct-credible  ndcg    all 0.2301
watbm25p    graded.harmful-only compatibility   all 0.1126
watbm25p    graded.helpful-only compatibility   all 0.0895

watgoog

Metadata | Participants| Input| Summary| Appendix

summary (trec_eval)
watgoog graded.usefulness   ndcg    all 0.0689
watgoog binary.useful-correct   P_10    all 0.0500
watgoog binary.incorrect    P_10    all 0.0563
watgoog binary.useful-correct   ndcg    all 0.0495
watgoog binary.useful-credible  ndcg    all 0.0459
watgoog binary.useful-correct-credible  ndcg    all 0.0340
watgoog graded.harmful-only compatibility   all 0.0223
watgoog graded.helpful-only compatibility   all 0.0073

watgoogp

Metadata | Participants| Input| Summary| Appendix

summary (trec_eval)
watgoogp    graded.usefulness   ndcg    all 0.1593
watgoogp    binary.useful-correct   P_10    all 0.1176
watgoogp    binary.incorrect    P_10    all 0.1094
watgoogp    binary.useful-correct   ndcg    all 0.1070
watgoogp    binary.useful-credible  ndcg    all 0.1238
watgoogp    binary.useful-correct-credible  ndcg    all 0.0840
watgoogp    graded.harmful-only compatibility   all 0.0536
watgoogp    graded.helpful-only compatibility   all 0.0300

watmed

Metadata | Participants| Input| Summary| Appendix

summary (trec_eval)
watmed  graded.usefulness   ndcg    all 0.0772
watmed  binary.useful-correct   P_10    all 0.0441
watmed  binary.incorrect    P_10    all 0.0375
watmed  binary.useful-correct   ndcg    all 0.0468
watmed  binary.useful-credible  ndcg    all 0.0856
watmed  binary.useful-correct-credible  ndcg    all 0.0503
watmed  graded.harmful-only compatibility   all 0.0335
watmed  graded.helpful-only compatibility   all 0.0258

watrrfm

Metadata | Participants| Input| Summary| Appendix

summary (trec_eval)
watrrfm graded.usefulness   ndcg    all 0.3997
watrrfm binary.useful-correct   P_10    all 0.1794
watrrfm binary.incorrect    P_10    all 0.1625
watrrfm binary.useful-correct   ndcg    all 0.2885
watrrfm binary.useful-credible  ndcg    all 0.3348
watrrfm binary.useful-correct-credible  ndcg    all 0.2499
watrrfm graded.harmful-only compatibility   all 0.1029
watrrfm graded.helpful-only compatibility   all 0.0785

citius.r1

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
citius.r1   graded.usefulness   ndcg    all 0.6173
citius.r1   binary.useful-correct   P_10    all 0.5118
citius.r1   binary.incorrect    P_10    all 0.2000
citius.r1   binary.useful-correct   ndcg    all 0.4979
citius.r1   binary.useful-credible  ndcg    all 0.5351
citius.r1   binary.useful-correct-credible  ndcg    all 0.4373
citius.r1   graded.harmful-only compatibility   all 0.1225
citius.r1   graded.helpful-only compatibility   all 0.2340

citius.r2

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
citius.r2   graded.usefulness   ndcg    all 0.6143
citius.r2   binary.useful-correct   P_10    all 0.4500
citius.r2   binary.incorrect    P_10    all 0.2750
citius.r2   binary.useful-correct   ndcg    all 0.4729
citius.r2   binary.useful-credible  ndcg    all 0.5251
citius.r2   binary.useful-correct-credible  ndcg    all 0.4173
citius.r2   graded.harmful-only compatibility   all 0.1659
citius.r2   graded.helpful-only compatibility   all 0.2080

citius.r3

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
citius.r3   graded.usefulness   ndcg    all 0.6204
citius.r3   binary.useful-correct   P_10    all 0.4559
citius.r3   binary.incorrect    P_10    all 0.2781
citius.r3   binary.useful-correct   ndcg    all 0.4723
citius.r3   binary.useful-credible  ndcg    all 0.5276
citius.r3   binary.useful-correct-credible  ndcg    all 0.4159
citius.r3   graded.harmful-only compatibility   all 0.1605
citius.r3   graded.helpful-only compatibility   all 0.2195

citius.r4

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
citius.r4   graded.usefulness   ndcg    all 0.6153
citius.r4   binary.useful-correct   P_10    all 0.4353
citius.r4   binary.incorrect    P_10    all 0.2812
citius.r4   binary.useful-correct   ndcg    all 0.4714
citius.r4   binary.useful-credible  ndcg    all 0.5257
citius.r4   binary.useful-correct-credible  ndcg    all 0.4171
citius.r4   graded.harmful-only compatibility   all 0.1645
citius.r4   graded.helpful-only compatibility   all 0.2056

citius.r5

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
citius.r5   graded.usefulness   ndcg    all 0.6204
citius.r5   binary.useful-correct   P_10    all 0.4529
citius.r5   binary.incorrect    P_10    all 0.2719
citius.r5   binary.useful-correct   ndcg    all 0.4762
citius.r5   binary.useful-credible  ndcg    all 0.5305
citius.r5   binary.useful-correct-credible  ndcg    all 0.4213
citius.r5   graded.harmful-only compatibility   all 0.1589
citius.r5   graded.helpful-only compatibility   all 0.2183

citius.r6

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
citius.r6   graded.usefulness   ndcg    all 0.6137
citius.r6   binary.useful-correct   P_10    all 0.4588
citius.r6   binary.incorrect    P_10    all 0.2719
citius.r6   binary.useful-correct   ndcg    all 0.4728
citius.r6   binary.useful-credible  ndcg    all 0.5227
citius.r6   binary.useful-correct-credible  ndcg    all 0.4153
citius.r6   graded.harmful-only compatibility   all 0.1661
citius.r6   graded.helpful-only compatibility   all 0.2048

citius.r9

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
citius.r9   graded.usefulness   ndcg    all 0.6148
citius.r9   binary.useful-correct   P_10    all 0.4765
citius.r9   binary.incorrect    P_10    all 0.2625
citius.r9   binary.useful-correct   ndcg    all 0.4808
citius.r9   binary.useful-credible  ndcg    all 0.5278
citius.r9   binary.useful-correct-credible  ndcg    all 0.4232
citius.r9   graded.harmful-only compatibility   all 0.1425
citius.r9   graded.helpful-only compatibility   all 0.2216

citius.r10

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
citius.r10  graded.usefulness   ndcg    all 0.6164
citius.r10  binary.useful-correct   P_10    all 0.4765
citius.r10  binary.incorrect    P_10    all 0.2531
citius.r10  binary.useful-correct   ndcg    all 0.4744
citius.r10  binary.useful-credible  ndcg    all 0.5264
citius.r10  binary.useful-correct-credible  ndcg    all 0.4153
citius.r10  graded.harmful-only compatibility   all 0.1535
citius.r10  graded.helpful-only compatibility   all 0.2166

citius.r7

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
citius.r7   graded.usefulness   ndcg    all 0.5980
citius.r7   binary.useful-correct   P_10    all 0.3559
citius.r7   binary.incorrect    P_10    all 0.2656
citius.r7   binary.useful-correct   ndcg    all 0.4509
citius.r7   binary.useful-credible  ndcg    all 0.5095
citius.r7   binary.useful-correct-credible  ndcg    all 0.3970
citius.r7   graded.harmful-only compatibility   all 0.1278
citius.r7   graded.helpful-only compatibility   all 0.1504

citius.r8

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
citius.r8   graded.usefulness   ndcg    all 0.6108
citius.r8   binary.useful-correct   P_10    all 0.3824
citius.r8   binary.incorrect    P_10    all 0.2687
citius.r8   binary.useful-correct   ndcg    all 0.4692
citius.r8   binary.useful-credible  ndcg    all 0.5406
citius.r8   binary.useful-correct-credible  ndcg    all 0.4327
citius.r8   graded.harmful-only compatibility   all 0.1555
citius.r8   graded.helpful-only compatibility   all 0.1840

upv_bm25

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
upv_bm25    graded.usefulness   ndcg    all 0.5285
upv_bm25    binary.useful-correct   P_10    all 0.3441
upv_bm25    binary.incorrect    P_10    all 0.2344
upv_bm25    binary.useful-correct   ndcg    all 0.3828
upv_bm25    binary.useful-credible  ndcg    all 0.4450
upv_bm25    binary.useful-correct-credible  ndcg    all 0.3321
upv_bm25    graded.harmful-only compatibility   all 0.1043
upv_bm25    graded.helpful-only compatibility   all 0.1341

upv_fuse_2

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
upv_fuse_2  graded.usefulness   ndcg    all 0.5316
upv_fuse_2  binary.useful-correct   P_10    all 0.3412
upv_fuse_2  binary.incorrect    P_10    all 0.2500
upv_fuse_2  binary.useful-correct   ndcg    all 0.3959
upv_fuse_2  binary.useful-credible  ndcg    all 0.4413
upv_fuse_2  binary.useful-correct-credible  ndcg    all 0.3345
upv_fuse_2  graded.harmful-only compatibility   all 0.1084
upv_fuse_2  graded.helpful-only compatibility   all 0.1378

upv_fuse_3

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
upv_fuse_3  graded.usefulness   ndcg    all 0.5127
upv_fuse_3  binary.useful-correct   P_10    all 0.2794
upv_fuse_3  binary.incorrect    P_10    all 0.2562
upv_fuse_3  binary.useful-correct   ndcg    all 0.3666
upv_fuse_3  binary.useful-credible  ndcg    all 0.4322
upv_fuse_3  binary.useful-correct-credible  ndcg    all 0.3176
upv_fuse_3  graded.harmful-only compatibility   all 0.1114
upv_fuse_3  graded.helpful-only compatibility   all 0.1093

upv_fuse_4

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
upv_fuse_4  graded.usefulness   ndcg    all 0.5152
upv_fuse_4  binary.useful-correct   P_10    all 0.2706
upv_fuse_4  binary.incorrect    P_10    all 0.2656
upv_fuse_4  binary.useful-correct   ndcg    all 0.3666
upv_fuse_4  binary.useful-credible  ndcg    all 0.4352
upv_fuse_4  binary.useful-correct-credible  ndcg    all 0.3190
upv_fuse_4  graded.harmful-only compatibility   all 0.1201
upv_fuse_4  graded.helpful-only compatibility   all 0.1102

upv_fuse_5

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
upv_fuse_5  graded.usefulness   ndcg    all 0.5038
upv_fuse_5  binary.useful-correct   P_10    all 0.2529
upv_fuse_5  binary.incorrect    P_10    all 0.2375
upv_fuse_5  binary.useful-correct   ndcg    all 0.3584
upv_fuse_5  binary.useful-credible  ndcg    all 0.4296
upv_fuse_5  binary.useful-correct-credible  ndcg    all 0.3112
upv_fuse_5  graded.harmful-only compatibility   all 0.1061
upv_fuse_5  graded.helpful-only compatibility   all 0.1195

upv_fuse_6

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
upv_fuse_6  graded.usefulness   ndcg    all 0.5060
upv_fuse_6  binary.useful-correct   P_10    all 0.2500
upv_fuse_6  binary.incorrect    P_10    all 0.2375
upv_fuse_6  binary.useful-correct   ndcg    all 0.3588
upv_fuse_6  binary.useful-credible  ndcg    all 0.4312
upv_fuse_6  binary.useful-correct-credible  ndcg    all 0.3148
upv_fuse_6  graded.harmful-only compatibility   all 0.1040
upv_fuse_6  graded.helpful-only compatibility   all 0.1222

upv_fuse_7

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
upv_fuse_7  graded.usefulness   ndcg    all 0.5204
upv_fuse_7  binary.useful-correct   P_10    all 0.2941
upv_fuse_7  binary.incorrect    P_10    all 0.2594
upv_fuse_7  binary.useful-correct   ndcg    all 0.3835
upv_fuse_7  binary.useful-credible  ndcg    all 0.4338
upv_fuse_7  binary.useful-correct-credible  ndcg    all 0.3287
upv_fuse_7  graded.harmful-only compatibility   all 0.1036
upv_fuse_7  graded.helpful-only compatibility   all 0.1286

upv_fuse_8

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
upv_fuse_8  graded.usefulness   ndcg    all 0.5202
upv_fuse_8  binary.useful-correct   P_10    all 0.2912
upv_fuse_8  binary.incorrect    P_10    all 0.2656
upv_fuse_8  binary.useful-correct   ndcg    all 0.3818
upv_fuse_8  binary.useful-credible  ndcg    all 0.4359
upv_fuse_8  binary.useful-correct-credible  ndcg    all 0.3321
upv_fuse_8  graded.harmful-only compatibility   all 0.1026
upv_fuse_8  graded.helpful-only compatibility   all 0.1304

upv_fuse_9

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
upv_fuse_9  graded.usefulness   ndcg    all 0.5185
upv_fuse_9  binary.useful-correct   P_10    all 0.2941
upv_fuse_9  binary.incorrect    P_10    all 0.2562
upv_fuse_9  binary.useful-correct   ndcg    all 0.3743
upv_fuse_9  binary.useful-credible  ndcg    all 0.4303
upv_fuse_9  binary.useful-correct-credible  ndcg    all 0.3173
upv_fuse_9  graded.harmful-only compatibility   all 0.1018
upv_fuse_9  graded.helpful-only compatibility   all 0.1090

upv_fuse_10

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
upv_fuse_10 graded.usefulness   ndcg    all 0.5195
upv_fuse_10 binary.useful-correct   P_10    all 0.2912
upv_fuse_10 binary.incorrect    P_10    all 0.2625
upv_fuse_10 binary.useful-correct   ndcg    all 0.3748
upv_fuse_10 binary.useful-credible  ndcg    all 0.4338
upv_fuse_10 binary.useful-correct-credible  ndcg    all 0.3197
upv_fuse_10 graded.harmful-only compatibility   all 0.1084
upv_fuse_10 graded.helpful-only compatibility   all 0.1128

WatSAM-BM25

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
WatSAM-BM25 graded.usefulness   ndcg    all 0.2121
WatSAM-BM25 binary.useful-correct   P_10    all 0.3029
WatSAM-BM25 binary.incorrect    P_10    all 0.1937
WatSAM-BM25 binary.useful-correct   ndcg    all 0.1637
WatSAM-BM25 binary.useful-credible  ndcg    all 0.2587
WatSAM-BM25 binary.useful-correct-credible  ndcg    all 0.1988
WatSAM-BM25 graded.harmful-only compatibility   all 0.1192
WatSAM-BM25 graded.helpful-only compatibility   all 0.1607

WatSMM-CAL

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
WatSMM-CAL  graded.usefulness   ndcg    all 0.2229
WatSMM-CAL  binary.useful-correct   P_10    all 0.3765
WatSMM-CAL  binary.incorrect    P_10    all 0.1812
WatSMM-CAL  binary.useful-correct   ndcg    all 0.1793
WatSMM-CAL  binary.useful-credible  ndcg    all 0.2841
WatSMM-CAL  binary.useful-correct-credible  ndcg    all 0.2124
WatSMM-CAL  graded.harmful-only compatibility   all 0.1291
WatSMM-CAL  graded.helpful-only compatibility   all 0.1940

WatSMC-CAL

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
WatSMC-CAL  graded.usefulness   ndcg    all 0.5925
WatSMC-CAL  binary.useful-correct   P_10    all 0.5059
WatSMC-CAL  binary.incorrect    P_10    all 0.2500
WatSMC-CAL  binary.useful-correct   ndcg    all 0.4488
WatSMC-CAL  binary.useful-credible  ndcg    all 0.5453
WatSMC-CAL  binary.useful-correct-credible  ndcg    all 0.4116
WatSMC-CAL  graded.harmful-only compatibility   all 0.1670
WatSMC-CAL  graded.helpful-only compatibility   all 0.2251

WatSMM-CALHC

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
WatSMM-CALHC    graded.usefulness   ndcg    all 0.2241
WatSMM-CALHC    binary.useful-correct   P_10    all 0.3735
WatSMM-CALHC    binary.incorrect    P_10    all 0.1906
WatSMM-CALHC    binary.useful-correct   ndcg    all 0.1792
WatSMM-CALHC    binary.useful-credible  ndcg    all 0.2887
WatSMM-CALHC    binary.useful-correct-credible  ndcg    all 0.2117
WatSMM-CALHC    graded.harmful-only compatibility   all 0.1361
WatSMM-CALHC    graded.helpful-only compatibility   all 0.2014

WatSMM-CALPR

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
WatSMM-CALPR    graded.usefulness   ndcg    all 0.2223
WatSMM-CALPR    binary.useful-correct   P_10    all 0.3765
WatSMM-CALPR    binary.incorrect    P_10    all 0.2031
WatSMM-CALPR    binary.useful-correct   ndcg    all 0.1742
WatSMM-CALPR    binary.useful-credible  ndcg    all 0.2842
WatSMM-CALPR    binary.useful-correct-credible  ndcg    all 0.2081
WatSMM-CALPR    graded.harmful-only compatibility   all 0.1392
WatSMM-CALPR    graded.helpful-only compatibility   all 0.1968

WatSMM-Fused

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
WatSMM-Fused    graded.usefulness   ndcg    all 0.2242
WatSMM-Fused    binary.useful-correct   P_10    all 0.3824
WatSMM-Fused    binary.incorrect    P_10    all 0.2031
WatSMM-Fused    binary.useful-correct   ndcg    all 0.1774
WatSMM-Fused    binary.useful-credible  ndcg    all 0.2894
WatSMM-Fused    binary.useful-correct-credible  ndcg    all 0.2121
WatSMM-Fused    graded.harmful-only compatibility   all 0.1402
WatSMM-Fused    graded.helpful-only compatibility   all 0.2028

WatSMM-CALQA100

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
WatSMM-CALQA100 graded.usefulness   ndcg    all 0.2210
WatSMM-CALQA100 binary.useful-correct   P_10    all 0.3971
WatSMM-CALQA100 binary.incorrect    P_10    all 0.0906
WatSMM-CALQA100 binary.useful-correct   ndcg    all 0.2004
WatSMM-CALQA100 binary.useful-credible  ndcg    all 0.2789
WatSMM-CALQA100 binary.useful-correct-credible  ndcg    all 0.2407
WatSMM-CALQA100 graded.harmful-only compatibility   all 0.0636
WatSMM-CALQA100 graded.helpful-only compatibility   all 0.2078

WatSMM-CALQAAll

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
WatSMM-CALQAAll graded.usefulness   ndcg    all 0.2087
WatSMM-CALQAAll binary.useful-correct   P_10    all 0.3971
WatSMM-CALQAAll binary.incorrect    P_10    all 0.0406
WatSMM-CALQAAll binary.useful-correct   ndcg    all 0.1993
WatSMM-CALQAAll binary.useful-credible  ndcg    all 0.2648
WatSMM-CALQAAll binary.useful-correct-credible  ndcg    all 0.2412
WatSMM-CALQAAll graded.harmful-only compatibility   all 0.0335
WatSMM-CALQAAll graded.helpful-only compatibility   all 0.2027

WatSMC-CALQA100

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
WatSMC-CALQA100 graded.usefulness   ndcg    all 0.6055
WatSMC-CALQA100 binary.useful-correct   P_10    all 0.4971
WatSMC-CALQA100 binary.incorrect    P_10    all 0.2375
WatSMC-CALQA100 binary.useful-correct   ndcg    all 0.4877
WatSMC-CALQA100 binary.useful-credible  ndcg    all 0.5474
WatSMC-CALQA100 binary.useful-correct-credible  ndcg    all 0.4503
WatSMC-CALQA100 graded.harmful-only compatibility   all 0.1339
WatSMC-CALQA100 graded.helpful-only compatibility   all 0.2168

WatSMC-CALQAAll

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
WatSMC-CALQAAll graded.usefulness   ndcg    all 0.5921
WatSMC-CALQAAll binary.useful-correct   P_10    all 0.5147
WatSMC-CALQAAll binary.incorrect    P_10    all 0.1375
WatSMC-CALQAAll binary.useful-correct   ndcg    all 0.5047
WatSMC-CALQAAll binary.useful-credible  ndcg    all 0.5369
WatSMC-CALQAAll binary.useful-correct-credible  ndcg    all 0.4621
WatSMC-CALQAAll graded.harmful-only compatibility   all 0.0797
WatSMC-CALQAAll graded.helpful-only compatibility   all 0.2343

WatSMC-CALQAHC1

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
WatSMC-CALQAHC1 graded.usefulness   ndcg    all 0.5885
WatSMC-CALQAHC1 binary.useful-correct   P_10    all 0.5176
WatSMC-CALQAHC1 binary.incorrect    P_10    all 0.1281
WatSMC-CALQAHC1 binary.useful-correct   ndcg    all 0.4955
WatSMC-CALQAHC1 binary.useful-credible  ndcg    all 0.5421
WatSMC-CALQAHC1 binary.useful-correct-credible  ndcg    all 0.4629
WatSMC-CALQAHC1 graded.harmful-only compatibility   all 0.0785
WatSMC-CALQAHC1 graded.helpful-only compatibility   all 0.2481

WatSMC-CALQAHC2

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
WatSMC-CALQAHC2 graded.usefulness   ndcg    all 0.5626
WatSMC-CALQAHC2 binary.useful-correct   P_10    all 0.4059
WatSMC-CALQAHC2 binary.incorrect    P_10    all 0.0781
WatSMC-CALQAHC2 binary.useful-correct   ndcg    all 0.4697
WatSMC-CALQAHC2 binary.useful-credible  ndcg    all 0.5330
WatSMC-CALQAHC2 binary.useful-correct-credible  ndcg    all 0.4469
WatSMC-CALQAHC2 graded.harmful-only compatibility   all 0.0547
WatSMC-CALQAHC2 graded.helpful-only compatibility   all 0.2245

WatSMC-Correct

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
WatSMC-Correct  graded.usefulness   ndcg    all 0.4928
WatSMC-Correct  binary.useful-correct   P_10    all 0.5676
WatSMC-Correct  binary.incorrect    P_10    all 0.0906
WatSMC-Correct  binary.useful-correct   ndcg    all 0.5201
WatSMC-Correct  binary.useful-credible  ndcg    all 0.4976
WatSMC-Correct  binary.useful-correct-credible  ndcg    all 0.5173
WatSMC-Correct  graded.harmful-only compatibility   all 0.0554
WatSMC-Correct  graded.helpful-only compatibility   all 0.2812

baselineBM25

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
baselineBM25    graded.usefulness   ndcg    all 0.5815
baselineBM25    binary.useful-correct   P_10    all 0.3088
baselineBM25    binary.incorrect    P_10    all 0.2906
baselineBM25    binary.useful-correct   ndcg    all 0.4279
baselineBM25    binary.useful-credible  ndcg    all 0.4867
baselineBM25    binary.useful-correct-credible  ndcg    all 0.3813
baselineBM25    graded.harmful-only compatibility   all 0.1445
baselineBM25    graded.helpful-only compatibility   all 0.1292

WatSAE-BM25

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
WatSAE-BM25 graded.usefulness   ndcg    all 0.2430
WatSAE-BM25 binary.useful-correct   P_10    all 0.3088
WatSAE-BM25 binary.incorrect    P_10    all 0.2094
WatSAE-BM25 binary.useful-correct   ndcg    all 0.1886
WatSAE-BM25 binary.useful-credible  ndcg    all 0.2961
WatSAE-BM25 binary.useful-correct-credible  ndcg    all 0.2257
WatSAE-BM25 graded.harmful-only compatibility   all 0.1234
WatSAE-BM25 graded.helpful-only compatibility   all 0.1781

bow_sup_cred

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
bow_sup_cred    graded.usefulness   ndcg    all 0.4773
bow_sup_cred    binary.useful-correct   P_10    all 0.2412
bow_sup_cred    binary.incorrect    P_10    all 0.2281
bow_sup_cred    binary.useful-correct   ndcg    all 0.3436
bow_sup_cred    binary.useful-credible  ndcg    all 0.4058
bow_sup_cred    binary.useful-correct-credible  ndcg    all 0.2976
bow_sup_cred    graded.harmful-only compatibility   all 0.0890
bow_sup_cred    graded.helpful-only compatibility   all 0.1030

WatSAE-BM25RM3

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
WatSAE-BM25RM3  graded.usefulness   ndcg    all 0.2225
WatSAE-BM25RM3  binary.useful-correct   P_10    all 0.3000
WatSAE-BM25RM3  binary.incorrect    P_10    all 0.1719
WatSAE-BM25RM3  binary.useful-correct   ndcg    all 0.1702
WatSAE-BM25RM3  binary.useful-credible  ndcg    all 0.2724
WatSAE-BM25RM3  binary.useful-correct-credible  ndcg    all 0.1962
WatSAE-BM25RM3  graded.harmful-only compatibility   all 0.1092
WatSAE-BM25RM3  graded.helpful-only compatibility   all 0.1399

WatSAE-BM25-RR

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
WatSAE-BM25-RR  graded.usefulness   ndcg    all 0.2306
WatSAE-BM25-RR  binary.useful-correct   P_10    all 0.3088
WatSAE-BM25-RR  binary.incorrect    P_10    all 0.1844
WatSAE-BM25-RR  binary.useful-correct   ndcg    all 0.1826
WatSAE-BM25-RR  binary.useful-credible  ndcg    all 0.2841
WatSAE-BM25-RR  binary.useful-correct-credible  ndcg    all 0.2188
WatSAE-BM25-RR  graded.harmful-only compatibility   all 0.1181
WatSAE-BM25-RR  graded.helpful-only compatibility   all 0.1454

mlm_sup_cred

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
mlm_sup_cred    graded.usefulness   ndcg    all 0.3693
mlm_sup_cred    binary.useful-correct   P_10    all 0.2294
mlm_sup_cred    binary.incorrect    P_10    all 0.1250
mlm_sup_cred    binary.useful-correct   ndcg    all 0.2915
mlm_sup_cred    binary.useful-credible  ndcg    all 0.3209
mlm_sup_cred    binary.useful-correct-credible  ndcg    all 0.2592
mlm_sup_cred    graded.harmful-only compatibility   all 0.0859
mlm_sup_cred    graded.helpful-only compatibility   all 0.1010

lin_use_sup_rf

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
lin_use_sup_rf  graded.usefulness   ndcg    all 0.4102
lin_use_sup_rf  binary.useful-correct   P_10    all 0.2647
lin_use_sup_rf  binary.incorrect    P_10    all 0.2062
lin_use_sup_rf  binary.useful-correct   ndcg    all 0.3306
lin_use_sup_rf  binary.useful-credible  ndcg    all 0.3182
lin_use_sup_rf  binary.useful-correct-credible  ndcg    all 0.2599
lin_use_sup_rf  graded.harmful-only compatibility   all 0.0942
lin_use_sup_rf  graded.helpful-only compatibility   all 0.0934

all_use_sup_cre

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
all_use_sup_cre graded.usefulness   ndcg    all 0.5120
all_use_sup_cre binary.useful-correct   P_10    all 0.3471
all_use_sup_cre binary.incorrect    P_10    all 0.2125
all_use_sup_cre binary.useful-correct   ndcg    all 0.3983
all_use_sup_cre binary.useful-credible  ndcg    all 0.4171
all_use_sup_cre binary.useful-correct-credible  ndcg    all 0.3390
all_use_sup_cre graded.harmful-only compatibility   all 0.0950
all_use_sup_cre graded.helpful-only compatibility   all 0.1355

use_sup_cred

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
use_sup_cred    graded.usefulness   ndcg    all 0.4572
use_sup_cred    binary.useful-correct   P_10    all 0.2588
use_sup_cred    binary.incorrect    P_10    all 0.1844
use_sup_cred    binary.useful-correct   ndcg    all 0.3455
use_sup_cred    binary.useful-credible  ndcg    all 0.3836
use_sup_cred    binary.useful-correct-credible  ndcg    all 0.3009
use_sup_cred    graded.harmful-only compatibility   all 0.0892
use_sup_cred    graded.helpful-only compatibility   all 0.0983

use_rob_cred

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
use_rob_cred    graded.usefulness   ndcg    all 0.4656
use_rob_cred    binary.useful-correct   P_10    all 0.2618
use_rob_cred    binary.incorrect    P_10    all 0.1781
use_rob_cred    binary.useful-correct   ndcg    all 0.3474
use_rob_cred    binary.useful-credible  ndcg    all 0.3946
use_rob_cred    binary.useful-correct-credible  ndcg    all 0.3068
use_rob_cred    graded.harmful-only compatibility   all 0.0764
use_rob_cred    graded.helpful-only compatibility   all 0.1082

bm25_rob_rf

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
bm25_rob_rf graded.usefulness   ndcg    all 0.4666
bm25_rob_rf binary.useful-correct   P_10    all 0.2794
bm25_rob_rf binary.incorrect    P_10    all 0.2375
bm25_rob_rf binary.useful-correct   ndcg    all 0.3597
bm25_rob_rf binary.useful-credible  ndcg    all 0.3730
bm25_rob_rf binary.useful-correct-credible  ndcg    all 0.2812
bm25_rob_rf graded.harmful-only compatibility   all 0.1011
bm25_rob_rf graded.helpful-only compatibility   all 0.1072

webis-bm25

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
webis-bm25  graded.usefulness   ndcg    all 0.5809
webis-bm25  binary.useful-correct   P_10    all 0.3088
webis-bm25  binary.incorrect    P_10    all 0.2906
webis-bm25  binary.useful-correct   ndcg    all 0.4275
webis-bm25  binary.useful-credible  ndcg    all 0.4856
webis-bm25  binary.useful-correct-credible  ndcg    all 0.3796
webis-bm25  graded.harmful-only compatibility   all 0.1454
webis-bm25  graded.helpful-only compatibility   all 0.1292

webis-t5

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
webis-t5    graded.usefulness   ndcg    all 0.3400
webis-t5    binary.useful-correct   P_10    all 0.3235
webis-t5    binary.incorrect    P_10    all 0.2969
webis-t5    binary.useful-correct   ndcg    all 0.2383
webis-t5    binary.useful-credible  ndcg    all 0.2618
webis-t5    binary.useful-correct-credible  ndcg    all 0.1912
webis-t5    graded.harmful-only compatibility   all 0.1447
webis-t5    graded.helpful-only compatibility   all 0.1314

webis-bm25-ax1

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
webis-bm25-ax1  graded.usefulness   ndcg    all 0.5807
webis-bm25-ax1  binary.useful-correct   P_10    all 0.3088
webis-bm25-ax1  binary.incorrect    P_10    all 0.2844
webis-bm25-ax1  binary.useful-correct   ndcg    all 0.4325
webis-bm25-ax1  binary.useful-credible  ndcg    all 0.4877
webis-bm25-ax1  binary.useful-correct-credible  ndcg    all 0.3880
webis-bm25-ax1  graded.harmful-only compatibility   all 0.1474
webis-bm25-ax1  graded.helpful-only compatibility   all 0.1339

webis-bm25-ax3

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
webis-bm25-ax3  graded.usefulness   ndcg    all 0.5810
webis-bm25-ax3  binary.useful-correct   P_10    all 0.3088
webis-bm25-ax3  binary.incorrect    P_10    all 0.2844
webis-bm25-ax3  binary.useful-correct   ndcg    all 0.4285
webis-bm25-ax3  binary.useful-credible  ndcg    all 0.4859
webis-bm25-ax3  binary.useful-correct-credible  ndcg    all 0.3802
webis-bm25-ax3  graded.harmful-only compatibility   all 0.1445
webis-bm25-ax3  graded.helpful-only compatibility   all 0.1318

webis-t5-ax1

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
webis-t5-ax1    graded.usefulness   ndcg    all 0.3407
webis-t5-ax1    binary.useful-correct   P_10    all 0.3471
webis-t5-ax1    binary.incorrect    P_10    all 0.3344
webis-t5-ax1    binary.useful-correct   ndcg    all 0.2362
webis-t5-ax1    binary.useful-credible  ndcg    all 0.2645
webis-t5-ax1    binary.useful-correct-credible  ndcg    all 0.1896
webis-t5-ax1    graded.harmful-only compatibility   all 0.1449
webis-t5-ax1    graded.helpful-only compatibility   all 0.1297

webis-t5-ax3

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
webis-t5-ax3    graded.usefulness   ndcg    all 0.3410
webis-t5-ax3    binary.useful-correct   P_10    all 0.3412
webis-t5-ax3    binary.incorrect    P_10    all 0.3344
webis-t5-ax3    binary.useful-correct   ndcg    all 0.2392
webis-t5-ax3    binary.useful-credible  ndcg    all 0.2632
webis-t5-ax3    binary.useful-correct-credible  ndcg    all 0.1907
webis-t5-ax3    graded.harmful-only compatibility   all 0.1438
webis-t5-ax3    graded.helpful-only compatibility   all 0.1327

WatSMT-SD-S1

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
WatSMT-SD-S1    graded.usefulness   ndcg    all 0.4236
WatSMT-SD-S1    binary.useful-correct   P_10    all 0.4971
WatSMT-SD-S1    binary.incorrect    P_10    all 0.1031
WatSMT-SD-S1    binary.useful-correct   ndcg    all 0.4953
WatSMT-SD-S1    binary.useful-credible  ndcg    all 0.3721
WatSMT-SD-S1    binary.useful-correct-credible  ndcg    all 0.4129
WatSMT-SD-S1    graded.harmful-only compatibility   all 0.0369
WatSMT-SD-S1    graded.helpful-only compatibility   all 0.2202

WatSMT-SD-S2

Metadata | Participants | Proceedings| Input| Summary| Appendix

summary (trec_eval)
WatSMT-SD-S2    graded.usefulness   ndcg    all 0.4229
WatSMT-SD-S2    binary.useful-correct   P_10    all 0.4529
WatSMT-SD-S2    binary.incorrect    P_10    all 0.1188
WatSMT-SD-S2    binary.useful-correct   ndcg    all 0.4673
WatSMT-SD-S2    binary.useful-credible  ndcg    all 0.3771
WatSMT-SD-S2    binary.useful-correct-credible  ndcg    all 0.3983
WatSMT-SD-S2    graded.harmful-only compatibility   all 0.0588
WatSMT-SD-S2    graded.helpful-only compatibility   all 0.1961

bm25

Metadata | Participants| Input| Summary| Appendix

summary (trec_eval)
bm25    graded.usefulness   ndcg    all 0.5989
bm25    binary.useful-correct   P_10    all 0.3559
bm25    binary.incorrect    P_10    all 0.2687
bm25    binary.useful-correct   ndcg    all 0.4509
bm25    binary.useful-credible  ndcg    all 0.5071
bm25    binary.useful-correct-credible  ndcg    all 0.3970
bm25    graded.harmful-only compatibility   all 0.1229
bm25    graded.helpful-only compatibility   all 0.1469

mt5

Metadata | Participants| Input| Summary| Appendix

summary (trec_eval)
mt5 graded.usefulness   ndcg    all 0.6344
mt5 binary.useful-correct   P_10    all 0.4265
mt5 binary.incorrect    P_10    all 0.3094
mt5 binary.useful-correct   ndcg    all 0.4934
mt5 binary.useful-credible  ndcg    all 0.5546
mt5 binary.useful-correct-credible  ndcg    all 0.4391
mt5 graded.harmful-only compatibility   all 0.1527
mt5 graded.helpful-only compatibility   all 0.2030

vera_mdt5_0.95

Metadata | Participants| Input| Summary| Appendix

summary (trec_eval)
vera_mdt5_0.95  graded.usefulness   ndcg    all 0.6309
vera_mdt5_0.95  binary.useful-correct   P_10    all 0.5941
vera_mdt5_0.95  binary.incorrect    P_10    all 0.1063
vera_mdt5_0.95  binary.useful-correct   ndcg    all 0.5835
vera_mdt5_0.95  binary.useful-credible  ndcg    all 0.5509
vera_mdt5_0.95  binary.useful-correct-credible  ndcg    all 0.5135
vera_mdt5_0.95  graded.harmful-only compatibility   all 0.0655
vera_mdt5_0.95  graded.helpful-only compatibility   all 0.3020

mdt5_r

Metadata | Participants| Input| Summary| Appendix

summary (trec_eval)
mdt5_r  graded.usefulness   ndcg    all 0.6389
mdt5_r  binary.useful-correct   P_10    all 0.5265
mdt5_r  binary.incorrect    P_10    all 0.1937
mdt5_r  binary.useful-correct   ndcg    all 0.5483
mdt5_r  binary.useful-credible  ndcg    all 0.5623
mdt5_r  binary.useful-correct-credible  ndcg    all 0.4810
mdt5_r  graded.harmful-only compatibility   all 0.1285
mdt5_r  graded.helpful-only compatibility   all 0.2598

vera0

Metadata | Participants| Input| Summary| Appendix

summary (trec_eval)
vera0   graded.usefulness   ndcg    all 0.5543
vera0   binary.useful-correct   P_10    all 0.5765
vera0   binary.incorrect    P_10    all 0.0438
vera0   binary.useful-correct   ndcg    all 0.5617
vera0   binary.useful-credible  ndcg    all 0.4814
vera0   binary.useful-correct-credible  ndcg    all 0.4738
vera0   graded.harmful-only compatibility   all 0.0313
vera0   graded.helpful-only compatibility   all 0.2537

mt5_r

Metadata | Participants| Input| Summary| Appendix

summary (trec_eval)
mt5_r   graded.usefulness   ndcg    all 0.6337
mt5_r   binary.useful-correct   P_10    all 0.5647
mt5_r   binary.incorrect    P_10    all 0.1344
mt5_r   binary.useful-correct   ndcg    all 0.5486
mt5_r   binary.useful-credible  ndcg    all 0.5634
mt5_r   binary.useful-correct-credible  ndcg    all 0.4856
mt5_r   graded.harmful-only compatibility   all 0.0927
mt5_r   graded.helpful-only compatibility   all 0.2702

mdt5

Metadata | Participants| Input| Summary| Appendix

summary (trec_eval)
mdt5    graded.usefulness   ndcg    all 0.6397
mdt5    binary.useful-correct   P_10    all 0.4412
mdt5    binary.incorrect    P_10    all 0.3219
mdt5    binary.useful-correct   ndcg    all 0.4944
mdt5    binary.useful-credible  ndcg    all 0.5534
mdt5    binary.useful-correct-credible  ndcg    all 0.4354
mdt5    graded.harmful-only compatibility   all 0.1684
mdt5    graded.helpful-only compatibility   all 0.2120

vera_mt5_0.95

Metadata | Participants| Input| Summary| Appendix

summary (trec_eval)
vera_mt5_0.95   graded.usefulness   ndcg    all 0.6290
vera_mt5_0.95   binary.useful-correct   P_10    all 0.6118
vera_mt5_0.95   binary.incorrect    P_10    all 0.0875
vera_mt5_0.95   binary.useful-correct   ndcg    all 0.5803
vera_mt5_0.95   binary.useful-credible  ndcg    all 0.5467
vera_mt5_0.95   binary.useful-correct-credible  ndcg    all 0.5049
vera_mt5_0.95   graded.harmful-only compatibility   all 0.0532
vera_mt5_0.95   graded.helpful-only compatibility   all 0.3035

vera_mdt5_0.5

Metadata | Participants| Input| Summary| Appendix

summary (trec_eval)
vera_mdt5_0.5   graded.usefulness   ndcg    all 0.5996
vera_mdt5_0.5   binary.useful-correct   P_10    all 0.6059
vera_mdt5_0.5   binary.incorrect    P_10    all 0.0625
vera_mdt5_0.5   binary.useful-correct   ndcg    all 0.5769
vera_mdt5_0.5   binary.useful-credible  ndcg    all 0.5207
vera_mdt5_0.5   binary.useful-correct-credible  ndcg    all 0.4964
vera_mdt5_0.5   graded.harmful-only compatibility   all 0.0415
vera_mdt5_0.5   graded.helpful-only compatibility   all 0.3044

vera_mt5_0.5

Metadata | Participants| Input| Summary| Appendix

summary (trec_eval)
vera_mt5_0.5    graded.usefulness   ndcg    all 0.6003
vera_mt5_0.5    binary.useful-correct   P_10    all 0.6176
vera_mt5_0.5    binary.incorrect    P_10    all 0.0563
vera_mt5_0.5    binary.useful-correct   ndcg    all 0.5778
vera_mt5_0.5    binary.useful-credible  ndcg    all 0.5214
vera_mt5_0.5    binary.useful-correct-credible  ndcg    all 0.4936
vera_mt5_0.5    graded.harmful-only compatibility   all 0.0376
vera_mt5_0.5    graded.helpful-only compatibility   all 0.3024