|
|
import gradio as gr |
|
|
import pandas as pd |
|
|
import random, os, csv |
|
|
|
|
|
|
|
|
# Display name of each supported dataset -> path of the CSV holding its samples.
# The keys are offered, in this order, by the dataset dropdown on the login panel.
DATASET_FILES = dict(
    ESConv="./csv/esconv.csv",
    CraigslistBargain="./csv/craigslist.csv",
)
|
|
|
|
|
|
|
|
def prepare_examples(user_id: str, dataset: str):
    """Build the shuffled list of pairwise comparisons one user will rate.

    Every row of the dataset CSV yields two comparisons: "Ours" vs "PPDPP" and
    "Ours" vs "DPDP".  The comparisons are shuffled and each one is randomly
    assigned a left/right orientation.

    All randomness comes from a private ``random.Random`` seeded with
    ``f"{user_id}_{dataset}"``.  The order is therefore stable across reloads
    for the same user and dataset, and -- unlike ``random.seed`` -- building
    one user's list neither reseeds nor consumes the process-wide generator
    that other requests may be using.  A ``Random`` instance seeded this way
    yields the same sequence as the module-level functions seeded with the
    same string, so previously generated orders are unchanged.

    Args:
        user_id: Identifier of the rater; part of the seed.
        dataset: Key of ``DATASET_FILES`` selecting the CSV to read.

    Returns:
        A list of dicts with keys ``background``, ``left_text``,
        ``right_text``, ``left_name`` and ``right_name``.

    Raises:
        KeyError: If ``dataset`` is not a key of ``DATASET_FILES`` or the CSV
            lacks one of the columns read below.
    """
    rng = random.Random(f"{user_id}_{dataset}")
    df = pd.read_csv(DATASET_FILES[dataset])

    pairs = [
        {
            "background": row["Background Information"],
            "ours": row["Ours"],
            "other": row[baseline],
            "other_name": baseline,
        }
        for _, row in df.iterrows()
        for baseline in ("PPDPP", "DPDP")
    ]
    rng.shuffle(pairs)

    prepared = []
    for pair in pairs:
        # One draw per pair, taken after the shuffle, decides which side
        # shows our system's response.
        if rng.random() < 0.5:
            left_text, right_text = pair["ours"], pair["other"]
            left_name, right_name = "Ours", pair["other_name"]
        else:
            left_text, right_text = pair["other"], pair["ours"]
            left_name, right_name = pair["other_name"], "Ours"
        prepared.append(
            {
                "background": pair["background"],
                "left_text": left_text,
                "right_text": right_text,
                "left_name": left_name,
                "right_name": right_name,
            }
        )
    return prepared
|
|
|
|
|
|
|
|
def save_all_to_csv(user_id, dataset, examples, responses):
    """Rewrite the user's results CSV from scratch (idempotent).

    The rows are first written to ``<results file>.tmp`` and then moved over
    the real file with ``os.replace``.  If anything fails while writing (for
    example a response index with no matching example), the previously saved
    results file is left untouched instead of being truncated.

    Args:
        user_id: Rater identifier; part of the output filename.
        dataset: Dataset name; part of the output filename.
        examples: List of example dicts providing ``background``,
            ``left_name`` and ``right_name`` for each index.
        responses: Mapping ``{example index: {metric name: answer}}``.  The
            metric columns are taken from the first response in the mapping.

    Returns:
        None.  Nothing is written when ``responses`` is empty.
    """
    if not responses:
        return

    filename = f"{user_id}_{dataset}_results.csv"
    tmp_filename = f"{filename}.tmp"
    metrics = list(next(iter(responses.values())))
    header = [
        "UserID",
        "Dataset",
        "Background",
        "Response A Method",
        "Response B Method",
    ] + metrics

    try:
        with open(tmp_filename, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=header)
            writer.writeheader()
            for idx in sorted(responses):
                ex = examples[idx]
                row = {
                    "UserID": user_id,
                    "Dataset": dataset,
                    "Background": ex["background"],
                    "Response A Method": ex["left_name"],
                    "Response B Method": ex["right_name"],
                }
                row.update(responses[idx])
                writer.writerow(row)
        # Swap the finished file in only once every row was written.
        os.replace(tmp_filename, filename)
    except BaseException:
        # Do not leave a half-written scratch file behind; the error itself
        # still propagates to the caller.
        if os.path.exists(tmp_filename):
            os.remove(tmp_filename)
        raise
|
|
|
|
|
|
|
|
def load_responses_from_csv(user_id, dataset, examples):
    """Rebuild the ``{index: metrics-dict}`` mapping from a saved results file.

    Saved rows are matched back to ``examples`` by the triple
    (Background, Response A Method, Response B Method); rows that match no
    example are ignored.

    Args:
        user_id: Rater identifier; part of the results filename.
        dataset: Dataset name; part of the results filename.
        examples: List of example dicts with ``background``, ``left_name``
            and ``right_name`` keys.

    Returns:
        Mapping from example index to ``{metric name: saved answer}``.  Empty
        when no results file exists or the file contains no data at all.
        Rows in which any metric cell is missing are skipped, so every
        returned dict carries a value for every metric column.

    Raises:
        KeyError: If the file lacks one of the three matching columns.
    """
    filename = f"{user_id}_{dataset}_results.csv"
    if not os.path.exists(filename):
        return {}

    try:
        df = pd.read_csv(filename)
    except pd.errors.EmptyDataError:
        # A zero-byte file holds no answers; treat it like a missing file
        # rather than failing the login.
        return {}

    bookkeeping = {
        "UserID",
        "Dataset",
        "Background",
        "Response A Method",
        "Response B Method",
    }
    # Same for every row, so computed once instead of inside the loop.
    metric_cols = [col for col in df.columns if col not in bookkeeping]

    idx_map = {
        (ex["background"], ex["left_name"], ex["right_name"]): i
        for i, ex in enumerate(examples)
    }

    responses = {}
    for _, row in df.iterrows():
        key = (row["Background"], row["Response A Method"], row["Response B Method"])
        idx = idx_map.get(key)
        if idx is None:
            continue
        answers = {col: row[col] for col in metric_cols}
        # pandas reads a blank cell as NaN; such a row is incomplete and must
        # not be pushed into the radio widgets as an answer.
        if any(pd.isna(value) for value in answers.values()):
            continue
        responses[idx] = answers
    return responses
|
|
|
|
|
|
|
|
|
|
|
def es_load_example(idx, examples, responses):
    """Return the nine widget values that display ESConv item ``idx``.

    The tuple holds, in order: background text, left response, right
    response, the progress label, the previously saved answers for
    Identification, Comforting, Suggestion and Overall (``None`` where nothing
    is saved for this index), and an empty error message.
    """
    example = examples[idx]
    saved = responses.get(idx, {})

    texts = (example["background"], example["left_text"], example["right_text"])
    progress = f"Item {idx+1} of {len(examples)}"
    answers = tuple(
        saved.get(metric)
        for metric in ("Identification", "Comforting", "Suggestion", "Overall")
    )
    return texts + (progress,) + answers + ("",)
|
|
|
|
|
|
|
|
def cb_load_example(idx, examples, responses):
    """Return the nine widget values that display CraigslistBargain item ``idx``.

    The tuple holds, in order: background text, left response, right
    response, the progress label, the previously saved answers for
    Persuasiveness, Coherence, Naturalness and Overall (``None`` where nothing
    is saved for this index), and an empty error message.
    """
    example = examples[idx]
    saved = responses.get(idx, {})

    texts = (example["background"], example["left_text"], example["right_text"])
    progress = f"Item {idx+1} of {len(examples)}"
    answers = tuple(
        saved.get(metric)
        for metric in ("Persuasiveness", "Coherence", "Naturalness", "Overall")
    )
    return texts + (progress,) + answers + ("",)
|
|
|
|
|
|
|
|
|
|
|
def login_fn(user_id, dataset):
    """Start (or resume) a survey session for ``user_id`` on ``dataset``.

    Builds the user's example list, reloads any answers saved earlier, and
    returns the values for every output wired to the start button: three
    panel-visibility updates, the five session-state values, the nine ESConv
    widget values and the nine CraigslistBargain widget values.  The panel of
    the dataset that was not selected receives blank values.

    Args:
        user_id: Text typed into the User ID box.
        dataset: Dataset name chosen in the dropdown.

    Raises:
        gr.Error: If the user ID is blank or contains a path separator, if no
            dataset is selected, or if the dataset has no examples.
    """
    if not user_id or not user_id.strip() or not dataset:
        raise gr.Error("Please enter your User ID and select a dataset.")
    # The ID becomes part of the results filename, so a path separator would
    # let a participant write outside the working directory.
    if "/" in user_id or "\\" in user_id:
        raise gr.Error("User ID must not contain '/' or '\\'.")

    examples = prepare_examples(user_id, dataset)
    if not examples:
        # Without this guard the first example lookup fails with IndexError.
        raise gr.Error("The selected dataset contains no examples.")
    responses = load_responses_from_csv(user_id, dataset, examples)
    idx = 0

    # Values for a panel that stays hidden: four empty text fields, four
    # unanswered radios and an empty error message.
    blank_panel = ("", "", "", "", None, None, None, None, "")
    if dataset == "ESConv":
        es_values = es_load_example(idx, examples, responses)
        cb_values = blank_panel
    else:
        es_values = blank_panel
        cb_values = cb_load_example(idx, examples, responses)

    return (
        gr.update(visible=False),
        gr.update(visible=(dataset == "ESConv")),
        gr.update(visible=(dataset == "CraigslistBargain")),
        user_id,
        dataset,
        examples,
        idx,
        responses,
        *es_values,
        *cb_values,
    )
|
|
|
|
|
|
|
|
def logout_fn(user_id, dataset, examples, idx, responses):
    """Save the session's answers and return the UI to the login panel.

    Returns one value per output wired to the logout buttons (26 in total):
    three panel-visibility updates, the five session-state values reset to
    their initial contents, and nine blank values for each survey panel.

    The previous version returned 27 values (one more than there are
    outputs) and reset the radio widgets to ``""``, which is not one of their
    choices; the radios are now reset to ``None`` (no selection).

    Args:
        user_id: Current rater identifier.
        dataset: Current dataset name; when empty nothing is saved.
        examples: Current example list.
        idx: Current position (unused; part of the event's input signature).
        responses: Answers collected so far.
    """
    if dataset:
        save_all_to_csv(user_id, dataset, examples, responses)

    # Per panel: background, left, right, progress, four radios, error text.
    blank_panel = ("", "", "", "", None, None, None, None, "")
    return (
        gr.update(visible=True),
        gr.update(visible=False),
        gr.update(visible=False),
        "",
        "",
        [],
        0,
        {},
        *blank_panel,
        *blank_panel,
    )
|
|
|
|
|
|
|
|
|
|
|
def es_next_fn(user_id, dataset, examples, idx, responses, ident, com, sug, ovl):
    """Store the ESConv answers for item ``idx`` and advance to the next item.

    Returns an 11-tuple in the order of the Next button's outputs: background,
    left text, right text, progress label, new index, responses, the four
    radio values and the error message.

    * If the survey is already finished (``idx`` is past the last example),
      the completion screen is returned again and nothing is stored.  Before,
      pressing Next on the completion screen raised ``IndexError``.
    * If any metric is unanswered, the current item is shown again together
      with an error message and nothing is stored.
    * Otherwise the answers are recorded and written to disk, and either the
      next item or the completion screen is returned.
    """
    # NOTE(review): the two leading characters of this message look like a
    # mis-decoded emoji; kept byte-for-byte because the intended character
    # cannot be recovered from the text.
    done_message = "π© Survey complete! Thank you."

    if idx >= len(examples):
        return (done_message,) * 4 + (idx, responses, None, None, None, None, "")

    answers = (ident, com, sug, ovl)
    if any(answer is None for answer in answers):
        ex = examples[idx]
        return (
            ex["background"],
            ex["left_text"],
            ex["right_text"],
            f"Item {idx+1} of {len(examples)}",
            idx,
            responses,
            ident,
            com,
            sug,
            ovl,
            "All metrics must be answered before proceeding.",
        )

    responses[idx] = {
        "Identification": ident,
        "Comforting": com,
        "Suggestion": sug,
        "Overall": ovl,
    }
    save_all_to_csv(user_id, dataset, examples, responses)
    idx += 1

    if idx >= len(examples):
        return (done_message,) * 4 + (idx, responses, None, None, None, None, "")

    # Load once and splice the two state values between texts and radios.
    loaded = es_load_example(idx, examples, responses)
    return loaded[:4] + (idx, responses) + loaded[4:]
|
|
|
|
|
|
|
|
def es_prev_fn(user_id, dataset, examples, idx, responses, ident, com, sug, ovl):
    """Step back one ESConv item, keeping the current answers if complete.

    When all four metrics are answered and ``idx`` refers to a real example,
    the answers are recorded and written to disk before moving back.  On the
    completion screen (``idx`` past the last example) nothing is recorded:
    storing an entry for a non-existent example made the subsequent save
    raise ``IndexError``.

    Returns an 11-tuple in the order of the Prev button's outputs: background,
    left text, right text, progress label, new index, responses, the four
    radio values and the error message.
    """
    answers = (ident, com, sug, ovl)
    if idx < len(examples) and None not in answers:
        responses[idx] = {
            "Identification": ident,
            "Comforting": com,
            "Suggestion": sug,
            "Overall": ovl,
        }
        save_all_to_csv(user_id, dataset, examples, responses)

    idx = max(0, idx - 1)
    # Load once and splice the two state values between texts and radios.
    loaded = es_load_example(idx, examples, responses)
    return loaded[:4] + (idx, responses) + loaded[4:]
|
|
|
|
|
|
|
|
def cb_next_fn(user_id, dataset, examples, idx, responses, per, coh, nat, ovl_cb):
    """Store the CraigslistBargain answers for item ``idx`` and advance.

    Returns an 11-tuple in the order of the Next button's outputs: background,
    left text, right text, progress label, new index, responses, the four
    radio values and the error message.

    * If the survey is already finished (``idx`` is past the last example),
      the completion screen is returned again and nothing is stored.
    * If any metric is unanswered, the current item is shown again together
      with an error message and nothing is stored.
    * Otherwise the answers are recorded and written to disk, and either the
      next item or the completion screen is returned.

    The completion screen previously returned only 7 values for the 11
    outputs and put the message text where the index state belongs; it now
    has the same shape as every other return path.
    """
    # NOTE(review): the two leading characters of this message look like a
    # mis-decoded emoji; kept byte-for-byte because the intended character
    # cannot be recovered from the text.
    done_message = "π© Survey complete! Thank you."

    if idx >= len(examples):
        return (done_message,) * 4 + (idx, responses, None, None, None, None, "")

    answers = (per, coh, nat, ovl_cb)
    if any(answer is None for answer in answers):
        ex = examples[idx]
        return (
            ex["background"],
            ex["left_text"],
            ex["right_text"],
            f"Item {idx+1} of {len(examples)}",
            idx,
            responses,
            per,
            coh,
            nat,
            ovl_cb,
            "All metrics must be answered before proceeding.",
        )

    responses[idx] = {
        "Persuasiveness": per,
        "Coherence": coh,
        "Naturalness": nat,
        "Overall": ovl_cb,
    }
    save_all_to_csv(user_id, dataset, examples, responses)
    idx += 1

    if idx >= len(examples):
        return (done_message,) * 4 + (idx, responses, None, None, None, None, "")

    # Load once and splice the two state values between texts and radios.
    loaded = cb_load_example(idx, examples, responses)
    return loaded[:4] + (idx, responses) + loaded[4:]
|
|
|
|
|
|
|
|
def cb_prev_fn(user_id, dataset, examples, idx, responses, per, coh, nat, ovl_cb):
    """Step back one CraigslistBargain item, keeping complete answers.

    When all four metrics are answered and ``idx`` refers to a real example,
    the answers are recorded and written to disk before moving back.  On the
    completion screen (``idx`` past the last example) nothing is recorded:
    storing an entry for a non-existent example made the subsequent save
    raise ``IndexError``.

    Returns an 11-tuple in the order of the Prev button's outputs: background,
    left text, right text, progress label, new index, responses, the four
    radio values and the error message.
    """
    answers = (per, coh, nat, ovl_cb)
    if idx < len(examples) and None not in answers:
        responses[idx] = {
            "Persuasiveness": per,
            "Coherence": coh,
            "Naturalness": nat,
            "Overall": ovl_cb,
        }
        save_all_to_csv(user_id, dataset, examples, responses)

    idx = max(0, idx - 1)
    # Load once and splice the two state values between texts and radios.
    loaded = cb_load_example(idx, examples, responses)
    return loaded[:4] + (idx, responses) + loaded[4:]
|
|
|
|
|
|
|
|
|
|
|
def _preference_radio(label):
    """Create one three-way preference question in the current layout context."""
    return gr.Radio(
        ["Prefer Response A", "Prefer Response B", "No preference"],
        label=label,
    )


with gr.Blocks() as demo:
    # ---- Login panel: the only panel visible at start-up ----
    with gr.Column() as login_panel:
        gr.Markdown("## Human Evaluation Survey")
        user_id_in = gr.Textbox(label="User ID")
        ds_dd = gr.Dropdown(list(DATASET_FILES.keys()), label="Select dataset")
        start_btn = gr.Button("Start survey")

    # ---- Per-session state shared by all handlers ----
    uid_state = gr.State("")
    ds_state = gr.State("")
    ex_state = gr.State([])
    idx_state = gr.State(0)
    resp_state = gr.State({})

    # ---- ESConv survey panel ----
    with gr.Column(visible=False) as es_panel:
        bg = gr.Textbox(label="Background context", interactive=False)
        with gr.Row():
            lbox = gr.Textbox(label="Response A", interactive=False)
            rbox = gr.Textbox(label="Response B", interactive=False)

        ident = _preference_radio("Identification (Ident.)")
        com = _preference_radio("Comforting (Com.)")
        sug = _preference_radio("Suggestion (Sug.)")
        ovl_es = _preference_radio("Overall (Ov.)")
        # Left visible on purpose: the handlers only send a message string to
        # this component, so a hidden one would never display the
        # "All metrics must be answered" error.
        err_es = gr.HTML()
        prog = gr.Text(label="Progress")
        with gr.Row():
            # Labels restored from mis-decoded emoji in the previous text.
            prev_btn = gr.Button("◀ Prev")
            next_btn = gr.Button("▶ Next")
        logout_es = gr.Button("🚪 Logout")

    # ---- CraigslistBargain survey panel ----
    with gr.Column(visible=False) as cb_panel:
        bg_cb = gr.Textbox(label="Background context", interactive=False)
        with gr.Row():
            lbox_cb = gr.Textbox(label="Response A", interactive=False)
            rbox_cb = gr.Textbox(label="Response B", interactive=False)

        per = _preference_radio("Persuasiveness (Per.)")
        coh = _preference_radio("Coherence (Coh.)")
        nat = _preference_radio("Naturalness (Nat.)")
        ovl_cb = _preference_radio("Overall")
        # Visible for the same reason as the ESConv error area.
        err_cb = gr.HTML()
        prog_cb = gr.Text(label="Progress")
        with gr.Row():
            prev_cb = gr.Button("◀ Prev")
            next_cb = gr.Button("▶ Next")
        logout_cb = gr.Button("🚪 Logout")

    # ---- Component groups, in the order the handlers return their values ----
    session_state = [uid_state, ds_state, ex_state, idx_state, resp_state]
    es_widgets = [bg, lbox, rbox, prog, ident, com, sug, ovl_es, err_es]
    cb_widgets = [bg_cb, lbox_cb, rbox_cb, prog_cb, per, coh, nat, ovl_cb, err_cb]
    # Login and logout refresh everything: panels, state and both surveys.
    full_refresh = [login_panel, es_panel, cb_panel, *session_state, *es_widgets, *cb_widgets]
    # Navigation returns texts + progress, then index and responses, then the
    # radios and the error message.
    es_nav_outputs = es_widgets[:4] + [idx_state, resp_state] + es_widgets[4:]
    cb_nav_outputs = cb_widgets[:4] + [idx_state, resp_state] + cb_widgets[4:]

    start_btn.click(
        login_fn,
        inputs=[user_id_in, ds_dd],
        outputs=full_refresh,
    )

    next_btn.click(
        es_next_fn,
        inputs=[*session_state, ident, com, sug, ovl_es],
        outputs=es_nav_outputs,
    )
    prev_btn.click(
        es_prev_fn,
        inputs=[*session_state, ident, com, sug, ovl_es],
        outputs=es_nav_outputs,
    )

    next_cb.click(
        cb_next_fn,
        inputs=[*session_state, per, coh, nat, ovl_cb],
        outputs=cb_nav_outputs,
    )
    prev_cb.click(
        cb_prev_fn,
        inputs=[*session_state, per, coh, nat, ovl_cb],
        outputs=cb_nav_outputs,
    )

    # Both panels carry their own logout button wired to the same handler.
    for logout_btn in (logout_es, logout_cb):
        logout_btn.click(
            logout_fn,
            inputs=session_state,
            outputs=full_refresh,
        )


if __name__ == "__main__":
    # share=True asks Gradio to also create a public share link.
    demo.launch(share=True)