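# File-viewer header captured together with the source (kept as comments so
# the module parses as Python):
# EMNLP / app.py
# sutdaiday
# fixed
# 2a9914a
# raw
# history blame
# 16.9 kB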
import gradio as gr
import pandas as pd
import random, os, csv
# ─── Configuration ───────────────────────────────────────────────────────────
# Maps each dataset name offered at login to the CSV file holding its items.
# Insertion order is the order in which the names are listed to the user.
DATASET_FILES = dict(
    ESConv="./csv/esconv.csv",
    CraigslistBargain="./csv/craigslist.csv",
)
# ─── Helper functions ────────────────────────────────────────────────────────
def prepare_examples(user_id: str, dataset: str):
    """
    Build the shuffled list of pairwise comparisons for one annotator.

    Reads the CSV registered for ``dataset`` in ``DATASET_FILES``. Every row
    yields two pairs: "Ours" vs. "PPDPP" and "Ours" vs. "DPDP". The pairs are
    shuffled and, for each pair, a coin flip decides which response is shown
    on the left.

    Args:
        user_id: Annotator identifier; combined with ``dataset`` to seed the
            random number generator.
        dataset: Key into ``DATASET_FILES``.

    Returns:
        A list of dicts with the keys ``background``, ``left_text``,
        ``right_text``, ``left_name`` and ``right_name``.

    Raises:
        KeyError: If ``dataset`` is not in ``DATASET_FILES`` or the CSV lacks
            one of the columns "Background Information", "Ours", "PPDPP",
            "DPDP".
    """
    # A private generator keeps the order stable per (user, dataset) without
    # reseeding the process-wide ``random`` state, which other sessions served
    # by the same process would otherwise share and disturb. Seeding
    # ``random.Random`` with the same string yields the same sequence as
    # ``random.seed`` with that string, so existing orderings are unchanged.
    rng = random.Random(f"{user_id}_{dataset}")

    df = pd.read_csv(DATASET_FILES[dataset])

    pairs = []
    for _, row in df.iterrows():
        pairs.extend(
            dict(
                background=row["Background Information"],
                ours=row["Ours"],
                other=row[other],
                other_name=other,
            )
            for other in ("PPDPP", "DPDP")
        )
    rng.shuffle(pairs)

    prepared = []
    for item in pairs:
        ours = ("Ours", item["ours"])
        other = (item["other_name"], item["other"])
        # One draw per pair decides whether "Ours" is shown on the left.
        left, right = (ours, other) if rng.random() < 0.5 else (other, ours)
        prepared.append(
            dict(
                background=item["background"],
                left_text=left[1],
                right_text=right[1],
                left_name=left[0],
                right_name=right[0],
            )
        )
    return prepared
def save_all_to_csv(user_id, dataset, examples, responses):
    """
    Rewrite the annotator's results file from scratch (idempotent).

    The file is named ``{user_id}_{dataset}_results.csv`` and is created in
    the current working directory. Nothing is written when ``responses`` is
    empty.

    Args:
        user_id: Annotator identifier (first column and part of the filename).
        dataset: Dataset name (second column and part of the filename).
        examples: Prepared examples; indexed by the keys of ``responses``.
        responses: Mapping ``{example index: {metric name: answer}}``.

    Raises:
        ValueError: From ``csv.DictWriter`` if a response has a metric name
            that the first response in ``responses`` does not have.
    """
    if not responses:
        return

    filename = f"{user_id}_{dataset}_results.csv"
    # The metric columns are taken from the first stored response.
    metrics = list(next(iter(responses.values())).keys())
    header = [
        "UserID",
        "Dataset",
        "Background",
        "Response A Method",
        "Response B Method",
    ] + metrics

    # Write to a sibling temp file and swap it in afterwards, so a failure
    # part-way through never leaves a truncated results file behind.
    tmp_name = f"{filename}.tmp"
    try:
        with open(tmp_name, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=header)
            writer.writeheader()
            for idx in sorted(responses):
                ex = examples[idx]
                row = {
                    "UserID": user_id,
                    "Dataset": dataset,
                    "Background": ex["background"],
                    "Response A Method": ex["left_name"],
                    "Response B Method": ex["right_name"],
                }
                row.update(responses[idx])
                writer.writerow(row)
        os.replace(tmp_name, filename)
    finally:
        # Only still present when writing or replacing failed.
        if os.path.exists(tmp_name):
            os.remove(tmp_name)
def load_responses_from_csv(user_id, dataset, examples):
    """
    Rebuild the ``{example index: {metric: answer}}`` mapping from a saved file.

    Looks for ``{user_id}_{dataset}_results.csv`` in the current working
    directory and returns an empty dict when it does not exist. A saved row is
    attached to the example whose background text, left method name and right
    method name all equal the row's "Background", "Response A Method" and
    "Response B Method" cells; rows without such an example are skipped.
    """
    results_path = f"{user_id}_{dataset}_results.csv"
    if not os.path.exists(results_path):
        return {}

    saved = pd.read_csv(results_path)

    # Every column that is not one of the fixed bookkeeping columns is a metric.
    bookkeeping = (
        "UserID",
        "Dataset",
        "Background",
        "Response A Method",
        "Response B Method",
    )
    metric_names = [col for col in saved.columns if col not in bookkeeping]

    # (background, left method, right method) → position in ``examples``
    position_of = {}
    for position, ex in enumerate(examples):
        signature = (ex["background"], ex["left_name"], ex["right_name"])
        position_of[signature] = position

    restored = {}
    for _, record in saved.iterrows():
        signature = (
            record["Background"],
            record["Response A Method"],
            record["Response B Method"],
        )
        if signature not in position_of:
            continue
        restored[position_of[signature]] = {
            name: record[name] for name in metric_names
        }
    return restored
# ─── Panel-specific loaders ──────────────────────────────────────────────────
def es_load_example(idx, examples, responses):
    """
    Return the nine ESConv panel values for the example at position ``idx``.

    The tuple holds, in order: background, left text, right text, progress
    label, the stored answers for "Identification", "Comforting", "Suggestion"
    and "Overall" (``None`` where nothing is stored), and an empty error text.
    """
    current = examples[idx]
    stored = responses.get(idx, {})
    progress = f"Item {idx+1} of {len(examples)}"
    answers = tuple(
        stored.get(metric)
        for metric in ("Identification", "Comforting", "Suggestion", "Overall")
    )
    display = (
        current["background"],
        current["left_text"],
        current["right_text"],
        progress,
    )
    # Trailing "" clears the error text.
    return display + answers + ("",)
def cb_load_example(idx, examples, responses):
    """
    Return the nine CraigslistBargain panel values for the example at ``idx``.

    The tuple holds, in order: background, left text, right text, progress
    label, the stored answers for "Persuasiveness", "Coherence", "Naturalness"
    and "Overall" (``None`` where nothing is stored), and an empty error text.
    """
    current = examples[idx]
    stored = responses.get(idx, {})
    progress = f"Item {idx+1} of {len(examples)}"
    answers = tuple(
        stored.get(metric)
        for metric in ("Persuasiveness", "Coherence", "Naturalness", "Overall")
    )
    display = (
        current["background"],
        current["left_text"],
        current["right_text"],
        progress,
    )
    # Trailing "" clears the error text.
    return display + answers + ("",)
# ─── Login & Logout ──────────────────────────────────────────────────────────
def login_fn(user_id, dataset):
    """
    Start a survey session and return the initial value of every output.

    Prepares the examples for ``(user_id, dataset)``, restores any answers
    already saved for that pair, and loads item 0 into the panel that belongs
    to ``dataset``. The other panel receives blank placeholders.

    Args:
        user_id: Annotator identifier typed into the login form.
        dataset: Selected dataset name.

    Returns:
        A 26-tuple: three visibility updates (login, ESConv and
        CraigslistBargain panels), the five state values (user id, dataset,
        examples, index, responses), nine ESConv panel values and nine
        CraigslistBargain panel values.

    Raises:
        gr.Error: If either field is empty, if ``user_id`` contains a path
            separator, or if the dataset has no examples.
    """
    if not user_id or not dataset:
        raise gr.Error("Please enter your User ID and select a dataset.")
    # The user ID becomes part of the results filename, so it must not be able
    # to point outside the working directory.
    if any(ch in user_id for ch in ("/", "\\", "\0")):
        raise gr.Error("User ID must not contain slashes.")

    examples = prepare_examples(user_id, dataset)
    if not examples:
        raise gr.Error("The selected dataset contains no examples.")
    responses = load_responses_from_csv(user_id, dataset, examples)
    idx = 0

    # Blank values for the panel that stays hidden: four text fields, four
    # unanswered radios, empty error text.
    blank_panel = ("", "", "", "", None, None, None, None, "")
    if dataset == "ESConv":
        es_values = es_load_example(idx, examples, responses)
        cb_values = blank_panel
    else:
        es_values = blank_panel
        cb_values = cb_load_example(idx, examples, responses)

    return (
        gr.update(visible=False),  # hide login panel
        gr.update(visible=(dataset == "ESConv")),
        gr.update(visible=(dataset == "CraigslistBargain")),
        user_id,
        dataset,
        examples,
        idx,
        responses,
        *es_values,  # ESConv outputs
        *cb_values,  # CB outputs
    )
def logout_fn(user_id, dataset, examples, idx, responses):
    """
    Save the session, then reset every component to its logged-out state.

    Args:
        user_id: Current annotator identifier.
        dataset: Current dataset name; nothing is saved when it is empty.
        examples: Prepared examples of the session.
        idx: Current example index (unused; supplied by the event wiring).
        responses: Answers collected so far.

    Returns:
        A 26-tuple matching the logout wiring: three visibility updates
        (login shown, both survey panels hidden), five reset state values,
        then nine reset values for each of the two survey panels.
    """
    # Save before quitting
    if dataset:
        save_all_to_csv(user_id, dataset, examples, responses)

    # Per panel: four text fields, four radios, one error text. Radios are
    # reset to None (nothing selected) rather than "", which is not a choice.
    panel_reset = ("", "", "", "", None, None, None, None, "")
    return (
        gr.update(visible=True),  # show login
        gr.update(visible=False),  # hide es
        gr.update(visible=False),  # hide cb
        "",
        "",
        [],
        0,
        {},  # state vars reset
        *panel_reset,  # ESConv component resets
        *panel_reset,  # CB component resets
    )
# ─── Navigation callback helpers ─────────────────────────────────────────────
def es_next_fn(user_id, dataset, examples, idx, responses, ident, com, sug, ovl):
    """
    Store the ESConv answers for the current item and advance to the next one.

    Args:
        user_id: Current annotator identifier.
        dataset: Current dataset name.
        examples: Prepared examples of the session.
        idx: Index of the item currently shown.
        responses: Answers collected so far; updated in place.
        ident, com, sug, ovl: Selected radio values (``None`` if unanswered).

    Returns:
        An 11-tuple: background, left text, right text, progress label, new
        index, responses, the four radio values and the error text. When any
        answer is missing the current item is returned unchanged with an error
        text. Past the last item, the four text fields carry the completion
        message and the radios are cleared.
    """
    total = len(examples)
    finished = ("🚩 Survey complete! Thank you.",) * 4

    # Already past the last item (e.g. Next clicked again after finishing):
    # there is no example to index, so keep showing the completion message.
    if idx >= total:
        return finished + (idx, responses, None, None, None, None, "")

    if None in (ident, com, sug, ovl):
        ex = examples[idx]
        return (
            ex["background"],
            ex["left_text"],
            ex["right_text"],
            f"Item {idx+1} of {total}",
            idx,
            responses,
            ident,
            com,
            sug,
            ovl,
            "All metrics must be answered before proceeding.",
        )

    responses[idx] = dict(
        Identification=ident, Comforting=com, Suggestion=sug, Overall=ovl
    )
    save_all_to_csv(user_id, dataset, examples, responses)

    idx += 1
    if idx >= total:
        return finished + (idx, responses, None, None, None, None, "")

    loaded = es_load_example(idx, examples, responses)
    # Splice the two state values between the display fields and the answers.
    return loaded[:4] + (idx, responses) + loaded[4:]
def es_prev_fn(user_id, dataset, examples, idx, responses, ident, com, sug, ovl):
    """
    Step back one ESConv item, keeping the current answers if they are complete.

    The current answers are stored and saved only when all four are present;
    an incomplete set is discarded. Returns the same 11-value layout as the
    ESConv "next" callback: four display fields, index, responses, four radio
    values and the error text.
    """
    fully_answered = None not in (ident, com, sug, ovl)
    if fully_answered:
        responses[idx] = {
            "Identification": ident,
            "Comforting": com,
            "Suggestion": sug,
            "Overall": ovl,
        }
        save_all_to_csv(user_id, dataset, examples, responses)

    # Never move before the first item.
    target = idx - 1 if idx > 0 else 0
    view = es_load_example(target, examples, responses)
    return (*view[:4], target, responses, *view[4:])
def cb_next_fn(user_id, dataset, examples, idx, responses, per, coh, nat, ovl_cb):
    """
    Store the CraigslistBargain answers for the current item and advance.

    Args:
        user_id: Current annotator identifier.
        dataset: Current dataset name.
        examples: Prepared examples of the session.
        idx: Index of the item currently shown.
        responses: Answers collected so far; updated in place.
        per, coh, nat, ovl_cb: Selected radio values (``None`` if unanswered).

    Returns:
        An 11-tuple: background, left text, right text, progress label, new
        index, responses, the four radio values and the error text. When any
        answer is missing the current item is returned unchanged with an error
        text. Past the last item, the four text fields carry the completion
        message and the radios are cleared.
    """
    total = len(examples)
    finished = ("🚩 Survey complete! Thank you.",) * 4

    # Already past the last item (e.g. Next clicked again after finishing):
    # there is no example to index, so keep showing the completion message.
    if idx >= total:
        return finished + (idx, responses, None, None, None, None, "")

    if None in (per, coh, nat, ovl_cb):
        ex = examples[idx]
        return (
            ex["background"],
            ex["left_text"],
            ex["right_text"],
            f"Item {idx+1} of {total}",
            idx,
            responses,
            per,
            coh,
            nat,
            ovl_cb,
            "All metrics must be answered before proceeding.",
        )

    responses[idx] = dict(
        Persuasiveness=per, Coherence=coh, Naturalness=nat, Overall=ovl_cb
    )
    save_all_to_csv(user_id, dataset, examples, responses)

    idx += 1
    if idx >= total:
        # One value per wired output (11): the index and responses go to their
        # state slots, not the message.
        return finished + (idx, responses, None, None, None, None, "")

    loaded = cb_load_example(idx, examples, responses)
    # Splice the two state values between the display fields and the answers.
    return loaded[:4] + (idx, responses) + loaded[4:]
def cb_prev_fn(user_id, dataset, examples, idx, responses, per, coh, nat, ovl_cb):
    """
    Step back one CraigslistBargain item, keeping complete answers.

    The current answers are stored and saved only when all four are present;
    an incomplete set is discarded. Returns the same 11-value layout as the
    CraigslistBargain "next" callback: four display fields, index, responses,
    four radio values and the error text.
    """
    fully_answered = None not in (per, coh, nat, ovl_cb)
    if fully_answered:
        responses[idx] = {
            "Persuasiveness": per,
            "Coherence": coh,
            "Naturalness": nat,
            "Overall": ovl_cb,
        }
        save_all_to_csv(user_id, dataset, examples, responses)

    # Never move before the first item.
    target = idx - 1 if idx > 0 else 0
    view = cb_load_example(target, examples, responses)
    return (*view[:4], target, responses, *view[4:])
# ─── Build Gradio App ────────────────────────────────────────────────────────
def _preference_radio(label):
    """Create one three-way pairwise-preference question with the given label."""
    return gr.Radio(
        ["Prefer Response A", "Prefer Response B", "No preference"],
        label=label,
    )


with gr.Blocks() as demo:
    # ── Login ────────────────────────────────────────────────────────────────
    with gr.Column() as login_panel:
        gr.Markdown("## Human Evaluation Survey")
        user_id_in = gr.Textbox(label="User ID")
        ds_dd = gr.Dropdown(list(DATASET_FILES.keys()), label="Select dataset")
        start_btn = gr.Button("Start survey")

    # Shared state
    uid_state = gr.State("")
    ds_state = gr.State("")
    ex_state = gr.State([])
    idx_state = gr.State(0)
    resp_state = gr.State({})

    # ── ESConv Panel ─────────────────────────────────────────────────────────
    with gr.Column(visible=False) as es_panel:
        bg = gr.Textbox(label="Background context", interactive=False)
        with gr.Row():
            lbox = gr.Textbox(label="Response A", interactive=False)
            rbox = gr.Textbox(label="Response B", interactive=False)
        ident = _preference_radio("Identification (Ident.)")
        com = _preference_radio("Comforting (Com.)")
        sug = _preference_radio("Suggestion (Sug.)")
        ovl_es = _preference_radio("Overall (Ov.)")
        # Left visible: the callbacks only set its text, so a hidden component
        # would never show the "All metrics must be answered" message.
        err_es = gr.HTML()
        prog = gr.Text(label="Progress")
        with gr.Row():
            prev_btn = gr.Button("◀ Prev")
            next_btn = gr.Button("▶ Next")
            logout_es = gr.Button("🚪 Logout")

    # ── CraigslistBargain Panel ──────────────────────────────────────────────
    with gr.Column(visible=False) as cb_panel:
        bg_cb = gr.Textbox(label="Background context", interactive=False)
        with gr.Row():
            lbox_cb = gr.Textbox(label="Response A", interactive=False)
            rbox_cb = gr.Textbox(label="Response B", interactive=False)
        per = _preference_radio("Persuasiveness (Per.)")
        coh = _preference_radio("Coherence (Coh.)")
        nat = _preference_radio("Naturalness (Nat.)")
        ovl_cb = _preference_radio("Overall")
        # Left visible for the same reason as the ESConv error text.
        err_cb = gr.HTML()
        prog_cb = gr.Text(label="Progress")
        with gr.Row():
            prev_cb = gr.Button("◀ Prev")
            next_cb = gr.Button("▶ Next")
            logout_cb = gr.Button("🚪 Logout")

    # ── Wiring ───────────────────────────────────────────────────────────────
    # The order of every list below must match the order of the values the
    # corresponding callback returns.
    session_state = [uid_state, ds_state, ex_state, idx_state, resp_state]
    es_metrics = [ident, com, sug, ovl_es]
    cb_metrics = [per, coh, nat, ovl_cb]

    # Login and logout update all 26 components: panels, state, both surveys.
    all_outputs = [
        login_panel,
        es_panel,
        cb_panel,
        *session_state,
        # ESConv
        bg,
        lbox,
        rbox,
        prog,
        *es_metrics,
        err_es,
        # CB
        bg_cb,
        lbox_cb,
        rbox_cb,
        prog_cb,
        *cb_metrics,
        err_cb,
    ]
    es_nav_inputs = [*session_state, *es_metrics]
    es_nav_outputs = [bg, lbox, rbox, prog, idx_state, resp_state, *es_metrics, err_es]
    cb_nav_inputs = [*session_state, *cb_metrics]
    cb_nav_outputs = [
        bg_cb,
        lbox_cb,
        rbox_cb,
        prog_cb,
        idx_state,
        resp_state,
        *cb_metrics,
        err_cb,
    ]

    start_btn.click(login_fn, inputs=[user_id_in, ds_dd], outputs=all_outputs)

    # ESConv navigation
    next_btn.click(es_next_fn, inputs=es_nav_inputs, outputs=es_nav_outputs)
    prev_btn.click(es_prev_fn, inputs=es_nav_inputs, outputs=es_nav_outputs)

    # CB navigation
    next_cb.click(cb_next_fn, inputs=cb_nav_inputs, outputs=cb_nav_outputs)
    prev_cb.click(cb_prev_fn, inputs=cb_nav_inputs, outputs=cb_nav_outputs)

    # Logout buttons (both panels share same callback)
    for logout_btn in (logout_es, logout_cb):
        logout_btn.click(logout_fn, inputs=session_state, outputs=all_outputs)
# ─── Run ─────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Start the app only when this file is run as a script, not on import.
    launch_options = {"share": True}
    demo.launch(**launch_options)