jjkim committed · Commit cc070df · Parent(s): 346d7a2

change candidate to prediction

code_eval.py · CHANGED · +14 -14
@@ -157,9 +157,9 @@ class CodeEval(evaluate.Metric):
 
     def _compute(
         self,
-
-
-
+        predictions,
+        pred_key,
+        pred_template,
         references,
         ref_key,
         ref_template,
@@ -179,27 +179,27 @@ class CodeEval(evaluate.Metric):
                 "This metric is currently not supported on Windows."
             )
 
-
+        predictions = sorted(predictions, key=lambda x: x["id"])
         references = sorted(references, key=lambda x: x["id"])
        with ThreadPoolExecutor(max_workers=num_workers) as executor:
             results = {}
-            for
-                assert
-                tid =
+            for pred_d, ref_d in zip(predictions, references):
+                assert pred_d["id"] == ref_d["id"]
+                tid = pred_d["id"]
 
                 results[tid] = []
-
+                pred = pred_d[pred_key]
                 ref = ref_d[ref_key]
-                for
-                    result = Result(task_id=tid,
-                    body = Template(
+                for pid, p in enumerate(pred):
+                    result = Result(task_id=tid, prediction_id=pid)
+                    body = Template(pred_template).safe_substitute(prediction=p)
                     for r in ref:
                         assert isinstance(r, str)
                         test = Template(ref_template).safe_substitute(ref_key=r)
-                        test = Template(test).safe_substitute(
+                        test = Template(test).safe_substitute(prediction=p)
 
                         test_program = body + "\n" + test
-                        args = (test_program, timeout, tid,
+                        args = (test_program, timeout, tid, pid)
                         future = executor.submit(check_correctness, *args)
                         result.add(future)
                     results[tid].append(result)
@@ -266,7 +266,7 @@ def estimate_pass_at_k(num_samples, num_correct, k):
 
 class Result(BaseModel):
     task_id: str
-
+    prediction_id: int
 
     passed: Optional[bool] = None
     result: List[str] = []
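
For context, a minimal sketch of how the renamed inputs fit together, based only on the substitutions visible in this diff. The example records, template strings, and key names ("solutions", "tests") are illustrative assumptions, not part of the repository:

from string import Template

# Illustrative inputs: each record carries an "id", a list of predictions under
# pred_key, and a list of reference tests under ref_key, mirroring what
# _compute indexes above. The values themselves are made up for this sketch.
predictions = [{"id": "task_0", "solutions": ["def add(a, b):\n    return a + b"]}]
references = [{"id": "task_0", "tests": ["assert add(1, 2) == 3"]}]
pred_key, ref_key = "solutions", "tests"

# Assumed template strings: pred_template exposes a $prediction placeholder and
# ref_template a $ref_key placeholder, matching the safe_substitute calls above.
pred_template = "$prediction"
ref_template = "$ref_key"

pred_d, ref_d = predictions[0], references[0]
p = pred_d[pred_key][0]
r = ref_d[ref_key][0]

body = Template(pred_template).safe_substitute(prediction=p)
test = Template(ref_template).safe_substitute(ref_key=r)
test = Template(test).safe_substitute(prediction=p)  # second pass, as in the diff

# This concatenation is what the metric hands to check_correctness.
test_program = body + "\n" + test
print(test_program)

When the metric is invoked, the same names would be passed as keyword arguments (predictions=..., pred_key=..., pred_template=..., references=..., ref_key=..., ref_template=...); the remaining parameters used in the body (e.g. timeout, num_workers) fall outside the changed lines shown here.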