jjkim committed · Commit cc070df · Parent(s): 346d7a2

change candidate to prediction

code_eval.py · CHANGED · +14 -14
@@ -157,9 +157,9 @@ class CodeEval(evaluate.Metric):
 
     def _compute(
         self,
-
-
-
+        predictions,
+        pred_key,
+        pred_template,
         references,
         ref_key,
         ref_template,
@@ -179,27 +179,27 @@ class CodeEval(evaluate.Metric):
                 "This metric is currently not supported on Windows."
             )
 
-
+        predictions = sorted(predictions, key=lambda x: x["id"])
         references = sorted(references, key=lambda x: x["id"])
        with ThreadPoolExecutor(max_workers=num_workers) as executor:
             results = {}
-            for
-                assert
-                tid =
+            for pred_d, ref_d in zip(predictions, references):
+                assert pred_d["id"] == ref_d["id"]
+                tid = pred_d["id"]
 
                 results[tid] = []
-
+                pred = pred_d[pred_key]
                 ref = ref_d[ref_key]
-                for
-                    result = Result(task_id=tid,
-                    body = Template(
+                for pid, p in enumerate(pred):
+                    result = Result(task_id=tid, prediction_id=pid)
+                    body = Template(pred_template).safe_substitute(prediction=p)
                     for r in ref:
                         assert isinstance(r, str)
                         test = Template(ref_template).safe_substitute(ref_key=r)
-                        test = Template(test).safe_substitute(
+                        test = Template(test).safe_substitute(prediction=p)
 
                         test_program = body + "\n" + test
-                        args = (test_program, timeout, tid,
+                        args = (test_program, timeout, tid, pid)
                         future = executor.submit(check_correctness, *args)
                         result.add(future)
                     results[tid].append(result)
@@ -266,7 +266,7 @@ def estimate_pass_at_k(num_samples, num_correct, k):
 
 class Result(BaseModel):
     task_id: str
-
+    prediction_id: int
 
     passed: Optional[bool] = None
     result: List[str] = []
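
For context, a minimal sketch of how the renamed inputs fit together, based only on the substitutions visible in this diff. The example records, template strings, and key names ("solutions", "tests") are illustrative assumptions, not part of the repository:

from string import Template

# Illustrative inputs: each record carries an "id", a list of predictions under
# pred_key, and a list of reference tests under ref_key, mirroring what
# _compute indexes above. The values themselves are made up for this sketch.
predictions = [{"id": "task_0", "solutions": ["def add(a, b):\n    return a + b"]}]
references = [{"id": "task_0", "tests": ["assert add(1, 2) == 3"]}]
pred_key, ref_key = "solutions", "tests"

# Assumed template strings: pred_template exposes a $prediction placeholder and
# ref_template a $ref_key placeholder, matching the safe_substitute calls above.
pred_template = "$prediction"
ref_template = "$ref_key"

pred_d, ref_d = predictions[0], references[0]
p = pred_d[pred_key][0]
r = ref_d[ref_key][0]

body = Template(pred_template).safe_substitute(prediction=p)
test = Template(ref_template).safe_substitute(ref_key=r)
test = Template(test).safe_substitute(prediction=p)  # second pass, as in the diff

# This concatenation is what the metric hands to check_correctness.
test_program = body + "\n" + test
print(test_program)

When the metric is invoked, the same names would be passed as keyword arguments (predictions=..., pred_key=..., pred_template=..., references=..., ref_key=..., ref_template=...); the remaining parameters used in the body (e.g. timeout, num_workers) fall outside the changed lines shown here.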