"""This module implements the CIDEr metric for image captioning evaluation."""
import evaluate
import datasets
from .cider_scorer import CiderScorer

_CITATION = """\
@InProceedings{Vedantam_2015_CVPR,
author = {Vedantam, Ramakrishna and Lawrence Zitnick, C. and Parikh, Devi},
title = {CIDEr: Consensus-Based Image Description Evaluation},
booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
month = {June},
year = {2015}
}
"""

_DESCRIPTION = """\
CIDEr is a metric for evaluating image captioning. It measures the consensus
between a candidate image caption and a set of reference image captions written
by humans. Each caption is represented by TF-IDF weighted n-gram vectors
(n = 1 to 4); the CIDEr score is the cosine similarity between the candidate
and each reference caption, averaged over the references and over the n-gram
lengths, so that n-grams common across the whole corpus are down-weighted
relative to rare, informative ones.
"""

_KWARGS_DESCRIPTION = """
CIDEr (Consensus-based Image Description Evaluation) is a metric for evaluating the quality of image captions.
It measures how similar a generated caption is to a set of reference captions written by humans.
Args:
    predictions: list of predictions to score. Each prediction should be a single caption string.
    references: list of references for each prediction. Each entry may be a single
        reference string or a list of reference strings.
Returns:
    cider_score: the corpus-level CIDEr score (higher is better).
Examples:
    >>> metric = evaluate.load("sunhill/cider")
    >>> results = metric.compute(
    ...     predictions=['train traveling down a track in front of a road'],
    ...     references=[
    ...         [
    ...             'a train traveling down tracks next to lights',
    ...             'a blue and silver train next to train station and trees',
    ...             'a blue train is next to a sidewalk on the rails',
    ...             'a passenger train pulls into a train station',
    ...             'a train coming down the tracks arriving at a station'
    ...         ]
    ...     ]
    ... )
"""
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class CIDEr(evaluate.Metric):
"""CIDEr metric."""
def _info(self):
return evaluate.MetricInfo(
# This is the description that will appear on the modules page.
module_type="metric",
description=_DESCRIPTION,
citation=_CITATION,
inputs_description=_KWARGS_DESCRIPTION,
# This defines the format of each prediction and reference
features=[
datasets.Features(
{
"predictions": datasets.Value("string"),
"references": datasets.Value("string"),
}
),
datasets.Features(
{
"predictions": datasets.Value("string"),
"references": datasets.Sequence(datasets.Value("string")),
}
),
],
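            # Two input schemas are accepted: a single reference string per
            # prediction, or a list of reference strings per prediction;
            # _compute() below normalizes a bare string into a one-element list.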
# Homepage of the module for documentation
homepage="https://huggingface.co/spaces/sunhill/cider",
# Additional links to the codebase or references
codebase_urls=[
"https://github.com/ramavedantam/cider",
"https://github.com/EricWWWW/image-caption-metrics",
],
reference_urls=[
(
"https://openaccess.thecvf.com/content_cvpr_2015/html/"
"Vedantam_CIDEr_Consensus-Based_Image_2015_CVPR_paper.html"
)
],
)

    def _compute(self, predictions, references):
        """Compute the corpus-level CIDEr score for the given predictions and references."""
        assert len(predictions) == len(references), (
            "The number of predictions and references should be the same. "
            f"Got {len(predictions)} predictions and {len(references)} references."
        )
        # n=4: use 1- to 4-grams. sigma=6.0 is the standard width of the Gaussian
        # length-difference penalty used by CIDEr-D style scorers.
        cider_scorer = CiderScorer(n=4, sigma=6.0)
        for pred, ref in zip(predictions, references):
            assert isinstance(pred, str), (
                f"Each prediction should be a string. Got {type(pred)}."
            )
            # A single reference string is allowed; wrap it into a one-element list.
            if isinstance(ref, str):
                ref = [ref]
            assert isinstance(ref, list) and all(isinstance(r, str) for r in ref), (
                "Each reference should be a list of strings. "
                f"Got {type(ref)} with elements of type {[type(r) for r in ref]}."
            )
            cider_scorer += (pred, ref)
        # compute_score() returns the corpus-level mean and the per-caption scores.
        score, _ = cider_scorer.compute_score()
        return {"cider_score": float(score)}
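
# Note: CiderScorer.compute_score() also returns the per-caption scores that are
# discarded as `_` above. If those are ever needed, a hypothetical variant of
# _compute() could expose them alongside the corpus-level mean, e.g.:
#
#     score, per_caption = cider_scorer.compute_score()
#     return {
#         "cider_score": float(score),
#         "per_caption_scores": [float(s) for s in per_caption],
#     }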