# Choosing a metric for your task

So you've trained your model and want to see how well it's doing on a dataset of your choice. Where do you start?

There is no "one size fits all" approach to choosing an evaluation metric, but some good guidelines to keep in mind are described below.

## Categories of metrics

There are 3 high-level categories of metrics:
1. **Generic metrics**, which can be applied to a variety of situations and datasets, such as precision and accuracy.
2. **Task-specific metrics**, which are limited to a given task, such as Machine Translation (often evaluated with metrics like BLEU or ROUGE) or Named Entity Recognition (often evaluated with seqeval).
3. **Dataset-specific metrics**, which aim to measure model performance on specific benchmarks: for instance, the GLUE benchmark has a dedicated evaluation metric.

Let's look at each of these cases:

### Generic metrics

Many of the metrics used in the Machine Learning community are quite generic and can be applied to a variety of tasks and datasets, such as precision and accuracy. To see how to compute the precision of a model:

```python
>>> precision_metric = evaluate.load("precision")
>>> results = precision_metric.compute(references=[0, 1], predictions=[0, 1])
>>> print(results)
{'precision': 1.0}
```

### Task-specific metrics

Popular ML tasks like Machine Translation and Named Entity Recognition have dedicated metrics: Machine Translation is often evaluated with BLEU or ROUGE, and Named Entity Recognition with seqeval. You can find the right metric for your task by looking at the task pages, checking leaderboards, and reading the metric cards for the relevant metrics; a sketch of loading and computing one of these metrics follows below.
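As an illustration, here is a minimal sketch of computing BLEU with 🤗 Evaluate; the toy prediction and reference sentences are invented for the example:

```python
import evaluate

# Load the BLEU metric, commonly used for Machine Translation
bleu_metric = evaluate.load("bleu")

# predictions: one candidate translation per example; references: a list
# of acceptable reference translations for each prediction
results = bleu_metric.compute(
    predictions=["the cat sat on the mat"],
    references=[["the cat sat on the mat"]],
)
print(results["bleu"])  # 1.0 for an exact match
```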
### Dataset-specific metrics

Some datasets have dedicated metrics associated with them -- this is especially the case for popular benchmarks like GLUE and SQuAD. If you are evaluating your model on such a benchmark, you can use its dedicated evaluation metric, making sure you respect the format it requires. For example, to evaluate your model on the SQuAD dataset, you need to feed the `question` and `context` into your model and return the `prediction_text`, which should be compared with the `references` (based on matching the `id` of the question):

```python
>>> from evaluate import load
>>> squad_metric = load("squad")
>>> predictions = [{'prediction_text': '1976', 'id': '56e10a3be3433e1400422b22'}]
>>> references = [{'answers': {'answer_start': [97], 'text': ['1976']}, 'id': '56e10a3be3433e1400422b22'}]
>>> results = squad_metric.compute(predictions=predictions, references=references)
>>> results
{'exact_match': 100.0, 'f1': 100.0}
```

<Tip warning={true}>

💡 GLUE is actually a collection of different subsets on different tasks, so first you need to choose the one that corresponds to the NLI task, such as mnli, which is described as a "crowdsourced collection of sentence pairs with textual entailment annotations".

</Tip>
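Following the tip above, here is a small sketch of loading the mnli subset of the GLUE metric; the integer labels are placeholders standing in for real model outputs:

```python
import evaluate

# GLUE needs a configuration name for the subset being evaluated
glue_metric = evaluate.load("glue", "mnli")

# Toy integer class labels in place of actual model predictions
results = glue_metric.compute(predictions=[0, 1], references=[0, 1])
print(results)  # mnli is scored with accuracy, e.g. {'accuracy': 1.0}
```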
You can find examples of dataset structures by consulting the "Dataset Preview" function or the dataset card for a given dataset, and you can see how to use its dedicated evaluation function based on the metric card.