import{s as Rs,o as Ss,n as j}from"../chunks/scheduler.18a86fab.js";import{S as Xs,i as Qs,g as c,s as a,r as u,A as Es,h as p,f as d,c as r,j as v,x as k,u as f,k as $,l as Ls,y as s,a as m,v as g,d as _,t as T,w as b}from"../chunks/index.98837b22.js";import{T as yt}from"../chunks/Tip.77304350.js";import{D as J}from"../chunks/Docstring.a1ef7999.js";import{C as K}from"../chunks/CodeBlock.8d0c2e8a.js";import{E as kt}from"../chunks/ExampleCodeBlock.8c3ee1f9.js";import{H as O,E as As}from"../chunks/getInferenceSnippets.06c2775f.js";import{H as Ps,a as Dn}from"../chunks/HfOption.6641485e.js";function Ys(w){let t,h="Click on the T5 models in the right sidebar for more examples of how to apply T5 to different language tasks.";return{c(){t=c("p"),t.textContent=h},l(o){t=p(o,"P",{"data-svelte-h":!0}),k(t)!=="svelte-t4ran1"&&(t.textContent=h)},m(o,l){m(o,t,l)},p:j,d(o){o&&d(t)}}}function Ds(w){let t,h;return t=new K({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwcGlwZWxpbmUlMEElMEFwaXBlbGluZSUyMCUzRCUyMHBpcGVsaW5lKCUwQSUyMCUyMCUyMCUyMHRhc2slM0QlMjJ0ZXh0MnRleHQtZ2VuZXJhdGlvbiUyMiUyQyUwQSUyMCUyMCUyMCUyMG1vZGVsJTNEJTIyZ29vZ2xlLXQ1JTJGdDUtYmFzZSUyMiUyQyUwQSUyMCUyMCUyMCUyMGR0eXBlJTNEdG9yY2guZmxvYXQxNiUyQyUwQSUyMCUyMCUyMCUyMGRldmljZSUzRDAlMEEpJTBBcGlwZWxpbmUoJTIydHJhbnNsYXRlJTIwRW5nbGlzaCUyMHRvJTIwRnJlbmNoJTNBJTIwVGhlJTIwd2VhdGhlciUyMGlzJTIwbmljZSUyMHRvZGF5LiUyMik=",highlighted:`import torch
from transformers import pipeline
pipeline = pipeline(
task="text2text-generation",
model="google-t5/t5-base",
dtype=torch.float16,
device=0
)
pipeline("translate English to French: The weather is nice today.")`,wrap:!1}}),{c(){u(t.$$.fragment)},l(o){f(t.$$.fragment,o)},m(o,l){g(t,o,l),h=!0},p:j,i(o){h||(_(t.$$.fragment,o),h=!0)},o(o){T(t.$$.fragment,o),h=!1},d(o){b(t,o)}}}function Os(w){let t,h;return t=new K({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b01vZGVsRm9yU2VxMlNlcUxNJTJDJTIwQXV0b1Rva2VuaXplciUwQSUwQXRva2VuaXplciUyMCUzRCUyMEF1dG9Ub2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMmdvb2dsZS10NSUyRnQ1LWJhc2UlMjIlMEElMjAlMjAlMjAlMjApJTBBbW9kZWwlMjAlM0QlMjBBdXRvTW9kZWxGb3JTZXEyU2VxTE0uZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMmdvb2dsZS10NSUyRnQ1LWJhc2UlMjIlMkMlMEElMjAlMjAlMjAlMjBkdHlwZSUzRHRvcmNoLmZsb2F0MTYlMkMlMEElMjAlMjAlMjAlMjBkZXZpY2VfbWFwJTNEJTIyYXV0byUyMiUwQSUyMCUyMCUyMCUyMCklMEElMEFpbnB1dF9pZHMlMjAlM0QlMjB0b2tlbml6ZXIoJTIydHJhbnNsYXRlJTIwRW5nbGlzaCUyMHRvJTIwRnJlbmNoJTNBJTIwVGhlJTIwd2VhdGhlciUyMGlzJTIwbmljZSUyMHRvZGF5LiUyMiUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIpLnRvKG1vZGVsLmRldmljZSklMEElMEFvdXRwdXQlMjAlM0QlMjBtb2RlbC5nZW5lcmF0ZSgqKmlucHV0X2lkcyUyQyUyMGNhY2hlX2ltcGxlbWVudGF0aW9uJTNEJTIyc3RhdGljJTIyKSUwQXByaW50KHRva2VuaXplci5kZWNvZGUob3V0cHV0JTVCMCU1RCUyQyUyMHNraXBfc3BlY2lhbF90b2tlbnMlM0RUcnVlKSk=",highlighted:`import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained(
"google-t5/t5-base"
)
model = AutoModelForSeq2SeqLM.from_pretrained(
"google-t5/t5-base",
dtype=torch.float16,
device_map="auto"
)
input_ids = tokenizer("translate English to French: The weather is nice today.", return_tensors="pt").to(model.device)
output = model.generate(**input_ids, cache_implementation="static")
print(tokenizer.decode(output[0], skip_special_tokens=True))`,wrap:!1}}),{c(){u(t.$$.fragment)},l(o){f(t.$$.fragment,o)},m(o,l){g(t,o,l),h=!0},p:j,i(o){h||(_(t.$$.fragment,o),h=!0)},o(o){T(t.$$.fragment,o),h=!1},d(o){b(t,o)}}}function Ks(w){let t,h;return t=new K({props:{code:"ZWNobyUyMC1lJTIwJTIydHJhbnNsYXRlJTIwRW5nbGlzaCUyMHRvJTIwRnJlbmNoJTNBJTIwVGhlJTIwd2VhdGhlciUyMGlzJTIwbmljZSUyMHRvZGF5LiUyMiUyMCU3QyUyMHRyYW5zZm9ybWVycyUyMHJ1biUyMC0tdGFzayUyMHRleHQydGV4dC1nZW5lcmF0aW9uJTIwLS1tb2RlbCUyMGdvb2dsZS10NSUyRnQ1LWJhc2UlMjAtLWRldmljZSUyMDA=",highlighted:'echo -e "translate English to French: The weather is nice today." | transformers run --task text2text-generation --model google-t5/t5-base --device 0',wrap:!1}}),{c(){u(t.$$.fragment)},l(o){f(t.$$.fragment,o)},m(o,l){g(t,o,l),h=!0},p:j,i(o){h||(_(t.$$.fragment,o),h=!0)},o(o){T(t.$$.fragment,o),h=!1},d(o){b(t,o)}}}function ea(w){let t,h,o,l,M,n;return t=new Dn({props:{id:"usage",option:"Pipeline",$$slots:{default:[Ds]},$$scope:{ctx:w}}}),o=new Dn({props:{id:"usage",option:"AutoModel",$$slots:{default:[Os]},$$scope:{ctx:w}}}),M=new Dn({props:{id:"usage",option:"transformers CLI",$$slots:{default:[Ks]},$$scope:{ctx:w}}}),{c(){u(t.$$.fragment),h=a(),u(o.$$.fragment),l=a(),u(M.$$.fragment)},l(y){f(t.$$.fragment,y),h=r(y),f(o.$$.fragment,y),l=r(y),f(M.$$.fragment,y)},m(y,z){g(t,y,z),m(y,h,z),g(o,y,z),m(y,l,z),g(M,y,z),n=!0},p(y,z){const ro={};z&2&&(ro.$$scope={dirty:z,ctx:y}),t.$set(ro);const je={};z&2&&(je.$$scope={dirty:z,ctx:y}),o.$set(je);const ee={};z&2&&(ee.$$scope={dirty:z,ctx:y}),M.$set(ee)},i(y){n||(_(t.$$.fragment,y),_(o.$$.fragment,y),_(M.$$.fragment,y),n=!0)},o(y){T(t.$$.fragment,y),T(o.$$.fragment,y),T(M.$$.fragment,y),n=!1},d(y){y&&(d(h),d(l)),b(t,y),b(o,y),b(M,y)}}}function ta(w){let t,h=`Although the recipe for forward pass needs to be defined within this function, one should call the Module
instance afterwards instead of this since the former takes care of running the pre and post processing steps while
the latter silently ignores them.`;return{c(){t=c("p"),t.innerHTML=h},l(o){t=p(o,"P",{"data-svelte-h":!0}),k(t)!=="svelte-fincs2"&&(t.innerHTML=h)},m(o,l){m(o,t,l)},p:j,d(o){o&&d(t)}}}function oa(w){let t,h="Example:",o,l,M;return l=new K({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMkMlMjBUNU1vZGVsJTBBJTBBdG9rZW5pemVyJTIwJTNEJTIwQXV0b1Rva2VuaXplci5mcm9tX3ByZXRyYWluZWQoJTIyZ29vZ2xlLXQ1JTJGdDUtc21hbGwlMjIpJTBBbW9kZWwlMjAlM0QlMjBUNU1vZGVsLmZyb21fcHJldHJhaW5lZCglMjJnb29nbGUtdDUlMkZ0NS1zbWFsbCUyMiklMEElMEFpbnB1dF9pZHMlMjAlM0QlMjB0b2tlbml6ZXIoJTBBJTIwJTIwJTIwJTIwJTIyU3R1ZGllcyUyMGhhdmUlMjBiZWVuJTIwc2hvd24lMjB0aGF0JTIwb3duaW5nJTIwYSUyMGRvZyUyMGlzJTIwZ29vZCUyMGZvciUyMHlvdSUyMiUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIlMEEpLmlucHV0X2lkcyUyMCUyMCUyMyUyMEJhdGNoJTIwc2l6ZSUyMDElMEFkZWNvZGVyX2lucHV0X2lkcyUyMCUzRCUyMHRva2VuaXplciglMjJTdHVkaWVzJTIwc2hvdyUyMHRoYXQlMjIlMkMlMjByZXR1cm5fdGVuc29ycyUzRCUyMnB0JTIyKS5pbnB1dF9pZHMlMjAlMjAlMjMlMjBCYXRjaCUyMHNpemUlMjAxJTBBJTBBJTIzJTIwcHJlcHJvY2VzcyUzQSUyMFByZXBlbmQlMjBkZWNvZGVyX2lucHV0X2lkcyUyMHdpdGglMjBzdGFydCUyMHRva2VuJTIwd2hpY2glMjBpcyUyMHBhZCUyMHRva2VuJTIwZm9yJTIwVDVNb2RlbC4lMEElMjMlMjBUaGlzJTIwaXMlMjBub3QlMjBuZWVkZWQlMjBmb3IlMjB0b3JjaCdzJTIwVDVGb3JDb25kaXRpb25hbEdlbmVyYXRpb24lMjBhcyUyMGl0JTIwZG9lcyUyMHRoaXMlMjBpbnRlcm5hbGx5JTIwdXNpbmclMjBsYWJlbHMlMjBhcmcuJTBBZGVjb2Rlcl9pbnB1dF9pZHMlMjAlM0QlMjBtb2RlbC5fc2hpZnRfcmlnaHQoZGVjb2Rlcl9pbnB1dF9pZHMpJTBBJTBBJTIzJTIwZm9yd2FyZCUyMHBhc3MlMEFvdXRwdXRzJTIwJTNEJTIwbW9kZWwoaW5wdXRfaWRzJTNEaW5wdXRfaWRzJTJDJTIwZGVjb2Rlcl9pbnB1dF9pZHMlM0RkZWNvZGVyX2lucHV0X2lkcyklMEFsYXN0X2hpZGRlbl9zdGF0ZXMlMjAlM0QlMjBvdXRwdXRzLmxhc3RfaGlkZGVuX3N0YXRl",highlighted:`from transformers import AutoTokenizer, T5Model
tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-small")
model = T5Model.from_pretrained("google-t5/t5-small")
input_ids = tokenizer(
"Studies have been shown that owning a dog is good for you", return_tensors="pt"
).input_ids # Batch size 1
decoder_input_ids = tokenizer("Studies show that", return_tensors="pt").input_ids # Batch size 1
# preprocess: Prepend decoder_input_ids with start token which is pad token for T5Model.
# This is not needed for torch's T5ForConditionalGeneration as it does this internally using labels arg.
decoder_input_ids = model._shift_right(decoder_input_ids)
# forward pass
outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
last_hidden_states = outputs.last_hidden_state`,wrap:!1}}),{c(){t=c("p"),t.textContent=h,o=a(),u(l.$$.fragment)},l(n){t=p(n,"P",{"data-svelte-h":!0}),k(t)!=="svelte-11lpom8"&&(t.textContent=h),o=r(n),f(l.$$.fragment,n)},m(n,y){m(n,t,y),m(n,o,y),g(l,n,y),M=!0},p:j,i(n){M||(_(l.$$.fragment,n),M=!0)},o(n){T(l.$$.fragment,n),M=!1},d(n){n&&(d(t),d(o)),b(l,n)}}}function na(w){let t,h=`Although the recipe for forward pass needs to be defined within this function, one should call the Module
instance afterwards instead of this since the former takes care of running the pre and post processing steps while
the latter silently ignores them.`;return{c(){t=c("p"),t.innerHTML=h},l(o){t=p(o,"P",{"data-svelte-h":!0}),k(t)!=="svelte-fincs2"&&(t.innerHTML=h)},m(o,l){m(o,t,l)},p:j,d(o){o&&d(t)}}}function sa(w){let t,h="Examples:",o,l,M;return l=new K({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMkMlMjBUNUZvckNvbmRpdGlvbmFsR2VuZXJhdGlvbiUwQSUwQXRva2VuaXplciUyMCUzRCUyMEF1dG9Ub2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKCUyMmdvb2dsZS10NSUyRnQ1LXNtYWxsJTIyKSUwQW1vZGVsJTIwJTNEJTIwVDVGb3JDb25kaXRpb25hbEdlbmVyYXRpb24uZnJvbV9wcmV0cmFpbmVkKCUyMmdvb2dsZS10NSUyRnQ1LXNtYWxsJTIyKSUwQSUwQSUyMyUyMHRyYWluaW5nJTBBaW5wdXRfaWRzJTIwJTNEJTIwdG9rZW5pemVyKCUyMlRoZSUyMCUzQ2V4dHJhX2lkXzAlM0UlMjB3YWxrcyUyMGluJTIwJTNDZXh0cmFfaWRfMSUzRSUyMHBhcmslMjIlMkMlMjByZXR1cm5fdGVuc29ycyUzRCUyMnB0JTIyKS5pbnB1dF9pZHMlMEFsYWJlbHMlMjAlM0QlMjB0b2tlbml6ZXIoJTIyJTNDZXh0cmFfaWRfMCUzRSUyMGN1dGUlMjBkb2clMjAlM0NleHRyYV9pZF8xJTNFJTIwdGhlJTIwJTNDZXh0cmFfaWRfMiUzRSUyMiUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIpLmlucHV0X2lkcyUwQW91dHB1dHMlMjAlM0QlMjBtb2RlbChpbnB1dF9pZHMlM0RpbnB1dF9pZHMlMkMlMjBsYWJlbHMlM0RsYWJlbHMpJTBBbG9zcyUyMCUzRCUyMG91dHB1dHMubG9zcyUwQWxvZ2l0cyUyMCUzRCUyMG91dHB1dHMubG9naXRzJTBBJTBBJTIzJTIwaW5mZXJlbmNlJTBBaW5wdXRfaWRzJTIwJTNEJTIwdG9rZW5pemVyKCUwQSUyMCUyMCUyMCUyMCUyMnN1bW1hcml6ZSUzQSUyMHN0dWRpZXMlMjBoYXZlJTIwc2hvd24lMjB0aGF0JTIwb3duaW5nJTIwYSUyMGRvZyUyMGlzJTIwZ29vZCUyMGZvciUyMHlvdSUyMiUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIlMEEpLmlucHV0X2lkcyUyMCUyMCUyMyUyMEJhdGNoJTIwc2l6ZSUyMDElMEFvdXRwdXRzJTIwJTNEJTIwbW9kZWwuZ2VuZXJhdGUoaW5wdXRfaWRzKSUwQXByaW50KHRva2VuaXplci5kZWNvZGUob3V0cHV0cyU1QjAlNUQlMkMlMjBza2lwX3NwZWNpYWxfdG9rZW5zJTNEVHJ1ZSkpJTBBJTIzJTIwc3R1ZGllcyUyMGhhdmUlMjBzaG93biUyMHRoYXQlMjBvd25pbmclMjBhJTIwZG9nJTIwaXMlMjBnb29kJTIwZm9yJTIweW91Lg==",highlighted:`from transformers import AutoTokenizer, T5ForConditionalGeneration
tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-small")
model = T5ForConditionalGeneration.from_pretrained("google-t5/t5-small")
# training
input_ids = tokenizer("The <extra_id_0> walks in <extra_id_1> park", return_tensors="pt").input_ids
labels = tokenizer("<extra_id_0> cute dog <extra_id_1> the <extra_id_2>", return_tensors="pt").input_ids
outputs = model(input_ids=input_ids, labels=labels)
loss = outputs.loss
logits = outputs.logits
# inference
input_ids = tokenizer(
"summarize: studies have shown that owning a dog is good for you", return_tensors="pt"
).input_ids # Batch size 1
outputs = model.generate(input_ids)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
# studies have shown that owning a dog is good for you.`,wrap:!1}}),{c(){t=c("p"),t.textContent=h,o=a(),u(l.$$.fragment)},l(n){t=p(n,"P",{"data-svelte-h":!0}),k(t)!=="svelte-kvfsh7"&&(t.textContent=h),o=r(n),f(l.$$.fragment,n)},m(n,y){m(n,t,y),m(n,o,y),g(l,n,y),M=!0},p:j,i(n){M||(_(l.$$.fragment,n),M=!0)},o(n){T(l.$$.fragment,n),M=!1},d(n){n&&(d(t),d(o)),b(l,n)}}}function aa(w){let t,h=`Although the recipe for forward pass needs to be defined within this function, one should call the Module
instance afterwards instead of this since the former takes care of running the pre and post processing steps while
the latter silently ignores them.`;return{c(){t=c("p"),t.innerHTML=h},l(o){t=p(o,"P",{"data-svelte-h":!0}),k(t)!=="svelte-fincs2"&&(t.innerHTML=h)},m(o,l){m(o,t,l)},p:j,d(o){o&&d(t)}}}function ra(w){let t,h="Example:",o,l,M;return l=new K({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMkMlMjBUNUVuY29kZXJNb2RlbCUwQSUwQXRva2VuaXplciUyMCUzRCUyMEF1dG9Ub2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKCUyMmdvb2dsZS10NSUyRnQ1LXNtYWxsJTIyKSUwQW1vZGVsJTIwJTNEJTIwVDVFbmNvZGVyTW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUyMmdvb2dsZS10NSUyRnQ1LXNtYWxsJTIyKSUwQWlucHV0X2lkcyUyMCUzRCUyMHRva2VuaXplciglMEElMjAlMjAlMjAlMjAlMjJTdHVkaWVzJTIwaGF2ZSUyMGJlZW4lMjBzaG93biUyMHRoYXQlMjBvd25pbmclMjBhJTIwZG9nJTIwaXMlMjBnb29kJTIwZm9yJTIweW91JTIyJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJwdCUyMiUwQSkuaW5wdXRfaWRzJTIwJTIwJTIzJTIwQmF0Y2glMjBzaXplJTIwMSUwQW91dHB1dHMlMjAlM0QlMjBtb2RlbChpbnB1dF9pZHMlM0RpbnB1dF9pZHMpJTBBbGFzdF9oaWRkZW5fc3RhdGVzJTIwJTNEJTIwb3V0cHV0cy5sYXN0X2hpZGRlbl9zdGF0ZQ==",highlighted:`from transformers import AutoTokenizer, T5EncoderModel
tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-small")
model = T5EncoderModel.from_pretrained("google-t5/t5-small")
input_ids = tokenizer(
"Studies have been shown that owning a dog is good for you", return_tensors="pt"
).input_ids # Batch size 1
outputs = model(input_ids=input_ids)
last_hidden_states = outputs.last_hidden_state`,wrap:!1}}),{c(){t=c("p"),t.textContent=h,o=a(),u(l.$$.fragment)},l(n){t=p(n,"P",{"data-svelte-h":!0}),k(t)!=="svelte-11lpom8"&&(t.textContent=h),o=r(n),f(l.$$.fragment,n)},m(n,y){m(n,t,y),m(n,o,y),g(l,n,y),M=!0},p:j,i(n){M||(_(l.$$.fragment,n),M=!0)},o(n){T(l.$$.fragment,n),M=!1},d(n){n&&(d(t),d(o)),b(l,n)}}}function ia(w){let t,h=`Although the recipe for forward pass needs to be defined within this function, one should call the Module
instance afterwards instead of this since the former takes care of running the pre and post processing steps while
the latter silently ignores them.`;return{c(){t=c("p"),t.innerHTML=h},l(o){t=p(o,"P",{"data-svelte-h":!0}),k(t)!=="svelte-fincs2"&&(t.innerHTML=h)},m(o,l){m(o,t,l)},p:j,d(o){o&&d(t)}}}function da(w){let t,h="Example of single-label classification:",o,l,M;return l=new K({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b1Rva2VuaXplciUyQyUyMFQ1Rm9yU2VxdWVuY2VDbGFzc2lmaWNhdGlvbiUwQSUwQXRva2VuaXplciUyMCUzRCUyMEF1dG9Ub2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKCUyMmdvb2dsZS10NSUyRnQ1LXNtYWxsJTIyKSUwQW1vZGVsJTIwJTNEJTIwVDVGb3JTZXF1ZW5jZUNsYXNzaWZpY2F0aW9uLmZyb21fcHJldHJhaW5lZCglMjJnb29nbGUtdDUlMkZ0NS1zbWFsbCUyMiklMEElMEFpbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoJTIySGVsbG8lMkMlMjBteSUyMGRvZyUyMGlzJTIwY3V0ZSUyMiUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIpJTBBJTBBd2l0aCUyMHRvcmNoLm5vX2dyYWQoKSUzQSUwQSUyMCUyMCUyMCUyMGxvZ2l0cyUyMCUzRCUyMG1vZGVsKCoqaW5wdXRzKS5sb2dpdHMlMEElMEFwcmVkaWN0ZWRfY2xhc3NfaWQlMjAlM0QlMjBsb2dpdHMuYXJnbWF4KCkuaXRlbSgpJTBBbW9kZWwuY29uZmlnLmlkMmxhYmVsJTVCcHJlZGljdGVkX2NsYXNzX2lkJTVEJTBBJTBBJTIzJTIwVG8lMjB0cmFpbiUyMGElMjBtb2RlbCUyMG9uJTIwJTYwbnVtX2xhYmVscyU2MCUyMGNsYXNzZXMlMkMlMjB5b3UlMjBjYW4lMjBwYXNzJTIwJTYwbnVtX2xhYmVscyUzRG51bV9sYWJlbHMlNjAlMjB0byUyMCU2MC5mcm9tX3ByZXRyYWluZWQoLi4uKSU2MCUwQW51bV9sYWJlbHMlMjAlM0QlMjBsZW4obW9kZWwuY29uZmlnLmlkMmxhYmVsKSUwQW1vZGVsJTIwJTNEJTIwVDVGb3JTZXF1ZW5jZUNsYXNzaWZpY2F0aW9uLmZyb21fcHJldHJhaW5lZCglMjJnb29nbGUtdDUlMkZ0NS1zbWFsbCUyMiUyQyUyMG51bV9sYWJlbHMlM0RudW1fbGFiZWxzKSUwQSUwQWxhYmVscyUyMCUzRCUyMHRvcmNoLnRlbnNvciglNUIxJTVEKSUwQWxvc3MlMjAlM0QlMjBtb2RlbCgqKmlucHV0cyUyQyUyMGxhYmVscyUzRGxhYmVscykubG9zcyUwQXJvdW5kKGxvc3MuaXRlbSgpJTJDJTIwMik=",highlighted:`import torch
from transformers import AutoTokenizer, T5ForSequenceClassification
tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-small")
model = T5ForSequenceClassification.from_pretrained("google-t5/t5-small")
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
with torch.no_grad():
logits = model(**inputs).logits
predicted_class_id = logits.argmax().item()
model.config.id2label[predicted_class_id]
...
# To train a model on \`num_labels\` classes, you can pass \`num_labels=num_labels\` to \`.from_pretrained(...)\`
num_labels = len(model.config.id2label)
model = T5ForSequenceClassification.from_pretrained("google-t5/t5-small", num_labels=num_labels)
labels = torch.tensor([1])
loss = model(**inputs, labels=labels).loss
round(loss.item(), 2)
...`,wrap:!1}}),{c(){t=c("p"),t.textContent=h,o=a(),u(l.$$.fragment)},l(n){t=p(n,"P",{"data-svelte-h":!0}),k(t)!=="svelte-ykxpe4"&&(t.textContent=h),o=r(n),f(l.$$.fragment,n)},m(n,y){m(n,t,y),m(n,o,y),g(l,n,y),M=!0},p:j,i(n){M||(_(l.$$.fragment,n),M=!0)},o(n){T(l.$$.fragment,n),M=!1},d(n){n&&(d(t),d(o)),b(l,n)}}}function la(w){let t,h="Example of multi-label classification:",o,l,M;return l=new K({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b1Rva2VuaXplciUyQyUyMFQ1Rm9yU2VxdWVuY2VDbGFzc2lmaWNhdGlvbiUwQSUwQXRva2VuaXplciUyMCUzRCUyMEF1dG9Ub2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKCUyMmdvb2dsZS10NSUyRnQ1LXNtYWxsJTIyKSUwQW1vZGVsJTIwJTNEJTIwVDVGb3JTZXF1ZW5jZUNsYXNzaWZpY2F0aW9uLmZyb21fcHJldHJhaW5lZCglMjJnb29nbGUtdDUlMkZ0NS1zbWFsbCUyMiUyQyUyMHByb2JsZW1fdHlwZSUzRCUyMm11bHRpX2xhYmVsX2NsYXNzaWZpY2F0aW9uJTIyKSUwQSUwQWlucHV0cyUyMCUzRCUyMHRva2VuaXplciglMjJIZWxsbyUyQyUyMG15JTIwZG9nJTIwaXMlMjBjdXRlJTIyJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJwdCUyMiklMEElMEF3aXRoJTIwdG9yY2gubm9fZ3JhZCgpJTNBJTBBJTIwJTIwJTIwJTIwbG9naXRzJTIwJTNEJTIwbW9kZWwoKippbnB1dHMpLmxvZ2l0cyUwQSUwQXByZWRpY3RlZF9jbGFzc19pZHMlMjAlM0QlMjB0b3JjaC5hcmFuZ2UoMCUyQyUyMGxvZ2l0cy5zaGFwZSU1Qi0xJTVEKSU1QnRvcmNoLnNpZ21vaWQobG9naXRzKS5zcXVlZXplKGRpbSUzRDApJTIwJTNFJTIwMC41JTVEJTBBJTBBJTIzJTIwVG8lMjB0cmFpbiUyMGElMjBtb2RlbCUyMG9uJTIwJTYwbnVtX2xhYmVscyU2MCUyMGNsYXNzZXMlMkMlMjB5b3UlMjBjYW4lMjBwYXNzJTIwJTYwbnVtX2xhYmVscyUzRG51bV9sYWJlbHMlNjAlMjB0byUyMCU2MC5mcm9tX3ByZXRyYWluZWQoLi4uKSU2MCUwQW51bV9sYWJlbHMlMjAlM0QlMjBsZW4obW9kZWwuY29uZmlnLmlkMmxhYmVsKSUwQW1vZGVsJTIwJTNEJTIwVDVGb3JTZXF1ZW5jZUNsYXNzaWZpY2F0aW9uLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJnb29nbGUtdDUlMkZ0NS1zbWFsbCUyMiUyQyUyMG51bV9sYWJlbHMlM0RudW1fbGFiZWxzJTJDJTIwcHJvYmxlbV90eXBlJTNEJTIybXVsdGlfbGFiZWxfY2xhc3NpZmljYXRpb24lMjIlMEEpJTBBJTBBbGFiZWxzJTIwJTNEJTIwdG9yY2guc3VtKCUwQSUyMCUyMCUyMCUyMHRvcmNoLm5uLmZ1bmN0aW9uYWwub25lX2hvdChwcmVkaWN0ZWRfY2xhc3NfaWRzJTVCTm9uZSUyQyUyMCUzQSU1RC5jbG9uZSgpJTJDJTIwbnVtX2NsYXNzZXMlM0RudW1fbGFiZWxzKSUyQyUyMGRpbSUzRDElMEEpLnRvKHRvcmNoLmZsb2F0KSUwQWxvc3MlMjAlM0QlMjBtb2RlbCgqKmlucHV0cyUyQyUyMGxhYmVscyUzRGxhYmVscykubG9zcw==",highlighted:`import torch
from transformers import AutoTokenizer, T5ForSequenceClassification
tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-small")
model = T5ForSequenceClassification.from_pretrained("google-t5/t5-small", problem_type="multi_label_classification")
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
with torch.no_grad():
logits = model(**inputs).logits
predicted_class_ids = torch.arange(0, logits.shape[-1])[torch.sigmoid(logits).squeeze(dim=0) > 0.5]
# To train a model on \`num_labels\` classes, you can pass \`num_labels=num_labels\` to \`.from_pretrained(...)\`
num_labels = len(model.config.id2label)
model = T5ForSequenceClassification.from_pretrained(
"google-t5/t5-small", num_labels=num_labels, problem_type="multi_label_classification"
)
labels = torch.sum(
torch.nn.functional.one_hot(predicted_class_ids[None, :].clone(), num_classes=num_labels), dim=1
).to(torch.float)
loss = model(**inputs, labels=labels).loss`,wrap:!1}}),{c(){t=c("p"),t.textContent=h,o=a(),u(l.$$.fragment)},l(n){t=p(n,"P",{"data-svelte-h":!0}),k(t)!=="svelte-1l8e32d"&&(t.textContent=h),o=r(n),f(l.$$.fragment,n)},m(n,y){m(n,t,y),m(n,o,y),g(l,n,y),M=!0},p:j,i(n){M||(_(l.$$.fragment,n),M=!0)},o(n){T(l.$$.fragment,n),M=!1},d(n){n&&(d(t),d(o)),b(l,n)}}}function ca(w){let t,h=`Although the recipe for forward pass needs to be defined within this function, one should call the Module
instance afterwards instead of this since the former takes care of running the pre and post processing steps while
the latter silently ignores them.`;return{c(){t=c("p"),t.innerHTML=h},l(o){t=p(o,"P",{"data-svelte-h":!0}),k(t)!=="svelte-fincs2"&&(t.innerHTML=h)},m(o,l){m(o,t,l)},p:j,d(o){o&&d(t)}}}function pa(w){let t,h="Example:",o,l,M;return l=new K({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMkMlMjBUNUZvclRva2VuQ2xhc3NpZmljYXRpb24lMEFpbXBvcnQlMjB0b3JjaCUwQSUwQXRva2VuaXplciUyMCUzRCUyMEF1dG9Ub2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKCUyMmdvb2dsZS10NSUyRnQ1LXNtYWxsJTIyKSUwQW1vZGVsJTIwJTNEJTIwVDVGb3JUb2tlbkNsYXNzaWZpY2F0aW9uLmZyb21fcHJldHJhaW5lZCglMjJnb29nbGUtdDUlMkZ0NS1zbWFsbCUyMiklMEElMEFpbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoJTBBJTIwJTIwJTIwJTIwJTIySHVnZ2luZ0ZhY2UlMjBpcyUyMGElMjBjb21wYW55JTIwYmFzZWQlMjBpbiUyMFBhcmlzJTIwYW5kJTIwTmV3JTIwWW9yayUyMiUyQyUyMGFkZF9zcGVjaWFsX3Rva2VucyUzREZhbHNlJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJwdCUyMiUwQSklMEElMEF3aXRoJTIwdG9yY2gubm9fZ3JhZCgpJTNBJTBBJTIwJTIwJTIwJTIwbG9naXRzJTIwJTNEJTIwbW9kZWwoKippbnB1dHMpLmxvZ2l0cyUwQSUwQXByZWRpY3RlZF90b2tlbl9jbGFzc19pZHMlMjAlM0QlMjBsb2dpdHMuYXJnbWF4KC0xKSUwQSUwQSUyMyUyME5vdGUlMjB0aGF0JTIwdG9rZW5zJTIwYXJlJTIwY2xhc3NpZmllZCUyMHJhdGhlciUyMHRoZW4lMjBpbnB1dCUyMHdvcmRzJTIwd2hpY2glMjBtZWFucyUyMHRoYXQlMEElMjMlMjB0aGVyZSUyMG1pZ2h0JTIwYmUlMjBtb3JlJTIwcHJlZGljdGVkJTIwdG9rZW4lMjBjbGFzc2VzJTIwdGhhbiUyMHdvcmRzLiUwQSUyMyUyME11bHRpcGxlJTIwdG9rZW4lMjBjbGFzc2VzJTIwbWlnaHQlMjBhY2NvdW50JTIwZm9yJTIwdGhlJTIwc2FtZSUyMHdvcmQlMEFwcmVkaWN0ZWRfdG9rZW5zX2NsYXNzZXMlMjAlM0QlMjAlNUJtb2RlbC5jb25maWcuaWQybGFiZWwlNUJ0Lml0ZW0oKSU1RCUyMGZvciUyMHQlMjBpbiUyMHByZWRpY3RlZF90b2tlbl9jbGFzc19pZHMlNUIwJTVEJTVEJTBBcHJlZGljdGVkX3Rva2Vuc19jbGFzc2VzJTBBJTBBbGFiZWxzJTIwJTNEJTIwcHJlZGljdGVkX3Rva2VuX2NsYXNzX2lkcyUwQWxvc3MlMjAlM0QlMjBtb2RlbCgqKmlucHV0cyUyQyUyMGxhYmVscyUzRGxhYmVscykubG9zcyUwQXJvdW5kKGxvc3MuaXRlbSgpJTJDJTIwMik=",highlighted:`from transformers import AutoTokenizer, T5ForTokenClassification
import torch
tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-small")
model = T5ForTokenClassification.from_pretrained("google-t5/t5-small")
inputs = tokenizer(
"HuggingFace is a company based in Paris and New York", add_special_tokens=False, return_tensors="pt"
)
with torch.no_grad():
logits = model(**inputs).logits
predicted_token_class_ids = logits.argmax(-1)
# Note that tokens are classified rather then input words which means that
# there might be more predicted token classes than words.
# Multiple token classes might account for the same word
predicted_tokens_classes = [model.config.id2label[t.item()] for t in predicted_token_class_ids[0]]
predicted_tokens_classes
...
labels = predicted_token_class_ids
loss = model(**inputs, labels=labels).loss
round(loss.item(), 2)
...`,wrap:!1}}),{c(){t=c("p"),t.textContent=h,o=a(),u(l.$$.fragment)},l(n){t=p(n,"P",{"data-svelte-h":!0}),k(t)!=="svelte-11lpom8"&&(t.textContent=h),o=r(n),f(l.$$.fragment,n)},m(n,y){m(n,t,y),m(n,o,y),g(l,n,y),M=!0},p:j,i(n){M||(_(l.$$.fragment,n),M=!0)},o(n){T(l.$$.fragment,n),M=!1},d(n){n&&(d(t),d(o)),b(l,n)}}}function ma(w){let t,h=`Although the recipe for forward pass needs to be defined within this function, one should call the Module
instance afterwards instead of this since the former takes care of running the pre and post processing steps while
the latter silently ignores them.`;return{c(){t=c("p"),t.innerHTML=h},l(o){t=p(o,"P",{"data-svelte-h":!0}),k(t)!=="svelte-fincs2"&&(t.innerHTML=h)},m(o,l){m(o,t,l)},p:j,d(o){o&&d(t)}}}function ha(w){let t,h="Example:",o,l,M;return l=new K({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMkMlMjBUNUZvclF1ZXN0aW9uQW5zd2VyaW5nJTBBaW1wb3J0JTIwdG9yY2glMEElMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZCglMjJnb29nbGUtdDUlMkZ0NS1zbWFsbCUyMiklMEFtb2RlbCUyMCUzRCUyMFQ1Rm9yUXVlc3Rpb25BbnN3ZXJpbmcuZnJvbV9wcmV0cmFpbmVkKCUyMmdvb2dsZS10NSUyRnQ1LXNtYWxsJTIyKSUwQSUwQXF1ZXN0aW9uJTJDJTIwdGV4dCUyMCUzRCUyMCUyMldobyUyMHdhcyUyMEppbSUyMEhlbnNvbiUzRiUyMiUyQyUyMCUyMkppbSUyMEhlbnNvbiUyMHdhcyUyMGElMjBuaWNlJTIwcHVwcGV0JTIyJTBBJTBBaW5wdXRzJTIwJTNEJTIwdG9rZW5pemVyKHF1ZXN0aW9uJTJDJTIwdGV4dCUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIpJTBBd2l0aCUyMHRvcmNoLm5vX2dyYWQoKSUzQSUwQSUyMCUyMCUyMCUyMG91dHB1dHMlMjAlM0QlMjBtb2RlbCgqKmlucHV0cyklMEElMEFhbnN3ZXJfc3RhcnRfaW5kZXglMjAlM0QlMjBvdXRwdXRzLnN0YXJ0X2xvZ2l0cy5hcmdtYXgoKSUwQWFuc3dlcl9lbmRfaW5kZXglMjAlM0QlMjBvdXRwdXRzLmVuZF9sb2dpdHMuYXJnbWF4KCklMEElMEFwcmVkaWN0X2Fuc3dlcl90b2tlbnMlMjAlM0QlMjBpbnB1dHMuaW5wdXRfaWRzJTVCMCUyQyUyMGFuc3dlcl9zdGFydF9pbmRleCUyMCUzQSUyMGFuc3dlcl9lbmRfaW5kZXglMjAlMkIlMjAxJTVEJTBBdG9rZW5pemVyLmRlY29kZShwcmVkaWN0X2Fuc3dlcl90b2tlbnMlMkMlMjBza2lwX3NwZWNpYWxfdG9rZW5zJTNEVHJ1ZSklMEElMEElMjMlMjB0YXJnZXQlMjBpcyUyMCUyMm5pY2UlMjBwdXBwZXQlMjIlMEF0YXJnZXRfc3RhcnRfaW5kZXglMjAlM0QlMjB0b3JjaC50ZW5zb3IoJTVCMTQlNUQpJTBBdGFyZ2V0X2VuZF9pbmRleCUyMCUzRCUyMHRvcmNoLnRlbnNvciglNUIxNSU1RCklMEElMEFvdXRwdXRzJTIwJTNEJTIwbW9kZWwoKippbnB1dHMlMkMlMjBzdGFydF9wb3NpdGlvbnMlM0R0YXJnZXRfc3RhcnRfaW5kZXglMkMlMjBlbmRfcG9zaXRpb25zJTNEdGFyZ2V0X2VuZF9pbmRleCklMEFsb3NzJTIwJTNEJTIwb3V0cHV0cy5sb3NzJTBBcm91bmQobG9zcy5pdGVtKCklMkMlMjAyKQ==",highlighted:`from transformers import AutoTokenizer, T5ForQuestionAnswering
import torch
tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-small")
model = T5ForQuestionAnswering.from_pretrained("google-t5/t5-small")
question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
inputs = tokenizer(question, text, return_tensors="pt")
with torch.no_grad():
outputs = model(**inputs)
answer_start_index = outputs.start_logits.argmax()
answer_end_index = outputs.end_logits.argmax()
predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
tokenizer.decode(predict_answer_tokens, skip_special_tokens=True)
...
# target is "nice puppet"
target_start_index = torch.tensor([14])
target_end_index = torch.tensor([15])
outputs = model(**inputs, start_positions=target_start_index, end_positions=target_end_index)
loss = outputs.loss
round(loss.item(), 2)
...`,wrap:!1}}),{c(){t=c("p"),t.textContent=h,o=a(),u(l.$$.fragment)},l(n){t=p(n,"P",{"data-svelte-h":!0}),k(t)!=="svelte-11lpom8"&&(t.textContent=h),o=r(n),f(l.$$.fragment,n)},m(n,y){m(n,t,y),m(n,o,y),g(l,n,y),M=!0},p:j,i(n){M||(_(l.$$.fragment,n),M=!0)},o(n){T(l.$$.fragment,n),M=!1},d(n){n&&(d(t),d(o)),b(l,n)}}}function ua(w){let t,h,o,l,M,n="This model was released on 2019-10-23 and added to Hugging Face Transformers on 2020-11-16.",y,z,ro='
1e-4
and 3e-4
work well for most tasks.TFT5Model
. It is used to
instantiate a T5 model according to the specified arguments, defining the model architecture. Instantiating a
configuration with the defaults will yield a similar configuration to that of the T5
google-t5/t5-small architecture.`,So,wt,rs=`Configuration objects inherit from PretrainedConfig and can be used to control the model outputs. Read the
documentation from PretrainedConfig for more information.`,wo,Re,vo,U,Se,Xo,vt,is='Construct a T5 tokenizer. Based on SentencePiece.',Qo,$t,ds=`This tokenizer inherits from PreTrainedTokenizer which contains most of the main methods. Users should refer to
this superclass for more information regarding those methods.`,Eo,te,Xe,Lo,zt,ls=`Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
adding special tokens. A sequence has the following format:`,Ao,Jt,cs="X </s>
A </s> B </s>
prepare_for_model
method.`,Do,he,Ee,Oo,xt,ms=`Create a mask from the two sequences passed to be used in a sequence-pair classification task. T5 does not make
use of token type ids, therefore a list of zeros is returned.`,Ko,jt,Le,$o,Ae,zo,C,Pe,en,Ct,hs=`Construct a “fast” T5 tokenizer (backed by HuggingFace’s tokenizers library). Based on
Unigram.`,tn,Ft,us=`This tokenizer inherits from PreTrainedTokenizerFast which contains most of the main methods. Users should
refer to this superclass for more information regarding those methods.`,on,oe,Ye,nn,qt,fs=`Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
adding special tokens. A sequence has the following format:`,sn,It,gs="X </s>
A </s> B </s>
__call__
special method.',hn,fe,un,ge,xo,tt,jo,q,ot,fn,Ht,Ms="T5 Model with a language modeling
head on top.",gn,Vt,ws=`This model inherits from PreTrainedModel. Check the superclass documentation for the generic methods the
library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
etc.)`,_n,Rt,vs=`This model is also a PyTorch torch.nn.Module subclass.
Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
and behavior.`,Tn,X,nt,bn,St,$s='The T5ForConditionalGeneration forward method, overrides the __call__
special method.',yn,_e,kn,Te,Co,st,Fo,I,at,Mn,Xt,zs="The bare T5 Model outputting raw hidden-states without any specific head on top.",wn,Qt,Js=`This model inherits from PreTrainedModel. Check the superclass documentation for the generic methods the
library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
etc.)`,vn,Et,Us=`This model is also a PyTorch torch.nn.Module subclass.
Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
and behavior.`,$n,Q,rt,zn,Lt,xs='The T5EncoderModel forward method, overrides the __call__
special method.',Jn,be,Un,ye,qo,it,Io,Z,dt,xn,At,js=`T5 model with a sequence classification/head on top (a linear layer on top of the pooled output) e.g. for GLUE
tasks.`,jn,Pt,Cs=`This model inherits from PreTrainedModel. Check the superclass documentation for the generic methods the
library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
etc.)`,Cn,Yt,Fs=`This model is also a PyTorch torch.nn.Module subclass.
Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
and behavior.`,Fn,G,lt,qn,Dt,qs='The T5ForSequenceClassification forward method, overrides the __call__
special method.',In,ke,Zn,Me,Wn,we,Zo,ct,Wo,W,pt,Nn,Ot,Is=`The T5 transformer with a token classification head on top (a linear layer on top of the hidden-states
output) e.g. for Named-Entity-Recognition (NER) tasks.`,Gn,Kt,Zs=`This model inherits from PreTrainedModel. Check the superclass documentation for the generic methods the
library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
etc.)`,Bn,eo,Ws=`This model is also a PyTorch torch.nn.Module subclass.
Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
and behavior.`,Hn,E,mt,Vn,to,Ns='The T5ForTokenClassification forward method, overrides the __call__
special method.',Rn,ve,Sn,$e,No,ht,Go,N,ut,Xn,oo,Gs=`The T5 transformer with a span classification head on top for extractive question-answering tasks like
SQuAD (a linear layer on top of the hidden-states output to compute span start logits
and span end logits
).`,Qn,no,Bs=`This model inherits from PreTrainedModel. Check the superclass documentation for the generic methods the
library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
etc.)`,En,so,Hs=`This model is also a PyTorch torch.nn.Module subclass.
Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
and behavior.`,Ln,L,ft,An,ao,Vs='The T5ForQuestionAnswering forward method, overrides the __call__
special method.',Pn,ze,Yn,Je,Bo,gt,Ho,io,Vo;return ee=new O({props:{title:"T5",local:"t5",headingTag:"h1"}}),ce=new yt({props:{warning:!1,$$slots:{default:[Ys]},$$scope:{ctx:w}}}),pe=new Ps({props:{id:"usage",options:["Pipeline","AutoModel","transformers CLI"],$$slots:{default:[ea]},$$scope:{ctx:w}}}),Ne=new K({props:{code:"JTIzJTIwcGlwJTIwaW5zdGFsbCUyMHRvcmNoYW8lMEFpbXBvcnQlMjB0b3JjaCUwQWZyb20lMjB0cmFuc2Zvcm1lcnMlMjBpbXBvcnQlMjBUb3JjaEFvQ29uZmlnJTJDJTIwQXV0b01vZGVsRm9yU2VxMlNlcUxNJTJDJTIwQXV0b1Rva2VuaXplciUwQSUwQXF1YW50aXphdGlvbl9jb25maWclMjAlM0QlMjBUb3JjaEFvQ29uZmlnKCUyMmludDRfd2VpZ2h0X29ubHklMjIlMkMlMjBncm91cF9zaXplJTNEMTI4KSUwQW1vZGVsJTIwJTNEJTIwQXV0b01vZGVsRm9yU2VxMlNlcUxNLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjJnb29nbGUlMkZ0NS12MV8xLXhsJTIyJTJDJTBBJTIwJTIwJTIwJTIwZHR5cGUlM0R0b3JjaC5iZmxvYXQxNiUyQyUwQSUyMCUyMCUyMCUyMGRldmljZV9tYXAlM0QlMjJhdXRvJTIyJTJDJTBBJTIwJTIwJTIwJTIwcXVhbnRpemF0aW9uX2NvbmZpZyUzRHF1YW50aXphdGlvbl9jb25maWclMEEpJTBBJTBBdG9rZW5pemVyJTIwJTNEJTIwQXV0b1Rva2VuaXplci5mcm9tX3ByZXRyYWluZWQoJTIyZ29vZ2xlJTJGdDUtdjFfMS14bCUyMiklMEFpbnB1dF9pZHMlMjAlM0QlMjB0b2tlbml6ZXIoJTIydHJhbnNsYXRlJTIwRW5nbGlzaCUyMHRvJTIwRnJlbmNoJTNBJTIwVGhlJTIwd2VhdGhlciUyMGlzJTIwbmljZSUyMHRvZGF5LiUyMiUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIpLnRvKG1vZGVsLmRldmljZSklMEElMEFvdXRwdXQlMjAlM0QlMjBtb2RlbC5nZW5lcmF0ZSgqKmlucHV0X2lkcyUyQyUyMGNhY2hlX2ltcGxlbWVudGF0aW9uJTNEJTIyc3RhdGljJTIyKSUwQXByaW50KHRva2VuaXplci5kZWNvZGUob3V0cHV0JTVCMCU1RCUyQyUyMHNraXBfc3BlY2lhbF90b2tlbnMlM0RUcnVlKSk=",highlighted:`# pip install torchao
import torch
from transformers import TorchAoConfig, AutoModelForSeq2SeqLM, AutoTokenizer
quantization_config = TorchAoConfig("int4_weight_only", group_size=128)
model = AutoModelForSeq2SeqLM.from_pretrained(
"google/t5-v1_1-xl",
dtype=torch.bfloat16,
device_map="auto",
quantization_config=quantization_config
)
tokenizer = AutoTokenizer.from_pretrained("google/t5-v1_1-xl")
input_ids = tokenizer("translate English to French: The weather is nice today.", return_tensors="pt").to(model.device)
output = model.generate(**input_ids, cache_implementation="static")
print(tokenizer.decode(output[0], skip_special_tokens=True))`,wrap:!1}}),Ge=new O({props:{title:"Notes",local:"notes",headingTag:"h2"}}),He=new O({props:{title:"T5Config",local:"transformers.T5Config",headingTag:"h2"}}),Ve=new J({props:{name:"class transformers.T5Config",anchor:"transformers.T5Config",parameters:[{name:"vocab_size",val:" = 32128"},{name:"d_model",val:" = 512"},{name:"d_kv",val:" = 64"},{name:"d_ff",val:" = 2048"},{name:"num_layers",val:" = 6"},{name:"num_decoder_layers",val:" = None"},{name:"num_heads",val:" = 8"},{name:"relative_attention_num_buckets",val:" = 32"},{name:"relative_attention_max_distance",val:" = 128"},{name:"dropout_rate",val:" = 0.1"},{name:"layer_norm_epsilon",val:" = 1e-06"},{name:"initializer_factor",val:" = 1.0"},{name:"feed_forward_proj",val:" = 'relu'"},{name:"is_encoder_decoder",val:" = True"},{name:"use_cache",val:" = True"},{name:"pad_token_id",val:" = 0"},{name:"eos_token_id",val:" = 1"},{name:"classifier_dropout",val:" = 0.0"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.T5Config.vocab_size",description:`vocab_size (int
, optional, defaults to 32128) —
Vocabulary size of the T5 model. Defines the number of different tokens that can be represented by the
inputs_ids
passed when calling T5Model or TFT5Model
.`,name:"vocab_size"},{anchor:"transformers.T5Config.d_model",description:`d_model (int
, optional, defaults to 512) —
Size of the encoder layers and the pooler layer.`,name:"d_model"},{anchor:"transformers.T5Config.d_kv",description:`d_kv (int
, optional, defaults to 64) —
Size of the key, query, value projections per attention head. The inner_dim
of the projection layer will
be defined as num_heads * d_kv
.`,name:"d_kv"},{anchor:"transformers.T5Config.d_ff",description:`d_ff (int
, optional, defaults to 2048) —
Size of the intermediate feed forward layer in each T5Block
.`,name:"d_ff"},{anchor:"transformers.T5Config.num_layers",description:`num_layers (int
, optional, defaults to 6) —
Number of hidden layers in the Transformer encoder.`,name:"num_layers"},{anchor:"transformers.T5Config.num_decoder_layers",description:`num_decoder_layers (int
, optional) —
Number of hidden layers in the Transformer decoder. Will use the same value as num_layers
if not set.`,name:"num_decoder_layers"},{anchor:"transformers.T5Config.num_heads",description:`num_heads (int
, optional, defaults to 8) —
Number of attention heads for each attention layer in the Transformer encoder.`,name:"num_heads"},{anchor:"transformers.T5Config.relative_attention_num_buckets",description:`relative_attention_num_buckets (int
, optional, defaults to 32) —
The number of buckets to use for each attention layer.`,name:"relative_attention_num_buckets"},{anchor:"transformers.T5Config.relative_attention_max_distance",description:`relative_attention_max_distance (int
, optional, defaults to 128) —
The maximum distance of the longer sequences for the bucket separation.`,name:"relative_attention_max_distance"},{anchor:"transformers.T5Config.dropout_rate",description:`dropout_rate (float
, optional, defaults to 0.1) —
The ratio for all dropout layers.`,name:"dropout_rate"},{anchor:"transformers.T5Config.classifier_dropout",description:`classifier_dropout (float
, optional, defaults to 0.0) —
The dropout ratio for classifier.`,name:"classifier_dropout"},{anchor:"transformers.T5Config.layer_norm_eps",description:`layer_norm_eps (float
, optional, defaults to 1e-6) —
The epsilon used by the layer normalization layers.`,name:"layer_norm_eps"},{anchor:"transformers.T5Config.initializer_factor",description:`initializer_factor (float
, optional, defaults to 1) —
A factor for initializing all weight matrices (should be kept to 1, used internally for initialization
testing).`,name:"initializer_factor"},{anchor:"transformers.T5Config.feed_forward_proj",description:`feed_forward_proj (string
, optional, defaults to "relu"
) —
Type of feed forward layer to be used. Should be one of "relu"
or "gated-gelu"
. T5v1.1 uses the
"gated-gelu"
feed forward projection. Original T5 uses "relu"
.`,name:"feed_forward_proj"},{anchor:"transformers.T5Config.use_cache",description:`use_cache (bool
, optional, defaults to True
) —
Whether or not the model should return the last key/values attentions (not used by all models).`,name:"use_cache"}],source:"https://github.com/huggingface/transformers/blob/v4.56.2/src/transformers/models/t5/configuration_t5.py#L27"}}),Re=new O({props:{title:"T5Tokenizer",local:"transformers.T5Tokenizer",headingTag:"h2"}}),Se=new J({props:{name:"class transformers.T5Tokenizer",anchor:"transformers.T5Tokenizer",parameters:[{name:"vocab_file",val:""},{name:"eos_token",val:" = ''"},{name:"unk_token",val:" = 'str
) —
SentencePiece file (generally has a .spm extension) that
contains the vocabulary necessary to instantiate a tokenizer.`,name:"vocab_file"},{anchor:"transformers.T5Tokenizer.eos_token",description:`eos_token (str
, optional, defaults to "</s>"
) —
The end of sequence token.
When building a sequence using special tokens, this is not the token that is used for the end of sequence.
The token used is the sep_token
.
str
, optional, defaults to "<unk>"
) —
The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
token instead.`,name:"unk_token"},{anchor:"transformers.T5Tokenizer.pad_token",description:`pad_token (str
, optional, defaults to "<pad>"
) —
The token used for padding, for example when batching sequences of different lengths.`,name:"pad_token"},{anchor:"transformers.T5Tokenizer.extra_ids",description:`extra_ids (int
, optional, defaults to 100) —
Add a number of extra ids added to the vocabulary for use as sentinels. These tokens are
accessible as “list[str]
, optional):
Additional special tokens used by the tokenizer.dict
, optional) —
Will be passed to the SentencePieceProcessor.__init__()
method. The Python wrapper for
SentencePiece can be used, among other things,
to set:
enable_sampling
: Enable subword regularization.
nbest_size
: Sampling parameters for unigram. Invalid for BPE-Dropout.
nbest_size = {0,1}
: No sampling is performed.nbest_size > 1
: samples from the nbest_size results.nbest_size < 0
: assuming that nbest_size is infinite and samples from the all hypothesis (lattice)
using forward-filtering-and-backward-sampling algorithm.alpha
: Smoothing parameter for unigram sampling, and dropout probability of merge operations for
BPE-dropout.
bool
, optional) —
Whether or not the legacy
behaviour of the tokenizer should be used. Legacy is before the merge of #24622
and #25224 which includes fixes to properly handle tokens that appear after special tokens. A simple
example:
legacy=True
:list[int]
) —
List of IDs to which the special tokens will be added.`,name:"token_ids_0"},{anchor:"transformers.T5Tokenizer.build_inputs_with_special_tokens.token_ids_1",description:`token_ids_1 (list[int]
, optional) —
Optional second list of IDs for sequence pairs.`,name:"token_ids_1"}],source:"https://github.com/huggingface/transformers/blob/v4.56.2/src/transformers/models/t5/tokenization_t5.py#L317",returnDescription:`