import{s as go,o as uo,n as mt}from"../chunks/scheduler.bdbef820.js";import{S as fo,i as ho,g as a,s as n,r as u,A as _o,h as r,f as i,c as o,j as J,u as f,x as m,k as C,y as t,a as p,v as h,d as _,t as b,w as y}from"../chunks/index.33f81d56.js";import{T as Gn}from"../chunks/Tip.34194030.js";import{D as B}from"../chunks/Docstring.8b71f9f8.js";import{C as Cn}from"../chunks/CodeBlock.3bad7fc9.js";import{E as Jn}from"../chunks/ExampleCodeBlock.030724b7.js";import{H as gt,E as bo}from"../chunks/getInferenceSnippets.d10958e6.js";function yo(Z){let l,k=`A large number of these flags control the logits or the stopping criteria of the generation. Make sure you check
the generate-related classes for a full
description of the possible manipulations, as well as examples of their usage.`;return{c(){l=a("p"),l.innerHTML=k},l(g){l=r(g,"P",{"data-svelte-h":!0}),m(l)!=="svelte-1lhagsi"&&(l.innerHTML=k)},m(g,d){p(g,l,d)},p:mt,d(g){g&&i(l)}}}function Mo(Z){let l,k="Examples:",g,d,M;return d=new Cn({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEdlbmVyYXRpb25Db25maWclMEElMEElMjMlMjBEb3dubG9hZCUyMGNvbmZpZ3VyYXRpb24lMjBmcm9tJTIwaHVnZ2luZ2ZhY2UuY28lMjBhbmQlMjBjYWNoZS4lMEFnZW5lcmF0aW9uX2NvbmZpZyUyMCUzRCUyMEdlbmVyYXRpb25Db25maWcuZnJvbV9wcmV0cmFpbmVkKCUyMm9wZW5haS1jb21tdW5pdHklMkZncHQyJTIyKSUwQSUwQSUyMyUyMEUuZy4lMjBjb25maWclMjB3YXMlMjBzYXZlZCUyMHVzaW5nJTIwKnNhdmVfcHJldHJhaW5lZCgnLiUyRnRlc3QlMkZzYXZlZF9tb2RlbCUyRicpKiUwQWdlbmVyYXRpb25fY29uZmlnLnNhdmVfcHJldHJhaW5lZCglMjIuJTJGdGVzdCUyRnNhdmVkX21vZGVsJTJGJTIyKSUwQWdlbmVyYXRpb25fY29uZmlnJTIwJTNEJTIwR2VuZXJhdGlvbkNvbmZpZy5mcm9tX3ByZXRyYWluZWQoJTIyLiUyRnRlc3QlMkZzYXZlZF9tb2RlbCUyRiUyMiklMEElMEElMjMlMjBZb3UlMjBjYW4lMjBhbHNvJTIwc3BlY2lmeSUyMGNvbmZpZ3VyYXRpb24lMjBuYW1lcyUyMHRvJTIweW91ciUyMGdlbmVyYXRpb24lMjBjb25maWd1cmF0aW9uJTIwZmlsZSUwQWdlbmVyYXRpb25fY29uZmlnLnNhdmVfcHJldHJhaW5lZCglMjIuJTJGdGVzdCUyRnNhdmVkX21vZGVsJTJGJTIyJTJDJTIwY29uZmlnX2ZpbGVfbmFtZSUzRCUyMm15X2NvbmZpZ3VyYXRpb24uanNvbiUyMiklMEFnZW5lcmF0aW9uX2NvbmZpZyUyMCUzRCUyMEdlbmVyYXRpb25Db25maWcuZnJvbV9wcmV0cmFpbmVkKCUyMi4lMkZ0ZXN0JTJGc2F2ZWRfbW9kZWwlMkYlMjIlMkMlMjAlMjJteV9jb25maWd1cmF0aW9uLmpzb24lMjIpJTBBJTBBJTIzJTIwSWYlMjB5b3UnZCUyMGxpa2UlMjB0byUyMHRyeSUyMGElMjBtaW5vciUyMHZhcmlhdGlvbiUyMHRvJTIwYW4lMjBleGlzdGluZyUyMGNvbmZpZ3VyYXRpb24lMkMlMjB5b3UlMjBjYW4lMjBhbHNvJTIwcGFzcyUyMGdlbmVyYXRpb24lMEElMjMlMjBhcmd1bWVudHMlMjB0byUyMCU2MC5mcm9tX3ByZXRyYWluZWQoKSU2MC4lMjBCZSUyMG1pbmRmdWwlMjB0aGF0JTIwdHlwb3MlMjBhbmQlMjB1bnVzZWQlMjBhcmd1bWVudHMlMjB3aWxsJTIwYmUlMjBpZ25vcmVkJTBBZ2VuZXJhdGlvbl9jb25maWclMkMlMjB1bnVzZWRfa3dhcmdzJTIwJTNEJTIwR2VuZXJhdGlvbkNvbmZpZy5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIyb3BlbmFpLWNvbW11bml0eSUyRmdwdDIlMjIlMkMlMjB0b3BfayUzRDElMkMlMjBmb28lM0RGYWxzZSUyQyUyMGRvX3NhbXBsZSUzRFRydWUlMkMlMjByZXR1cm5fdW51c2VkX2t3YXJncyUzRFRydWUlMEEpJTBBZ2VuZXJhdGlvbl9jb25maWcudG9wX2slMEElMEF1bnVzZWRfa3dhcmdz",highlighted:`from transformers import GenerationConfig
# Download configuration from huggingface.co and cache.
generation_config = GenerationConfig.from_pretrained("openai-community/gpt2")
# E.g. config was saved using *save_pretrained('./test/saved_model/')*
generation_config.save_pretrained("./test/saved_model/")
generation_config = GenerationConfig.from_pretrained("./test/saved_model/")
# You can also specify configuration names to your generation configuration file
generation_config.save_pretrained("./test/saved_model/", config_file_name="my_configuration.json")
generation_config = GenerationConfig.from_pretrained("./test/saved_model/", "my_configuration.json")
# If you'd like to try a minor variation to an existing configuration, you can also pass generation
# arguments to \`.from_pretrained()\`. Be mindful that typos and unused arguments will be ignored
generation_config, unused_kwargs = GenerationConfig.from_pretrained(
"openai-community/gpt2", top_k=1, foo=False, do_sample=True, return_unused_kwargs=True
)
generation_config.top_k
1
unused_kwargs
{'foo': False}`,wrap:!1}}),{c(){l=a("p"),l.textContent=k,g=n(),u(d.$$.fragment)},l(s){l=r(s,"P",{"data-svelte-h":!0}),m(l)!=="svelte-kvfsh7"&&(l.textContent=k),g=o(s),f(d.$$.fragment,s)},m(s,v){p(s,l,v),p(s,g,v),h(d,s,v),M=!0},p:mt,i(s){M||(_(d.$$.fragment,s),M=!0)},o(s){b(d.$$.fragment,s),M=!1},d(s){s&&(i(l),i(g)),y(d,s)}}}function vo(Z){let l,k=`Most generation-controlling parameters are set in generation_config
which, if not passed, will be set to the
model’s default generation configuration. You can override any generation_config
by passing the corresponding
parameters to generate(), e.g. .generate(inputs, num_beams=4, do_sample=True)
.`,g,d,M=`For an overview of generation strategies and code examples, check out the following
guide.`;return{c(){l=a("p"),l.innerHTML=k,g=n(),d=a("p"),d.innerHTML=M},l(s){l=r(s,"P",{"data-svelte-h":!0}),m(l)!=="svelte-1c5u34l"&&(l.innerHTML=k),g=o(s),d=r(s,"P",{"data-svelte-h":!0}),m(d)!=="svelte-fvlq1g"&&(d.innerHTML=M)},m(s,v){p(s,l,v),p(s,g,v),p(s,d,v)},p:mt,d(s){s&&(i(l),i(g),i(d))}}}function To(Z){let l,k="Examples:",g,d,M;return d=new Cn({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEdQVDJUb2tlbml6ZXIlMkMlMjBBdXRvTW9kZWxGb3JDYXVzYWxMTSUwQWltcG9ydCUyMG51bXB5JTIwYXMlMjBucCUwQSUwQXRva2VuaXplciUyMCUzRCUyMEdQVDJUb2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKCUyMmdwdDIlMjIpJTBBbW9kZWwlMjAlM0QlMjBBdXRvTW9kZWxGb3JDYXVzYWxMTS5mcm9tX3ByZXRyYWluZWQoJTIyb3BlbmFpLWNvbW11bml0eSUyRmdwdDIlMjIpJTBBdG9rZW5pemVyLnBhZF90b2tlbl9pZCUyMCUzRCUyMHRva2VuaXplci5lb3NfdG9rZW5faWQlMEFpbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIoJTVCJTIyVG9kYXklMjBpcyUyMiU1RCUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIpJTBBJTBBJTIzJTIwRXhhbXBsZSUyMDElM0ElMjBQcmludCUyMHRoZSUyMHNjb3JlcyUyMGZvciUyMGVhY2glMjB0b2tlbiUyMGdlbmVyYXRlZCUyMHdpdGglMjBHcmVlZHklMjBTZWFyY2glMEFvdXRwdXRzJTIwJTNEJTIwbW9kZWwuZ2VuZXJhdGUoKippbnB1dHMlMkMlMjBtYXhfbmV3X3Rva2VucyUzRDUlMkMlMjByZXR1cm5fZGljdF9pbl9nZW5lcmF0ZSUzRFRydWUlMkMlMjBvdXRwdXRfc2NvcmVzJTNEVHJ1ZSklMEF0cmFuc2l0aW9uX3Njb3JlcyUyMCUzRCUyMG1vZGVsLmNvbXB1dGVfdHJhbnNpdGlvbl9zY29yZXMoJTBBJTIwJTIwJTIwJTIwb3V0cHV0cy5zZXF1ZW5jZXMlMkMlMjBvdXRwdXRzLnNjb3JlcyUyQyUyMG5vcm1hbGl6ZV9sb2dpdHMlM0RUcnVlJTBBKSUwQSUyMyUyMGlucHV0X2xlbmd0aCUyMGlzJTIwdGhlJTIwbGVuZ3RoJTIwb2YlMjB0aGUlMjBpbnB1dCUyMHByb21wdCUyMGZvciUyMGRlY29kZXItb25seSUyMG1vZGVscyUyQyUyMGxpa2UlMjB0aGUlMjBHUFQlMjBmYW1pbHklMkMlMjBhbmQlMjAxJTIwZm9yJTBBJTIzJTIwZW5jb2Rlci1kZWNvZGVyJTIwbW9kZWxzJTJDJTIwbGlrZSUyMEJBUlQlMjBvciUyMFQ1LiUwQWlucHV0X2xlbmd0aCUyMCUzRCUyMDElMjBpZiUyMG1vZGVsLmNvbmZpZy5pc19lbmNvZGVyX2RlY29kZXIlMjBlbHNlJTIwaW5wdXRzLmlucHV0X2lkcy5zaGFwZSU1QjElNUQlMEFnZW5lcmF0ZWRfdG9rZW5zJTIwJTNEJTIwb3V0cHV0cy5zZXF1ZW5jZXMlNUIlM0ElMkMlMjBpbnB1dF9sZW5ndGglM0ElNUQlMEFmb3IlMjB0b2slMkMlMjBzY29yZSUyMGluJTIwemlwKGdlbmVyYXRlZF90b2tlbnMlNUIwJTVEJTJDJTIwdHJhbnNpdGlvbl9zY29yZXMlNUIwJTVEKSUzQSUwQSUyMCUyMCUyMCUyMCUyMyUyMCU3QyUyMHRva2VuJTIwJTdDJTIwdG9rZW4lMjBzdHJpbmclMjAlN0MlMjBsb2clMjBwcm9iYWJpbGl0eSUyMCU3QyUyMHByb2JhYmlsaXR5JTBBJTIwJTIwJTIwJTIwcHJpbnQoZiUyMiU3QyUyMCU3QnRvayUzQTVkJTdEJTIwJTdDJTIwJTdCdG9rZW5pemVyLmRlY29kZSh0b2spJTNBOHMlN0QlMjAlN0MlMjAlN0JzY29yZS5udW1weSgpJTNBLjNmJTdEJTIwJTdDJTIwJTdCbnAuZXhwKHNjb3JlLm51bXB5KCkpJTNBLjIlMjUlN0QlMjIpJTBBJTBBJTIzJTIwRXhhbXBsZSUyMDIlM0ElMjBSZWNvbnN0cnVjdCUyMHRoZSUyMHNlcXVlbmNlJTIwc2NvcmVzJTIwZnJvbSUyMEJlYW0lMjBTZWFyY2glMEFvdXRwdXRzJTIwJTNEJTIwbW9kZWwuZ2VuZXJhdGUoJTBBJTIwJTIwJTIwJTIwKippbnB1dHMlMkMlMEElMjAlMjAlMjAlMjBtYXhfbmV3X3Rva2VucyUzRDUlMkMlMEElMjAlMjAlMjAlMjBudW1fYmVhbXMlM0Q0JTJDJTBBJTIwJTIwJTIwJTIwbnVtX3JldHVybl9zZXF1ZW5jZXMlM0Q0JTJDJTBBJTIwJTIwJTIwJTIwcmV0dXJuX2RpY3RfaW5fZ2VuZXJhdGUlM0RUcnVlJTJDJTBBJTIwJTIwJTIwJTIwb3V0cHV0X3Njb3JlcyUzRFRydWUlMkMlMEEpJTBBdHJhbnNpdGlvbl9zY29yZXMlMjAlM0QlMjBtb2RlbC5jb21wdXRlX3RyYW5zaXRpb25fc2NvcmVzKCUwQSUyMCUyMCUyMCUyMG91dHB1dHMuc2VxdWVuY2VzJTJDJTIwb3V0cHV0cy5zY29yZXMlMkMlMjBvdXRwdXRzLmJlYW1faW5kaWNlcyUyQyUyMG5vcm1hbGl6ZV9sb2dpdHMlM0RGYWxzZSUwQSklMEElMjMlMjBJZiUyMHlvdSUyMHN1bSUyMHRoZSUyMGdlbmVyYXRlZCUyMHRva2VucyclMjBzY29yZXMlMjBhbmQlMjBhcHBseSUyMHRoZSUyMGxlbmd0aCUyMHBlbmFsdHklMkMlMjB5b3UnbGwlMjBnZXQlMjB0aGUlMjBzZXF1ZW5jZSUyMHNjb3Jlcy4lMEElMjMlMjBUaXAlMjAxJTNBJTIwcmVjb21wdXRpbmclMjB0aGUlMjBzY29yZXMlMjBpcyUyMG9ubHklMjBndWFyYW50ZWVkJTIwdG8lMjBtYXRjaCUyMHdpdGglMjAlNjBub3JtYWxpemVfbG9naXRzJTNERmFsc2UlNjAuJTIwRGVwZW5kaW5nJTIwb24lMjB0aGUlMEElMjMlMjB1c2UlMjBjYXNlJTJDJTIweW91JTIwbWlnaHQlMjB3YW50JTIwdG8lMjByZWNvbXB1dGU
lMjBpdCUyMHdpdGglMjAlNjBub3JtYWxpemVfbG9naXRzJTNEVHJ1ZSU2MC4lMEElMjMlMjBUaXAlMjAyJTNBJTIwdGhlJTIwb3V0cHV0JTIwbGVuZ3RoJTIwZG9lcyUyME5PVCUyMGluY2x1ZGUlMjB0aGUlMjBpbnB1dCUyMGxlbmd0aCUwQW91dHB1dF9sZW5ndGglMjAlM0QlMjBucC5zdW0odHJhbnNpdGlvbl9zY29yZXMubnVtcHkoKSUyMCUzQyUyMDAlMkMlMjBheGlzJTNEMSklMEFsZW5ndGhfcGVuYWx0eSUyMCUzRCUyMG1vZGVsLmdlbmVyYXRpb25fY29uZmlnLmxlbmd0aF9wZW5hbHR5JTBBcmVjb25zdHJ1Y3RlZF9zY29yZXMlMjAlM0QlMjB0cmFuc2l0aW9uX3Njb3Jlcy5zdW0oYXhpcyUzRDEpJTIwJTJGJTIwKG91dHB1dF9sZW5ndGgqKmxlbmd0aF9wZW5hbHR5KSUwQXByaW50KG5wLmFsbGNsb3NlKG91dHB1dHMuc2VxdWVuY2VzX3Njb3JlcyUyQyUyMHJlY29uc3RydWN0ZWRfc2NvcmVzKSk=",highlighted:`from transformers import GPT2Tokenizer, AutoModelForCausalLM
import numpy as np
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")
tokenizer.pad_token_id = tokenizer.eos_token_id
inputs = tokenizer(["Today is"], return_tensors="pt")
# Example 1: Print the scores for each token generated with Greedy Search
outputs = model.generate(**inputs, max_new_tokens=5, return_dict_in_generate=True, output_scores=True)
transition_scores = model.compute_transition_scores(
outputs.sequences, outputs.scores, normalize_logits=True
)
# input_length is the length of the input prompt for decoder-only models, like the GPT family, and 1 for
# encoder-decoder models, like BART or T5.
input_length = 1 if model.config.is_encoder_decoder else inputs.input_ids.shape[1]
generated_tokens = outputs.sequences[:, input_length:]
for tok, score in zip(generated_tokens[0], transition_scores[0]):
# | token | token string | log probability | probability
print(f"| {tok:5d} | {tokenizer.decode(tok):8s} | {score.numpy():.3f} | {np.exp(score.numpy()):.2%}")
| 262 | the | -1.414 | 24.33%
| 1110 | day | -2.609 | 7.36%
| 618 | when | -2.010 | 13.40%
| 356 | we | -1.859 | 15.58%
| 460 | can | -2.508 | 8.14%
# Example 2: Reconstruct the sequence scores from Beam Search
outputs = model.generate(
**inputs,
max_new_tokens=5,
num_beams=4,
num_return_sequences=4,
return_dict_in_generate=True,
output_scores=True,
)
transition_scores = model.compute_transition_scores(
outputs.sequences, outputs.scores, outputs.beam_indices, normalize_logits=False
)
# If you sum the generated tokens' scores and apply the length penalty, you'll get the sequence scores.
# Tip 1: recomputing the scores is only guaranteed to match with \`normalize_logits=False\`. Depending on the
# use case, you might want to recompute it with \`normalize_logits=True\`.
# Tip 2: the output length does NOT include the input length
output_length = np.sum(transition_scores.numpy() < 0, axis=1)
length_penalty = model.generation_config.length_penalty
reconstructed_scores = transition_scores.sum(axis=1) / (output_length**length_penalty)
print(np.allclose(outputs.sequences_scores, reconstructed_scores))
True`,wrap:!1}}),{c(){l=a("p"),l.textContent=k,g=n(),u(d.$$.fragment)},l(s){l=r(s,"P",{"data-svelte-h":!0}),m(l)!=="svelte-kvfsh7"&&(l.textContent=k),g=o(s),f(d.$$.fragment,s)},m(s,v){p(s,l,v),p(s,g,v),h(d,s,v),M=!0},p:mt,i(s){M||(_(d.$$.fragment,s),M=!0)},o(s){b(d.$$.fragment,s),M=!1},d(s){s&&(i(l),i(g)),y(d,s)}}}function wo(Z){let l,k=`Most generation-controlling parameters are set in generation_config
which, if not passed, will be set to the
model’s default generation configuration. You can override any generation_config
by passing the corresponding
parameters to generate, e.g. .generate(inputs, num_beams=4, do_sample=True)
.`,g,d,M=`For an overview of generation strategies and code examples, check out the following
guide.`;return{c(){l=a("p"),l.innerHTML=k,g=n(),d=a("p"),d.innerHTML=M},l(s){l=r(s,"P",{"data-svelte-h":!0}),m(l)!=="svelte-1pahvb2"&&(l.innerHTML=k),g=o(s),d=r(s,"P",{"data-svelte-h":!0}),m(d)!=="svelte-fvlq1g"&&(d.innerHTML=M)},m(s,v){p(s,l,v),p(s,g,v),p(s,d,v)},p:mt,d(s){s&&(i(l),i(g),i(d))}}}function xo(Z){let l,k="Examples:",g,d,M;return d=new Cn({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEdQVDJUb2tlbml6ZXIlMkMlMjBURkF1dG9Nb2RlbEZvckNhdXNhbExNJTBBaW1wb3J0JTIwbnVtcHklMjBhcyUyMG5wJTBBJTBBdG9rZW5pemVyJTIwJTNEJTIwR1BUMlRva2VuaXplci5mcm9tX3ByZXRyYWluZWQoJTIyb3BlbmFpLWNvbW11bml0eSUyRmdwdDIlMjIpJTBBbW9kZWwlMjAlM0QlMjBURkF1dG9Nb2RlbEZvckNhdXNhbExNLmZyb21fcHJldHJhaW5lZCglMjJvcGVuYWktY29tbXVuaXR5JTJGZ3B0MiUyMiklMEF0b2tlbml6ZXIucGFkX3Rva2VuX2lkJTIwJTNEJTIwdG9rZW5pemVyLmVvc190b2tlbl9pZCUwQWlucHV0cyUyMCUzRCUyMHRva2VuaXplciglNUIlMjJUb2RheSUyMGlzJTIyJTVEJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJ0ZiUyMiklMEElMEElMjMlMjBFeGFtcGxlJTIwMSUzQSUyMFByaW50JTIwdGhlJTIwc2NvcmVzJTIwZm9yJTIwZWFjaCUyMHRva2VuJTIwZ2VuZXJhdGVkJTIwd2l0aCUyMEdyZWVkeSUyMFNlYXJjaCUwQW91dHB1dHMlMjAlM0QlMjBtb2RlbC5nZW5lcmF0ZSgqKmlucHV0cyUyQyUyMG1heF9uZXdfdG9rZW5zJTNENSUyQyUyMHJldHVybl9kaWN0X2luX2dlbmVyYXRlJTNEVHJ1ZSUyQyUyMG91dHB1dF9zY29yZXMlM0RUcnVlKSUwQXRyYW5zaXRpb25fc2NvcmVzJTIwJTNEJTIwbW9kZWwuY29tcHV0ZV90cmFuc2l0aW9uX3Njb3JlcyglMEElMjAlMjAlMjAlMjBvdXRwdXRzLnNlcXVlbmNlcyUyQyUyMG91dHB1dHMuc2NvcmVzJTJDJTIwbm9ybWFsaXplX2xvZ2l0cyUzRFRydWUlMEEpJTBBJTIzJTIwaW5wdXRfbGVuZ3RoJTIwaXMlMjB0aGUlMjBsZW5ndGglMjBvZiUyMHRoZSUyMGlucHV0JTIwcHJvbXB0JTIwZm9yJTIwZGVjb2Rlci1vbmx5JTIwbW9kZWxzJTJDJTIwbGlrZSUyMHRoZSUyMEdQVCUyMGZhbWlseSUyQyUyMGFuZCUyMDElMjBmb3IlMEElMjMlMjBlbmNvZGVyLWRlY29kZXIlMjBtb2RlbHMlMkMlMjBsaWtlJTIwQkFSVCUyMG9yJTIwVDUuJTBBaW5wdXRfbGVuZ3RoJTIwJTNEJTIwMSUyMGlmJTIwbW9kZWwuY29uZmlnLmlzX2VuY29kZXJfZGVjb2RlciUyMGVsc2UlMjBpbnB1dHMuaW5wdXRfaWRzLnNoYXBlJTVCMSU1RCUwQWdlbmVyYXRlZF90b2tlbnMlMjAlM0QlMjBvdXRwdXRzLnNlcXVlbmNlcyU1QiUzQSUyQyUyMGlucHV0X2xlbmd0aCUzQSU1RCUwQWZvciUyMHRvayUyQyUyMHNjb3JlJTIwaW4lMjB6aXAoZ2VuZXJhdGVkX3Rva2VucyU1QjAlNUQlMkMlMjB0cmFuc2l0aW9uX3Njb3JlcyU1QjAlNUQpJTNBJTBBJTIwJTIwJTIwJTIwJTIzJTIwJTdDJTIwdG9rZW4lMjAlN0MlMjB0b2tlbiUyMHN0cmluZyUyMCU3QyUyMGxvZ2l0cyUyMCU3QyUyMHByb2JhYmlsaXR5JTBBJTIwJTIwJTIwJTIwcHJpbnQoZiUyMiU3QyUyMCU3QnRvayUzQTVkJTdEJTIwJTdDJTIwJTdCdG9rZW5pemVyLmRlY29kZSh0b2spJTNBOHMlN0QlMjAlN0MlMjAlN0JzY29yZS5udW1weSgpJTNBLjNmJTdEJTIwJTdDJTIwJTdCbnAuZXhwKHNjb3JlLm51bXB5KCkpJTNBLjIlMjUlN0QlMjIpJTBBJTBBJTIzJTIwRXhhbXBsZSUyMDIlM0ElMjBSZWNvbnN0cnVjdCUyMHRoZSUyMHNlcXVlbmNlJTIwc2NvcmVzJTIwZnJvbSUyMEJlYW0lMjBTZWFyY2glMEFvdXRwdXRzJTIwJTNEJTIwbW9kZWwuZ2VuZXJhdGUoJTBBJTIwJTIwJTIwJTIwKippbnB1dHMlMkMlMEElMjAlMjAlMjAlMjBtYXhfbmV3X3Rva2VucyUzRDUlMkMlMEElMjAlMjAlMjAlMjBudW1fYmVhbXMlM0Q0JTJDJTBBJTIwJTIwJTIwJTIwbnVtX3JldHVybl9zZXF1ZW5jZXMlM0Q0JTJDJTBBJTIwJTIwJTIwJTIwcmV0dXJuX2RpY3RfaW5fZ2VuZXJhdGUlM0RUcnVlJTJDJTBBJTIwJTIwJTIwJTIwb3V0cHV0X3Njb3JlcyUzRFRydWUlMkMlMEEpJTBBdHJhbnNpdGlvbl9zY29yZXMlMjAlM0QlMjBtb2RlbC5jb21wdXRlX3RyYW5zaXRpb25fc2NvcmVzKCUwQSUyMCUyMCUyMCUyMG91dHB1dHMuc2VxdWVuY2VzJTJDJTIwb3V0cHV0cy5zY29yZXMlMkMlMjBvdXRwdXRzLmJlYW1faW5kaWNlcyUyQyUyMG5vcm1hbGl6ZV9sb2dpdHMlM0RGYWxzZSUwQSklMEElMjMlMjBJZiUyMHlvdSUyMHN1bSUyMHRoZSUyMGdlbmVyYXRlZCUyMHRva2VucyclMjBzY29yZXMlMjBhbmQlMjBhcHBseSUyMHRoZSUyMGxlbmd0aCUyMHBlbmFsdHklMkMlMjB5b3UnbGwlMjBnZXQlMjB0aGUlMjBzZXF1ZW5jZSUyMHNjb3Jlcy4lMEElMjMlMjBUaXAlM0ElMjByZWNvbXB1dGluZyUyMHRoZSUyMHNjb3JlcyUyMGlzJTIwb25seSUyMGd1YXJhbnRlZWQlMjB0byUyMG1hdGNoJTIwd2l0aCUyMCU2MG5vcm1hbGl6ZV9sb2dpdHMlM0RGYWxzZSU2MC4lMjBEZXBlbmRpbmclMjBvbiUyMHRoZSUwQSUyMyUyMHVzZSUyMGNhc2UlMkMlMjB5b3UlMjBtaWdodCUyMHdhbnQlMjB0byUyMHJ
lY29tcHV0ZSUyMGl0JTIwd2l0aCUyMCU2MG5vcm1hbGl6ZV9sb2dpdHMlM0RUcnVlJTYwLiUwQW91dHB1dF9sZW5ndGglMjAlM0QlMjBucC5zdW0odHJhbnNpdGlvbl9zY29yZXMubnVtcHkoKSUyMCUzQyUyMDAlMkMlMjBheGlzJTNEMSklMEFsZW5ndGhfcGVuYWx0eSUyMCUzRCUyMG1vZGVsLmdlbmVyYXRpb25fY29uZmlnLmxlbmd0aF9wZW5hbHR5JTBBcmVjb25zdHJ1Y3RlZF9zY29yZXMlMjAlM0QlMjBucC5zdW0odHJhbnNpdGlvbl9zY29yZXMlMkMlMjBheGlzJTNEMSklMjAlMkYlMjAob3V0cHV0X2xlbmd0aCoqbGVuZ3RoX3BlbmFsdHkpJTBBcHJpbnQobnAuYWxsY2xvc2Uob3V0cHV0cy5zZXF1ZW5jZXNfc2NvcmVzJTJDJTIwcmVjb25zdHJ1Y3RlZF9zY29yZXMpKQ==",highlighted:`from transformers import GPT2Tokenizer, TFAutoModelForCausalLM
import numpy as np
tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
model = TFAutoModelForCausalLM.from_pretrained("openai-community/gpt2")
tokenizer.pad_token_id = tokenizer.eos_token_id
inputs = tokenizer(["Today is"], return_tensors="tf")
# Example 1: Print the scores for each token generated with Greedy Search
outputs = model.generate(**inputs, max_new_tokens=5, return_dict_in_generate=True, output_scores=True)
transition_scores = model.compute_transition_scores(
outputs.sequences, outputs.scores, normalize_logits=True
)
# input_length is the length of the input prompt for decoder-only models, like the GPT family, and 1 for
# encoder-decoder models, like BART or T5.
input_length = 1 if model.config.is_encoder_decoder else inputs.input_ids.shape[1]
generated_tokens = outputs.sequences[:, input_length:]
for tok, score in zip(generated_tokens[0], transition_scores[0]):
# | token | token string | logits | probability
print(f"| {tok:5d} | {tokenizer.decode(tok):8s} | {score.numpy():.3f} | {np.exp(score.numpy()):.2%}")
| 262 | the | -1.414 | 24.33%
| 1110 | day | -2.609 | 7.36%
| 618 | when | -2.010 | 13.40%
| 356 | we | -1.859 | 15.58%
| 460 | can | -2.508 | 8.14%
# Example 2: Reconstruct the sequence scores from Beam Search
outputs = model.generate(
**inputs,
max_new_tokens=5,
num_beams=4,
num_return_sequences=4,
return_dict_in_generate=True,
output_scores=True,
)
transition_scores = model.compute_transition_scores(
outputs.sequences, outputs.scores, outputs.beam_indices, normalize_logits=False
)
# If you sum the generated tokens' scores and apply the length penalty, you'll get the sequence scores.
# Tip: recomputing the scores is only guaranteed to match with \`normalize_logits=False\`. Depending on the
# use case, you might want to recompute it with \`normalize_logits=True\`.
output_length = np.sum(transition_scores.numpy() < 0, axis=1)
length_penalty = model.generation_config.length_penalty
reconstructed_scores = np.sum(transition_scores, axis=1) / (output_length**length_penalty)
print(np.allclose(outputs.sequences_scores, reconstructed_scores))
True`,wrap:!1}}),{c(){l=a("p"),l.textContent=k,g=n(),u(d.$$.fragment)},l(s){l=r(s,"P",{"data-svelte-h":!0}),m(l)!=="svelte-kvfsh7"&&(l.textContent=k),g=o(s),f(d.$$.fragment,s)},m(s,v){p(s,l,v),p(s,g,v),h(d,s,v),M=!0},p:mt,i(s){M||(_(d.$$.fragment,s),M=!0)},o(s){b(d.$$.fragment,s),M=!1},d(s){s&&(i(l),i(g)),y(d,s)}}}function ko(Z){let l,k,g,d,M,s,v,Un="각 프레임워크에는 해당하는 GenerationMixin
클래스에서 구현된 텍스트 생성을 위한 generate 메소드가 있습니다:",ut,ne,Bn='
generate
call supports the following generation methods
for text-decoder, text-to-text, speech-to-text, and vision-to-text models:`,Zt,Ue,Wn="num_beams=1
and do_sample=False
num_beams=1
and do_sample=True
num_beams>1
and do_sample=False
num_beams>1
and do_sample=True
num_beams>1
and num_beam_groups>1
constraints!=None
or force_words_ids!=None
assistant_model
or prompt_lookup_num_tokens
is passed to .generate()
save_directory
, so that it can be re-loaded using the
from_pretrained() class method.`,Nt,Y,de,Lt,We,Xn=`Updates attributes of this class instance with attributes from kwargs
if they match existing attributes,
returning all the unused kwargs.`,St,H,me,Et,ze,Rn=`Validates the values of the attributes of the GenerationConfig instance. Raises exceptions in the presence
of parameterization that can be detected as incorrect from the configuration instance alone.`,qt,He,Nn=`Note that some parameters not validated here are best validated at generate runtime, as they may depend on
other inputs and/or the model, such as parameters related to the generation length.`,Dt,Q,pe,Yt,Ve,Ln='Returns the generation mode triggered by the GenerationConfig instance.',yt,I,ge,Qt,Fe,Sn=`Class that holds arguments for watermark generation and should be passed into GenerationConfig
during generate
.
See this paper for more details on the arguments.`,At,Xe,En="Accepts the following keys:",Pt,Re,qn=`float
):
Used for watermarking. The ratio of “green” tokens relative to the vocabulary size. Defaults to 0.25.float
):
Used with watermarking. The bias added to the selected “green” tokens’ logits. Defaults to 2.0.int
):
Hashing key used for watermarking. Defaults to 15485863 (the millionth prime).str
):
Algorithm to use for watermarking. Accepts values:int
):
The context length of previous tokens to use in seeding. Higher context length makes watermarking more robust.GenerationConfig
at initialization time or ensuring generate
-related tests are run in transformers
CI.`,Kt,Le,Yn=`A model class should inherit from GenerationMixin
to enable calling methods like generate
, or when it
has defined a custom generate
method that relies on GenerationMixin
, directly or indirectly, which
approximately shares the same interface as public methods like generate
. Three examples:`,en,Se,Qn=`LlamaForCausalLM
should inherit from GenerationMixin
to enable calling generate
and other public
methods in the mixin;BlipForQuestionAnswering
has a custom generate
method that approximately shares the same interface as
GenerationMixin.generate
(it has a few extra arguments, and the same output). That function also calls
GenerationMixin.generate
indirectly, through an inner model. As such, BlipForQuestionAnswering
should
inherit from GenerationMixin
to benefit from all generation-related automation in our codebase;BarkModel
has a custom generate
method and one of its inner models calls GenerationMixin.generate
.
However, its generate
does not share the same interface as GenerationMixin.generate
. In this case,
BarkModel
should NOT inherit from GenerationMixin
, as it breaks the generate
interface.num_beams=1
and do_sample=False
num_beams=1
and do_sample=True
num_beams>1
and do_sample=False
num_beams>1
and do_sample=True
num_beams>1
and num_beam_groups>1
constraints!=None
or force_words_ids!=None
assistant_model
or prompt_lookup_num_tokens
is passed to .generate()
greedy_search()
if num_beams=1
and
do_sample=False
contrastive_search()
if penalty_alpha>0
and
top_k>1
sample()
if num_beams=1
and
do_sample=True
beam_search()
if num_beams>1
_greedy_search()
if num_beams=1
and
do_sample=False
_sample()
if num_beams=1
and
do_sample=True
_beam_search()
if num_beams>1
and
do_sample=False
int
, optional, defaults to 20) —
The maximum length the generated tokens can have. Corresponds to the length of the input prompt +
max_new_tokens
. Its effect is overridden by max_new_tokens
, if also set.`,name:"max_length"},{anchor:"transformers.GenerationConfig.max_new_tokens",description:`max_new_tokens (int
, optional) —
The maximum number of tokens to generate, ignoring the number of tokens in the prompt.`,name:"max_new_tokens"},{anchor:"transformers.GenerationConfig.min_length",description:`min_length (int
, optional, defaults to 0) —
The minimum length of the sequence to be generated. Corresponds to the length of the input prompt +
min_new_tokens
. Its effect is overridden by min_new_tokens
, if also set.`,name:"min_length"},{anchor:"transformers.GenerationConfig.min_new_tokens",description:`min_new_tokens (int
, optional) —
The minimum number of tokens to generate, ignoring the number of tokens in the prompt.`,name:"min_new_tokens"},{anchor:"transformers.GenerationConfig.early_stopping",description:`early_stopping (bool
or str
, optional, defaults to False
) —
Controls the stopping condition for beam-based methods, like beam-search. It accepts the following values:
True
, where the generation stops as soon as there are num_beams
complete candidates; False
, where a
heuristic is applied and the generation stops when it is very unlikely to find better candidates;
"never"
, where the beam search procedure only stops when there cannot be better candidates (canonical
beam search algorithm).`,name:"early_stopping"},{anchor:"transformers.GenerationConfig.max_time",description:`max_time (float
, optional) —
The maximum amount of time you allow the computation to run for, in seconds. Generation will still finish
the current pass after the allocated time has elapsed.`,name:"max_time"},{anchor:"transformers.GenerationConfig.stop_strings",description:`stop_strings (str or list[str]
, optional) —
A string or a list of strings that should terminate generation if the model outputs them.`,name:"stop_strings"}]},{title:"Parameters that control the generation strategy used",parametersDescription:[{anchor:"transformers.GenerationConfig.do_sample",description:`do_sample (bool
, optional, defaults to False
) —
Whether or not to use sampling; use greedy decoding otherwise.`,name:"do_sample"},{anchor:"transformers.GenerationConfig.num_beams",description:`num_beams (int
, optional, defaults to 1) —
Number of beams for beam search. 1 means no beam search.`,name:"num_beams"},{anchor:"transformers.GenerationConfig.num_beam_groups",description:`num_beam_groups (int
, optional, defaults to 1) —
Number of groups to divide num_beams
into in order to ensure diversity among different groups of beams.
See this paper for more details.`,name:"num_beam_groups"}]},{title:"Parameters that control the cache",parametersDescription:[{anchor:"transformers.GenerationConfig.use_cache",description:`use_cache (bool
, optional, defaults to True
) —
Whether or not the model should use the past key/values attentions (if applicable to the model) to
speed up decoding.`,name:"use_cache"},{anchor:"transformers.GenerationConfig.cache_implementation",description:`cache_implementation (str
, optional, defaults to None
) —
Name of the cache class that will be instantiated in generate
, for faster decoding. Possible values are:
"dynamic"
: DynamicCache"static"
: StaticCache"offloaded"
: DynamicCache(offloaded=True)
"offloaded_static"
: StaticCache(offloaded=True)
"quantized"
: QuantizedCacheIf none is specified, we will use the default cache for the model (which is often DynamicCache). See
our cache documentation for further information.`,name:"cache_implementation"},{anchor:"transformers.GenerationConfig.cache_config",description:`cache_config (dict
, optional, defaults to None
) —
Arguments used in the key-value cache class can be passed in cache_config
.`,name:"cache_config"},{anchor:"transformers.GenerationConfig.return_legacy_cache",description:`return_legacy_cache (bool
, optional, defaults to True
) —
Whether to return the legacy or new format of the cache when DynamicCache
is used by default.`,name:"return_legacy_cache"}]},{title:"Parameters for manipulation of the model output logits",parametersDescription:[{anchor:"transformers.GenerationConfig.temperature",description:`temperature (float
, optional, defaults to 1.0) —
The value used to modulate the next token probabilities. This value is set in a model’s generation_config.json
file. If it isn’t set, the default value is 1.0`,name:"temperature"},{anchor:"transformers.GenerationConfig.top_k",description:`top_k (int
, optional, defaults to 50) —
The number of highest probability vocabulary tokens to keep for top-k-filtering. This value is set in a model’s generation_config.json
file. If it isn’t set, the default value is 50.`,name:"top_k"},{anchor:"transformers.GenerationConfig.top_p",description:`top_p (float
, optional, defaults to 1.0) —
If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to
top_p
or higher are kept for generation. This value is set in a model’s generation_config.json
file. If it isn’t set, the default value is 1.0`,name:"top_p"},{anchor:"transformers.GenerationConfig.min_p",description:`min_p (float
, optional) —
Minimum token probability, which will be scaled by the probability of the most likely token. It must be a
value between 0 and 1. Typical values are in the 0.01-0.2 range, comparably selective as setting top_p
in
the 0.99-0.8 range (use the opposite of normal top_p
values).`,name:"min_p"},{anchor:"transformers.GenerationConfig.typical_p",description:`typical_p (float
, optional, defaults to 1.0) —
Local typicality measures how similar the conditional probability of predicting a target token next is to
the expected conditional probability of predicting a random token next, given the partial text already
generated. If set to float < 1, the smallest set of the most locally typical tokens with probabilities that
add up to typical_p
or higher are kept for generation. See this
paper for more details.`,name:"typical_p"},{anchor:"transformers.GenerationConfig.epsilon_cutoff",description:`epsilon_cutoff (float
, optional, defaults to 0.0) —
If set to float strictly between 0 and 1, only tokens with a conditional probability greater than
epsilon_cutoff
will be sampled. In the paper, suggested values range from 3e-4 to 9e-4, depending on the
size of the model. See Truncation Sampling as Language Model
Desmoothing for more details.`,name:"epsilon_cutoff"},{anchor:"transformers.GenerationConfig.eta_cutoff",description:`eta_cutoff (float
, optional, defaults to 0.0) —
Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to float strictly between
0 and 1, a token is only considered if it is greater than either eta_cutoff
or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits)))
. The latter term is intuitively the expected next token
probability, scaled by sqrt(eta_cutoff)
. In the paper, suggested values range from 3e-4 to 2e-3,
depending on the size of the model. See Truncation Sampling as Language Model
Desmoothing for more details.`,name:"eta_cutoff"},{anchor:"transformers.GenerationConfig.diversity_penalty",description:`diversity_penalty (float
, optional, defaults to 0.0) —
This value is subtracted from a beam’s score if it generates the same token as any beam from another group at a
particular time. Note that diversity_penalty
is only effective if group beam search
is enabled.`,name:"diversity_penalty"},{anchor:"transformers.GenerationConfig.repetition_penalty",description:`repetition_penalty (float
, optional, defaults to 1.0) —
The parameter for repetition penalty. 1.0 means no penalty. See this
paper for more details.`,name:"repetition_penalty"},{anchor:"transformers.GenerationConfig.encoder_repetition_penalty",description:`encoder_repetition_penalty (float
, optional, defaults to 1.0) —
The parameter for encoder_repetition_penalty. An exponential penalty on sequences that are not in the
original input. 1.0 means no penalty.`,name:"encoder_repetition_penalty"},{anchor:"transformers.GenerationConfig.length_penalty",description:`length_penalty (float
, optional, defaults to 1.0) —
Exponential penalty to the length that is used with beam-based generation. It is applied as an exponent to
the sequence length, which in turn is used to divide the score of the sequence. Since the score is the log
likelihood of the sequence (i.e. negative), length_penalty
> 0.0 promotes longer sequences, while
length_penalty
< 0.0 encourages shorter sequences.`,name:"length_penalty"},{anchor:"transformers.GenerationConfig.no_repeat_ngram_size",description:`no_repeat_ngram_size (int
, optional, defaults to 0) —
If set to int > 0, all ngrams of that size can only occur once.`,name:"no_repeat_ngram_size"},{anchor:"transformers.GenerationConfig.bad_words_ids",description:`bad_words_ids (list[list[int]]
, optional) —
List of list of token ids that are not allowed to be generated. Check
NoBadWordsLogitsProcessor for further documentation and examples.`,name:"bad_words_ids"},{anchor:"transformers.GenerationConfig.force_words_ids",description:`force_words_ids (list[list[int]]
or list[list[list[int]]]
, optional) —
List of token ids that must be generated. If given a list[list[int]]
, this is treated as a simple list of
words that must be included, the opposite of bad_words_ids
. If given list[list[list[int]]]
, this
triggers a disjunctive constraint, where one
can allow different forms of each word.`,name:"force_words_ids"},{anchor:"transformers.GenerationConfig.renormalize_logits",description:`renormalize_logits (bool
, optional, defaults to False
) —
Whether to renormalize the logits after applying all the logits processors (including the custom
ones). It’s highly recommended to set this flag to True
as the search algorithms assume the score logits
are normalized but some logit processors break the normalization.`,name:"renormalize_logits"},{anchor:"transformers.GenerationConfig.constraints",description:`constraints (list[Constraint]
, optional) —
Custom constraints that can be added to the generation to ensure that the output will contain the use of
certain tokens as defined by Constraint
objects, in the most sensible way possible.`,name:"constraints"},{anchor:"transformers.GenerationConfig.forced_bos_token_id",description:`forced_bos_token_id (int
, optional, defaults to model.config.forced_bos_token_id
) —
The id of the token to force as the first generated token after the decoder_start_token_id
. Useful for
multilingual models like mBART where the first generated token needs to be the target
language token.`,name:"forced_bos_token_id"},{anchor:"transformers.GenerationConfig.forced_eos_token_id",description:"forced_eos_token_id (int
or list[int]
, optional, defaults to model.config.forced_eos_token_id
) —
The id of the token to force as the last generated token when max_length
is reached. Optionally, use a list to set multiple end-of-sequence tokens.",name:"forced_eos_token_id"},{anchor:"transformers.GenerationConfig.remove_invalid_values",description:`remove_invalid_values (bool
, optional, defaults to model.config.remove_invalid_values
) —
Whether to remove possible nan and inf outputs of the model to prevent the generation method from crashing.
Note that using remove_invalid_values
can slow down generation.`,name:"remove_invalid_values"},{anchor:"transformers.GenerationConfig.exponential_decay_length_penalty",description:`exponential_decay_length_penalty (tuple(int, float)
, optional) —
This tuple adds an exponentially increasing length penalty, after a certain number of tokens have been
generated. The tuple shall consist of: (start_index, decay_factor)
where start_index
indicates where
the penalty starts and decay_factor
represents the factor of exponential decay`,name:"exponential_decay_length_penalty"},{anchor:"transformers.GenerationConfig.suppress_tokens",description:`suppress_tokens (list[int]
, optional) —
A list of tokens that will be suppressed at generation. The SuppressTokens
logit processor will set their
log probs to -inf
so that they are not sampled.`,name:"suppress_tokens"},{anchor:"transformers.GenerationConfig.begin_suppress_tokens",description:`begin_suppress_tokens (list[int]
, optional) —
A list of tokens that will be suppressed at the beginning of the generation. The SuppressBeginTokens
logit
processor will set their log probs to -inf
so that they are not sampled.`,name:"begin_suppress_tokens"},{anchor:"transformers.GenerationConfig.sequence_bias",description:`sequence_bias (dict[tuple[int], float]
, optional) —
Dictionary that maps a sequence of tokens to its bias term. Positive biases increase the odds of the
sequence being selected, while negative biases do the opposite. Check
SequenceBiasLogitsProcessor for further documentation and examples.`,name:"sequence_bias"},{anchor:"transformers.GenerationConfig.token_healing",description:`token_healing (bool
, optional, defaults to False
) —
Heal tail tokens of prompts by replacing them with their appropriate extensions.
This enhances the quality of completions for prompts affected by greedy tokenization bias.`,name:"token_healing"},{anchor:"transformers.GenerationConfig.guidance_scale",description:`guidance_scale (float
, optional) —
The guidance scale for classifier free guidance (CFG). CFG is enabled by setting guidance_scale > 1
.
Higher guidance scale encourages the model to generate samples that are more closely linked to the input
prompt, usually at the expense of poorer quality.`,name:"guidance_scale"},{anchor:"transformers.GenerationConfig.watermarking_config",description:`watermarking_config (BaseWatermarkingConfig
or dict
, optional) —
Arguments used to watermark the model outputs by adding a small bias to randomly selected set of “green”
tokens. See the docs of SynthIDTextWatermarkingConfig
and WatermarkingConfig for more
details. If passed as Dict
, it will be converted to a WatermarkingConfig
internally.`,name:"watermarking_config"}]},{title:"Parameters that define the output variables of generate",parametersDescription:[{anchor:"transformers.GenerationConfig.num_return_sequences",description:`num_return_sequences (int
, optional, defaults to 1) —
The number of independently computed returned sequences for each element in the batch.`,name:"num_return_sequences"},{anchor:"transformers.GenerationConfig.output_attentions",description:`output_attentions (bool
, optional, defaults to False
) —
Whether or not to return the attentions tensors of all attention layers. See attentions
under returned
tensors for more details.`,name:"output_attentions"},{anchor:"transformers.GenerationConfig.output_hidden_states",description:`output_hidden_states (bool
, optional, defaults to False
) —
Whether or not to return the hidden states of all layers. See hidden_states
under returned tensors for
more details.`,name:"output_hidden_states"},{anchor:"transformers.GenerationConfig.output_scores",description:`output_scores (bool
, optional, defaults to False
) —
Whether or not to return the prediction scores. See scores
under returned tensors for more details.`,name:"output_scores"},{anchor:"transformers.GenerationConfig.output_logits",description:`output_logits (bool
, optional) —
Whether or not to return the unprocessed prediction logit scores. See logits
under returned tensors for
more details.`,name:"output_logits"},{anchor:"transformers.GenerationConfig.return_dict_in_generate",description:`return_dict_in_generate (bool
, optional, defaults to False
) —
Whether or not to return a ModelOutput, as opposed to returning exclusively the generated
sequence. This flag must be set to True
to return the generation cache (when use_cache
is True
)
or optional outputs (see flags starting with output_
)`,name:"return_dict_in_generate"}]},{title:"Special tokens that can be used at generation time",parametersDescription:[{anchor:"transformers.GenerationConfig.pad_token_id",description:`pad_token_id (int
, optional) —
The id of the padding token.`,name:"pad_token_id"},{anchor:"transformers.GenerationConfig.bos_token_id",description:`bos_token_id (int
, optional) —
The id of the beginning-of-sequence token.`,name:"bos_token_id"},{anchor:"transformers.GenerationConfig.eos_token_id",description:`eos_token_id (Union[int, list[int]]
, optional) —
The id of the end-of-sequence token. Optionally, use a list to set multiple end-of-sequence tokens.`,name:"eos_token_id"}]},{title:"Generation parameters exclusive to encoder-decoder models",parametersDescription:[{anchor:"transformers.GenerationConfig.encoder_no_repeat_ngram_size",description:`encoder_no_repeat_ngram_size (int
, optional, defaults to 0) —
If set to int > 0, all ngrams of that size that occur in the encoder_input_ids
cannot occur in the
decoder_input_ids
.`,name:"encoder_no_repeat_ngram_size"},{anchor:"transformers.GenerationConfig.decoder_start_token_id",description:`decoder_start_token_id (int
or list[int]
, optional) —
If an encoder-decoder model starts decoding with a different token than bos, the id of that token or a list of length
batch_size
. Passing a list enables different start ids for each element in the batch
(e.g. multilingual models with different target languages in one batch)`,name:"decoder_start_token_id"}]},{title:"Generation parameters exclusive to assistant generation",parametersDescription:[{anchor:"transformers.GenerationConfig.is_assistant",description:`is_assistant (bool
, optional, defaults to False
) —
Whether the model is an assistant (draft) model.`,name:"is_assistant"},{anchor:"transformers.GenerationConfig.num_assistant_tokens",description:`num_assistant_tokens (int
, optional, defaults to 20) —
Defines the number of speculative tokens that shall be generated by the assistant model before being
checked by the target model at each iteration. Higher values for num_assistant_tokens
make the generation
more speculative: if the assistant model is performant, larger speed-ups can be reached; if the assistant
model requires lots of corrections, lower speed-ups are reached.`,name:"num_assistant_tokens"},{anchor:"transformers.GenerationConfig.num_assistant_tokens_schedule",description:`num_assistant_tokens_schedule (str
, optional, defaults to "constant"
) —
Defines the schedule at which max assistant tokens shall be changed during inference.
"heuristic"
: When all speculative tokens are correct, increase num_assistant_tokens
by 2 else
reduce by 1. num_assistant_tokens
value is persistent over multiple generation calls with the same assistant model."heuristic_transient"
: Same as "heuristic"
but num_assistant_tokens
is reset to its initial value after each generation call."constant"
: num_assistant_tokens
stays unchanged during generationfloat
, optional, defaults to 0.4) —
The confidence threshold for the assistant model. If the assistant model’s confidence in its prediction for the current token is lower
than this threshold, the assistant model stops the current token generation iteration, even if the number of speculative tokens
(defined by num_assistant_tokens
) is not yet reached. The assistant’s confidence threshold is adjusted throughout the speculative iterations to reduce the number of unnecessary draft and target forward passes, biased towards avoiding false negatives.
assistant_confidence_threshold
value is persistent over multiple generation calls with the same assistant model.
It is an unsupervised version of the dynamic speculation lookahead
from Dynamic Speculation Lookahead Accelerates Speculative Decoding of Large Language Models https://huggingface.co/papers/2405.04304.`,name:"assistant_confidence_threshold"},{anchor:"transformers.GenerationConfig.prompt_lookup_num_tokens",description:`prompt_lookup_num_tokens (int
, optional) —
The number of tokens to be output as candidate tokens.`,name:"prompt_lookup_num_tokens"},{anchor:"transformers.GenerationConfig.max_matching_ngram_size",description:`max_matching_ngram_size (int
, optional) —
The maximum ngram size to be considered for matching in the prompt. Defaults to 2 if not provided.`,name:"max_matching_ngram_size"},{anchor:"transformers.GenerationConfig.assistant_early_exit(int,",description:`assistant_early_exit(int
, optional) —
If set to a positive integer, early exit of the model will be used as an assistant. Can only be used with
models that support early exit (i.e. models where logits from intermediate layers can be interpreted by the LM head).`,name:"assistant_early_exit(int,"},{anchor:"transformers.GenerationConfig.assistant_lookbehind(int,",description:`assistant_lookbehind(int
, optional, defaults to 10) —
If set to a positive integer, the re-encoding process will additionally consider the last assistant_lookbehind
assistant tokens
to correctly align tokens. Can only be used with different tokenizers in speculative decoding.
See this blog for more details.`,name:"assistant_lookbehind(int,"},{anchor:"transformers.GenerationConfig.target_lookbehind(int,",description:`target_lookbehind(int
, optional, defaults to 10) —
If set to a positive integer, the re-encoding process will additionally consider the last target_lookbehind
target tokens
to correctly align tokens. Can only be used with different tokenizers in speculative decoding.
See this blog for more details.`,name:"target_lookbehind(int,"}]},{title:"Parameters related to performances and compilation",parametersDescription:[{anchor:"transformers.GenerationConfig.compile_config",description:`compile_config (CompileConfig, optional) —
If using a compilable cache, this controls how generate
will compile
the forward pass for faster
inference.`,name:"compile_config"},{anchor:"transformers.GenerationConfig.disable_compile",description:`disable_compile (bool
, optional) —
Whether to disable the automatic compilation of the forward pass. Automatic compilation happens when
specific criteria are met, including using a compilable cache. Please open an issue if you find the
need to use this flag.`,name:"disable_compile"}]}]}}),S=new Gn({props:{$$slots:{default:[yo]},$$scope:{ctx:Z}}}),ie=new B({props:{name:"from_pretrained",anchor:"transformers.GenerationConfig.from_pretrained",parameters:[{name:"pretrained_model_name",val:": typing.Union[str, os.PathLike]"},{name:"config_file_name",val:": typing.Union[str, os.PathLike, NoneType] = None"},{name:"cache_dir",val:": typing.Union[str, os.PathLike, NoneType] = None"},{name:"force_download",val:": bool = False"},{name:"local_files_only",val:": bool = False"},{name:"token",val:": typing.Union[bool, str, NoneType] = None"},{name:"revision",val:": str = 'main'"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.GenerationConfig.from_pretrained.pretrained_model_name",description:`pretrained_model_name (str
or os.PathLike
) —
This can be either:
./my_model_directory/
.str
or os.PathLike
, optional, defaults to "generation_config.json"
) —
Name of the generation configuration JSON file to be loaded from pretrained_model_name
.`,name:"config_file_name"},{anchor:"transformers.GenerationConfig.from_pretrained.cache_dir",description:`cache_dir (str
or os.PathLike
, optional) —
Path to a directory in which a downloaded pretrained model configuration should be cached if the
standard cache should not be used.`,name:"cache_dir"},{anchor:"transformers.GenerationConfig.from_pretrained.force_download",description:`force_download (bool
, optional, defaults to False
) —
Whether or not to force a (re-)download of the configuration files and override the cached versions if
they exist.`,name:"force_download"},{anchor:"transformers.GenerationConfig.from_pretrained.resume_download",description:`resume_download —
Deprecated and ignored. All downloads are now resumed by default when possible.
Will be removed in v5 of Transformers.`,name:"resume_download"},{anchor:"transformers.GenerationConfig.from_pretrained.proxies",description:`proxies (dict[str, str]
, optional) —
A dictionary of proxy servers to use by protocol or endpoint, e.g., {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}.
The proxies are used on each request.`,name:"proxies"},{anchor:"transformers.GenerationConfig.from_pretrained.token",description:`token (str
or bool
, optional) —
The token to use as HTTP bearer authorization for remote files. If True
, or not specified, will use
the token generated when running hf auth login
(stored in ~/.huggingface
).`,name:"token"},{anchor:"transformers.GenerationConfig.from_pretrained.revision",description:`revision (str
, optional, defaults to "main"
) —
The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
git-based system for storing models and other artifacts on huggingface.co, so revision
can be any
identifier allowed by git.
To test a pull request you made on the Hub, you can pass revision="refs/pr/<pr_number>"
.
bool
, optional, defaults to False
) —
If False
, then this function returns just the final configuration object.
If True
, then this function returns a Tuple(config, unused_kwargs)
where unused_kwargs is a
dictionary consisting of the key/value pairs whose keys are not configuration attributes: i.e., the
part of kwargs
which has not been used to update config
and is otherwise ignored.`,name:"return_unused_kwargs"},{anchor:"transformers.GenerationConfig.from_pretrained.subfolder",description:`subfolder (str
, optional, defaults to ""
) —
In case the relevant files are located inside a subfolder of the model repo on huggingface.co, you can
specify the folder name here.`,name:"subfolder"},{anchor:"transformers.GenerationConfig.from_pretrained.kwargs",description:`kwargs (dict[str, Any]
, optional) —
The values in kwargs of any keys which are configuration attributes will be used to override the loaded
values. Behavior concerning key/value pairs whose keys are not configuration attributes is controlled
by the return_unused_kwargs
keyword parameter.`,name:"kwargs"}],source:"https://github.com/huggingface/transformers/blob/v4.56.2/src/transformers/generation/configuration_utils.py#L837",returnDescription:`