import{s as Rp,o as Ep,n as T}from"../chunks/scheduler.defa9a21.js";import{S as Sp,i as Hp,g as i,s as n,r as g,A as Lp,h as d,f as h,c,j as U,u,x as j,k as x,y as l,a as w,v as f,d as b,t as $,w as y}from"../chunks/index.fe795e71.js";import{T as Se}from"../chunks/Tip.179eb360.js";import{D as k}from"../chunks/Docstring.90d6fe5c.js";import{C as B}from"../chunks/CodeBlock.204b6c34.js";import{E as G}from"../chunks/ExampleCodeBlock.f6fae62d.js";import{H as fo,E as Pp}from"../chunks/getInferenceSnippets.2234a8dd.js";function Dp(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoZ3JhZGllbnRfYWNjdW11bGF0aW9uX3N0ZXBzJTNEMSklMEFkYXRhbG9hZGVyJTJDJTIwbW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBzY2hlZHVsZXIlMjAlM0QlMjBhY2NlbGVyYXRvci5wcmVwYXJlKGRhdGFsb2FkZXIlMkMlMjBtb2RlbCUyQyUyMG9wdGltaXplciUyQyUyMHNjaGVkdWxlciklMEElMEFmb3IlMjBpbnB1dCUyQyUyMG91dHB1dCUyMGluJTIwZGF0YWxvYWRlciUzQSUwQSUyMCUyMCUyMCUyMHdpdGglMjBhY2NlbGVyYXRvci5hY2N1bXVsYXRlKG1vZGVsKSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG91dHB1dHMlMjAlM0QlMjBtb2RlbChpbnB1dCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBsb3NzJTIwJTNEJTIwbG9zc19mdW5jKG91dHB1dHMpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbG9zcy5iYWNrd2FyZCgpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwb3B0aW1pemVyLnN0ZXAoKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHNjaGVkdWxlci5zdGVwKCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBvcHRpbWl6ZXIuemVyb19ncmFkKCk=",highlighted:`from accelerate import Accelerator
accelerator = Accelerator(gradient_accumulation_steps=1)
dataloader, model, optimizer, scheduler = accelerator.prepare(dataloader, model, optimizer, scheduler)
for input, output in dataloader:
with accelerator.accumulate(model):
outputs = model(input)
loss = loss_func(outputs)
loss.backward()
optimizer.step()
scheduler.step()
optimizer.zero_grad()`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Kp(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IobWl4ZWRfcHJlY2lzaW9uJTNEJTIyZnAxNiUyMiklMEF3aXRoJTIwYWNjZWxlcmF0b3IuYXV0b2Nhc3QoKSUzQSUwQSUyMCUyMCUyMCUyMHRyYWluKCk=",highlighted:`from accelerate import Accelerator
accelerator = Accelerator(mixed_precision="fp16")
with accelerator.autocast():
train()`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function qp(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoZ3JhZGllbnRfYWNjdW11bGF0aW9uX3N0ZXBzJTNEMiklMEFvdXRwdXRzJTIwJTNEJTIwbW9kZWwoaW5wdXRzKSUwQWxvc3MlMjAlM0QlMjBsb3NzX2ZuKG91dHB1dHMlMkMlMjBsYWJlbHMpJTBBYWNjZWxlcmF0b3IuYmFja3dhcmQobG9zcyk=",highlighted:`from accelerate import Accelerator
accelerator = Accelerator(gradient_accumulation_steps=2)
outputs = model(inputs)
loss = loss_fn(outputs, labels)
accelerator.backward(loss)`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Op(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQSUyMyUyMEFzc3VtZSUyMGxhdGVyJTIwaW4lMjB0aGUlMjB0cmFpbmluZyUyMHNjcmlwdCUwQSUyMyUyMCU2MHNob3VsZF9kb19icmVha3BvaW50JTYwJTIwaXMlMjBhJTIwY3VzdG9tJTIwZnVuY3Rpb24lMjB0byUyMG1vbml0b3IlMjB3aGVuJTIwdG8lMjBicmVhayUyQyUwQSUyMyUyMGUuZy4lMjB3aGVuJTIwdGhlJTIwbG9zcyUyMGlzJTIwTmFOJTBBaWYlMjBzaG91bGRfZG9fYnJlYWtwb2ludChsb3NzKSUzQSUwQSUyMCUyMCUyMCUyMGFjY2VsZXJhdG9yLnNldF90cmlnZ2VyKCklMEElMjMlMjBBc3N1bWUlMjBsYXRlciUyMGluJTIwdGhlJTIwdHJhaW5pbmclMjBzY3JpcHQlMEFpZiUyMGFjY2VsZXJhdG9yLmNoZWNrX3RyaWdnZXIoKSUzQSUwQSUyMCUyMCUyMCUyMGJyZWFr",highlighted:`from accelerate import Accelerator
accelerator = Accelerator()
# Assume later in the training script
# \`should_do_breakpoint\` is a custom function to monitor when to break,
# e.g. when the loss is NaN
if should_do_breakpoint(loss):
accelerator.set_trigger()
# Assume later in the training script
if accelerator.check_trigger():
break`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function ei(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQW1vZGVsJTJDJTIwb3B0aW1pemVyJTJDJTIwc2NoZWR1bGVyJTIwJTNEJTIwLi4uJTBBbW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBzY2hlZHVsZXIlMjAlM0QlMjBhY2NlbGVyYXRvci5wcmVwYXJlKG1vZGVsJTJDJTIwb3B0aW1pemVyJTJDJTIwc2NoZWR1bGVyKSUwQW1vZGVsJTJDJTIwb3B0aW1pemVyJTJDJTIwc2NoZWR1bGVyJTIwJTNEJTIwYWNjZWxlcmF0b3IuY2xlYXIobW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBzY2hlZHVsZXIp",highlighted:`from accelerate import Accelerator
accelerator = Accelerator()
model, optimizer, scheduler = ...
model, optimizer, scheduler = accelerator.prepare(model, optimizer, scheduler)
model, optimizer, scheduler = accelerator.clear(model, optimizer, scheduler)`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function ti(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoZ3JhZGllbnRfYWNjdW11bGF0aW9uX3N0ZXBzJTNEMiklMEFkYXRhbG9hZGVyJTJDJTIwbW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBzY2hlZHVsZXIlMjAlM0QlMjBhY2NlbGVyYXRvci5wcmVwYXJlKGRhdGFsb2FkZXIlMkMlMjBtb2RlbCUyQyUyMG9wdGltaXplciUyQyUyMHNjaGVkdWxlciklMEElMEFmb3IlMjBpbnB1dCUyQyUyMHRhcmdldCUyMGluJTIwZGF0YWxvYWRlciUzQSUwQSUyMCUyMCUyMCUyMG9wdGltaXplci56ZXJvX2dyYWQoKSUwQSUyMCUyMCUyMCUyMG91dHB1dCUyMCUzRCUyMG1vZGVsKGlucHV0KSUwQSUyMCUyMCUyMCUyMGxvc3MlMjAlM0QlMjBsb3NzX2Z1bmMob3V0cHV0JTJDJTIwdGFyZ2V0KSUwQSUyMCUyMCUyMCUyMGFjY2VsZXJhdG9yLmJhY2t3YXJkKGxvc3MpJTBBJTIwJTIwJTIwJTIwaWYlMjBhY2NlbGVyYXRvci5zeW5jX2dyYWRpZW50cyUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGFjY2VsZXJhdG9yLmNsaXBfZ3JhZF9ub3JtXyhtb2RlbC5wYXJhbWV0ZXJzKCklMkMlMjBtYXhfZ3JhZF9ub3JtKSUwQSUyMCUyMCUyMCUyMG9wdGltaXplci5zdGVwKCk=",highlighted:`from accelerate import Accelerator
accelerator = Accelerator(gradient_accumulation_steps=2)
dataloader, model, optimizer, scheduler = accelerator.prepare(dataloader, model, optimizer, scheduler)
for input, target in dataloader:
optimizer.zero_grad()
output = model(input)
loss = loss_func(output, target)
accelerator.backward(loss)
if accelerator.sync_gradients:
accelerator.clip_grad_norm_(model.parameters(), max_grad_norm)
optimizer.step()`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function ai(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoZ3JhZGllbnRfYWNjdW11bGF0aW9uX3N0ZXBzJTNEMiklMEFkYXRhbG9hZGVyJTJDJTIwbW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBzY2hlZHVsZXIlMjAlM0QlMjBhY2NlbGVyYXRvci5wcmVwYXJlKGRhdGFsb2FkZXIlMkMlMjBtb2RlbCUyQyUyMG9wdGltaXplciUyQyUyMHNjaGVkdWxlciklMEElMEFmb3IlMjBpbnB1dCUyQyUyMHRhcmdldCUyMGluJTIwZGF0YWxvYWRlciUzQSUwQSUyMCUyMCUyMCUyMG9wdGltaXplci56ZXJvX2dyYWQoKSUwQSUyMCUyMCUyMCUyMG91dHB1dCUyMCUzRCUyMG1vZGVsKGlucHV0KSUwQSUyMCUyMCUyMCUyMGxvc3MlMjAlM0QlMjBsb3NzX2Z1bmMob3V0cHV0JTJDJTIwdGFyZ2V0KSUwQSUyMCUyMCUyMCUyMGFjY2VsZXJhdG9yLmJhY2t3YXJkKGxvc3MpJTBBJTIwJTIwJTIwJTIwaWYlMjBhY2NlbGVyYXRvci5zeW5jX2dyYWRpZW50cyUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGFjY2VsZXJhdG9yLmNsaXBfZ3JhZF92YWx1ZV8obW9kZWwucGFyYW1ldGVycygpJTJDJTIwY2xpcF92YWx1ZSklMEElMjAlMjAlMjAlMjBvcHRpbWl6ZXIuc3RlcCgp",highlighted:`from accelerate import Accelerator
accelerator = Accelerator(gradient_accumulation_steps=2)
dataloader, model, optimizer, scheduler = accelerator.prepare(dataloader, model, optimizer, scheduler)
for input, target in dataloader:
optimizer.zero_grad()
output = model(input)
loss = loss_func(output, target)
accelerator.backward(loss)
if accelerator.sync_gradients:
accelerator.clip_grad_value_(model.parameters(), clip_value)
optimizer.step()`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function si(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IobG9nX3dpdGglM0QlMjJ0ZW5zb3Jib2FyZCUyMiklMEFhY2NlbGVyYXRvci5pbml0X3RyYWNrZXJzKCUyMm15X3Byb2plY3QlMjIpJTBBJTIzJTIwRG8lMjB0cmFpbmluZyUwQWFjY2VsZXJhdG9yLmVuZF90cmFpbmluZygp",highlighted:`from accelerate import Accelerator
accelerator = Accelerator(log_with="tensorboard")
accelerator.init_trackers("my_project")
# Do training
accelerator.end_training()`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function li(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQW1vZGVsJTJDJTIwb3B0aW1pemVyJTJDJTIwc2NoZWR1bGVyJTIwJTNEJTIwLi4uJTBBbW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBzY2hlZHVsZXIlMjAlM0QlMjBhY2NlbGVyYXRvci5wcmVwYXJlKG1vZGVsJTJDJTIwb3B0aW1pemVyJTJDJTIwc2NoZWR1bGVyKSUwQW1vZGVsJTJDJTIwb3B0aW1pemVyJTJDJTIwc2NoZWR1bGVyJTIwJTNEJTIwYWNjZWxlcmF0b3IuZnJlZV9tZW1vcnkobW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBzY2hlZHVsZXIp",highlighted:`from accelerate import Accelerator
accelerator = Accelerator()
model, optimizer, scheduler = ...
model, optimizer, scheduler = accelerator.prepare(model, optimizer, scheduler)
model, optimizer, scheduler = accelerator.free_memory(model, optimizer, scheduler)`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function ri(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"JTIzJTIwQXNzdW1pbmclMjBmb3VyJTIwcHJvY2Vzc2VzJTBBaW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwYWNjZWxlcmF0ZSUyMGltcG9ydCUyMEFjY2VsZXJhdG9yJTBBJTBBYWNjZWxlcmF0b3IlMjAlM0QlMjBBY2NlbGVyYXRvcigpJTBBcHJvY2Vzc190ZW5zb3IlMjAlM0QlMjB0b3JjaC50ZW5zb3IoJTVCYWNjZWxlcmF0b3IucHJvY2Vzc19pbmRleCU1RCklMEFnYXRoZXJlZF90ZW5zb3IlMjAlM0QlMjBhY2NlbGVyYXRvci5nYXRoZXIocHJvY2Vzc190ZW5zb3IpJTBBZ2F0aGVyZWRfdGVuc29y",highlighted:`# Assuming four processes
import torch
from accelerate import Accelerator
accelerator = Accelerator()
process_tensor = torch.tensor([accelerator.process_index])
gathered_tensor = accelerator.gather(process_tensor)
gathered_tensor
tensor([0, 1, 2, 3])`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function ni(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"JTIzJTIwQXNzdW1pbmclMjB0d28lMjBwcm9jZXNzZXMlMkMlMjB3aXRoJTIwYSUyMGJhdGNoJTIwc2l6ZSUyMG9mJTIwNSUyMG9uJTIwYSUyMGRhdGFzZXQlMjB3aXRoJTIwOSUyMHNhbXBsZXMlMEFpbXBvcnQlMjB0b3JjaCUwQWZyb20lMjBhY2NlbGVyYXRlJTIwaW1wb3J0JTIwQWNjZWxlcmF0b3IlMEElMEFhY2NlbGVyYXRvciUyMCUzRCUyMEFjY2VsZXJhdG9yKCklMEFkYXRhbG9hZGVyJTIwJTNEJTIwdG9yY2gudXRpbHMuZGF0YS5EYXRhTG9hZGVyKHJhbmdlKDkpJTJDJTIwYmF0Y2hfc2l6ZSUzRDUpJTBBZGF0YWxvYWRlciUyMCUzRCUyMGFjY2VsZXJhdG9yLnByZXBhcmUoZGF0YWxvYWRlciklMEFiYXRjaCUyMCUzRCUyMG5leHQoaXRlcihkYXRhbG9hZGVyKSklMEFnYXRoZXJlZF9pdGVtcyUyMCUzRCUyMGFjY2VsZXJhdG9yLmdhdGhlcl9mb3JfbWV0cmljcyhiYXRjaCklMEFsZW4oZ2F0aGVyZWRfaXRlbXMp",highlighted:`# Assuming two processes, with a batch size of 5 on a dataset with 9 samples
import torch
from accelerate import Accelerator
accelerator = Accelerator()
dataloader = torch.utils.data.DataLoader(range(9), batch_size=5)
dataloader = accelerator.prepare(dataloader)
batch = next(iter(dataloader))
gathered_items = accelerator.gather_for_metrics(batch)
len(gathered_items)
9`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function ci(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwYWNjZWxlcmF0ZSUyMGltcG9ydCUyMEFjY2VsZXJhdG9yJTBBJTBBYWNjZWxlcmF0b3IlMjAlM0QlMjBBY2NlbGVyYXRvcigpJTBBbmV0JTIwJTNEJTIwdG9yY2gubm4uTGluZWFyKDIlMkMlMjAyKSUwQW5ldCUyMCUzRCUyMGFjY2VsZXJhdG9yLnByZXBhcmUobmV0KSUwQXN0YXRlX2RpY3QlMjAlM0QlMjBhY2NlbGVyYXRvci5nZXRfc3RhdGVfZGljdChuZXQp",highlighted:`import torch
from accelerate import Accelerator
accelerator = Accelerator()
net = torch.nn.Linear(2, 2)
net = accelerator.prepare(net)
state_dict = accelerator.get_state_dict(net)`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function oi(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IobG9nX3dpdGglM0QlMjJ0ZW5zb3Jib2FyZCUyMiklMEFhY2NlbGVyYXRvci5pbml0X3RyYWNrZXJzKCUyMm15X3Byb2plY3QlMjIpJTBBdGVuc29yYm9hcmRfdHJhY2tlciUyMCUzRCUyMGFjY2VsZXJhdG9yLmdldF90cmFja2VyKCUyMnRlbnNvcmJvYXJkJTIyKQ==",highlighted:`from accelerate import Accelerator
accelerator = Accelerator(log_with="tensorboard")
accelerator.init_trackers("my_project")
tensorboard_tracker = accelerator.get_tracker("tensorboard")`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function pi(_){let t,m=`join_uneven_inputs
is only supported for Distributed Data Parallel training on multiple GPUs. For any other
configuration, this method will have no effect.`;return{c(){t=i("p"),t.innerHTML=m},l(r){t=d(r,"P",{"data-svelte-h":!0}),j(t)!=="svelte-rvhzx4"&&(t.innerHTML=m)},m(r,a){w(r,t,a)},p:T,d(r){r&&h(t)}}}function ii(_){let t,m="Overidding even_batches
will not affect iterable-style data loaders.";return{c(){t=i("p"),t.innerHTML=m},l(r){t=d(r,"P",{"data-svelte-h":!0}),j(t)!=="svelte-1kjj3tv"&&(t.innerHTML=m)},m(r,a){w(r,t,a)},p:T,d(r){r&&h(t)}}}function di(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoZXZlbl9iYXRjaGVzJTNEVHJ1ZSklMEFkZHBfbW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBkYXRhbG9hZGVyJTIwJTNEJTIwYWNjZWxlcmF0b3IucHJlcGFyZShtb2RlbCUyQyUyMG9wdGltaXplciUyQyUyMGRhdGFsb2FkZXIpJTBBJTBBd2l0aCUyMGFjY2VsZXJhdG9yLmpvaW5fdW5ldmVuX2lucHV0cyglNUJkZHBfbW9kZWwlNUQlMkMlMjBldmVuX2JhdGNoZXMlM0RGYWxzZSklM0ElMEElMjAlMjAlMjAlMjBmb3IlMjBpbnB1dCUyQyUyMG91dHB1dCUyMGluJTIwZGF0YWxvYWRlciUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG91dHB1dHMlMjAlM0QlMjBtb2RlbChpbnB1dCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBsb3NzJTIwJTNEJTIwbG9zc19mdW5jKG91dHB1dHMpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbG9zcy5iYWNrd2FyZCgpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwb3B0aW1pemVyLnN0ZXAoKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG9wdGltaXplci56ZXJvX2dyYWQoKQ==",highlighted:`from accelerate import Accelerator
accelerator = Accelerator(even_batches=True)
ddp_model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader)
with accelerator.join_uneven_inputs([ddp_model], even_batches=False):
for input, output in dataloader:
outputs = model(input)
loss = loss_func(outputs)
loss.backward()
optimizer.step()
optimizer.zero_grad()`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function mi(_){let t,m=`Should only be used in conjunction with Accelerator.save_state(). If a file is not registered for
checkpointing, it will not be loaded if stored in the directory.`;return{c(){t=i("p"),t.innerHTML=m},l(r){t=d(r,"P",{"data-svelte-h":!0}),j(t)!=="svelte-1y1ax9s"&&(t.innerHTML=m)},m(r,a){w(r,t,a)},p:T,d(r){r&&h(t)}}}function hi(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQW1vZGVsJTJDJTIwb3B0aW1pemVyJTJDJTIwbHJfc2NoZWR1bGVyJTIwJTNEJTIwLi4uJTBBbW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBscl9zY2hlZHVsZXIlMjAlM0QlMjBhY2NlbGVyYXRvci5wcmVwYXJlKG1vZGVsJTJDJTIwb3B0aW1pemVyJTJDJTIwbHJfc2NoZWR1bGVyKSUwQWFjY2VsZXJhdG9yLmxvYWRfc3RhdGUoJTIybXlfY2hlY2twb2ludCUyMik=",highlighted:`from accelerate import Accelerator
accelerator = Accelerator()
model, optimizer, lr_scheduler = ...
model, optimizer, lr_scheduler = accelerator.prepare(model, optimizer, lr_scheduler)
accelerator.load_state("my_checkpoint")`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function gi(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQXdpdGglMjBhY2NlbGVyYXRvci5sb2NhbF9tYWluX3Byb2Nlc3NfZmlyc3QoKSUzQSUwQSUyMCUyMCUyMCUyMCUyMyUyMFRoaXMlMjB3aWxsJTIwYmUlMjBwcmludGVkJTIwZmlyc3QlMjBieSUyMGxvY2FsJTIwcHJvY2VzcyUyMDAlMjB0aGVuJTIwaW4lMjBhJTIwc2VlbWluZ2x5JTBBJTIwJTIwJTIwJTIwJTIzJTIwcmFuZG9tJTIwb3JkZXIlMjBieSUyMHRoZSUyMG90aGVyJTIwcHJvY2Vzc2VzLiUwQSUyMCUyMCUyMCUyMHByaW50KGYlMjJUaGlzJTIwd2lsbCUyMGJlJTIwcHJpbnRlZCUyMGJ5JTIwcHJvY2VzcyUyMCU3QmFjY2VsZXJhdG9yLmxvY2FsX3Byb2Nlc3NfaW5kZXglN0QlMjIp",highlighted:`from accelerate import Accelerator
accelerator = Accelerator()
with accelerator.local_main_process_first():
# This will be printed first by local process 0 then in a seemingly
# random order by the other processes.
print(f"This will be printed by process {accelerator.local_process_index}")`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function ui(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQXdpdGglMjBhY2NlbGVyYXRvci5tYWluX3Byb2Nlc3NfZmlyc3QoKSUzQSUwQSUyMCUyMCUyMCUyMCUyMyUyMFRoaXMlMjB3aWxsJTIwYmUlMjBwcmludGVkJTIwZmlyc3QlMjBieSUyMHByb2Nlc3MlMjAwJTIwdGhlbiUyMGluJTIwYSUyMHNlZW1pbmdseSUwQSUyMCUyMCUyMCUyMCUyMyUyMHJhbmRvbSUyMG9yZGVyJTIwYnklMjB0aGUlMjBvdGhlciUyMHByb2Nlc3Nlcy4lMEElMjAlMjAlMjAlMjBwcmludChmJTIyVGhpcyUyMHdpbGwlMjBiZSUyMHByaW50ZWQlMjBieSUyMHByb2Nlc3MlMjAlN0JhY2NlbGVyYXRvci5wcm9jZXNzX2luZGV4JTdEJTIyKQ==",highlighted:`from accelerate import Accelerator
accelerator = Accelerator()
with accelerator.main_process_first():
# This will be printed first by process 0 then in a seemingly
# random order by the other processes.
print(f"This will be printed by process {accelerator.process_index}")`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function fi(_){let t,m="context_parallel
is currently only supported together with FSDP2, and requires parallelism_config.cp_size
>",r,a,o=`
object
must have a load_state_dict
and state_dict
function to be stored.";return{c(){t=i("p"),t.innerHTML=m},l(r){t=d(r,"P",{"data-svelte-h":!0}),j(t)!=="svelte-1kglckk"&&(t.innerHTML=m)},m(r,a){w(r,t,a)},p:T,d(r){r&&h(t)}}}function Xi(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQSUyMyUyMEFzc3VtZSUyMCU2MEN1c3RvbU9iamVjdCU2MCUyMGhhcyUyMGElMjAlNjBzdGF0ZV9kaWN0JTYwJTIwYW5kJTIwJTYwbG9hZF9zdGF0ZV9kaWN0JTYwJTIwZnVuY3Rpb24uJTBBb2JqJTIwJTNEJTIwQ3VzdG9tT2JqZWN0KCklMEFhY2NlbGVyYXRvci5yZWdpc3Rlcl9mb3JfY2hlY2twb2ludGluZyhvYmopJTBBYWNjZWxlcmF0b3Iuc2F2ZV9zdGF0ZSglMjJjaGVja3BvaW50LnB0JTIyKQ==",highlighted:'from accelerate import Accelerator\n\n accelerator = Accelerator()\n# Assume `CustomObject` has a `state_dict` and `load_state_dict` function.\n obj = CustomObject()\n accelerator.register_for_checkpointing(obj)\n accelerator.save_state("checkpoint.pt")',wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Ai(_){let t,m=`Should only be used in conjunction with Accelerator.register_save_state_pre_hook(). Can be useful to load
configurations in addition to model weights. Can also be used to overwrite model loading with a customized
method. In this case, make sure to remove already loaded models from the models list.`;return{c(){t=i("p"),t.innerHTML=m},l(r){t=d(r,"P",{"data-svelte-h":!0}),j(t)!=="svelte-y7x8xo"&&(t.innerHTML=m)},m(r,a){w(r,t,a)},p:T,d(r){r&&h(t)}}}function Vi(_){let t,m=`Should only be used in conjunction with Accelerator.register_load_state_pre_hook(). Can be useful to save
configurations in addition to model weights. Can also be used to overwrite model saving with a customized
method. In this case, make sure to remove already loaded weights from the weights list.`;return{c(){t=i("p"),t.innerHTML=m},l(r){t=d(r,"P",{"data-svelte-h":!0}),j(t)!=="svelte-k6vf2f"&&(t.innerHTML=m)},m(r,a){w(r,t,a)},p:T,d(r){r&&h(t)}}}function Ni(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQWFyciUyMCUzRCUyMCU1QjAlMkMlMjAxJTJDJTIwMiUyQyUyMDMlNUQlMEFhY2NlbGVyYXRvci5zYXZlKGFyciUyQyUyMCUyMmFycmF5LnBrbCUyMik=",highlighted:`from accelerate import Accelerator
accelerator = Accelerator()
arr = [0, 1, 2, 3]
accelerator.save(arr, "array.pkl")`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Qi(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQW1vZGVsJTIwJTNEJTIwLi4uJTBBYWNjZWxlcmF0b3Iuc2F2ZV9tb2RlbChtb2RlbCUyQyUyMHNhdmVfZGlyZWN0b3J5KQ==",highlighted:`from accelerate import Accelerator
accelerator = Accelerator()
model = ...
accelerator.save_model(model, save_directory)`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Fi(_){let t,m=`Should only be used when wanting to save a checkpoint during training and restoring the state in the same
environment.`;return{c(){t=i("p"),t.textContent=m},l(r){t=d(r,"P",{"data-svelte-h":!0}),j(t)!=="svelte-1ljq3ee"&&(t.textContent=m)},m(r,a){w(r,t,a)},p:T,d(r){r&&h(t)}}}function zi(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQW1vZGVsJTJDJTIwb3B0aW1pemVyJTJDJTIwbHJfc2NoZWR1bGVyJTIwJTNEJTIwLi4uJTBBbW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBscl9zY2hlZHVsZXIlMjAlM0QlMjBhY2NlbGVyYXRvci5wcmVwYXJlKG1vZGVsJTJDJTIwb3B0aW1pemVyJTJDJTIwbHJfc2NoZWR1bGVyKSUwQWFjY2VsZXJhdG9yLnNhdmVfc3RhdGUob3V0cHV0X2RpciUzRCUyMm15X2NoZWNrcG9pbnQlMjIp",highlighted:`from accelerate import Accelerator
accelerator = Accelerator()
model, optimizer, lr_scheduler = ...
model, optimizer, lr_scheduler = accelerator.prepare(model, optimizer, lr_scheduler)
accelerator.save_state(output_dir="my_checkpoint")`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Ri(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQSUyMyUyMEFzc3VtZSUyMGxhdGVyJTIwaW4lMjB0aGUlMjB0cmFpbmluZyUyMHNjcmlwdCUwQSUyMyUyMCU2MHNob3VsZF9kb19icmVha3BvaW50JTYwJTIwaXMlMjBhJTIwY3VzdG9tJTIwZnVuY3Rpb24lMjB0byUyMG1vbml0b3IlMjB3aGVuJTIwdG8lMjBicmVhayUyQyUwQSUyMyUyMGUuZy4lMjB3aGVuJTIwdGhlJTIwbG9zcyUyMGlzJTIwTmFOJTBBaWYlMjBzaG91bGRfZG9fYnJlYWtwb2ludChsb3NzKSUzQSUwQSUyMCUyMCUyMCUyMGFjY2VsZXJhdG9yLnNldF90cmlnZ2VyKCklMEElMjMlMjBBc3N1bWUlMjBsYXRlciUyMGluJTIwdGhlJTIwdHJhaW5pbmclMjBzY3JpcHQlMEFpZiUyMGFjY2VsZXJhdG9yLmNoZWNrX2JyZWFrcG9pbnQoKSUzQSUwQSUyMCUyMCUyMCUyMGJyZWFr",highlighted:`from accelerate import Accelerator
accelerator = Accelerator()
# Assume later in the training script
# \`should_do_breakpoint\` is a custom function to monitor when to break,
# e.g. when the loss is NaN
if should_do_breakpoint(loss):
accelerator.set_trigger()
# Assume later in the training script
if accelerator.check_breakpoint():
break`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Ei(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQWRhdGFsb2FkZXIlMkMlMjBtb2RlbCUyQyUyMG9wdGltaXplciUyQyUyMHNjaGVkdWxlciUyMCUzRCUyMGFjY2VsZXJhdG9yLnByZXBhcmUoZGF0YWxvYWRlciUyQyUyMG1vZGVsJTJDJTIwb3B0aW1pemVyJTJDJTIwc2NoZWR1bGVyKSUwQXNraXBwZWRfZGF0YWxvYWRlciUyMCUzRCUyMGFjY2VsZXJhdG9yLnNraXBfZmlyc3RfYmF0Y2hlcyhkYXRhbG9hZGVyJTJDJTIwbnVtX2JhdGNoZXMlM0QyKSUwQSUyMyUyMGZvciUyMHRoZSUyMGZpcnN0JTIwZXBvY2glMjBvbmx5JTBBZm9yJTIwaW5wdXQlMkMlMjB0YXJnZXQlMjBpbiUyMHNraXBwZWRfZGF0YWxvYWRlciUzQSUwQSUyMCUyMCUyMCUyMG9wdGltaXplci56ZXJvX2dyYWQoKSUwQSUyMCUyMCUyMCUyMG91dHB1dCUyMCUzRCUyMG1vZGVsKGlucHV0KSUwQSUyMCUyMCUyMCUyMGxvc3MlMjAlM0QlMjBsb3NzX2Z1bmMob3V0cHV0JTJDJTIwdGFyZ2V0KSUwQSUyMCUyMCUyMCUyMGFjY2VsZXJhdG9yLmJhY2t3YXJkKGxvc3MpJTBBJTIwJTIwJTIwJTIwb3B0aW1pemVyLnN0ZXAoKSUwQSUwQSUyMyUyMHN1YnNlcXVlbnQlMjBlcG9jaHMlMEFmb3IlMjBpbnB1dCUyQyUyMHRhcmdldCUyMGluJTIwZGF0YWxvYWRlciUzQSUwQSUyMCUyMCUyMCUyMG9wdGltaXplci56ZXJvX2dyYWQoKSUwQSUyMCUyMCUyMCUyMC4uLg==",highlighted:`from accelerate import Accelerator
accelerator = Accelerator()
dataloader, model, optimizer, scheduler = accelerator.prepare(dataloader, model, optimizer, scheduler)
skipped_dataloader = accelerator.skip_first_batches(dataloader, num_batches=2)
# for the first epoch only
for input, target in skipped_dataloader:
optimizer.zero_grad()
output = model(input)
loss = loss_func(output, target)
accelerator.backward(loss)
optimizer.step()
# subsequent epochs
for input, target in dataloader:
optimizer.zero_grad()
...`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Si(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"JTIzJTIwQXNzdW1lJTIwdGhlcmUlMjBhcmUlMjB0d28lMjBwcm9jZXNzZXMlMEFmcm9tJTIwYWNjZWxlcmF0ZSUyMGltcG9ydCUyMEFjY2VsZXJhdG9yJTBBJTBBYWNjZWxlcmF0b3IlMjAlM0QlMjBBY2NlbGVyYXRvcigpJTBBd2l0aCUyMGFjY2VsZXJhdG9yLnNwbGl0X2JldHdlZW5fcHJvY2Vzc2VzKCU1QiUyMkElMjIlMkMlMjAlMjJCJTIyJTJDJTIwJTIyQyUyMiU1RCklMjBhcyUyMGlucHV0cyUzQSUwQSUyMCUyMCUyMCUyMHByaW50KGlucHV0cyklMEElMjMlMjBQcm9jZXNzJTIwMCUwQSU1QiUyMkElMjIlMkMlMjAlMjJCJTIyJTVEJTBBJTIzJTIwUHJvY2VzcyUyMDElMEElNUIlMjJDJTIyJTVEJTBBJTBBd2l0aCUyMGFjY2VsZXJhdG9yLnNwbGl0X2JldHdlZW5fcHJvY2Vzc2VzKCU1QiUyMkElMjIlMkMlMjAlMjJCJTIyJTJDJTIwJTIyQyUyMiU1RCUyQyUyMGFwcGx5X3BhZGRpbmclM0RUcnVlKSUyMGFzJTIwaW5wdXRzJTNBJTBBJTIwJTIwJTIwJTIwcHJpbnQoaW5wdXRzKSUwQSUyMyUyMFByb2Nlc3MlMjAwJTBBJTVCJTIyQSUyMiUyQyUyMCUyMkIlMjIlNUQlMEElMjMlMjBQcm9jZXNzJTIwMSUwQSU1QiUyMkMlMjIlMkMlMjAlMjJDJTIyJTVE",highlighted:`# Assume there are two processes
from accelerate import Accelerator
accelerator = Accelerator()
with accelerator.split_between_processes(["A", "B", "C"]) as inputs:
print(inputs)
# Process 0
["A", "B"]
# Process 1
["C"]
with accelerator.split_between_processes(["A", "B", "C"], apply_padding=True) as inputs:
print(inputs)
# Process 0
["A", "B"]
# Process 1
["C", "C"]`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Hi(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQWRhdGFsb2FkZXIlMkMlMjBtb2RlbCUyQyUyMG9wdGltaXplciUyMCUzRCUyMGFjY2VsZXJhdG9yLnByZXBhcmUoZGF0YWxvYWRlciUyQyUyMG1vZGVsJTJDJTIwb3B0aW1pemVyKSUwQSUwQXdpdGglMjBhY2NlbGVyYXRvci5ub19zeW5jKCklM0ElMEElMjAlMjAlMjAlMjBsb3NzX2ElMjAlM0QlMjBsb3NzX2Z1bmMobW9kZWwoaW5wdXRfYSkpJTIwJTIwJTIzJTIwZmlyc3QlMjBmb3J3YXJkJTIwcGFzcyUwQSUyMCUyMCUyMCUyMGxvc3NfYiUyMCUzRCUyMGxvc3NfZnVuYyhtb2RlbChpbnB1dF9iKSklMjAlMjAlMjMlMjBzZWNvbmQlMjBmb3J3YXJkJTIwcGFzcyUwQWFjY2VsZXJhdG9yLmJhY2t3YXJkKGxvc3NfYSklMjAlMjAlMjMlMjBObyUyMHN5bmNocm9uaXphdGlvbiUyMGFjcm9zcyUyMHByb2Nlc3NlcyUyQyUyMG9ubHklMjBhY2N1bXVsYXRlJTIwZ3JhZGllbnRzJTBBd2l0aCUyMGFjY2VsZXJhdG9yLnRyaWdnZXJfc3luY19pbl9iYWNrd2FyZChtb2RlbCklM0ElMEElMjAlMjAlMjAlMjBhY2NlbGVyYXRvci5iYWNrd2FyZChsb3NzX2IpJTIwJTIwJTIzJTIwU3luY2hyb25pemF0aW9uJTIwYWNyb3NzJTIwYWxsJTIwcHJvY2Vzc2VzJTBBb3B0aW1pemVyLnN0ZXAoKSUwQW9wdGltaXplci56ZXJvX2dyYWQoKQ==",highlighted:`from accelerate import Accelerator
accelerator = Accelerator()
dataloader, model, optimizer = accelerator.prepare(dataloader, model, optimizer)
with accelerator.no_sync():
loss_a = loss_func(model(input_a)) # first forward pass
loss_b = loss_func(model(input_b)) # second forward pass
accelerator.backward(loss_a) # No synchronization across processes, only accumulate gradients
with accelerator.trigger_sync_in_backward(model):
accelerator.backward(loss_b) # Synchronization across all processes
optimizer.step()
optimizer.zero_grad()`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Li(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQW1vZGVsJTJDJTIwb3B0aW1pemVyJTIwJTNEJTIwYWNjZWxlcmF0b3IucHJlcGFyZShtb2RlbCUyQyUyMG9wdGltaXplciklMEFvdXRwdXRzJTIwJTNEJTIwbW9kZWwoaW5wdXRzKSUwQWxvc3MlMjAlM0QlMjBsb3NzX2ZuKG91dHB1dHMlMkMlMjBsYWJlbHMpJTBBYWNjZWxlcmF0b3IuYmFja3dhcmQobG9zcyklMEFhY2NlbGVyYXRvci51bnNjYWxlX2dyYWRpZW50cyhvcHRpbWl6ZXIlM0RvcHRpbWl6ZXIp",highlighted:`from accelerate import Accelerator
accelerator = Accelerator()
model, optimizer = accelerator.prepare(model, optimizer)
outputs = model(inputs)
loss = loss_fn(outputs, labels)
accelerator.backward(loss)
accelerator.unscale_gradients(optimizer=optimizer)`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Pi(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"JTIzJTIwQXNzdW1pbmclMjB0d28lMjBHUFUlMjBwcm9jZXNzZXMlMEFmcm9tJTIwdG9yY2gubm4ucGFyYWxsZWwlMjBpbXBvcnQlMjBEaXN0cmlidXRlZERhdGFQYXJhbGxlbCUwQWZyb20lMjBhY2NlbGVyYXRlJTIwaW1wb3J0JTIwQWNjZWxlcmF0b3IlMEElMEFhY2NlbGVyYXRvciUyMCUzRCUyMEFjY2VsZXJhdG9yKCklMEFtb2RlbCUyMCUzRCUyMGFjY2VsZXJhdG9yLnByZXBhcmUoTXlNb2RlbCgpKSUwQXByaW50KG1vZGVsLl9fY2xhc3NfXy5fX25hbWVfXyklMEElMEFtb2RlbCUyMCUzRCUyMGFjY2VsZXJhdG9yLnVud3JhcF9tb2RlbChtb2RlbCklMEFwcmludChtb2RlbC5fX2NsYXNzX18uX19uYW1lX18p",highlighted:`# Assuming two GPU processes
from torch.nn.parallel import DistributedDataParallel
from accelerate import Accelerator
accelerator = Accelerator()
model = accelerator.prepare(MyModel())
print(model.__class__.__name__)
DistributedDataParallel
model = accelerator.unwrap_model(model)
print(model.__class__.__name__)
MyModel`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Di(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"JTIzJTIwQXNzdW1pbmclMjB0d28lMjBHUFUlMjBwcm9jZXNzZXMlMEFpbXBvcnQlMjB0aW1lJTBBZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQWlmJTIwYWNjZWxlcmF0b3IuaXNfbWFpbl9wcm9jZXNzJTNBJTBBJTIwJTIwJTIwJTIwdGltZS5zbGVlcCgyKSUwQWVsc2UlM0ElMEElMjAlMjAlMjAlMjBwcmludCglMjJJJ20lMjB3YWl0aW5nJTIwZm9yJTIwdGhlJTIwbWFpbiUyMHByb2Nlc3MlMjB0byUyMGZpbmlzaCUyMGl0cyUyMHNsZWVwLi4uJTIyKSUwQWFjY2VsZXJhdG9yLndhaXRfZm9yX2V2ZXJ5b25lKCklMEElMjMlMjBTaG91bGQlMjBwcmludCUyMG9uJTIwZXZlcnklMjBwcm9jZXNzJTIwYXQlMjB0aGUlMjBzYW1lJTIwdGltZSUwQXByaW50KCUyMkV2ZXJ5b25lJTIwaXMlMjBoZXJlJTIyKQ==",highlighted:`# Assuming two GPU processes
import time
from accelerate import Accelerator
accelerator = Accelerator()
if accelerator.is_main_process:
time.sleep(2)
else:
print("I'm waiting for the main process to finish its sleep...")
accelerator.wait_for_everyone()
# Should print on every process at the same time
print("Everyone is here")`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Ki(_){let t,m,r,a,o,e,p,bo='The Accelerator is the main class for enabling distributed training on any type of training setup. Read the Add Accelerator to your code tutorial to learn more about how to add the Accelerator to your script.',El,ja,Sl,v,wa,ql,_s,$o="Creates an instance of an accelerator for distributed training or mixed precision training.",Ol,vs,yo="Available attributes:",er,Js,Mo=`torch.device
) — The device to use.int
) — The process index on the current machine.str
) — The configured mixed precision mode.int
) — The total number of processes used for training.bool
) — Whether or not the optimizer update was skipped (because of
gradient overflow in mixed precision), in which
case the learning rate should not be changed.int
) — The overall index of the current process among all processes.bool
) — Whether the gradients are currently being synced across all processes.bool
) — Whether the current configuration is for distributed training.autocast_handler
can be passed in to override the one set in the Accelerator
object. This is
useful in blocks under autocast
where you want to revert to fp32.`,cr,yt,or,H,Ja,pr,ks,vo=`Scales the gradients in accordance to the GradientAccumulationPlugin
and calls the correct backward()
based
on the configuration.`,ir,Gs,Jo="Should be used in lieu of loss.backward()
.",dr,Mt,mr,L,Ta,hr,Bs,To=`Checks if the internal trigger tensor has been set to 1 in any of the processes. If so, will return True
and
reset the trigger tensor to 0.`,gr,Is,Uo=`Note:
Does not require wait_for_everyone()
`,ur,jt,fr,_e,Ua,br,Cs,xo=`Alias for Accelerate.free_memory
, releases all references to the internal objects stored and call the
garbage collector. You should call this method between two trainings with different models/optimizers.`,$r,wt,yr,ve,xa,Mr,Zs,ko="Should be used in place of torch.nn.utils.clip_grad_norm_
.",jr,_t,wr,Je,ka,_r,Ws,Go="Should be used in place of torch.nn.utils.clip_grad_value_
.",vr,vt,Jr,Te,Ga,Tr,Ys,Bo=`Runs any special end training behaviors, such as stopping trackers on the main process only or destoying
process group. Should always be called at the end of your script if using experiment tracking.`,Ur,Jt,xr,Ue,Ba,kr,Xs,Io=`Will release all references to the internal objects stored and call the garbage collector. You should call this
method between two trainings with different models/optimizers. Also will reset Accelerator.step
to 0.`,Gr,Tt,Br,P,Ia,Ir,As,Co=`Gather the values in tensor across all processes and concatenate them on the first dimension. Useful to
regroup the predictions from all processes when doing evaluation.`,Cr,Vs,Zo=`Note:
This gather happens in all processes.`,Zr,Ut,Wr,xe,Ca,Yr,Ns,Wo=`Gathers input_data
and potentially drops duplicates in the last batch if on a distributed system. Should be
used for gathering the inputs and targets for metric calculation.`,Xr,xt,Ar,ke,Za,Vr,Qs,Yo=`Returns the state dictionary of a model sent through Accelerator.prepare() potentially without full
precision.`,Nr,kt,Qr,Ge,Wa,Fr,Fs,Xo="Returns a tracker
from self.trackers
based on name
on the main process only.",zr,Gt,Rr,A,Ya,Er,zs,Ao=`A context manager that facilitates distributed training or evaluation on uneven inputs, which acts as a wrapper
around torch.distributed.algorithms.join
. This is useful when the total batch size does not evenly divide the
length of the dataset.`,Sr,Bt,Hr,It,Lr,Ct,Pr,D,Xa,Dr,Rs,Vo="Loads the current states of the model, optimizer, scaler, RNG generators, and registered objects.",Kr,Zt,qr,Wt,Or,K,Aa,en,Es,No="Lets the local main process go inside a with block.",tn,Ss,Qo="The other processes will enter the with block after the main process exits.",an,Yt,sn,Xt,Va,ln,Hs,Fo="Runs backward pass on LOMO optimizers.",rn,q,Na,nn,Ls,zo="Lets the main process go first inside a with block.",cn,Ps,Ro="The other processes will enter the with block after the main process exits.",on,At,pn,V,Qa,dn,Ds,Eo="A context manager that enables context parallel training.",mn,Vt,hn,Nt,gn,Qt,un,O,Fa,fn,Ks,So=`A context manager to disable gradient synchronizations across DDP processes by calling
torch.nn.parallel.DistributedDataParallel.no_sync
.`,bn,qs,Ho="If model
is not in DDP, this context manager does nothing",$n,Ft,yn,Be,za,Mn,Os,Lo=`A decorator that will run the decorated function on the last process only. Can also be called using the
PartialState
class.`,jn,zt,wn,Ie,Ra,_n,el,Po=`A decorator that will run the decorated function on the local main process only. Can also be called using the
PartialState
class.`,vn,Rt,Jn,Ce,Ea,Tn,tl,Do=`A decorator that will run the decorated function on a given local process index only. Can also be called using
the PartialState
class.`,Un,Et,xn,Ze,Sa,kn,al,Ko=`A decorator that will run the decorated function on the main process only. Can also be called using the
PartialState
class.`,Gn,St,Bn,We,Ha,In,sl,qo=`A decorator that will run the decorated function on a given process index only. Can also be called using the
PartialState
class.`,Cn,Ht,Zn,Ye,La,Wn,ll,Oo=`Recursively pad the tensors in a nested list/tuple/dictionary of tensors from all devices to the same size so
they can safely be gathered.`,Yn,Lt,Xn,N,Pa,An,rl,ep=`Prepare all objects passed in args
for distributed training and mixed precision, then return them in the same
order.`,Vn,Pt,Nn,Dt,Qn,Kt,Fn,Xe,Da,zn,nl,tp=`Prepares a PyTorch DataLoader for training in any distributed setup. It is recommended to use
Accelerator.prepare() instead.`,Rn,qt,En,Ae,Ka,Sn,cl,ap=`Prepares a PyTorch model for training in any distributed setup. It is recommended to use
Accelerator.prepare() instead.`,Hn,Ot,Ln,Ve,qa,Pn,ol,sp=`Prepares a PyTorch Optimizer for training in any distributed setup. It is recommended to use
Accelerator.prepare() instead.`,Dn,ea,Kn,Ne,Oa,qn,pl,lp=`Prepares a PyTorch Scheduler for training in any distributed setup. It is recommended to use
Accelerator.prepare() instead.`,On,ta,ec,Qe,es,tc,il,rp="Drop in replacement of print()
to only print once per server.",ac,aa,sc,ee,ts,lc,dl,np=`Will profile the code inside the context manager. The profile will be saved to a Chrome Trace file if
profile_handler.output_trace_dir
is set.`,rc,ml,cp="A different profile_handler
can be passed in to override the one set in the Accelerator
object.",nc,sa,cc,te,as,oc,hl,op="Reduce the values in tensor across all processes based on reduction.",pc,gl,pp=`Note:
All processes get the reduced value.`,ic,la,dc,Q,ss,mc,ul,ip="Makes note of objects
and will save or load them in during save_state
or load_state
.",hc,fl,dp=`These should be utilized when the state is being loaded or saved in the same script. It is not designed to be
used in different scripts.`,gc,ra,uc,na,fc,I,ls,bc,bl,mp='Registers a pre hook to be run before load_checkpoint
is called in Accelerator.load_state().',$c,$l,hp="The hook should have the following signature:",yc,yl,gp="hook(models: list[torch.nn.Module], input_dir: str) -> None
",Mc,Ml,up=`The models
argument are the models as saved in the accelerator state under accelerator._models
, and the
input_dir
argument is the input_dir
argument passed to Accelerator.load_state().`,jc,ca,wc,C,rs,_c,jl,fp='Registers a pre hook to be run before save_checkpoint
is called in Accelerator.save_state().',vc,wl,bp="The hook should have the following signature:",Jc,_l,$p="hook(models: list[torch.nn.Module], weights: list[dict[str, torch.Tensor]], input_dir: str) -> None
",Tc,vl,yp=`The models
argument are the models as saved in the accelerator state under accelerator._models
, weigths
argument are the state dicts of the models
, and the input_dir
argument is the input_dir
argument passed
to Accelerator.load_state().`,Uc,oa,xc,ae,ns,kc,Jl,Mp="Save the object passed to disk once per machine. Use in place of torch.save
.",Gc,Tl,jp=`Note:
If save_on_each_node
was passed in as a ProjectConfiguration
, will save the object once per node,
rather than only once on the main node.`,Bc,pa,Ic,Fe,cs,Cc,Ul,wp="Save a model so that it can be re-loaded using load_checkpoint_in_model",Zc,ia,Wc,Z,os,Yc,xl,_p="Saves the current states of the model, optimizer, scaler, RNG generators, and registered objects to a folder.",Xc,kl,vp=`If a ProjectConfiguration
was passed to the Accelerator
object with automatic_checkpoint_naming
enabled
then checkpoints will be saved to self.project_dir/checkpoints
. If the number of current saves is greater
than total_limit
then the oldest save is deleted. Each checkpoint is saved in separate folders named
checkpoint_<iteration>
.`,Ac,Gl,Jp="Otherwise they are just saved to output_dir
.",Vc,da,Nc,ma,Qc,se,ps,Fc,Bl,Tp=`Sets the internal trigger tensor to 1 on the current process. A latter check should follow using this which
will check across all processes.`,zc,Il,Up=`Note:
Does not require wait_for_everyone()
`,Rc,ha,Ec,ze,is,Sc,Cl,xp="Creates a new torch.utils.data.DataLoader
that will efficiently skip the first num_batches
.",Hc,ga,Lc,le,ds,Pc,Zl,kp=`Splits input
between self.num_processes
quickly and can be then used on that process. Useful when doing
distributed inference, such as with different prompts.`,Dc,Wl,Gp="Note that when using a dict
, all keys need to have the same number of elements.",Kc,ua,qc,re,ms,Oc,Yl,Bp=`Trigger the sync of the gradients in the next backward pass of the model after multiple forward passes under
Accelerator.no_sync
(only applicable in multi-GPU scenarios).`,eo,Xl,Ip="If the script is not launched in distributed mode, this context manager does nothing.",to,fa,ao,ne,hs,so,Al,Cp="Unscale the gradients in mixed precision training with AMP. This is a noop in all other settings.",lo,Vl,Zp='Likely should be called through Accelerator.clipgrad_norm() or Accelerator.clipgrad_value()',ro,ba,no,Re,gs,co,Nl,Wp=`Unwraps the model
from the additional layer possible added by prepare(). Useful before saving
the model.`,oo,$a,po,ya,us,io,Ql,Yp="Verifies that model
has not been prepared with big model inference with a device-map resembling auto
.",mo,Ee,fs,ho,Fl,Xp=`Will stop the execution of the current process until every other process has reached that point (so this does
nothing when the script is only run in one process). Useful to do before saving a model.`,go,Ma,Hl,bs,Ll,He,$s,uo,zl,Ap="Recursively gather object in a nested list/tuple/dictionary of objects from all devices.",Pl,ys,Dl,Rl,Kl;return o=new fo({props:{title:"Accelerator",local:"accelerator",headingTag:"h1"}}),ja=new fo({props:{title:"Accelerator",local:"api ][ accelerate.Accelerator",headingTag:"h2"}}),wa=new k({props:{name:"class accelerate.Accelerator",anchor:"accelerate.Accelerator",parameters:[{name:"device_placement",val:": bool = True"},{name:"split_batches",val:": bool =