import{s as Rp,o as Ep,n as T}from"../chunks/scheduler.defa9a21.js";import{S as Sp,i as Hp,g as i,s as n,r as g,A as Lp,h as d,f as h,c,j as U,u,x as j,k as x,y as l,a as w,v as f,d as b,t as $,w as y}from"../chunks/index.fe795e71.js";import{T as Se}from"../chunks/Tip.179eb360.js";import{D as k}from"../chunks/Docstring.90d6fe5c.js";import{C as B}from"../chunks/CodeBlock.204b6c34.js";import{E as G}from"../chunks/ExampleCodeBlock.f6fae62d.js";import{H as fo,E as Pp}from"../chunks/getInferenceSnippets.2234a8dd.js";function Dp(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoZ3JhZGllbnRfYWNjdW11bGF0aW9uX3N0ZXBzJTNEMSklMEFkYXRhbG9hZGVyJTJDJTIwbW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBzY2hlZHVsZXIlMjAlM0QlMjBhY2NlbGVyYXRvci5wcmVwYXJlKGRhdGFsb2FkZXIlMkMlMjBtb2RlbCUyQyUyMG9wdGltaXplciUyQyUyMHNjaGVkdWxlciklMEElMEFmb3IlMjBpbnB1dCUyQyUyMG91dHB1dCUyMGluJTIwZGF0YWxvYWRlciUzQSUwQSUyMCUyMCUyMCUyMHdpdGglMjBhY2NlbGVyYXRvci5hY2N1bXVsYXRlKG1vZGVsKSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG91dHB1dHMlMjAlM0QlMjBtb2RlbChpbnB1dCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBsb3NzJTIwJTNEJTIwbG9zc19mdW5jKG91dHB1dHMpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbG9zcy5iYWNrd2FyZCgpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwb3B0aW1pemVyLnN0ZXAoKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHNjaGVkdWxlci5zdGVwKCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBvcHRpbWl6ZXIuemVyb19ncmFkKCk=",highlighted:`>>> from accelerate import Accelerator >>> accelerator = Accelerator(gradient_accumulation_steps=1) >>> dataloader, model, optimizer, scheduler = accelerator.prepare(dataloader, model, optimizer, scheduler) >>> for input, output in dataloader: ... with accelerator.accumulate(model): ... outputs = model(input) ... loss = loss_func(outputs) ... loss.backward() ... optimizer.step() ... scheduler.step() ... optimizer.zero_grad()`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Kp(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IobWl4ZWRfcHJlY2lzaW9uJTNEJTIyZnAxNiUyMiklMEF3aXRoJTIwYWNjZWxlcmF0b3IuYXV0b2Nhc3QoKSUzQSUwQSUyMCUyMCUyMCUyMHRyYWluKCk=",highlighted:`>>> from accelerate import Accelerator >>> accelerator = Accelerator(mixed_precision="fp16") >>> with accelerator.autocast(): ... train()`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function qp(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoZ3JhZGllbnRfYWNjdW11bGF0aW9uX3N0ZXBzJTNEMiklMEFvdXRwdXRzJTIwJTNEJTIwbW9kZWwoaW5wdXRzKSUwQWxvc3MlMjAlM0QlMjBsb3NzX2ZuKG91dHB1dHMlMkMlMjBsYWJlbHMpJTBBYWNjZWxlcmF0b3IuYmFja3dhcmQobG9zcyk=",highlighted:`>>> from accelerate import Accelerator >>> accelerator = Accelerator(gradient_accumulation_steps=2) >>> outputs = model(inputs) >>> loss = loss_fn(outputs, labels) >>> accelerator.backward(loss)`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Op(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQSUyMyUyMEFzc3VtZSUyMGxhdGVyJTIwaW4lMjB0aGUlMjB0cmFpbmluZyUyMHNjcmlwdCUwQSUyMyUyMCU2MHNob3VsZF9kb19icmVha3BvaW50JTYwJTIwaXMlMjBhJTIwY3VzdG9tJTIwZnVuY3Rpb24lMjB0byUyMG1vbml0b3IlMjB3aGVuJTIwdG8lMjBicmVhayUyQyUwQSUyMyUyMGUuZy4lMjB3aGVuJTIwdGhlJTIwbG9zcyUyMGlzJTIwTmFOJTBBaWYlMjBzaG91bGRfZG9fYnJlYWtwb2ludChsb3NzKSUzQSUwQSUyMCUyMCUyMCUyMGFjY2VsZXJhdG9yLnNldF90cmlnZ2VyKCklMEElMjMlMjBBc3N1bWUlMjBsYXRlciUyMGluJTIwdGhlJTIwdHJhaW5pbmclMjBzY3JpcHQlMEFpZiUyMGFjY2VsZXJhdG9yLmNoZWNrX3RyaWdnZXIoKSUzQSUwQSUyMCUyMCUyMCUyMGJyZWFr",highlighted:`>>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> # Assume later in the training script >>> # \`should_do_breakpoint\` is a custom function to monitor when to break, >>> # e.g. when the loss is NaN >>> if should_do_breakpoint(loss): ... accelerator.set_trigger() >>> # Assume later in the training script >>> if accelerator.check_trigger(): ... break`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function ei(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQW1vZGVsJTJDJTIwb3B0aW1pemVyJTJDJTIwc2NoZWR1bGVyJTIwJTNEJTIwLi4uJTBBbW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBzY2hlZHVsZXIlMjAlM0QlMjBhY2NlbGVyYXRvci5wcmVwYXJlKG1vZGVsJTJDJTIwb3B0aW1pemVyJTJDJTIwc2NoZWR1bGVyKSUwQW1vZGVsJTJDJTIwb3B0aW1pemVyJTJDJTIwc2NoZWR1bGVyJTIwJTNEJTIwYWNjZWxlcmF0b3IuY2xlYXIobW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBzY2hlZHVsZXIp",highlighted:`>>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> model, optimizer, scheduler = ... >>> model, optimizer, scheduler = accelerator.prepare(model, optimizer, scheduler) >>> model, optimizer, scheduler = accelerator.clear(model, optimizer, scheduler)`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function ti(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoZ3JhZGllbnRfYWNjdW11bGF0aW9uX3N0ZXBzJTNEMiklMEFkYXRhbG9hZGVyJTJDJTIwbW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBzY2hlZHVsZXIlMjAlM0QlMjBhY2NlbGVyYXRvci5wcmVwYXJlKGRhdGFsb2FkZXIlMkMlMjBtb2RlbCUyQyUyMG9wdGltaXplciUyQyUyMHNjaGVkdWxlciklMEElMEFmb3IlMjBpbnB1dCUyQyUyMHRhcmdldCUyMGluJTIwZGF0YWxvYWRlciUzQSUwQSUyMCUyMCUyMCUyMG9wdGltaXplci56ZXJvX2dyYWQoKSUwQSUyMCUyMCUyMCUyMG91dHB1dCUyMCUzRCUyMG1vZGVsKGlucHV0KSUwQSUyMCUyMCUyMCUyMGxvc3MlMjAlM0QlMjBsb3NzX2Z1bmMob3V0cHV0JTJDJTIwdGFyZ2V0KSUwQSUyMCUyMCUyMCUyMGFjY2VsZXJhdG9yLmJhY2t3YXJkKGxvc3MpJTBBJTIwJTIwJTIwJTIwaWYlMjBhY2NlbGVyYXRvci5zeW5jX2dyYWRpZW50cyUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGFjY2VsZXJhdG9yLmNsaXBfZ3JhZF9ub3JtXyhtb2RlbC5wYXJhbWV0ZXJzKCklMkMlMjBtYXhfZ3JhZF9ub3JtKSUwQSUyMCUyMCUyMCUyMG9wdGltaXplci5zdGVwKCk=",highlighted:`>>> from accelerate import Accelerator >>> accelerator = Accelerator(gradient_accumulation_steps=2) >>> dataloader, model, optimizer, scheduler = accelerator.prepare(dataloader, model, optimizer, scheduler) >>> for input, target in dataloader: ... optimizer.zero_grad() ... output = model(input) ... loss = loss_func(output, target) ... accelerator.backward(loss) ... if accelerator.sync_gradients: ... accelerator.clip_grad_norm_(model.parameters(), max_grad_norm) ... optimizer.step()`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function ai(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoZ3JhZGllbnRfYWNjdW11bGF0aW9uX3N0ZXBzJTNEMiklMEFkYXRhbG9hZGVyJTJDJTIwbW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBzY2hlZHVsZXIlMjAlM0QlMjBhY2NlbGVyYXRvci5wcmVwYXJlKGRhdGFsb2FkZXIlMkMlMjBtb2RlbCUyQyUyMG9wdGltaXplciUyQyUyMHNjaGVkdWxlciklMEElMEFmb3IlMjBpbnB1dCUyQyUyMHRhcmdldCUyMGluJTIwZGF0YWxvYWRlciUzQSUwQSUyMCUyMCUyMCUyMG9wdGltaXplci56ZXJvX2dyYWQoKSUwQSUyMCUyMCUyMCUyMG91dHB1dCUyMCUzRCUyMG1vZGVsKGlucHV0KSUwQSUyMCUyMCUyMCUyMGxvc3MlMjAlM0QlMjBsb3NzX2Z1bmMob3V0cHV0JTJDJTIwdGFyZ2V0KSUwQSUyMCUyMCUyMCUyMGFjY2VsZXJhdG9yLmJhY2t3YXJkKGxvc3MpJTBBJTIwJTIwJTIwJTIwaWYlMjBhY2NlbGVyYXRvci5zeW5jX2dyYWRpZW50cyUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGFjY2VsZXJhdG9yLmNsaXBfZ3JhZF92YWx1ZV8obW9kZWwucGFyYW1ldGVycygpJTJDJTIwY2xpcF92YWx1ZSklMEElMjAlMjAlMjAlMjBvcHRpbWl6ZXIuc3RlcCgp",highlighted:`>>> from accelerate import Accelerator >>> accelerator = Accelerator(gradient_accumulation_steps=2) >>> dataloader, model, optimizer, scheduler = accelerator.prepare(dataloader, model, optimizer, scheduler) >>> for input, target in dataloader: ... optimizer.zero_grad() ... output = model(input) ... loss = loss_func(output, target) ... accelerator.backward(loss) ... if accelerator.sync_gradients: ... accelerator.clip_grad_value_(model.parameters(), clip_value) ... optimizer.step()`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function si(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IobG9nX3dpdGglM0QlMjJ0ZW5zb3Jib2FyZCUyMiklMEFhY2NlbGVyYXRvci5pbml0X3RyYWNrZXJzKCUyMm15X3Byb2plY3QlMjIpJTBBJTIzJTIwRG8lMjB0cmFpbmluZyUwQWFjY2VsZXJhdG9yLmVuZF90cmFpbmluZygp",highlighted:`>>> from accelerate import Accelerator >>> accelerator = Accelerator(log_with="tensorboard") >>> accelerator.init_trackers("my_project") >>> # Do training >>> accelerator.end_training()`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function li(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQW1vZGVsJTJDJTIwb3B0aW1pemVyJTJDJTIwc2NoZWR1bGVyJTIwJTNEJTIwLi4uJTBBbW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBzY2hlZHVsZXIlMjAlM0QlMjBhY2NlbGVyYXRvci5wcmVwYXJlKG1vZGVsJTJDJTIwb3B0aW1pemVyJTJDJTIwc2NoZWR1bGVyKSUwQW1vZGVsJTJDJTIwb3B0aW1pemVyJTJDJTIwc2NoZWR1bGVyJTIwJTNEJTIwYWNjZWxlcmF0b3IuZnJlZV9tZW1vcnkobW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBzY2hlZHVsZXIp",highlighted:`>>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> model, optimizer, scheduler = ... >>> model, optimizer, scheduler = accelerator.prepare(model, optimizer, scheduler) >>> model, optimizer, scheduler = accelerator.free_memory(model, optimizer, scheduler)`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function ri(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"JTIzJTIwQXNzdW1pbmclMjBmb3VyJTIwcHJvY2Vzc2VzJTBBaW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwYWNjZWxlcmF0ZSUyMGltcG9ydCUyMEFjY2VsZXJhdG9yJTBBJTBBYWNjZWxlcmF0b3IlMjAlM0QlMjBBY2NlbGVyYXRvcigpJTBBcHJvY2Vzc190ZW5zb3IlMjAlM0QlMjB0b3JjaC50ZW5zb3IoJTVCYWNjZWxlcmF0b3IucHJvY2Vzc19pbmRleCU1RCklMEFnYXRoZXJlZF90ZW5zb3IlMjAlM0QlMjBhY2NlbGVyYXRvci5nYXRoZXIocHJvY2Vzc190ZW5zb3IpJTBBZ2F0aGVyZWRfdGVuc29y",highlighted:`>>> # Assuming four processes >>> import torch >>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> process_tensor = torch.tensor([accelerator.process_index]) >>> gathered_tensor = accelerator.gather(process_tensor) >>> gathered_tensor tensor([0, 1, 2, 3])`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function ni(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"JTIzJTIwQXNzdW1pbmclMjB0d28lMjBwcm9jZXNzZXMlMkMlMjB3aXRoJTIwYSUyMGJhdGNoJTIwc2l6ZSUyMG9mJTIwNSUyMG9uJTIwYSUyMGRhdGFzZXQlMjB3aXRoJTIwOSUyMHNhbXBsZXMlMEFpbXBvcnQlMjB0b3JjaCUwQWZyb20lMjBhY2NlbGVyYXRlJTIwaW1wb3J0JTIwQWNjZWxlcmF0b3IlMEElMEFhY2NlbGVyYXRvciUyMCUzRCUyMEFjY2VsZXJhdG9yKCklMEFkYXRhbG9hZGVyJTIwJTNEJTIwdG9yY2gudXRpbHMuZGF0YS5EYXRhTG9hZGVyKHJhbmdlKDkpJTJDJTIwYmF0Y2hfc2l6ZSUzRDUpJTBBZGF0YWxvYWRlciUyMCUzRCUyMGFjY2VsZXJhdG9yLnByZXBhcmUoZGF0YWxvYWRlciklMEFiYXRjaCUyMCUzRCUyMG5leHQoaXRlcihkYXRhbG9hZGVyKSklMEFnYXRoZXJlZF9pdGVtcyUyMCUzRCUyMGFjY2VsZXJhdG9yLmdhdGhlcl9mb3JfbWV0cmljcyhiYXRjaCklMEFsZW4oZ2F0aGVyZWRfaXRlbXMp",highlighted:`>>> # Assuming two processes, with a batch size of 5 on a dataset with 9 samples >>> import torch >>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> dataloader = torch.utils.data.DataLoader(range(9), batch_size=5) >>> dataloader = accelerator.prepare(dataloader) >>> batch = next(iter(dataloader)) >>> gathered_items = accelerator.gather_for_metrics(batch) >>> len(gathered_items) 9`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function ci(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwYWNjZWxlcmF0ZSUyMGltcG9ydCUyMEFjY2VsZXJhdG9yJTBBJTBBYWNjZWxlcmF0b3IlMjAlM0QlMjBBY2NlbGVyYXRvcigpJTBBbmV0JTIwJTNEJTIwdG9yY2gubm4uTGluZWFyKDIlMkMlMjAyKSUwQW5ldCUyMCUzRCUyMGFjY2VsZXJhdG9yLnByZXBhcmUobmV0KSUwQXN0YXRlX2RpY3QlMjAlM0QlMjBhY2NlbGVyYXRvci5nZXRfc3RhdGVfZGljdChuZXQp",highlighted:`>>> import torch >>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> net = torch.nn.Linear(2, 2) >>> net = accelerator.prepare(net) >>> state_dict = accelerator.get_state_dict(net)`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function oi(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IobG9nX3dpdGglM0QlMjJ0ZW5zb3Jib2FyZCUyMiklMEFhY2NlbGVyYXRvci5pbml0X3RyYWNrZXJzKCUyMm15X3Byb2plY3QlMjIpJTBBdGVuc29yYm9hcmRfdHJhY2tlciUyMCUzRCUyMGFjY2VsZXJhdG9yLmdldF90cmFja2VyKCUyMnRlbnNvcmJvYXJkJTIyKQ==",highlighted:`>>> from accelerate import Accelerator >>> accelerator = Accelerator(log_with="tensorboard") >>> accelerator.init_trackers("my_project") >>> tensorboard_tracker = accelerator.get_tracker("tensorboard")`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function pi(_){let t,m=`join_uneven_inputs is only supported for Distributed Data Parallel training on multiple GPUs. For any other configuration, this method will have no effect.`;return{c(){t=i("p"),t.innerHTML=m},l(r){t=d(r,"P",{"data-svelte-h":!0}),j(t)!=="svelte-rvhzx4"&&(t.innerHTML=m)},m(r,a){w(r,t,a)},p:T,d(r){r&&h(t)}}}function ii(_){let t,m="Overidding even_batches will not affect iterable-style data loaders.";return{c(){t=i("p"),t.innerHTML=m},l(r){t=d(r,"P",{"data-svelte-h":!0}),j(t)!=="svelte-1kjj3tv"&&(t.innerHTML=m)},m(r,a){w(r,t,a)},p:T,d(r){r&&h(t)}}}function di(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoZXZlbl9iYXRjaGVzJTNEVHJ1ZSklMEFkZHBfbW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBkYXRhbG9hZGVyJTIwJTNEJTIwYWNjZWxlcmF0b3IucHJlcGFyZShtb2RlbCUyQyUyMG9wdGltaXplciUyQyUyMGRhdGFsb2FkZXIpJTBBJTBBd2l0aCUyMGFjY2VsZXJhdG9yLmpvaW5fdW5ldmVuX2lucHV0cyglNUJkZHBfbW9kZWwlNUQlMkMlMjBldmVuX2JhdGNoZXMlM0RGYWxzZSklM0ElMEElMjAlMjAlMjAlMjBmb3IlMjBpbnB1dCUyQyUyMG91dHB1dCUyMGluJTIwZGF0YWxvYWRlciUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG91dHB1dHMlMjAlM0QlMjBtb2RlbChpbnB1dCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBsb3NzJTIwJTNEJTIwbG9zc19mdW5jKG91dHB1dHMpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwbG9zcy5iYWNrd2FyZCgpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwb3B0aW1pemVyLnN0ZXAoKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG9wdGltaXplci56ZXJvX2dyYWQoKQ==",highlighted:`>>> from accelerate import Accelerator >>> accelerator = Accelerator(even_batches=True) >>> ddp_model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader) >>> with accelerator.join_uneven_inputs([ddp_model], even_batches=False): ... for input, output in dataloader: ... outputs = model(input) ... loss = loss_func(outputs) ... loss.backward() ... optimizer.step() ... optimizer.zero_grad()`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function mi(_){let t,m=`Should only be used in conjunction with Accelerator.save_state(). If a file is not registered for checkpointing, it will not be loaded if stored in the directory.`;return{c(){t=i("p"),t.innerHTML=m},l(r){t=d(r,"P",{"data-svelte-h":!0}),j(t)!=="svelte-1y1ax9s"&&(t.innerHTML=m)},m(r,a){w(r,t,a)},p:T,d(r){r&&h(t)}}}function hi(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQW1vZGVsJTJDJTIwb3B0aW1pemVyJTJDJTIwbHJfc2NoZWR1bGVyJTIwJTNEJTIwLi4uJTBBbW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBscl9zY2hlZHVsZXIlMjAlM0QlMjBhY2NlbGVyYXRvci5wcmVwYXJlKG1vZGVsJTJDJTIwb3B0aW1pemVyJTJDJTIwbHJfc2NoZWR1bGVyKSUwQWFjY2VsZXJhdG9yLmxvYWRfc3RhdGUoJTIybXlfY2hlY2twb2ludCUyMik=",highlighted:`>>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> model, optimizer, lr_scheduler = ... >>> model, optimizer, lr_scheduler = accelerator.prepare(model, optimizer, lr_scheduler) >>> accelerator.load_state("my_checkpoint")`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function gi(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQXdpdGglMjBhY2NlbGVyYXRvci5sb2NhbF9tYWluX3Byb2Nlc3NfZmlyc3QoKSUzQSUwQSUyMCUyMCUyMCUyMCUyMyUyMFRoaXMlMjB3aWxsJTIwYmUlMjBwcmludGVkJTIwZmlyc3QlMjBieSUyMGxvY2FsJTIwcHJvY2VzcyUyMDAlMjB0aGVuJTIwaW4lMjBhJTIwc2VlbWluZ2x5JTBBJTIwJTIwJTIwJTIwJTIzJTIwcmFuZG9tJTIwb3JkZXIlMjBieSUyMHRoZSUyMG90aGVyJTIwcHJvY2Vzc2VzLiUwQSUyMCUyMCUyMCUyMHByaW50KGYlMjJUaGlzJTIwd2lsbCUyMGJlJTIwcHJpbnRlZCUyMGJ5JTIwcHJvY2VzcyUyMCU3QmFjY2VsZXJhdG9yLmxvY2FsX3Byb2Nlc3NfaW5kZXglN0QlMjIp",highlighted:`>>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> with accelerator.local_main_process_first(): ... # This will be printed first by local process 0 then in a seemingly ... # random order by the other processes. ... print(f"This will be printed by process {accelerator.local_process_index}")`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function ui(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQXdpdGglMjBhY2NlbGVyYXRvci5tYWluX3Byb2Nlc3NfZmlyc3QoKSUzQSUwQSUyMCUyMCUyMCUyMCUyMyUyMFRoaXMlMjB3aWxsJTIwYmUlMjBwcmludGVkJTIwZmlyc3QlMjBieSUyMHByb2Nlc3MlMjAwJTIwdGhlbiUyMGluJTIwYSUyMHNlZW1pbmdseSUwQSUyMCUyMCUyMCUyMCUyMyUyMHJhbmRvbSUyMG9yZGVyJTIwYnklMjB0aGUlMjBvdGhlciUyMHByb2Nlc3Nlcy4lMEElMjAlMjAlMjAlMjBwcmludChmJTIyVGhpcyUyMHdpbGwlMjBiZSUyMHByaW50ZWQlMjBieSUyMHByb2Nlc3MlMjAlN0JhY2NlbGVyYXRvci5wcm9jZXNzX2luZGV4JTdEJTIyKQ==",highlighted:`>>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> with accelerator.main_process_first(): ... # This will be printed first by process 0 then in a seemingly ... # random order by the other processes. ... print(f"This will be printed by process {accelerator.process_index}")`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function fi(_){let t,m="context_parallel is currently only supported together with FSDP2, and requires parallelism_config.cp_size >",r,a,o=`
  • If either of these conditions are not met, this context manager will have no effect, though to enable fewer code changes it will not raise an Exception.
  • `;return{c(){t=i("p"),t.innerHTML=m,r=n(),a=i("ol"),a.innerHTML=o},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-13dftix"&&(t.innerHTML=m),r=c(e),a=d(e,"OL",{"data-svelte-h":!0}),j(a)!=="svelte-ylijyy"&&(a.innerHTML=o)},m(e,p){w(e,t,p),w(e,r,p),w(e,a,p)},p:T,d(e){e&&(h(t),h(r),h(a))}}}function bi(_){let t,m="This context manager has to be recreated with each training step, as shown in the example below.";return{c(){t=i("p"),t.textContent=m},l(r){t=d(r,"P",{"data-svelte-h":!0}),j(t)!=="svelte-50s686"&&(t.textContent=m)},m(r,a){w(r,t,a)},p:T,d(r){r&&h(t)}}}function $i(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"Zm9yJTIwYmF0Y2glMjBpbiUyMGRhdGFsb2FkZXIlM0ElMEElMjAlMjAlMjAlMjB3aXRoJTIwYWNjZWxlcmF0b3IubWF5YmVfY29udGV4dF9wYXJhbGxlbCglMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBidWZmZXJzJTNEJTVCYmF0Y2glNUIlMjJpbnB1dF9pZHMlMjIlNUQlMkMlMjBiYXRjaCU1QiUyMmF0dGVudGlvbl9tYXNrJTIyJTVEJTVEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwYnVmZmVyX3NlcV9kaW1zJTNEJTVCMSUyQyUyMDElNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBub19yZXN0b3JlX2J1ZmZlcnMlM0QlN0JiYXRjaCU1QiUyMmlucHV0X2lkcyUyMiU1RCU3RCUyQyUwQSUyMCUyMCUyMCUyMCklM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBvdXRwdXRzJTIwJTNEJTIwbW9kZWwoYmF0Y2gpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwLi4u",highlighted:`>>> for batch in dataloader: ... with accelerator.maybe_context_parallel( ... buffers=[batch["input_ids"], batch["attention_mask"]], ... buffer_seq_dims=[1, 1], ... no_restore_buffers={batch["input_ids"]}, ... ): ... outputs = model(batch) ... ...`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function yi(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQWRhdGFsb2FkZXIlMkMlMjBtb2RlbCUyQyUyMG9wdGltaXplciUyMCUzRCUyMGFjY2VsZXJhdG9yLnByZXBhcmUoZGF0YWxvYWRlciUyQyUyMG1vZGVsJTJDJTIwb3B0aW1pemVyKSUwQWlucHV0X2ElMjAlM0QlMjBuZXh0KGl0ZXIoZGF0YWxvYWRlcikpJTBBaW5wdXRfYiUyMCUzRCUyMG5leHQoaXRlcihkYXRhbG9hZGVyKSklMEElMEF3aXRoJTIwYWNjZWxlcmF0b3Iubm9fc3luYygpJTNBJTBBJTIwJTIwJTIwJTIwb3V0cHV0cyUyMCUzRCUyMG1vZGVsKGlucHV0X2EpJTBBJTIwJTIwJTIwJTIwbG9zcyUyMCUzRCUyMGxvc3NfZnVuYyhvdXRwdXRzKSUwQSUyMCUyMCUyMCUyMGFjY2VsZXJhdG9yLmJhY2t3YXJkKGxvc3MpJTBBJTIwJTIwJTIwJTIwJTIzJTIwTm8lMjBzeW5jaHJvbml6YXRpb24lMjBhY3Jvc3MlMjBwcm9jZXNzZXMlMkMlMjBvbmx5JTIwYWNjdW11bGF0ZSUyMGdyYWRpZW50cyUwQW91dHB1dHMlMjAlM0QlMjBtb2RlbChpbnB1dF9iKSUwQWFjY2VsZXJhdG9yLmJhY2t3YXJkKGxvc3MpJTBBJTIzJTIwU3luY2hyb25pemF0aW9uJTIwYWNyb3NzJTIwYWxsJTIwcHJvY2Vzc2VzJTBBb3B0aW1pemVyLnN0ZXAoKSUwQW9wdGltaXplci56ZXJvX2dyYWQoKQ==",highlighted:`>>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> dataloader, model, optimizer = accelerator.prepare(dataloader, model, optimizer) >>> input_a = next(iter(dataloader)) >>> input_b = next(iter(dataloader)) >>> with accelerator.no_sync(): ... outputs = model(input_a) ... loss = loss_func(outputs) ... accelerator.backward(loss) ... # No synchronization across processes, only accumulate gradients >>> outputs = model(input_b) >>> accelerator.backward(loss) >>> # Synchronization across all processes >>> optimizer.step() >>> optimizer.zero_grad()`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Mi(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"JTIzJTIwQXNzdW1lJTIwd2UlMjBoYXZlJTIwNCUyMHByb2Nlc3Nlcy4lMEFmcm9tJTIwYWNjZWxlcmF0ZSUyMGltcG9ydCUyMEFjY2VsZXJhdG9yJTBBJTBBYWNjZWxlcmF0b3IlMjAlM0QlMjBBY2NlbGVyYXRvcigpJTBBJTBBJTBBJTQwYWNjZWxlcmF0b3Iub25fbGFzdF9wcm9jZXNzJTBBZGVmJTIwcHJpbnRfc29tZXRoaW5nKCklM0ElMEElMjAlMjAlMjAlMjBwcmludChmJTIyUHJpbnRlZCUyMG9uJTIwcHJvY2VzcyUyMCU3QmFjY2VsZXJhdG9yLnByb2Nlc3NfaW5kZXglN0QlMjIpJTBBJTBBJTBBcHJpbnRfc29tZXRoaW5nKCklMEElMjJQcmludGVkJTIwb24lMjBwcm9jZXNzJTIwMyUyMg==",highlighted:`# Assume we have 4 processes. from accelerate import Accelerator accelerator = Accelerator() @accelerator.on_last_process def print_something(): print(f"Printed on process {accelerator.process_index}") print_something() "Printed on process 3"`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function ji(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"JTIzJTIwQXNzdW1lJTIwd2UlMjBoYXZlJTIwMiUyMHNlcnZlcnMlMjB3aXRoJTIwNCUyMHByb2Nlc3NlcyUyMGVhY2guJTBBZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQSUwQSUwQSU0MGFjY2VsZXJhdG9yLm9uX2xvY2FsX21haW5fcHJvY2VzcyUwQWRlZiUyMHByaW50X3NvbWV0aGluZygpJTNBJTBBJTIwJTIwJTIwJTIwcHJpbnQoJTIyVGhpcyUyMHdpbGwlMjBiZSUyMHByaW50ZWQlMjBieSUyMHByb2Nlc3MlMjAwJTIwb25seSUyMG9uJTIwZWFjaCUyMHNlcnZlci4lMjIpJTBBJTBBJTBBcHJpbnRfc29tZXRoaW5nKCklMEElMjMlMjBPbiUyMHNlcnZlciUyMDElM0ElMEElMjJUaGlzJTIwd2lsbCUyMGJlJTIwcHJpbnRlZCUyMGJ5JTIwcHJvY2VzcyUyMDAlMjBvbmx5JTIyJTBBJTIzJTIwT24lMjBzZXJ2ZXIlMjAyJTNBJTBBJTIyVGhpcyUyMHdpbGwlMjBiZSUyMHByaW50ZWQlMjBieSUyMHByb2Nlc3MlMjAwJTIwb25seSUyMg==",highlighted:`# Assume we have 2 servers with 4 processes each. from accelerate import Accelerator accelerator = Accelerator() @accelerator.on_local_main_process def print_something(): print("This will be printed by process 0 only on each server.") print_something() # On server 1: "This will be printed by process 0 only" # On server 2: "This will be printed by process 0 only"`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function wi(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"JTIzJTIwQXNzdW1lJTIwd2UlMjBoYXZlJTIwMiUyMHNlcnZlcnMlMjB3aXRoJTIwNCUyMHByb2Nlc3NlcyUyMGVhY2guJTBBZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQSUwQSUwQSU0MGFjY2VsZXJhdG9yLm9uX2xvY2FsX3Byb2Nlc3MobG9jYWxfcHJvY2Vzc19pbmRleCUzRDIpJTBBZGVmJTIwcHJpbnRfc29tZXRoaW5nKCklM0ElMEElMjAlMjAlMjAlMjBwcmludChmJTIyUHJpbnRlZCUyMG9uJTIwcHJvY2VzcyUyMCU3QmFjY2VsZXJhdG9yLmxvY2FsX3Byb2Nlc3NfaW5kZXglN0QlMjIpJTBBJTBBJTBBcHJpbnRfc29tZXRoaW5nKCklMEElMjMlMjBPbiUyMHNlcnZlciUyMDElM0ElMEElMjJQcmludGVkJTIwb24lMjBwcm9jZXNzJTIwMiUyMiUwQSUyMyUyME9uJTIwc2VydmVyJTIwMiUzQSUwQSUyMlByaW50ZWQlMjBvbiUyMHByb2Nlc3MlMjAyJTIy",highlighted:`# Assume we have 2 servers with 4 processes each. from accelerate import Accelerator accelerator = Accelerator() @accelerator.on_local_process(local_process_index=2) def print_something(): print(f"Printed on process {accelerator.local_process_index}") print_something() # On server 1: "Printed on process 2" # On server 2: "Printed on process 2"`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function _i(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQSUwQSUwQSU0MGFjY2VsZXJhdG9yLm9uX21haW5fcHJvY2VzcyUwQWRlZiUyMHByaW50X3NvbWV0aGluZygpJTNBJTBBJTIwJTIwJTIwJTIwcHJpbnQoJTIyVGhpcyUyMHdpbGwlMjBiZSUyMHByaW50ZWQlMjBieSUyMHByb2Nlc3MlMjAwJTIwb25seS4lMjIpJTBBJTBBJTBBcHJpbnRfc29tZXRoaW5nKCk=",highlighted:`>>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> @accelerator.on_main_process ... def print_something(): ... print("This will be printed by process 0 only.") >>> print_something() "This will be printed by process 0 only"`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function vi(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"JTIzJTIwQXNzdW1lJTIwd2UlMjBoYXZlJTIwNCUyMHByb2Nlc3Nlcy4lMEFmcm9tJTIwYWNjZWxlcmF0ZSUyMGltcG9ydCUyMEFjY2VsZXJhdG9yJTBBJTBBYWNjZWxlcmF0b3IlMjAlM0QlMjBBY2NlbGVyYXRvcigpJTBBJTBBJTBBJTQwYWNjZWxlcmF0b3Iub25fcHJvY2Vzcyhwcm9jZXNzX2luZGV4JTNEMiklMEFkZWYlMjBwcmludF9zb21ldGhpbmcoKSUzQSUwQSUyMCUyMCUyMCUyMHByaW50KGYlMjJQcmludGVkJTIwb24lMjBwcm9jZXNzJTIwJTdCYWNjZWxlcmF0b3IucHJvY2Vzc19pbmRleCU3RCUyMiklMEElMEElMEFwcmludF9zb21ldGhpbmcoKSUwQSUyMlByaW50ZWQlMjBvbiUyMHByb2Nlc3MlMjAyJTIy",highlighted:`# Assume we have 4 processes. from accelerate import Accelerator accelerator = Accelerator() @accelerator.on_process(process_index=2) def print_something(): print(f"Printed on process {accelerator.process_index}") print_something() "Printed on process 2"`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Ji(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"JTIzJTIwQXNzdW1pbmclMjB0d28lMjBwcm9jZXNzZXMlMkMlMjB3aXRoJTIwdGhlJTIwZmlyc3QlMjBwcm9jZXNzZXMlMjBoYXZpbmclMjBhJTIwdGVuc29yJTIwb2YlMjBzaXplJTIwMSUyMGFuZCUyMHRoZSUyMHNlY29uZCUyMG9mJTIwc2l6ZSUyMDIlMEFpbXBvcnQlMjB0b3JjaCUwQWZyb20lMjBhY2NlbGVyYXRlJTIwaW1wb3J0JTIwQWNjZWxlcmF0b3IlMEElMEFhY2NlbGVyYXRvciUyMCUzRCUyMEFjY2VsZXJhdG9yKCklMEFwcm9jZXNzX3RlbnNvciUyMCUzRCUyMHRvcmNoLmFyYW5nZShhY2NlbGVyYXRvci5wcm9jZXNzX2luZGV4JTIwJTJCJTIwMSkudG8oYWNjZWxlcmF0b3IuZGV2aWNlKSUwQXBhZGRlZF90ZW5zb3IlMjAlM0QlMjBhY2NlbGVyYXRvci5wYWRfYWNyb3NzX3Byb2Nlc3Nlcyhwcm9jZXNzX3RlbnNvciklMEFwYWRkZWRfdGVuc29yLnNoYXBl",highlighted:`>>> # Assuming two processes, with the first processes having a tensor of size 1 and the second of size 2 >>> import torch >>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> process_tensor = torch.arange(accelerator.process_index + 1).to(accelerator.device) >>> padded_tensor = accelerator.pad_across_processes(process_tensor) >>> padded_tensor.shape torch.Size([2])`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Ti(_){let t,m="You don’t need to prepare a model if you only use it for inference without any kind of mixed precision";return{c(){t=i("p"),t.textContent=m},l(r){t=d(r,"P",{"data-svelte-h":!0}),j(t)!=="svelte-1k839yf"&&(t.textContent=m)},m(r,a){w(r,t,a)},p:T,d(r){r&&h(t)}}}function Ui(_){let t,m="Examples:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQSUyMyUyMEFzc3VtZSUyMGElMjBtb2RlbCUyQyUyMG9wdGltaXplciUyQyUyMGRhdGFfbG9hZGVyJTIwYW5kJTIwc2NoZWR1bGVyJTIwYXJlJTIwZGVmaW5lZCUwQW1vZGVsJTJDJTIwb3B0aW1pemVyJTJDJTIwZGF0YV9sb2FkZXIlMkMlMjBzY2hlZHVsZXIlMjAlM0QlMjBhY2NlbGVyYXRvci5wcmVwYXJlKG1vZGVsJTJDJTIwb3B0aW1pemVyJTJDJTIwZGF0YV9sb2FkZXIlMkMlMjBzY2hlZHVsZXIp",highlighted:`>>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> # Assume a model, optimizer, data_loader and scheduler are defined >>> model, optimizer, data_loader, scheduler = accelerator.prepare(model, optimizer, data_loader, scheduler)`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-kvfsh7"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function xi(_){let t,m;return t=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQSUyMyUyMEFzc3VtZSUyMGElMjBtb2RlbCUyQyUyMG9wdGltaXplciUyQyUyMGRhdGFfbG9hZGVyJTIwYW5kJTIwc2NoZWR1bGVyJTIwYXJlJTIwZGVmaW5lZCUwQWRldmljZV9wbGFjZW1lbnQlMjAlM0QlMjAlNUJUcnVlJTJDJTIwVHJ1ZSUyQyUyMEZhbHNlJTJDJTIwRmFsc2UlNUQlMEElMjMlMjBXaWxsJTIwcGxhY2UlMjB0aGUlMjBmaXJzdCUyMHR3byUyMGl0ZW1zJTIwcGFzc2VkJTIwaW4lMjBhdXRvbWF0aWNhbGx5JTIwdG8lMjB0aGUlMjByaWdodCUyMGRldmljZSUyMGJ1dCUyMG5vdCUyMHRoZSUyMGxhc3QlMjB0d28uJTBBbW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBkYXRhX2xvYWRlciUyQyUyMHNjaGVkdWxlciUyMCUzRCUyMGFjY2VsZXJhdG9yLnByZXBhcmUoJTBBJTIwJTIwJTIwJTIwbW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBkYXRhX2xvYWRlciUyQyUyMHNjaGVkdWxlciUyQyUyMGRldmljZV9wbGFjZW1lbnQlM0RkZXZpY2VfcGxhY2VtZW50JTBBKQ==",highlighted:`>>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> # Assume a model, optimizer, data_loader and scheduler are defined >>> device_placement = [True, True, False, False] >>> # Will place the first two items passed in automatically to the right device but not the last two. >>> model, optimizer, data_loader, scheduler = accelerator.prepare( ... model, optimizer, data_loader, scheduler, device_placement=device_placement ... )`,wrap:!1}}),{c(){g(t.$$.fragment)},l(r){u(t.$$.fragment,r)},m(r,a){f(t,r,a),m=!0},p:T,i(r){m||(b(t.$$.fragment,r),m=!0)},o(r){$(t.$$.fragment,r),m=!1},d(r){y(t,r)}}}function ki(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwYWNjZWxlcmF0ZSUyMGltcG9ydCUyMEFjY2VsZXJhdG9yJTBBJTBBYWNjZWxlcmF0b3IlMjAlM0QlMjBBY2NlbGVyYXRvcigpJTBBZGF0YV9sb2FkZXIlMjAlM0QlMjB0b3JjaC51dGlscy5kYXRhLkRhdGFMb2FkZXIoLi4uKSUwQWRhdGFfbG9hZGVyJTIwJTNEJTIwYWNjZWxlcmF0b3IucHJlcGFyZV9kYXRhX2xvYWRlcihkYXRhX2xvYWRlciUyQyUyMGRldmljZV9wbGFjZW1lbnQlM0RUcnVlKQ==",highlighted:`>>> import torch >>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> data_loader = torch.utils.data.DataLoader(...) >>> data_loader = accelerator.prepare_data_loader(data_loader, device_placement=True)`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Gi(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQSUyMyUyMEFzc3VtZSUyMGElMjBtb2RlbCUyMGlzJTIwZGVmaW5lZCUwQW1vZGVsJTIwJTNEJTIwYWNjZWxlcmF0b3IucHJlcGFyZV9tb2RlbChtb2RlbCk=",highlighted:`>>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> # Assume a model is defined >>> model = accelerator.prepare_model(model)`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Bi(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwYWNjZWxlcmF0ZSUyMGltcG9ydCUyMEFjY2VsZXJhdG9yJTBBJTBBYWNjZWxlcmF0b3IlMjAlM0QlMjBBY2NlbGVyYXRvcigpJTBBb3B0aW1pemVyJTIwJTNEJTIwdG9yY2gub3B0aW0uQWRhbSguLi4pJTBBb3B0aW1pemVyJTIwJTNEJTIwYWNjZWxlcmF0b3IucHJlcGFyZV9vcHRpbWl6ZXIob3B0aW1pemVyJTJDJTIwZGV2aWNlX3BsYWNlbWVudCUzRFRydWUp",highlighted:`>>> import torch >>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> optimizer = torch.optim.Adam(...) >>> optimizer = accelerator.prepare_optimizer(optimizer, device_placement=True)`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Ii(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwYWNjZWxlcmF0ZSUyMGltcG9ydCUyMEFjY2VsZXJhdG9yJTBBJTBBYWNjZWxlcmF0b3IlMjAlM0QlMjBBY2NlbGVyYXRvcigpJTBBb3B0aW1pemVyJTIwJTNEJTIwdG9yY2gub3B0aW0uQWRhbSguLi4pJTBBc2NoZWR1bGVyJTIwJTNEJTIwdG9yY2gub3B0aW0ubHJfc2NoZWR1bGVyLkxhbWJkYUxSKG9wdGltaXplciUyQyUyMC4uLiklMEFzY2hlZHVsZXIlMjAlM0QlMjBhY2NlbGVyYXRvci5wcmVwYXJlX3NjaGVkdWxlcihzY2hlZHVsZXIp",highlighted:`>>> import torch >>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> optimizer = torch.optim.Adam(...) >>> scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, ...) >>> scheduler = accelerator.prepare_scheduler(scheduler)`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Ci(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQWFjY2VsZXJhdG9yLnByaW50KCUyMkhlbGxvJTIwd29ybGQhJTIyKQ==",highlighted:`>>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> accelerator.print("Hello world!")`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Zi(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"JTIzJTIwUHJvZmlsZSUyMHdpdGglMjBkZWZhdWx0JTIwc2V0dGluZ3MlMEFmcm9tJTIwYWNjZWxlcmF0ZSUyMGltcG9ydCUyMEFjY2VsZXJhdG9yJTBBZnJvbSUyMGFjY2VsZXJhdGUudXRpbHMlMjBpbXBvcnQlMjBQcm9maWxlS3dhcmdzJTBBJTBBYWNjZWxlcmF0b3IlMjAlM0QlMjBBY2NlbGVyYXRvcigpJTBBd2l0aCUyMGFjY2VsZXJhdG9yLnByb2ZpbGUoKSUyMGFzJTIwcHJvZiUzQSUwQSUyMCUyMCUyMCUyMHRyYWluKCklMEFhY2NlbGVyYXRvci5wcmludChwcm9mLmtleV9hdmVyYWdlcygpLnRhYmxlKCkpJTBBJTBBJTBBJTIzJTIwUHJvZmlsZSUyMHdpdGglMjB0aGUlMjBjdXN0b20lMjBoYW5kbGVyJTBBZGVmJTIwY3VzdG9tX2hhbmRsZXIocHJvZiklM0ElMEElMjAlMjAlMjAlMjBwcmludChwcm9mLmtleV9hdmVyYWdlcygpLnRhYmxlKHNvcnRfYnklM0QlMjJzZWxmX2NwdV90aW1lX3RvdGFsJTIyJTJDJTIwcm93X2xpbWl0JTNEMTApKSUwQSUwQSUwQWt3YXJncyUyMCUzRCUyMFByb2ZpbGVLd2FyZ3Moc2NoZWR1bGVfb3B0aW9uJTNEZGljdCh3YWl0JTNEMSUyQyUyMHdhcm11cCUzRDElMkMlMjBhY3RpdmUlM0QxKSUyQyUyMG9uX3RyYWNlX3JlYWR5JTNEY3VzdG9tX2hhbmRsZXIpJTBBYWNjZWxlcmF0b3IlMjAlM0QlMjBBY2NlbGVyYXRvcihrd2FyZ19oYW5kbGVyJTNEJTVCa3dhcmdzJTVEKSUwQXdpdGglMjBhY2NlbGVyYXRvci5wcm9maWxlKCklMjBhcyUyMHByb2YlM0ElMEElMjAlMjAlMjAlMjBmb3IlMjBfJTIwaW4lMjByYW5nZSgxMCklM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjB0cmFpbl9pdGVyYXRpb24oKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHByb2Yuc3RlcCgpJTBBJTBBJTBBJTIzJTIwUHJvZmlsZSUyMGFuZCUyMGV4cG9ydCUyMHRvJTIwQ2hyb21lJTIwVHJhY2UlMEFrd2FyZ3MlMjAlM0QlMjBQcm9maWxlS3dhcmdzKG91dHB1dF90cmFjZV9kaXIlM0QlMjJvdXRwdXRfdHJhY2UlMjIpJTBBYWNjZWxlcmF0b3IlMjAlM0QlMjBBY2NlbGVyYXRvcihrd2FyZ19oYW5kbGVyJTNEJTVCa3dhcmdzJTVEKSUwQXdpdGglMjBhY2NlbGVyYXRvci5wcm9maWxlKCklM0ElMEElMjAlMjAlMjAlMjB0cmFpbigp",highlighted:`# Profile with default settings from accelerate import Accelerator from accelerate.utils import ProfileKwargs accelerator = Accelerator() with accelerator.profile() as prof: train() accelerator.print(prof.key_averages().table()) # Profile with the custom handler def custom_handler(prof): print(prof.key_averages().table(sort_by="self_cpu_time_total", row_limit=10)) kwargs = ProfileKwargs(schedule_option=dict(wait=1, warmup=1, active=1), on_trace_ready=custom_handler) accelerator = Accelerator(kwarg_handler=[kwargs]) with accelerator.profile() as prof: for _ in range(10): train_iteration() prof.step() # Profile and export to Chrome Trace kwargs = ProfileKwargs(output_trace_dir="output_trace") accelerator = Accelerator(kwarg_handler=[kwargs]) with accelerator.profile(): train()`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Wi(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"JTIzJTIwQXNzdW1pbmclMjB0d28lMjBwcm9jZXNzZXMlMEFpbXBvcnQlMjB0b3JjaCUwQWZyb20lMjBhY2NlbGVyYXRlJTIwaW1wb3J0JTIwQWNjZWxlcmF0b3IlMEElMEFhY2NlbGVyYXRvciUyMCUzRCUyMEFjY2VsZXJhdG9yKCklMEFwcm9jZXNzX3RlbnNvciUyMCUzRCUyMHRvcmNoLmFyYW5nZShhY2NlbGVyYXRvci5udW1fcHJvY2Vzc2VzKSUyMCUyQiUyMDElMjAlMkIlMjAoMiUyMColMjBhY2NlbGVyYXRvci5wcm9jZXNzX2luZGV4KSUwQXByb2Nlc3NfdGVuc29yJTIwJTNEJTIwcHJvY2Vzc190ZW5zb3IudG8oYWNjZWxlcmF0b3IuZGV2aWNlKSUwQXJlZHVjZWRfdGVuc29yJTIwJTNEJTIwYWNjZWxlcmF0b3IucmVkdWNlKHByb2Nlc3NfdGVuc29yJTJDJTIwcmVkdWN0aW9uJTNEJTIyc3VtJTIyKSUwQXJlZHVjZWRfdGVuc29y",highlighted:`>>> # Assuming two processes >>> import torch >>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> process_tensor = torch.arange(accelerator.num_processes) + 1 + (2 * accelerator.process_index) >>> process_tensor = process_tensor.to(accelerator.device) >>> reduced_tensor = accelerator.reduce(process_tensor, reduction="sum") >>> reduced_tensor tensor([4, 6])`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Yi(_){let t,m="Every object must have a load_state_dict and state_dict function to be stored.";return{c(){t=i("p"),t.innerHTML=m},l(r){t=d(r,"P",{"data-svelte-h":!0}),j(t)!=="svelte-1kglckk"&&(t.innerHTML=m)},m(r,a){w(r,t,a)},p:T,d(r){r&&h(t)}}}function Xi(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQSUyMyUyMEFzc3VtZSUyMCU2MEN1c3RvbU9iamVjdCU2MCUyMGhhcyUyMGElMjAlNjBzdGF0ZV9kaWN0JTYwJTIwYW5kJTIwJTYwbG9hZF9zdGF0ZV9kaWN0JTYwJTIwZnVuY3Rpb24uJTBBb2JqJTIwJTNEJTIwQ3VzdG9tT2JqZWN0KCklMEFhY2NlbGVyYXRvci5yZWdpc3Rlcl9mb3JfY2hlY2twb2ludGluZyhvYmopJTBBYWNjZWxlcmF0b3Iuc2F2ZV9zdGF0ZSglMjJjaGVja3BvaW50LnB0JTIyKQ==",highlighted:'>>> from accelerate import Accelerator\n\n>>> accelerator = Accelerator()\n>>> # Assume `CustomObject` has a `state_dict` and `load_state_dict` function.\n>>> obj = CustomObject()\n>>> accelerator.register_for_checkpointing(obj)\n>>> accelerator.save_state("checkpoint.pt")',wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Ai(_){let t,m=`Should only be used in conjunction with Accelerator.register_save_state_pre_hook(). Can be useful to load configurations in addition to model weights. Can also be used to overwrite model loading with a customized method. In this case, make sure to remove already loaded models from the models list.`;return{c(){t=i("p"),t.innerHTML=m},l(r){t=d(r,"P",{"data-svelte-h":!0}),j(t)!=="svelte-y7x8xo"&&(t.innerHTML=m)},m(r,a){w(r,t,a)},p:T,d(r){r&&h(t)}}}function Vi(_){let t,m=`Should only be used in conjunction with Accelerator.register_load_state_pre_hook(). Can be useful to save configurations in addition to model weights. Can also be used to overwrite model saving with a customized method. In this case, make sure to remove already loaded weights from the weights list.`;return{c(){t=i("p"),t.innerHTML=m},l(r){t=d(r,"P",{"data-svelte-h":!0}),j(t)!=="svelte-k6vf2f"&&(t.innerHTML=m)},m(r,a){w(r,t,a)},p:T,d(r){r&&h(t)}}}function Ni(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQWFyciUyMCUzRCUyMCU1QjAlMkMlMjAxJTJDJTIwMiUyQyUyMDMlNUQlMEFhY2NlbGVyYXRvci5zYXZlKGFyciUyQyUyMCUyMmFycmF5LnBrbCUyMik=",highlighted:`>>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> arr = [0, 1, 2, 3] >>> accelerator.save(arr, "array.pkl")`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Qi(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQW1vZGVsJTIwJTNEJTIwLi4uJTBBYWNjZWxlcmF0b3Iuc2F2ZV9tb2RlbChtb2RlbCUyQyUyMHNhdmVfZGlyZWN0b3J5KQ==",highlighted:`>>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> model = ... >>> accelerator.save_model(model, save_directory)`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Fi(_){let t,m=`Should only be used when wanting to save a checkpoint during training and restoring the state in the same environment.`;return{c(){t=i("p"),t.textContent=m},l(r){t=d(r,"P",{"data-svelte-h":!0}),j(t)!=="svelte-1ljq3ee"&&(t.textContent=m)},m(r,a){w(r,t,a)},p:T,d(r){r&&h(t)}}}function zi(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQW1vZGVsJTJDJTIwb3B0aW1pemVyJTJDJTIwbHJfc2NoZWR1bGVyJTIwJTNEJTIwLi4uJTBBbW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBscl9zY2hlZHVsZXIlMjAlM0QlMjBhY2NlbGVyYXRvci5wcmVwYXJlKG1vZGVsJTJDJTIwb3B0aW1pemVyJTJDJTIwbHJfc2NoZWR1bGVyKSUwQWFjY2VsZXJhdG9yLnNhdmVfc3RhdGUob3V0cHV0X2RpciUzRCUyMm15X2NoZWNrcG9pbnQlMjIp",highlighted:`>>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> model, optimizer, lr_scheduler = ... >>> model, optimizer, lr_scheduler = accelerator.prepare(model, optimizer, lr_scheduler) >>> accelerator.save_state(output_dir="my_checkpoint")`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Ri(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQSUyMyUyMEFzc3VtZSUyMGxhdGVyJTIwaW4lMjB0aGUlMjB0cmFpbmluZyUyMHNjcmlwdCUwQSUyMyUyMCU2MHNob3VsZF9kb19icmVha3BvaW50JTYwJTIwaXMlMjBhJTIwY3VzdG9tJTIwZnVuY3Rpb24lMjB0byUyMG1vbml0b3IlMjB3aGVuJTIwdG8lMjBicmVhayUyQyUwQSUyMyUyMGUuZy4lMjB3aGVuJTIwdGhlJTIwbG9zcyUyMGlzJTIwTmFOJTBBaWYlMjBzaG91bGRfZG9fYnJlYWtwb2ludChsb3NzKSUzQSUwQSUyMCUyMCUyMCUyMGFjY2VsZXJhdG9yLnNldF90cmlnZ2VyKCklMEElMjMlMjBBc3N1bWUlMjBsYXRlciUyMGluJTIwdGhlJTIwdHJhaW5pbmclMjBzY3JpcHQlMEFpZiUyMGFjY2VsZXJhdG9yLmNoZWNrX2JyZWFrcG9pbnQoKSUzQSUwQSUyMCUyMCUyMCUyMGJyZWFr",highlighted:`>>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> # Assume later in the training script >>> # \`should_do_breakpoint\` is a custom function to monitor when to break, >>> # e.g. when the loss is NaN >>> if should_do_breakpoint(loss): ... accelerator.set_trigger() >>> # Assume later in the training script >>> if accelerator.check_breakpoint(): ... break`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Ei(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQWRhdGFsb2FkZXIlMkMlMjBtb2RlbCUyQyUyMG9wdGltaXplciUyQyUyMHNjaGVkdWxlciUyMCUzRCUyMGFjY2VsZXJhdG9yLnByZXBhcmUoZGF0YWxvYWRlciUyQyUyMG1vZGVsJTJDJTIwb3B0aW1pemVyJTJDJTIwc2NoZWR1bGVyKSUwQXNraXBwZWRfZGF0YWxvYWRlciUyMCUzRCUyMGFjY2VsZXJhdG9yLnNraXBfZmlyc3RfYmF0Y2hlcyhkYXRhbG9hZGVyJTJDJTIwbnVtX2JhdGNoZXMlM0QyKSUwQSUyMyUyMGZvciUyMHRoZSUyMGZpcnN0JTIwZXBvY2glMjBvbmx5JTBBZm9yJTIwaW5wdXQlMkMlMjB0YXJnZXQlMjBpbiUyMHNraXBwZWRfZGF0YWxvYWRlciUzQSUwQSUyMCUyMCUyMCUyMG9wdGltaXplci56ZXJvX2dyYWQoKSUwQSUyMCUyMCUyMCUyMG91dHB1dCUyMCUzRCUyMG1vZGVsKGlucHV0KSUwQSUyMCUyMCUyMCUyMGxvc3MlMjAlM0QlMjBsb3NzX2Z1bmMob3V0cHV0JTJDJTIwdGFyZ2V0KSUwQSUyMCUyMCUyMCUyMGFjY2VsZXJhdG9yLmJhY2t3YXJkKGxvc3MpJTBBJTIwJTIwJTIwJTIwb3B0aW1pemVyLnN0ZXAoKSUwQSUwQSUyMyUyMHN1YnNlcXVlbnQlMjBlcG9jaHMlMEFmb3IlMjBpbnB1dCUyQyUyMHRhcmdldCUyMGluJTIwZGF0YWxvYWRlciUzQSUwQSUyMCUyMCUyMCUyMG9wdGltaXplci56ZXJvX2dyYWQoKSUwQSUyMCUyMCUyMCUyMC4uLg==",highlighted:`>>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> dataloader, model, optimizer, scheduler = accelerator.prepare(dataloader, model, optimizer, scheduler) >>> skipped_dataloader = accelerator.skip_first_batches(dataloader, num_batches=2) >>> # for the first epoch only >>> for input, target in skipped_dataloader: ... optimizer.zero_grad() ... output = model(input) ... loss = loss_func(output, target) ... accelerator.backward(loss) ... optimizer.step() >>> # subsequent epochs >>> for input, target in dataloader: ... optimizer.zero_grad() ... ...`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Si(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"JTIzJTIwQXNzdW1lJTIwdGhlcmUlMjBhcmUlMjB0d28lMjBwcm9jZXNzZXMlMEFmcm9tJTIwYWNjZWxlcmF0ZSUyMGltcG9ydCUyMEFjY2VsZXJhdG9yJTBBJTBBYWNjZWxlcmF0b3IlMjAlM0QlMjBBY2NlbGVyYXRvcigpJTBBd2l0aCUyMGFjY2VsZXJhdG9yLnNwbGl0X2JldHdlZW5fcHJvY2Vzc2VzKCU1QiUyMkElMjIlMkMlMjAlMjJCJTIyJTJDJTIwJTIyQyUyMiU1RCklMjBhcyUyMGlucHV0cyUzQSUwQSUyMCUyMCUyMCUyMHByaW50KGlucHV0cyklMEElMjMlMjBQcm9jZXNzJTIwMCUwQSU1QiUyMkElMjIlMkMlMjAlMjJCJTIyJTVEJTBBJTIzJTIwUHJvY2VzcyUyMDElMEElNUIlMjJDJTIyJTVEJTBBJTBBd2l0aCUyMGFjY2VsZXJhdG9yLnNwbGl0X2JldHdlZW5fcHJvY2Vzc2VzKCU1QiUyMkElMjIlMkMlMjAlMjJCJTIyJTJDJTIwJTIyQyUyMiU1RCUyQyUyMGFwcGx5X3BhZGRpbmclM0RUcnVlKSUyMGFzJTIwaW5wdXRzJTNBJTBBJTIwJTIwJTIwJTIwcHJpbnQoaW5wdXRzKSUwQSUyMyUyMFByb2Nlc3MlMjAwJTBBJTVCJTIyQSUyMiUyQyUyMCUyMkIlMjIlNUQlMEElMjMlMjBQcm9jZXNzJTIwMSUwQSU1QiUyMkMlMjIlMkMlMjAlMjJDJTIyJTVE",highlighted:`# Assume there are two processes from accelerate import Accelerator accelerator = Accelerator() with accelerator.split_between_processes(["A", "B", "C"]) as inputs: print(inputs) # Process 0 ["A", "B"] # Process 1 ["C"] with accelerator.split_between_processes(["A", "B", "C"], apply_padding=True) as inputs: print(inputs) # Process 0 ["A", "B"] # Process 1 ["C", "C"]`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Hi(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQWRhdGFsb2FkZXIlMkMlMjBtb2RlbCUyQyUyMG9wdGltaXplciUyMCUzRCUyMGFjY2VsZXJhdG9yLnByZXBhcmUoZGF0YWxvYWRlciUyQyUyMG1vZGVsJTJDJTIwb3B0aW1pemVyKSUwQSUwQXdpdGglMjBhY2NlbGVyYXRvci5ub19zeW5jKCklM0ElMEElMjAlMjAlMjAlMjBsb3NzX2ElMjAlM0QlMjBsb3NzX2Z1bmMobW9kZWwoaW5wdXRfYSkpJTIwJTIwJTIzJTIwZmlyc3QlMjBmb3J3YXJkJTIwcGFzcyUwQSUyMCUyMCUyMCUyMGxvc3NfYiUyMCUzRCUyMGxvc3NfZnVuYyhtb2RlbChpbnB1dF9iKSklMjAlMjAlMjMlMjBzZWNvbmQlMjBmb3J3YXJkJTIwcGFzcyUwQWFjY2VsZXJhdG9yLmJhY2t3YXJkKGxvc3NfYSklMjAlMjAlMjMlMjBObyUyMHN5bmNocm9uaXphdGlvbiUyMGFjcm9zcyUyMHByb2Nlc3NlcyUyQyUyMG9ubHklMjBhY2N1bXVsYXRlJTIwZ3JhZGllbnRzJTBBd2l0aCUyMGFjY2VsZXJhdG9yLnRyaWdnZXJfc3luY19pbl9iYWNrd2FyZChtb2RlbCklM0ElMEElMjAlMjAlMjAlMjBhY2NlbGVyYXRvci5iYWNrd2FyZChsb3NzX2IpJTIwJTIwJTIzJTIwU3luY2hyb25pemF0aW9uJTIwYWNyb3NzJTIwYWxsJTIwcHJvY2Vzc2VzJTBBb3B0aW1pemVyLnN0ZXAoKSUwQW9wdGltaXplci56ZXJvX2dyYWQoKQ==",highlighted:`>>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> dataloader, model, optimizer = accelerator.prepare(dataloader, model, optimizer) >>> with accelerator.no_sync(): ... loss_a = loss_func(model(input_a)) # first forward pass ... loss_b = loss_func(model(input_b)) # second forward pass >>> accelerator.backward(loss_a) # No synchronization across processes, only accumulate gradients >>> with accelerator.trigger_sync_in_backward(model): ... accelerator.backward(loss_b) # Synchronization across all processes >>> optimizer.step() >>> optimizer.zero_grad()`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Li(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQW1vZGVsJTJDJTIwb3B0aW1pemVyJTIwJTNEJTIwYWNjZWxlcmF0b3IucHJlcGFyZShtb2RlbCUyQyUyMG9wdGltaXplciklMEFvdXRwdXRzJTIwJTNEJTIwbW9kZWwoaW5wdXRzKSUwQWxvc3MlMjAlM0QlMjBsb3NzX2ZuKG91dHB1dHMlMkMlMjBsYWJlbHMpJTBBYWNjZWxlcmF0b3IuYmFja3dhcmQobG9zcyklMEFhY2NlbGVyYXRvci51bnNjYWxlX2dyYWRpZW50cyhvcHRpbWl6ZXIlM0RvcHRpbWl6ZXIp",highlighted:`>>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> model, optimizer = accelerator.prepare(model, optimizer) >>> outputs = model(inputs) >>> loss = loss_fn(outputs, labels) >>> accelerator.backward(loss) >>> accelerator.unscale_gradients(optimizer=optimizer)`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Pi(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"JTIzJTIwQXNzdW1pbmclMjB0d28lMjBHUFUlMjBwcm9jZXNzZXMlMEFmcm9tJTIwdG9yY2gubm4ucGFyYWxsZWwlMjBpbXBvcnQlMjBEaXN0cmlidXRlZERhdGFQYXJhbGxlbCUwQWZyb20lMjBhY2NlbGVyYXRlJTIwaW1wb3J0JTIwQWNjZWxlcmF0b3IlMEElMEFhY2NlbGVyYXRvciUyMCUzRCUyMEFjY2VsZXJhdG9yKCklMEFtb2RlbCUyMCUzRCUyMGFjY2VsZXJhdG9yLnByZXBhcmUoTXlNb2RlbCgpKSUwQXByaW50KG1vZGVsLl9fY2xhc3NfXy5fX25hbWVfXyklMEElMEFtb2RlbCUyMCUzRCUyMGFjY2VsZXJhdG9yLnVud3JhcF9tb2RlbChtb2RlbCklMEFwcmludChtb2RlbC5fX2NsYXNzX18uX19uYW1lX18p",highlighted:`>>> # Assuming two GPU processes >>> from torch.nn.parallel import DistributedDataParallel >>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> model = accelerator.prepare(MyModel()) >>> print(model.__class__.__name__) DistributedDataParallel >>> model = accelerator.unwrap_model(model) >>> print(model.__class__.__name__) MyModel`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Di(_){let t,m="Example:",r,a,o;return a=new B({props:{code:"JTIzJTIwQXNzdW1pbmclMjB0d28lMjBHUFUlMjBwcm9jZXNzZXMlMEFpbXBvcnQlMjB0aW1lJTBBZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUwQSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3IoKSUwQWlmJTIwYWNjZWxlcmF0b3IuaXNfbWFpbl9wcm9jZXNzJTNBJTBBJTIwJTIwJTIwJTIwdGltZS5zbGVlcCgyKSUwQWVsc2UlM0ElMEElMjAlMjAlMjAlMjBwcmludCglMjJJJ20lMjB3YWl0aW5nJTIwZm9yJTIwdGhlJTIwbWFpbiUyMHByb2Nlc3MlMjB0byUyMGZpbmlzaCUyMGl0cyUyMHNsZWVwLi4uJTIyKSUwQWFjY2VsZXJhdG9yLndhaXRfZm9yX2V2ZXJ5b25lKCklMEElMjMlMjBTaG91bGQlMjBwcmludCUyMG9uJTIwZXZlcnklMjBwcm9jZXNzJTIwYXQlMjB0aGUlMjBzYW1lJTIwdGltZSUwQXByaW50KCUyMkV2ZXJ5b25lJTIwaXMlMjBoZXJlJTIyKQ==",highlighted:`>>> # Assuming two GPU processes >>> import time >>> from accelerate import Accelerator >>> accelerator = Accelerator() >>> if accelerator.is_main_process: ... time.sleep(2) >>> else: ... print("I'm waiting for the main process to finish its sleep...") >>> accelerator.wait_for_everyone() >>> # Should print on every process at the same time >>> print("Everyone is here")`,wrap:!1}}),{c(){t=i("p"),t.textContent=m,r=n(),g(a.$$.fragment)},l(e){t=d(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-11lpom8"&&(t.textContent=m),r=c(e),u(a.$$.fragment,e)},m(e,p){w(e,t,p),w(e,r,p),f(a,e,p),o=!0},p:T,i(e){o||(b(a.$$.fragment,e),o=!0)},o(e){$(a.$$.fragment,e),o=!1},d(e){e&&(h(t),h(r)),y(a,e)}}}function Ki(_){let t,m,r,a,o,e,p,bo='The Accelerator is the main class for enabling distributed training on any type of training setup. Read the Add Accelerator to your code tutorial to learn more about how to add the Accelerator to your script.',El,ja,Sl,v,wa,ql,_s,$o="Creates an instance of an accelerator for distributed training or mixed precision training.",Ol,vs,yo="Available attributes:",er,Js,Mo=`
  • device (torch.device) — The device to use.
  • distributed_type (DistributedType) — The distributed training configuration.
  • local_process_index (int) — The process index on the current machine.
  • mixed_precision (str) — The configured mixed precision mode.
  • num_processes (int) — The total number of processes used for training.
  • optimizer_step_was_skipped (bool) — Whether or not the optimizer update was skipped (because of gradient overflow in mixed precision), in which case the learning rate should not be changed.
  • process_index (int) — The overall index of the current process among all processes.
  • state (AcceleratorState) — The distributed setup state.
  • sync_gradients (bool) — Whether the gradients are currently being synced across all processes.
  • use_distributed (bool) — Whether the current configuration is for distributed training.
  • `,tr,we,_a,ar,Ts,jo="A context manager that will lightly wrap around and perform gradient accumulation automatically",sr,$t,lr,S,va,rr,Us,wo=`Will apply automatic mixed-precision inside the block inside this context manager, if it is enabled. Nothing different will happen otherwise.`,nr,xs,_o=`A different autocast_handler can be passed in to override the one set in the Accelerator object. This is useful in blocks under autocast where you want to revert to fp32.`,cr,yt,or,H,Ja,pr,ks,vo=`Scales the gradients in accordance to the GradientAccumulationPlugin and calls the correct backward() based on the configuration.`,ir,Gs,Jo="Should be used in lieu of loss.backward().",dr,Mt,mr,L,Ta,hr,Bs,To=`Checks if the internal trigger tensor has been set to 1 in any of the processes. If so, will return True and reset the trigger tensor to 0.`,gr,Is,Uo=`Note: Does not require wait_for_everyone()`,ur,jt,fr,_e,Ua,br,Cs,xo=`Alias for Accelerate.free_memory, releases all references to the internal objects stored and call the garbage collector. You should call this method between two trainings with different models/optimizers.`,$r,wt,yr,ve,xa,Mr,Zs,ko="Should be used in place of torch.nn.utils.clip_grad_norm_.",jr,_t,wr,Je,ka,_r,Ws,Go="Should be used in place of torch.nn.utils.clip_grad_value_.",vr,vt,Jr,Te,Ga,Tr,Ys,Bo=`Runs any special end training behaviors, such as stopping trackers on the main process only or destoying process group. Should always be called at the end of your script if using experiment tracking.`,Ur,Jt,xr,Ue,Ba,kr,Xs,Io=`Will release all references to the internal objects stored and call the garbage collector. You should call this method between two trainings with different models/optimizers. Also will reset Accelerator.step to 0.`,Gr,Tt,Br,P,Ia,Ir,As,Co=`Gather the values in tensor across all processes and concatenate them on the first dimension. Useful to regroup the predictions from all processes when doing evaluation.`,Cr,Vs,Zo=`Note: This gather happens in all processes.`,Zr,Ut,Wr,xe,Ca,Yr,Ns,Wo=`Gathers input_data and potentially drops duplicates in the last batch if on a distributed system. Should be used for gathering the inputs and targets for metric calculation.`,Xr,xt,Ar,ke,Za,Vr,Qs,Yo=`Returns the state dictionary of a model sent through Accelerator.prepare() potentially without full precision.`,Nr,kt,Qr,Ge,Wa,Fr,Fs,Xo="Returns a tracker from self.trackers based on name on the main process only.",zr,Gt,Rr,A,Ya,Er,zs,Ao=`A context manager that facilitates distributed training or evaluation on uneven inputs, which acts as a wrapper around torch.distributed.algorithms.join. This is useful when the total batch size does not evenly divide the length of the dataset.`,Sr,Bt,Hr,It,Lr,Ct,Pr,D,Xa,Dr,Rs,Vo="Loads the current states of the model, optimizer, scaler, RNG generators, and registered objects.",Kr,Zt,qr,Wt,Or,K,Aa,en,Es,No="Lets the local main process go inside a with block.",tn,Ss,Qo="The other processes will enter the with block after the main process exits.",an,Yt,sn,Xt,Va,ln,Hs,Fo="Runs backward pass on LOMO optimizers.",rn,q,Na,nn,Ls,zo="Lets the main process go first inside a with block.",cn,Ps,Ro="The other processes will enter the with block after the main process exits.",on,At,pn,V,Qa,dn,Ds,Eo="A context manager that enables context parallel training.",mn,Vt,hn,Nt,gn,Qt,un,O,Fa,fn,Ks,So=`A context manager to disable gradient synchronizations across DDP processes by calling torch.nn.parallel.DistributedDataParallel.no_sync.`,bn,qs,Ho="If model is not in DDP, this context manager does nothing",$n,Ft,yn,Be,za,Mn,Os,Lo=`A decorator that will run the decorated function on the last process only. Can also be called using the PartialState class.`,jn,zt,wn,Ie,Ra,_n,el,Po=`A decorator that will run the decorated function on the local main process only. Can also be called using the PartialState class.`,vn,Rt,Jn,Ce,Ea,Tn,tl,Do=`A decorator that will run the decorated function on a given local process index only. Can also be called using the PartialState class.`,Un,Et,xn,Ze,Sa,kn,al,Ko=`A decorator that will run the decorated function on the main process only. Can also be called using the PartialState class.`,Gn,St,Bn,We,Ha,In,sl,qo=`A decorator that will run the decorated function on a given process index only. Can also be called using the PartialState class.`,Cn,Ht,Zn,Ye,La,Wn,ll,Oo=`Recursively pad the tensors in a nested list/tuple/dictionary of tensors from all devices to the same size so they can safely be gathered.`,Yn,Lt,Xn,N,Pa,An,rl,ep=`Prepare all objects passed in args for distributed training and mixed precision, then return them in the same order.`,Vn,Pt,Nn,Dt,Qn,Kt,Fn,Xe,Da,zn,nl,tp=`Prepares a PyTorch DataLoader for training in any distributed setup. It is recommended to use Accelerator.prepare() instead.`,Rn,qt,En,Ae,Ka,Sn,cl,ap=`Prepares a PyTorch model for training in any distributed setup. It is recommended to use Accelerator.prepare() instead.`,Hn,Ot,Ln,Ve,qa,Pn,ol,sp=`Prepares a PyTorch Optimizer for training in any distributed setup. It is recommended to use Accelerator.prepare() instead.`,Dn,ea,Kn,Ne,Oa,qn,pl,lp=`Prepares a PyTorch Scheduler for training in any distributed setup. It is recommended to use Accelerator.prepare() instead.`,On,ta,ec,Qe,es,tc,il,rp="Drop in replacement of print() to only print once per server.",ac,aa,sc,ee,ts,lc,dl,np=`Will profile the code inside the context manager. The profile will be saved to a Chrome Trace file if profile_handler.output_trace_dir is set.`,rc,ml,cp="A different profile_handler can be passed in to override the one set in the Accelerator object.",nc,sa,cc,te,as,oc,hl,op="Reduce the values in tensor across all processes based on reduction.",pc,gl,pp=`Note: All processes get the reduced value.`,ic,la,dc,Q,ss,mc,ul,ip="Makes note of objects and will save or load them in during save_state or load_state.",hc,fl,dp=`These should be utilized when the state is being loaded or saved in the same script. It is not designed to be used in different scripts.`,gc,ra,uc,na,fc,I,ls,bc,bl,mp='Registers a pre hook to be run before load_checkpoint is called in Accelerator.load_state().',$c,$l,hp="The hook should have the following signature:",yc,yl,gp="hook(models: list[torch.nn.Module], input_dir: str) -> None",Mc,Ml,up=`The models argument are the models as saved in the accelerator state under accelerator._models, and the input_dir argument is the input_dir argument passed to Accelerator.load_state().`,jc,ca,wc,C,rs,_c,jl,fp='Registers a pre hook to be run before save_checkpoint is called in Accelerator.save_state().',vc,wl,bp="The hook should have the following signature:",Jc,_l,$p="hook(models: list[torch.nn.Module], weights: list[dict[str, torch.Tensor]], input_dir: str) -> None",Tc,vl,yp=`The models argument are the models as saved in the accelerator state under accelerator._models, weigths argument are the state dicts of the models, and the input_dir argument is the input_dir argument passed to Accelerator.load_state().`,Uc,oa,xc,ae,ns,kc,Jl,Mp="Save the object passed to disk once per machine. Use in place of torch.save.",Gc,Tl,jp=`Note: If save_on_each_node was passed in as a ProjectConfiguration, will save the object once per node, rather than only once on the main node.`,Bc,pa,Ic,Fe,cs,Cc,Ul,wp="Save a model so that it can be re-loaded using load_checkpoint_in_model",Zc,ia,Wc,Z,os,Yc,xl,_p="Saves the current states of the model, optimizer, scaler, RNG generators, and registered objects to a folder.",Xc,kl,vp=`If a ProjectConfiguration was passed to the Accelerator object with automatic_checkpoint_naming enabled then checkpoints will be saved to self.project_dir/checkpoints. If the number of current saves is greater than total_limit then the oldest save is deleted. Each checkpoint is saved in separate folders named checkpoint_<iteration>.`,Ac,Gl,Jp="Otherwise they are just saved to output_dir.",Vc,da,Nc,ma,Qc,se,ps,Fc,Bl,Tp=`Sets the internal trigger tensor to 1 on the current process. A latter check should follow using this which will check across all processes.`,zc,Il,Up=`Note: Does not require wait_for_everyone()`,Rc,ha,Ec,ze,is,Sc,Cl,xp="Creates a new torch.utils.data.DataLoader that will efficiently skip the first num_batches.",Hc,ga,Lc,le,ds,Pc,Zl,kp=`Splits input between self.num_processes quickly and can be then used on that process. Useful when doing distributed inference, such as with different prompts.`,Dc,Wl,Gp="Note that when using a dict, all keys need to have the same number of elements.",Kc,ua,qc,re,ms,Oc,Yl,Bp=`Trigger the sync of the gradients in the next backward pass of the model after multiple forward passes under Accelerator.no_sync (only applicable in multi-GPU scenarios).`,eo,Xl,Ip="If the script is not launched in distributed mode, this context manager does nothing.",to,fa,ao,ne,hs,so,Al,Cp="Unscale the gradients in mixed precision training with AMP. This is a noop in all other settings.",lo,Vl,Zp='Likely should be called through Accelerator.clipgrad_norm() or Accelerator.clipgrad_value()',ro,ba,no,Re,gs,co,Nl,Wp=`Unwraps the model from the additional layer possible added by prepare(). Useful before saving the model.`,oo,$a,po,ya,us,io,Ql,Yp="Verifies that model has not been prepared with big model inference with a device-map resembling auto.",mo,Ee,fs,ho,Fl,Xp=`Will stop the execution of the current process until every other process has reached that point (so this does nothing when the script is only run in one process). Useful to do before saving a model.`,go,Ma,Hl,bs,Ll,He,$s,uo,zl,Ap="Recursively gather object in a nested list/tuple/dictionary of objects from all devices.",Pl,ys,Dl,Rl,Kl;return o=new fo({props:{title:"Accelerator",local:"accelerator",headingTag:"h1"}}),ja=new fo({props:{title:"Accelerator",local:"api ][ accelerate.Accelerator",headingTag:"h2"}}),wa=new k({props:{name:"class accelerate.Accelerator",anchor:"accelerate.Accelerator",parameters:[{name:"device_placement",val:": bool = True"},{name:"split_batches",val:": bool = "},{name:"mixed_precision",val:": PrecisionType | str | None = None"},{name:"gradient_accumulation_steps",val:": int = 1"},{name:"cpu",val:": bool = False"},{name:"dataloader_config",val:": DataLoaderConfiguration | None = None"},{name:"deepspeed_plugin",val:": DeepSpeedPlugin | dict[str, DeepSpeedPlugin] | None = None"},{name:"fsdp_plugin",val:": FullyShardedDataParallelPlugin | None = None"},{name:"torch_tp_plugin",val:": TorchTensorParallelPlugin | None = None"},{name:"megatron_lm_plugin",val:": MegatronLMPlugin | None = None"},{name:"rng_types",val:": list[str | RNGType] | None = None"},{name:"log_with",val:": str | LoggerType | GeneralTracker | list[str | LoggerType | GeneralTracker] | None = None"},{name:"project_dir",val:": str | os.PathLike | None = None"},{name:"project_config",val:": ProjectConfiguration | None = None"},{name:"gradient_accumulation_plugin",val:": GradientAccumulationPlugin | None = None"},{name:"step_scheduler_with_optimizer",val:": bool = True"},{name:"kwargs_handlers",val:": list[KwargsHandler] | None = None"},{name:"dynamo_backend",val:": DynamoBackend | str | None = None"},{name:"dynamo_plugin",val:": TorchDynamoPlugin | None = None"},{name:"deepspeed_plugins",val:": DeepSpeedPlugin | dict[str, DeepSpeedPlugin] | None = None"},{name:"parallelism_config",val:": ParallelismConfig | None = None"}],parametersDescription:[{anchor:"accelerate.Accelerator.device_placement",description:`device_placement (bool, optional, defaults to True) — Whether or not the accelerator should put objects on device (tensors yielded by the dataloader, model, etc…).`,name:"device_placement"},{anchor:"accelerate.Accelerator.mixed_precision",description:`mixed_precision (str, optional) — Whether or not to use mixed precision training. Choose from ‘no’,‘fp16’,‘bf16’ or ‘fp8’. Will default to the value in the environment variable ACCELERATE_MIXED_PRECISION, which will use the default value in the accelerate config of the current system or the flag passed with the accelerate.launch command. ‘fp8’ requires the installation of transformers-engine.`,name:"mixed_precision"},{anchor:"accelerate.Accelerator.gradient_accumulation_steps",description:`gradient_accumulation_steps (int, optional, default to 1) — The number of steps that should pass before gradients are accumulated. A number > 1 should be combined with Accelerator.accumulate. If not passed, will default to the value in the environment variable ACCELERATE_GRADIENT_ACCUMULATION_STEPS. Can also be configured through a GradientAccumulationPlugin.`,name:"gradient_accumulation_steps"},{anchor:"accelerate.Accelerator.cpu",description:`cpu (bool, optional) — Whether or not to force the script to execute on CPU. Will ignore GPU available if set to True and force the execution on one process only.`,name:"cpu"},{anchor:"accelerate.Accelerator.dataloader_config",description:`dataloader_config (DataLoaderConfiguration, optional) — A configuration for how the dataloaders should be handled in distributed scenarios.`,name:"dataloader_config"},{anchor:"accelerate.Accelerator.deepspeed_plugin",description:`deepspeed_plugin (DeepSpeedPlugin or dict of strDeepSpeedPlugin, optional): Tweak your DeepSpeed related args using this argument. This argument is optional and can be configured directly using accelerate config. If using multiple plugins, use the configured key property of each plugin to access them from accelerator.state.get_deepspeed_plugin(key). Alias for deepspeed_plugins.`,name:"deepspeed_plugin"},{anchor:"accelerate.Accelerator.fsdp_plugin",description:`fsdp_plugin (FullyShardedDataParallelPlugin, optional) — Tweak your FSDP related args using this argument. This argument is optional and can be configured directly using accelerate config`,name:"fsdp_plugin"},{anchor:"accelerate.Accelerator.torch_tp_plugin",description:`torch_tp_plugin (TorchTensorParallelPlugin, optional) — Deprecated: use parallelism_config with tp_size instead.`,name:"torch_tp_plugin"},{anchor:"accelerate.Accelerator.megatron_lm_plugin",description:`megatron_lm_plugin (MegatronLMPlugin, optional) — Tweak your MegatronLM related args using this argument. This argument is optional and can be configured directly using accelerate config`,name:"megatron_lm_plugin"},{anchor:"accelerate.Accelerator.rng_types",description:`rng_types (list of str or RNGType) — The list of random number generators to synchronize at the beginning of each iteration in your prepared dataloaders. Should be one or several of:

    Will default to ["torch"] for PyTorch versions <=1.5.1 and ["generator"] for PyTorch versions >= 1.6.`,name:"rng_types"},{anchor:"accelerate.Accelerator.log_with",description:`log_with (list of str, LoggerType or GeneralTracker, optional) — A list of loggers to be setup for experiment tracking. Should be one or several of:

    `,name:"log_with"},{anchor:"accelerate.Accelerator.project_config",description:`project_config (ProjectConfiguration, optional) — A configuration for how saving the state can be handled.`,name:"project_config"},{anchor:"accelerate.Accelerator.project_dir",description:`project_dir (str, os.PathLike, optional) — A path to a directory for storing data such as logs of locally-compatible loggers and potentially saved checkpoints.`,name:"project_dir"},{anchor:"accelerate.Accelerator.step_scheduler_with_optimizer",description:`step_scheduler_with_optimizer (bool, optional, defaults to True) — Set True if the learning rate scheduler is stepped at the same time as the optimizer, False if only done under certain circumstances (at the end of each epoch, for instance).`,name:"step_scheduler_with_optimizer"},{anchor:"accelerate.Accelerator.kwargs_handlers",description:`kwargs_handlers (list of KwargsHandler, optional) — A list of KwargsHandler to customize how the objects related to distributed training, profiling or mixed precision are created. See kwargs for more information.`,name:"kwargs_handlers"},{anchor:"accelerate.Accelerator.dynamo_backend",description:`dynamo_backend (str or DynamoBackend, optional, defaults to "no") — Set to one of the possible dynamo backends to optimize your training with torch dynamo.`,name:"dynamo_backend"},{anchor:"accelerate.Accelerator.dynamo_plugin",description:`dynamo_plugin (TorchDynamoPlugin, optional) — A configuration for how torch dynamo should be handled, if more tweaking than just the backend or mode is needed.`,name:"dynamo_plugin"},{anchor:"accelerate.Accelerator.gradient_accumulation_plugin",description:`gradient_accumulation_plugin (GradientAccumulationPlugin, optional) — A configuration for how gradient accumulation should be handled, if more tweaking than just the gradient_accumulation_steps is needed.`,name:"gradient_accumulation_plugin"}],source:"https://github.com/huggingface/accelerate/blob/v1.10.1/src/accelerate/accelerator.py#L184"}}),_a=new k({props:{name:"accumulate",anchor:"accelerate.Accelerator.accumulate",parameters:[{name:"*models",val:""}],parametersDescription:[{anchor:"accelerate.Accelerator.accumulate.*models",description:`*models (list of torch.nn.Module) — PyTorch Modules that were prepared with Accelerator.prepare. Models passed to accumulate() will skip gradient syncing during backward pass in distributed training`,name:"*models"}],source:"https://github.com/huggingface/accelerate/blob/v1.10.1/src/accelerate/accelerator.py#L1249"}}),$t=new G({props:{anchor:"accelerate.Accelerator.accumulate.example",$$slots:{default:[Dp]},$$scope:{ctx:_}}}),va=new k({props:{name:"autocast",anchor:"accelerate.Accelerator.autocast",parameters:[{name:"autocast_handler",val:": AutocastKwargs = None"}],source:"https://github.com/huggingface/accelerate/blob/v1.10.1/src/accelerate/accelerator.py#L4045"}}),yt=new G({props:{anchor:"accelerate.Accelerator.autocast.example",$$slots:{default:[Kp]},$$scope:{ctx:_}}}),Ja=new k({props:{name:"backward",anchor:"accelerate.Accelerator.backward",parameters:[{name:"loss",val:""},{name:"**kwargs",val:""}],source:"https://github.com/huggingface/accelerate/blob/v1.10.1/src/accelerate/accelerator.py#L2702"}}),Mt=new G({props:{anchor:"accelerate.Accelerator.backward.example",$$slots:{default:[qp]},$$scope:{ctx:_}}}),Ta=new k({props:{name:"check_trigger",anchor:"accelerate.Accelerator.check_trigger",parameters:[],source:"https://github.com/huggingface/accelerate/blob/v1.10.1/src/accelerate/accelerator.py#L2762"}}),jt=new G({props:{anchor:"accelerate.Accelerator.check_trigger.example",$$slots:{default:[Op]},$$scope:{ctx:_}}}),Ua=new k({props:{name:"clear",anchor:"accelerate.Accelerator.clear",parameters:[{name:"*objects",val:""}],source:"https://github.com/huggingface/accelerate/blob/v1.10.1/src/accelerate/accelerator.py#L3808"}}),wt=new G({props:{anchor:"accelerate.Accelerator.clear.example",$$slots:{default:[ei]},$$scope:{ctx:_}}}),xa=new k({props:{name:"clip_grad_norm_",anchor:"accelerate.Accelerator.clip_grad_norm_",parameters:[{name:"parameters",val:""},{name:"max_norm",val:""},{name:"norm_type",val:" = 2"}],source:"https://github.com/huggingface/accelerate/blob/v1.10.1/src/accelerate/accelerator.py#L2830",returnDescription:`