import{s as k,n as A,o as C}from"../chunks/scheduler.8c3d61f6.js";import{S as z,i as F,g as p,s as f,r as O,A as j,h as u,f as n,c,j as L,u as x,x as M,k as y,y as q,a as i,v as R,d as E,t as H,w as S}from"../chunks/index.da70eac4.js";import{H as B,E as G}from"../chunks/getInferenceSnippets.725ed3d4.js";function I(T){let a,d,m,g,r,w,o,b='You can fine-tune Stable Diffusion on a reward function via reinforcement learning with the 🤗 TRL library and 🤗 Diffusers. This is done with the Denoising Diffusion Policy Optimization (DDPO) algorithm introduced by Black et al. in Training Diffusion Models with Reinforcement Learning, which is implemented in 🤗 TRL with the DDPOTrainer.',D,s,v='For more information, check out the DDPOTrainer API reference and the Finetune Stable Diffusion Models with DDPO via TRL blog post.',_,l,P,h,$;return r=new B({props:{title:"Reinforcement learning training with DDPO",local:"reinforcement-learning-training-with-ddpo",headingTag:"h1"}}),l=new G({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/en/training/ddpo.md"}}),{c(){a=p("meta"),d=f(),m=p("p"),g=f(),O(r.$$.fragment),w=f(),o=p("p"),o.innerHTML=b,D=f(),s=p("p"),s.innerHTML=v,_=f(),O(l.$$.fragment),P=f(),h=p("p"),this.h()},l(e){const t=j("svelte-u9bgzb",document.head);a=u(t,"META",{name:!0,content:!0}),t.forEach(n),d=c(e),m=u(e,"P",{}),L(m).forEach(n),g=c(e),x(r.$$.fragment,e),w=c(e),o=u(e,"P",{"data-svelte-h":!0}),M(o)!=="svelte-1lr0fax"&&(o.innerHTML=b),D=c(e),s=u(e,"P",{"data-svelte-h":!0}),M(s)!=="svelte-xnt7kv"&&(s.innerHTML=v),_=c(e),x(l.$$.fragment,e),P=c(e),h=u(e,"P",{}),L(h).forEach(n),this.h()},h(){y(a,"name","hf:doc:metadata"),y(a,"content",U)},m(e,t){q(document.head,a),i(e,d,t),i(e,m,t),i(e,g,t),R(r,e,t),i(e,w,t),i(e,o,t),i(e,D,t),i(e,s,t),i(e,_,t),R(l,e,t),i(e,P,t),i(e,h,t),$=!0},p:A,i(e){$||(E(r.$$.fragment,e),E(l.$$.fragment,e),$=!0)},o(e){H(r.$$.fragment,e),H(l.$$.fragment,e),$=!1},d(e){e&&(n(d),n(m),n(g),n(w),n(o),n(D),n(s),n(_),n(P),n(h)),n(a),S(r,e),S(l,e)}}}const U='{"title":"Reinforcement learning training with DDPO","local":"reinforcement-learning-training-with-ddpo","sections":[],"depth":1}';function Y(T){return C(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Q extends z{constructor(a){super(),F(this,a,Y,I,k,{})}}export{Q as component};