! inv-pend-ctrl-# ! ! simple NN to learn control of an inverted pendulum (angle phi=0 in the bottom ! position), located on a cart (at coordinate x), via accel. applied to the cart. ! ! O m1 ! \ ! \ ! |_m2_| !_______________o__o___________ x ! ! ! phi_tt = -[g/L sin(phi) +x_tt_ext cos(phi)/L + mu cos(phi)sin(phi) phi_t^2]/ ! (1 - mu cos^2 phi) ! x_tt = x_tt_ext + mu L [sin(phi) phi_t^2 - cos(phi) phi_tt)] ! where mu = m1/(m1+m2) < 1, and L = length of the arm. ! At first, we assume mu --> 0. ! module pendulum use omp_lib implicit none integer, parameter :: Nets = 140000, Nneur = 4, Npar = 14 real(8), parameter :: Tend = 20., dt = 0.002 ! [s] learning time, time-step integer, parameter :: Nsteps = floor(Tend/dt) ! ~1e4 real(8), parameter :: mu = 1./(1.+2.), L = 1., g_L = 9.81, pi=atan(1d0)*4d0 ! mass ratio mu = m1/(m1+m2) is not used at this time real(8), parameter :: gamm = 0.01 ! damping parameter (inverse time) ! one pendulum, net, and NN real(8) :: pend(6), par(Npar+1), nn(Nneur) ! history of 1 run for plotting real(4) :: swing(Nsteps,6+1) ! simplex real(8) :: splx(Npar+1,Npar+1), store(Npar+1,Nets) contains ! physics of the pendulum: d^2 phi/ dt^2 = phi_tt (external x-accelertion) ! function phi_tt (x_tt_ext) real(8) :: phi, x_tt_ext, phi_tt ! ! very simple physics: gamm = small rotational friction coeff. phi = pend(1) phi_tt = -(g_L*sin(phi) + x_tt_ext*cos(phi)) -gamm*pend(2) ! ! this more complicated physics is not used: pendulum is heavy and ! influences the resultant x-acceleration. ! mu = mass parameter = m_pend/(mass_pend + mass_platform) ! phi_tt = -(g_L*sin(phi) +x_tt_ext*cos(phi) +mu*cos(phi)sin(phi)*phi_t^2]/& ! (1 - mu*cos^2 phi) -gamm*phi_t ! for mu > 0 end function subroutine setup_pend () ! initialize the pendulum always the same way pend(1) = 2. ! phi [rad] pend(2) = -0.1 ! phi_t [rad/s] pend(3) = 0. ! phi_tt [s^-2] pend(4) = 0. ! x [m] pend(5) = 0.1 ! x_t [m/s] pend(6) = 0. ! x_tt [m/s^2] end subroutine subroutine setup_net () ! initialize the net parameters randomly call random_number(par) ! 0..1 par(1:Npar-1) = par(1:Npar-1) - 0.5 ! -0.5 ... +0.5 ! sigmoid function contains one hyper-parameter, par(Npar) par(Npar) = 0.5 + 0.7*par(Npar) ! 0.5..1.2 par(Npar+1) = 0. ! reserved for fitness function value end subroutine subroutine useNN (x_tt_recom) ! ! structure of the 14-parameter net (+1 hyper-param in sigmoid) ! the inputs are: phi, phi_t, x_t, x_tt, so it's a recurrent net ! (previous x_tt affects next x_tt) ! ! 0 ---- 2 ! \/ / \ ! 0 -- 1 --- 4 --> output ! / \ \ / ! 0 ------ 3 ! / ! 0 ! input values are layer 0, they are not normalized by a sigmoid function ! level 1 has 1 neuron, level 2 has 2 neurons, level 3 has 1 neuron. real(8) :: x_tt_recom ! nn(1) combines all inputs: phi,phi_t,x_t,x_tt (x_tt makes NN recurrent) ! nn(1) = sig(par(1)*pend(1)+par(2)*pend(2)+par(3)*pend(5)+par(4)*pend(6)) ! phi,phi_t,x,nn(1) ! nn(2) = sig(par(4)*pend(1)+par(5)*pend(2)+par(6)*pend(4) +par(7)*nn(1)) ! nn(1),phi_t,x_t,nn(2) ! nn(3) = sig(par(8)* nn(1) +par(9)*pend(2) +par(10)*pend(5) +par(11)*nn(2)) ! nn(4) = sig(par(12)*nn(1) +par(13)*nn(2) +par(14)*nn(3)) ! nn(1),nn(2),nn(3) !-----------------orig with error nn(1) = sig(par(1)*pend(1)+par(2)*pend(2)+par(3)*pend(5) & + par(4)*pend(6)) ! linear comb. of phi,phi_t,x_t,x_tt nn(2) = sig(par(5)*pend(1)+par(6)*pend(2)+par(7)*pend(5)) ! phi,phi_t,x_t nn(3) = sig(par(8)* nn(1) +par(9)*pend(2)) ! +par(10)*nn(5)) ! nn(1),phi_t nn(4) = sig(par(11)*nn(1) +par(12)*nn(2) +par(13)*nn(3)) ! nn(1),nn(2),nn(3) ! x_tt_recom = nn(4) ! allows x_tt_ext up to ~1 m/s^2 ~ 0.1g end subroutine function sig(x) ! sigmoid function simpler than but similar to arctan(x)*(2/pi) real(8) :: sig, x sig = x/(par(Npar)+x) ! |x| > ~par(Npar) renorms to stay below 1 end function subroutine evolve1 (fit, iw, ipl) ! one pendulum experiment with values of parameters in par() integer :: st, iw, ipl ! st=step #, iw,ipl control printing and plotting real(8) :: t,y, fit, x_tt_ext, pi = atan(1d0)*4d0, pi2 = atan(1d0)*8d0 ! call setup_pend () fit = 0.d0 do st = 1, Nsteps t = st*dt; y = 0. ! NN recommends to apply x-acceler. x_tt_ext, based on current state of pendulum call useNN (x_tt_ext) ! argument is the output of the subroutine pend(3) = phi_tt (x_tt_ext) ! phi_tt = function of pendulum & ext. push pend(2) = pend(2) + dt*pend(3) ! phi_t pend(1) = mod( pend(1) + dt*pend(2), pi2) ! phi in range 0...2pi pend(6) = x_tt_ext ! x_tt(mu=0) pend(5) = pend(5) + dt*pend(6) ! x_t pend(4) = pend(4) + dt*pend(5) ! x ! contribute to the fitness function, mainly dependent |phi-pi| if(st/2*2==st) y = -2*cos(pend(1)) fit = fit + y ! <-cos(phi)> should be as close to +1 as possible fit = fit -0.07*(abs(pend(2))+abs(pend(5))) ! penalize |d/dt|'s ! df = -abs(pend(1)-pi) ! phi should stay close to pi ! df = df -(0.3*abs(pend(2))+0.5*abs(pend(5))) ! penalize phi & x-speeds ! fit = fit + df *min(1d0,t/4d0) ! time-ramped ! save data for a plot if(ipl) then swing(st,1:6) = pend; swing(st,7) = t ! system's state & time end if ! print, every 500th step if(iw .and. (mod(st,500)==0 .or. st > Nsteps-5) ) & print '(f5.1,5f8.3,f9.3)',t,pend(1:2),pend(4:6),x_tt_ext,fit/st end do ! st ! simulation ended fit = fit/Nsteps ! average fit = fit -0.03*abs(pend(4)) ! also penalize big |x| at the end par(Npar+1) = fit if(iw) print*,' fit', fit ! plot if(ipl) call plot_3f('XWIN',Nsteps, & swing(:,7),swing(:,1)/real(pi),swing(:,2),swing(:,4)) end subroutine subroutine evolve (fit, iw, ipl) integer :: istp, iw, ipl real(8) :: fit, y, x_tt_ext, t, pi2 = atan(1d0)*8d0 call setup_pend () fit = 0.d0; y = 0.d0 do istp = 1, Nsteps t = dt*istp call useNN (x_tt_ext) ! NN recommends to apply x_tt based on pendulum state pend(3) = phi_tt (x_tt_ext) ! phi_tt pend(2) = pend(2) + dt*pend(3) ! phi_t ! pend(1) = mod( pend(1) + dt*pend(2), pi2) ! phi pend(1) = pend(1) + dt*pend(2) pend(6) = x_tt_ext !-0.* pend(4) ! x_tt, assuming mu=0, add terms otherwise pend(5) = pend(5) + dt*pend(6) ! x_t pend(4) = pend(4) + dt*pend(5) ! x ! contribute to fitness function, mainly dependent on if(istp/2*2==istp) y = -2*cos(pend(1)) fit = fit + y ! <-cos(phi)> should be as close to +1 as possible fit = fit -0.07*(abs(pend(2))+abs(pend(5))) ! penalize |d/dt|'s ! save data for a plot if(ipl) then swing(istp,1:6) = pend(1:6); swing(istp,7) = t ! system's state & time end if if(iw) then if(istp/500*500==istp .or. istp > Nsteps-5) print '(i5,6f8.3,f9.3)',& istp,pend(1:2),pend(4:6),x_tt_ext,fit/istp end if end do ! istp ! simulation ended fit = fit/Nsteps ! average fit = fit -0.03*abs(pend(4)) ! also penalize big |x| at the end par(Npar+1) = fit if(iw) print*,' fit', fit ! plot if(ipl) call plot_3f('XWIN',Nsteps, & swing(:,7),swing(:,1)/real(pi),swing(:,2)/5.,swing(:,5)/5.) end subroutine !_______________________________________________________________________ ! plot 3 functions with DISLIN !_______________________________________________________________________ subroutine plot_3f (CDEV,nx,x,y1,y2,y3) ! allowed plot formats and devices, CDEV = ! CONS (Screen, full window), XWIN (Screen, small window), ! GL (OpenGL window), PS, EPS, PDF, SVG, GIF, TIFF, PNG, PPM, BMP. use dislin implicit none REAL, DIMENSION(*) :: x, y1, y2, y3 real :: ymi,yma INTEGER :: nx, N, i CHARACTER (LEN=*) :: CDEV N = nx ! size(x) CALL METAFL(CDEV) CALL SETPAG('DA4L') ! A4 Landscape CALL DISINI() CALL PAGERA() !CALL COMPLX() ! fonts CALL AXSPOS(800,1800) CALL AXSLEN(1600,1000) CALL NAME('time','X') CALL NAME('phi, dphi/dt','Y') CALL LABDIG(-1,'X') CALL TICKS(10,'XY') ! CALL TITLIN('Plot of phi and dphi/dt',1) ! <--modify CALL TITLIN('phi (red), dphi/dt (green), dx/dt (blue)',3) ! <--modify ! graf(XA, XE, XOR, XSTEP, YA, YE, YOR, YSTEP) ! XA, XE (YA, YE) are the lower and upper limits of the X (Y) axis. ! XOR, XSTEP (YOR,..) are the first X(Y)-axis label and the step between ymi = min(minval(y1(1:N)),minval(y2(1:N))) yma = max(maxval(y1(1:N)),maxval(y2(1:N))) do i = 1,nx y3(i) = min(y3(i),yma); y3(i) = mAX(y3(i),ymi) end do CALL GRAF(0.,x(N),0.,x(N)/5, ymi,yma,ymi,(yma-ymi)/5.) CALL TITLE() CALL COLOR('RED') CALL CURVE(x,y1,N) CALL COLOR('GREEN') CALL CURVE(x,y2,N) CALL COLOR('BLUE') CALL CURVE(x,y3,N) CALL COLOR('FORE') CALL DASH() CALL XAXGIT() CALL DISFIN() return end subroutine plot_3f end module pendulum !_________________________________________________________________________________ ! main program !_________________________________________________________________________________ program pendulum_ctrl use pendulum use omp_lib implicit none integer :: j,v real(8), save :: f, best_fit, par_best(Npar+1), perturb(Npar) ! run a large number (Nets) of trial nets and their results best_fit = -1e20 do j = 1, Nets call setup_net ! with random parameters call evolve (f,0,0) ! one pendulum evolution, no printing or plotting store(1:Npar+1,j) = par(1:Npar+1) ! Npar+1 value: the fitness if (f > best_fit) then v = j; best_fit = f end if if(mod(j,5000)==0) print '(i6,a3,e11.3,a5,7f8.3)', & j,' f',best_fit,' pend',pend(1:7) ! if(mod(j,1000)==0) print*,j,' f',f end do ! j ! the best fit is at vertex v par_best = store(1:Npar+1,v) par = par_best splx(:,Npar+1) = par_best call evolve (f,1,1) ! re-run, print and plot the best case print*,v,' is best, par:',par_best,' last=fit' print*,' building splx around the par_best' ! build a simplex of Npar+1 vertices do j = 1, Npar call random_number(perturb) par(1:Npar) = (1+0.02*(perturb-0.5))*par_best(1:Npar) ! vary par_best+-0.01 call evolve (f,0,0) ! f= fit = par(Npar+1) splx(:,j) = par end do ! print*,' simplex has been built' ! evolve the simplex end program pendulum_ctrl