program test_parareal

  implicit none

  real, dimension(:,:,:), allocatable :: yP
  real, dimension(:), allocatable :: y, y0, yM, yS
  real, dimension(:,:), allocatable :: A, S1, S, ST, ST1
  real :: t_0, T, det, tau, ttau
  integer :: P, N, M, ndim, i,j, k

  t_0 = 0.
  T = 6.
  !T = 2.

  M = 500
  P = 20
  N = M / P
  
  ndim = 2
  allocate( A(1:ndim, 1:ndim), S(1:ndim, 1:ndim), S1(1:ndim, 1:ndim) )
  allocate( ST(1:ndim, 1:ndim), ST1(1:ndim, 1:ndim) )
  allocate(y0(1:ndim), y(1:ndim), yM(1:ndim), yS(1:ndim) )
  allocate(yP(0:P, 1:ndim, 0:3) )

  
  y0 = (/1., 0. /)
  
  A(1,1) = 998.
  A(1,2) = 1998
  A(2,1) = -999.
  A(2,2) = -1999.

  A(1,1) = 1.
  A(1,2) = 0.
  A(2,1) = 0.
  A(2,2) = 1.


!  do i=1, ndim
!     write(*,'(3(2es12.4, a2))') A(i,:),'|', A1(i, : ),'|', matmul(A(i, 1:ndim), A1(1:ndim, 1:ndim))
!
!  enddo

  tau = (T-t_0) / M
  S(1:ndim, 1:ndim) = - tau * A(1:ndim, 1:ndim)
  S(1, 1) = S(1,1) + 1.
  S(2, 2) = S(2,2) + 1.
  
  det = S(1,1)*S(2,2) - S(1,2)*S(2,1)
  S1(1,1)= S(2,2) / det
  S1(1,2)= -S(1,2) / det
  S1(2,1)= -S(2,1) / det
  S1(2,2)= S(1,1) / det

  yM(1:ndim) = y0(1:ndim)

  ! usual backward Euler
  call integrate(ndim, M, t_0, tau, S1(1:ndim, 1:ndim), y0(1:ndim), yS(1:ndim), 20)
  
  ! parareal
  ttau = (T-t_0) / P
  ST(1:ndim, 1:ndim) = - ttau * A(1:ndim, 1:ndim)
  ST(1, 1) = ST(1,1) + 1.
  ST(2, 2) = ST(2,2) + 1.
  
  det = ST(1,1)*ST(2,2) - ST(1,2)*ST(2,1)
  ST1(1,1)= ST(2,2) / det
  ST1(1,2)= -ST(1,2) / det
  ST1(2,1)= -ST(2,1) / det
  ST1(2,2)= ST(1,1) / det

  yM(1:ndim) = y0(1:ndim)
  yP(0, 1:ndim, 0)  = yM(1:ndim)
  yP(0, 1:ndim, 1)  = yM(1:ndim)
  yP(0, 1:ndim, 2)  = yM(1:ndim)
  yP(0, 1:ndim, 3)  = yM(1:ndim)

  ! sequantial coarse, initiation
  do j = 0, P-1
     call integrate(ndim, 1, t_0+j*ttau, ttau, ST1(1:ndim,1:ndim), &
          yP(j,1:ndim,0), yP(j+1,1:ndim,0), 21)
  enddo

  !do j=0, P
  !   write(60,'(20es14.6)') j*ttau, yP(j,1:ndim,0)
  !enddo

  do k=1, P ! levels of global iterations

     !paralel computation
     do j=0, P-1
        call integrate(ndim, N, t_0+j*ttau, tau, S1(1:ndim,1:ndim), &
             yP(j,1:ndim,0), yP(j,1:ndim,2), 100+k)
     enddo
     
     ! sequantial   (can be pararell)
     do j = 0, P-1
        call integrate(ndim, 1, t_0+j*ttau, ttau, ST1(1:ndim,1:ndim), &
             yP(j,1:ndim,0), yP(j,1:ndim,1), 200+k)
        !if(j == 0) print*,'yP1=', yP(j,1:ndim,0), yP(j,1:ndim,1)

     enddo

     ! sequantial   (can not be pararell)
     do j = 0, P-1
        call integrate(ndim, 1, t_0+j*ttau, ttau, ST1(1:ndim,1:ndim), &
             yP(j,1:ndim,3), yM(1:ndim), -1)
        !if(j == 0) print*,'yP1=', yP(j,1:ndim,3), yM(1:ndim)

        
        yP(j+1,1:ndim,3) = yM(1:ndim) + yP(j,1:ndim,2) - yP(j,1:ndim,1)
        !if(j == 0) print*,'yM=', yM
        !if(j == 0) print*,'yM=', yP(j,:,1)
        !if(j == 0) print*,'yM=', yP(j+1,:,2)
        !if(j == 0) print*,'yM=', yP(j+1,:,3)
     enddo
        
     do j=0, P
        write(500+k,'(20es14.6)') j*ttau, yP(j,1:ndim,3)
     enddo
     

     write(*,'(a10,i5, 30es12.4)') 'Level=',k,yP(P, 1, 3), abs(yP(P, 1, 3)- yS(1)), abs(yP(P, 1, 3)- exp(2.))
     yP(1:P,1:ndim,0) = yP(1:P,1:ndim,3)
     if(abs(yP(P, 1, 3)- yS(1)) < 1E-12) goto 100
  enddo
100 print*,'end program test_parareal'


  
end program test_parareal


  subroutine integrate(ndim, nstep, t0, tau, A1, y0, y, ifile)
    integer, intent(in) :: ndim
    integer, intent(in) :: nstep
    real, dimension(1:ndim, 1:ndim), intent(in) :: A1
    real, dimension(1:ndim), intent(in) :: y0
    real, dimension(1:ndim), intent(inout) :: y
    real, intent(in) :: t0, tau

    integer, intent(in) :: ifile
    real, dimension(:), allocatable :: yi
    integer :: i
    real :: t
    
    allocate(yi(1:ndim))
    yi(1:ndim) = y0(1:ndim)
    y(1:ndim) = y0(1:ndim)
    
    do i=0, nstep
       if(i > 0) then
          y(1:ndim) = matmul(A1(1:ndim, 1:ndim), yi(1:ndim))
       endif
       if(ifile > 0) then
          write(ifile, '(30es14.6)') t0 + i*tau, y(1:ndim)
       endif
       yi(1:ndim) = y(1:ndim)
    enddo
    if(nstep >1) write(ifile, '(x)')
    
    deallocate(yi)
    
  end subroutine integrate
