module sub_calc

  use Ellip_Slv

  public :: EPV_varinv, BOMG_inv
  private :: calc_J0, set_3dbd, check_TL
  public :: sub_cross_2d, sub_lapsi_2d, sub_lapsi_1d

contains

subroutine EPV_varinv( lon, lat, pex, pv, phi, t, ug, vg, psi,  &
  &                    ini_flag )
  use Thermo_Const
  use Phys_Const
  use Math_Const
  use Thermo_Function
  use Derivation
  use Algebra
  use Max_Min

  implicit none

  real, intent(in) :: lon(:)  ! longitude [rad]
  real, intent(in) :: lat(:)  ! latitude [rad]
  real, intent(in) :: pex(:)  ! pressure by exner function [1]
  real, intent(in) :: pv(size(lon),size(lat),size(pex))   ! EPV [10^{-6}PVU]
  real, intent(inout) :: phi(size(lon),size(lat),size(pex))
                              ! geopotential [J/kg]
  real, intent(inout) :: t(size(lon),size(lat),size(pex))   ! potential temperature [K]
  real, intent(inout) :: ug(size(lon),size(lat),size(pex))  ! x-wind [ms-1]
  real, intent(inout) :: vg(size(lon),size(lat),size(pex))  ! y-wind [ms-1]
  real, intent(inout) :: psi(size(lon),size(lat),size(pex))  ! stream func. [10^6PVUs]
  logical, intent(in), optional :: ini_flag   ! .true. = initializing, default = .true.

  integer, parameter :: nl=20
  real, parameter :: sigma1=1.0e-4, sigma2=0.9, errmax=1.0e-6
  integer :: nx, ny, nz, i, j, k, l, m, counter, fl
  integer :: tmpsx, tmpsy, tmpsz, tmphx, tmphy, tmphz
  real :: kp, thres1l, thres1s, thres2l, thres2s, alphak, ddJ, dJds1
  real :: smax, hmax
  real, dimension(nl) :: J0, a, b, rho
  real, dimension(size(pex)) :: pres
  real, dimension(size(lat)) :: f0, coslat
  real, dimension(size(lon),size(lat)) :: sx, sy
  real, dimension(size(lon),size(lat),size(pex)) :: bo_phi, bo_psi
  real, dimension(size(lon),size(lat),size(pex)) :: ds1, dh1, checks, checkh
  real, dimension(size(lon),size(lat),size(pex),nl) :: phip, psip
  real, dimension(size(lon),size(lat),size(pex),nl) :: dJhdx, dJsdx
  real, dimension(size(lon),size(lat),size(pex),nl) :: dxh, dxs, dyh, dys
  real, dimension(size(lon),size(lat),size(pex),2) :: q
  logical :: e_flag

  nx=size(lon)
  ny=size(lat)
  nz=size(pex)

  phip=0.0
  psip=0.0

  kp=Rd/Cpd
  bo_phi=phi
  bo_psi=psi

  do k=1,nz
     pres(k)=p0*(pex(k)**(1.0/kp))
  end do

  !-- calculating coriolis parameter
  do j=1,ny
     f0(j)=2.0*omega*sin(lat(j))
     coslat(j)=cos(lat(j))
     do i=1,nx
        sx(i,j)=radius*coslat(j)
        sy(i,j)=radius
     end do
  end do

  !-- setting boundary condition

!$omp parallel default(shared)
!$omp do schedule(runtime) private(i,j)
  do j=1,ny
     do i=1,nx
        bo_psi(i,j,1)=-Cpd*t(i,j,1)/f0(j)
        bo_psi(i,j,nz)=-Cpd*t(i,j,nz)/f0(j)
        if(i/=1.and.i/=nx.and.j/=1.and.j/=ny)then
           bo_phi(i,j,1)=-Cpd*t(i,j,1)
           bo_phi(i,j,nz)=-Cpd*t(i,j,nz)
        end if
     end do
  end do
!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(i,k)

  do k=2,nz-1
     do i=1,nx
        bo_psi(i,1,k)=-radius*ug(i,1,k)
        bo_psi(i,ny,k)=-radius*ug(i,ny,k)
     end do
  end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(j,k)

  do k=2,nz-1
     do j=1,ny
        bo_psi(1,j,k)=radius*coslat(j)*vg(1,j,k)
        bo_psi(nx,j,k)=radius*coslat(j)*vg(nx,j,k)
     end do
  end do

!$omp end do
!$omp end parallel

!--------------------------------
!-- setting first guess values

  phip(:,:,:,1)=phi(:,:,:)
  psip(:,:,:,1)=psi(:,:,:)

  do l=1,nl-1

     e_flag=.false.
  !-- Here, Starting L-BFGS
  !-- calculating detecting direction (L-BFGS method)
     if(l==1)then
     !-- calculating J and dJ/dx
        call calc_J0( lon, lat, pex, psip(:,:,:,l), phip(:,:,:,l), pv,  &
  &                   J0(l), dJsdx(:,:,:,l), dJhdx(:,:,:,l) )
        ds1(:,:,:)=-dJsdx(:,:,:,l)
        dh1(:,:,:)=-dJhdx(:,:,:,l)
     else

!$omp parallel default(shared)
!$omp do schedule(runtime) private(i,j,k,m)
        do m=1,l-1
           do k=1,nz
              do j=1,ny
                 do i=1,nx
                    dxh(i,j,k,m)=phip(i,j,k,m+1)-phip(i,j,k,m)
                    dxs(i,j,k,m)=psip(i,j,k,m+1)-psip(i,j,k,m)
                    dyh(i,j,k,m)=dJhdx(i,j,k,m+1)-dJhdx(i,j,k,m)
                    dys(i,j,k,m)=dJsdx(i,j,k,m+1)-dJsdx(i,j,k,m)
                 end do
              end do
           end do
        end do
!$omp end do
!$omp end parallel

        q(:,:,:,1)=dJhdx(:,:,:,l)
        q(:,:,:,2)=dJsdx(:,:,:,l)

        do m=l-1,1,-1
           rho(m)=calc_3ddot( dxh(2:nx-1,2:ny-1,2:nz-1,m),  &
  &                           dyh(2:nx-1,2:ny-1,2:nz-1,m) )  &
  &              +calc_3ddot( dxs(2:nx-1,2:ny-1,2:nz-1,m),  &
  &                           dys(2:nx-1,2:ny-1,2:nz-1,m) )
           rho(m)=1.0/rho(m)

           a(m)=rho(m)*( calc_3ddot( dxh(2:nx-1,2:ny-1,2:nz-1,m),  &
  &                                  q(2:nx-1,2:ny-1,2:nz-1,1) )  &
  &                     +calc_3ddot( dxs(2:nx-1,2:ny-1,2:nz-1,m),  &
  &                                  q(2:nx-1,2:ny-1,2:nz-1,2) ) )

!$omp parallel default(shared)
!$omp do schedule(runtime) private(i,j,k)
           do k=1,nz
              do j=1,ny
                 do i=1,nx
                    q(i,j,k,1)=q(i,j,k,1)-a(m)*dyh(i,j,k,m)
                    q(i,j,k,2)=q(i,j,k,2)-a(m)*dys(i,j,k,m)
                 end do
              end do
           end do
!$omp end do
!$omp end parallel

        end do

        do m=1,l-1
           b(m)=rho(m)*( calc_3ddot( dyh(2:nx-1,2:ny-1,2:nz-1,m),  &
  &                                  q(2:nx-1,2:ny-1,2:nz-1,1) )  &
  &                     +calc_3ddot( dys(2:nx-1,2:ny-1,2:nz-1,m),  &
  &                                  q(2:nx-1,2:ny-1,2:nz-1,2) ) )

!$omp parallel default(shared)
!$omp do schedule(runtime) private(i,j,k)
           do k=1,nz
              do j=1,ny
                 do i=1,nx
                    q(i,j,k,1)=q(i,j,k,1)+(a(m)-b(m))*dxh(i,j,k,m)
                    q(i,j,k,2)=q(i,j,k,2)+(a(m)-b(m))*dxs(i,j,k,m)
                 end do
              end do
           end do
!$omp end do
!$omp end parallel

        end do

        dh1(:,:,:)=-q(:,:,:,1)
        ds1(:,:,:)=-q(:,:,:,2)
     end if

  !-- calculating step interval and final direction (calc. Wolfe's condition)
     dJds1=calc_3ddot( dJsdx(2:nx-1,2:ny-1,2:nz-1,l),  &
  &                    ds1(2:nx-1,2:ny-1,2:nz-1) )  &
  &       +calc_3ddot( dJhdx(2:nx-1,2:ny-1,2:nz-1,l),  &
  &                    dh1(2:nx-1,2:ny-1,2:nz-1) )
     thres2l=dJds1*sigma2
     thres1l=dJds1*sigma1
     alphak=1.0

!!! for checking
!  call check_TL( lon, lat, pex, psip(:,:,:,1), phip(:,:,:,1), pv,  &
!  &              ds1(:,:,:), dh1(:,:,:), alphak )

!$omp parallel default(shared)
!$omp do schedule(runtime) private(i,j,k)
     do k=1,nz
        do j=1,ny
           do i=1,nx
              psip(i,j,k,l+1)=psip(i,j,k,l)+alphak*ds1(i,j,k)
              phip(i,j,k,l+1)=phip(i,j,k,l)+alphak*dh1(i,j,k)
           end do
        end do
     end do
!$omp end do
!$omp end parallel

!     call set_3dbd( lon, lat, pex, psip(:,:,:,l+1), bo_psi, '222222' )
!     call set_3dbd( lon, lat, pex, phip(:,:,:,l+1), bo_phi, '111122' )

     call calc_J0( lon, lat, pex, psip(:,:,:,l+1), phip(:,:,:,l+1), pv,  &
  &                J0(l+1), dJsdx(:,:,:,l+1), dJhdx(:,:,:,l+1) )

     thres2s=calc_3ddot( dJsdx(2:nx-1,2:ny-1,2:nz-1,l+1),  &
  &                      ds1(2:nx-1,2:ny-1,2:nz-1) )  &
  &         +calc_3ddot( dJhdx(2:nx-1,2:ny-1,2:nz-1,l+1),  &
  &                      dh1(2:nx-1,2:ny-1,2:nz-1) )
     thres1s=J0(l+1)-J0(l)

     do while ((J0(l+1)==J0(l)).or.(thres1s<thres1l*alphak.and.thres2s<thres2l))
        e_flag=.true.
        alphak=2.0*alphak
!$omp parallel default(shared)
!$omp do schedule(runtime) private(i,j,k)
        do k=1,nz
           do j=1,ny
              do i=1,nx
                 psip(i,j,k,l+1)=psip(i,j,k,l)+alphak*ds1(i,j,k)
                 phip(i,j,k,l+1)=phip(i,j,k,l)+alphak*dh1(i,j,k)
              end do
           end do
        end do
!$omp end do
!$omp end parallel

!        call set_3dbd( lon, lat, pex, psip(:,:,:,l+1), bo_psi, '222222' )
!        call set_3dbd( lon, lat, pex, phip(:,:,:,l+1), bo_phi, '111122' )

        call calc_J0( lon, lat, pex, psip(:,:,:,l+1), phip(:,:,:,l+1), pv,  &
  &                   J0(l+1), dJsdx(:,:,:,l+1), dJhdx(:,:,:,l+1) )
        thres2s=calc_3ddot( dJsdx(2:nx-1,2:ny-1,2:nz-1,l+1),  &
  &                         ds1(2:nx-1,2:ny-1,2:nz-1) )  &
  &            +calc_3ddot( dJhdx(2:nx-1,2:ny-1,2:nz-1,l+1),  &
  &                         dh1(2:nx-1,2:ny-1,2:nz-1) )
        thres1s=J0(l+1)-J0(l)

        if(J0(l+1)>J0(l))then
write(*,*) "Passing J1>J0", J0(l:l+1), l
           exit
        end if
     end do

     if(e_flag.eqv..true.)then
        alphak=alphak*0.5
        write(*,*) "check alpha", alphak
     end if

!!     if(J0(l+1)>J0(l))then
!!        write(*,*) "Entering J1>J0 checker. NOTE!!! (EPV_varinv)."
!stop
!!!$omp parallel default(shared)
!!!$omp do schedule(runtime) private(i,j,k)
!!        do k=1,nz
!!           do j=1,ny
!!              do i=1,nx
!!                 psip(i,j,k,l+1)=psip(i,j,k,l)-ds1(i,j,k)
!!                 phip(i,j,k,l+1)=phip(i,j,k,l)-dh1(i,j,k)
!!              end do
!!           end do
!!        end do
!!!$omp end do
!!!$omp end parallel

!        call set_3dbd( lon, lat, pex, psip(:,:,:,l+1), bo_psi, '222222' )
!        call set_3dbd( lon, lat, pex, phip(:,:,:,l+1), bo_phi, '111122' )

!!        call calc_J0( lon, lat, pex, psip(:,:,:,l+1), phip(:,:,:,l+1), pv,  &
!!  &                   J0(l+1), dJsdx(:,:,:,l+1), dJhdx(:,:,:,l+1) )

!!        alphak=-1.0
!!     end if

!$omp parallel default(shared)
!$omp do schedule(runtime) private(i,j,k)
     do k=1,nz
        do j=1,ny
           do i=1,nx
              psip(i,j,k,l+1)=psip(i,j,k,l)+alphak*ds1(i,j,k)
              phip(i,j,k,l+1)=phip(i,j,k,l)+alphak*dh1(i,j,k)
           end do
        end do
     end do
!$omp end do
!$omp end parallel

     call calc_J0( lon, lat, pex, psip(:,:,:,l+1), phip(:,:,:,l+1), pv,  &
  &                J0(l+1), dJsdx(:,:,:,l+1), dJhdx(:,:,:,l+1) )
     thres2s=calc_3ddot( dJsdx(2:nx-1,2:ny-1,2:nz-1,l+1),  &
  &                      ds1(2:nx-1,2:ny-1,2:nz-1) )  &
  &         +calc_3ddot( dJhdx(2:nx-1,2:ny-1,2:nz-1,l+1),  &
  &                      dh1(2:nx-1,2:ny-1,2:nz-1) )
     thres1s=J0(l+1)-J0(l)

     do while (thres1s>thres1l*alphak.or.thres2s>thres2l)
        alphak=alphak*0.5

!$omp parallel default(shared)
!$omp do schedule(runtime) private(i,j,k)
        do k=1,nz
           do j=1,ny
              do i=1,nx
                 psip(i,j,k,l+1)=psip(i,j,k,l)+alphak*ds1(i,j,k)
                 phip(i,j,k,l+1)=phip(i,j,k,l)+alphak*dh1(i,j,k)
              end do
           end do
        end do
!$omp end do
!$omp end parallel

!        call set_3dbd( lon, lat, pex, psip(:,:,:,l+1), bo_psi, '222222' )
!        call set_3dbd( lon, lat, pex, phip(:,:,:,l+1), bo_phi, '111122' )

        call calc_J0( lon, lat, pex, psip(:,:,:,l+1), phip(:,:,:,l+1), pv,  &
  &                   J0(l+1), dJsdx(:,:,:,l+1), dJhdx(:,:,:,l+1) )

        thres2s=calc_3ddot( dJsdx(2:nx-1,2:ny-1,2:nz-1,l+1),  &
  &                         ds1(2:nx-1,2:ny-1,2:nz-1) )  &
  &            +calc_3ddot( dJhdx(2:nx-1,2:ny-1,2:nz-1,l+1),  &
  &                         dh1(2:nx-1,2:ny-1,2:nz-1) )

        thres1s=(J0(l+1)-J0(l))

     end do

     !-- When passing above loop, psi and phi are set of new values.
     write(*,'(a8,1PE14.6,a2,1PE14.6,a9,i3,a1)')   &
  &        'J0,ak = ', J0(l), ', ', alphak, ', STEP = ', l, '.'

     if(abs(J0(l+1)-J0(l))/abs(J0(l))<errmax)then
        write(*,*) "Passing last", abs(J0(l+1)-J0(l))/abs(J0(l))
        write(*,'(a5,1PE14.6,a7,i3,a1)') 'J0 = ', J0(l+1), 'STEP = ', l+1, '.'
        fl=l+1
        exit
     end if

  end do

  !-- resetting phi and psi.
  psi(2:nx-1,2:ny-1,2:nz-1)=psip(2:nx-1,2:ny-1,2:nz-1,fl)
  phi(2:nx-1,2:ny-1,2:nz-1)=phip(2:nx-1,2:ny-1,2:nz-1,fl)

  !-- calculating final variables

!$omp parallel default(shared)
!$omp do schedule(runtime) private(i,j)

  do j=1,ny
     do i=1,nx
        call grad_1d( pex(2:nz-1), phi(i,j,2:nz-1), t(i,j,2:nz-1) )
     end do
  end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(j,k)

  do k=1,nz
     do j=2,ny-1
        call grad_1d( lon(2:nx-1), psi(2:nx-1,j,k), vg(2:nx-1,j,k),  &
  &                   hx=sx(2:nx-1,j) )
     end do
  end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(i,k)

  do k=1,nz
     do i=2,nx-1
        call grad_1d( lat(2:ny-1), psi(i,2:ny-1,k), ug(i,2:ny-1,k),  &
  &                   hx=sy(i,2:ny-1) )
     end do
  end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(i,j,k)

  do k=1,nz
     do j=1,ny
        do i=1,nx
           if(k/=1.and.k/=nz)then
              t(i,j,k)=-t(i,j,k)/Cpd
           end if
           if(i/=1.and.i/=nx.and.j/=1.and.j/=ny)then
              ug(i,j,k)=-ug(i,j,k)
           end if
        end do
     end do
  end do

!$omp end do
!$omp end parallel

end subroutine EPV_varinv

!------------------------------------
!------------------------------------

subroutine BOMG_inv( lon, lat, pex, ptb, ptt, phid, psid, epvd, bomg, uh, vh, diaq )
  ! Yoshino et al. (2003) : modified for (6) and (8).
  ! convergence condition : both errors of phit and psit are 
  ! less than 0.1 m / 12 hour.
  ! i.e. eps = 2.315e-6
  use Thermo_Const
  use Phys_Const
  use Math_Const
  use Thermo_Function
  use Derivation

  implicit none

  real, intent(in) :: lon(:)  ! longitude [rad]
  real, intent(in) :: lat(:)  ! latitude [rad]
  real, intent(in) :: pex(:)  ! pressure by exner function [1]
  real, intent(in) :: ptb(size(lon),size(lat))  ! surface PT [K].
  real, intent(in) :: ptt(size(lon),size(lat))  ! top PT [K].
  real, intent(in) :: phid(size(lon),size(lat),size(pex))
                              ! geopotential [J/kg]
  real, intent(in) :: psid(size(lon),size(lat),size(pex))  ! stream func. [10^6PVUs]
  real, intent(in) :: epvd(size(lon),size(lat),size(pex))   ! EPV [10^{-6}PVU]
  real, intent(inout) :: bomg(size(lon),size(lat),size(pex))   ! vertical wind [1/s]
  real, intent(inout) :: uh(size(lon),size(lat),size(pex))   ! X wind [m/s]
  real, intent(inout) :: vh(size(lon),size(lat),size(pex))   ! Y wind [m/s]
  real, intent(in), optional :: diaq(size(lon),size(lat),size(pex))
                              ! diabatic heating [K/s]

  integer :: nx, ny, nz, i, j, k
  real, parameter :: epspd=2.315e-6  ! Davis and Emanuel (1999)
  real, parameter :: accel=0.5      ! SOR coe. Davis and Emanuel (1999)
  real :: kp, errmax, radinv, rad2, rad2inv, pvc, gam, epsp
  real :: lengrat, height, strat, leng, FF, phis, psis, pmin, pmax
  real :: leng2, lengrat2
  real :: lond(size(lon)), latd(size(lat))
  real, dimension(size(pex)) :: p
  real, dimension(size(lat)) :: f0, coslat
  real, dimension(size(lon),size(lat)) :: rho2, ah, ch, eh, sx, sy
  real, dimension(size(lon),size(lat)) :: bo2_bphi, bo2_tphi, bo2_bpsi, bo2_tpsi
  real, dimension(size(lon),size(lat)) :: dptbdlon, dptbdlat, dpttdlon, dpttdlat
  real, dimension(size(lon),size(lat),size(pex)) :: ur, vr
  real, dimension(size(lon),size(lat),size(pex)) :: phit, psit, chi
  real, dimension(size(lon),size(lat),size(pex)) :: phi, psi, epv
  real, dimension(size(lon),size(lat),size(pex)) :: rho3, bo3, hq
  real, dimension(size(lon),size(lat),size(pex)) :: xaw, yaw, zaw
  real, dimension(size(lon),size(lat),size(pex)) :: dw, ew, fw, gw
  real, dimension(size(lon),size(lat),size(pex)) :: dphi2dp2
  real, dimension(size(lon),size(lat),size(pex)) :: dphit2dp2
  real, dimension(size(lon),size(lat),size(pex)) :: dpsi_ll, dpsit_ll
  real, dimension(size(lon),size(lat),size(pex)) :: ADVq1, ADVq2, ADVq3, ADVw
  real, dimension(size(lon),size(lat),size(pex)) :: ADVw1, ADVw2, ADVw3, ADVw4
  real, dimension(size(lon),size(lat),size(pex)) :: dwdp, zeta, zetat, zetan
  real, dimension(size(lon),size(lat),size(pex)) :: dpsitdlat, lappt2
  real, dimension(size(lon),size(lat),size(pex)) :: dpsi2dlon2, dpsi2dlat2
  real, dimension(size(lon),size(lat),size(pex)) :: dpsit2dlon2, dpsit2dlat2
  real, dimension(size(lon),size(lat),size(pex)) :: dphi_lonp, dphi_latp
  real, dimension(size(lon),size(lat),size(pex)) :: dphit_lonp, dphit_latp
  real, dimension(size(lon),size(lat),size(pex)) :: dpsi_lonp, dpsi_latp
  real, dimension(size(lon),size(lat),size(pex)) :: dpsit_lonp, dpsit_latp
  real, dimension(size(lon),size(lat),size(pex)) :: dzdp, dz2dp2
  real, dimension(size(lon),size(lat),size(pex)) :: JQ, termw, dzdlon, dzdlat
  real, dimension(size(lon),size(lat),size(pex)) :: force, tmp_h, tmp_s
  real, dimension(size(lon),size(lat),size(pex)) :: dqdlon, dqdlat, dqdp
  real, dimension(size(lon),size(lat),size(pex)) :: dudp, dvdp

  nx=size(lon)
  ny=size(lat)
  nz=size(pex)

  kp=Rd/Cpd
  gam=Cvd/Rd  ! (1/kp-1)
  bo2_bphi=0.0
  bo2_tphi=0.0
  bo2_bpsi=0.0
  bo2_tpsi=0.0
  ur=0.0
  uh=0.0
  vr=0.0
  vh=0.0
  phit=0.0
  psit=0.0
  chi=0.0
  rho2=0.0
  rho3=0.0
  bomg=0.0
  tmp_h=0.0
  tmp_s=0.0

  rad2=radius*radius
  radinv=1.0/radius
  rad2inv=1.0/rad2

  do k=1,nz
     p(k)=p0*(pex(k)**(Cpd/Rd))
  end do

  !-- calculating each scaling variables
  call Mean_2d( ptb, pmin )
  call Mean_2d( ptt, pmax )

  FF=2.0*omega
  strat=Cpd*(pmax-pmin)
  height=1.0
  leng=sqrt(strat)*height/FF
  lengrat=leng/radius
  leng2=leng*leng
  lengrat2=lengrat*lengrat
  phis=1.0/(strat*height*height)
  psis=FF/(strat*height*height)

  !-- scaling phi and psi
  phi=phid*phis
  psi=psid*psis
  epsp=epspd*phis*g

  write(*,*) "*** MESSAGE (BOMG_inv) ***"
  write(*,'(a12,1P4E14.6)') "(S,F,L,H) : ", strat, FF, leng, height
  write(*,'(a12,1P2E14.6)') "(phi,psi) : ", phis, psis
  write(*,'(a8,1PE14.6)') "(err) : ", epsp


  do k=1,nz
     do j=1,ny
        do i=1,nx
           epv(i,j,k)=epvd(i,j,k)*(Cpd*p(k)/(FF*strat*g*kp*pex(k)))
        end do
     end do
  end do

  if(present(diaq))then
     hq=diaq
  else
     hq=0.0
  end if

  do j=1,ny
     f0(j)=2.0*omega*sin(lat(j))
     coslat(j)=cos(lat(j))
     do i=1,nx
        sx(i,j)=radius*coslat(j)
        sy(i,j)=radius
     end do
  end do

  do i=1,nx
     lond(i)=lon(1)+(lon(i)-lon(1))/lengrat
  end do
  do j=1,ny
     latd(j)=lat(1)+(lat(j)-lat(1))/lengrat
  end do

!$omp parallel default(shared)
  !-- calculating steady variables

!$omp do schedule(runtime) private(i,j)

  do j=1,ny
     do i=1,nx
        call sub_lapsi_1d( pex, phi(i,j,:), dphi2dp2(i,j,:) )
        call sub_lapsi_1d( pex, phit(i,j,:), dphit2dp2(i,j,:) )
        call grad_1d( pex, epv(i,j,:), ADVq3(i,j,:) )
        call grad_1d( pex, bomg(i,j,:), dwdp(i,j,:) )
        call grad_1d( pex, hq(i,j,:), dqdp(i,j,:) )
     end do
  end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(i,k)

  do k=1,nz
     do i=1,nx
        call sub_lapsi_1d( lat, psi(i,:,k), dpsi2dlat2(i,:,k) )
        call sub_lapsi_1d( lat, psit(i,:,k), dpsit2dlat2(i,:,k) )
        call grad_1d( lat, psit(i,:,k), dpsitdlat(i,:,k) )
     end do
  end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(j,k)

  do k=1,nz
     do j=1,ny
        call sub_lapsi_1d( lon, psi(:,j,k), dpsi2dlon2(:,j,k) )
        call sub_lapsi_1d( lon, psit(:,j,k), dpsit2dlon2(:,j,k) )
     end do
  end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(i)

  do i=1,nx
     call sub_cross2_2d( lat, pex, phi(i,:,:), dphi_latp(i,:,:) )
     call sub_cross2_2d( lat, pex, psi(i,:,:), dpsi_latp(i,:,:) )
     call sub_cross2_2d( lat, pex, phit(i,:,:), dphit_latp(i,:,:) )
     call sub_cross2_2d( lat, pex, psit(i,:,:), dpsit_latp(i,:,:) )
  end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(j)

  do j=1,ny
     call sub_cross2_2d( lon, pex, phi(:,j,:), dphi_lonp(:,j,:) )
     call sub_cross2_2d( lon, pex, psi(:,j,:), dpsi_lonp(:,j,:) )
     call sub_cross2_2d( lon, pex, phit(:,j,:), dphit_lonp(:,j,:) )
     call sub_cross2_2d( lon, pex, psit(:,j,:), dpsit_lonp(:,j,:) )
  end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(k)

  do k=1,nz
     call sub_lapsi_2d( lon, lat, psi(:,:,k), zeta(:,:,k) )
     call sub_lapsi_2d( lon, lat, psit(:,:,k), zetat(:,:,k) )
     call sub_lapsi_2d( lon, lat, dphi2dp2(:,:,k), lappt2(:,:,k) )
     call sub_lapsi_2d( lon, lat, hq(:,:,k), JQ(:,:,k) )
     call sub_cross2_2d( lon, lat, psi(:,:,k), dpsi_ll(:,:,k) )
     call sub_cross2_2d( lon, lat, psit(:,:,k), dpsit_ll(:,:,k) )
     call grad_2d( lon, lat, psi(:,:,k), vr(:,:,k), ur(:,:,k),  &
  &                hx=sx, hy=sy )
     call grad_2d( lon, lat, chi(:,:,k), uh(:,:,k), vh(:,:,k),  &
  &                hx=sx, hy=sy )
     call grad_2d( lon, lat, epv(:,:,k), ADVq1(:,:,k), ADVq2(:,:,k),  &
  &                hx=sx, hy=sy )
     call grad_2d( lon, lat, hq(:,:,k), dqdlon(:,:,k), dqdlat(:,:,k),  &
  &                hx=sx, hy=sy )

  end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(i,j,k)

  do k=1,nz
     do j=1,ny
        do i=1,nx
           zetan(i,j,k)=zeta(i,j,k)/(coslat(j)**2)
           zeta(i,j,k)=lengrat2*zeta(i,j,k)
           zetat(i,j,k)=lengrat2*zetat(i,j,k)
           JQ(i,j,k)=lengrat2*JQ(i,j,k)
           lappt2(i,j,k)=lengrat2*lappt2(i,j,k)
           ur(i,j,k)=-ur(i,j,k)
           uh(i,j,k)=uh(i,j,k)+ur(i,j,k)
           vh(i,j,k)=vh(i,j,k)+vr(i,j,k)
           termw(i,j,k)=uh(i,j,k)*dphi_lonp(i,j,k)/sx(i,j)  &
  &                     +vh(i,j,k)*dphi_latp(i,j,k)/sy(i,j)
        end do
     end do
  end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(k)

  do k=1,nz
     call grad_2d( lon, lat, zetan(:,:,k), dzdlon(:,:,k), dzdlat(:,:,k),  &
  &                hx=sx, hy=sy )
     call grad_2d( lon, lat, dphi2dp2(:,:,k), dw(:,:,k), ew(:,:,k),  &
  &                hx=sx, hy=sy )
     call sub_lapsi_2d( lon, lat, termw(:,:,k), ADVw(:,:,k) )
  end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(i,j,k)

  do k=1,nz
     do j=1,ny
        do i=1,nx
           ADVw1(i,j,k)=uh(i,j,k)*dzdlon(i,j,k)  &
  &                     +vh(i,j,k)*dzdlat(i,j,k)
           ADVw3(i,j,k)=dpsit2dlon2(i,j,k)*dpsi2dlat2(i,j,k)  &
  &                     +dpsi2dlon2(i,j,k)*dpsit2dlat2(i,j,k)  &
  &                     -2.0*dpsi_ll(i,j,k)*dpsit_ll(i,j,k)
        end do
     end do
  end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(i,j)

  do j=1,ny
     do i=1,nx
        call sub_lapsi_1d( pex, zeta(i,j,:), dz2dp2(i,j,:) )
        call grad_1d( pex, zeta(i,j,:), dzdp(i,j,:) )
        call grad_1d( pex, ADVw1(i,j,:), ADVw2(i,j,:) )
        call grad_1d( pex, ADVw3(i,j,:), ADVw4(i,j,:) )
        call grad_1d( pex, uh(i,j,:), dudp(i,j,:) )
        call grad_1d( pex, vh(i,j,:), dvdp(i,j,:) )
     end do
  end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(i,j)
  !-- calculating steady coefficients
  do j=1,ny
     do i=1,nx
        ch(i,j)=coslat(j)**2
        eh(i,j)=-coslat(j)*sin(lat(j))
     end do
  end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(i,j,k)

  do k=1,nz
     do j=1,ny
        do i=1,nx
           xaw(i,j,k)=dphi2dp2(i,j,k)
           yaw(i,j,k)=dphi2dp2(i,j,k)*(coslat(j)**2)
           zaw(i,j,k)=sin(lat(j))*zeta(i,j,k)
           dw(i,j,k)=2.0*dw(i,j,k)*leng*lengrat
           ew(i,j,k)=2.0*leng*lengrat*(coslat(j)**2)*ew(i,j,k)  &
  &                   -lengrat2*sin(lat(j))*coslat(j)*dphi2dp2(i,j,k)
           fw(i,j,k)=gam*sin(lat(j))*zeta(i,j,k)/pex(k)
           gw(i,j,k)=lappt2(i,j,k)-sin(lat(j))*dz2dp2(i,j,k)  &
  &                  +sin(lat(j))*gam*(dzdp(i,j,k)-zeta(i,j,k)/pex(k))/pex(k)
!-- Now Force is zero.
           force(i,j,k)=0.0
! below is incorrect.
!           force(i,j,k)=(zeta(i,j,k)+sin(lat(j))*(coslat(j)**2))*dqdp(i,j,k)  &
!  &                    -(dqdlon(i,j,k)*dvdp(i,j,k)-dqdlat(i,j,k)*dudp(i,j,k))  &
!  &                     /(sx(i,j))
if(xaw(i,j,k)==0.0)then
write(*,*) "Detect xaw and yaw are zero (BOMG) : ", dphi2dp2(i,j,k), phi(i,j,k:k+2), i, j, k
write(*,*) "each value",   pex(k:k+2), (phi(i,j,k+2)-phi(i,j,k)),  &
  &                       (pex(k+2)-pex(k))*(pex(k+1)-pex(k))
write(*,*) "each value",   (phi(i,j,k+1)-phi(i,j,k)),  &
  &                       (pex(k+1)-pex(k))**2,  &
  &    ((phi(i,j,k+2)-phi(i,j,k))/(pex(k+2)-pex(k))   &
  &   -(phi(i,j,k+1)-phi(i,j,k))/(pex(k+1)-pex(k)))/(pex(k+1)-pex(k))
end if
if(zaw(i,j,k)==0.0)then
write(*,*) "Detect zaw is zero (BOMG) : ", zeta(i,j,k), i, j, k
end if
        end do
     end do
  end do

!$omp end do
!$omp end parallel

  call grad_2d( lon, lat, ptb, dptbdlon, dptbdlat, hx=sx, hy=sy )
  call grad_2d( lon, lat, ptt, dpttdlon, dpttdlat, hx=sx, hy=sy )

!$omp parallel default(shared)
!$omp do schedule(runtime) private(i,j)

  !-- calculating top and bottom boundary.
  do j=1,ny
     do i=1,nx
        bo2_bphi(i,j)=-(pex(2)-pex(1))*(Cpd*phis)  &
  &                    *(ur(i,j,1)*dptbdlon(i,j)  &
  &                     +vr(i,j,1)*dptbdlat(i,j)  &
  &                     -hq(i,j,1))
        bo2_tphi(i,j)=(pex(nz)-pex(nz-1))*(Cpd*phis)  &
  &                    *(ur(i,j,nz)*dpttdlon(i,j)  &
  &                     +vr(i,j,nz)*dpttdlat(i,j)  &
  &                     -hq(i,j,nz))
        bo2_bpsi(i,j)=bo2_bphi(i,j)/sin(lat(j))
        bo2_tpsi(i,j)=bo2_tphi(i,j)/sin(lat(j))
     end do
  end do

!$omp end do
!$omp end parallel

  !-- calculating iteration
  errmax=epsp
  write(*,*) "*** MESSAGE (BOMG_inv) *** : Passing set coefficients..."

  do while (errmax>=epsp)  ! this process is not Omp (in this routine.)
     errmax=0.0

     do k=2,nz-1

!$omp parallel default(shared)
!$omp do schedule(runtime) private(i,j)
        do j=1,ny
           do i=1,nx
              rho2(i,j)=2.0*lengrat2*lengrat2*(  &
  &                          dpsit2dlon2(i,j,k)*dpsi2dlat2(i,j,k)  &
  &                         +dpsi2dlon2(i,j,k)*dpsit2dlat2(i,j,k)  &
  &                         -2.0*dpsi_ll(i,j,k)*dpsit_ll(i,j,k))  &
  &                     +lengrat2*(coslat(j)**3)*dpsitdlat(i,j,k)  &
  &                     +sin(lat(j))*zetat(i,j,k)
           end do
        end do
!$omp end do
!$omp end parallel

        !-- for phit
        call Ellip_Jacobi_2d( lon, lat, rho2, epsp, '2222',  &
  &                           phit(:,:,k), bound_opt=bo3(:,:,k),  &
  &                           a=ah, c=ch, e=eh, ln=1,  &
  &                           accel=accel, init_flag=.false. )
     end do

     write(*,*) "*** MESSAGE (BOMG_inv) *** : Passing phit..."

!$omp parallel default(shared)
!$omp do schedule(runtime) private(i,j)
     !-- calculating top and bottom boundary for phit
     do j=1,ny
        do i=1,nx
           phit(i,j,1)=phit(i,j,2)+bo2_bphi(i,j)
           phit(i,j,nz)=phit(i,j,nz-1)+bo2_tphi(i,j)
        end do
     end do
!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(i,j)
     !-- calculating some terms related to new phit.
     do j=1,ny
        do i=1,nx
           call sub_lapsi_1d( pex, phit(i,j,:), dphit2dp2(i,j,:) )
        end do
     end do
!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(i)
     do i=1,nx
        call sub_cross2_2d( lat, pex, phit(i,:,:), dphit_latp(i,:,:) )
     end do
!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(j)
     do j=1,ny
        call sub_cross2_2d( lon, pex, phit(:,j,:), dphit_lonp(:,j,:) )
     end do
!$omp end do
!$omp end parallel

     !-- calculating new forcing term for psit
     do k=2,nz-1
!$omp parallel default(shared)
!$omp do schedule(runtime) private(i,j)
        do j=1,ny
           do i=1,nx
              rho2(i,j)=-((coslat(j)/height)**2)  &
  &                      *(uh(i,j,k)*ADVq1(i,j,k)  &
  &                       +vh(i,j,k)*ADVq2(i,j,k)  &
  &                       +bomg(i,j,k)*ADVq3(i,j,k)  &
  &                       -bomg(i,j,k)*gam*epv(i,j,k)/pex(k))  &
  &                     -Cpd*phis*force(i,j,k)*(coslat(j)**2)/FF  &
  &                     -(sin(lat(j))*(coslat(j)**2)+zeta(i,j,k))  &
  &                      *dphit2dp2(i,j,k)  &
  &                     +lengrat2*(dphit_lonp(i,j,k)*dpsi_lonp(i,j,k)  &
  &                      +dphi_lonp(i,j,k)*dpsit_lonp(i,j,k)  &
  &                     +(coslat(j)**2)*(dphit_latp(i,j,k)*dpsi_latp(i,j,k)  &
  &                      +dphi_latp(i,j,k)*dpsit_latp(i,j,k)))
              rho2(i,j)=rho2(i,j)/dphi2dp2(i,j,k)
           end do
        end do
!$omp end do
!$omp end parallel

        !-- for psit
        call Ellip_Jacobi_2d( lon, lat, rho2, epsp, '2222',  &
  &                           psit(:,:,k), bound_opt=bo3(:,:,k),  &
  &                           a=ah, c=ch, e=eh, ln=1,  &
  &                           accel=accel, init_flag=.false. )
     end do

     write(*,*) "*** MESSAGE (BOMG_inv) *** : Passing psit..."

!$omp parallel default(shared)
!$omp do schedule(runtime) private(i,j)
     !-- calculating top and bottom boundary for psit
     do j=1,ny
        do i=1,nx
           psit(i,j,1)=psit(i,j,2)+bo2_bpsi(i,j)
           psit(i,j,nz)=psit(i,j,nz-1)+bo2_tpsi(i,j)
        end do
     end do
!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(j,k)

     !-- calculating some terms related to new psit.
     do k=1,nz
        do j=1,ny
           call sub_lapsi_1d( lon, psit(:,j,k), dpsit2dlon2(:,j,k) )
        end do
     end do
!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(i,k)
     do k=1,nz
        do i=1,nx
           call sub_lapsi_1d( lat, psit(i,:,k), dpsit2dlat2(i,:,k) )
           call grad_1d( lat, psit(i,:,k), dpsitdlat(i,:,k) )
        end do
     end do
!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(i)
     do i=1,nx
        call sub_cross2_2d( lat, pex, psit(i,:,:), dpsit_latp(i,:,:) )
     end do
!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(j)
     do j=1,ny
        call sub_cross2_2d( lon, pex, psit(:,j,:), dphit_lonp(:,j,:) )
     end do
!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(k)
     do k=1,nz
        call sub_lapsi_2d( lon, lat, psit(:,:,k), zetat(:,:,k) )
        call sub_cross2_2d( lon, lat, psit(:,:,k), dpsit_ll(:,:,k) )
     end do
!$omp end do
!$omp end parallel

     do k=2,nz-1
!$omp parallel default(shared)
!$omp do schedule(runtime) private(i,j)
        do j=1,ny
           do i=1,nx
              zetat(i,j,k)=lengrat2*zetat(i,j,k)
              rho2(i,j)=-leng2*(coslat(j)**2)  &
  &                      *(dwdp(i,j,k)+gam*bomg(i,j,k)/pex(k))
           end do
        end do
!$omp end do
!$omp end parallel

        !-- for chi
        call Ellip_Jacobi_2d( lon, lat, rho2, epsp, '2222',  &
  &                           chi(:,:,k), bound_opt=bo3(:,:,k),  &
  &                           a=ah, c=ch, e=eh, ln=1,  &
  &                           accel=accel, init_flag=.false. )
     end do

     write(*,*) "*** MESSAGE (BOMG_inv) *** : Passing chi..."

!$omp parallel default(shared)
!$omp do schedule(runtime) private(i,j)
     !-- calculating top and bottom boundary for chi
     do j=1,ny
        do i=1,nx
           chi(i,j,1)=0.0
           chi(i,j,nz)=0.0
        end do
     end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(k)

     !-- calculating some terms related to new chi.
     do k=1,nz
        call grad_2d( lon, lat, chi(:,:,k), uh(:,:,k), vh(:,:,k),  &
  &                   hx=sx, hy=sy )
     end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(i,j,k)

     !-- calculating some new terms for new bomg

     do k=1,nz
        do j=1,ny
           do i=1,nx
              uh(i,j,k)=uh(i,j,k)+ur(i,j,k)
              vh(i,j,k)=vh(i,j,k)+vr(i,j,k)
              termw(i,j,k)=uh(i,j,k)*dphi_lonp(i,j,k)/sx(i,j)  &
  &                        +vh(i,j,k)*dphi_latp(i,j,k)/sy(i,j)
           end do
        end do
     end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(i,j)

     do j=1,ny
        do i=1,nx
           call grad_1d( pex, uh(i,j,:), dudp(i,j,:) )
           call grad_1d( pex, vh(i,j,:), dvdp(i,j,:) )
        end do
     end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(k)

     do k=1,nz
        call sub_lapsi_2d( lon, lat, termw(:,:,k), ADVw(:,:,k) )
     end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(i,j,k)

     do k=1,nz
        do j=1,ny
           do i=1,nx
              ADVw1(i,j,k)=uh(i,j,k)*dzdlon(i,j,k)  &
  &                        +vh(i,j,k)*dzdlat(i,j,k)
              ADVw3(i,j,k)=dpsit2dlon2(i,j,k)*dpsi2dlat2(i,j,k)  &
  &                        +dpsi2dlon2(i,j,k)*dpsit2dlat2(i,j,k)  &
  &                        -2.0*dpsi_ll(i,j,k)*dpsit_ll(i,j,k)
              force(i,j,k)=(zeta(i,j,k)+f0(j)*(coslat(j)**2))*dqdp(i,j,k)  &
  &                       -(dqdlon(i,j,k)*dvdp(i,j,k)  &
  &                        -dqdlat(i,j,k)*dudp(i,j,k))  &
  &                        /(sx(i,j))
           end do
        end do
     end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(i,j)

     do j=1,ny
        do i=1,nx
           call grad_1d( pex, ADVw1(i,j,:), ADVw2(i,j,:) )
           call grad_1d( pex, ADVw3(i,j,:), ADVw4(i,j,:) )
        end do
     end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(i,j,k)
     do k=1,nz
        do j=1,ny
           do i=1,nx
              rho3(i,j,k)=-lengrat2*ADVw(i,j,k)  &
  &                       +sin(lat(j))*(coslat(j)**2)*ADVw2(i,j,k)  &
  &                       -Cpd*phis*JQ(i,j,k)  &
  &                       -lengrat2*(coslat(j)**3)*dpsit_latp(i,j,k)  &
  &                       -2.0*(lengrat2**2)*ADVw4(i,j,k)
           end do
        end do
     end do
!$omp end do
!$omp end parallel

     call Ellip_Jacobi_3d( lon, lat, pex, rho3, epsp, '222211', bomg,  &
  &                        bound_opt=bo3,  &
  &                        xa=xaw, ya=yaw, za=zaw, d=dw, e=ew, f=fw, g=gw,  &
  &                        ln=1, accel=accel, init_flag=.false. )

     write(*,*) "*** MESSAGE (BOMG_inv) *** : Passing omega..."

     !-- calculating some terms related to new bomg.
     do j=1,ny
        do i=1,nx
           call grad_1d( pex, bomg(i,j,:), dwdp(i,j,:) )
        end do
     end do

  !-- calculating each error max
     do k=1,nz
        do j=1,ny
           do i=1,nx
              if(abs(phit(i,j,k)-tmp_h(i,j,k))>errmax)then
                 errmax=abs(phit(i,j,k)-tmp_h(i,j,k))
write(*,*) "check errmax phit", errmax, tmp_h(i,j,k), phit(i,j,k)
              end if
              if(abs(psit(i,j,k)-tmp_s(i,j,k))>errmax)then
                 errmax=abs(psit(i,j,k)-tmp_s(i,j,k))
write(*,*) "check errmax psit", errmax, tmp_s(i,j,k), psit(i,j,k)
              end if
              tmp_h(i,j,k)=phit(i,j,k)
              tmp_s(i,j,k)=psit(i,j,k)
           end do
        end do
     end do

write(*,*) "Check final error", errmax, epsp
  end do

end subroutine BOMG_inv

!------------------------------------
!-------------------------------
! private routines
!-------------------------------
!------------------------------------

subroutine calc_J0( lon, lat, pex, psi, phi, epv, J0, dJsdx, dJhdx )
! By adjoint method, calculating J0 and dJ.

  use Phys_Const
  use Thermo_Const
  use Derivation

  implicit none

  real, intent(in) :: lon(:)  ! longitude [rad]
  real, intent(in) :: lat(:)  ! latitude [rad]
  real, intent(in) :: pex(:)  ! pressure by exner function [1]
  real, intent(in) :: phi(size(lon),size(lat),size(pex))
                              ! geopotential
  real, intent(in) :: psi(size(lon),size(lat),size(pex))  ! stream func.
  real, intent(in) :: epv(size(lon),size(lat),size(pex))  ! EPV
  real, intent(inout) :: J0   ! cost function
  real, intent(inout) :: dJsdx(size(lon),size(lat),size(pex))  ! grad for psi
  real, intent(inout) :: dJhdx(size(lon),size(lat),size(pex))  ! grad for phi

  integer :: i, j, k, nx, ny, nz
  real :: a1, a2, a3, b1, c1, d1, e1, f1, g1, h1, k1, m1, n1, o1, q1, s1, t1
  real :: dlond, dlatd, dpd, dll, dlonp, dlatp, dlon2, dlat2, dp2, kp, rad2inv
  real :: pvc, radc
  real, dimension(size(lat)) :: coslat, f0
  real, dimension(size(pex)) :: pres
  real, dimension(size(lon),size(lat),size(pex)) :: dpsi_ll, dpsi_lonp, dpsi_latp
  real, dimension(size(lon),size(lat),size(pex)) :: dpsi_lon2, dpsi_lat2, dpsi_lat
  real, dimension(size(lon),size(lat),size(pex)) :: laphi, lapsi, r1, r2, r3, r4
  real, dimension(size(lon),size(lat),size(pex)) :: psip, psib, phip, phib
  real, dimension(size(lon),size(lat),size(pex)) :: dphi_lonp, dphi_latp, dphi_p2
  real, dimension(size(lon),size(lat),size(pex)) :: psitp, phitp, psitb, phitb

  nx=size(lon)
  ny=size(lat)
  nz=size(pex)

  kp=Rd/Cpd
  rad2inv=1.0/radius/radius
  pres=p0*(pex**(1.0/kp))
  coslat=cos(lat)
  f0=2.0*omega*sin(lat)

  psip=0.0
  psib=0.0
  phip=0.0
  phib=0.0

!$omp parallel default(shared)
!$omp do schedule(runtime) private(k)

  do k=1,nz
     call sub_cross2_2d( lon, lat, psi(:,:,k), dpsi_ll(:,:,k) )
  end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(j)

  do j=1,ny
     call sub_cross2_2d( lon, pex, psi(:,j,:), dpsi_lonp(:,j,:) )
  end do

!$Omp end do

!$omp barrier

!$omp do schedule(runtime) private(i)

  do i=1,nx
     call sub_cross2_2d( lat, pex, psi(i,:,:), dpsi_latp(i,:,:) )
  end do

!$Omp end do

!$omp barrier

!$omp do schedule(runtime) private(j,k)

  do k=1,nz
     do j=1,ny
        call sub_lapsi_1d( lon, psi(:,j,k), dpsi_lon2(:,j,k) )
     end do
  end do

!$Omp end do

!$omp barrier

!$omp do schedule(runtime) private(i,k)

  do k=1,nz
     do i=1,nx
        call sub_lapsi_1d( lat, psi(i,:,k), dpsi_lat2(i,:,k) )
        call grad_1d( lat, psi(i,:,k), dpsi_lat(i,:,k) )
     end do
  end do

!$omp end do

!$omp barrier

  !-- calculating each coefficient for phi

!$omp do schedule(runtime) private(k)

  do k=1,nz
     call sub_lapsi_2d( lon, lat, phi(:,:,k), laphi(:,:,k) )
  end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(j)

  do j=1,ny
     call sub_cross2_2d( lon, pex, phi(:,j,:), dphi_lonp(:,j,:) )
  end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(i)

  do i=1,nx
     call sub_cross2_2d( lat, pex, phi(i,:,:), dphi_latp(i,:,:) )
  end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(i,j)

  do j=1,ny
     do i=1,nx
        call sub_lapsi_1d( pex, phi(i,j,:), dphi_p2(i,j,:) )
     end do
  end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(k)
     !-- calculating each coefficient for psi

  do k=1,nz
     call sub_lapsi_2d( lon, lat, psi(:,:,k), lapsi(:,:,k) )
  end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(i,j,k)

  do k=2,nz-1
     do j=2,ny-1
        do i=2,nx-1
           r1(i,j,k)=(g*kp*pex(k)/(Cpd*pres(k)))*(  &
  &                  (f0(j)+lapsi(i,j,k)*rad2inv/((coslat(j))**2))*dphi_p2(i,j,k)  &
  &                  -(dphi_lonp(i,j,k)*dpsi_lonp(i,j,k)/(coslat(j)**2)  &
  &                   +dphi_latp(i,j,k)*dpsi_latp(i,j,k))*rad2inv)  &
  &                 -epv(i,j,k)
           r2(i,j,k)=2.0*(dpsi_lon2(i,j,k)*dpsi_lat2(i,j,k)  &
  &                      -dpsi_ll(i,j,k)**2)*rad2inv*rad2inv/(coslat(j)**2)  &
  &                  +2.0*omega*coslat(j)*dpsi_lat(i,j,k)*rad2inv  &
  &                  +(f0(j)*lapsi(i,j,k)-laphi(i,j,k))*rad2inv/(coslat(j)**2)
        end do
     end do
  end do

!$omp end do
!$omp end parallel

  J0=0.0

  do k=2,nz-1
     do j=2,ny-1
        do i=2,nx-1
           J0=J0+0.5*(r1(i,j,k)**2)+0.5*(r2(i,j,k)**2)
        end do
     end do
  end do

  !-- calculating dJ/dx
  do i=nx-1,2,-1
     do j=ny-1,2,-1
        do k=nz-1,2,-1
        !-- calculating each grids
           dlond=2.0/(lon(i+1)-lon(i-1))
           dlatd=2.0/(lat(j+1)-lat(j-1))
           dpd=2.0/(pex(k+1)-pex(k-1))
           dll=dlond*dlatd
           dlonp=dlond*dpd
           dlatp=dlatd*dpd
           dlon2=dlond*dlond
           dlat2=dlatd*dlatd
           dp2=dpd*dpd
           pvc=(g*kp*pex(k)/(Cpd*pres(k)))
           radc=rad2inv/(coslat(j)**2)

        !-- calculating each coefficients
           a1=dphi_p2(i,j,k)*dlon2*radc*pvc
           a2=dphi_p2(i,j,k)*dlat2*rad2inv*pvc
           a3=0.5*dphi_p2(i,j,k)*dlatd*rad2inv*tan(lat(j))
           b1=0.25*dphi_lonp(i,j,k)*dlonp*radc*pvc
           c1=0.25*dphi_latp(i,j,k)*dlatp*rad2inv*pvc
           d1=(f0(j)+lapsi(i,j,k)*radc)*dp2*pvc
           e1=0.25*dpsi_lonp(i,j,k)*dlonp*radc*pvc
           f1=0.25*dpsi_latp(i,j,k)*dlatp*rad2inv*pvc
           g1=2.0*dpsi_lat2(i,j,k)*dlon2*rad2inv*radc
           h1=2.0*dpsi_lon2(i,j,k)*dlat2*rad2inv*radc
           k1=dpsi_ll(i,j,k)*dll*rad2inv*radc
           m1=omega*coslat(j)*dlatd*rad2inv
           n1=f0(j)*dlon2*radc
           o1=f0(j)*dlat2*rad2inv
           q1=0.5*f0(j)*tan(lat(j))*dlatd*rad2inv
           s1=dlat2*rad2inv
           t1=0.5*tan(lat(j))*dlatd*rad2inv

        !-- calculating adjoint for P
           psip(i,j,k)=psip(i,j,k)-(2.0*a1+2.0*a2)*r1(i,j,k)
           psip(i+1,j,k)=psip(i+1,j,k)+a1*r1(i,j,k)
           psip(i-1,j,k)=psip(i-1,j,k)+a1*r1(i,j,k)
           psip(i,j+1,k)=psip(i,j+1,k)+(a2-a3)*r1(i,j,k)
           psip(i,j-1,k)=psip(i,j-1,k)+(a2-a3)*r1(i,j,k)
           psip(i+1,j,k+1)=psip(i+1,j,k+1)-b1*r1(i,j,k)
           psip(i-1,j,k+1)=psip(i-1,j,k+1)+b1*r1(i,j,k)
           psip(i+1,j,k-1)=psip(i+1,j,k-1)+b1*r1(i,j,k)
           psip(i-1,j,k-1)=psip(i-1,j,k-1)-b1*r1(i,j,k)
           psip(i,j+1,k+1)=psip(i,j+1,k+1)-c1*r1(i,j,k)
           psip(i,j-1,k+1)=psip(i,j-1,k+1)+c1*r1(i,j,k)
           psip(i,j+1,k-1)=psip(i,j+1,k-1)+c1*r1(i,j,k)
           psip(i,j-1,k-1)=psip(i,j-1,k-1)-c1*r1(i,j,k)
           phip(i,j,k)=phip(i,j,k)-2.0*d1*r1(i,j,k)
           phip(i,j,k+1)=phip(i,j,k+1)+d1*r1(i,j,k)
           phip(i,j,k-1)=phip(i,j,k-1)+d1*r1(i,j,k)
           phip(i+1,j,k+1)=phip(i+1,j,k+1)-e1*r1(i,j,k)
           phip(i-1,j,k+1)=phip(i-1,j,k+1)+e1*r1(i,j,k)
           phip(i+1,j,k-1)=phip(i+1,j,k-1)+e1*r1(i,j,k)
           phip(i-1,j,k-1)=phip(i-1,j,k-1)-e1*r1(i,j,k)
           phip(i,j+1,k+1)=phip(i,j+1,k+1)-f1*r1(i,j,k)
           phip(i,j-1,k+1)=phip(i,j-1,k+1)+f1*r1(i,j,k)
           phip(i,j+1,k-1)=phip(i,j+1,k-1)+f1*r1(i,j,k)
           phip(i,j-1,k-1)=phip(i,j-1,k-1)-f1*r1(i,j,k)

        !-- calculating adjoint for B
           psib(i,j,k)=psib(i,j,k)-(2.0*g1+2.0*h1  &
  &                                +2.0*n1+2.0*o1)*r2(i,j,k)
           psib(i+1,j,k)=psib(i+1,j,k)+(g1+n1)*r2(i,j,k)
           psib(i-1,j,k)=psib(i-1,j,k)+(g1+n1)*r2(i,j,k)
           psib(i,j+1,k)=psib(i,j+1,k)+(h1+m1+o1-q1)*r2(i,j,k)
           psib(i,j-1,k)=psib(i,j-1,k)+(h1+m1+o1-q1)*r2(i,j,k)
           psib(i+1,j+1,k)=psib(i+1,j+1,k)-k1*r2(i,j,k)
           psib(i-1,j+1,k)=psib(i-1,j+1,k)+k1*r2(i,j,k)
           psib(i+1,j-1,k)=psib(i+1,j-1,k)+k1*r2(i,j,k)
           psib(i-1,j-1,k)=psib(i-1,j-1,k)-k1*r2(i,j,k)
           phib(i,j,k)=phib(i,j,k)+2.0*(dlon2*radc+s1)*r2(i,j,k)
           phib(i+1,j,k)=phib(i+1,j,k)-(dlon2*radc)*r2(i,j,k)
           phib(i-1,j,k)=phib(i-1,j,k)-(dlon2*radc)*r2(i,j,k)
           phib(i,j+1,k)=phib(i,j+1,k)+(-s1+t1)*r2(i,j,k)
           phib(i,j-1,k)=phib(i,j-1,k)-(s1+t1)*r2(i,j,k)
        end do
     end do
  end do

!$omp parallel default(shared)
!$omp do schedule(runtime) private(i,j,k)

  do k=2,nz-1
     do j=2,ny-1
        do i=2,nx-1
           dJsdx(i,j,k)=psip(i,j,k)+psib(i,j,k)
           dJhdx(i,j,k)=phip(i,j,k)+phib(i,j,k)
        end do
     end do
  end do

!$omp end do
!$omp end parallel

end subroutine calc_J0

!------------------------------------
!------------------------------------

subroutine set_3dbd( lon, lat, pex, val, bo, boc )
  implicit none

  real, intent(in) :: lon(:)  ! longitude [rad]
  real, intent(in) :: lat(:)  ! latitude [rad]
  real, intent(in) :: pex(:)  ! pressure by exner function [1]
  real, intent(inout) :: val(size(lon),size(lat),size(pex))  ! setting value
  real, intent(in), optional :: bo(size(lon),size(lat),size(pex))
                              ! boundary value (default = 0.0)
  character(6), intent(in), optional :: boc  ! boundary character
                              ! 1 = Diriclet, 2 = Neumann,
                              ! default = 1
                              ! order is following to Ellip_Slv
  integer :: i, j, k, nx, ny, nz
  real, dimension(size(lon),size(lat),size(pex)) :: bound
  character(6) :: bc

  nx=size(lon)
  ny=size(lat)
  nz=size(pex)

  if(present(bo))then
     bound=bo
  else
     bound=0.0
  end if

  if(present(boc))then
     bc(1:6)=boc(1:6)
  else
     bc(1:6)='111111'
  end if

  select case(bc(1:1))  ! y=0, (x,z)
  case ('1')
     val(:,1,:)=bound(:,1,:)
  case ('2')
!$omp parallel default(shared)
!$omp do schedule(runtime) private(i,k)
     do k=1,nz
        do i=1,nx
           val(i,1,k)=val(i,2,k)-bound(i,1,k)*(lat(2)-lat(1))
        end do
     end do
!$omp end do
!$omp end parallel
  end select

  select case(bc(2:2))  ! x=0, (y,z)
  case ('1')
     val(1,:,:)=bound(1,:,:)
  case ('2')
!$omp parallel default(shared)
!$omp do schedule(runtime) private(j,k)
     do k=1,nz
        do j=1,ny
           val(1,j,k)=val(2,j,k)-bound(1,j,k)*(lon(2)-lon(1))
        end do
     end do
!$omp end do
!$omp end parallel
  end select

  select case(bc(3:3))  ! y=L, (x,z)
  case ('1')
     val(:,ny,:)=bound(:,ny,:)
  case ('2')
!$omp parallel default(shared)
!$omp do schedule(runtime) private(i,k)
     do k=1,nz
        do i=1,nx
           val(i,ny,k)=val(i,ny-1,k)+bound(i,ny,k)*(lat(ny)-lat(ny-1))
        end do
     end do
!$omp end do
!$omp end parallel
  end select

  select case(bc(4:4))  ! x=L, (y,z)
  case ('1')
     val(nx,:,:)=bound(nx,:,:)
  case ('2')
!$omp parallel default(shared)
!$omp do schedule(runtime) private(j,k)
     do k=1,nz
        do j=1,ny
           val(nx,j,k)=val(nx-1,j,k)+bound(nx,j,k)*(lon(nx)-lon(nx-1))
        end do
     end do
!$omp end do
!$omp end parallel
  end select

  select case(bc(5:5))  ! z=0, (x,y)
  case ('1')
     val(:,:,1)=bound(:,:,1)
  case ('2')
!$omp parallel default(shared)
!$omp do schedule(runtime) private(i,j)
     do j=1,ny
        do i=1,nx
           val(i,j,1)=val(i,j,2)-bound(i,j,1)*(pex(2)-pex(1))
        end do
     end do
!$omp end do
!$omp end parallel
  end select

  select case(bc(6:6))  ! z=L, (x,y)
  case ('1')
     val(:,:,nz)=bound(:,:,nz)
  case ('2')
!$omp parallel default(shared)
!$omp do schedule(runtime) private(i,j)
     do j=1,ny
        do i=1,nx
           val(i,j,nz)=val(i,j,nz-1)+bound(i,j,nz)*(pex(nz)-pex(nz-1))
        end do
     end do
!$omp end do
!$omp end parallel
  end select

end subroutine set_3dbd

!------------------------------------
!------------------------------------

subroutine check_TL( lon, lat, pex, psi, phi, epv, dpsi, dphi, alphak )
! By adjoint method, calculating J0 and dJ.

  use Phys_Const
  use Thermo_Const
  use Derivation

  implicit none

  real, intent(in) :: lon(:)  ! longitude [rad]
  real, intent(in) :: lat(:)  ! latitude [rad]
  real, intent(in) :: pex(:)  ! pressure by exner function [1]
  real, intent(in) :: phi(size(lon),size(lat),size(pex))
                              ! geopotential
  real, intent(in) :: psi(size(lon),size(lat),size(pex))  ! stream func.
  real, intent(in) :: epv(size(lon),size(lat),size(pex))  ! EPV
  real, intent(inout) :: dpsi(size(lon),size(lat),size(pex))  ! grad for psi
  real, intent(inout) :: dphi(size(lon),size(lat),size(pex))  ! grad for phi
  real, intent(in) :: alphak

  integer :: i, j, k, nx, ny, nz
  real :: a1, a2, a3, b1, c1, d1, e1, f1, g1, h1, k1, m1, n1, o1, q1, s1, t1
  real :: dlond, dlatd, dpd, dll, dlonp, dlatp, dlon2, dlat2, dp2, kp, rad2inv
  real :: pvc, radc
  real, dimension(size(lat)) :: coslat, f0
  real, dimension(size(pex)) :: pres
  real, dimension(size(lon),size(lat),size(pex)) :: dpsi_ll, dpsi_lonp, dpsi_latp
  real, dimension(size(lon),size(lat),size(pex)) :: dpsi_lon2, dpsi_lat2, dpsi_lat
  real, dimension(size(lon),size(lat),size(pex)) :: laphi, lapsi, r1, r2, r3, r4
  real, dimension(size(lon),size(lat),size(pex)) :: psip, psib, phip, phib
  real, dimension(size(lon),size(lat),size(pex)) :: dphi_lonp, dphi_latp, dphi_p2
  real, dimension(size(lon),size(lat),size(pex)) :: ddpsi_ll, ddpsi_lonp, ddpsi_latp
  real, dimension(size(lon),size(lat),size(pex)) :: ddpsi_lon2, ddpsi_lat2, ddpsi_lat
  real, dimension(size(lon),size(lat),size(pex)) :: dlaphi, dlapsi, r5, r6, r7, r8
  real, dimension(size(lon),size(lat),size(pex)) :: r9, r10
  real, dimension(size(lon),size(lat),size(pex)) :: ddphi_lonp, ddphi_latp, ddphi_p2
  real, dimension(size(lon),size(lat),size(pex)) :: psitp, phitp, psitb, phitb

  nx=size(lon)
  ny=size(lat)
  nz=size(pex)

  kp=Rd/Cpd
  rad2inv=1.0/radius/radius
  pres=p0*(pex**(1.0/kp))
  coslat=cos(lat)
  f0=2.0*omega*sin(lat)

  psip=0.0
  psib=0.0
  phip=0.0
  phib=0.0

  psitp=psi+alphak*dpsi
  phitp=phi+alphak*dphi

!$omp parallel default(shared)
!$omp do schedule(runtime) private(k)

  do k=1,nz
     call sub_cross2_2d( lon, lat, psi(:,:,k), dpsi_ll(:,:,k) )
     call sub_cross2_2d( lon, lat, psitp(:,:,k), ddpsi_ll(:,:,k) )
  end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(j)

  do j=1,ny
     call sub_cross2_2d( lon, pex, psi(:,j,:), dpsi_lonp(:,j,:) )
     call sub_cross2_2d( lon, pex, psitp(:,j,:), ddpsi_lonp(:,j,:) )
  end do

!$Omp end do

!$omp barrier

!$omp do schedule(runtime) private(i)

  do i=1,nx
     call sub_cross2_2d( lat, pex, psi(i,:,:), dpsi_latp(i,:,:) )
     call sub_cross2_2d( lat, pex, psitp(i,:,:), ddpsi_latp(i,:,:) )
  end do

!$Omp end do

!$omp barrier

!$omp do schedule(runtime) private(j,k)

  do k=1,nz
     do j=1,ny
        call sub_lapsi_1d( lon, psi(:,j,k), dpsi_lon2(:,j,k) )
        call sub_lapsi_1d( lon, psitp(:,j,k), ddpsi_lon2(:,j,k) )
     end do
  end do

!$Omp end do

!$omp barrier

!$omp do schedule(runtime) private(i,k)

  do k=1,nz
     do i=1,nx
        call sub_lapsi_1d( lat, psi(i,:,k), dpsi_lat2(i,:,k) )
        call sub_lapsi_1d( lat, psitp(i,:,k), ddpsi_lat2(i,:,k) )
        call grad_1d( lat, psi(i,:,k), dpsi_lat(i,:,k) )
        call grad_1d( lat, psitp(i,:,k), ddpsi_lat(i,:,k) )
     end do
  end do

!$omp end do

!$omp barrier

  !-- calculating each coefficient for phi

!$omp do schedule(runtime) private(k)

  do k=1,nz
     call sub_lapsi_2d( lon, lat, phi(:,:,k), laphi(:,:,k) )
     call sub_lapsi_2d( lon, lat, phitp(:,:,k), dlaphi(:,:,k) )
  end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(j)

  do j=1,ny
     call sub_cross2_2d( lon, pex, phi(:,j,:), dphi_lonp(:,j,:) )
     call sub_cross2_2d( lon, pex, phitp(:,j,:), ddphi_lonp(:,j,:) )
  end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(i)

  do i=1,nx
     call sub_cross2_2d( lat, pex, phi(i,:,:), dphi_latp(i,:,:) )
     call sub_cross2_2d( lat, pex, phitp(i,:,:), ddphi_latp(i,:,:) )
  end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(i,j)

  do j=1,ny
     do i=1,nx
        call sub_lapsi_1d( pex, phi(i,j,:), dphi_p2(i,j,:) )
        call sub_lapsi_1d( pex, phitp(i,j,:), ddphi_p2(i,j,:) )
     end do
  end do

!$omp end do

!$omp barrier

!$omp do schedule(runtime) private(k)
     !-- calculating each coefficient for psi

  do k=1,nz
     call sub_lapsi_2d( lon, lat, psi(:,:,k), lapsi(:,:,k) )
     call sub_lapsi_2d( lon, lat, psitp(:,:,k), dlapsi(:,:,k) )
  end do

!$omp end do
!$omp end parallel

  !-- calculating dJ/dx
  do i=nx-1,2,-1
     do j=ny-1,2,-1
        do k=nz-1,2,-1
        !-- calculating each grids
           dlond=2.0/(lon(i+1)-lon(i-1))
           dlatd=2.0/(lat(j+1)-lat(j-1))
           dpd=2.0/(pex(k+1)-pex(k-1))
           dll=dlond*dlatd
           dlonp=dlond*dpd
           dlatp=dlatd*dpd
           dlon2=dlond*dlond
           dlat2=dlatd*dlatd
           dp2=dpd*dpd
           pvc=(g*kp*pex(k)/(Cpd*pres(k)))
           radc=rad2inv/(coslat(j)**2)

        !-- calculating each coefficients
           a1=dphi_p2(i,j,k)*dlon2*radc*pvc
           a2=dphi_p2(i,j,k)*dlat2*rad2inv*pvc
           a3=0.5*dphi_p2(i,j,k)*dlatd*rad2inv*tan(lat(j))
           b1=0.25*dphi_lonp(i,j,k)*dlonp*radc*pvc
           c1=0.25*dphi_latp(i,j,k)*dlatp*rad2inv*pvc
           d1=(f0(j)+lapsi(i,j,k)*radc)*dp2*pvc
           e1=0.25*dpsi_lonp(i,j,k)*dlonp*radc*pvc
           f1=0.25*dpsi_latp(i,j,k)*dlatp*rad2inv*pvc
           g1=2.0*dpsi_lat2(i,j,k)*dlon2*rad2inv*radc
           h1=2.0*dpsi_lon2(i,j,k)*dlat2*rad2inv*radc
           k1=dpsi_ll(i,j,k)*dll*rad2inv*radc
           m1=omega*coslat(j)*dlatd*rad2inv
           n1=f0(j)*dlon2*radc
           o1=f0(j)*dlat2*rad2inv
           q1=0.5*f0(j)*tan(lat(j))*dlatd*rad2inv
           s1=dlat2*rad2inv
           t1=0.5*tan(lat(j))*dlatd*rad2inv

           r1(i,j,k)=pvc*(  &
  &                  (f0(j)+lapsi(i,j,k)*radc)*dphi_p2(i,j,k)  &
  &                  -(dphi_lonp(i,j,k)*dpsi_lonp(i,j,k)*radc  &
  &                   +dphi_latp(i,j,k)*dpsi_latp(i,j,k)*rad2inv))
           r3(i,j,k)=pvc*(  &
  &                  (f0(j)+dlapsi(i,j,k)*radc)*ddphi_p2(i,j,k)  &
  &                  -(ddphi_lonp(i,j,k)*ddpsi_lonp(i,j,k)*radc  &
  &                   +ddphi_latp(i,j,k)*ddpsi_latp(i,j,k)*rad2inv))
           r2(i,j,k)=2.0*(dpsi_lon2(i,j,k)*dpsi_lat2(i,j,k)  &
  &                      -dpsi_ll(i,j,k)**2)*rad2inv*radc  &
  &                  +2.0*omega*coslat(j)*dpsi_lat(i,j,k)*rad2inv  &
  &                  +(f0(j)*lapsi(i,j,k)-laphi(i,j,k))*radc
           r4(i,j,k)=2.0*(ddpsi_lon2(i,j,k)*ddpsi_lat2(i,j,k)  &
  &                      -ddpsi_ll(i,j,k)**2)*rad2inv*radc  &
  &                  +2.0*omega*coslat(j)*ddpsi_lat(i,j,k)*rad2inv  &
  &                  +(f0(j)*dlapsi(i,j,k)-dlaphi(i,j,k))*radc
           r7(i,j,k)=r3(i,j,k)-r1(i,j,k)
           r8(i,j,k)=r4(i,j,k)-r2(i,j,k)

        !-- calculating R
           r5(i,j,k)=a1*(dpsi(i+1,j,k)+dpsi(i-1,j,k)-2.0*dpsi(i,j,k))  &
  &                 +a2*(dpsi(i,j+1,k)+dpsi(i,j-1,k)-2.0*dpsi(i,j,k))  &
  &                 -a3*(dpsi(i,j+1,k)-dpsi(i,j-1,k))  &
  &                 -b1*(dpsi(i+1,j,k+1)-dpsi(i+1,j,k-1)  &
  &                     -dpsi(i-1,j,k+1)+dpsi(i-1,j,k-1))  &
  &                 -c1*(dpsi(i,j+1,k+1)-dpsi(i,j+1,k-1)  &
  &                     -dpsi(i,j-1,k+1)+dpsi(i,j-1,k-1))  &
  &                 +d1*(dphi(i,j,k+1)+dphi(i,j,k-1)-2.0*dphi(i,j,k))  &
  &                 -e1*(dphi(i+1,j,k+1)-dphi(i+1,j,k-1)  &
  &                     -dphi(i-1,j,k+1)+dphi(i-1,j,k-1))  &
  &                 -f1*(dphi(i,j+1,k+1)-dphi(i,j+1,k-1)  &
  &                     -dphi(i,j-1,k+1)+dphi(i,j-1,k-1))
           r6(i,j,k)=(g1+n1)*(dpsi(i+1,j,k)+dpsi(i-1,j,k)-2.0*dpsi(i,j,k))  &
  &                 +(h1+o1)*(dpsi(i,j+1,k)+dpsi(i,j-1,k)-2.0*dpsi(i,j,k))  &
  &                 -k1*(dpsi(i+1,j+1,k)-dpsi(i+1,j-1,k)  &
  &                     -dpsi(i-1,j+1,k)+dpsi(i-1,j-1,k))  &
  &                 +(m1-q1)*(dpsi(i,j+1,k)-dpsi(i,j-1,k))  &
  &                 -radc*dlon2*(dphi(i+1,j,k)+dphi(i-1,j,k)-2.0*dphi(i,j,k))  &
  &                 -s1*(dphi(i,j+1,k)+dphi(i,j-1,k)-2.0*dphi(i,j,k))  &
  &                 +t1*(dphi(i,j+1,k)-dphi(i,j-1,k))

        !-- calculating adjoint for P
           psip(i,j,k)=psip(i,j,k)-(2.0*a1+2.0*a2)*r5(i,j,k)
           psip(i+1,j,k)=psip(i+1,j,k)+a1*r5(i,j,k)
           psip(i-1,j,k)=psip(i-1,j,k)+a1*r5(i,j,k)
           psip(i,j+1,k)=psip(i,j+1,k)+(a2-a3)*r5(i,j,k)
           psip(i,j-1,k)=psip(i,j-1,k)+(a2-a3)*r5(i,j,k)
           psip(i+1,j,k+1)=psip(i+1,j,k+1)-b1*r5(i,j,k)
           psip(i-1,j,k+1)=psip(i-1,j,k+1)+b1*r5(i,j,k)
           psip(i+1,j,k-1)=psip(i+1,j,k-1)+b1*r5(i,j,k)
           psip(i-1,j,k-1)=psip(i-1,j,k-1)-b1*r5(i,j,k)
           psip(i,j+1,k+1)=psip(i,j+1,k+1)-c1*r5(i,j,k)
           psip(i,j-1,k+1)=psip(i,j-1,k+1)+c1*r5(i,j,k)
           psip(i,j+1,k-1)=psip(i,j+1,k-1)+c1*r5(i,j,k)
           psip(i,j-1,k-1)=psip(i,j-1,k-1)-c1*r5(i,j,k)
           phip(i,j,k)=phip(i,j,k)-2.0*d1*r5(i,j,k)
           phip(i,j,k+1)=phip(i,j,k+1)+d1*r5(i,j,k)
           phip(i,j,k-1)=phip(i,j,k-1)+d1*r5(i,j,k)
           phip(i+1,j,k+1)=phip(i+1,j,k+1)-e1*r5(i,j,k)
           phip(i-1,j,k+1)=phip(i-1,j,k+1)+e1*r5(i,j,k)
           phip(i+1,j,k-1)=phip(i+1,j,k-1)+e1*r5(i,j,k)
           phip(i-1,j,k-1)=phip(i-1,j,k-1)-e1*r5(i,j,k)
           phip(i,j+1,k+1)=phip(i,j+1,k+1)-f1*r5(i,j,k)
           phip(i,j-1,k+1)=phip(i,j-1,k+1)+f1*r5(i,j,k)
           phip(i,j+1,k-1)=phip(i,j+1,k-1)+f1*r5(i,j,k)
           phip(i,j-1,k-1)=phip(i,j-1,k-1)-f1*r5(i,j,k)

        !-- calculating adjoint for B
           psib(i,j,k)=psib(i,j,k)-(2.0*g1+2.0*h1  &
  &                                +2.0*n1+2.0*o1)*r6(i,j,k)
           psib(i+1,j,k)=psib(i+1,j,k)+(g1+n1)*r6(i,j,k)
           psib(i-1,j,k)=psib(i-1,j,k)+(g1+n1)*r6(i,j,k)
           psib(i,j+1,k)=psib(i,j+1,k)+(h1+m1+o1-q1)*r6(i,j,k)
           psib(i,j-1,k)=psib(i,j-1,k)+(h1+m1+o1-q1)*r6(i,j,k)
           psib(i+1,j+1,k)=psib(i+1,j+1,k)-k1*r6(i,j,k)
           psib(i-1,j+1,k)=psib(i-1,j+1,k)+k1*r6(i,j,k)
           psib(i+1,j-1,k)=psib(i+1,j-1,k)+k1*r6(i,j,k)
           psib(i-1,j-1,k)=psib(i-1,j-1,k)-k1*r6(i,j,k)
           phib(i,j,k)=phib(i,j,k)+2.0*(dlon2*radc+s1)*r6(i,j,k)
           phib(i+1,j,k)=phib(i+1,j,k)-(dlon2*radc)*r6(i,j,k)
           phib(i-1,j,k)=phib(i-1,j,k)-(dlon2*radc)*r6(i,j,k)
           phib(i,j+1,k)=phib(i,j+1,k)+(-s1+t1)*r6(i,j,k)
           phib(i,j-1,k)=phib(i,j-1,k)-(s1+t1)*r6(i,j,k)
        end do
     end do
  end do

  r9=psip+psib
  r10=phip+phib

  write(*,*) "checking tangential linear (check_TL)"
  write(*,'(a13,1PE14.6)') "Non linear = ", sqrt(calc_3ddot(r7(2:nx-1,2:ny-1,2:nz-1),r7(2:nx-1,2:ny-1,2:nz-1))+calc_3ddot(r8(2:nx-1,2:ny-1,2:nz-1),r8(2:nx-1,2:ny-1,2:nz-1)))
  write(*,'(a13,1PE14.6)') "Tan linear = ", alphak*sqrt(calc_3ddot(r5(2:nx-1,2:ny-1,2:nz-1),r5(2:nx-1,2:ny-1,2:nz-1))+calc_3ddot(r6(2:nx-1,2:ny-1,2:nz-1),r6(2:nx-1,2:ny-1,2:nz-1)))
  write(*,'(a13,1PE14.6)') "The ratio  = ", sqrt(calc_3ddot(r7(2:nx-1,2:ny-1,2:nz-1),r7(2:nx-1,2:ny-1,2:nz-1))+calc_3ddot(r8(2:nx-1,2:ny-1,2:nz-1),r8(2:nx-1,2:ny-1,2:nz-1)))/(alphak*sqrt(calc_3ddot(r5(2:nx-1,2:ny-1,2:nz-1),r5(2:nx-1,2:ny-1,2:nz-1))+calc_3ddot(r6(2:nx-1,2:ny-1,2:nz-1),r6(2:nx-1,2:ny-1,2:nz-1))))
  write(*,*) "checking adjoint (check_TL)"
  write(*,'(a13,1PE14.6)') '(Hdx)T(Hdx)= ', calc_3ddot(r5(2:nx-1,2:ny-1,2:nz-1),r5(2:nx-1,2:ny-1,2:nz-1))+calc_3ddot(r6(2:nx-1,2:ny-1,2:nz-1),r6(2:nx-1,2:ny-1,2:nz-1))
  write(*,'(a13,1PE14.6)') 'dxT(HTHdx) = ', calc_3ddot(dpsi(2:nx-1,2:ny-1,2:nz-1),r9(2:nx-1,2:ny-1,2:nz-1))+calc_3ddot(dphi(2:nx-1,2:ny-1,2:nz-1),r10(2:nx-1,2:ny-1,2:nz-1))

end subroutine check_TL

!------------------------------------
!------------------------------------
!------------------------------------

subroutine sub_lapsi_2d( lon, lat, val, res )
! calculating laplacian for val, based on spheric coordinate.
! nabla ^2 (val) = (1/a^2) * 
!                  (d^2val/dlambda ^2+cos^2phi d^2val/dphi ^2-sincos dval/dphi)
  use Derivation
  use Phys_Const
  implicit none
  real, intent(in) :: lon(:)   ! longitude coordinate [rad]
  real, intent(in) :: lat(:)   ! latitude coordinate [rad]
  real, intent(in) :: val(size(lon),size(lat))  ! any variable
  real, intent(inout) :: res(size(lon),size(lat))  ! any variable

  integer :: nx, ny, i, j
  real, allocatable, dimension(:) :: tmp

  nx=size(lon)
  ny=size(lat)

  allocate(tmp(nx))
  res=0.0

  do j=1,ny
     call sub_lapsi_1d( lon, val(:,j), tmp )
     do i=1,nx
        res(i,j)=res(i,j)+tmp(i)
     end do
  end do

  deallocate(tmp)
  allocate(tmp(ny))

  do i=1,nx
     call sub_lapsi_1d( lat, val(i,:), tmp )
     do j=1,ny
        res(i,j)=res(i,j)+(cos(lat(j))**2)*tmp(j)
     end do
     call grad_1d( lat, val(i,:), tmp )
     do j=1,ny
        res(i,j)=res(i,j)-sin(lat(j))*cos(lat(j))*tmp(j)
     end do
  end do

end subroutine sub_lapsi_2d


subroutine sub_lapsi_1d( x, val, res )
! including boundary values, calculating 1d laplacian.
! In the inner region, res = call sub_lapsi_1d()
! At each boundary, res = (val_n-val_{n-1})/(x_n-x_{n-1})^2
!                        -(val_n-val_{n-2})/{(x_n-x_{n-1})(x_n-x_{n-2})}
  use Derivation
  implicit none
  real, intent(in) :: x(:)   ! longitude coordinate [rad]
  real, intent(in) :: val(size(x))  ! any variable
  real, intent(inout) :: res(size(x))  ! any variable

  integer :: nx

  nx=size(x)

  call laplacian_1d( x, val, res )

  !-- calculating boundary 1st order.

  res(1)=(val(3)-val(1))/((x(3)-x(1))*(x(2)-x(1)))  &
  &      -(val(2)-val(1))/((x(2)-x(1))**2)
  res(nx)=(val(nx)-val(nx-1))/((x(nx)-x(nx-1))**2)  &
  &      -(val(nx)-val(nx-2))/((x(nx)-x(nx-1))*(x(nx)-x(nx-2)))

end subroutine sub_lapsi_1d



subroutine sub_cross2_2d( x1, x2, val, res )
! calculating cross derivation term for val.
! Array order is x1 and x2.
! d^2val/dx1dx2
  use derivation
  implicit none
  real, intent(in) :: x1(:)   ! longitude coordinate [rad]
  real, intent(in) :: x2(:)   ! latitude coordinate [rad]
  real, intent(in) :: val(size(x1),size(x2))  ! any variable
  real, intent(inout) :: res(size(x1),size(x2))  ! any variable

  integer :: nx, ny, i, j
  real, dimension(size(x1),size(x2)) :: dval

  nx=size(x1)
  ny=size(x2)

  res=0.0
  dval=0.0

  do j=1,ny
     call grad_1d( x1, val(:,j), dval(:,j) )
  end do

  do i=1,nx
     call grad_1d( x2, dval(i,:), res(i,:) )
  end do

end subroutine sub_cross2_2d


real function calc_3ddot( val1, val2 )
  implicit none
  real, intent(in) :: val1(:,:,:)
  real, intent(in) :: val2(size(val1,1),size(val1,2),size(val1,3))

  integer :: nx, ny, nz, i, j, k

  nx=size(val1,1)
  ny=size(val1,2)
  nz=size(val1,3)

  calc_3ddot=0.0

  do k=1,nz
     do j=1,ny
        do i=1,nx
           calc_3ddot=calc_3ddot+val1(i,j,k)*val2(i,j,k)
        end do
     end do
  end do

  return

end function calc_3ddot

end module sub_calc
