!
! How to use this module
!
!--------------------------------------------------------------
! NOTE for compilation:
! If MPI is not used, define LT2_NOMPI when you compile
!
!--------------------------------------------------------------
! Order of subroutine call if MPI is used :
!
!   use lt2_module
!   im     = XX
!   jmg    = YY
!   km     = ZZ
!   ntrunc = N
!   call mpii_init                             ! MPI is initialized
!   call lt2_init_mpi( jmg, ntrunc, jm, lm )   ! distributed grids are set up
!                                              ! and inquire jm and lm
!   call lt2_init( im, jmg, lm, ntrunc,   &
!        ita, dta, sinlatg, coslatg, gwg, &
!        ord, deg, pmn, eps )
!
!   ...
!
!   call lt2_end_mpi
!   call mpii_end
!
!--------------------------------------------------------------
! Order of subroutine call if MPI is NOT used :
!
!   use lt2_module
!   im     = XX
!   jmg    = YY
!   km     = ZZ
!   ntrunc = N
!   jm     = jmg                              ! jm is set manually
!   call lt2_inq_arraysize( ntrunc, lm )      ! inquire mmg and nm
!   call lt2_init( im, jmg, mm, nm,        &  !
!        ita, dta, sinlatg, coslatg, gwg, &   !
!        ord, deg, pmn, eps )
!
!   ...
!
!
!--------------------------------------------------------------

  module lt2_module

    use vtype_module

    implicit none

    private


#ifndef LT2_NOMPI

    ! jgs   : first index of latitudinal grid on my rank over all ranks
    ! jge   : last index of latitudinal grid on my rank over all ranks

    integer(i4b)             , save :: jgs, jge

    ! jm_a   : array for number of latitudinal grids on each rank
    ! jgs_a  : array for first index of latitudinal grid on each rank over all ranks
    ! jge_a  : array for last index of latitudinal grid on each rank over all ranks
    ! jm_max : maximum number of latitudinal grid on each rank over all ranks

    integer(i4b), allocatable, save :: jm_a( : ), jgs_a( : ), jge_a( : )
    integer(i4b)             , save :: jm_max

    public :: jgs, jge, jm_a, jgs_a, jge_a, jm_max



    !
    !
    !

    integer(i4b)             , save :: mgs1, mge1, mgs2, mge2
    integer(i4b)             , save :: mm1, mm2
    integer(i4b), allocatable, save :: mgs1_a( : ), mge1_a( : ), mgs2_a( : ), mge2_a( : )
    integer(i4b), allocatable, save :: mm1_a( : ), mm2_a( : )
    integer(i4b), allocatable, save :: m_index( : )

    public :: &
         mgs1, mge1, mgs2, mge2, &
         mm1, mm2, &
         mgs1_a, mge1_a, mgs2_a, mge2_a, &
         mm1_a, mm2_a, &
         m_index


#ifdef NOCYCLIC

    !
    !
    !

    integer(i4b)             , save :: mgs, mge
    integer(i4b), allocatable, save :: mm_a( : ), mgs_a( : ), mge_a( : )

    public :: mgs, mge, mm_a, mgs_a, mge_a

#endif

    integer(i4b)             , save :: mm_max

    public :: mm_max

#endif


    !
    ! internal variables
    !
    ! l_mmg : "global" array size for m
    ! l_mm  : "local"  array size for m
    ! l_nm  : array size for n
    !
    integer(i4b), save              :: l_mmg, l_mm, l_nm
    integer(i4b), save, allocatable :: l_ord( : ), l_deg( : )
    real(dp)    , save, allocatable :: l_pmn_np( :, :, : )
    real(dp)    , save, allocatable :: l_pmn   ( :, :, : ), l_eps   ( :, : )
    real(dp)    , save, allocatable :: l_pmn_nn( :, :, : ), l_eps_nn( :, : )
    real(dp)    , save, allocatable :: l_pmngwg( :, :, : )
    real(dp)    , save, allocatable :: &
         l_derivop_coefr1( :, :, : ), &
         l_derivop_coefi1( :, :, : ), &
         l_derivop_coefr2( :, :, : ), &
         l_derivop_coefi2( :, :, : )
    integer(i4b), save, allocatable :: l_l2m( : ), l_nm2l( :, : )

    real(dp)    , save, allocatable :: ll_pmn_np( :, : )
    real(dp)    , save, allocatable :: ll_pmn_nn( :, : ), ll_eps_nn( : )
    real(dp)    , save, allocatable :: ll_pmn   ( :, : ), ll_eps   ( : )
    real(dp)    , save, allocatable :: ll_ord   ( : )   , ll_deg   ( : )
    real(dp)    , save, allocatable :: &
         ll_derivop_coef1( :, : ), ll_derivop_coef2( :, : )


    logical     , save              :: flag_inited_lt2_init_mpi

    data flag_inited_lt2_init_mpi /.false./


#ifdef LT2_USEFTPACK
    !
    ! for ispack
    !
    integer(i4b), save              :: ft_it( 5 )
    real(dp)    , save, allocatable :: ft_t( : )
#endif



#ifndef LT2_NOMPI
#else
    public :: lt2_inq_arraysize
#endif

    public :: lt2_init
    public :: lt2_naleg2

#ifndef LT2_NOMPI
    public :: lt2_init_mpi, lt2_end_mpi
    public :: lt2_transposition_cyclic, lt2_transposition_cyclic_inv
#ifdef NOCYCLIC
    public :: lt2_transposition
#endif
#endif


    public :: lt2_g2s, lt2_s2g, lt2_g2f, lt2_f2g, lt2_f2s, lt2_s2f
    public :: lt2_derivop_f2s, lt2_derivop_mn_f2s, lt2_derivop_f2s_old
    public :: lt2_lamderiv_s2s, lt2_cossqmuderiv_s2s, &
         lt2_lap_s2s, lt2_lapinv_s2s, &
         lt2_truncate

    public :: lt2_schipsi2sucvc

    public :: lt2_get_nm_mm, lt2_exchange_array_l2nm, lt2_exchange_array_nm2l


    !**************************************************************************

  contains

    !**************************************************************************
    ! 2nm+2: Im{S(0,nm)}
    ! 2nm+1: Re{S(0,nm)}
    ! 2N+2: Im{S(0,N )}
    ! 2N+1: Re{S(0,N )}
    !          ... 
    ! 4   : Im{S(0,1 )}
    ! 3   : Re{S(0,1 )}
    ! 2   : Im{S(0,0 )}
    ! 1   : Re{S(0,0 )}
    !**************************************************************************

#ifndef LT2_NOMPI

    !
    ! If MPI is used, array size, lm, is calculated in lt2_init_mpi.
    !

#else

    subroutine lt2_inq_arraysize( jmg, ntrunc, jm, lm )

      integer(i4b), intent(in ) :: jmg, ntrunc
      integer(i4b), intent(out) :: jm, lm


      !
      ! local variables
      !
      integer(i4b) :: mmg, nm
      integer(i4b) :: m


      jm = jmg

      !
      ! The mmg and nm used here are local variables.
      ! Those are different from l_mmg and l_nm of module variables.
      !
      call lt2___calc_mmg_nm( ntrunc, mmg, nm )


      lm = 0
      do m = 1, mmg
         lm = lm + nm - m + 1
      end do
      lm = lm + lm


    end subroutine lt2_inq_arraysize

#endif

    !**************************************************************************

    subroutine lt2___calc_mmg_nm( ntrunc, mmg, nm )

      integer(i4b), intent(in ) :: ntrunc
      integer(i4b), intent(out) :: mmg, nm


      !
      ! local variables
      !
      integer(i4b) :: ordm, degm


      ! truncation wavenumber for m and n
      ordm = ntrunc
      degm = ordm

      !
      ! !!!NOTICE!!!
      ! If you change here, you MUST change l_mmg, l_mm, l_nm in the subroutine ... .
      !
      ! "global" array size for m, and array size for n
      !
      mmg = ( ordm + 1 )
      nm  = ( degm + 1 ) + 1


    end subroutine lt2___calc_mmg_nm

    !**************************************************************************
    !
    ! jmg  : number of latitudinal grids over all ranks
    ! jm   : number of latitudinal grids on my rank
    ! mmg  : number of zonal wavenumber over all ranks
    ! mm   : number of zonal wavenumber on my rank
    !**************************************************************************

    subroutine lt2_init( im, jmg, lm, ntrunc, &
         ita, dta, sinlatg, coslatg, gwg, ord, deg, pmn, eps )

      use fft7_module

      integer(i4b), intent(in ) :: im, jmg, lm, ntrunc
      integer(i4b), intent(out) :: ita( 5, 2 )
      real(dp)    , intent(out) :: dta( im, 2 )
      real(dp)    , intent(out) :: sinlatg( jmg ), coslatg( jmg ), gwg( jmg )
      integer(i4b), intent(out) :: ord( lm ), deg( lm )
      real(dp)    , intent(out) :: pmn( lm, jmg ), eps( lm )



      !
      ! local variables
      !
      integer(i4b) :: j, l, m, n


      call fft7_init( im, ita, dta )

      !
      ! set latitudinal grids
      !
      call lt2_gauleg( -1.0d0, 1.0d0, jmg, sinlatg, gwg )
      do j = 1, jmg
         coslatg( j ) = sqrt( 1.0d0 - sinlatg( j ) * sinlatg( j ) )
      end do


      !
      ! If MPI is used, l_mmg and l_nm have already been set in lt2_init_mpi routine.
      ! But, here, those values are overwritten. The values calculated here are 
      ! used in the following routines. 
      ! If MPI is not used, those values are calculated here for the first time.
      !
      call lt2___calc_mmg_nm( ntrunc, l_mmg, l_nm )


#ifndef LT2_NOMPI
      ! If MPI is used, l_mm is set in lt2_init_mpi routine.
#else
      l_mm = l_mmg
#endif


      !--------------------------------------------
      !
      ! for internal variable
      !
      allocate( &
           l_ord( l_mm ), l_deg( l_nm+1 ), &
           l_pmn_np( l_nm+1, l_mm, jmg ), &
           l_pmn   ( l_nm+1, l_mm, jmg ), l_eps   ( l_nm+1, l_mm ), &
           l_pmn_nn( l_nm+1, l_mm, jmg ), l_eps_nn( l_nm+1, l_mm ), &
           l_pmngwg( l_nm+1, l_mm, jmg ), &
           l_derivop_coefr1( l_nm+1, l_mm, jmg ), &
           l_derivop_coefi1( l_nm+1, l_mm, jmg ), &
           l_derivop_coefr2( l_nm+1, l_mm, jmg ), &
           l_derivop_coefi2( l_nm+1, l_mm, jmg ), &
           l_l2m( lm ), l_nm2l( l_nm, l_mm ), &
           ll_ord   ( lm ), &
           ll_deg   ( lm ), &
           ll_eps_nn( lm ), &
           ll_eps   ( lm ), &
           ll_pmn_np( lm, jmg ), &
           ll_pmn_nn( lm, jmg ), &
           ll_pmn   ( lm, jmg ), &
           ll_derivop_coef1( lm, jmg ), ll_derivop_coef2( lm, jmg ) &
           )

#ifndef LT2_NOMPI
      do m = 1, l_mm
         l_ord( m ) = fft7_i2m( im, m_index(m) )
      end do
#else
      do m = 1, l_mm
         l_ord( m ) = fft7_i2m( im, m )
      end do
#endif
      do n = 1, l_nm+1
         l_deg( n ) = n - 1
      end do
      do j = 1, jmg
         do m = 1, l_mm
            do n = 1, l_nm+1
               if( abs( l_ord( m ) ) .gt. l_deg( n ) ) then
                  l_pmn(n,m,j) = 1.0d100
               else
                  l_pmn(n,m,j) = lt2_naleg2( l_ord(m), l_deg(n), sinlatg(j) )
               end if
            end do
         end do
      end do
      do m = 1, l_mm
         do n = 1, l_nm+1
            if( abs( l_ord( m ) ) .gt. l_deg( n ) ) then
               l_eps( n, m ) = 1.0d100
            else
               l_eps( n, m ) &
                    = sqrt( dble( l_deg(n) * l_deg(n) - l_ord(m) * l_ord(m) ) &
                    / dble( 4 * l_deg(n) * l_deg(n) - 1 ) )
            end if
         end do
      end do
      do j = 1, jmg
         do m = 1, l_mm
            do n = 1, l_nm
               if( abs( l_ord( m ) ) .gt. l_deg( n ) ) then
                  l_pmn_np( n, m, j ) = 1.0d100
                  l_pmn_nn( n, m, j ) = 1.0d100
               else
                  if( abs( l_ord( m ) ) .eq. l_deg( n ) ) then
                     l_pmn_np( n, m, j ) = 0.0d0
                     l_pmn_nn( n, m, j ) = l_pmn( n+1, m, j )
                  else
                     l_pmn_np( n, m, j ) = l_pmn( n-1, m, j )
                     l_pmn_nn( n, m, j ) = l_pmn( n+1, m, j )
                  endif
               end if
            end do
            do n = l_nm+1, l_nm+1
               l_pmn_np( n, m, j ) = 1.0d100
               l_pmn_nn( n, m, j ) = 1.0d100
            end do
         end do
      end do
      do m = 1, l_mm
         do n = 1, l_nm
            if( abs( l_ord( m ) ) .gt. l_deg( n ) ) then
               l_eps_nn( n, m ) = 1.0d100
            else
               if( abs( l_ord( m ) ) .eq. l_deg( n ) ) then
                  l_eps_nn( n, m ) = l_eps( n+1, m )
               else
                  l_eps_nn( n, m ) = l_eps( n+1, m )
               endif
            end if
         end do
         do n = l_nm+1, l_nm+1
            l_eps_nn( n, m ) = 1.0d100
         end do
      end do
      do j = 1, jmg
         do m = 1, l_mm
            do n = 1, l_nm+1
               if( abs( l_ord( m ) ) .gt. l_deg( n ) ) then
                  l_pmngwg( n, m, j ) = 1.0d100
               else
                  l_pmngwg( n, m, j ) = l_pmn( n, m, j ) * gwg( j )
               end if
            end do
         end do
      end do

      do j = 1, jmg
         do m = 1, l_mm
            do n = 1, l_ord(m)+1-1
               l_derivop_coefr1( n, m, j ) = 1.0d100
               l_derivop_coefi1( n, m, j ) = 1.0d100
               l_derivop_coefr2( n, m, j ) = 1.0d100
               l_derivop_coefi2( n, m, j ) = 1.0d100
            end do
            do n = l_ord(m)+1, l_nm
               l_derivop_coefr1( n, m, j ) &
                    = -l_ord(m) * l_pmn(n,m,j)         &
                    / ( coslatg( j ) * coslatg( j ) ) * gwg( j )
               l_derivop_coefi1( n, m, j ) &
                    =  l_ord(m) * l_pmn(n,m,j)         &
                    / ( coslatg( j ) * coslatg( j ) ) * gwg( j )
               l_derivop_coefr2( n, m, j ) &
                    = (     l_deg(n)     * l_eps_nn(n,m) * l_pmn_nn(n,m,j) &
                        - ( l_deg(n)+1 ) * l_eps   (n,m) * l_pmn_np(n,m,j) ) &
                        / ( coslatg( j ) * coslatg( j ) ) * gwg( j )
               l_derivop_coefi2( n, m, j ) &
                    = (     l_deg(n)     * l_eps_nn(n,m) * l_pmn_nn(n,m,j) &
                        - ( l_deg(n)+1 ) * l_eps   (n,m) * l_pmn_np(n,m,j) ) &
                        / ( coslatg( j ) * coslatg( j ) ) * gwg( j )
            end do
         end do
      end do


      l = 1
      do m = 1, l_mm
         do n = 1, l_nm
            if( abs( l_ord( m ) ) .gt. l_deg( n ) ) then
            else
               l_l2m( l   ) = m
               l_l2m( l+1 ) = m
               l = l + 2
            end if
         end do
      end do
      l = 1
      do m = 1, l_mm
         do n = 1, l_nm
            if( abs( l_ord( m ) ) .gt. l_deg( n ) ) then
               l_nm2l( n, m ) = -1
            else
               l_nm2l( n, m ) = l
               l = l + 2
            end if
         end do
      end do
      !--------------------------------------------


      l = 1
      do m = 1, l_mm
         do n = 1, l_nm
            if( abs( l_ord( m ) ) .gt. l_deg( n ) ) then
            else
               ord( l   ) = l_ord( m )
               ord( l+1 ) = l_ord( m )
               deg( l   ) = l_deg( n )
               deg( l+1 ) = l_deg( n )
               l = l + 2
            end if
         end do
      end do

      do j = 1, jmg
         l = 1
         do m = 1, l_mm
            do n = 1, l_nm
               if( abs( l_ord( m ) ) .gt. l_deg( n ) ) then
               else
                  pmn( l  , j ) = l_pmn( n, m, j )
                  pmn( l+1, j ) = l_pmn( n, m, j )
                  l = l + 2
               end if
            end do
         end do
      end do
      do l = 1, lm
         eps( l ) = sqrt( dble( deg(l) * deg(l) - ord(l) * ord(l) ) &
                    / dble( 4 * deg(l) * deg(l) - 1 ) )
      end do

      !-----

      l = 1
      do m = 1, l_mm
         do n = 1, l_nm
            if( abs( l_ord( m ) ) .gt. l_deg( n ) ) then
            else
               ll_ord( l   ) = l_ord( m )
               ll_ord( l+1 ) = l_ord( m )
               ll_deg( l   ) = l_deg( n )
               ll_deg( l+1 ) = l_deg( n )
               l = l + 2
            end if
         end do
      end do

      do j = 1, jmg
         l = 1
         do m = 1, l_mm
            do n = 1, l_nm
               if( abs( l_ord( m ) ) .gt. l_deg( n ) ) then
               else

                  if( abs( l_ord( m ) ) .eq. l_deg( n ) ) then
                     ll_pmn_np( l  , j ) = 0.0d0
                     ll_pmn_np( l+1, j ) = 0.0d0
                  else
                     ll_pmn_np( l  , j ) = l_pmn( n-1, m, j )
                     ll_pmn_np( l+1, j ) = l_pmn( n-1, m, j )
                  end if

                  ll_pmn   ( l  , j ) = l_pmn( n  , m, j )
                  ll_pmn   ( l+1, j ) = l_pmn( n  , m, j )

                  ll_pmn_nn( l  , j ) = l_pmn( n+1, m, j )
                  ll_pmn_nn( l+1, j ) = l_pmn( n+1, m, j )

                  l = l + 2
               end if
            end do
         end do
      end do
      do l = 1, lm
         ll_eps   ( l ) &
              = sqrt( dble(  ll_deg(l)   * ll_deg(l)   -ll_ord(l)*ll_ord(l) )&
                    / dble( 4* ll_deg(l)   * ll_deg(l)    - 1 ) )
         ll_eps_nn( l ) &
              = sqrt( dble( (ll_deg(l)+1)*(ll_deg(l)+1)-ll_ord(l)*ll_ord(l) ) &
                    / dble( 4*(ll_deg(l)+1)*(ll_deg(l)+1) - 1 ) )
      end do



      do j = 1, jmg
         do l = 1, lm, 2
            ll_derivop_coef1( l  , j ) &
                 = -ll_ord(l) * ll_pmn(l,j)                                &
                   / ( coslatg( j ) * coslatg( j ) ) * gwg( j )
            ll_derivop_coef1( l+1, j ) &
                 =  ll_ord(l) * ll_pmn(l,j)                                &
                   / ( coslatg( j ) * coslatg( j ) ) * gwg( j )

            ll_derivop_coef2( l  , j ) &
                 = (     ll_deg(l)       * ll_eps_nn(l) * ll_pmn_nn(l,j)   &
                     - ( ll_deg(l) + 1 ) * ll_eps   (l) * ll_pmn_np(l,j) ) &
                   / ( coslatg( j ) * coslatg( j ) ) * gwg( j )
            ll_derivop_coef2( l+1, j ) &
                 = (     ll_deg(l)       * ll_eps_nn(l) * ll_pmn_nn(l,j)   &
                     - ( ll_deg(l) + 1 ) * ll_eps   (l) * ll_pmn_np(l,j) ) &
                   / ( coslatg( j ) * coslatg( j ) ) * gwg( j )
         end do
      end do


      !----------

#ifdef LT2_USEFTPACK
      allocate( ft_t( 2*im ) )

      call fttrui( im, ft_it, ft_t )
#endif



    end subroutine lt2_init

    !**************************************************************************

    subroutine lt2_get_nm_mm( nm, mm )

      integer(i4b), intent(out) :: nm, mm


      mm = l_mm
      nm = l_nm


    end subroutine lt2_get_nm_mm

    !**************************************************************************

    subroutine lt2_exchange_array_l2nm( km, lm, sarr, sarrr, sarri )

      integer(i4b), intent(in ) :: km, lm
      real(dp)    , intent(in ) :: sarr ( lm, km )
      real(dp)    , intent(out) :: sarrr( l_nm, l_mm, km ), sarri( l_nm, l_mm, km )


      !
      ! local variables
      !
      integer(i4b) :: k, l, m, n


      !$omp parallel do private( l, m, n )
      do k = 1, km
         l = 1
         do m = 1, l_mm
            do n = 1, l_ord(m)+1-1
               sarrr( n, m, k ) = 0.0d0
               sarri( n, m, k ) = 0.0d0
            end do
            do n = l_ord(m)+1, l_nm
               sarrr( n, m, k ) = sarr( l  , k )
               sarri( n, m, k ) = sarr( l+1, k )
               l = l + 2
            end do
         end do
      end do


    end subroutine lt2_exchange_array_l2nm

    !**************************************************************************

    subroutine lt2_exchange_array_nm2l( km, lm, sarrr, sarri, sarr )

      integer(i4b), intent(in ) :: km, lm
      real(dp)    , intent(in ) :: sarrr( l_nm, l_mm, km ), sarri( l_nm, l_mm, km )
      real(dp)    , intent(out) :: sarr ( lm, km )


      !
      ! local variables
      !
      integer(i4b) :: k, l, m, n


      !$omp parallel do private( l, m, n )
      do k = 1, km
         l = 1
         do m = 1, l_mm
            do n = l_ord(m)+1, l_nm
               sarr( l  , k ) = sarrr( n, m, k )
               sarr( l+1, k ) = sarri( n, m, k )
               l = l + 2
            end do
         end do
      end do


    end subroutine lt2_exchange_array_nm2l

    !**************************************************************************
    ! subroutine lt2_naleg2
    !**************************************************************************
    ! This function naleg2 calculate normalized asociated Legendre function.
    ! The calculation method is obtained from Krishnamurti et al. [1998],
    ! p81, 82.
    !**************************************************************************
    ! 2003/06/06
    ! This routine was coded.
    !**************************************************************************

    function lt2_naleg2( m, n, x ) result( dres )

      real(dp)                 :: dres

      integer(i4b), intent(in) :: m, n
      real(dp)    , intent(in) :: x


      !     pmm     : P m,m
      !     gmmp    : g m-1,m-1
      !     pmn     : P m,n
      !     pmnp1   : P m,n-1
      !     pmnp2   : P m,n-2
      !     eps     : Epsilon m,n
      !     epsp1   : Epsilon m,n-1

      real(dp)     :: pmm,gmmp,pmn,pmnp1,pmnp2,eps,epsp1

      integer(i4b) :: i, m2
      integer(i4b) :: signsw


      !
      ! When m is negative, the associated Legendre functions are calculated
      ! using the relation, P_{-m,n} = (-1)^m P_{m,n}.
      !
      signsw = 1
      !
      m2 = m
      if( m2 .lt. 0 ) then
         m2     = -m2
         signsw = -1
      endif

      if( ( m2 .lt. 0 ) .or. ( m2 .gt. n ) .or. ( dabs( x ) .gt. 1.0d0 ) ) then
         write( 6, * ) "Bad arguments in routine lt2_naleg2 in lt2_module"
         write( 6, * ) m2, n, dabs( x )
         stop
      end if

      pmm = 1.0d0 / sqrt( 2.0d0 )
      if( m2 .gt. 0 ) then
         do i = 1, m2
            gmmp = sqrt( dble( (2*(i-1)+2) ) / dble( (2*(i-1)+3) ) )
            pmm  = sqrt( 1.0d0 - x * x ) * pmm / gmmp
         end do
      end if
      if( n .eq. m2 ) then
         dres = pmm
      else
         pmnp1 = pmm
         pmnp2 = 0.0d0
         do i = m2 + 1, n
            eps   = sqrt( dble( i**2 - m2**2 ) &
                 / dble( 4.0d0*i**2 - 1.0d0 ) )
            epsp1 = sqrt( dble( (i-1)**2 - m2**2 ) &
                 / dble( 4.0d0*(i-1)**2 - 1.0d0 ) )
            pmn   = ( x * pmnp1- epsp1 * pmnp2 ) / eps
            pmnp2 = pmnp1
            pmnp1 = pmn
         end do
         dres = pmn
      end if

      ! Treatment when m is negative
      if( signsw .eq. -1 ) then
         m2        = -m2
         dres = ( (-1)**mod(m2,2) ) * dres
      end if


    end function lt2_naleg2

    !**************************************************************************
    ! subroutine lt2_gauleg
    !**************************************************************************
    ! This code was obtained from Press et al. [1993], p141.
    !**************************************************************************

    subroutine lt2_gauleg( x1, x2, n, x, w )

      use const_module, only : pi


      real(dp)    , intent(in ) :: x1,x2
      integer(i4b), intent(in ) :: n
      real(dp)    , intent(out) :: x(n),w(n)


      real(dp)     :: eps
      real(dp)     :: z1, z, xm, xl, pp, p3, p2, p1
      integer(i4b) :: i, j, m


      !
      ! changed at 2005/09/14
      !
!!$      eps = 1.0e-11
      eps = 1.0e-15

      m  = ( n+1 ) / 2
      xm = ( x2+x1 ) / 2.0d0
      xl = ( x2-x1 ) / 2.0d0

      do i = 1, m
         z = cos( pi*dble(i-0.25)/(dble(n+0.5)) )

 100     continue
         p1 = 1.0d0
         p2 = 0.0d0
!!$         do j = 1, n
         do j = 1, n
            p3 = p2
            p2 = p1
            p1 = ( (2.0d0*dble(j)-1.0d0)*z*p2-(dble(j)-1.0d0)*p3 ) / dble(j)
         enddo

         pp = dble(n) * (z*p1-p2) / (z*z-1.0d0)
         z1 = z
         z  = z1 - p1 / pp

         !
         ! changed at 2005/09/14
         !
!!$         if( abs( z-z1 ) .gt. eps ) go to 100
!!$         if( abs( z-z1 ) / z1 .gt. eps ) go to 100
         if( abs( z-z1 ) / abs( z1 + 1.0d-200 ) .gt. eps ) go to 100

         x( i     ) = xm-xl*z
         x( n+1-i ) = xm+xl*z
         w( i     ) = 2.0d0*xl/((1.0d0-z*z)*pp*pp)
         w( n+1-i ) = w(i)
      end do


    end subroutine lt2_gauleg

    !**************************************************************************
    !**************************************************************************
    ! Followings are MPI related routines.
    !**************************************************************************

#ifndef LT2_NOMPI

!!$    subroutine lt2_init_mpi( jmg, mmg, jm, mm )
    subroutine lt2_init_mpi( jmg, ntrunc, jm, lm )

      use mpii_module

      integer(i4b), intent(in ) :: jmg, ntrunc
      integer(i4b), intent(out) :: jm, lm


      !
      ! local variables
      !
      integer(i4b) :: mmg_half, mmg_remain
      integer(i4b) :: m, n


      !
      ! If MPI is used, l_mmg and l_nm are calculated here for the first time. 
      ! But, those values will be overwritten in lt2_init routine. 
      !
      call lt2___calc_mmg_nm( ntrunc, l_mmg, l_nm )
      flag_inited_lt2_init_mpi = .true.


      !-----

      allocate( jm_a( 0:nprocs-1 ), jgs_a( 0:nprocs-1 ), jge_a( 0:nprocs-1 ) )

      jge = 0
      do n = 0, nprocs-1
         jgs = jge + 1
         jge = jgs - 1 + jmg / nprocs
         if( n .le. mod( jmg, nprocs ) - 1 ) jge = jge + 1
         jm  = jge - jgs + 1

         jgs_a( n ) = jgs
         jge_a( n ) = jge
         jm_a ( n ) = jm
      end do

      jgs = jgs_a( myrank )
      jge = jge_a( myrank )
      jm  = jm_a ( myrank )

      jm_max = jm_a( 0 )
      do n = 0+1, nprocs-1
         if( jm_a( n ) .gt. jm_max ) jm_max = jm_a( n )
      end do




      allocate( &
           mgs1_a( 0:nprocs-1 ), mge1_a( 0:nprocs-1 ), &
           mgs2_a( 0:nprocs-1 ), mge2_a( 0:nprocs-1 ), &
           mm1_a( 0:nprocs-1 ), mm2_a( 0:nprocs-1 ) )

      mmg_half = l_mmg / 2 + mod( l_mmg, 2 )

      mge1 = 0
      do n = 0, nprocs-1
         mgs1 = mge1 + 1
         mge1 = mgs1 - 1 + mmg_half / nprocs
         if( n + 1 .le. mod( mmg_half, nprocs ) ) mge1 = mge1 + 1
         mm1  = mge1 - mgs1 + 1

         mgs1_a( n ) = mgs1
         mge1_a( n ) = mge1
         mm1_a ( n ) = mm1
      end do

      mgs1 = mgs1_a( myrank )
      mge1 = mge1_a( myrank )
      mm1  = mm1_a ( myrank )


      mmg_remain = l_mmg - mmg_half

      mgs2 = l_mmg + 1
      do n = 0, nprocs-1
         mge2 = mgs2 - 1
         mgs2 = mge2 + 1 - mmg_remain / nprocs
         if( nprocs - 1 - n + 1 .le. mod( mmg_remain, nprocs ) ) mgs2 = mgs2 - 1
         mgs2 = max( mgs2, mmg_half )
         mm2  = mge2 - mgs2 + 1

         mgs2_a( n ) = mgs2
         mge2_a( n ) = mge2
         mm2_a ( n ) = mm2
      end do

      mgs2 = mgs2_a( myrank )
      mge2 = mge2_a( myrank )
      mm2  = mm2_a ( myrank )


      mm_max = mm1_a( 0 ) + mm2_a( 0 )
      do n = 0+1, nprocs-1
         if( mm1_a( n ) + mm2_a( n ) .gt. mm_max ) mm_max = mm1_a( n ) + mm2_a( n )
      end do


      allocate( m_index( mm1 + mm2 ) )
      do m = 1, mm1
         m_index( m     ) = mgs1 + m - 1
      end do
!!$      do m = mm1+1, mm1+mm2
      do m = 1, mm2
         m_index( mm1+m ) = mgs2 + m - 1
      end do


      l_mm = mm1 + mm2


#ifdef LT2_DEBUG
      if( myrank .eq. 0 ) then
         write( 6, * )
         write( 6, * ) "LT2_DEBUG LT2_DEBUG LT2_DEBUG LT2_DEBUG LT2_DEBUG"
         do n = 0, nprocs-1
            write( 6, * ) 'mmg:', l_mmg, ', myrank:', myrank, ', n:', n, &
                 ', mm: ', mm1_a( n ), ', mgs: ', mgs1_a( n ), ', mge: ', mge1_a( n )
         end do
         do n = nprocs-1, 0, -1
            write( 6, * ) 'mmg:', l_mmg, ', myrank:', myrank, ', n:', n, &
                 ', mm: ', mm2_a( n ), ', mgs: ', mgs2_a( n ), ', mge: ', mge2_a( n )
         end do
         write( 6, * ) 'mm_max: ', mm_max
         write( 6, * ) "LT2_DEBUG LT2_DEBUG LT2_DEBUG LT2_DEBUG LT2_DEBUG"
         write( 6, * )
      end if
#endif

#ifdef NOCYCLIC

      allocate( mm_a( 0:nprocs-1 ), mgs_a( 0:nprocs-1 ), mge_a( 0:nprocs-1 ) )

      mge = 0
      do n = 0, nprocs-1
         mgs = mge + 1
         mge = mgs - 1 + l_mmg / nprocs
         if( n .le. mod( l_mmg, nprocs ) - 1 ) mge = mge + 1
         mm  = mge - mgs + 1

         mgs_a( n ) = mgs
         mge_a( n ) = mge
         mm_a ( n ) = mm
      end do

      mgs = mgs_a( myrank )
      mge = mge_a( myrank )
      mm  = mm_a ( myrank )

      mm_max = mm_a( 0 )
      do n = 0+1, nprocs-1
         if( mm_a( n ) .gt. mm_max ) mm_max = mm_a( n )
      end do

#endif


      lm = 0
      do m = mgs1, mgs1+mm1-1
         lm = lm + l_nm - m + 1
      end do
      do m = mgs2, mgs2+mm2-1
         lm = lm + l_nm - m + 1
      end do
      lm = lm + lm


    end subroutine lt2_init_mpi

    !**************************************************************************

    subroutine lt2_end_mpi

      deallocate( jm_a, jgs_a, jge_a )

      deallocate( mgs1_a, mge1_a, mgs2_a, mge2_a, mm1_a, mm2_a, m_index )
#ifdef NOCYCLIC
      deallocate( mm_a, mgs_a, mge_a )
#endif

    end subroutine lt2_end_mpi

    !**************************************************************************
    !
    ! subroutine for data transposition
    !
    !**************************************************************************

    subroutine lt2_transposition_cyclic( im, jmg, jm, km, mm, arr_in, arr_out )

      use mpii_module

      integer(i4b), intent(in ) :: im, jmg, jm, km, mm
      real(dp)    , intent(in ) :: arr_in ( im, jm , km )
      real(dp)    , intent(out) :: arr_out( mm, jmg, km )


      !
      ! local variables
      !
      real(dp)     :: &
           sbuf( mm_max, jm_max, km, 0:nprocs-1 ), &
           rbuf( mm_max, jm_max, km, 0:nprocs-1 )
      integer(i4b) :: ireqs_a( 0:nprocs-1 ), ireqr_a( 0:nprocs-1 )
      integer(i4b) :: idest, idep
      integer(i4b) :: j, k, m, n


      !$omp parallel

      !$omp do private( m, j, n )
      do k = 1, km
         do n = 0, nprocs-1
            do j = 1, jm_max
               do m = 1, mm_max
                  sbuf( m, j, k, n ) = 1.0d100
               end do
            end do
         end do
      end do

      !$omp do private( m, j, n )
      do k = 1, km
         do n = 0, nprocs-1
            do j = 1, jm
               do m = 1, mm1_a( n )
                  sbuf( m, j, k, n ) = arr_in( mgs1_a(n)+m-1, j, k )
               end do
            end do
         end do
         do n = 0, nprocs-1
            do j = 1, jm
               do m = 1, mm2_a( n )
                  sbuf( mm1_a(n)+m, j, k, n ) = arr_in( mgs2_a(n)+m-1, j, k )
               end do
            end do
         end do
      end do

      !$omp end parallel

      do n = 0, nprocs-1

         if( n .ne. myrank ) then
            idest = n
            idep  = n
!!$            write( 6, * ) 'Rank ', myrank, ': send data to rank ', &
!!$                 idest, ', receive data from rank ', idep
            call mpii_isend( idest, mm_max, jm_max, km, sbuf(:,:,:,n), ireqs_a(n) )
            call mpii_irecv( idep , mm_max, jm_max, km, rbuf(:,:,:,n), ireqr_a(n) )
         else
            do k = 1, km
               do j = 1, jm_max
                  do m = 1, mm_max
                     rbuf( m, j, k, n ) = sbuf( m, j, k, n )
                  end do
               end do
            end do
         end if
      end do


      do n = 0, nprocs-1
         idep = n
         if( n .ne. myrank ) then
            call mpii_wait( ireqs_a( n ) )
            call mpii_wait( ireqr_a( n ) )
         end if

         do k = 1, km
            do j = 1, jm_a( idep )
               do m = 1, mm1+mm2
                  arr_out( m, jgs_a(idep)+j-1, k ) = rbuf( m, j, k, n )
               end do
            end do
         end do
      end do



    end subroutine lt2_transposition_cyclic

    !**************************************************************************

    subroutine lt2_transposition_cyclic_inv( im, jmg, jm, km, mm, arr_in, arr_out )

      use mpii_module

      integer(i4b), intent(in ) :: im, jmg, jm, km, mm
      real(dp)    , intent(in ) :: arr_in ( mm, jmg, km )
      real(dp)    , intent(out) :: arr_out( im, jm , km )


      !
      ! local variables
      !
      real(dp)     :: &
           sbuf( mm_max, jm_max, km, 0:nprocs-1 ), &
           rbuf( mm_max, jm_max, km, 0:nprocs-1 )
      integer(i4b) :: ireqs_a( 0:nprocs-1 ), ireqr_a( 0:nprocs-1 )
      integer(i4b) :: idest, idep
      integer(i4b) :: j, k, m, n


      !$omp parallel

      !$omp do private( m, j, n )
      do k = 1, km
         do n = 0, nprocs-1
            do j = 1, jm_max
               do m = 1, mm_max
                  sbuf( m, j, k, n ) = 1.0d100
               end do
            end do
         end do
      end do


      !$omp do private( m, j, n, idest )
      do k = 1, km
         do n = 0, nprocs-1
            idest = n
            do j = 1, jm_a( idest )
               do m = 1, mm1+mm2
                  sbuf( m, j, k, n ) = arr_in( m, jgs_a(idest)+j-1, k )
               end do
            end do
         end do
      end do

      !$omp end parallel

      do n = 0, nprocs-1

         if( n .ne. myrank ) then
            idest = n
            idep  = n
!!$            write( 6, * ) 'Rank ', myrank, ': send data to rank ', &
!!$                 idest, ', receive data from rank ', idep
            call mpii_isend( idest, mm_max, jm_max, km, sbuf(:,:,:,n), ireqs_a(n) )
            call mpii_irecv( idep , mm_max, jm_max, km, rbuf(:,:,:,n), ireqr_a(n) )
         else
            do k = 1, km
               do j = 1, jm_max
                  do m = 1, mm_max
                     rbuf( m, j, k, n ) = sbuf( m, j, k, n )
                  end do
               end do
            end do
         end if

      end do


      do n = 0, nprocs-1

         if( n .ne. myrank ) then
            call mpii_wait( ireqs_a( n ) )
            call mpii_wait( ireqr_a( n ) )
         end if

         do k = 1, km
            do j = 1, jm
               do m = 1, mm1_a( n )
                  arr_out( mgs1_a(n)+m-1, j, k ) = rbuf( m, j, k, n )
               end do
            end do
         end do
         do k = 1, km
            do j = 1, jm
               do m = 1, mm2_a( n )
                  arr_out( mgs2_a(n)+m-1, j, k ) = rbuf( mm1_a(n)+m, j, k, n )
               end do
            end do
         end do

      end do


      do k = 1, km
         do j = 1, jm
            do m = mge2_a(0)+1, im
!!$               arr_out( m, j, k ) = 1.0d100
               arr_out( m, j, k ) = 0.0d0
            end do
         end do
      end do


    end subroutine lt2_transposition_cyclic_inv

    !**************************************************************************

#ifdef NOCYCLIC

    !**************************************************************************
    !
    !        --------------
    !  jmg   |
    !        |
    !        | ...
    !        |
    !        |
    !        |-------------
    !        |
    !        |
    !        |
    !        |
    !        |-------------
    !        |
    !        |
    !        |
    !        |
    !        |-------------
    !   jm   |
    !        |
    !        |
    !    1   |
    !        --------------
    !
    !**************************************************************************

    subroutine lt2_transposition( im, jmg, jm, km, mm, arr_in, arr_out )

      use mpii_module

      integer(i4b), intent(in ) :: im, jmg, jm, km, mm
      real(dp)    , intent(in ) :: arr_in ( im , jm, km )
      real(dp)    , intent(out) :: arr_out( jmg, mm, km )


      !
      ! local variables
      !
      real(dp)     :: arr_tmp( mm, jmg, km )
      real(dp)     :: &
           sbuf( mm_max, jm_max, km, 0:nprocs-1 ), &
           rbuf( mm_max, jm_max, km, 0:nprocs-1 )
      integer(i4b) :: ireqs_a( 0:nprocs-1 ), ireqr_a( 0:nprocs-1 )
      integer(i4b) :: idest, idep
      integer(i4b) :: j, k, m, n



      do n = 0, nprocs-1
         do k = 1, km
            do j = 1, jm_max
               do m = 1, mm_max
                  sbuf( m, j, k, n ) = 1.0d100
               end do
            end do
         end do
      end do

      do n = 0, nprocs-1
         do k = 1, km
            do j = 1, jm
               do m = 1, mm_a( n )
                  sbuf( m, j, k, n ) = arr_in( mgs_a(n)+m-1, j, k )
               end do
            end do
         end do
      end do


      do n = 0, nprocs-1

         if( n .ne. myrank ) then
            idest = n
            idep  = n
!!$            write( 6, * ) 'Rank ', myrank, ': send data to rank ', &
!!$                 idest, ', receive data from rank ', idep
            call mpii_isend( idest, mm_max, jm_max, km, sbuf(:,:,:,n), ireqs_a( n ) )
            call mpii_irecv( idep , mm_max, jm_max, km, rbuf(:,:,:,n), ireqr_a( n ) )
         else
            do k = 1, km
               do j = 1, jm_max
                  do m = 1, mm_max
                     rbuf( m, j, k, n ) = sbuf( m, j, k, n )
                  end do
               end do
            end do
         end if
      end do


      do n = 0, nprocs-1
         idep = n
         if( n .ne. myrank ) then
            call mpii_wait( ireqs_a( n ) )
            call mpii_wait( ireqr_a( n ) )
         end if

         do k = 1, km
            do j = 1, jm_a( idep )
               do m = 1, mm
                  arr_tmp( m, jgs_a(idep)+j-1, k ) = rbuf( m, j, k, n )
               end do
            end do
         end do
      end do


      do k = 1, km
         do j = 1, jmg
            do m = 1, mm
               arr_out( j, m, k ) = arr_tmp( m, j, k )
            end do
         end do
      end do


    end subroutine lt2_transposition

    !**************************************************************************

#endif
    ! #endif for #ifdef NOCYCLIC

#endif
    ! #endif for #ifndef LT2_NOMPI

    !**************************************************************************
    ! Above are MPI related routines.
    !**************************************************************************
    !**************************************************************************

    subroutine lt2_g2s( im, jmg, jm, km, lm, ita, dta, gwg, pmn, &
         garr, sarr )

      integer(i4b), intent(in ) :: im, jmg, jm, km, lm, ita( 5, 2 )
      real(dp)    , intent(in ) :: dta( im, 2 )
      real(dp)    , intent(in ) :: gwg( jmg ), pmn( lm, jmg )
      real(dp)    , intent(in ) :: garr( im, jm, km )
      real(dp)    , intent(out) :: sarr( lm, km )


      !
      ! local variables
      !
      real(dp)     :: farrr( im, jm, km ), farri( im, jm, km )


      call lt2_g2f( im, jm, km, ita, dta, garr, farrr, farri )

      call lt2_f2s( im, jmg, jm, km, lm, gwg, pmn, &
           farrr, farri, sarr )


    end subroutine lt2_g2s

    !**************************************************************************

    subroutine lt2_s2g( im, jmg, jm, km, lm, ita, dta, pmn, &
         sarr, garr )


      integer(i4b), intent(in ) :: im, jmg, jm, km, lm
      integer(i4b), intent(in ) :: ita( 5, 2 )
      real(dp)    , intent(in ) :: dta( im, 2 )
      real(dp)    , intent(in ) :: pmn( lm, jmg )
      real(dp)    , intent(in ) :: sarr( lm, km )
      real(dp)    , intent(out) :: garr( im, jm, km )


      !
      ! local variables
      !
      real(dp) :: farrr( im, jm, km ), farri( im, jm, km )


      call lt2_s2f( im, jmg, jm, km, lm, pmn, &
           sarr, farrr, farri )

      call lt2_f2g( im, jm, km, ita, dta, farrr, farri, garr )


    end subroutine lt2_s2g

    !**************************************************************************

    subroutine lt2_g2f( im, jm, km, ita, dta, garr, farrr, farri )

      use fft7_module

      integer(i4b), intent(in ) :: im, jm, km, ita( 5, 2 )
      real(dp)    , intent(in ) :: dta( im, 2 )
      real(dp)    , intent(in ) :: garr ( im, jm, km )
      real(dp)    , intent(out) :: farrr( im, jm, km ), farri( im, jm, km )


      !
      ! local variables
      !
#ifdef LT2_USEFTPACK
      real(dp)     :: wka_ft1( jm * km, 0:im-1 ), wka_ft2( im * jm * km )
#else
      integer(i4b) :: lm
      real(dp)     :: wka ( jm * km + 1, im )
      real(dp)     :: wkar( jm * km + 1, im ), wkai( jm * km + 1, im )
#endif
      integer(i4b) :: isign
      integer(i4b) :: i, j, k


#ifdef LT2_USEFTPACK

      !$omp parallel do private( i, j )
      do k = 1, km
         do j = 1, jm
            do i = 1, im
               wka_ft1( ( k - 1 ) * jm + j, i-1 ) = garr( i, j, k )
            end do
         end do
      end do

      call fttruf( jm * km, im, wka_ft1, wka_ft2, ft_it, ft_t )

      !$omp parallel do private( i, j )
      do k = 1, km
         do j = 1, jm
            do i = 1, 1
               farrr( i, j, k ) = wka_ft1( ( k - 1 ) * jm + j, 2 * ( i - 1 )     )
               farri( i, j, k ) = 0.0d0
            end do
            do i = 1+1, im / 2
               farrr( i, j, k ) = wka_ft1( ( k - 1 ) * jm + j, 2 * ( i - 1 )     )
               farri( i, j, k ) = wka_ft1( ( k - 1 ) * jm + j, 2 * ( i - 1 ) + 1 )
            end do
            do i = im/2+1, im/2+1
               farrr( i, j, k ) = wka_ft1( ( k - 1 ) * jm + j, 1                 )
               farri( i, j, k ) = 0.0d0
            end do
            do i = im/2+1+1, im
               farrr( i, j, k ) =  farrr( im/2+1-(im-(im/2+1)), j, k )
               farri( i, j, k ) = -farri( im/2+1-(im-(im/2+1)), j, k )
            end do
         end do
      end do


#else

      lm = jm * km

      !$omp parallel do private( i, j )
      do k = 1, km
         do j = 1, jm
            do i = 1, im
               wka( ( k - 1 ) * jm + j, i ) = garr( i, j, k )
            end do
         end do
      end do

      isign = 1
      call fft7_fftreal( lm+1, im, ita, dta, wka, wkar, wkai, isign, 1, lm )

      !$omp parallel do private( i, j )
      do k = 1, km
         do j = 1, jm
            do i = 1, im
               farrr( i, j, k ) = wkar( ( k - 1 ) * jm + j, i )
               farri( i, j, k ) = wkai( ( k - 1 ) * jm + j, i )
            end do
         end do
      end do

#endif


    end subroutine lt2_g2f

    !**************************************************************************

    subroutine lt2_f2g( im, jm, km, ita, dta, farrr, farri, garr )

      use fft7_module

      integer(i4b), intent(in ) :: im, jm, km, ita( 5, 2 )
      real(dp)    , intent(in ) :: dta( im, 2 )
      real(dp)    , intent(in ) :: farrr( im, jm, km ), farri( im, jm, km )
      real(dp)    , intent(out) :: garr ( im, jm, km )


      !
      ! local variables
      !
#ifdef LT2_USEFTPACK
      real(dp)     :: wka_ft1( jm * km, 0:im-1 ), wka_ft2( im * jm * km )
#else
      integer(i4b) :: lm
      real(dp)     :: wka ( jm * km + 1, im )
      real(dp)     :: wkar( jm * km + 1, im ), wkai( jm * km + 1, im )
#endif
      integer(i4b) :: isign
      integer(i4b) :: i, j, k




#ifdef LT2_USEFTPACK

      !$omp parallel do private( i, j )
      do k = 1, km
         do j = 1, jm
            do i = 1, 1
               wka_ft1( ( k - 1 ) * jm + j, 2 * ( i - 1 )     ) = farrr( i, j, k )
            end do
            do i = 1+1, im/2
               wka_ft1( ( k - 1 ) * jm + j, 2 * ( i - 1 )     ) = farrr( i, j, k )
               wka_ft1( ( k - 1 ) * jm + j, 2 * ( i - 1 ) + 1 ) = farri( i, j, k )
            end do
            do i = im/2+1, im/2+1
               wka_ft1( ( k - 1 ) * jm + j, 1                 ) = farrr( i, j, k )
            end do
         end do
      end do

      call fttrub( jm * km, im, wka_ft1, wka_ft2, ft_it, ft_t )

      !$omp parallel do private( i, j )
      do k = 1, km
         do j = 1, jm
            do i = 1, im
               garr( i, j, k ) = wka_ft1( ( k - 1 ) * jm + j, i-1 )
            end do
         end do
      end do

#else

      lm = jm * km

      !$omp parallel do private( i, j )
      do k = 1, km
         do j = 1, jm
            do i = 1, im
               wkar( ( k - 1 ) * jm + j, i ) = farrr( i, j, k )
               wkai( ( k - 1 ) * jm + j, i ) = farri( i, j, k )
            end do
         end do
      end do

      isign = -1
      call fft7_fftreal( lm+1, im, ita, dta, wka, wkar, wkai, isign, 1, lm )

      !$omp parallel do private( i, j )
      do k = 1, km
         do j = 1, jm
            do i = 1, im
               garr( i, j, k ) = wka( ( k - 1 ) * jm + j, i )
            end do
         end do
      end do

#endif


    end subroutine lt2_f2g

    !**************************************************************************

    subroutine lt2_f2s( im, jmg, jm, km, lm, gwg, pmn, &
         farrr, farri, sarr )

#ifndef LT2_NOMPI
      use mpii_module
#endif

      integer(i4b), intent(in ) :: im, jmg, jm, km, lm
      real(dp)    , intent(in ) :: gwg( jmg )
      real(dp)    , intent(in ) :: pmn( lm, jmg )
      real(dp)    , intent(in ) :: farrr( im, jm, km ), farri( im, jm, km )
      real(dp)    , intent(out) :: sarr ( lm, km )


      !
      ! local variables
      !
#ifndef LT2_NOMPI
      real(dp)     :: farr_pack_bef( im, jm, 2*km ), farr_pack_aft( l_mm, jmg, 2*km )
      integer(i4b) :: i
#endif
      real(dp)     :: &
           farrr_s( l_mm, jmg/2+1:jmg, km ), farri_s( l_mm, jmg/2+1:jmg, km ), &
           farrr_a( l_mm, jmg/2+1:jmg, km ), farri_a( l_mm, jmg/2+1:jmg, km )
      real(dp)     :: sarrr( l_nm, l_mm, km ), sarri( l_nm, l_mm, km )
      integer(i4b) :: j, k, l, m, n
      integer(i4b) :: j2


#ifndef LT2_NOMPI

      !$omp parallel do private( i, j )
      do k = 1, km
         do j = 1, jm
            do i = 1, im
               farr_pack_bef( i, j, k    ) = farrr( i, j, k )
               farr_pack_bef( i, j, km+k ) = farri( i, j, k )
            end do
         end do
      end do

      call lt2_transposition_cyclic( im, jmg, jm, 2*km, l_mm, farr_pack_bef, farr_pack_aft )

      !$omp parallel

!      !$omp parallel do private( j, j2, m )
      !$omp do private( j, j2, m )
      do k = 1, km
         do j = jmg/2+1, jmg
            j2 = jmg-j+1
            do m = 1, l_mm
               farrr_s(m,j,k) = farr_pack_aft(m,j,k   ) + farr_pack_aft(m,j2,k   )
               farri_s(m,j,k) = farr_pack_aft(m,j,km+k) + farr_pack_aft(m,j2,km+k)
               farrr_a(m,j,k) = farr_pack_aft(m,j,k   ) - farr_pack_aft(m,j2,k   )
               farri_a(m,j,k) = farr_pack_aft(m,j,km+k) - farr_pack_aft(m,j2,km+k)
            end do
         end do
      end do

#else

      !$omp parallel

      ! old code
!!$      do k = 1, km
!!$         do j = jm/2+1, jm
!!$            j2 = jm-j+1
!!$            do i = 1, im
!!$               farrr_s( i, j, k ) = farrr( i, j, k ) + farrr( i, j2, k )
!!$               farri_s( i, j, k ) = farri( i, j, k ) + farri( i, j2, k )
!!$               farrr_a( i, j, k ) = farrr( i, j, k ) - farrr( i, j2, k )
!!$               farri_a( i, j, k ) = farri( i, j, k ) - farri( i, j2, k )
!!$            end do
!!$         end do
!!$      end do
!      !$omp parallel do private( j, j2, m )
      !$omp do private( j, j2, m )
      do k = 1, km
         do j = jm/2+1, jm
            j2 = jm-j+1
            do m = 1, l_mm
               farrr_s( m, j, k ) = farrr( m, j, k ) + farrr( m, j2, k )
               farri_s( m, j, k ) = farri( m, j, k ) + farri( m, j2, k )
               farrr_a( m, j, k ) = farrr( m, j, k ) - farrr( m, j2, k )
               farri_a( m, j, k ) = farri( m, j, k ) - farri( m, j2, k )
            end do
         end do
      end do

#endif



!      !$omp parallel do private( m, n )
      !$omp do private( m, n )
      do k = 1, km
         do m = 1, l_mm
            do n = 1, l_nm
               sarrr( n, m, k ) = 0.0d0
               sarri( n, m, k ) = 0.0d0
            end do
         end do
      end do

      !
      ! NOTE:
      !   As a definition of spherical harmonics, degree must be greater than 
      !   or equal to order:
      !     degree (meridional wavenumber) >= order (zonal wavenumber). 
      !
      !   In this routine, the relationship between degree and array index, n, 
      !   is as follows:
      !     degree = n(array index) - 1   ===>   n(array index) = degree + 1.
      !

!!$!      !$omp parallel do private( j, l, m, n )
!!$      !$omp do private( j, m, n )
!!$      do k = 1, km
!!$         do j = jmg/2+1, jmg
!!$
!!$            do m = 1, l_mm
!!$               do n = l_ord(m)+1, l_nm, 2
!!$                  sarrr( n, m, k ) = sarrr( n, m, k ) &
!!$                       + gwg( j ) * farrr_s( m, j, k ) * l_pmn( n, m, j )
!!$                  sarri( n, m, k ) = sarri( n, m, k ) &
!!$                       + gwg( j ) * farri_s( m, j, k ) * l_pmn( n, m, j )
!!$               end do
!!$               do n = l_ord(m)+1+1, l_nm, 2
!!$                  sarrr( n, m, k ) = sarrr( n, m, k ) &
!!$                       + gwg( j ) * farrr_a( m, j, k ) * l_pmn( n, m, j )
!!$                  sarri( n, m, k ) = sarri( n, m, k ) &
!!$                       + gwg( j ) * farri_a( m, j, k ) * l_pmn( n, m, j )
!!$               end do
!!$            end do
!!$
!!$         end do
!!$      end do


!      !$omp parallel do private( j, l, m, n )
      !$omp do private( j, m, n )
      do k = 1, km
         do j = jmg/2+1, jmg

            do m = 1, l_mm
               do n = l_ord(m)+1, l_nm, 2
                  sarrr( n, m, k ) = sarrr( n, m, k ) &
                       + farrr_s( m, j, k ) * l_pmngwg( n, m, j )
                  sarri( n, m, k ) = sarri( n, m, k ) &
                       + farri_s( m, j, k ) * l_pmngwg( n, m, j )
               end do
               do n = l_ord(m)+1+1, l_nm, 2
                  sarrr( n, m, k ) = sarrr( n, m, k ) &
                       + farrr_a( m, j, k ) * l_pmngwg( n, m, j )
                  sarri( n, m, k ) = sarri( n, m, k ) &
                       + farri_a( m, j, k ) * l_pmngwg( n, m, j )
               end do
            end do

         end do
      end do



!!$!      !$omp parallel do private( l )
!!$      !$omp do private( l )
!!$      do k = 1, km
!!$         do l = 1, lm
!!$            sarr( l, k ) = 0.0d0
!!$         end do
!!$      end do
!!$
!!$!      !$omp parallel do private( j, l, m, n )
!!$      !$omp do private( j, l, m, n )
!!$      do k = 1, km
!!$         do j = jmg/2+1, jmg
!!$
!!$            do m = 1, l_mm
!!$               !
!!$               ! NOTE:
!!$               !   As a definition of spherical harmonics, degree must be greater than 
!!$               !   or equal to order:
!!$               !     degree (meridional wavenumber) >= order (zonal wavenumber). 
!!$               !
!!$               !   In this routine, the relationship between degree and array index, n, 
!!$               !   is as follows:
!!$               !     degree = n(array index) - 1   ===>   n(array index) = degree + 1.
!!$               !
!!$!               do n = ord(m)+1, l_nm, 2
!!$               do n = l_ord(m)+1, l_nm, 2
!!$                  l = l_nm2l( n, m )
!!$                  sarr( l  , k ) = sarr( l  , k ) &
!!$                       + gwg( j ) * farrr_s( m, j, k ) * pmn( l  , j )
!!$                  sarr( l+1, k ) = sarr( l+1, k ) &
!!$                       + gwg( j ) * farri_s( m, j, k ) * pmn( l+1, j )
!!$               end do
!!$!               do n = ord(m)+1+1, l_nm, 2
!!$               do n = l_ord(m)+1+1, l_nm, 2
!!$                  l = l_nm2l( n, m )
!!$                  sarr( l  , k ) = sarr( l  , k ) &
!!$                       + gwg( j ) * farrr_a( m, j, k ) * pmn( l  , j )
!!$                  sarr( l+1, k ) = sarr( l+1, k ) &
!!$                       + gwg( j ) * farri_a( m, j, k ) * pmn( l+1, j )
!!$               end do
!!$            end do
!!$
!!$         end do
!!$      end do

      !$omp end parallel


      call lt2_exchange_array_nm2l( km, lm, sarrr, sarri, sarr )


#ifndef LT2_NOMPI
#ifdef LT2_DEBUG
!!$      do n = 1, nm
!!$         do m = 1, mm1
!!$            write( 20+myrank, * ) mgs1_a(myrank)+m-1, n, sarrr( n, m, 1 ), sarri( n, m, 1 ), m_index( m )
!!$         end do
!!$         do m = 1, mm2
!!$            write( 20+myrank, * ) mgs2_a(myrank)+m-1, n, sarrr( n, mm1+m, 1 ), sarri( n, mm1+m, 1 ), m_index( mm1+m )
!!$         end do
!!$!         write( 20+myrank, * )
!!$      end do
#endif
#endif

    end subroutine lt2_f2s

    !**************************************************************************

    subroutine lt2_s2f( im, jmg, jm, km, lm, pmn, &
         sarr, farrr, farri )

#ifndef LT2_NOMPI
      use mpii_module
#endif

      integer(i4b), intent(in ) :: im, jmg, jm, km, lm
      real(dp)    , intent(in ) :: pmn( lm, jmg )
      real(dp)    , intent(in ) :: sarr ( lm, km )
      real(dp)    , intent(out) :: farrr( im, jm, km ), farri( im, jm, km )


      !
      ! local variables
      !
#ifndef LT2_NOMPI
      real(dp)     :: farr_pack_bef( l_mm, jmg, 2*km ), farr_pack_aft( im, jm, 2*km )
#endif
!!$      real(dp)     :: &
!!$           farrr_s( l_mm, jmg/2+1:jmg, km ), farri_s( l_mm, jmg/2+1:jmg, km ), &
!!$           farrr_a( l_mm, jmg/2+1:jmg, km ), farri_a( l_mm, jmg/2+1:jmg, km )
      real(dp)     :: &
           farrr_s( jmg/2+1:jmg+1, l_mm, km ), farri_s( jmg/2+1:jmg+1, l_mm, km ), &
           farrr_a( jmg/2+1:jmg+1, l_mm, km ), farri_a( jmg/2+1:jmg+1, l_mm, km )
      integer(i4b) :: i, j, k, l, m, n
      integer(i4b) :: i2, j2


      !$omp parallel

      !
      ! This calculation (order of array index) strongly depends on the fft
      ! routine (order of array index).
      !

!      !$omp parallel do private( j, m )
      !$omp do private( j, m )
      do k = 1, km
!!$         do j = jmg/2+1, jmg
!!$            do m = 1, l_mm
!!$               farrr_s( m, j, k ) = 0.0d0
!!$               farri_s( m, j, k ) = 0.0d0
!!$               farrr_a( m, j, k ) = 0.0d0
!!$               farri_a( m, j, k ) = 0.0d0
!!$            end do
!!$         end do
         do m = 1, l_mm
            do j = jmg/2+1, jmg
               farrr_s( j, m, k ) = 0.0d0
               farri_s( j, m, k ) = 0.0d0
               farrr_a( j, m, k ) = 0.0d0
               farri_a( j, m, k ) = 0.0d0
            end do
         end do
      end do

!!$      do k = 1, km
!!$         do j = jmg/2+1, jmg
!!$
!!$            do m = 1, mm
!!$               !
!!$               ! NOTE:
!!$               !   As a definition of spherical harmonics, degree must be greater than 
!!$               !   or equal to order:
!!$               !     degree (meridional wavenumber) >= order (zonal wavenumber). 
!!$               !
!!$               !   In this routine, the relationship between degree and array index, n, 
!!$               !   is as follows:
!!$               !     degree = n(array index) - 1   ===>   n(array index) = degree + 1.
!!$               !
!!$               do n = ord(m)+1, nm, 2
!!$                  farrr_s( m, j, k ) = farrr_s( m, j, k ) &
!!$                       + sarrr( n, m, k ) * pmn( n, m, j )
!!$                  farri_s( m, j, k ) = farri_s( m, j, k ) &
!!$                       + sarri( n, m, k ) * pmn( n, m, j )
!!$               end do
!!$               do n = ord(m)+1+1, nm, 2
!!$                  farrr_a( m, j, k ) = farrr_a( m, j, k ) &
!!$                       + sarrr( n, m, k ) * pmn( n, m, j )
!!$                  farri_a( m, j, k ) = farri_a( m, j, k ) &
!!$                       + sarri( n, m, k ) * pmn( n, m, j )
!!$               end do
!!$            end do
!!$
!!$
!!$         end do
!!$      end do



!!$!      !$omp parallel do private( j, l, m, n )
!!$      !$omp do private( j, l, m, n )
!!$      do k = 1, km
!!$         do j = jmg/2+1, jmg
!!$
!!$            do m = 1, l_mm
!!$               !
!!$               ! NOTE:
!!$               !   As a definition of spherical harmonics, degree must be greater than 
!!$               !   or equal to order:
!!$               !     degree (meridional wavenumber) >= order (zonal wavenumber). 
!!$               !
!!$               !   In this routine, the relationship between degree and array index, n, 
!!$               !   is as follows:
!!$               !     degree = n(array index) - 1   ===>   n(array index) = degree + 1.
!!$               !
!!$               do n = l_ord(m)+1, l_nm, 2
!!$                  l = l_nm2l( n, m )
!!$                  farrr_s( m, j, k ) = farrr_s( m, j, k ) &
!!$                       + sarr( l  , k ) * pmn( l  , j )
!!$                  farri_s( m, j, k ) = farri_s( m, j, k ) &
!!$                       + sarr( l+1, k ) * pmn( l+1, j )
!!$               end do
!!$               do n = l_ord(m)+1+1, l_nm, 2
!!$                  l = l_nm2l( n, m )
!!$                  farrr_a( m, j, k ) = farrr_a( m, j, k ) &
!!$                       + sarr( l  , k ) * pmn( l  , j )
!!$                  farri_a( m, j, k ) = farri_a( m, j, k ) &
!!$                       + sarr( l+1, k ) * pmn( l+1, j )
!!$               end do
!!$            end do
!!$
!!$
!!$         end do
!!$      end do

      !
      ! NOTE:
      !   As a definition of spherical harmonics, degree must be greater than 
      !   or equal to order:
      !     degree (meridional wavenumber) >= order (zonal wavenumber). 
      !
      !   In this routine, the relationship between degree and array index, n, 
      !   is as follows:
      !     degree = n(array index) - 1   ===>   n(array index) = degree + 1.
      !
!      !$omp parallel do private( j, l, m, n )
      !$omp do private( j, l, m, n )
      do k = 1, km

         do m = 1, l_mm
            do n = l_ord(m)+1, l_nm, 2
               l = l_nm2l( n, m )
               do j = jmg/2+1, jmg
                  farrr_s( j, m, k ) = farrr_s( j, m, k ) &
                       + sarr( l  , k ) * pmn( l  , j )
                  farri_s( j, m, k ) = farri_s( j, m, k ) &
                       + sarr( l+1, k ) * pmn( l+1, j )
               end do
            end do
            do n = l_ord(m)+1+1, l_nm, 2
               l = l_nm2l( n, m )
               do j = jmg/2+1, jmg
                  farrr_a( j, m, k ) = farrr_a( j, m, k ) &
                       + sarr( l  , k ) * pmn( l  , j )
                  farri_a( j, m, k ) = farri_a( j, m, k ) &
                       + sarr( l+1, k ) * pmn( l+1, j )
               end do
            end do

         end do
      end do


#ifndef LT2_NOMPI

!      !$omp parallel do private( j, j2, m )
      !$omp do private( j, j2, m )
      do k = 1, km
         do j = jmg/2+1, jmg
            j2 = jmg-j+1
            do m = 1, l_mm
!!$               farrr( m, j2, k ) = farrr_s( m, j, k ) - farrr_a( m, j, k )
!!$               farri( m, j2, k ) = farri_s( m, j, k ) - farri_a( m, j, k )
!!$               farrr( m, j , k ) = farrr_s( m, j, k ) + farrr_a( m, j, k )
!!$               farri( m, j , k ) = farri_s( m, j, k ) + farri_a( m, j, k )

!!$               farr_pack_bef( m, j2, k    ) = farrr_s( m, j, k ) - farrr_a( m, j, k )
!!$               farr_pack_bef( m, j2, km+k ) = farri_s( m, j, k ) - farri_a( m, j, k )
!!$               farr_pack_bef( m, j , k    ) = farrr_s( m, j, k ) + farrr_a( m, j, k )
!!$               farr_pack_bef( m, j , km+k ) = farri_s( m, j, k ) + farri_a( m, j, k )

               farr_pack_bef( m, j2, k    ) = farrr_s( j, m, k ) - farrr_a( j, m, k )
               farr_pack_bef( m, j2, km+k ) = farri_s( j, m, k ) - farri_a( j, m, k )
               farr_pack_bef( m, j , k    ) = farrr_s( j, m, k ) + farrr_a( j, m, k )
               farr_pack_bef( m, j , km+k ) = farri_s( j, m, k ) + farri_a( j, m, k )
            end do
         end do
      end do

      !$omp end parallel

      call lt2_transposition_cyclic_inv( im, jmg, jm, 2*km, l_mm, farr_pack_bef, farr_pack_aft )

      !$omp parallel

!      !$omp parallel do private( i, j )
      !$omp do private( i, j )
      do k = 1, km
         do j = 1, jm
            do i = 1, im
               farrr( i, j, k ) = farr_pack_aft( i, j, k    )
               farri( i, j, k ) = farr_pack_aft( i, j, km+k )
            end do
         end do
      end do

#else


      ! old code
!!$      do k = 1, km
!!$         do j = jm/2+1, jm
!!$            j2 = jm-j+1
!!$            do i = 1, im/2+1
!!$               farrr( i, j2, k ) = farrr_s( i, j, k ) - farrr_a( i, j, k )
!!$               farri( i, j2, k ) = farri_s( i, j, k ) - farri_a( i, j, k )
!!$               farrr( i, j , k ) = farrr_s( i, j, k ) + farrr_a( i, j, k )
!!$               farri( i, j , k ) = farri_s( i, j, k ) + farri_a( i, j, k )
!!$            end do
!!$         end do
!!$      end do
!      !$omp parallel do private( j, j2, m )
      !$omp do private( j, j2, m )
      do k = 1, km
         do j = jm/2+1, jm
            j2 = jm-j+1
            do m = 1, l_mm
!!$               farrr( m, j2, k ) = farrr_s( m, j, k ) - farrr_a( m, j, k )
!!$               farri( m, j2, k ) = farri_s( m, j, k ) - farri_a( m, j, k )
!!$               farrr( m, j , k ) = farrr_s( m, j, k ) + farrr_a( m, j, k )
!!$               farri( m, j , k ) = farri_s( m, j, k ) + farri_a( m, j, k )
               farrr( m, j2, k ) = farrr_s( j, m, k ) - farrr_a( j, m, k )
               farri( m, j2, k ) = farri_s( j, m, k ) - farri_a( j, m, k )
               farrr( m, j , k ) = farrr_s( j, m, k ) + farrr_a( j, m, k )
               farri( m, j , k ) = farri_s( j, m, k ) + farri_a( j, m, k )
            end do
            do m = l_mm+1, im
               farrr( m, j2, k ) = 0.0d0
               farri( m, j2, k ) = 0.0d0
               farrr( m, j , k ) = 0.0d0
               farri( m, j , k ) = 0.0d0
            end do
         end do
      end do

#endif


!      !$omp parallel do private( i, i2, j )
      !$omp do private( i, i2, j )
      do k = 1, km
         do j = 1, jm
            do i = im/2+1+1, im
!               i2 = im/2+1-(i-(im/2+1))
               i2 = im+2-i
               farrr( i, j, k ) =  farrr( i2, j, k )
               farri( i, j, k ) = -farri( i2, j, k )
            end do
         end do
      end do

      !$omp end parallel


    end subroutine lt2_s2f

    !**************************************************************************

    subroutine lt2_derivop_f2s_old( im, jmg, jm, km, mm, nm, &
         sinlatg, gwg, ord, deg, pmn, eps, &
         farrrr, farrri, farrsr, farrsi, sarrr, sarri )


      integer(i4b), intent(in ) :: im, jmg, jm, km, mm, nm
      real(dp)    , intent(in ) :: sinlatg( jmg ), gwg( jmg )
      integer(i4b), intent(in ) :: ord( mm ), deg( nm+1 )
      real(dp)    , intent(in ) :: pmn( nm+1, mm, jmg ), eps( nm+1, mm )
      real(dp)    , intent(in ) :: &
           farrrr( im, jm, km ), farrri( im, jm, km ), &
           farrsr( im, jm, km ), farrsi( im, jm, km )
      real(dp)    , intent(out) :: sarrr( nm, mm, km ), sarri( nm, mm, km )


      !
      ! local variables
      !
#ifndef LT2_NOMPI
      real(dp) :: farr_pack_bef( im, jm, 4*km ), farr_pack_aft( mm, jmg, 4*km )
      integer(i4b) :: i
#endif
      real(dp) :: &
           farrrr_aft( mm, jmg, km ), farrri_aft( mm, jmg, km ), &
           farrsr_aft( mm, jmg, km ), farrsi_aft( mm, jmg, km )
      !
      ! coslatsqg : = 1 - sinlatg * sinlatg
      !
      real(dp)     :: coslatsqg( jmg )

!!$      integer(i4b) :: iarr( (nm+1)*(nm+2)/2 )

      real(dp)     :: tmpr, tmpi

      integer(i4b) :: j, k, m, n


!!$      call sh_lnplnn( nm, ord, deg, lnp, lnn, npzo, nnzo )
!!$
!!$      do j = 1, jm
!!$         cossq( j ) = 1.0d0 - sinlat( j ) * sinlat( j )
!!$      end do
!!$      do l = 1, (nm+1)*(nm+2)/2
!!$         iarr( l ) = fft5_m2i( im, ord( l ) )
!!$      end do
!!$
!!$      do k = 1, km
!!$         do l = 1, (nm+1)*(nm+2)/2
!!$
!!$            i = iarr( l )
!!$
!!$            sarrr( l, k ) = 0.0d0
!!$            sarri( l, k ) = 0.0d0
!!$
!!$            do j = 1, jm
!!$               tmpr = -ord( l ) * farrri( i, j, k ) * pmn( l, j ) &
!!$                    + farrsr( i, j, k ) &
!!$                    * ( deg(l) * eps( lnn(l) ) * pmn( lnn(l), j ) * nnzo(l) &
!!$                    - ( deg(l) + 1 ) * eps(l) * pmn( lnp(l), j ) * npzo(l) )
!!$               tmpr = tmpr / cossq( j )
!!$               tmpi =  ord( l ) * farrrr( i, j, k ) * pmn( l, j ) &
!!$                    + farrsi( i, j, k ) &
!!$                    * ( deg(l) * eps( lnn(l) ) * pmn( lnn(l), j ) * nnzo(l) &
!!$                    - ( deg(l) + 1 ) * eps(l) * pmn( lnp(l), j ) * npzo(l) )
!!$               tmpi = tmpi / cossq( j )
!!$
!!$               sarrr( l, k ) = sarrr( l, k ) + tmpr * gaussw( j )
!!$               sarri( l, k ) = sarri( l, k ) + tmpi * gaussw( j )
!!$            end do
!!$
!!$            sarrr( l, k ) = sarrr( l, k ) / pradi
!!$            sarri( l, k ) = sarri( l, k ) / pradi
!!$
!!$         end do
!!$      end do



#ifndef LT2_NOMPI
      do k = 1, km
         do j = 1, jm
            do i = 1, im
               farr_pack_bef( i, j,      k ) = farrrr( i, j, k )
               farr_pack_bef( i, j, 1*km+k ) = farrri( i, j, k )
               farr_pack_bef( i, j, 2*km+k ) = farrsr( i, j, k )
               farr_pack_bef( i, j, 3*km+k ) = farrsi( i, j, k )
            end do
         end do
      end do
      call lt2_transposition_cyclic( im, jmg, jm, 4*km, mm, farr_pack_bef, farr_pack_aft )
      do k = 1, km
         do j = 1, jmg
            do m = 1, mm
               farrrr_aft( m, j, k ) = farr_pack_aft( m, j,      k )
               farrri_aft( m, j, k ) = farr_pack_aft( m, j, 1*km+k )
               farrsr_aft( m, j, k ) = farr_pack_aft( m, j, 2*km+k )
               farrsi_aft( m, j, k ) = farr_pack_aft( m, j, 3*km+k )
            end do
         end do
      end do

#else

      do k = 1, km
         do j = 1, jmg
            do m = 1, mm
               farrrr_aft( m, j, k ) = farrrr( m, j, k )
               farrri_aft( m, j, k ) = farrri( m, j, k )
               farrsr_aft( m, j, k ) = farrsr( m, j, k )
               farrsi_aft( m, j, k ) = farrsi( m, j, k )
            end do
         end do
      end do

#endif



      do j = 1, jmg
         coslatsqg( j ) = 1.0d0 - sinlatg( j ) * sinlatg( j )
      end do

      do k = 1, km

         !
         ! NOTE:
         !   As a definition of spherical harmonics, degree must be greater than 
         !   or equal to order:
         !     degree (meridional wavenumber) >= order (zonal wavenumber). 
         !
         !   In this routine, the relationship between degree and array index, n, 
         !   is as follows:
         !     degree = n(array index) - 1   ===>   n(array index) = degree + 1.
         !
         do m = 1, mm

            do n = 1, ord(m)+1-1
               sarrr( n, m, k ) = 0.0d0
               sarri( n, m, k ) = 0.0d0
            end do

            do n = ord(m)+1, ord(m)+1
               sarrr( n, m, k ) = 0.0d0
               sarri( n, m, k ) = 0.0d0
               do j = 1, jmg
                  tmpr = -ord( m ) * farrri_aft( m, j, k ) * pmn( n, m, j )     &
                       + farrsr_aft( m, j, k )                                  &
                       * (     deg(n)       * eps( n+1, m ) * pmn( n+1, m, j )  &
                           + 0.0d0                                             )
                  tmpr = tmpr / coslatsqg( j )
                  tmpi =  ord( m ) * farrrr_aft( m, j, k ) * pmn( n, m, j )     &
                       + farrsi_aft( m, j, k )                                  &
                       * (     deg(n)       * eps( n+1, m ) * pmn( n+1, m, j )  &
                           + 0.0d0                                             )
                  tmpi = tmpi / coslatsqg( j )

                  sarrr( n, m, k ) = sarrr( n, m, k ) + tmpr * gwg( j )
                  sarri( n, m, k ) = sarri( n, m, k ) + tmpi * gwg( j )
               end do
            end do

!!$            do n = (ord(m)+1)+1, nm-1
            do n = (ord(m)+1)+1, nm
               sarrr( n, m, k ) = 0.0d0
               sarri( n, m, k ) = 0.0d0
               do j = 1, jmg
                  tmpr = -ord( m ) * farrri_aft( m, j, k ) * pmn( n, m, j )     &
                       + farrsr_aft( m, j, k )                                  &
                       * (     deg(n)       * eps( n+1, m ) * pmn( n+1, m, j )  &
                           - ( deg(n) + 1 ) * eps( n  , m ) * pmn( n-1, m, j ) )
                  tmpr = tmpr / coslatsqg( j )
                  tmpi =  ord( m ) * farrrr_aft( m, j, k ) * pmn( n, m, j )     &
                       + farrsi_aft( m, j, k )                                  &
                       * (     deg(n)       * eps( n+1, m ) * pmn( n+1, m, j )  &
                           - ( deg(n) + 1 ) * eps( n  , m ) * pmn( n-1, m, j ) )
                  tmpi = tmpi / coslatsqg( j )

                  sarrr( n, m, k ) = sarrr( n, m, k ) + tmpr * gwg( j )
                  sarri( n, m, k ) = sarri( n, m, k ) + tmpi * gwg( j )
               end do
            end do

!!$            do n = nm, nm
!!$               sarrr( n, m, k ) = 0.0d0
!!$               sarri( n, m, k ) = 0.0d0
!!$               do j = 1, jmg
!!$                  tmpr = -ord( m ) * farrri_aft( m, j, k ) * pmn( n, m, j )     &
!!$                       + farrsr_aft( m, j, k )                                  &
!!$                       * ( 0.0d0                                                &
!!$                           - ( deg(n) + 1 ) * eps( n  , m ) * pmn( n-1, m, j ) )
!!$                  tmpr = tmpr / coslatsqg( j )
!!$                  tmpi =  ord( m ) * farrrr_aft( m, j, k ) * pmn( n, m, j )     &
!!$                       + farrsi_aft( m, j, k )                                  &
!!$                       * ( 0.0d0                                                &
!!$                           - ( deg(n) + 1 ) * eps( n  , m ) * pmn( n-1, m, j ) )
!!$                  tmpi = tmpi / coslatsqg( j )
!!$
!!$                  sarrr( n, m, k ) = sarrr( n, m, k ) + tmpr * gwg( j )
!!$                  sarri( n, m, k ) = sarri( n, m, k ) + tmpi * gwg( j )
!!$               end do
!!$!               sarrr( m, n, k ) = sarrr( m, n, k ) / pradi
!!$!               sarri( m, n, k ) = sarri( m, n, k ) / pradi
!!$            end do


         end do

      end do



    end subroutine lt2_derivop_f2s_old

    !**************************************************************************

    subroutine lt2_derivop_l_f2s_old( im, jmg, jm, km, lm, &
         coslatg, gwg, &
         farrrr, farrri, farrsr, farrsi, sarr )


      integer(i4b), intent(in ) :: im, jmg, jm, km, lm
      real(dp)    , intent(in ) :: coslatg( jmg ), gwg( jmg )
      real(dp)    , intent(in ) :: &
           farrrr( im, jm, km ), farrri( im, jm, km ), &
           farrsr( im, jm, km ), farrsi( im, jm, km )
      real(dp)    , intent(out) :: sarr( lm, km )


      !
      ! local variables
      !
#ifndef LT2_NOMPI
      real(dp) :: farr_pack_bef( im, jm, 4*km ), farr_pack_aft( l_mm, jmg, 4*km )
      integer(i4b) :: i
#endif
      real(dp) :: &
           farrrr_aft( l_mm, jmg, km ), farrri_aft( l_mm, jmg, km ), &
           farrsr_aft( l_mm, jmg, km ), farrsi_aft( l_mm, jmg, km )
      real(dp) :: sarrr( l_nm, l_mm, km ), sarri( l_nm, l_mm, km )

      real(dp)     :: tmpr, tmpi

      real(dp)     :: pow_coslatg( jmg )

      integer(i4b) :: j, k, l, m, n


      !$omp parallel

      !$omp do
      do j = 1, jmg
         pow_coslatg( j ) = coslatg( j ) * coslatg( j )
      end do


#ifndef LT2_NOMPI

      !$omp do private( i, j )
      do k = 1, km
         do j = 1, jm
            do i = 1, im
               farr_pack_bef( i, j,      k ) = farrrr( i, j, k )
               farr_pack_bef( i, j, 1*km+k ) = farrri( i, j, k )
               farr_pack_bef( i, j, 2*km+k ) = farrsr( i, j, k )
               farr_pack_bef( i, j, 3*km+k ) = farrsi( i, j, k )
            end do
         end do
      end do

      !$omp end parallel

      call lt2_transposition_cyclic( im, jmg, jm, 4*km, l_mm, farr_pack_bef, farr_pack_aft )

      !$omp parallel

      !$omp do private( j, m )
      do k = 1, km
         do j = 1, jmg
            do m = 1, l_mm
               farrrr_aft( m, j, k ) = farr_pack_aft( m, j,      k )
               farrri_aft( m, j, k ) = farr_pack_aft( m, j, 1*km+k )
               farrsr_aft( m, j, k ) = farr_pack_aft( m, j, 2*km+k )
               farrsi_aft( m, j, k ) = farr_pack_aft( m, j, 3*km+k )
            end do
         end do
      end do

#else

      !$omp do private( j, m )
      do k = 1, km
         do j = 1, jmg
            do m = 1, l_mm
               farrrr_aft( m, j, k ) = farrrr( m, j, k )
               farrri_aft( m, j, k ) = farrri( m, j, k )
               farrsr_aft( m, j, k ) = farrsr( m, j, k )
               farrsi_aft( m, j, k ) = farrsi( m, j, k )
            end do
         end do
      end do

#endif


      !$omp do private( l )
      do k = 1, km
         do l = 1, lm
            sarr( l, k ) = 0.0d0
         end do
      end do

      !$omp do private( j, l, m, tmpr, tmpi )
      do k = 1, km

         do j = 1, jmg

            do l = 1, lm, 2
               m = l_l2m(l)
               tmpr = -ll_ord(l) * farrri_aft(m,j,k) * ll_pmn(l,j)           &
                    + farrsr_aft( m, j, k )                                  &
                    * (     ll_deg(l)       * ll_eps_nn(l) * ll_pmn_nn(l,j)  &
                        - ( ll_deg(l) + 1 ) * ll_eps   (l) * ll_pmn_np(l,j) )
               tmpr = tmpr / pow_coslatg( j )
               tmpi =  ll_ord(l) * farrrr_aft(m,j,k) * ll_pmn(l,j)           &
                    + farrsi_aft( m, j, k )                                  &
                    * (     ll_deg(l)       * ll_eps_nn(l) * ll_pmn_nn(l,j)  &
                        - ( ll_deg(l) + 1 ) * ll_eps   (l) * ll_pmn_np(l,j) )
               tmpi = tmpi / pow_coslatg( j )

               sarr( l  , k ) = sarr( l  , k ) + tmpr * gwg( j )
               sarr( l+1, k ) = sarr( l+1, k ) + tmpi * gwg( j )
            end do
         end do

      end do

      !$omp end parallel


    end subroutine lt2_derivop_l_f2s_old

    !**************************************************************************

    subroutine lt2_derivop_f2s( im, jmg, jm, km, lm, &
         coslatg, gwg, &
         farrrr, farrri, farrsr, farrsi, sarr )


      integer(i4b), intent(in ) :: im, jmg, jm, km, lm
      real(dp)    , intent(in ) :: coslatg( jmg ), gwg( jmg )
      real(dp)    , intent(in ) :: &
           farrrr( im, jm, km ), farrri( im, jm, km ), &
           farrsr( im, jm, km ), farrsi( im, jm, km )
      real(dp)    , intent(out) :: sarr( lm, km )


      !
      ! local variables
      !
#ifndef LT2_NOMPI
      real(dp) :: farr_pack_bef( im, jm, 4*km ), farr_pack_aft( l_mm, jmg, 4*km )
      integer(i4b) :: i
#endif
      real(dp) :: &
           farrrr_aft( l_mm, jmg, km ), farrri_aft( l_mm, jmg, km ), &
           farrsr_aft( l_mm, jmg, km ), farrsi_aft( l_mm, jmg, km )
      real(dp) :: sarrr( l_nm, l_mm, km ), sarri( l_nm, l_mm, km )

      real(dp)     :: tmpr, tmpi

      real(dp)     :: pow_coslatg( jmg )

      integer(i4b) :: j, k, l, m, n


      !$omp parallel

!!$      !$omp do
!!$      do j = 1, jmg
!!$         pow_coslatg( j ) = coslatg( j ) * coslatg( j )
!!$      end do


#ifndef LT2_NOMPI

      !$omp do private( i, j )
      do k = 1, km
         do j = 1, jm
            do i = 1, im
               farr_pack_bef( i, j,      k ) = farrrr( i, j, k )
               farr_pack_bef( i, j, 1*km+k ) = farrri( i, j, k )
               farr_pack_bef( i, j, 2*km+k ) = farrsr( i, j, k )
               farr_pack_bef( i, j, 3*km+k ) = farrsi( i, j, k )
            end do
         end do
      end do

      !$omp end parallel

      call lt2_transposition_cyclic( im, jmg, jm, 4*km, l_mm, farr_pack_bef, farr_pack_aft )

      !$omp parallel

      !$omp do private( j, m )
      do k = 1, km
         do j = 1, jmg
            do m = 1, l_mm
               farrrr_aft( m, j, k ) = farr_pack_aft( m, j,      k )
               farrri_aft( m, j, k ) = farr_pack_aft( m, j, 1*km+k )
               farrsr_aft( m, j, k ) = farr_pack_aft( m, j, 2*km+k )
               farrsi_aft( m, j, k ) = farr_pack_aft( m, j, 3*km+k )
            end do
         end do
      end do

#else

      !$omp do private( j, m )
      do k = 1, km
         do j = 1, jmg
            do m = 1, l_mm
               farrrr_aft( m, j, k ) = farrrr( m, j, k )
               farrri_aft( m, j, k ) = farrri( m, j, k )
               farrsr_aft( m, j, k ) = farrsr( m, j, k )
               farrsi_aft( m, j, k ) = farrsi( m, j, k )
            end do
         end do
      end do

#endif



      !$omp do private( l )
      do k = 1, km
         do l = 1, lm
            sarr( l, k ) = 0.0d0
         end do
      end do

      !$omp do private( j, l, m, tmpr, tmpi )
      do k = 1, km

         do j = 1, jmg
            do l = 1, lm, 2
!!$               m = l_l2m(l)
!!$               tmpr = -ll_ord(l) * farrri_aft(m,j,k) * ll_pmn(l,j)         &
!!$                    + farrsr_aft(m,j,k)                                    &
!!$                    * (     ll_deg(l)     * ll_eps_nn(l) * ll_pmn_nn(l,j)  &
!!$                        - ( ll_deg(l)+1 ) * ll_eps   (l) * ll_pmn_np(l,j) )
!!$               tmpr = tmpr / pow_coslatg( j )
!!$               tmpi =  ll_ord(l) * farrrr_aft(m,j,k) * ll_pmn(l,j)         &
!!$                    + farrsi_aft(m,j,k)                                    &
!!$                    * (     ll_deg(l)     * ll_eps_nn(l) * ll_pmn_nn(l,j)  &
!!$                        - ( ll_deg(l)+1 ) * ll_eps   (l) * ll_pmn_np(l,j) )
!!$               tmpi = tmpi / pow_coslatg( j )
!!$
!!$               sarr( l  , k ) = sarr( l  , k ) + tmpr * gwg( j )
!!$               sarr( l+1, k ) = sarr( l+1, k ) + tmpi * gwg( j )


            m = l_l2m(l)
            sarr( l  , k ) = sarr( l  , k )                    &
                 + ll_derivop_coef1(l  ,j) * farrri_aft(m,j,k) &
                 + ll_derivop_coef2(l  ,j) * farrsr_aft(m,j,k)
            sarr( l+1, k ) = sarr( l+1, k )                    &
                 + ll_derivop_coef1(l+1,j) * farrrr_aft(m,j,k) &
                 + ll_derivop_coef2(l+1,j) * farrsi_aft(m,j,k)

            end do
         end do

      end do

      !$omp end parallel


    end subroutine lt2_derivop_f2s

    !**************************************************************************

    subroutine lt2_derivop_mn_f2s_old( im, jmg, jm, km, lm, &
         coslatg, gwg, &
         farrrr, farrri, farrsr, farrsi, sarr )


      integer(i4b), intent(in ) :: im, jmg, jm, km, lm
      real(dp)    , intent(in ) :: coslatg( jmg ), gwg( jmg )
      real(dp)    , intent(in ) :: &
           farrrr( im, jm, km ), farrri( im, jm, km ), &
           farrsr( im, jm, km ), farrsi( im, jm, km )
      real(dp)    , intent(out) :: sarr( lm, km )


      !
      ! local variables
      !
#ifndef LT2_NOMPI
      real(dp) :: farr_pack_bef( im, jm, 4*km ), farr_pack_aft( l_mm, jmg, 4*km )
      integer(i4b) :: i
#endif
      real(dp) :: &
           farrrr_aft( l_mm, jmg, km ), farrri_aft( l_mm, jmg, km ), &
           farrsr_aft( l_mm, jmg, km ), farrsi_aft( l_mm, jmg, km )
      real(dp) :: sarrr( l_nm, l_mm, km ), sarri( l_nm, l_mm, km )

      real(dp)     :: tmpr, tmpi

      real(dp)     :: pow_coslatg( jmg )

      integer(i4b) :: j, k, l, m, n


      !$omp parallel

!      !$omp parallel do
      !$omp do
      do j = 1, jmg
         pow_coslatg( j ) = coslatg( j ) * coslatg( j )
      end do


!!$      call sh_lnplnn( nm, ord, deg, lnp, lnn, npzo, nnzo )
!!$
!!$      do j = 1, jm
!!$         cossq( j ) = 1.0d0 - sinlat( j ) * sinlat( j )
!!$      end do
!!$      do l = 1, (nm+1)*(nm+2)/2
!!$         iarr( l ) = fft5_m2i( im, ord( l ) )
!!$      end do
!!$
!!$      do k = 1, km
!!$         do l = 1, (nm+1)*(nm+2)/2
!!$
!!$            i = iarr( l )
!!$
!!$            sarrr( l, k ) = 0.0d0
!!$            sarri( l, k ) = 0.0d0
!!$
!!$            do j = 1, jm
!!$               tmpr = -ord( l ) * farrri( i, j, k ) * pmn( l, j ) &
!!$                    + farrsr( i, j, k ) &
!!$                    * ( deg(l) * eps( lnn(l) ) * pmn( lnn(l), j ) * nnzo(l) &
!!$                    - ( deg(l) + 1 ) * eps(l) * pmn( lnp(l), j ) * npzo(l) )
!!$               tmpr = tmpr / cossq( j )
!!$               tmpi =  ord( l ) * farrrr( i, j, k ) * pmn( l, j ) &
!!$                    + farrsi( i, j, k ) &
!!$                    * ( deg(l) * eps( lnn(l) ) * pmn( lnn(l), j ) * nnzo(l) &
!!$                    - ( deg(l) + 1 ) * eps(l) * pmn( lnp(l), j ) * npzo(l) )
!!$               tmpi = tmpi / cossq( j )
!!$
!!$               sarrr( l, k ) = sarrr( l, k ) + tmpr * gaussw( j )
!!$               sarri( l, k ) = sarri( l, k ) + tmpi * gaussw( j )
!!$            end do
!!$
!!$            sarrr( l, k ) = sarrr( l, k ) / pradi
!!$            sarri( l, k ) = sarri( l, k ) / pradi
!!$
!!$         end do
!!$      end do


#ifndef LT2_NOMPI

!      !$omp parallel do private( i, j )
      !$omp do private( i, j )
      do k = 1, km
         do j = 1, jm
            do i = 1, im
               farr_pack_bef( i, j,      k ) = farrrr( i, j, k )
               farr_pack_bef( i, j, 1*km+k ) = farrri( i, j, k )
               farr_pack_bef( i, j, 2*km+k ) = farrsr( i, j, k )
               farr_pack_bef( i, j, 3*km+k ) = farrsi( i, j, k )
            end do
         end do
      end do

      !$omp end parallel

      call lt2_transposition_cyclic( im, jmg, jm, 4*km, l_mm, farr_pack_bef, farr_pack_aft )

      !$omp parallel

!      !$omp parallel do private( j, m )
      !$omp do private( j, m )
      do k = 1, km
         do j = 1, jmg
            do m = 1, l_mm
               farrrr_aft( m, j, k ) = farr_pack_aft( m, j,      k )
               farrri_aft( m, j, k ) = farr_pack_aft( m, j, 1*km+k )
               farrsr_aft( m, j, k ) = farr_pack_aft( m, j, 2*km+k )
               farrsi_aft( m, j, k ) = farr_pack_aft( m, j, 3*km+k )
            end do
         end do
      end do

#else

!      !$omp parallel do private( j, m )
      !$omp do private( j, m )
      do k = 1, km
         do j = 1, jmg
            do m = 1, l_mm
               farrrr_aft( m, j, k ) = farrrr( m, j, k )
               farrri_aft( m, j, k ) = farrri( m, j, k )
               farrsr_aft( m, j, k ) = farrsr( m, j, k )
               farrsi_aft( m, j, k ) = farrsi( m, j, k )
            end do
         end do
      end do

#endif



      !
      ! NOTE:
      !   As a definition of spherical harmonics, degree must be greater than 
      !   or equal to order:
      !     degree (meridional wavenumber) >= order (zonal wavenumber). 
      !
      !   In this routine, the relationship between degree and array index, n, 
      !   is as follows:
      !     degree = n(array index) - 1   ===>   n(array index) = degree + 1.
      !
!!$      do k = 1, km
!!$
!!$         do m = 1, mm
!!$
!!$            do n = 1, ord(m)+1-1
!!$               sarrr( n, m, k ) = 0.0d0
!!$               sarri( n, m, k ) = 0.0d0
!!$            end do
!!$
!!$            do n = ord(m)+1, ord(m)+1
!!$               sarrr( n, m, k ) = 0.0d0
!!$               sarri( n, m, k ) = 0.0d0
!!$               do j = 1, jmg
!!$                  tmpr = -ord( m ) * farrri_aft( m, j, k ) * pmn( n, m, j )     &
!!$                       + farrsr_aft( m, j, k )                                  &
!!$                       * (     deg(n)       * eps( n+1, m ) * pmn( n+1, m, j )  &
!!$                           + 0.0d0                                             )
!!$                  tmpr = tmpr / pow_coslatg( j )
!!$                  tmpi =  ord( m ) * farrrr_aft( m, j, k ) * pmn( n, m, j )     &
!!$                       + farrsi_aft( m, j, k )                                  &
!!$                       * (     deg(n)       * eps( n+1, m ) * pmn( n+1, m, j )  &
!!$                           + 0.0d0                                             )
!!$                  tmpi = tmpi / pow_coslatg( j )
!!$
!!$                  sarrr( n, m, k ) = sarrr( n, m, k ) + tmpr * gwg( j )
!!$                  sarri( n, m, k ) = sarri( n, m, k ) + tmpi * gwg( j )
!!$               end do
!!$            end do
!!$
!!$            do n = (ord(m)+1)+1, nm
!!$               sarrr( n, m, k ) = 0.0d0
!!$               sarri( n, m, k ) = 0.0d0
!!$               do j = 1, jmg
!!$                  tmpr = -ord( m ) * farrri_aft( m, j, k ) * pmn( n, m, j )     &
!!$                       + farrsr_aft( m, j, k )                                  &
!!$                       * (     deg(n)       * eps( n+1, m ) * pmn( n+1, m, j )  &
!!$                           - ( deg(n) + 1 ) * eps( n  , m ) * pmn( n-1, m, j ) )
!!$                  tmpr = tmpr / pow_coslatg( j )
!!$                  tmpi =  ord( m ) * farrrr_aft( m, j, k ) * pmn( n, m, j )     &
!!$                       + farrsi_aft( m, j, k )                                  &
!!$                       * (     deg(n)       * eps( n+1, m ) * pmn( n+1, m, j )  &
!!$                           - ( deg(n) + 1 ) * eps( n  , m ) * pmn( n-1, m, j ) )
!!$                  tmpi = tmpi / pow_coslatg( j )
!!$
!!$                  sarrr( n, m, k ) = sarrr( n, m, k ) + tmpr * gwg( j )
!!$                  sarri( n, m, k ) = sarri( n, m, k ) + tmpi * gwg( j )
!!$               end do
!!$            end do
!!$
!!$         end do
!!$
!!$      end do



      !
      ! NOTE:
      !   As a definition of spherical harmonics, degree must be greater than 
      !   or equal to order:
      !     degree (meridional wavenumber) >= order (zonal wavenumber). 
      !
      !   In this routine, the relationship between degree and array index, n, 
      !   is as follows:
      !     degree = n(array index) - 1   ===>   n(array index) = degree + 1.
      !

!!$!      !$omp parallel do private( j, m, n, tmpr, tmpi )
!!$      !$omp do private( j, m, n, tmpr, tmpi )
!!$      do k = 1, km
!!$
!!$         do m = 1, l_mm
!!$
!!$            do n = 1, l_ord(m)+1-1
!!$               sarrr( n, m, k ) = 0.0d0
!!$               sarri( n, m, k ) = 0.0d0
!!$            end do
!!$
!!$            do n = l_ord(m)+1, l_ord(m)+1
!!$               sarrr( n, m, k ) = 0.0d0
!!$               sarri( n, m, k ) = 0.0d0
!!$               do j = 1, jmg
!!$                  tmpr = -l_ord(m) * farrri_aft(m,j,k) * l_pmn(n,m,j)         &
!!$                       + farrsr_aft( m, j, k )                                &
!!$                       * (     l_deg(n)       * l_eps(n+1,m) * l_pmn(n+1,m,j) &
!!$                           + 0.0d0                                            )
!!$                  tmpr = tmpr / pow_coslatg( j )
!!$                  tmpi =  l_ord(m) * farrrr_aft(m,j,k) * l_pmn(n,m,j)         &
!!$                       + farrsi_aft( m, j, k )                                &
!!$                       * (     l_deg(n)       * l_eps(n+1,m) * l_pmn(n+1,m,j) &
!!$                           + 0.0d0                                            )
!!$                  tmpi = tmpi / pow_coslatg( j )
!!$
!!$                  sarrr( n, m, k ) = sarrr( n, m, k ) + tmpr * gwg( j )
!!$                  sarri( n, m, k ) = sarri( n, m, k ) + tmpi * gwg( j )
!!$               end do
!!$            end do
!!$
!!$            do n = (l_ord(m)+1)+1, l_nm
!!$               sarrr( n, m, k ) = 0.0d0
!!$               sarri( n, m, k ) = 0.0d0
!!$               do j = 1, jmg
!!$                  tmpr = -l_ord(m) * farrri_aft(m,j,k) * l_pmn(n,m,j)         &
!!$                       + farrsr_aft( m, j, k )                                &
!!$                       * (     l_deg(n)       * l_eps(n+1,m) * l_pmn(n+1,m,j) &
!!$                           - ( l_deg(n) + 1 ) * l_eps(n  ,m) * l_pmn(n-1,m,j) )
!!$                  tmpr = tmpr / pow_coslatg( j )
!!$                  tmpi =  l_ord(m) * farrrr_aft(m,j,k) * l_pmn(n,m,j)         &
!!$                       + farrsi_aft( m, j, k )                                &
!!$                       * (     l_deg(n)       * l_eps(n+1,m) * l_pmn(n+1,m,j) &
!!$                           - ( l_deg(n) + 1 ) * l_eps(n  ,m) * l_pmn(n-1,m,j) )
!!$                  tmpi = tmpi / pow_coslatg( j )
!!$
!!$                  sarrr( n, m, k ) = sarrr( n, m, k ) + tmpr * gwg( j )
!!$                  sarri( n, m, k ) = sarri( n, m, k ) + tmpi * gwg( j )
!!$               end do
!!$            end do
!!$
!!$         end do
!!$
!!$      end do


!      !$omp parallel do private( j, m, n, tmpr, tmpi )
      !$omp do private( j, m, n, tmpr, tmpi )
      do k = 1, km

         do m = 1, l_mm

            do n = 1, l_ord(m)+1-1
               sarrr( n, m, k ) = 0.0d0
               sarri( n, m, k ) = 0.0d0
            end do

            do n = l_ord(m)+1, l_nm
               sarrr( n, m, k ) = 0.0d0
               sarri( n, m, k ) = 0.0d0
               do j = 1, jmg
                  tmpr = -l_ord(m) * farrri_aft(m,j,k) * l_pmn(n,m,j)         &
                       + farrsr_aft( m, j, k )                                &
                       * (     l_deg(n)     * l_eps_nn(n,m) * l_pmn_nn(n,m,j) &
                           - ( l_deg(n)+1 ) * l_eps   (n,m) * l_pmn_np(n,m,j) )
                  tmpr = tmpr / pow_coslatg( j )
                  tmpi =  l_ord(m) * farrrr_aft(m,j,k) * l_pmn(n,m,j)         &
                       + farrsi_aft( m, j, k )                                &
                       * (     l_deg(n)     * l_eps_nn(n,m) * l_pmn_nn(n,m,j) &
                           - ( l_deg(n)+1 ) * l_eps   (n,m) * l_pmn_np(n,m,j) )
                  tmpi = tmpi / pow_coslatg( j )

                  sarrr( n, m, k ) = sarrr( n, m, k ) + tmpr * gwg( j )
                  sarri( n, m, k ) = sarri( n, m, k ) + tmpi * gwg( j )
               end do
            end do

         end do

      end do

      !$omp end parallel

      call lt2_exchange_array_nm2l( km, lm, sarrr, sarri, sarr )



    end subroutine lt2_derivop_mn_f2s_old

    !**************************************************************************

    subroutine lt2_derivop_mn_f2s( im, jmg, jm, km, lm, &
         coslatg, gwg, &
         farrrr, farrri, farrsr, farrsi, sarr )


      integer(i4b), intent(in ) :: im, jmg, jm, km, lm
      real(dp)    , intent(in ) :: coslatg( jmg ), gwg( jmg )
      real(dp)    , intent(in ) :: &
           farrrr( im, jm, km ), farrri( im, jm, km ), &
           farrsr( im, jm, km ), farrsi( im, jm, km )
      real(dp)    , intent(out) :: sarr( lm, km )


      !
      ! local variables
      !
#ifndef LT2_NOMPI
      real(dp) :: farr_pack_bef( im, jm, 4*km ), farr_pack_aft( l_mm, jmg, 4*km )
      integer(i4b) :: i
#endif
      real(dp) :: &
           farrrr_aft( l_mm, jmg, km ), farrri_aft( l_mm, jmg, km ), &
           farrsr_aft( l_mm, jmg, km ), farrsi_aft( l_mm, jmg, km )
      real(dp) :: sarrr( l_nm, l_mm, km ), sarri( l_nm, l_mm, km )

      real(dp)     :: tmpr, tmpi

      real(dp)     :: pow_coslatg( jmg )

      integer(i4b) :: j, k, l, m, n


      !$omp parallel

      !$omp do
      do j = 1, jmg
         pow_coslatg( j ) = coslatg( j ) * coslatg( j )
      end do


#ifndef LT2_NOMPI

      !$omp do private( i, j )
      do k = 1, km
         do j = 1, jm
            do i = 1, im
               farr_pack_bef( i, j,      k ) = farrrr( i, j, k )
               farr_pack_bef( i, j, 1*km+k ) = farrri( i, j, k )
               farr_pack_bef( i, j, 2*km+k ) = farrsr( i, j, k )
               farr_pack_bef( i, j, 3*km+k ) = farrsi( i, j, k )
            end do
         end do
      end do

      !$omp end parallel

      call lt2_transposition_cyclic( im, jmg, jm, 4*km, l_mm, farr_pack_bef, farr_pack_aft )

      !$omp parallel

      !$omp do private( j, m )
      do k = 1, km
         do j = 1, jmg
            do m = 1, l_mm
               farrrr_aft( m, j, k ) = farr_pack_aft( m, j,      k )
               farrri_aft( m, j, k ) = farr_pack_aft( m, j, 1*km+k )
               farrsr_aft( m, j, k ) = farr_pack_aft( m, j, 2*km+k )
               farrsi_aft( m, j, k ) = farr_pack_aft( m, j, 3*km+k )
            end do
         end do
      end do

#else

      !$omp do private( j, m )
      do k = 1, km
         do j = 1, jmg
            do m = 1, l_mm
               farrrr_aft( m, j, k ) = farrrr( m, j, k )
               farrri_aft( m, j, k ) = farrri( m, j, k )
               farrsr_aft( m, j, k ) = farrsr( m, j, k )
               farrsi_aft( m, j, k ) = farrsi( m, j, k )
            end do
         end do
      end do

#endif



      !
      ! NOTE:
      !   As a definition of spherical harmonics, degree must be greater than 
      !   or equal to order:
      !     degree (meridional wavenumber) >= order (zonal wavenumber). 
      !
      !   In this routine, the relationship between degree and array index, n, 
      !   is as follows:
      !     degree = n(array index) - 1   ===>   n(array index) = degree + 1.
      !
!!$      do k = 1, km
!!$
!!$         do m = 1, mm
!!$
!!$            do n = 1, ord(m)+1-1
!!$               sarrr( n, m, k ) = 0.0d0
!!$               sarri( n, m, k ) = 0.0d0
!!$            end do
!!$
!!$            do n = ord(m)+1, ord(m)+1
!!$               sarrr( n, m, k ) = 0.0d0
!!$               sarri( n, m, k ) = 0.0d0
!!$               do j = 1, jmg
!!$                  tmpr = -ord( m ) * farrri_aft( m, j, k ) * pmn( n, m, j )     &
!!$                       + farrsr_aft( m, j, k )                                  &
!!$                       * (     deg(n)       * eps( n+1, m ) * pmn( n+1, m, j )  &
!!$                           + 0.0d0                                             )
!!$                  tmpr = tmpr / pow_coslatg( j )
!!$                  tmpi =  ord( m ) * farrrr_aft( m, j, k ) * pmn( n, m, j )     &
!!$                       + farrsi_aft( m, j, k )                                  &
!!$                       * (     deg(n)       * eps( n+1, m ) * pmn( n+1, m, j )  &
!!$                           + 0.0d0                                             )
!!$                  tmpi = tmpi / pow_coslatg( j )
!!$
!!$                  sarrr( n, m, k ) = sarrr( n, m, k ) + tmpr * gwg( j )
!!$                  sarri( n, m, k ) = sarri( n, m, k ) + tmpi * gwg( j )
!!$               end do
!!$            end do
!!$
!!$            do n = (ord(m)+1)+1, nm
!!$               sarrr( n, m, k ) = 0.0d0
!!$               sarri( n, m, k ) = 0.0d0
!!$               do j = 1, jmg
!!$                  tmpr = -ord( m ) * farrri_aft( m, j, k ) * pmn( n, m, j )     &
!!$                       + farrsr_aft( m, j, k )                                  &
!!$                       * (     deg(n)       * eps( n+1, m ) * pmn( n+1, m, j )  &
!!$                           - ( deg(n) + 1 ) * eps( n  , m ) * pmn( n-1, m, j ) )
!!$                  tmpr = tmpr / pow_coslatg( j )
!!$                  tmpi =  ord( m ) * farrrr_aft( m, j, k ) * pmn( n, m, j )     &
!!$                       + farrsi_aft( m, j, k )                                  &
!!$                       * (     deg(n)       * eps( n+1, m ) * pmn( n+1, m, j )  &
!!$                           - ( deg(n) + 1 ) * eps( n  , m ) * pmn( n-1, m, j ) )
!!$                  tmpi = tmpi / pow_coslatg( j )
!!$
!!$                  sarrr( n, m, k ) = sarrr( n, m, k ) + tmpr * gwg( j )
!!$                  sarri( n, m, k ) = sarri( n, m, k ) + tmpi * gwg( j )
!!$               end do
!!$            end do
!!$
!!$         end do
!!$
!!$      end do



      !
      ! NOTE:
      !   As a definition of spherical harmonics, degree must be greater than 
      !   or equal to order:
      !     degree (meridional wavenumber) >= order (zonal wavenumber). 
      !
      !   In this routine, the relationship between degree and array index, n, 
      !   is as follows:
      !     degree = n(array index) - 1   ===>   n(array index) = degree + 1.
      !

!!$!      !$omp parallel do private( j, m, n, tmpr, tmpi )
!!$      !$omp do private( j, m, n, tmpr, tmpi )
!!$      do k = 1, km
!!$
!!$         do m = 1, l_mm
!!$
!!$            do n = 1, l_ord(m)+1-1
!!$               sarrr( n, m, k ) = 0.0d0
!!$               sarri( n, m, k ) = 0.0d0
!!$            end do
!!$
!!$            do n = l_ord(m)+1, l_ord(m)+1
!!$               sarrr( n, m, k ) = 0.0d0
!!$               sarri( n, m, k ) = 0.0d0
!!$               do j = 1, jmg
!!$                  tmpr = -l_ord(m) * farrri_aft(m,j,k) * l_pmn(n,m,j)         &
!!$                       + farrsr_aft( m, j, k )                                &
!!$                       * (     l_deg(n)       * l_eps(n+1,m) * l_pmn(n+1,m,j) &
!!$                           + 0.0d0                                            )
!!$                  tmpr = tmpr / pow_coslatg( j )
!!$                  tmpi =  l_ord(m) * farrrr_aft(m,j,k) * l_pmn(n,m,j)         &
!!$                       + farrsi_aft( m, j, k )                                &
!!$                       * (     l_deg(n)       * l_eps(n+1,m) * l_pmn(n+1,m,j) &
!!$                           + 0.0d0                                            )
!!$                  tmpi = tmpi / pow_coslatg( j )
!!$
!!$                  sarrr( n, m, k ) = sarrr( n, m, k ) + tmpr * gwg( j )
!!$                  sarri( n, m, k ) = sarri( n, m, k ) + tmpi * gwg( j )
!!$               end do
!!$            end do
!!$
!!$            do n = (l_ord(m)+1)+1, l_nm
!!$               sarrr( n, m, k ) = 0.0d0
!!$               sarri( n, m, k ) = 0.0d0
!!$               do j = 1, jmg
!!$                  tmpr = -l_ord(m) * farrri_aft(m,j,k) * l_pmn(n,m,j)         &
!!$                       + farrsr_aft( m, j, k )                                &
!!$                       * (     l_deg(n)       * l_eps(n+1,m) * l_pmn(n+1,m,j) &
!!$                           - ( l_deg(n) + 1 ) * l_eps(n  ,m) * l_pmn(n-1,m,j) )
!!$                  tmpr = tmpr / pow_coslatg( j )
!!$                  tmpi =  l_ord(m) * farrrr_aft(m,j,k) * l_pmn(n,m,j)         &
!!$                       + farrsi_aft( m, j, k )                                &
!!$                       * (     l_deg(n)       * l_eps(n+1,m) * l_pmn(n+1,m,j) &
!!$                           - ( l_deg(n) + 1 ) * l_eps(n  ,m) * l_pmn(n-1,m,j) )
!!$                  tmpi = tmpi / pow_coslatg( j )
!!$
!!$                  sarrr( n, m, k ) = sarrr( n, m, k ) + tmpr * gwg( j )
!!$                  sarri( n, m, k ) = sarri( n, m, k ) + tmpi * gwg( j )
!!$               end do
!!$            end do
!!$
!!$         end do
!!$
!!$      end do



!!$!      !$omp parallel do private( j, m, n, tmpr, tmpi )
!!$      !$omp do private( j, m, n, tmpr, tmpi )
!!$      do k = 1, km
!!$
!!$         do m = 1, l_mm
!!$
!!$            do n = 1, l_ord(m)+1-1
!!$               sarrr( n, m, k ) = 0.0d0
!!$               sarri( n, m, k ) = 0.0d0
!!$            end do
!!$
!!$            do n = l_ord(m)+1, l_nm
!!$               sarrr( n, m, k ) = 0.0d0
!!$               sarri( n, m, k ) = 0.0d0
!!$               do j = 1, jmg
!!$                  tmpr = -l_ord(m) * farrri_aft(m,j,k) * l_pmn(n,m,j)         &
!!$                       + farrsr_aft( m, j, k )                                &
!!$                       * (     l_deg(n)     * l_eps_nn(n,m) * l_pmn_nn(n,m,j) &
!!$                           - ( l_deg(n)+1 ) * l_eps   (n,m) * l_pmn_np(n,m,j) )
!!$                  tmpr = tmpr / pow_coslatg( j )
!!$                  tmpi =  l_ord(m) * farrrr_aft(m,j,k) * l_pmn(n,m,j)         &
!!$                       + farrsi_aft( m, j, k )                                &
!!$                       * (     l_deg(n)     * l_eps_nn(n,m) * l_pmn_nn(n,m,j) &
!!$                           - ( l_deg(n)+1 ) * l_eps   (n,m) * l_pmn_np(n,m,j) )
!!$                  tmpi = tmpi / pow_coslatg( j )
!!$
!!$                  sarrr( n, m, k ) = sarrr( n, m, k ) + tmpr * gwg( j )
!!$                  sarri( n, m, k ) = sarri( n, m, k ) + tmpi * gwg( j )
!!$               end do
!!$            end do
!!$
!!$         end do
!!$
!!$      end do
!!$
!!$      !$omp end parallel


      !$omp do private( j, m, n, tmpr, tmpi )
      do k = 1, km

         do m = 1, l_mm
            do n = 1, l_ord(m)+1-1
               sarrr( n, m, k ) = 0.0d0
               sarri( n, m, k ) = 0.0d0
            end do
            do n = l_ord(m)+1, l_nm
               sarrr( n, m, k ) = 0.0d0
               sarri( n, m, k ) = 0.0d0
               do j = 1, jmg
                  sarrr( n, m, k ) = sarrr( n, m, k )                &
                       + l_derivop_coefr1(n,m,j) * farrri_aft(m,j,k) &
                       + l_derivop_coefr2(n,m,j) * farrsr_aft(m,j,k)
                  sarri( n, m, k ) = sarri( n, m, k )                &
                       + l_derivop_coefi1(n,m,j) * farrrr_aft(m,j,k) &
                       + l_derivop_coefi2(n,m,j) * farrsi_aft(m,j,k)
               end do
            end do
         end do

      end do

      !$omp end parallel

      call lt2_exchange_array_nm2l( km, lm, sarrr, sarri, sarr )



    end subroutine lt2_derivop_mn_f2s

    !**************************************************************************
    ! d( sarr )/d( lambda )
    !**************************************************************************

    subroutine lt2_lamderiv_s2s( km, lm, ord, sarr )

      integer(i4b), intent(in   ) :: km, lm
      integer(i4b), intent(in   ) :: ord( lm )
      real(dp)    , intent(inout) :: sarr( lm, km )


      !
      ! local variables
      !
      real(dp)     :: tmpval
      integer(i4b) :: k, l


!!$      do k = 1, km
!!$         !
!!$         ! NOTE:
!!$         !   As a definition of spherical harmonics, degree must be greater than 
!!$         !   or equal to order:
!!$         !     degree (meridional wavenumber) >= order (zonal wavenumber). 
!!$         !
!!$         !   In this routine, the relationship between degree and array index, n, 
!!$         !   is as follows:
!!$         !     degree = n(array index) - 1   ===>   n(array index) = degree + 1.
!!$         !
!!$!         do n = 1, nm
!!$!            do m = 1, deg(n)+1
!!$         do m = 1, mm
!!$            do n = ord(m)+1, nm
!!$               tmpval           = -ord( m ) * sarri( n, m, k )
!!$               sarri( n, m, k ) =  ord( m ) * sarrr( n, m, k )
!!$               sarrr( n, m, k ) = tmpval
!!$            end do
!!$         end do
!!$      end do


      !$omp parallel do private( l, tmpval )
      do k = 1, km
         do l = 1, lm, 2
            tmpval         = -ord( l+1 ) * sarr( l+1, k )
            sarr( l+1, k ) =  ord( l   ) * sarr( l  , k )
            sarr( l  , k ) = tmpval
         end do
      end do


    end subroutine lt2_lamderiv_s2s

    !**************************************************************************
    ! ( 1 - mu^2 ) * d( sarr )/d( mu )
    !**************************************************************************

    subroutine lt2_cossqmuderiv_s2s( km, lm, sarr )

#ifndef LT2_NOMPI
      use mpii_module
#endif

      integer(i4b), intent(in   ) :: km, lm
      real(dp)    , intent(inout) :: sarr( lm, km )


      !
      ! local variables
      !
      real(dp)     :: sarrr( l_nm, l_mm, km ), sarri( l_nm, l_mm, km )
      real(dp)     :: wksr ( l_nm, l_mm, km ), wksi ( l_nm, l_mm, km )
      !
      integer(i4b) :: k, m, n


      !
      ! NOTE:
      !   As a definition of spherical harmonics, degree must be greater than 
      !   or equal to order:
      !     degree (meridional wavenumber) >= order (zonal wavenumber). 
      !
      !   In this routine, the relationship between degree and array index, n, 
      !   is as follows:
      !     degree = n(array index) - 1   ===>   n(array index) = degree + 1.
      !


      call lt2_exchange_array_l2nm( km, lm, sarr, sarrr, sarri )


      !$omp parallel

!      !$omp parallel do private( m, n )
      !$omp do private( m, n )
      do k = 1, km

!         do n = 1+1, nm-1
!            do m = 1, deg(n)+1
         do m = 1, l_mm
            do n = l_ord(m)+1, l_ord(m)+1
               wksr( n, m, k ) = 0.0d0                                &
                    + (l_deg(n)+2)*l_eps( n+1, m )*sarrr( n+1, m, k )
               wksi( n, m, k ) = 0.0d0                                &
                    + (l_deg(n)+2)*l_eps( n+1, m )*sarri( n+1, m, k )
            end do
            do n = (l_ord(m)+1)+1, l_nm-1
               wksr( n, m, k ) =                                      &
                    - (l_deg(n)-1)*l_eps( n  , m )*sarrr( n-1, m, k ) &
                    + (l_deg(n)+2)*l_eps( n+1, m )*sarrr( n+1, m, k )
               wksi( n, m, k ) =                                      &
                    - (l_deg(n)-1)*l_eps( n  , m )*sarri( n-1, m, k ) &
                    + (l_deg(n)+2)*l_eps( n+1, m )*sarri( n+1, m, k )


!!$               tmpr = -ord( m ) * farrri_aft( m, j, k ) * pmn( n, m, j )     &
!!$                    + farrsr_aft( m, j, k )                                  &
!!$                    * ( deg(n)       * eps( n+1, m ) * pmn( n+1, m, j )  &
!!$                    - ( deg(n) + 1 ) * eps( n  , m ) * pmn( n-1, m, j ) )
!!$               tmpr = tmpr / pow_coslatg( j )
!!$               tmpi =  ord( m ) * farrrr_aft( m, j, k ) * pmn( n, m, j )     &
!!$                    + farrsi_aft( m, j, k )                                  &
!!$                    * ( deg(n)       * eps( n+1, m ) * pmn( n+1, m, j )  &
!!$                    - ( deg(n) + 1 ) * eps( n  , m ) * pmn( n-1, m, j ) )
!!$               tmpi = tmpi / pow_coslatg( j )


            end do
            do n = l_nm, l_nm
               wksr( n, m, k ) =                                      &
                    - (l_deg(n)-1)*l_eps( n  , m )*sarrr( n-1, m, k ) &
                    + 0.0d0
               wksi( n, m, k ) =                                      &
                    - (l_deg(n)-1)*l_eps( n  , m )*sarri( n-1, m, k ) &
                    + 0.0d0
            end do
         end do

      end do


!      !$omp parallel do private( m, n )
      !$omp do private( m, n )
      do k = 1, km
!!$         do n = 1, nm
!!$            do m = 1, deg(n)+1
         do m = 1, l_mm
            do n = l_ord(m)+1, l_nm
               sarrr( n, m, k ) = wksr( n, m, k )
               sarri( n, m, k ) = wksi( n, m, k )
            end do
         end do
      end do

      !$omp end parallel

      call lt2_exchange_array_nm2l( km, lm, sarrr, sarri, sarr )


    end subroutine lt2_cossqmuderiv_s2s

    !**************************************************************************
    ! nabla^2 = Laplacian
    !**************************************************************************

    subroutine lt2_lap_s2s( km, lm, deg, sarr )


      integer(i4b), intent(in   ) :: km, lm
      integer(i4b), intent(in   ) :: deg( lm )
      real(dp)    , intent(inout) :: sarr( lm, km )


      !
      ! local variables
      !
      real(dp)     :: eigval( lm )
      integer(i4b) :: k, l


      !$omp parallel

!      !$omp parallel do
      !$omp do
      do l = 1, lm
         eigval( l ) = -deg( l ) * ( deg( l ) + 1 )
      end do

!      !$omp parallel do private( l )
      !$omp do private( l )
      do k = 1, km
         do l = 1, lm
            sarr( l, k ) = eigval( l ) * sarr( l, k )
         end do
      end do

      !$omp end parallel


    end subroutine lt2_lap_s2s

    !**************************************************************************
    ! 
    !**************************************************************************

!!$    subroutine lt2_lapinv_s2s( km, mm, nm, ord, deg, sarrr, sarri )
!!$
!!$#ifndef LT2_NOMPI
!!$      use mpii_module, only : myrank
!!$#endif
!!$
!!$      integer(i4b), intent(in   ) :: km, mm, nm
!!$      integer(i4b), intent(in   ) :: ord( mm ), deg( nm+1 )
!!$      real(dp)    , intent(inout) :: sarrr( nm, mm, km ), sarri( nm, mm, km )
!!$
!!$
!!$      !
!!$      ! local variables
!!$      !
!!$      real(dp)     :: eigval( nm )
!!$      integer(i4b) :: k, m, n
!!$
!!$
!!$      do n = 1, nm
!!$         eigval( n ) = -deg( n ) * ( deg( n ) + 1 )
!!$      end do
!!$
!!$#ifndef LT2_NOMPI
!!$      if( myrank .eq. 0 ) then
!!$#endif
!!$         do k = 1, km
!!$            do m = 1, 1
!!$               do n = ord(m)+1, ord(m)+1
!!$                  sarrr( n, m, k ) = 0.0d0
!!$                  sarri( n, m, k ) = 0.0d0
!!$               end do
!!$               do n = ord(m)+1+1, nm
!!$                  sarrr( n, m, k ) = sarrr( n, m, k ) / eigval( n )
!!$                  sarri( n, m, k ) = sarri( n, m, k ) / eigval( n )
!!$               end do
!!$            end do
!!$            do m = 1+1, mm
!!$               do n = ord(m)+1, nm
!!$                  sarrr( n, m, k ) = sarrr( n, m, k ) / eigval( n )
!!$                  sarri( n, m, k ) = sarri( n, m, k ) / eigval( n )
!!$               end do
!!$            end do
!!$         end do
!!$#ifndef LT2_NOMPI
!!$      else
!!$         do k = 1, km
!!$            do m = 1, mm
!!$               do n = ord(m)+1, nm
!!$                  sarrr( n, m, k ) = sarrr( n, m, k ) / eigval( n )
!!$                  sarri( n, m, k ) = sarri( n, m, k ) / eigval( n )
!!$               end do
!!$            end do
!!$         end do
!!$      end if
!!$#endif
!!$
!!$
!!$    end subroutine lt2_lapinv_s2s

    subroutine lt2_lapinv_s2s( km, lm, deg, sarr )

#ifndef LT2_NOMPI
      use mpii_module, only : myrank
#endif

      integer(i4b), intent(in   ) :: km, lm
      integer(i4b), intent(in   ) :: deg( lm )
      real(dp)    , intent(inout) :: sarr( lm, km )


      !
      ! local variables
      !
      real(dp)     :: eigval( lm )
      integer(i4b) :: k, l


      !$omp parallel

!      !$omp parallel do
      !$omp do
      do l = 1, lm
         eigval( l ) = -deg( l ) * ( deg( l ) + 1 )
      end do

#ifndef LT2_NOMPI
      if( myrank .eq. 0 ) then
#endif
!         !$omp parallel do private( l )
         !$omp do private( l )
         do k = 1, km
            do l = 1, 2
               sarr( l, k ) = 0.0d0
            end do
            do l = 2+1, lm
               sarr( l, k ) = sarr( l, k ) / eigval( l )
            end do
         end do
#ifndef LT2_NOMPI
      else
!         !$omp parallel do private( l )
         !$omp do private( l )
         do k = 1, km
            do l = 1, lm
               sarr( l, k ) = sarr( l, k ) / eigval( l )
            end do
         end do
      end if
#endif

      !$omp end parallel


    end subroutine lt2_lapinv_s2s

    !**************************************************************************

    subroutine lt2_truncate( km, lm, ntrunc, sarr )


      integer(i4b), intent(in   ) :: km, lm
      integer(i4b), intent(in   ) :: ntrunc
      real(dp)    , intent(inout) :: sarr( lm, km )


      !
      ! local variables
      !
      integer(i4b) :: k, m, n, l


      !$omp parallel do private( l, m, n )
      do k = 1, km
         do m = 1, l_mm
            do n = (ntrunc+1)+1, l_nm
               l = l_nm2l( n, m )
               sarr( l  , k ) = 0.0d0
               sarr( l+1, k ) = 0.0d0
            end do
         end do
      end do


    end subroutine lt2_truncate

    !**************************************************************************

    subroutine lt2_schipsi2sucvc( km, lm, &
         schi, spsi, suc, svc )

      integer(i4b), intent(in   ) :: km, lm
      real(dp)    , intent(inout) :: schi( lm, km ), spsi( lm, km )
      real(dp)    , intent(out  ) :: suc( lm, km ), svc( lm, km )


      !
      ! local variables
      !
      real(dp)     :: &
           sucr( l_nm, l_mm, km ), suci( l_nm, l_mm, km ), &
           svcr( l_nm, l_mm, km ), svci( l_nm, l_mm, km )
      real(dp)     :: &
           schir( l_nm, l_mm, km ), schii( l_nm, l_mm, km ), &
           spsir( l_nm, l_mm, km ), spsii( l_nm, l_mm, km )
      integer(i4b) :: k, m, n


      call lt2_exchange_array_l2nm( km, lm, schi, schir, schii )
      call lt2_exchange_array_l2nm( km, lm, spsi, spsir, spsii )

      !$omp parallel do private( m, n )
      do k = 1, km
         do m = l_mm, l_mm
            do n = 1, l_ord(m)+1-1
               sucr( n, m, k ) = 0.0d0
               suci( n, m, k ) = 0.0d0
               svcr( n, m, k ) = 0.0d0
               svci( n, m, k ) = 0.0d0
            end do
         end do

         do m = 1, l_mm
            do n = l_ord(m)+1, l_ord(m)+1
               sucr( n, m, k ) = &
                    - l_ord( m ) * schii( n, m, k ) &
                    + 0.0d0                                               &
                    - ( l_deg(n) + 2 ) * l_eps( n+1, m ) * spsir( n+1, m, k )
               suci( n, m, k ) = &
                    + l_ord( m ) * schir( n, m, k ) &
                    + 0.0d0                                               &
                    - ( l_deg(n) + 2 ) * l_eps( n+1, m ) * spsii( n+1, m, k )
               svcr( n, m, k ) = &
                    - l_ord( m ) * spsii( n, m, k ) &
                    - 0.0d0                                               &
                    + ( l_deg(n) + 2 ) * l_eps( n+1, m ) * schir( n+1, m, k )
               svci( n, m, k ) = &
                    + l_ord( m ) * spsir( n, m, k ) &
                    - 0.0d0                                               &
                    + ( l_deg(n) + 2 ) * l_eps( n+1, m ) * schii( n+1, m, k )
            end do
            do n = (l_ord(m)+1)+1, l_nm-1
               sucr( n, m, k ) = &
                    - l_ord( m ) * schii( n, m, k ) &
                    + ( l_deg(n) - 1 ) * l_eps( n  , m ) * spsir( n-1, m, k ) &
                    - ( l_deg(n) + 2 ) * l_eps( n+1, m ) * spsir( n+1, m, k )
               suci( n, m, k ) = &
                    + l_ord( m ) * schir( n, m, k ) &
                    + ( l_deg(n) - 1 ) * l_eps( n  , m ) * spsii( n-1, m, k ) &
                    - ( l_deg(n) + 2 ) * l_eps( n+1, m ) * spsii( n+1, m, k )
               svcr( n, m, k ) = &
                    - l_ord( m ) * spsii( n, m, k ) &
                    - ( l_deg(n) - 1 ) * l_eps( n  , m ) * schir( n-1, m, k ) &
                    + ( l_deg(n) + 2 ) * l_eps( n+1, m ) * schir( n+1, m, k )
               svci( n, m, k ) = &
                    + l_ord( m ) * spsir( n, m, k ) &
                    - ( l_deg(n) - 1 ) * l_eps( n  , m ) * schii( n-1, m, k ) &
                    + ( l_deg(n) + 2 ) * l_eps( n+1, m ) * schii( n+1, m, k )
            end do
            do n = l_nm, l_nm
               sucr( n, m, k ) = &
                    - l_ord( m ) * schii( n, m, k ) &
                    + ( l_deg(n) - 1 ) * l_eps( n  , m ) * spsir( n-1, m, k ) &
                    - 0.0d0
               suci( n, m, k ) = &
                    + l_ord( m ) * schir( n, m, k ) &
                    + ( l_deg(n) - 1 ) * l_eps( n  , m ) * spsii( n-1, m, k ) &
                    - 0.0d0
               svcr( n, m, k ) = &
                    - l_ord( m ) * spsii( n, m, k ) &
                    - ( l_deg(n) - 1 ) * l_eps( n  , m ) * schir( n-1, m, k ) &
                    + 0.0d0
               svci( n, m, k ) = &
                    + l_ord( m ) * spsir( n, m, k ) &
                    - ( l_deg(n) - 1 ) * l_eps( n  , m ) * schii( n-1, m, k ) &
                    + 0.0d0
            end do

         end do

      end do


!!$      do k = 1, km
!!$!         do m = mm, mm
!!$!            do n = 1, ord(m)+1-1
!!$!               sucr( n, m, k ) = 0.0d0
!!$!               suci( n, m, k ) = 0.0d0
!!$!               svcr( n, m, k ) = 0.0d0
!!$!               svci( n, m, k ) = 0.0d0
!!$!            end do
!!$!         end do
!!$         do m = 1, mm
!!$            do n = ord(m)+1, ord(m)+1
!!$               sucr( n, m, k ) = sucr( n, m, k ) / pradi
!!$               suci( n, m, k ) = suci( n, m, k ) / pradi
!!$               svcr( n, m, k ) = svcr( n, m, k ) / pradi
!!$               svci( n, m, k ) = svci( n, m, k ) / pradi
!!$            end do
!!$            do n = (ord(m)+1)+1, nm-1
!!$               sucr( n, m, k ) = sucr( n, m, k ) / pradi
!!$               suci( n, m, k ) = suci( n, m, k ) / pradi
!!$               svcr( n, m, k ) = svcr( n, m, k ) / pradi
!!$               svci( n, m, k ) = svci( n, m, k ) / pradi
!!$            end do
!!$            do n = nm, nm
!!$               sucr( n, m, k ) = sucr( n, m, k ) / pradi
!!$               suci( n, m, k ) = suci( n, m, k ) / pradi
!!$               svcr( n, m, k ) = svcr( n, m, k ) / pradi
!!$               svci( n, m, k ) = svci( n, m, k ) / pradi
!!$            end do
!!$         end do
!!$      end do


!!$      do k = 1, km
!!$         do l = 1, (nm+1)*(nm+2)/2
!!$            sucr( l, k ) = -ord( l ) * schii( l, k ) &
!!$                 + ( deg( l ) - 1 ) * eps( l ) * spsir( lnp( l ), k ) &
!!$                 * npzo( l ) &
!!$                 - ( deg( l ) + 2 ) * eps( lnn( l ) ) * spsir( lnn( l ), k ) &
!!$                 * nnzo( l )
!!$            suci( l, k ) =  ord( l ) * schir( l, k ) &
!!$                 + ( deg( l ) - 1 ) * eps( l ) * spsii( lnp( l ), k ) &
!!$                 * npzo( l ) &
!!$                 - ( deg( l ) + 2 ) * eps( lnn( l ) ) * spsii( lnn( l ), k ) &
!!$                 * nnzo( l )
!!$            svcr( l, k ) = -ord( l ) * spsii( l, k ) &
!!$                 - ( deg( l ) - 1 ) * eps( l ) * schir( lnp( l ), k ) &
!!$                 * npzo( l ) &
!!$                 + ( deg( l ) + 2 ) * eps( lnn( l ) ) * schir( lnn( l ), k ) &
!!$                 * nnzo( l )
!!$            svci( l, k ) =  ord( l ) * spsir( l, k ) &
!!$                 - ( deg( l ) - 1 ) * eps( l ) * schii( lnp( l ), k ) &
!!$                 * npzo( l ) &
!!$                 + ( deg( l ) + 2 ) * eps( lnn( l ) ) * schii( lnn( l ), k ) &
!!$                 * nnzo( l )
!!$         end do
!!$      end do


      call lt2_exchange_array_nm2l( km, lm, sucr, suci, suc )
      call lt2_exchange_array_nm2l( km, lm, svcr, svci, svc )


    end subroutine lt2_schipsi2sucvc

    !************************************************************************************

  end module lt2_module
