!!****m* ABINIT/m_mlwfovlp
!! NAME
!!  m_mlwfovlp
!!
!! FUNCTION
!!  Interface with Wannier90
!!
!! COPYRIGHT
!!  Copyright (C) 2005-2025 ABINIT group (BAmadon, CEspejo, FJollet, TRangel, DRH, hexu)
!!  This file is distributed under the terms of the
!!  GNU General Public License, see ~abinit/COPYING
!!  or http://www.gnu.org/copyleft/gpl.txt .
!!
!! SOURCE

#if defined HAVE_CONFIG_H
#include "config.h"
#endif

#include "abi_common.h"

module m_mlwfovlp

 use, intrinsic :: iso_c_binding
 use defs_basis
 use defs_wannier90
 use m_abicore
 use m_errors
 use m_atomdata
 use m_xmpi
 use m_sort
#ifdef FC_NAG
 use f90_unix_dir
#endif
 use netcdf
 use m_nctk
 use m_hdr
 use m_dtset
 use m_dtfil
 use m_krank

 use defs_datatypes, only : pseudopotential_type
 use defs_abitypes, only : MPI_type
 use m_io_tools, only : delete_file, get_unit, open_file
 use m_hide_lapack,     only : matrginv, xheev
 use m_fstrings,      only : strcat, sjoin, itoa
 use m_numeric_tools, only : uniformrandom, simpson_int, c2r, l2int, isdiagmat, get_diag, blocked_loop
 use m_special_funcs,   only : besjm
 use m_geometry,  only : xred2xcart, rotmat, wigner_seitz
 use m_crystal,  only : crystal_t
 use m_fftcore,  only : sphereboundary
 use m_ebands,   only : ebands_t
 use m_pawang,   only : pawang_type
 use m_pawrad,   only : pawrad_type, simp_gen
 use m_pawtab,   only : pawtab_type
 use m_pawcprj,  only : pawcprj_type
 use m_paw_sphharm, only : ylm_cmplx, initylmr
 use m_paw_overlap, only : smatrix_pawinit
 use m_evdw_wannier, only : evdw_wannier
 use m_abstract_wf, only: abstract_wf,  wann_ksetting_t, cg_cprj, wfd_wf
 use m_wannier_io,  only: write_eigenvalues, write_Amn, compute_and_write_unk, write_Mmn

 implicit none

 private
!!***

 public :: mlwfovlp
!!***

!----------------------------------------------------------------------

!!****t* m_mlwfovlp/wan_t
!! NAME
!! wan_t
!!
!! FUNCTION
!!  This object stores the results of the Wannnierization algorithm.
!!  It can be constructed by reading the ABIWAN.nc file produced by
!!  Abinit when we call wannier90 in library mode. See mlwfovlp routine.
!!
!! SOURCE

 type,public :: wan_t

   integer :: spin = -1
   ! Spin index.

   integer :: nwan = -1
   ! Number of Wannier functions.

   integer :: max_nwan = -1
   ! Max number of Wannier functions over spins (used to dimension arrays)

   integer :: num_bands = -1
   ! Number of bands seen by wannier90 for this spin.

   !integer :: nbndep,         ! Number of remaining bands after excluding bands in Wannierizatin step
   !integer :: nbndskip,       ! Number of bands to be skipped in Wannierization step, leading to
                               ! the exclusion from the original Hamiltonian
   integer :: nkbz = -1
   ! Number of k-points in the full BZ

   integer :: nr_h = -1, nr_e = -1, nr_p = -1
   ! Number of lattice points for H, electrons, phonons

   integer :: ngkpt(3) = -1
   ! K-mesh divisions

   !integer :: nshiftk
   ! Number of shifts. At present only 1 shift is supported.

   logical :: have_disentangled
   ! True if disentanglement has been used.

   real(dp) :: spread(3) = -one
   ! Spread of wannier functions.

   type(krank_t) :: krank
   ! Used to find the index of the kpoint from its coordinates.

   integer,allocatable :: exclude_bands(:)
   ! FIXME: Is this still needed.

   integer,allocatable :: dimwin(:), winstart(:)
   ! (nkbz)
   ! Number of bands within the outer window at each k-point

   integer :: bmin = -1, bmax = -1
   ! Minimum and maximum band included in the Wannierization.

   integer,allocatable :: r_h(:,:), r_e(:,:), r_p(:,:)
   ! Lattice points for H in the Wannier representation
   ! (3, nr_h)

   integer,allocatable :: ndegen_h(:), ndegen_e(:), ndegen_p(:)

   real(dp),allocatable :: rmod_h(:), rmod_p(:), rmod_e(:)
   ! (nr_h), (nr_p)
   ! Lenght of the lattice points in Bohr

   real(dp),allocatable :: all_eigens(:,:)
   ! (mband, nkbz)
   ! All KS eigenvalues (before possible filtering done by wannier90)

   real(dp),allocatable :: centres(:,:)
   ! Wannier centers.
   ! (3, nwan)

   real(dp),allocatable :: spreads(:)
   ! Wannier centers.
   ! (nwan)

   real(dp),allocatable :: kbz(:,:)
   ! kpoints in the full BZ used by Wannier90.
   ! (3, nkbz)

   logical,allocatable :: band_in(:)
   ! (num_bands)

   logical,allocatable :: lwindow(:,:)
   ! (num_bands, nkbz)

   logical :: keep_umats
   ! True of u_mat_opt and u_mat should be saved in memory.

   complex(dp),allocatable :: u_mat_opt(:,:,:)
   complex(dp),allocatable :: u_mat(:,:,:)

   complex(dp),allocatable :: u_k(:,:,:)
   ! (max_dimwin, nwan, nkbz)
   ! total rotation matrix: the product of the optimal subspace x the rotation among the nwan Wannier functions.
   ! on the coarse ab-initio k-mesh

   complex(dp),allocatable :: hwan_r(:,:,:)
   ! (nr_h, nwan, nwan)
   ! KS Hamiltonian in the Wannier representation.

   integer :: my_npert = -1, my_pert_start = -1
   ! My number of perturbations and my initial perturbation.

   type(xcomm_t), pointer :: pert_comm => null()
   ! MPI-communicator for parallelism over perturbations.

   complex(dp),allocatable :: grpe_wwp(:,:,:,:,:)
   ! (nr_p, nr_e, nwan, nwan, my_npert))
   ! e-ph matrix elements in the Wannier representation.
   ! NB: These matrix elements are in the atomic represention and distributed inside pert_comm

 contains
   procedure :: from_abiwan => wan_from_abiwan
   ! Initialize a wan_t instance from a ABIWAN.nc file

   procedure :: load_gwan => wan_load_gwan
   ! Read g(R_p, R_e) in the Wannier representation from the GWAN.nc file.

   procedure :: print => wan_print
   ! print info on the object.

   procedure :: interp_ham => wan_interp_ham
   ! Interpolate Hamiltonian at an arbitray k-point

   procedure :: setup_eph_ws_kq => wan_setup_eph_ws_kq
   ! Prepare interpolation of e-ph matrix elements.

   procedure :: interp_eph_manyq => wan_interp_eph_manyq
   ! Interpolate e-ph matrix elements.

   procedure :: ncwrite_gwan => wan_ncwrite_gwan
   ! Write g in the Wannier representation to netcdf file.

   procedure :: free => wan_free
   ! Free memory.

 end type wan_t
!!***

 public :: wan_interp_ebands
! Build new ebands_t object on a k-mesh via Wannier interpolation.

contains
!!***

!!****f* m_mlwfovlp/mlwfovlp
!! NAME
!! mlwfovlp
!!
!! FUNCTION
!! Routine which computes overlap M_{mn}(k,b) and projection A_{mn}(k)
!! for Wannier code (www.wannier.org f90 version).
!! Various file are written (wannier90.*) which can be used to run a
!! separate wannier calculation with the wannier90 code.
!!
!! INPUTS
!!  crystal<crystal_t>=Info on the crystalline structure.
!!  ebands<ebands_t>=The object describing the band structure.
!!  hdr <type(hdr_type)>=the header of wf, den and pot files
!!  atindx1(natom)=index table for atoms, inverse of atindx (see gstate.f)
!!  cg(2,mcg)=planewave coefficients of wavefunctions.
!!  cprj(natom,mcprj)= <p_lmn|Cnk> coefficients for each WF |Cnk> and each |p_lmn> non-local projector
!!  dtset <type(dataset_type)>=all input variables for this dataset
!!  dtfil <type(datafiles_type)>=variables related to files
!!  ecut=cut-off energy for plane wave basis sphere (Ha)
!!  eigen(mband*nkpt*nsppol)=array for holding eigenvalues (hartree)
!!  gprimd(3,3)=dimensional reciprocal space primitive translations
!!  kg(3,mpw*mkmem)=reduced planewave coordinates.
!!  mband=maximum number of bands
!!  mcg=size of wave-functions array (cg) =mpw*nspinor*mband*mkmem*nsppol
!!  mcprj=size of projected wave-functions array (cprj) =nspinor*mband*mkmem*nsppol
!!  mgfft=maximum size of 1D FFTs
!!  mgfftc=maximum size of 1D FFTs (coarse grid)
!!  mkmem =number of k points treated by this node.
!!  mpi_enreg=information about MPI parallelization
!!  mpw=maximum dimensioned size of npw.
!!  natom=number of atoms in cell.
!!  nattyp(ntypat)= # atoms of each type.
!!  nfft=(effective) number of FFT grid points (for this processor) (see NOTES at beginning of scfcv)
!!  ngfft(18)=contain all needed information about 3D FFT (see NOTES at beginning of scfcv)
!!  nkpt=number of k points.
!!  npwarr(nkpt)=number of planewaves in basis at this k point
!!  nsppol=1 for unpolarized, 2 for spin-polarized
!!  ntypat=number of types of atoms in unit cell.
!!  occ(mband*nkpt*nsppol) Occupation number for each band (often 2) for each k point.
!!  prtvol=control print volume and debugging output
!!  psps <type(pseudopotential_type)>=variables related to pseudopotentials
!!  rprimd(3,3)=dimensional primitive translations for real space (bohr)
!!  ucvol=unit cell volume (bohr**3)
!!  xred(3,natom)=reduced dimensionless atomic coordinates
!!
!! OUTPUT
!!  (only writing, printing)
!!
!! SIDE EFFECTS
!!  (only writing, printing)
!!
!! NOTES
!!
!! SOURCE

   subroutine mlwfovlp(mywfc, crystal, ebands, hdr, atindx1, &
     !&cg,cprj, &
     &dtset,dtfil,eigen,gprimd,kg,&
& mband,mcg,mcprj,mgfftc,mkmem,mpi_enreg,mpw,natom,&
& nattyp,nfft,ngfft,nkpt,npwarr,nsppol,ntypat,occ,&
& pawang,pawrad,pawtab,prtvol,psps,rprimd,ucvol,xred, exclude_bands)

!Arguments ------------------------------------
!scalars
class(abstract_wf), pointer :: mywfc
 integer,intent(in) :: mband,mcg,mcprj,mgfftc,mkmem,mpw,natom,nfft,nkpt
 integer,intent(in) :: nsppol,ntypat,prtvol
 real(dp),intent(in) :: ucvol
 type(crystal_t),intent(in) :: crystal
 type(ebands_t),intent(in) :: ebands
 type(hdr_type),intent(in) :: hdr
 type(MPI_type),intent(inout) :: mpi_enreg
 type(dataset_type),intent(in) :: dtset
 type(datafiles_type),intent(in) :: dtfil
 type(pawang_type),intent(in) :: pawang
 type(pseudopotential_type),intent(in) :: psps
!arrays
 integer,intent(in) :: atindx1(natom)
 integer,intent(in) :: kg(3,mpw*mkmem),nattyp(ntypat),ngfft(18),npwarr(nkpt)
! real(dp), optional, intent(in) :: cg(2,mcg)
! type(pawcprj_type), optional, intent(in) :: cprj(natom,mcprj)
 real(dp),optional, intent(in) :: eigen(mband*nkpt*nsppol),gprimd(3,3),rprimd(3,3)
 real(dp),intent(in) :: occ(mband*nkpt*nsppol)
 real(dp),intent(in) :: xred(3,natom)
 !type(pawrad_type),intent(in) :: pawrad(psps%ntypat*psps%usepaw)
 !type(pawtab_type),intent(in) :: pawtab(psps%ntypat*psps%usepaw)
 type(pawrad_type),intent(in) :: pawrad(:)
 type(pawtab_type),intent(in) :: pawtab(:)
 integer, intent(inout) :: exclude_bands(:,:)

!Local variables-------------------------------
!scalars
 integer :: i, ierr, ikpt1,ikpt2,intot,isppol, iwan, lproj,lwanniersetup,mwan
#if defined HAVE_WANNIER90
 integer :: ncid, ncerr, nrpts
 character(len=fnlen) :: abiwan_fname
 integer :: have_disentangled_spin(nsppol)
 integer,allocatable :: irvec_r_h(:,:),ndegen_h(:)
 real(dp),allocatable :: rmods_r_h(:)
 !type(wan_t) :: wan
#endif
 integer,parameter :: master = 0
 integer :: nntot,num_nnmax
 integer :: max_num_bands,nprocs,comm,rank
 integer :: nwan(nsppol),nband_inc(nsppol),num_bands(nsppol)
 logical :: gamma_only,leig,lmmn,lwannierrun,spinors !,have_disentangled
 character(len=fnlen) :: wfnname
 character(len=1000) :: msg
 character(len=fnlen) :: seed_name(nsppol)
 character(len=fnlen) :: filew90_win(nsppol),filew90_wout(nsppol),filew90_amn(nsppol),filew90_ramn(nsppol)
 character(len=fnlen) :: filew90_mmn(nsppol),filew90_eig(nsppol)
!arrays
 integer :: g1temp(3),ngkpt(3), units(2)
 integer,allocatable :: g1(:,:,:)
 integer,allocatable:: ovikp(:,:)
 integer,allocatable :: proj_l(:,:),proj_m(:,:),proj_radial(:,:)
 integer,allocatable :: proj_s_loc(:)
 real(dp) :: real_lattice(3,3), recip_lattice(3,3)
 real(dp),allocatable :: cm1(:,:,:,:,:,:),cm2_paw(:,:,:)
 real(dp),allocatable :: eigenvalues_w(:,:,:)
 real(dp),allocatable :: proj_site(:,:,:),proj_x(:,:,:),proj_z(:,:,:),proj_zona(:,:)
 real(dp),allocatable :: wann_centres(:,:,:),wann_spreads(:,:),xcart(:,:)
 real(dp),allocatable :: proj_s_qaxis_loc(:,:)
 complex(dpc),allocatable :: A_paw(:,:,:,:)
 complex(dpc),allocatable :: M_matrix(:,:,:,:,:),U_matrix(:,:,:,:)
 complex(dpc),allocatable :: U_matrix_opt(:,:,:,:)
 complex(dpc),pointer :: A_matrix(:,:,:,:)
 logical,allocatable :: band_in(:,:),lwindow(:,:,:)
 character(len=3),allocatable :: atom_symbols(:)
 logical,allocatable:: just_augmentation(:,:)
 type(pawcprj_type), pointer :: cprj_ptr(:, :)
#ifdef HAVE_WANNIER90
 real(dp) :: spreadw(3,nsppol)
#endif

!************************************************************************

 ABI_UNUSED((/crystal%natom, ebands%nkpt, hdr%nkpt/))
 ABI_UNUSED(atindx1)
 ABI_UNUSED((/mcg, mcprj, prtvol/))
 ABI_UNUSED_A(pawang)

 units = [std_out, ab_out]

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!1) Initialize variables and allocations
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
!Some initialization and checks
!
 lwanniersetup=1 ! 1 is mandatory ( 0 is for debug)
 !to use lwanniersetup=0, one would need to define which bands to exclude.
 lwannierrun=.true.   ! .false. and .true. are possible
 lmmn=.true.          ! .false. and .true. are possible
 leig=.true.          ! .false. and .true. are possible
 gamma_only=.false.   !not yet implemented
 spinors=.false.
 if (dtset%nspinor == 2) spinors = .true.

 !mpi initialization
 comm=MPI_enreg%comm_cell
 nprocs=xmpi_comm_size(comm)
 rank=MPI_enreg%me_kpt
 !write(std_out,'("master ",i0," rank ",i0," nprocs ",i0)') master,rank,nprocs

 !Generate seed names for wannier90 files, and file names
 call mlwfovlp_seedname(dtfil%fnameabo_w90,filew90_win,filew90_wout,filew90_amn,&
                        filew90_ramn,filew90_mmn,filew90_eig,nsppol,seed_name)
 !Check the validity of input variables
 !FIXME: this is not a check, and prints a warning even if the input is fine!
 !must be changed to not print anything if kptopt 3 and istwfk 1 (the latter is easier to check)
 if (rank==master) then
   if(.not. (all(dtset%istwfk(1:nkpt) == 1) .and. all(dtset%wtk(1:nkpt) == dtset%wtk(1))) ) then
     write(msg, '(a,a,a,a)' ) ch10,&
     '   mlwfovlp:  you should give k-point in the full brillouin zone ',ch10,&
     '   with explicit k-points (or kptopt=3) and istwfk 1'
     call wrtout(units, msg)
     !ABI_ERROR(msg)
   end if
 end if
!
 if(MPI_enreg%paral_spinor==1) then
   ABI_ERROR('Parallelization over spinorial components not yet available !')
 end if

 ! MG: TODO: Why this check?
 if (psps%npsp/=psps%ntypat) then
   ABI_ERROR("Alchemical mixing not supported in mlwfovlp")
 end if

 if (nsppol==2) then
   write(msg, '(3a)' ) ch10,'   mlwfovlp:  Calculating matrices for both spin polarization  ',ch10
   call wrtout(units, msg)
 end if

 ! get lattice parameters in wannier90 format
 do i=1, 3
   real_lattice(:,i)=Bohr_Ang*rprimd(i,:)
   recip_lattice(:,i)=two_pi*gprimd(i,:)/Bohr_Ang
 end do

 ! Allocations.
 num_nnmax=12 ! limit fixed for compact structure in wannier_setup.
 ABI_MALLOC(g1,(3,nkpt,num_nnmax))
 ABI_MALLOC(ovikp,(nkpt,num_nnmax))
 ABI_MALLOC(atom_symbols,(natom))
 ABI_MALLOC(xcart,(3,natom))
 ABI_MALLOC(band_in,(mband,nsppol))
 ABI_MALLOC(proj_site,(3,mband,nsppol))
 ABI_MALLOC(proj_l,(mband,nsppol))
 ABI_MALLOC(proj_m,(mband,nsppol))
 ABI_MALLOC(proj_radial,(mband,nsppol))
 ABI_MALLOC(proj_x,(3,mband,nsppol))
 ABI_MALLOC(proj_s_loc,(mband))
 ABI_MALLOC(proj_s_qaxis_loc,(3,mband))
 ABI_MALLOC(proj_z,(3,mband,nsppol))
 ABI_MALLOC(proj_zona,(mband,nsppol))
 ! ABI_MALLOC(exclude_bands, (mband,nsppol))
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!2) Call to  Wannier setup
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
 nullify(A_matrix)

 !
 call mlwfovlp_setup(atom_symbols,band_in,dtset,filew90_win,gamma_only,&
&  g1,lwanniersetup,mband,natom,nband_inc,nkpt,&
&  nntot,num_bands,num_nnmax,nsppol,nwan,ovikp,&
&  proj_l,proj_m,proj_radial,proj_site,proj_s_loc, proj_s_qaxis_loc, proj_x,proj_z,proj_zona,&
&  real_lattice,recip_lattice,rprimd,seed_name,spinors,xcart,xred,exclude_bands)

 do isppol=1, nsppol
   write(msg, '(6a)' ) ch10,&
   '   mlwfovlp :  mlwfovlp_setup done -',ch10,&
   '-  see ',trim(filew90_wout(isppol)),' for details.'
   call wrtout(units, msg)
 end do

 ! some allocations after wannier90 setup
 max_num_bands=maxval(num_bands(:))
 mwan=maxval(nwan(:))
 ABI_MALLOC(eigenvalues_w,(max_num_bands,nkpt,nsppol))
 ABI_MALLOC(M_matrix,(max_num_bands,max_num_bands,nntot,nkpt,nsppol))
 ABI_MALLOC(A_matrix,(max_num_bands,mwan,nkpt,nsppol))

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!3) Write Eigenvalues (file seed_name.eig)
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 if (leig) then
   call write_eigenvalues(filew90_eig,eigen, band_in,  eigenvalues_w,  nsppol, nkpt, mband,  dtset, rank, master )
 end if !leig

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!4) Calculate overlaps (file seed_name.mmn)
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!
!First calculate indices and shift
!
!write(std_out,*) "Computes shift for cg"
 write(msg, '(a,a)' ) ch10,'   mlwfovlp : compute shifts for g-points '
 call wrtout(std_out, msg)
!----------------------------------------------------------------------
!Compute shifts for g points (icg,iwav)
!(here mband is not used, because shifts are internal variables of abinit)
!----------------------------------------------------------------------
 !call mywfc%init(cg, cprj, dtset, dtfil, hdr, &
 !     & MPI_enreg, nprocs, psps, pawtab, rank)



 !TODO uncomment
! call mywfc%kset%set_ovikp( ovikp=ovikp, nntot=nntot, num_nnmax=num_nnmax)
!
!Shifts computed.
!
 if( lmmn) then
!
!  In case of parallelization write out cg for all k-points
!
   !if (nprocs > 1) then
     ! call write_cg_and_cprj(dtset, cg, cprj, dtfil, iwav, npwarr, mband, natom, &
     !      &nsppol, nkpt,  MPI_enreg, rank, psps, pawtab)
     ! call mywfc%write_cg_and_cprj_tmpfile()
   !end if !MPI nprocs>1
!
!  End of MPI preliminarities
!  Calculate PW contribution of overlaps
!
   ABI_MALLOC(cm1,(2,mband,mband,nntot,nkpt,nsppol))
   ! this loops over spin internally
!   call mlwfovlp_pw(cg,cm1,g1,iwav,kg,mband,&
!&   mkmem,mpi_enreg,mpw,nfft,ngfft,nkpt,nntot,&
!&   npwarr,dtset%nspinor,nsppol,ovikp,dtfil%fnametmp_cg)
      call mlwfovlp_pw(mywfc,cm1,g1,kg,mband, mkmem,mpi_enreg,mpw,nfft,ngfft,nkpt,nntot,&
                       npwarr,hdr%nspinor,nsppol,ovikp)

   !mlwfovlp_pw(mywfc,cm1,g1,kg,mband,mkmem,mpi_enreg,mpw,nfft,ngfft,nkpt,nntot,&
   !     &  npwarr,nspinor,nsppol,ovikp,seed_name)
   write(msg, '(a,a)' ) ch10,'   mlwfovlp: PW part of overlap computed   '
   call wrtout(std_out, msg)
!
!  compute PAW Contribution and add it to PW contribution
!
   if(psps%usepaw==1) then
     write(msg, '(a,a)' ) ch10,'** smatrix_pawinit: PAW part of overlap  '
     call wrtout(std_out, msg)
     ABI_MALLOC(cm2_paw,(2,mband,mband))
     do isppol=1,nsppol
       do ikpt1=1,nkpt
         ! MPI:cycle over k-points not treated by this node
         if (nprocs>1 ) then !sometimes we can have just one processor
           if ( ABS(MPI_enreg%proc_distrb(ikpt1,1,isppol)-rank)  /=0) CYCLE
         end if

         write(msg, '(a,i0,a,2(i0,1x))')'   processor',rank,' computes PAW part for kpt and spin',ikpt1,isppol
         call wrtout(std_out,  msg)

         do intot=1,nntot
           ikpt2= ovikp(ikpt1,intot)
           g1temp(:)=g1(:,ikpt1,intot)
           ! TODO : smatrix_pawinit: use high level wfd.
             cprj_ptr=>mywfc%get_cprj_ptr()
           call smatrix_pawinit(atindx1,cm2_paw,cprj_ptr,ikpt1,ikpt2,isppol,&
&           g1temp,gprimd,hdr%kptns,mband,mband,mkmem,mpi_enreg,&
&           natom,dtset%nband,nkpt,dtset%nspinor,nsppol,dtset%ntypat,pawang,pawrad,pawtab,rprimd,&
&           dtfil%fnametmp_cprj,dtset%typat,xred)
!          cm1(:,:,:,intot,ikpt1,isppol)=four_pi*cm2_paw(:,:,:)
!           write(6,*) "ikpt1=",ikpt1
!           do iband=1,mband
!             write(6,*) "iband=",iband
!             write(6,*) "Wannier PW       overlap",cm1(:,iband,iband,intot,ikpt1,isppol)
!             write(6,*) "Wannier PAW      overlap",four_pi*cm2_paw(:,iband,iband)
!             write(6,*) "Wannier PW+PAW   overlap",cm1(:,iband,iband,intot,ikpt1,isppol)+four_pi*cm2_paw(:,iband,iband)
!           enddo
           cm1(:,:,:,intot,ikpt1,isppol)=cm1(:,:,:,intot,ikpt1,isppol)+four_pi*cm2_paw(:,:,:)
         end do ! intot
       end do ! ikpt1
     end do ! isppol
     ABI_FREE(cm2_paw)
     write(msg, '(a,a)' ) ch10,'   mlwfovlp : PAW part of overlap computed '
     call wrtout(std_out,  msg)
   end if ! usepaw

   call xmpi_sum(cm1,comm,ierr)

   call write_Mmn(filew90_mmn, band_in, cm1, ovikp, g1, M_matrix,  nkpt, nsppol, nntot, mband, num_bands, msg, iam_master=(rank==master))
   ABI_FREE(cm1)

   !
   !  erase temporary files created for parallel runs
   !
   !if (nprocs > 1) call mywfc%remove_tmpfile(prtvol)
   !end if !MPI nprocs>1
 end if !lmmn

 !Deallocate arrays no longer needed
 ABI_FREE(ovikp)
 ABI_FREE(g1)

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!5) Calculate initial projections
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

 if(dtset%w90iniprj/=0 )  then

   !  Set value for lproj (type of projections to be computed)
   !  In PAW, options 5 and 6 are not in use.
   !  5 means that there will be a contribution from inside the spheres and another from the PW part
   !  6 means that we take into account just the inside-spheres contribution
   !  2 means that PW part will be calculated
   lproj=dtset%w90iniprj
   if(dtset%w90iniprj == 5 ) lproj=2 ! Necessary to calculate PW contribution
   ABI_MALLOC(just_augmentation,(mwan,nsppol))
   just_augmentation(:,:)=.false.

   if( psps%usepaw==1 .and. (dtset%w90iniprj==2 .or. dtset%w90iniprj>4)) then
     if (dtset%w90iniprj==6) just_augmentation(:,:)=.true.
     if (dtset%w90iniprj==5) then
       do isppol=1,nsppol
         do iwan=1,nwan(isppol)
           ! Trick to skip the planewave contribution for some Wannier functions
           ! (Not in production).
           if(proj_radial(iwan,isppol) > 4) then
             just_augmentation(iwan,isppol)=.true.
             proj_radial(iwan,isppol)=proj_radial(iwan,isppol)-3
             write(msg, '(2a,2(i0,1x))')'   ','Skiping planewave contribution for iwan, ispin=',iwan,isppol
             call wrtout(std_out,  msg)
           end if !proj_radial>4
         end do !iwan
       end do !isppol
     end if !w90iniprj == 5
   end if !paw
!
!  Call mlwfovlp_proj (plane waves part of projections)
!
   if (dtset%w90iniprj/=6) then ! option 6 not yet in use
!     call mlwfovlp_proj(A_matrix,band_in,cg,cprj,dtset,gprimd,just_augmentation,kg,&
!&     lproj,max_num_bands,mband,mkmem,mpi_enreg,mpw,mwan,natom,&
!&     nattyp,nkpt,npwarr,&
!&     dtset%nspinor,nsppol,ntypat,num_bands,nwan,pawtab,proj_l,proj_m,&
!&     proj_radial,proj_site,proj_x,proj_z,proj_zona,psps,ucvol)
      call mlwfovlp_proj(A_matrix,band_in,mywfc,dtset,gprimd,just_augmentation,kg,&
&     lproj,max_num_bands,mband,mkmem,mpi_enreg,mpw,mwan,natom,&
&     nattyp,nkpt,npwarr,&
&     dtset%nspinor,nsppol,ntypat,num_bands,nwan,pawtab,proj_l,proj_m,&
&     proj_radial,proj_site,proj_x,proj_z,proj_zona,psps,ucvol)
     write(msg, '(a,a,a,a)' ) ch10,&
     '   mlwfovlp:  mlwfovlp_proj done -',ch10,&
     '   Projectors computed.'
     call wrtout(std_out, msg)
   end if !w90proj/=6
!
!  Calculate inside-sphere part of projections (PAW)
!
   if (psps%usepaw ==1 .and. ( dtset%w90iniprj>4)) then
     ABI_MALLOC(A_paw,(max_num_bands,mwan,nkpt,nsppol))
     call mlwfovlp_projpaw(A_paw,band_in,mywfc,just_augmentation,max_num_bands,mband,mkmem,&
&     mwan,natom,dtset%nband,nkpt,&
&     dtset%nspinor,nsppol,dtset%ntypat,nwan,pawrad,pawtab,&
&     proj_l,proj_m,proj_radial,proj_site,proj_x,proj_z,proj_zona,psps,&
&     rprimd,dtset%typat,xred)
!
     write(msg, '(a,a,a,a)' ) ch10,&
       '   mlwfovlp:  mlwfovlp_proj_paw done -',ch10,&
       '   Inside-spheres part of projectors computed.'
     call wrtout(std_out, msg)
!
!    Add in-sphere contribution to A_matrix
!
!
!    w90iniprj==5. Plane waves + augmentation contributions
!
     if(dtset%w90iniprj==5) A_matrix(:,:,:,:)=A_matrix(:,:,:,:)+A_paw(:,:,:,:)
!
!    w90iniprj==6. Just augmentation contribution
!
     if(dtset%w90iniprj==6) A_matrix(:,:,:,:)=A_paw(:,:,:,:)
     ABI_FREE(A_paw)
   end if !usepaw==1

   ABI_FREE(just_augmentation)
   call xmpi_sum(A_matrix,comm,ierr)

   ! write projections to file
   if (rank==master) then
     if(dtset%w90iniprj==1) then
       call write_Amn(A_matrix, filew90_ramn, nsppol, mband, nkpt, num_bands, nwan, band_in)
     else
       call write_Amn(A_matrix, filew90_amn, nsppol, mband, nkpt, num_bands, nwan, band_in)
     end if
   end if
 end if !dtset%w90iniprj/=0

 ! Deallocations
 ABI_FREE(proj_site)
 ABI_FREE(proj_l)
 ABI_FREE(proj_m)
 ABI_FREE(proj_radial)
 ABI_FREE(proj_x)
 ABI_FREE(proj_z)
 ABI_FREE(proj_zona)
 ABI_FREE(proj_s_loc)
 ABI_FREE(proj_s_qaxis_loc)


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!6) write files for wannier function plot
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 if( dtset%w90prtunk>0) then
    call compute_and_write_unk(wfnname, psps%usepaw, dtset%w90prtunk, &
         & mpi_enreg, ngfft, nsppol, dtset%nspinor,  &
         & nkpt, mband,  mpw, mgfftc, mkmem,  nprocs, rank, npwarr, &
         & band_in,  dtset, kg, mywfc)
 end if !dtset%w90prtunk
!

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!7) Call to  Wannier90
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 if (lwannierrun) then
   if (lwanniersetup.ne.1) ABI_ERROR("lwanniersetup.ne.1")
   ABI_MALLOC(U_matrix,(mwan,mwan,nkpt,nsppol))
   ABI_MALLOC(U_matrix_opt,(max_num_bands,mwan,nkpt,nsppol))
   ABI_MALLOC(lwindow,(max_num_bands,nkpt,nsppol))
   ABI_MALLOC(wann_centres,(3,mwan,nsppol))
   ABI_MALLOC(wann_spreads,(mwan,nsppol))
   ! Initialize
   U_matrix(:,:,:,:)=czero
   U_matrix_opt(:,:,:,:)=czero
   lwindow(:,:,:)=.false.
   wann_centres(:,:,:)=zero
   wann_spreads(:,:)=zero
   ! write(std_out,*) seed_name; write(std_out,*) ngkpt
   ABI_CHECK(isdiagmat(dtset%kptrlatt), "kptrlatt should be diagonal! Please use nkgpt with one shift")
   ngkpt(1)=dtset%kptrlatt(1,1)
   ngkpt(2)=dtset%kptrlatt(2,2)
   ngkpt(3)=dtset%kptrlatt(3,3)

!  write(std_out,*) nkpt
!  write(std_out,*) rprimd*Bohr_Ang
!  write(std_out,*) two_pi*gprimd/Bohr_Ang
!  write(std_out,*) mband
!  write(std_out,*) "nwan",nwan
!  write(std_out,*) nntot
!  write(std_out,*) natom
!  write(std_out,*) atom_symbols
!  write(std_out,*) xcart
!  write(std_out,*) num_bands,num_bands,nntot,nkpt
!  write(std_out,*) wann_spreads
!  wann_spreads=2
!  do i=1, nkpt
!  do j=1, nntot
!  write(std_out,*) i,j
!  do k=1, num_bands
!  do l=1, num_bands
!  write(std_out,*) "m",M_matrix(l,k,j,i,1)
!  enddo
!  enddo
!  enddo
!  enddo

#if defined HAVE_WANNIER90
   do isppol=1,nsppol
     ! when nsppol>1, master runs isppol 1 and rank==1 runs isppol 2
     if(nprocs>1 .and. isppol==1.and.rank.ne.master) cycle
     if(nprocs>1 .and. isppol==2.and.rank.ne.1) cycle

     write(msg, '(8a)' ) ch10,&
       '** mlwfovlp:   calling wannier90 library subroutine wannier_run ',ch10,&
       '   Calculation is running         ',ch10,&
       '-  see ',trim(filew90_wout(isppol)),' for details.'
     call wrtout(std_out, msg)

     call wannier_run(trim(seed_name(isppol)),ngkpt,nkpt,&                                                    ! input
       real_lattice,recip_lattice,hdr%kptns,num_bands(isppol),&                                               ! input
       nwan(isppol),nntot,natom,atom_symbols,&                                                                ! input
       xcart*Bohr_Ang,gamma_only,M_matrix(:,:,:,:,isppol),A_matrix(:,:,:,isppol),eigenvalues_w(:,:,isppol),&  ! input
       U_matrix(1:nwan(isppol),1:nwan(isppol),:,isppol),&                                                     ! output
       U_matrix_opt(1:num_bands(isppol),1:nwan(isppol),:,isppol),&                                            ! output
       lwindow_loc=lwindow(1:num_bands(isppol),:,isppol),&                                                    ! output
       wann_centres_loc=wann_centres(:,1:nwan(isppol),isppol),&                                               ! output
       wann_spreads_loc=wann_spreads(1:nwan(isppol),isppol),spread_loc=spreadw(:,isppol))                     ! output

     write(msg, '(7a)' ) ch10,&
       '   mlwfovlp :  mlwfovlp_run completed -',ch10,&
       '-  see ',trim(filew90_wout(isppol)),' for details.',ch10
     call wrtout(units, msg)
   end do !isppol

   ! collect output of wannier90 from different processors
   call xmpi_sum(U_matrix,comm,ierr)
   call xmpi_sum(U_matrix_opt,comm,ierr)
   call xmpi_lor(lwindow,comm)
   call xmpi_sum(wann_centres,comm,ierr)
   call xmpi_sum(wann_spreads,comm,ierr)

   ! Output ABIWAN.nc file
   if (hdr%kptopt == 0) then
     ABI_WARNING("Output of ABIWAN.nc requires kptopt /= 0. ABIWAN.nc file won't be produced!")
     ! Need kptrlatt in wigner_seitz and client code needs to know the k-grid.
   end if

   if (rank == master .and. hdr%kptopt /= 0) then
     abiwan_fname = strcat(dtfil%filnam_ds(4), "_ABIWAN.nc")
     call wrtout(std_out, sjoin(" Saving wannier90 ouput results to:", abiwan_fname))
     call wigner_seitz([zero, zero, zero], [2, 2, 2], dtset%kptrlatt, crystal%rmet, &
                       nrpts, irvec_r_h, ndegen_h, rmods_r_h, prtvol=prtvol)
     ! We know if disentanglement has been done by looking at the output values of lwindow
     ! Not elegant but it is the only way to avoid the parsing of the wannier input.
     ! In wannier_run lwindow is set to True if not disentanglement
     have_disentangled_spin = 0
     do isppol=1,nsppol
       !if nwan(isppol) < num_bands(isppol)
       if (.not. all(lwindow(:,:,isppol))) have_disentangled_spin(isppol) = 1
     end do

     NCF_CHECK(nctk_open_create(ncid, abiwan_fname, xmpi_comm_self))
     NCF_CHECK(hdr%ncwrite(ncid, fform_from_ext("ABIWAN"), nc_define=.True.))
     NCF_CHECK(crystal%ncwrite(ncid))
     NCF_CHECK(ebands%ncwrite(ncid))

     ncerr = nctk_def_dims(ncid, [ &
       nctkdim_t("mwan", mwan), &
       nctkdim_t("max_num_bands", max_num_bands), &
       nctkdim_t("nrpts", nrpts) &
     ], defmode=.True.)
     NCF_CHECK(ncerr)

     ncerr = nctk_def_iscalars(ncid, [character(len=nctk_slen) :: "nntot"])
     NCF_CHECK(ncerr)
     !ncerr = nctk_def_dpscalars(ncid, [character(len=nctk_slen) :: "fermi_energy", "smearing_width"])
     !NCF_CHECK(ncerr)

     ncerr = nctk_def_arrays(ncid, [ &
       nctkarr_t("nwan", "int", "number_of_spins"), &
       nctkarr_t("num_bands", "int", "number_of_spins"), &
       nctkarr_t("band_in_int", "int", "max_number_of_states, number_of_spins"), &
       nctkarr_t("lwindow_int", "int", "max_num_bands, number_of_kpoints, number_of_spins"), &
       nctkarr_t("exclude_bands", "int", "max_number_of_states, number_of_spins"), &
       nctkarr_t("spread", "dp", "three, number_of_spins"), &
       !nctkarr_t("A_matrix", "dp", "two, max_num_bands, mwan, number_of_kpoints, number_of_spins"), &
       nctkarr_t("irvec", "int", "three, nrpts"), &
       nctkarr_t("ndegen", "int", "nrpts"), &
       nctkarr_t("have_disentangled_spin", "int", "number_of_spins"), &
       nctkarr_t("U_matrix", "dp", "two, mwan, mwan, number_of_kpoints, number_of_spins"), &
       nctkarr_t("U_matrix_opt", "dp", "two, max_num_bands, mwan, number_of_kpoints, number_of_spins"), &
       nctkarr_t("wann_centres", "dp", "three, mwan, number_of_spins"), &
       nctkarr_t("wann_spreads", "dp", "mwan, number_of_spins") &
     ])
     NCF_CHECK(ncerr)

     ! Write data.
     NCF_CHECK(nctk_set_datamode(ncid))
     NCF_CHECK(nf90_put_var(ncid, nctk_idname(ncid, "nntot"), nntot))
     NCF_CHECK(nf90_put_var(ncid, nctk_idname(ncid, "nwan"), nwan))
     NCF_CHECK(nf90_put_var(ncid, nctk_idname(ncid, "num_bands"), num_bands))
     NCF_CHECK(nf90_put_var(ncid, nctk_idname(ncid, "band_in_int"), l2int(band_in)))
     NCF_CHECK(nf90_put_var(ncid, nctk_idname(ncid, "lwindow_int"), l2int(lwindow)))
     NCF_CHECK(nf90_put_var(ncid, nctk_idname(ncid, "exclude_bands"), exclude_bands))
     NCF_CHECK(nf90_put_var(ncid, nctk_idname(ncid, "spread"), spreadw))
     !NCF_CHECK(nf90_put_var(ncid, nctk_idname(ncid, "A_matrix"), c2r(A_matrix)))
     NCF_CHECK(nf90_put_var(ncid, nctk_idname(ncid, "irvec"), irvec_r_h))
     NCF_CHECK(nf90_put_var(ncid, nctk_idname(ncid, "ndegen"), ndegen_h))
     NCF_CHECK(nf90_put_var(ncid, nctk_idname(ncid, "have_disentangled_spin"), have_disentangled_spin))
     NCF_CHECK(nf90_put_var(ncid, nctk_idname(ncid, "U_matrix"), c2r(U_matrix)))
     NCF_CHECK(nf90_put_var(ncid, nctk_idname(ncid, "U_matrix_opt"), c2r(U_matrix_opt)))
     NCF_CHECK(nf90_put_var(ncid, nctk_idname(ncid, "wann_centres"), wann_centres))
     NCF_CHECK(nf90_put_var(ncid, nctk_idname(ncid, "wann_spreads"), wann_spreads))
     NCF_CHECK(nf90_close(ncid))

     ABI_FREE(irvec_r_h)
     ABI_FREE(ndegen_h)
     ABI_FREE(rmods_r_h)
   end if

!  CALL SILVESTRELLI'S APPROACH TO EVALUATE vdW INTERACTION ENERGY USING MLWF!!
!  ----------------------------------------------------------------------------------------------
   if (dtset%vdw_xc==10.or.dtset%vdw_xc==11.or.dtset%vdw_xc==12.or.dtset%vdw_xc==14.and.rank==master) then
     ! vdw_xc==10,11,12,14 starts the vdW interaction using MLWFs
     call evaluate_vdw_with_mlwf()
   end if

#else
   ABI_UNUSED(occ)
#endif
   !  FIXME: looks like there is no automatic test which goes through here: g95 bot did not catch the missing deallocations
   ABI_FREE(wann_centres)
   ABI_FREE(wann_spreads)
   ABI_FREE(U_matrix)
   ABI_FREE(U_matrix_opt)
   ABI_FREE(lwindow)
 end if !lwannierrun

 ABI_FREE(band_in)
 ABI_FREE(atom_symbols)
 ABI_FREE(xcart)
 ABI_FREE(eigenvalues_w)
 ABI_FREE(M_matrix)
 ABI_FREE(A_matrix)
 ! ABI_FREE(exclude_bands)

 call mywfc%free()
 ABI_FREE_SCALAR(mywfc)

contains
!!***

!--------------------------------------------------------------------------------
!> van der Waals interaction energy using MLWFs
!--------------------------------------------------------------------------------
  subroutine evaluate_vdw_with_mlwf()
#ifdef HAVE_WANNIER90
    integer :: ii, jj, ikpt, iband, kk
    real(dp) :: corrvdw
    complex(dpc) :: caux,caux2,caux3
    real(dp),allocatable :: csix(:,:,:,:)
    real(dpc),allocatable :: occ_arr(:,:,:),occ_wan(:,:,:)
    real(dp),allocatable :: tdocc_wan(:,:)

     write(std_out,*) 'nwan(nsppol)=',ch10
     do ii=1,nsppol
       write(std_out,*) 'nsppol=',ii, 'nwan(nsppol)=',nwan(ii),ch10
     end do
     write(std_out,*) 'mwan=', mwan, ch10

     ABI_MALLOC(occ_arr,(mband,nkpt,isppol))
     ABI_MALLOC(occ_wan,(mwan,nkpt,nsppol))
     ABI_MALLOC(tdocc_wan,(mwan,nsppol))

     occ_arr(:,:,:)=zero
     occ_wan(:,:,:)=zero
     tdocc_wan(:,:)=zero
     jj = 0
     do isppol=1,nsppol
       do ikpt=1,nkpt
         do iband=1,num_bands(isppol)
           jj = jj + 1
           occ_arr(iband,ikpt,isppol) = occ(jj)
         end do
       end do
     end do

     do isppol=1,nsppol
       do ikpt=1,nkpt
         do iwan=1,nwan(isppol)
           caux=czero
           caux2=czero
           caux3=czero
           do iband=1,num_bands(isppol) !nband_inc(isppol) !nwan(isppol)
             do ii=1,nwan(isppol)
               caux=U_matrix(ii,iwan,ikpt,isppol)*U_matrix_opt(iband,ii,ikpt,isppol)
!              DEBUG
!              if(ISNAN(dble(caux))) then
!              write(std_out,*) 'NaN: caux(ikpt,iwan,iband,ii):',ikpt,iwan,iband,ii,ch10
!              end if
!              END DEBUG
               do kk=1,nwan(isppol)
                 caux2=conjg(U_matrix(kk,iwan,ikpt,isppol))*conjg(U_matrix_opt(iband,kk,ikpt,isppol))
                 caux3= caux3+caux*caux2*occ_arr(iband,ikpt,isppol) !take care here as exclude_bands case is not well
!                DEBUG
!                if(ISNAN(dble(caux2))) then
!                write(std_out,*) 'NaN: caux2(ikpt,iwan,iband,kk):',ikpt,iwan,iband,kk,ch10
!                end if
!                if(ISNAN(dble(caux3))) then
!                write(std_out,*) 'NaN: caux3(ikpt,iwan,iband,kk,jj):',ikpt,iwan,iband,kk,jj
!                end if
!                END DEBUG
               end do
             end do
           end do
           occ_wan(iwan,ikpt,isppol) = dble(caux3)
!          DEBUG
!          write(std_out,*) occ_wan(iwan,ikpt,isppol)
!          END DEBUG
!          end do
         end do
       end do
     end do

     write(std_out,*) ch10,'MLWFs Occupation Matrix diagonal terms:',ch10

     do jj=1,nsppol
       forall(iwan=1:nwan(jj)) tdocc_wan(iwan,jj) = sum(occ_wan(iwan,1:nkpt,jj)) / real(nkpt,dp)
       write(std_out,*) 'tdocc_wan(iwan),isppol:',ch10
       write(std_out,*) (tdocc_wan(iwan,jj),iwan=1,nwan(jj)),jj
     end do

     ABI_MALLOC(csix,(mwan,mwan,nsppol,nsppol))

     call evdw_wannier(csix,corrvdw,mwan,natom,nsppol,nwan,tdocc_wan,dtset%vdw_nfrag,&
&     dtset%vdw_supercell,dtset%vdw_typfrag,dtset%vdw_xc,rprimd,wann_centres,wann_spreads,xcart)

     ABI_FREE(csix)
     ABI_FREE(occ_arr)
     ABI_FREE(occ_wan)
     ABI_FREE(tdocc_wan)

#endif
end subroutine evaluate_vdw_with_mlwf

end subroutine mlwfovlp
!!***

!!****f* m_mlwfovlp/mlwfovlp_seedname
!! NAME
!! mlwfovlp_seedname
!!
!! FUNCTION
!! Get seed name and file names of all wannier90 related files
!!
!! INPUTS
!! fname_w90=root name of file appended with _w90
!!
!! OUTPUT
!! filew90_win= main input file for Wannier90
!! filew90_wout= main output file for Wannier90
!! filew90_amn= file containing Amn matrix
!! filew90_ramn= file containing Amn matrix (random initial projections)
!! filew90_mmn= file containing Mmn matrix
!! filew90_eig= file containing eigenvalues
!! nsppol= number of spin polarizations
!! seed_name= common seed name for all wannier90 related files
!!
!! SOURCE

subroutine mlwfovlp_seedname(fname_w90,filew90_win,filew90_wout,filew90_amn,&
& filew90_ramn,filew90_mmn,filew90_eig,nsppol,seed_name)

!Arguments ------------------------------------
 integer,intent(in) :: nsppol
 character(len=fnlen),intent(out) :: filew90_win(nsppol),filew90_wout(nsppol),filew90_amn(nsppol),filew90_ramn(nsppol)
 character(len=fnlen),intent(out) :: filew90_mmn(nsppol),filew90_eig(nsppol),seed_name(nsppol)
 character(len=fnlen),intent(in) :: fname_w90

!Local variables-------------------------------
 integer:: isppol, units(2)
 character(len=fnlen) :: test_win1,test_win2,test_win3
 logical :: lfile
 character(len=2000) :: msg
 character(len=10):: postfix
! *************************************************************************

 units = [std_out, ab_out]
 seed_name(:)=trim(fname_w90)

 do isppol=1,nsppol
   if(nsppol==1) postfix='.win'
   if(nsppol==2 .and. isppol==1) postfix='_up.win'
   if(nsppol==2 .and. isppol==2) postfix='_down.win'

   filew90_win(isppol) = trim(seed_name(isppol))//trim(postfix)
   test_win1 = filew90_win(isppol)
   inquire(file=filew90_win(isppol),exist=lfile)

   if(.not.lfile) then
     seed_name(isppol)='wannier90'
     filew90_win(isppol)=trim(seed_name(isppol))//trim(postfix)
     test_win2=filew90_win(isppol)
     inquire(file=filew90_win(isppol),exist=lfile)
   end if

   if(.not.lfile) then
     seed_name(isppol)='w90'
     filew90_win=trim(seed_name(isppol))//trim(postfix)
     test_win3=filew90_win(isppol)
     inquire(file=filew90_win(isppol),exist=lfile)
   end if

   if(.not. lfile) then
     write(msg,'(12a)')&
      ' wannier90 interface needs one of the following input files:',ch10,&
      '      ',trim(test_win1),ch10,&
      '      ',trim(test_win2),ch10,&
      '      ',trim(test_win3),ch10,&
      ' Action: read wannier90 tutorial and/or user manual and supply proper *.win file'
     ABI_ERROR(msg)
   end if
 end do !isppol

 ! Files having different names for different spin polarizations
 if(nsppol==1) then
   filew90_win(1) =trim(seed_name(1))//'.win'
   filew90_wout(1)=trim(seed_name(1))//'.wout'
   filew90_ramn(1)=trim(seed_name(1))//'random.amn'
   filew90_amn(1) =trim(seed_name(1))//'.amn'
   filew90_mmn(1) =trim(seed_name(1))//'.mmn'
   filew90_eig(1) =trim(seed_name(1))//'.eig'
 elseif(nsppol==2) then
   filew90_win(1) =trim(seed_name(1))//'_up.win'
   filew90_win(2) =trim(seed_name(2))//'_down.win'
   filew90_wout(1)=trim(seed_name(1))//'_up.wout'
   filew90_wout(2)=trim(seed_name(2))//'_down.wout'
   filew90_ramn(1)=trim(seed_name(1))//'random_up.amn'
   filew90_ramn(2)=trim(seed_name(2))//'random_down.amn'
   filew90_amn(1)=trim(seed_name(1))//'_up.amn'
   filew90_amn(2)=trim(seed_name(2))//'_down.amn'
   filew90_mmn(1)=trim(seed_name(1))//'_up.mmn'
   filew90_mmn(2)=trim(seed_name(2))//'_down.mmn'
   filew90_eig(1)=trim(seed_name(1))//'_up.eig'
   filew90_eig(2)=trim(seed_name(2))//'_down.eig'
 end if

 ! change also seed_name for nsppol=2
 if(nsppol==2) then
   seed_name(1)=trim(seed_name(1))//'_up'
   seed_name(2)=trim(seed_name(2))//'_down'
 end if

 write(msg, '(a,a)' ) ch10,'---------------------------------------------------------------'
 call wrtout(units, msg)
 write(msg, '(5a)' ) ch10,&
 '  Calculation of overlap and call to wannier90 library ',ch10,&
 '  to obtain maximally localized wannier functions ',ch10
 call wrtout(units, msg)

 if(nsppol==1) then
   write(msg, '(23a)' ) &
    '  - ',trim(filew90_win(1)),' is a mandatory secondary input',ch10,&
    '  - ',trim(filew90_wout(1)),' is the output for the library',ch10,&
    '  - ',trim(filew90_ramn(1)),' contains random projections',ch10,&
    '  - ',trim(filew90_amn(1)),' contains projections',ch10,&
    '  - ',trim(filew90_mmn(1)),' contains the overlap',ch10,&
    '  - ',trim(filew90_eig(1)),' contains the eigenvalues'
 else if(nsppol==2) then
   write(msg, '(41a)' ) &
    '  - ',trim(filew90_win(1)),&
    ' and ',trim(filew90_win(2)),ch10,'are mandatory secondary input',ch10,&
    '  - ',trim(filew90_wout(1)),&
    ' and ',trim(filew90_wout(2)),ch10,' are the output for the library',ch10,&
    '  - ',trim(filew90_ramn(1)),&
    ' and ',trim(filew90_ramn(2)),ch10,' contain random projections',ch10,&
    '  - ',trim(filew90_amn(1)),&
    ' and ',trim(filew90_amn(2)),ch10,' contain projections',ch10,&
    '  - ',trim(filew90_mmn(1)),&
    ' and ',trim(filew90_mmn(2)),ch10,' contain the overlap',ch10,&
    '  - ',trim(filew90_eig(1)),&
    ' and ',trim(filew90_eig(2)),ch10,' contain the eigenvalues'
 end if
 call wrtout(units, msg)

 write(msg, '(a,a)' ) ch10,'---------------------------------------------------------------'
 call wrtout(units, msg)

end subroutine mlwfovlp_seedname
!!***

!!****f* m_mlwfovlp/mlwfovlp_setup
!! NAME
!! mlwfovlp_setup
!!
!! FUNCTION
!! Routine which creates table g1 and ovikp  necessary to compute
!! overlap for Wannier code (www.wannier.org f90 version).
!!
!! INPUTS
!!  atom_symbols(natom)= table of symbol for each atom
!!                                          and each |p_lmn> non-local projector
!!  dtset <type(dataset_type)>=all input variables for this dataset
!!  filew90_win(nsppol) secondary input files for w90
!!  lwanniersetup= flag: only 1 is fully working.
!!  natom              =number of atoms in cell.
!!  mband=maximum number of bands
!!  natom=number of atoms in cell.
!!  nkpt=number of k points.
!!  num_bands(isppol)=number of bands actually used to construct the wannier function
!!  nwan(isppol)= number of wannier fonctions (read in wannier90.win).
!!  dtset <type(dataset_type)>=all input variables for this dataset
!!  real_lattice(3,3)=dimensional primitive translations for real space
!!                 in format required by wannier90
!!  recip_lattice(3,3)=dimensional primitive translations for reciprocal space
!!                 in format required by wannier90
!!  rprimd(3,3)=dimensional primitive translations for real space (bohr)
!!  seed_name=character string for generating wannier90 filenames
!!  xcart(3,natom)=atomic coordinates in bohr
!!  xred(3,natom)=reduced dimensionless atomic coordinates
!!
!! OUTPUT
!!  band_in(mband,nsppol)   = band to take into account for wannier calculation
!!  g1(3,nkpt,nntot) = G vector shift which is necessary to obtain k1+b
!!                     from k2 in the case where k1+b does not belong to the 1st BZ.
!!  nband_inc(nsppol) = # of included bands
!!  nntot            = number of k-point neighbour
!!  ovikp(nkpt,nntot)= gives nntot value of k2 (in the BZ) for each k1  (k2=k1+b mod(G))
!!
!! SIDE EFFECTS
!!  (only writing, printing)
!!
!! SOURCE

 subroutine mlwfovlp_setup(atom_symbols,band_in,dtset,filew90_win,gamma_only,&
& g1,lwanniersetup,mband,natom,nband_inc,nkpt,&
& nntot,num_bands,num_nnmax,nsppol,nwan,ovikp,&
& proj_l,proj_m,proj_radial,proj_site,proj_s_loc, &
& proj_s_qaxis_loc,proj_x,proj_z,proj_zona,&
& real_lattice,recip_lattice,rprimd,seed_name,spinors,xcart,xred,exclude_bands)

!Arguments---------------------------
!scalars
 integer,intent(in) :: lwanniersetup,mband,natom,nkpt,nsppol
 integer,intent(in) :: num_nnmax
 integer,intent(out) :: nband_inc(nsppol),nntot,num_bands(nsppol),nwan(nsppol)
 logical,intent(in) :: gamma_only,spinors
 type(dataset_type),intent(in) :: dtset
!arrays
 integer,intent(out) :: g1(3,nkpt,num_nnmax),ovikp(nkpt,num_nnmax)
 integer,intent(out) :: proj_l(mband,nsppol),proj_m(mband,nsppol),proj_radial(mband,nsppol)
 real(dp),intent(in) :: real_lattice(3,3)
 real(dp),intent(in) :: recip_lattice(3,3),rprimd(3,3),xred(3,natom)
 real(dp),intent(out) :: proj_site(3,mband,nsppol),proj_x(3,mband,nsppol),proj_z(3,mband,nsppol)
 real(dp),intent(out) :: proj_zona(mband,nsppol),xcart(3,natom)
 logical,intent(out) :: band_in(mband,nsppol)
 integer,intent(out) :: exclude_bands(mband,nsppol)
 character(len=3),intent(out) :: atom_symbols(natom)
 character(len=fnlen),intent(in) :: seed_name(nsppol),filew90_win(nsppol)
 integer, optional, intent(out) :: proj_s_loc(mband)
 real(dp), optional, intent(out) :: proj_s_qaxis_loc(3,mband)

!Local variables---------------------------
!scalars
 integer :: iatom,icb,ikpt,ikpt1,intot,isppol,itypat,jj,mband_,unt
 real(dp) :: znucl1
 character(len=2) :: symbol
 character(len=500) :: msg
 character(len=fnlen) :: filew90_nnkp
 type(atomdata_t) :: atom
!arrays
 integer :: ngkpt(3)
! *************************************************************************

 !^^^^^^^^^^^^^^^^read wannier90.nnkp^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 if (lwanniersetup==0) then  !this part is not coded for nsppol>1
   isppol=1
   filew90_nnkp=trim(seed_name(isppol))//'.nnkp'
   if (open_file(filew90_nnkp,msg,newunit=unt,form='formatted',status='old') /= 0) then
     ABI_ERROR(msg)
   end if
   read(unt,*)
   read(unt,*) nntot , mband_, nwan(1)
   write(msg, '(2a,2(i0,1x))')ch10,' mlwfovlp_setup nntot, mband, nwan ', nntot,mband_,nwan(1)
   call wrtout(std_out,msg)
   if (mband_ /= mband) then
     write(msg, '(4a)' )'mband_ is not equal to mband ',ch10,'Action: check ',trim(filew90_nnkp)
     ABI_ERROR(msg)
   end if
   if(nwan(1) > mband) then
     write(msg, '(4a)' )'nwan > mband ',ch10,'Action: check ',trim(filew90_nnkp)
     ABI_ERROR(msg)
   end if
   if(nwan(1) == 0) then
     write(msg, '(4a)' )'nwan = 0 ',ch10, 'Action: check ',trim(filew90_nnkp)
     ABI_ERROR(msg)
   end if
   do ikpt=1,nkpt
     do intot=1,nntot
       ! ikpt1: k point  (ikpt=ikpt1)
       ! ovikp(intot,ikpt): neighbour number intot for ikpt
       ! g1(1:3,intot,ikpt): non reciprocal space vector between the 2 k-points
       read(unt,*)  ikpt1,ovikp(ikpt,intot),(g1(jj,ikpt,intot),jj=1,3)
       if(ikpt1 /= ikpt) then
         ABI_WARNING("ikpt1 .ne ikpt : ?")
       end if
     end do
   end do
   close(unt)
   write(msg, '(3a)' )ch10,trim(filew90_nnkp),'wannier90.nnkp has been read !'
   call wrtout(std_out,msg)

   ABI_ERROR(' exclude bands is not given in this case (not implemented) ')

!  ^^^^^^^^^^^^^^^^^^^^^^^ call wannier_setup begin^^^^^^^^^^^^^^^^^^^^^^^^
 else if (lwanniersetup==1) then
   num_bands(:)=mband
!  num_nnmax=12 !limit fixed for compact structure in wannier_setup.
   ovikp=0.d0
!  "When nshiftk=1, kptrlatt is initialized as a diagonal (3x3) matrix, whose diagonal
!  elements are the three values ngkpt(1:3)"
   ngkpt(1)=dtset%kptrlatt(1,1)
   ngkpt(2)=dtset%kptrlatt(2,2) !  have to verify that kptrlatt is diagonal
   ngkpt(3)=dtset%kptrlatt(3,3)
   ABI_CHECK(isdiagmat(dtset%kptrlatt), "kptrlatt must be diagonal please use ngkpt and nshiftk 1.")
   do iatom=1,natom
     itypat=dtset%typat(iatom)
     znucl1=dtset%znucl(itypat)
     call atomdata_from_znucl(atom, znucl1)
     symbol=trim(adjustl(atom%symbol))
     !write(309,*) symbol
     atom_symbols(iatom)=symbol
     xcart(:,iatom)=rprimd(:,1)*xred(1,iatom)+ rprimd(:,2)*xred(2,iatom)+ rprimd(:,3)*xred(3,iatom)
   end do ! iatom
   ! write(std_out,*) xcart; write(std_out,*) Bohr_Ang; write(std_out,*) rprimd*Bohr_Ang
   ! write(std_out,*) seed_name; write(std_out,*) ngkpt; write(std_out,*) nkpt
   ! write(std_out,*) mband; write(std_out,*) natom; write(std_out,*) atom_symbols
   write(msg, '(a,a)' )ch10,' mlwfovlp_setup: calling wannier90 library subroutine wannier_setup'
   call wrtout(std_out,msg)

#if defined HAVE_WANNIER90
   nwan(:)=0
   num_bands(:)=0

   do isppol=1,nsppol
#ifdef HAVE_WANNIER90_V1
       call wannier_setup(seed_name(isppol),ngkpt,nkpt&            !input
&      ,real_lattice,recip_lattice,dtset%kptns&                    !input
&      ,mband,natom,atom_symbols,xcart*Bohr_Ang&                   !input
&      ,gamma_only,spinors&                                        !input
&      ,nntot,ovikp,g1,num_bands(isppol),nwan(isppol)&             !output
&      ,proj_site(:,:,isppol),proj_l(:,isppol)&                    !output
&      ,proj_m(:,isppol),proj_radial(:,isppol)&                    !output
&      ,proj_z(:,:,isppol),proj_x(:,:,isppol)&                     !output
&      ,proj_zona(:,isppol),exclude_bands(:,isppol))               !output
#else
     !WANNIER90_V2 has the 2 optional arguments
     if (present(proj_s_loc)) then
       call wannier_setup(seed_name(isppol),ngkpt,nkpt&            !input
&      ,real_lattice,recip_lattice,dtset%kptns&                    !input
&      ,mband,natom,atom_symbols,xcart*Bohr_Ang&                   !input
&      ,gamma_only,spinors&                                        !input
&      ,nntot,ovikp,g1,num_bands(isppol),nwan(isppol)&             !output
&      ,proj_site(:,:,isppol),proj_l(:,isppol)&                    !output
&      ,proj_m(:,isppol),proj_radial(:,isppol)&                    !output
&      ,proj_z(:,:,isppol),proj_x(:,:,isppol)&                     !output
&      ,proj_zona(:,isppol),exclude_bands(:,isppol)&               !output
&      ,proj_s_loc,proj_s_qaxis_loc)                               !output
     else
       !no proj_s_loc provided
       call wannier_setup(seed_name(isppol),ngkpt,nkpt&            !input
&      ,real_lattice,recip_lattice,dtset%kptns&                    !input
&      ,mband,natom,atom_symbols,xcart*Bohr_Ang&                   !input
&      ,gamma_only,spinors&                                        !input
&      ,nntot,ovikp,g1,num_bands(isppol),nwan(isppol)&             !output
&      ,proj_site(:,:,isppol),proj_l(:,isppol)&                    !output
&      ,proj_m(:,isppol),proj_radial(:,isppol)&                    !output
&      ,proj_z(:,:,isppol),proj_x(:,:,isppol)&                     !output
&      ,proj_zona(:,isppol),exclude_bands(:,isppol))               !output
     end if
#endif
   end do !isppol
! if we do not have w90, avoid complaints about unused input variables
#else
   ABI_UNUSED(gamma_only)
   ABI_UNUSED(real_lattice)
   ABI_UNUSED(recip_lattice)
   ABI_UNUSED(spinors)
#endif

  do isppol=1,nsppol
    write(std_out,*)  "1", nntot,nwan(isppol)
    write(std_out,*)  "2", num_bands(isppol)  ! states on which wannier functions are computed
    write(std_out,*)  "3", proj_site(:,1:nwan(isppol),isppol)
    write(std_out,*)  "4", proj_l(1:nwan(isppol),isppol)
    write(std_out,*)  "5", proj_m(1:nwan(isppol),isppol)
    write(std_out,*)  "6", proj_radial(1:nwan(isppol),isppol)
    write(std_out,*)  "7", proj_z(:,1:nwan(isppol),isppol)
    write(std_out,*)  "8", proj_x(:,1:nwan(isppol),isppol)
    write(std_out,*)  "9", proj_zona(1:nwan(isppol),isppol)
    write(std_out,*)  "10", exclude_bands(:,isppol)
  end do!isppol
 end if  ! lwanniersetup

 do isppol=1,nsppol
   band_in(:,isppol)=.true.
   do icb=1,mband
     if(exclude_bands(icb,isppol) /= 0)  band_in(exclude_bands(icb,isppol),isppol)=.false.
   end do
   nband_inc(isppol)=0
   do icb=1, mband
     if (band_in(icb,isppol)) nband_inc(isppol) = nband_inc(isppol)+1
   end do
 end do !isppol

 if (any(mband.gt.num_bands(:))) then
   write(msg, '(a,a)' )ch10,'   The following bands are excluded from the calculation of wannier functions:'
   call wrtout(std_out,msg)

   do isppol=1,nsppol
     if(nsppol==2) then
       write(msg,'("For spin",i2)')isppol
       call wrtout(std_out,msg)
     end if !nsppol
     do jj=1,mband-num_bands(isppol),10
       write(msg,'(10i7)') exclude_bands(jj:min(jj+9,mband-num_bands(isppol)),isppol)
       call wrtout(std_out,msg)
     end do
   end do !isppol
 end if

 do isppol=1,nsppol
   if(nsppol==2) then
     write(msg,'("For spin",i2)')isppol
     call wrtout(std_out,msg)
   end if !nsppol
   write(msg, '(a,i0,3a)' )ch10,nwan(isppol),' wannier functions will be computed (see ',trim(filew90_win(isppol)),')'
   call wrtout(std_out,msg)
   ! write(std_out,*) exclude_bands(icb),band_in(icb)
   write(msg, '(a,i0,a)' )ch10,num_bands(isppol),' bands will be used to extract wannier functions'
   call wrtout(std_out,msg)
   if (num_bands(isppol).lt.nwan(isppol)) then
     write(msg, '(4a)' )&
     ' number of bands is lower than the number of wannier functions',ch10,&
     ' Action : check input file and ',trim(filew90_win(isppol))
     ABI_ERROR(msg)
   else if (num_bands(isppol)==nwan(isppol)) then
     write(msg, '(4a)' )ch10,&
     '   Number of bands is equal to the number of wannier functions',ch10,&
     '   Disentanglement is not necessary'
     call wrtout(std_out,msg)
   else if  (num_bands(isppol).gt.nwan(isppol)) then
     write(msg, '(4a)' )ch10,&
     '   Number of bands is larger than the number of wannier functions',ch10,&
     '   Disentanglement will be necessary'
     call wrtout(std_out,msg)
   end if
   write(msg, '(2x,a,a,i0,1x,a)' )ch10,'   Each k-point has: ', nntot,' neighbours'
   call wrtout(std_out,msg)
 end do !isppol

end subroutine mlwfovlp_setup
!!***

!!****f* m_mlwfovlp/mlwfovlp_pw
!! NAME
!! mlwfovlp_pw
!!
!! FUNCTION
!! Routine which computes PW part of overlap M_{mn}(k,b)
!! for Wannier code (www.wannier.org f90 version).
!!
!! INPUTS
!!  cg(2,mpw*nspinor*mband*mkmem*nsppol)=planewave coefficients of wavefunctions.
!!  g1(3,nkpt,nntot) = G vector shift which is necessary to obtain k1+b
!!  iwav(mband,nkpt,nsppol): shift for pw components in cg.
!!  kg(3,mpw*mkmem)=reduced planewave coordinates.
!!  mband=maximum number of bands
!!  mgfft=maximum size of 1D FFTs
!!  mkmem =number of k points treated by this node.
!!  mpi_enreg=information about MPI parallelization
!!  mpw=maximum dimensioned size of npw.
!!  nfft=(effective) number of FFT grid points (for this processor) (see NOTES at beginning of scfcv)
!!  ngfft(18)=contain all needed information about 3D FFT (see NOTES at beginning of scfcv)
!!  nkpt=number of k points.
!!  npwarr(nkpt)=number of planewaves in basis at this k point
!!  nspinor=number of spinorial components of the wavefunctions
!!  nsppol=1 for unpolarized, 2 for spin-polarized
!!  ovikp(nkpt,nntot)= gives  nntot value of k2 (in the BZ) for each k1  (k2=k1+b mod(G))
!!  seed_name= seed_name of files containing cg for all k-points to be used with MPI
!!
!! OUTPUT
!!  cm1(2,mband,mband,nntot,nkpt,nsppol): overlap <u_(nk1)|u_(mk1+b)>.
!!
!! SIDE EFFECTS
!!  (only writing, printing)
!!
!! SOURCE

subroutine mlwfovlp_pw(mywfc,cm1,g1,kg,mband,mkmem,mpi_enreg,mpw,nfft,ngfft,nkpt,nntot,&
                       npwarr,nspinor,nsppol,ovikp)

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: mband,mkmem,mpw,nfft,nkpt,nntot
 integer,intent(in) :: nspinor,nsppol
! character(len=fnlen) ::  seed_name  !seed names of files containing cg info used in case of MPI
 class(abstract_wf) :: mywfc
 type(MPI_type),intent(in) :: mpi_enreg
!arrays
 integer,intent(in) :: g1(3,nkpt,nntot),kg(3,mpw*mkmem),ngfft(18),npwarr(nkpt)
! integer,intent(in) :: iwav(mband,nkpt,nsppol)
 integer,intent(in) :: ovikp(nkpt,nntot)
! real(dp),intent(in) :: cg(2,mpw*nspinor*mband*mkmem*nsppol)
 real(dp),intent(out) :: cm1(2,mband,mband,nntot,nkpt,nsppol)

!Local variables-------------------------------
!scalars
 integer :: iband1,iband2,ierr,ig,ig1,ig1b,ig2,ig2b
 integer :: ig3,ig3b,igk1,igk2,ikg,ikpt,ikpt1,ikpt2,imntot,index,intot
 integer :: ispinor,isppol,me,n1,n2,n3,npoint,npoint2,npw_k,npw_k2
 integer :: nprocs,comm
 integer,allocatable :: indpwk(:,:),kg_k(:,:), invpwk(:,:)
 character(len=500) :: msg
 logical:: lfile
 real(dp),allocatable :: cg_read(:,:) !to be used in case of MPI

!************************************************************************

 write(msg, '(a,a)' ) ch10, '** mlwfovlp_pw : compute pw part of overlap'
 call wrtout(std_out, msg)

!initialize flags
 lfile=.false.
!mpi initialization
 comm=MPI_enreg%comm_cell
 nprocs=xmpi_comm_size(comm)
 me=MPI_enreg%me_kpt

 if(nprocs>1) then
   ABI_MALLOC(cg_read,(2,nspinor*mpw*mband))
 end if


!****************compute intermediate quantities  (index, shifts) ******
!------------compute index for g points--------------------------------
!ig is a plane waves which belongs to the sphere ecut for ikpt (they
!are npwarr(ikpt))
!npoint is the position in the grid of planes waves
!(they are nfft)
!indpwk is a application ig-> npoint
!invpwk is not an application (some npoint have no ig corresponding)
!cg are ordered with npw_k !
!----------------------------------------------------------------------
!------------compute index for g points--------------------------------
!----------------------------------------------------------------------
 write(msg, '(a,a)' ) ch10,'   first compute index for g-points'
 call wrtout(std_out, msg)

 ! Allocations
 ABI_MALLOC(kg_k,(3,mpw))
 ABI_MALLOC(indpwk,(nkpt,mpw))
 ABI_MALLOC(invpwk,(nkpt,nfft))

 n1=ngfft(1) ; n2=ngfft(2) ; n3=ngfft(3)
 invpwk=0
 indpwk=0
 kg_k=0

 do isppol=1,1  !invpwk is not spin dependent so we just do it once
   ikg=0
   do ikpt=1,nkpt
      ! MPI:cycle over k-points not treated by this node
      if (nprocs>1 ) then !sometimes we can have just one processor
        !print *, "MPI_enreg%proc_distrb(ikpt,1,isppol)", ikpt, isppol , MPI_enreg%proc_distrb(ikpt,1,isppol)
        if (ABS(MPI_enreg%proc_distrb(ikpt,1,isppol)-MPI_enreg%me) /= 0) CYCLE
      end if

     ! write(std_out,*)'me',me,'ikpt',ikpt,'isppol',isppol
     do npoint=1,nfft
       if(invpwk(ikpt,npoint)/=0 )then
         write(std_out,*) "error0 , invpwk is overwritten"
         write(std_out,*) ikpt,npoint
         ABI_ERROR("Aborting now")
       end if
     end do
     npw_k=npwarr(ikpt)
     ! write(std_out,*) ikpt,npw_k,nfft
     kg_k(:,1:npw_k)=kg(:,1+ikg:npw_k+ikg)
     do ig=1,npw_k
       if(ig.gt.mpw) then
         write(std_out,*)"error ig",ig,"greater than mpw ",mpw
         ABI_ERROR("Aborting now")
       end if
       if(indpwk(ikpt,ig)/=0) then
         write(std_out,*) "error, indpwk is overwritten"
         write(std_out,*) ikpt,ig,indpwk(ikpt,ig)
         ABI_ERROR("Aborting now")
       end if
       ig1=modulo(kg_k(1,ig),n1)
       ig2=modulo(kg_k(2,ig),n2)
       ig3=modulo(kg_k(3,ig),n3)
       indpwk(ikpt,ig)=ig1+1+n1*(ig2+n2*ig3)
       npoint=indpwk(ikpt,ig)
       if(npoint.gt.nfft) then
         ABI_ERROR("error npoint")
       end if
       ! write(std_out,*) ikpt,ig,npoint,invpwk(ikpt,npoint)
       if(invpwk(ikpt,npoint)/=0) then
         write(std_out,*) "error, invpwk is overwritten"
         write(std_out,*) ikpt,ig,npoint,invpwk(ikpt,npoint)
         ABI_ERROR("Aborting now")
       end if
       invpwk(ikpt,npoint)=ig
       ! write(std_out,*)'ikpt,npoint,invpwk',ikpt,npoint,invpwk(ikpt,npoint)
       ! if(ikpt.eq.1) write(std_out,*) "ig npoint",ig, npoint
       ! write(std_out,*) "ikpt ig npoint",ikpt,ig, npoint
     end do
     ikg=ikg+npw_k

   end do !ikpt
 end do !isppol
 !write(std_out,*) "index for g points has been computed"

 call xmpi_sum(invpwk,comm,ierr)

!----------------------------------------------------------------------
!------------test invpwk-----------------------------------------------
!----------------------------------------------------------------------
!write(std_out,*) "TEST INVPWK"
!ikpt=3
!isppol=1
!do ig=1,npwarr(ikpt)
!npoint=indpwk(ikpt,ig)
!write(std_out,*) "ig npoint    ",ig, npoint
!write(std_out,*) "ig npoint inv",invpwk(ikpt,npoint),npoint
!end do
!do ig3=1,n3
!do ig2=1,n2
!do ig1=1,n1
!npoint=ig1+(ig2-1)*n1+(ig3-1)*n2*n1
!ig=invpwk(ikpt,npoint)
!!   if(ig/=0)  write(std_out,*) "ig npoint",ig, npoint
!end do
!end do
!end do

 ABI_FREE(kg_k)
 ABI_FREE(indpwk)

!***********************************************************************
!**calculate overlap M_{mn}(k,b)=<\Psi_{k,m}|e^{-ibr}|\Psi_{k+b,n}>*****
!***********************************************************************
 write(msg, '(a,a)' ) ch10,'   mlwfovlp_pw : compute overlaps '
 call wrtout(std_out,  msg)
 write(msg, '(a,a)' ) ch10,"     nkpt  nntot  mband "
 call wrtout(std_out,  msg)
 write(msg, '(i6,2x,i6,2x,i6,2x,i6)' ) nkpt,nntot,mband
 call wrtout(std_out,  msg)
 cm1=zero
 write(msg, '(a)' )  '  '
 call wrtout(std_out,  msg)

 do isppol=1,nsppol
   imntot=0
   do ikpt1=1,nkpt
     ! MPI:cycle over k-points not treated by this node
     if (nprocs>1) then
       if (ABS(MPI_enreg%proc_distrb(ikpt1,1,isppol)-me) /= 0) CYCLE
     end if
     write(msg, '(a,i0,a,i0,a,i0)' ) '     Processor: ',me,' computes k-point: ',ikpt1,', and spin: ',isppol
     call wrtout(std_out, msg)

     do intot=1,nntot
       lfile=.false. !flag to know if this kpt will be read from a file, see below
       imntot=imntot+1
       ikpt2= ovikp(ikpt1,intot)
       ! write(std_out,*)'me',me,'ikpt1',ikpt1,'ikpt2',ikpt2,'intot',intot,'isppol',isppol

!
!      MPI: if ikpt2 not found in this processor then
!      read info from an unformatted file
! TODO: also get MPI mapping to retrieve who has this wf k-point
!
!        if (nprocs>1) then
!           if ( ABS(MPI_enreg%proc_distrb(ikpt2,1,isppol)-me)  /=0) then
!          lfile=.true.
!          write(cg_file,'(a,I5.5,".",I1)') trim(seed_name),ikpt2,isppol
!          iunit=1000+ikpt2+ikpt2*(isppol-1)
!          npw_k2=npwarr(ikpt2)
!          open (unit=iunit, file=cg_file,form='unformatted',status='old',iostat=ios)
!          if(ios /= 0) then
!            write(msg,*) " mlwfovlp_pw: file",trim(cg_file), "not found"
!            ABI_ERROR(msg)
!          end if
! !
!          do iband2=1,mband
!            do ipw=1,npw_k2*nspinor
!              index=ipw+(iband2-1)*npw_k2*nspinor
!              read(iunit) (cg_read(ii,index),ii=1,2)
! !            if(me==0 .and. ikpt2==4)write(300,*)'ipw,iband2,index',ipw,iband2,index,cg_read(:,index)
! !            if(me==1 .and. ikpt2==4)write(301,*)'ipw,iband2,index',ipw,iband2,index,cg_read(:,index)
!            end do
!          end do
!          close(iunit)
!        end if
!     end if

       if(nprocs>1) then
          !call mywfc%read_cg(cg_read, ikpt2)
          !call mywfc%read_cg( ikpt2, isppol, cg_read)

          if (ABS(MPI_enreg%proc_distrb(ikpt2,1,isppol)-me) /= 0) then
            lfile=.true.
            call mywfc%load_cg(ikpt2, isppol, cg_read)
          endif
        end if

       npw_k=npwarr(ikpt1)
       npw_k2=npwarr(ikpt2)
       do ig3=1,n3
         do ig2=1,n2
           do ig1=1,n1
             ! write(std_out,*) isppol,ikpt1,iband1,iband2,intot
             npoint=ig1+(ig2-1)*n1+(ig3-1)*n2*n1
             if(npoint.gt.nfft) then
               ABI_ERROR("error npoin Aborting now")
             end if
             ig1b=ig1+g1(1,ikpt1,intot)
             ig2b=ig2+g1(2,ikpt1,intot)
             ig3b=ig3+g1(3,ikpt1,intot)
             ! write(std_out,*) ig1,ig2,ig3; write(std_out,*) ig1b,ig2b,ig3b
             if(ig1b.lt.1) ig1b=ig1b+n1
             if(ig2b.lt.1) ig2b=ig2b+n2
             if(ig3b.lt.1) ig3b=ig3b+n3
             if(ig1b.gt.n1) ig1b=ig1b-n1
             if(ig2b.gt.n2) ig2b=ig2b-n2
             if(ig3b.gt.n3) ig3b=ig3b-n3
             npoint2=ig1b+(ig2b-1)*n1+(ig3b-1)*n2*n1
             if(npoint2.gt.nfft) then
               ABI_ERROR("error npoint c")
             end if
             igk1=invpwk(ikpt1,npoint)
             igk2=invpwk(ikpt2,npoint2)

             ! if(intot==10) write(std_out,*)'Before igk1 and igk2',ikpt1,ikpt2,isppol

             if(igk1/=0.and.igk2/=0) then
               do iband2=1,mband
                 do iband1=1,mband
                   do ispinor=1,nspinor
                     if(lfile) index=ispinor + nspinor*(igk2-1) + nspinor*npw_k2*(iband2-1) !In case of MPI, see below
                     ! TODO : Check if the index in the cg_elems are correct.
                     !
                     ! If MPI sometimes the info was read from an unformatted file
                     ! If that is the case lfile==.true.
                     !
                     ! TODO: this filter should be outside, not inside 1000 loops!!!
                     if(lfile) then
                       cm1(1,iband1,iband2,intot,ikpt1,isppol)=cm1(1,iband1,iband2,intot,ikpt1,isppol)+ &
                             &   mywfc%cg_elem(1, igk1, ispinor, iband1, ikpt1, isppol) *cg_read(1,index)&
                             & + mywfc%cg_elem(2, igk1, ispinor, iband1, ikpt1, isppol)*cg_read(2,index)
                       cm1(2,iband1,iband2,intot,ikpt1,isppol)=cm1(2,iband1,iband2,intot,ikpt1,isppol)+ &
                             &  mywfc%cg_elem(1, igk1, ispinor, iband1, ikpt1, isppol)*cg_read(2,index)&
                             &- mywfc%cg_elem(2, igk1, ispinor, iband1, ikpt1, isppol)*cg_read(1,index)
                     else
                        ! TODO: Here it is very inefficient.
                        ! Could be replaced with the fftbox and dotproduct.
                        ! cgtk_rotate. sphere.
                        !
                        cm1(1,iband1,iband2,intot,ikpt1,isppol)=&
                             & cm1(1,iband1,iband2,intot,ikpt1,isppol) &
                             & + mywfc%cg_elem(1,  igk1, ispinor,iband1, ikpt1, isppol) &
                             & *mywfc%cg_elem(1, igk2,  ispinor,iband2, ikpt2, isppol) &
                             & +mywfc%cg_elem(2, igk1,  ispinor,iband1, ikpt1, isppol) &
                             & *mywfc%cg_elem(2,  igk2, ispinor,iband2, ikpt2, isppol)
                        cm1(2,iband1,iband2,intot,ikpt1,isppol)= &
                             & cm1(2,iband1,iband2,intot,ikpt1,isppol) &
                             & + mywfc%cg_elem(1, igk1,  ispinor,iband1, ikpt1, isppol) &
                             & *mywfc%cg_elem( 2, igk2,  ispinor,iband2, ikpt2, isppol) &
                             & -mywfc%cg_elem( 2, igk1,  ispinor,iband1, ikpt1, isppol) &
                             & *mywfc%cg_elem( 1, igk2,  ispinor,iband2, ikpt2, isppol)
                     end if
                   end do !ispinor
                 end do ! iband1
               end do ! iband2
             end if
           end do ! ig1
         end do ! ig2
       end do ! ig3
     end do ! intot
   end do ! ikpt1
 end do ! isppol

 ABI_FREE(invpwk)
 ABI_SFREE(cg_read)

 end subroutine mlwfovlp_pw
!!***

!!****f* m_mlwfovlp/mlwfovlp_proj
!! NAME
!! mlwfovlp_proj
!!
!! FUNCTION
!! Routine which computes projection A_{mn}(k) for Wannier code (www.wannier.org f90 version).
!!
!! INPUTS
!!  cg(2,mpw*nspinor*mband*mkmem*nsppol)=planewave coefficients of wavefunctions
!!  cprj(natom,nspinor*mband*mkmem*nsppol)= <p_lmn|Cnk> coefficients for each WF |Cnk>
!!                                          and each |p_lmn> non-local projector
!!  dtset <type(dataset_type)>=all input variables for this dataset
!!  filew90_win = secondary input file for wannier90   (WAS NOT USED IN v6.7.1 - so has been temporarily removed)
!!  kg(3,mpw*mkmem)=reduced planewave coordinates.
!!  lproj= flag 0: no projections, 1: random projections,
!!              2: projections on atomic orbitals
!!              3: projections on projectors
!!  mband=maximum number of bands
!!  mkmem =number of k points treated by this node.
!!  npwarr(nkpt)=number of planewaves in basis at this k point
!!  mpi_enreg=information about MPI parallelization
!!  mpw=maximum dimensioned size of npw.
!!  natom=number of atoms in cell.
!!  nattyp(ntypat)= # atoms of each type.
!!  nkpt=number of k points.
!!  nspinor=number of spinorial components of the wavefunctions
!!  nsppol=1 for unpolarized, 2 for spin-polarized
!!  ntypat=number of types of atoms in unit cell.
!!  num_bands=number of bands actually used to construct the wannier function
!!  nwan= number of wannier fonctions (read in wannier90.win).
!!  proj_l(mband)= angular part of the projection function (quantum number l)
!!  proj_m(mband)= angular part of the projection function (quantum number m)
!!  proj_radial(mband)= radial part of the projection.
!!  proj_site(3,mband)= site of the projection.
!!  proj_x(3,mband)= x axis for the projection.
!!  proj_z(3,mband)= z axis for the projection.
!!  proj_zona(mband)= extension of the radial part.
!!  psps <type(pseudopotential_type)>=variables related to pseudopotentials
!!
!! OUTPUT
!!  A_matrix(num_bands,nwan,nkpt,nsppol)= Matrix of projections needed by wannier_run
!!  ( also wannier90random.amn is written)
!!
!! SIDE EFFECTS
!!  (only writing, printing)
!!
!! SOURCE

 subroutine mlwfovlp_proj(A_matrix,band_in,mywfc, dtset,gprimd,just_augmentation,kg,&
&lproj,max_num_bands,mband,mkmem,mpi_enreg,mpw,mwan,natom,nattyp,&
&nkpt,npwarr,nspinor,&
&nsppol,ntypat,num_bands,nwan,pawtab,proj_l,proj_m,proj_radial,&
&proj_site,proj_x,proj_z,proj_zona,psps,ucvol)

!Arguments ------------------------------------
!scalars
 complex(dpc),parameter :: c1=(1._dp,0._dp)
 integer,intent(in) :: lproj,max_num_bands,mband,mkmem,mpw,mwan,natom,nkpt,nspinor,nsppol
 integer,intent(in) :: ntypat
 type(MPI_type),intent(in) :: mpi_enreg
 type(dataset_type),intent(in) :: dtset
 type(pseudopotential_type),intent(in) :: psps
!arrays
 integer ::nattyp(ntypat)
 integer,intent(in) :: kg(3,mpw*mkmem),npwarr(nkpt),num_bands(nsppol),nwan(nsppol),proj_l(mband,nsppol)
 integer,intent(in) :: proj_m(mband,nsppol)
 integer,intent(inout)::proj_radial(mband,nsppol)
 !real(dp),intent(in) :: cg(2,mpw*nspinor*mband*mkmem*nsppol)
 real(dp),intent(in) :: gprimd(3,3),proj_site(3,mband,nsppol)
 real(dp),intent(in) :: proj_x(3,mband,nsppol),proj_z(3,mband,nsppol),proj_zona(mband,nsppol)
 complex(dpc),intent(out) :: A_matrix(max_num_bands,mwan,nkpt,nsppol)
!character(len=fnlen),intent(in) :: filew90_win(nsppol)
 logical,intent(in) :: band_in(mband,nsppol)
 logical,intent(in)::just_augmentation(mwan,nsppol)
 !type(pawcprj_type) :: cprj(natom,nspinor*mband*mkmem*nsppol)
 !type(pawtab_type),intent(in) :: pawtab(psps%ntypat*psps%usepaw)
 type(pawtab_type),intent(in) :: pawtab(:)
 class(abstract_wf), intent(inout) :: mywfc

!Local variables-------------------------------
!scalars
 integer :: iatom,iatprjn,iband,iband1,iband2,ibg,icat,icg,icg_shift
 integer :: idum,ikg,ikpt,ilmn,ipw,iproj
 integer :: ispinor,isppol,itypat,iwan,jband,jj1,libprjn
 integer :: lmn_size,natprjn,nband_k,nbprjn,npw_k, sumtmp
 integer :: max_lmax,max_lmax2,mproj,nprocs,comm,rank, idx
 real(dp),parameter :: qtol=2.0d-8
 real(dp) :: arg,norm_error,norm_error_bar
 real(dp) :: ucvol,x1,x2,xnorm,xnormb,xx,yy,zz
 complex(dpc) :: amn_tmp(nspinor)
 complex(dpc) :: cstr_fact
 character(len=500) :: msg
!arrays
 integer :: kg_k(3,mpw),lmax(nsppol),lmax2(nsppol),nproj(nsppol)
 integer,allocatable :: lprjn(:),npprjn(:)
 real(dp) :: kpg(3),kpt(3)
 real(dp),allocatable :: amn(:,:,:,:,:),amn2(:,:,:,:,:,:,:)
 real(dp),allocatable :: gsum2(:),kpg2(:),radial(:)
 complex(dpc),allocatable :: gf(:,:),gft_lm(:), ylmc_fac(:,:,:),ylmcp(:)
!Tables 3.1 & 3.2, User guide
 integer,parameter :: orb_l_defs(-5:3)=(/2,2,1,1,1,0,1,2,3/)
! integer,parameter :: mtransfo(0:3,7)=&
!&  reshape((/1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,-2,-1,2,1,0,0,0,-1,1,2,-2,-3,3/),(/4,7/))

!************************************************************************

!mpi initialization
 comm=MPI_enreg%comm_cell
 nprocs=xmpi_comm_size(comm)
 rank=MPI_enreg%me_kpt

 ! Check input variables
 if ((lproj/=1).and.(lproj/=2).and.(lproj/=5)) then
   write(msg, '(3a)' )' Value of lproj no allowed ',ch10,' Action: change lproj.'
   ABI_ERROR(msg)
 end if

 write(msg, '(a,a)' )ch10,'** mlwfovlp_proj: compute A_matrix of initial guess for wannier functions'
 call wrtout(std_out,msg)

!Initialize to 0.d0
 A_matrix(:,:,:,:)=cmplx(0.d0,0.d0)

 !********************* Write Random projectors
 if(lproj==1) then
   idum=123456
   ! Compute random projections
   ABI_MALLOC(amn,(2,mband,mwan,nkpt,nsppol))
   amn=zero
   do isppol=1,nsppol
     do ikpt=1,nkpt
       ! MPI: cycle over kpts not treated by this node
       if (ABS(MPI_enreg%proc_distrb(ikpt,1,isppol)-rank)/=0) CYCLE
       ! write(std_out,'("kpt loop2: ikpt",i3," rank ",i3)') ikpt,rank

       do iband1=1,mband
         xnormb=0.d0
         do iband2=1,nwan(isppol)
           x1=uniformrandom(idum)
           x2=uniformrandom(idum)
           xnorm=sqrt(x1**2+x2**2)
           xnormb=xnormb+xnorm
           amn(1,iband1,iband2,ikpt,isppol)=x1
           amn(2,iband1,iband2,ikpt,isppol)=x2
         end do
         do iband2=1,nwan(isppol)
           amn(1,iband1,iband2,ikpt,isppol)=amn(1,iband1,iband2,ikpt,isppol)/xnormb
           amn(2,iband1,iband2,ikpt,isppol)=amn(2,iband1,iband2,ikpt,isppol)/xnormb
         end do !iband2
       end do !iband1
     end do !ikpt
   end do !isppol
   do isppol=1,nsppol
     do ikpt=1,nkpt
       ! MPI: cycle over kpts not treated by this node
       if (ABS(MPI_enreg%proc_distrb(ikpt,1,isppol)-rank)/=0) CYCLE
       do iband2=1,nwan(isppol)
         jband=0
         do iband1=1,mband
           if(band_in(iband1,isppol)) then
             jband=jband+1
             if(jband.gt.num_bands(isppol)) then
               ABI_ERROR('Value of jband is above num_bands ')
             end if
             A_matrix(jband,iband2,ikpt,isppol)=cmplx(amn(1,iband1,iband2,ikpt,isppol),amn(2,iband1,iband2,ikpt,isppol))
           end if
         end do !iband1
       end do !iband2
     end do !ikpt
   end do !isppol
   ABI_FREE(amn)
 end if

!********************* Projection on atomic orbitals based on .win file
 if( lproj==2) then !based on .win file
   nproj(:)=nwan(:)/nspinor !if spinors, then the number of projections are
   mproj=maxval(nproj(:))
   ! half the total of wannier functions. obtain lmax and lmax2
   lmax(:)=0
   lmax2(:)=0

   do isppol=1,nsppol
     do iproj=1,nproj(isppol)
       lmax(isppol)=max(lmax(isppol),orb_l_defs(proj_l(iproj,isppol)))
     end do !iproj
     lmax2(isppol)=(lmax(isppol)+1)**2
   end do !isppol
   max_lmax=maxval(lmax(:))
   max_lmax2=maxval(lmax2(:))
   ! Allocate arrays
   ABI_MALLOC(ylmc_fac,(max_lmax2,mproj,nsppol))

   ! get ylmfac, factor used for rotations and hybrid orbitals
   do isppol=1,nsppol
     !print *, "nproj", nproj(isppol)
     !print *, "isppol", isppol
     !print *, ylmc_fac(1:lmax2(isppol),1:nproj(isppol),isppol)
     !print *, "lmax, lmax2: ", lmax(isppol),lmax2(isppol)
     !print *, nproj(isppol),proj_l(:,isppol),proj_m(:,isppol),proj_x(:,:,isppol)
     !print *, proj_z(:,:,isppol)
     call mlwfovlp_ylmfac(ylmc_fac(1:lmax2(isppol),1:nproj(isppol),isppol),lmax(isppol),lmax2(isppol),&
&     mband,nproj(isppol),proj_l(:,isppol),proj_m(:,isppol),proj_x(:,:,isppol),proj_z(:,:,isppol))
   end do

   norm_error=zero
   norm_error_bar=zero
   icg=0

   do isppol=1,nsppol
     ! Allocate arrays
     ! this has to be done this way because the variable icg changes at the end of the
     ! cycle. We cannot just skip the whole cycle.
     ABI_MALLOC(gf,(mpw,nproj(isppol)))
     ABI_MALLOC(gft_lm,(lmax2(isppol)))
     ABI_MALLOC(gsum2,(nproj(isppol)))
     ABI_MALLOC(kpg2,(mpw))
     ABI_MALLOC(radial,(lmax2(isppol)))
     ABI_MALLOC(ylmcp,(lmax2(isppol)))
     ikg=0
     do ikpt=1, nkpt
       ! MPI: cycle over kpts not treated by this node
       if (ABS(MPI_enreg%proc_distrb(ikpt,1,isppol)-rank)/=0) CYCLE
       write(msg, '(a,i6,a,2(i0,1x))' )'   processor',rank,' will compute k-point,spin=',ikpt,isppol
       call wrtout(std_out, msg)

       ! Initialize variables
       npw_k=npwarr(ikpt)
       gsum2(:)=0.d0
       gf(:,:) = (0.d0,0.d0)
       kpt(:)=dtset%kptns(:,ikpt)
       kg_k(:,1:npw_k)=kg(:,1+ikg:npw_k+ikg)

       do ipw=1, npw_k
         kpg(1)= (kpt(1) + real(kg_k(1,ipw),dp))     !k+G
         kpg(2)= (kpt(2) + real(kg_k(2,ipw),dp))
         kpg(3)= (kpt(3) + real(kg_k(3,ipw),dp))
         ! Calculate modulus of k+G
         xx=gprimd(1,1)*kpg(1)+gprimd(1,2)*kpg(2)+gprimd(1,3)*kpg(3)
         yy=gprimd(2,1)*kpg(1)+gprimd(2,2)*kpg(2)+gprimd(2,3)*kpg(3)
         zz=gprimd(3,1)*kpg(1)+gprimd(3,2)*kpg(2)+gprimd(3,3)*kpg(3)
         kpg2(ipw)= two_pi*sqrt(xx**2+yy**2+zz**2)
         ! Complex Y_lm for k+G
         if(lmax(isppol)==0) then
           ylmcp(1)=c1/sqrt(four_pi)
         else
           call ylm_cmplx(lmax(isppol),ylmcp,xx,yy,zz)
         end if

         do iproj=1,nproj(isppol)
           ! In PAW, we can use proj_radial > 4 to indicate that we just want the in-sphere contribution
           if( psps%usepaw==1) then
             if( just_augmentation(iproj,isppol)) cycle
           end if
           ! obtain radial part
           call mlwfovlp_radial(proj_zona(iproj,isppol),lmax(isppol),lmax2(isppol), &
                                radial,proj_radial(iproj,isppol),kpg2(ipw))
           ! scale complex representation of projector orbital with radial functions of appropriate l
           gft_lm(:)=radial(:)*ylmc_fac(1:lmax2(isppol),iproj,isppol)
           ! complex structure factor for projector orbital position
           arg = ( kpg(1)*proj_site(1,iproj,isppol) + &
                   kpg(2)*proj_site(2,iproj,isppol) + &
                   kpg(3)*proj_site(3,iproj,isppol) ) * 2*pi
           cstr_fact = cmplx(cos(arg), -sin(arg) )

           ! obtain guiding functions
           gf(ipw,iproj)=cstr_fact*dot_product(ylmcp,gft_lm)
           gsum2(iproj)=gsum2(iproj)+real(gf(ipw,iproj))**2+aimag(gf(ipw,iproj))**2
         end do !iproj
       end do !ipw

       do iproj=1,nproj(isppol)
         ! In PAW, we can use proj_radial > 4 to indicate that we just want the in-sphere contribution
         if(psps%usepaw==1 ) then
           if (just_augmentation(iproj,isppol)) cycle
         end if
         gsum2(iproj)=16._dp*pi**2*gsum2(iproj)/ucvol
         gf(:,iproj)=gf(:,iproj)/sqrt(gsum2(iproj))
         norm_error=max(abs(gsum2(iproj)-one),norm_error)
         norm_error_bar=norm_error_bar+(gsum2(iproj)-one)**2
       end do !iproj
!
!      Guiding functions are computed.
!      compute overlaps of gaussian projectors and wave functions
       do iproj=1,nproj(isppol)
!
!        In PAW, we can use proj_radial > 4 to indicate that we just
!        want the in-sphere contribution
!
         if(psps%usepaw==1 ) then
           if ( just_augmentation(iproj,isppol)) cycle
         end if
!
         jband=0
         do iband=1,mband
           if(band_in(iband,isppol)) then
             icg_shift=npw_k*nspinor*(iband-1)+icg
             jband=jband+1
             amn_tmp(:)=cmplx(0.d0,0.d0)
             do ispinor=1,nspinor
               do ipw=1,npw_k
                !
                ! The case of spinors is tricky, we have nproj =  nwan/2
                ! so we project to spin up and spin down separately, to have at
                ! the end an amn matrix with nwan projections.
                idx=ipw*nspinor - (nspinor-ispinor)
                 select type(mywfc)
                 type is (cg_cprj)
                   amn_tmp(ispinor)=amn_tmp(ispinor)+gf(ipw,iproj)*cmplx(mywfc%cg(1,idx+icg_shift),-mywfc%cg(2,idx+icg_shift))
                 type is (wfd_wf)
                   amn_tmp(ispinor)=amn_tmp(ispinor)+gf(ipw,iproj)*conjg(mywfc%cg_elem_complex(ipw, ispinor, iband, ikpt, isppol))
                end select
               end do !ipw
             end do !ispinor
             do ispinor=1,nspinor
               iwan=(iproj*nspinor)- (nspinor-ispinor)
               A_matrix(jband,iwan,ikpt,isppol)=amn_tmp(ispinor)
             end do
           end if !band_in
         end do !iband
       end do !iproj
       icg=icg+npw_k*nspinor*mband
       ikg=ikg+npw_k
     end do !ikpt
     ! Deallocations
     ABI_FREE(gf)
     ABI_FREE(gft_lm)
     ABI_FREE(gsum2)
     ABI_FREE(kpg2)
     ABI_FREE(radial)
     ABI_FREE(ylmcp)
   end do !isppol
!
!  if(isppol==1) then
!    norm_error_bar=sqrt(norm_error_bar/real(nkpt*(nwan(1)),dp))
!  else
!    norm_error_bar=sqrt(norm_error_bar/real(nkpt*(nwan(1)+nwan(2)),dp))
!  end if
!  if(norm_error>0.05_dp) then
!  write(msg, '(6a,f6.3,a,f6.3,12a)' )ch10,&
!  &     ' mlwfovlp_proj : WARNING',ch10,&
!  &     '  normalization error for wannier projectors',ch10,&
!  &     '  is',norm_error_bar,' (average) and',norm_error,' (max).',ch10,&
!  &     '  this may indicate more cell-to-cell overlap of the radial functions',ch10,&
!  &     '  than you want.',ch10,&
!  &     '  Action : modify zona (inverse range of radial functions)',ch10,&
!  '  under "begin projectors" in ',trim(filew90_win),' file',ch10
!  call wrtout(std_out,msg)
!  end if
!
   ABI_FREE(ylmc_fac)
 end if !lproj==2


!*************** computes projection  from PROJECTORS ********************
 if(lproj==3) then  !! if LPROJPRJ
!  ----- set values for projections --------------------- ! INPUT
!  nbprjn:number of  different l-values for projectors
!  lprjn: value of l for each projectors par ordre croissant
!  npprjn: number of projectors for each lprjn
   natprjn=1  ! atoms with wannier functions are first
   if(natprjn/=1) then ! in this case lprjn should depend on iatprjn
     ABI_ERROR("natprjn/=1")
   end if
   nbprjn=2
   ABI_MALLOC(lprjn,(nbprjn))
   lprjn(1)=0
   lprjn(2)=1
   ABI_MALLOC(npprjn,(0:lprjn(nbprjn)))
   npprjn(0)=1
   npprjn(1)=1
!  --- test coherence of nbprjn and nwan
   sumtmp=0
   do iatprjn=1,natprjn
     do libprjn=0,lprjn(nbprjn)
       sumtmp=sumtmp+(2*libprjn+1)*npprjn(libprjn)
     end do
   end do
   if(sumtmp/=nwan(1)) then
     write(std_out,*) "Number of Wannier orbitals is not equal to number of projections"
     write(std_out,*) "Action: check values of lprjn,npprjn % nwan"
     write(std_out,*) "nwan, sumtmp=",nwan,sumtmp
     ABI_ERROR("Aborting now")
   end if
!  --- end test of coherence
   ABI_MALLOC(amn2,(2,natom,nsppol,nkpt,mband,nspinor,nwan(1)))
   if(psps%usepaw==1) then
     amn2=zero
     ibg=0
     do isppol=1,nsppol
       do ikpt=1,nkpt   !TODO : hexu: check if it should be mkmem, or should skip if the kpt is not in this node.
         nband_k=dtset%nband(ikpt+(isppol-1)*nkpt)
         do iband=1,nband_k
!          write(std_out,*)"amn2",iband,ibg,ikpt
           do ispinor=1,nspinor
             icat=1
             do itypat=1,dtset%ntypat
               lmn_size=pawtab(itypat)%lmn_size
               do iatom=icat,icat+nattyp(itypat)-1
                 jj1=0
                 do ilmn=1,lmn_size
                   if(iatom.le.natprjn) then
!                    do iwan=1,nwan
                     do libprjn=0,lprjn(nbprjn)
!                      if (psps%indlmn(1,ilmn,itypat)==proj_l(iwan)) then
!                      if (psps%indlmn(2,ilmn,itypat)==mtransfo(proj_l(iwan),proj_m(iwan))) then
                       if (psps%indlmn(1,ilmn,itypat)==libprjn) then
                         if (psps%indlmn(3,ilmn,itypat)<=npprjn(libprjn)) then
                           if(band_in(iband,isppol)) then
                             jj1=jj1+1
                             if(jj1>nwan(isppol)) then
                               write(std_out,*) "number of wannier orbitals is lower than lmn_size"
                               write(std_out,*) jj1,nwan(isppol)
                               ABI_ERROR("Aborting now")
                             end if
                             !amn2(1,iatom,isppol,ikpt,iband,ispinor,jj1)=cprj(iatom,iband+ibg)%cp(1,ilmn)
                             !amn2(2,iatom,isppol,ikpt,iband,ispinor,jj1)=cprj(iatom,iband+ibg)%cp(2,ilmn)
                             amn2(1,iatom,isppol,ikpt,iband,ispinor,jj1)= &
                                  &mywfc%cprj_elem(1, ispinor, iband, ikpt, isppol, iatom, ilmn)
                             amn2(2,iatom,isppol,ikpt,iband,ispinor,jj1)= &
                                  &mywfc%cprj_elem(2, ispinor, iband, ikpt, isppol, iatom, ilmn)

                             !amn2(2,iatom,isppol,ikpt,iband,ispinor,jj1)=cprj(iatom,iband+ibg)%cp(2,ilmn)
                           end if
                         end if
                       end if
                     end do ! libprjn
!                    endif
!                    endif
!                    enddo ! iwan
                   end if ! natprjn
                 end do !ilmn
               end do ! iatom
               icat=icat+nattyp(itypat)
             end do ! itypat
           end do ! ispinor
         end do !iband
         ibg=ibg+nband_k*nspinor
!        write(std_out,*)'amn2b',iband,ibg,ikpt
       end do !ikpt
     end do ! isppol

!    -----------------------  Save Amn   --------------------
     do isppol=1,nsppol
       do ikpt=1,nkpt
         do iband2=1,nwan(isppol)
           jband=0
           do iband1=1,mband
             if(band_in(iband1,isppol)) then
               jband=jband+1
               A_matrix(jband,iband2,ikpt,isppol)=&
                 cmplx(amn2(1,1,1,ikpt,iband1,1,iband2),amn2(2,1,1,ikpt,iband1,1,iband2))
             end if
           end do
         end do
       end do
     end do
   end if !usepaw
   ABI_FREE(amn2)
   ABI_FREE(npprjn)
   ABI_FREE(lprjn)

 end if ! lproj==3

end subroutine mlwfovlp_proj
!!***

!!****f* m_mlwfovlp/mlwfovlp_projpaw
!! NAME
!! mlwfovlp_projpaw
!!
!! FUNCTION
!! Calculates the functions that are given to Wannier90 as an starting guess.
!! Here we project them inside the PAW spheres
!!
!! INPUTS
!!  band_in(mband)= logical array which indicates the bands to be excluded from the calculation
!!  cprj(natom,nspinor*mband*mkmem*nsppol)= <p_lmn|Cnk> coefficients for each WF |Cnk>
!!                                          and each |p_lmn> non-local projector
!!  just_augmentation= flag used to indicate that we are just going
!!                     to compute augmentation part of the matrix
!!                     and we are excluding the plane wave part.
!!  mband= maximum number of bands
!!  mkmem= number of k points which can fit in memory; set to 0 if use disk
!!  natom= number of atoms in cell.
!!  nband(nkpt*nsppol)= array cointaining number of bands at each k-point and isppol
!!  nkpt=number of k points.
!!  num_bands=number of bands actually used to construct the wannier function (NOT USED IN 6.7.1 SO WAS TEMPORARILY REMOVED)
!!  nspinor=number of spinorial components of the wavefunctions
!!  nsppol=1 for unpolarized, 2 for spin-polarized
!!  ntypat=number of types of atoms in unit cell.
!!  nwan= number of wannier fonctions (read in wannier90.win).
!!  pawrad(ntypat)= type(pawrad_type) radial information of paw objects
!!  pawtab(ntypat)= For PAW, TABulated data initialized at start
!!  proj_l(mband)= angular part of the projection function (quantum number l)
!!  proj_m(mband)= angular part of the projection function (quantum number m)
!!  proj_radial(mband)= radial part of the projection.
!!  proj_site(3,mband)= site of the projection.
!!  proj_x(3,mband)= x axis for the projection.
!!  proj_z(3,mband)= z axis for the projection.
!!  proj_zona(mband)= extension of the radial part.
!!  psps <type(pseudopotential_type)>=variables related to pseudopotentials
!!  rprimd(3,3)= Direct lattice vectors, Bohr units.
!!  typat(natom)= atom type
!!  xred(3,natom)=reduced dimensionless atomic coordinates
!!
!! OUTPUT
!!  A_paw(max_num_bands,nwan,nkpt) = A matrix containing initial guess for MLWFs
!!                          (augmentation part of the matrix)
!!
!! NOTES
!! This routine is still under developement
!!
!! SOURCE

subroutine mlwfovlp_projpaw(A_paw,band_in,mywfc,just_augmentation,max_num_bands,mband,mkmem,&
&mwan,natom,nband,nkpt,&
&nspinor,nsppol,ntypat,nwan,pawrad,pawtab,&
&proj_l,proj_m,proj_radial,proj_site,proj_x,proj_z,proj_zona,psps,&
&rprimd,typat,xred)

!Arguments ------------------------------------
 integer,intent(in) :: max_num_bands,mband,mkmem,mwan,natom,nkpt
 integer,intent(in) :: nspinor,nsppol,ntypat
 !arrays
 integer,intent(in) :: nband(nsppol*nkpt),nwan(nsppol)
 integer,intent(in) :: proj_l(mband,nsppol),proj_m(mband,nsppol),proj_radial(mband,nsppol)
 integer,intent(in) :: typat(natom)
 real(dp),intent(in):: proj_site(3,mband,nsppol)
 real(dp),intent(in) :: proj_x(3,mband,nsppol),proj_z(3,mband,nsppol),proj_zona(mband,nsppol)
 real(dp),intent(in) :: rprimd(3,3),xred(3,natom)
 complex(dpc),intent(out) :: A_paw(max_num_bands,mwan,nkpt,nsppol)
 logical,intent(in) :: band_in(mband,nsppol)
 logical,intent(in)::just_augmentation(mwan,nsppol)
 !type(pawcprj_type) :: cprj(natom,nspinor*mband*mkmem*nsppol)
 type(abstract_wf), intent(inout) :: mywfc
 type(pawrad_type),intent(in) :: pawrad(ntypat)
 type(pawtab_type),intent(in) :: pawtab(ntypat)
 type(pseudopotential_type),intent(in) :: psps

!Local variables-------------------------------
 !local variables
 integer :: basis_size,iatom,iband,ii
 integer :: ikpt,ir,isppol,itypat,iwan,jband
 integer :: ll,lm,ln,mm,ilmn
 integer :: lmn_size,max_lmax2, mesh_size,nn
 integer :: lmax(nsppol),lmax2(nsppol)
 real(dp):: aa,int_rad2,prod_real,prod_imag
 real(dp),parameter :: dx=0.015d0,rmax=10.d0,xmin=0.d0
 real(dp):: sum,wan_lm_fac,x
 complex(dpc)::prod
 character(len=500) :: msg
 !arrays
 integer :: index(mband,nkpt,nsppol)
 real(dp) :: dist,norm(mwan,nsppol)
 real(dp) :: proj_cart(3,mwan,nsppol),proj_site_unit(3,mwan,nsppol)
 real(dp) :: xcart_unit(3,natom),xred_unit(3,natom)
 real(dp),allocatable :: aux(:),ff(:),r(:),int_rad(:),rad_int(:)
 real(dp),allocatable :: ylmr_fac(:,:,:)
 integer,parameter :: orb_l_defs(-5:3)=(/2,2,1,1,1,0,1,2,3/) ! Tables 3.1 & 3.2, User guide

! *************************************************************************

 ABI_UNUSED(mkmem)
 ABI_UNUSED(nspinor)

 write(msg, '(a,a)' )ch10,'** mlwfovlp_proj:  compute in-sphere part of A_matrix'
 call wrtout(std_out,msg)

!Check input variables
 do isppol=1,nsppol
   do iwan=1,nwan(nsppol)
     if(proj_radial(iwan,isppol)<1 .or. proj_radial(iwan,isppol)>4)then
       write(msg,'(a,a,a,i0)')&
       '  proj_radial should be between 1 and 4,',ch10,&
       '  however, proj_radial=',proj_radial(iwan,isppol)
       ABI_BUG(msg)
     end if
   end do
 end do

!Initialize
 A_paw(:,:,:,:)=cmplx(0.d0,0.d0)

!Get index for cprj
 ii=0
 do isppol=1,nsppol
   do ikpt=1,nkpt
     do iband=1,nband(ikpt)
       ii=ii+1
       index(iband,ikpt,isppol)=ii
     end do
   end do
 end do

!obtain lmax and lmax2
 lmax(:)=0
 lmax2(:)=0
 do isppol=1,nsppol
   do iwan=1,nwan(isppol)
     lmax(isppol)=max(lmax(isppol),orb_l_defs(proj_l(iwan,isppol)))
   end do !iwan
   lmax2(isppol)=(lmax(isppol)+1)**2
 end do
 max_lmax2=maxval(lmax2(:))
!
!get ylmfac, factor used for rotations and hybrid orbitals
!
 ABI_MALLOC(ylmr_fac,(max_lmax2,mwan,nsppol))


 do isppol=1,nsppol
   call mlwfovlp_ylmfar(ylmr_fac(1:lmax2(isppol),1:nwan(isppol),isppol),&
&   lmax(isppol),lmax2(isppol),mband,nwan(isppol),proj_l(:,isppol),proj_m(:,isppol),&
&   proj_x(:,:,isppol),proj_z(:,:,isppol))
!
!  Shift projection centers and atom centers to the primitive cell
!  This will be useful after, when we check if the Wannier function
!  lies on one specific atom
!
   proj_site_unit(:,:,:)=0.d0
   do iwan=1,nwan(isppol)
     do ii=1,3
       proj_site_unit(ii,iwan,isppol)=ABS(proj_site(ii,iwan,isppol)-AINT(proj_site(ii,iwan,isppol)) )
     end do
   end do
   do iatom=1,natom
     do ii=1,3
       xred_unit(ii,iatom)=ABS(xred(ii,iatom)-AINT(xred(ii,iatom)) )
     end do
   end do
   call xred2xcart(natom,rprimd,xcart_unit,xred_unit)
   call xred2xcart(mwan,rprimd,proj_cart(:,:,isppol),proj_site_unit(:,:,isppol))
!
!  Normalize the Wannier functions
!
!  Radial part
   mesh_size= nint((rmax - xmin ) / dx + 1)
   ABI_MALLOC( ff,(mesh_size))
   ABI_MALLOC(r,(mesh_size))
   ABI_MALLOC(rad_int,(mesh_size))
   ABI_MALLOC(aux,(mesh_size))
   do ir=1, mesh_size
     x=xmin+DBLE(ir-1)*dx
     r(ir)=x
   end do   !ir
   do iwan=1,nwan(isppol)
!    write(std_out,*)'iwan',iwan
!    radial functions shown in table 3.3 of wannier90 manual
     if(proj_radial(iwan,isppol)==1) ff(:) = 2.d0 * proj_zona(iwan,isppol)**(1.5d0) * exp(-proj_zona(iwan,isppol)*r(:))
     if(proj_radial(iwan,isppol)==2) ff(:) = 1.d0/(2.d0*sqrt(2.d0))*proj_zona(iwan,isppol)**(1.5d0) *&
&     (2.d0 - proj_zona(iwan,isppol)*r(:))*exp(-proj_zona(iwan,isppol)*r(:)/2.d0)
     if(proj_radial(iwan,isppol)==3) ff(:) = sqrt(4.d0/27.d0)*proj_zona(iwan,isppol)**(1.5d0)&
&     * (1.d0 - 2.d0*proj_zona(iwan,isppol)*r(:)/3.d0 + 2.d0*proj_zona(iwan,isppol)**2*r(:)**2/27.d0)&
&     * exp(-proj_zona(iwan,isppol) * r(:)/3.d0)

     if(proj_radial(iwan,isppol)/=4) then
       aux(:)=ff(:)**2*r(:)**2
       call simpson_int(mesh_size,dx,aux,rad_int)
       sum=0.d0
       do ir=1,mesh_size
         sum=sum+rad_int(ir)
       end do
       int_rad2=sum/real(mesh_size,dp)
!
!      do ir=1,mesh_size
!      if(iwan==1) write(400,*)r(ir),aux(ir),rad_int(ir)
!      end do
     else
!
!      ==4: gaussian function
!      f(x)=\exp(-1/4(x/aa)**2)
!      \int f(x)f(x) dx = \int \exp(-1/2(x/aa)**2) = aa*sqrt(2pi)
!
       int_rad2=sqrt(2.d0*pi)*proj_zona(iwan,isppol)
     end if

!
!    Now angular part
!
     prod_real=0.d0
     do lm=1,lmax2(isppol)
       wan_lm_fac=ylmr_fac(lm,iwan,isppol)
!      write(std_out,*)'wan_lm_fac',wan_lm_fac
!      write(std_out,*)'int_rad2',int_rad2
       prod_real= prod_real + wan_lm_fac**2 * int_rad2
     end do
     norm(iwan,isppol)=sqrt(prod_real)
   end do !iwan
   ABI_FREE(ff)
   ABI_FREE(r)
   ABI_FREE(rad_int)
   ABI_FREE(aux)
!
!  Now that we found our guiding functions
!  We proceed with the internal product of
!  our guiding functions and the wave function
!  Amn=<G_m|\Psi_n> inside the sphere.
!  The term <G_m|\Psi_n> inside the sphere is:
!  = \sum_i <G_n | \phi_i - \tphi_i> <p_im|\Psi_m>
!
!
!  G_n \phi and \tphi can be decomposed in
!  a radial function times an angular function.
!
!
!  Big loop on iwan and iatom
!
   do iwan=1,nwan(isppol)
     do iatom=1,natom
!
!      check if center of wannier function coincides
!      with the center of the atom
!
       dist=((proj_cart(1,iwan,isppol)-xcart_unit(1,iatom))**2 + &
             (proj_cart(2,iwan,isppol)-xcart_unit(2,iatom))**2 + &
             (proj_cart(3,iwan,isppol)-xcart_unit(3,iatom))**2)**0.5

       !  if the distance between the centers is major than 0.1 angstroms skip
       if( dist > 0.188972613) cycle
       write(msg, '(2a,i4,a,i4,2a)')ch10, '   Wannier function center',iwan,' is on top of atom',&
                                    iatom,ch10,'      Calculating in-sphere contribution'
       call wrtout(ab_out,msg)
       call wrtout(std_out,msg)
       ! Get useful quantities
       itypat=typat(iatom)
       lmn_size=pawtab(itypat)%lmn_size
       basis_size=pawtab(itypat)%basis_size
       mesh_size=pawtab(itypat)%mesh_size
       ABI_MALLOC(int_rad,(basis_size))
       ABI_MALLOC(ff,(mesh_size))
       ABI_MALLOC(aux,(mesh_size))

!      Integrate first the radial part and save it into an array
!      radial functions shown in table 3.3 of wannier90 manual
       if(proj_radial(iwan,isppol)==1) aux(1:mesh_size) = 2.d0 * proj_zona(iwan,isppol)**(1.5d0) *&
&       exp(-proj_zona(iwan,isppol)*pawrad(itypat)%rad(1:mesh_size))
       if(proj_radial(iwan,isppol)==2) aux(1:mesh_size) = 1.d0/(2.d0*sqrt(2.d0))*proj_zona(iwan,isppol)**(1.5d0) *&
&       (2.d0 - proj_zona(iwan,isppol)*pawrad(itypat)%rad(1:mesh_size)) &
&       * exp(-proj_zona(iwan,isppol)*pawrad(itypat)%rad(1:mesh_size)/2.d0)
       if(proj_radial(iwan,isppol)==3) aux(1:mesh_size) = sqrt(4.d0/27.d0)*proj_zona(iwan,isppol)**(1.5d0)&
&       * (1.d0 - 2.d0*proj_zona(iwan,isppol)*pawrad(itypat)%rad(1:mesh_size)/3.d0 &
&       + 2.d0*proj_zona(iwan,isppol)**2 *pawrad(itypat)%rad(1:mesh_size)**2/27.d0)&
&       * exp(-proj_zona(iwan,isppol) * pawrad(itypat)%rad(1:mesh_size)/3.d0)
!
!      ==4: gaussian function
!      f(x)=\exp(-1/4(x/aa)**2)
!
       if(proj_radial(iwan,isppol)==4) then
         aa=1.d0/proj_zona(iwan,isppol)
         aux(1:mesh_size)= exp(-0.25d0*(pawrad(itypat)%rad(1:mesh_size)*aa)**2)
       end if
!
!      Normalize aux
       aux(:)=aux(:)/norm(iwan,isppol)
!
       do ln=1,basis_size
         if(just_augmentation(iwan,isppol)) then
!
!          just augmentation region contribution
!          In this case there is no need to use \tphi
!          ff= \int R_wan(r) (R_phi(ln;r)/r ) r^2 dr
!
           ff(1:mesh_size)= aux(1:mesh_size) * pawtab(itypat)%phi(1:mesh_size,ln) &
&           * pawrad(itypat)%rad(1:mesh_size)
         else
!          Inside sphere contribution = \phi - \tphi
!          ff= \int R_wan(r) (R_phi(ln;r)/r - R_tphi(ln;r)/r) r^2 dr
           ff(1:mesh_size)= aux(1:mesh_size) * (pawtab(itypat)%phi(1:mesh_size,ln)-pawtab(itypat)%tphi(1:mesh_size,ln)) &
&           * pawrad(itypat)%rad(1:mesh_size)
         end if
!
!        Integration with simpson routine
!
         call simp_gen(int_rad(ln),ff,pawrad(itypat))
!        do ii=1,mesh_size
!        unit_ln=400+ln
!        if( iwan==1 ) write(unit_ln,*)pawrad(itypat)%rad(ii),ff(ii),int_rad(ln)
!        end do
       end do !ln
       ABI_FREE(ff)
       ABI_FREE(aux)
!
!      Now integrate the angular part
!      Cycle on i indices
!
!      prod_real=0.d0
       do ilmn=1, lmn_size
         ll=Psps%indlmn(1,ilmn,itypat)
         mm=Psps%indlmn(2,ilmn,itypat)
         nn=Psps%indlmn(3,ilmn,itypat)
         lm=Psps%indlmn(4,ilmn,itypat)
         ln=Psps%indlmn(5,ilmn,itypat)
!        write(std_out,*)'ll ',ll,' mm ',mm,'nn',nn,"lm",lm,"ln",ln
!
!        Get wannier factor for that lm component
         if(lm <=lmax2(isppol)) then
           wan_lm_fac=ylmr_fac(lm,iwan,isppol)
!          Make delta product
!          Here we integrate the angular part
!          Since the integral of the product of two spherical harmonics
!          is a delta function
           if( abs(wan_lm_fac) > 0.0d0) then
!            write(std_out,*) 'll',ll,'mm',mm,'lm',lm,'ln',ln,'factor',wan_lm_fac !lm index for wannier function
!
!            Calculate Amn_paw, now that the radial and angular integrations are done
!
             prod=cmplx(0.d0,0.d0)
             do ikpt=1,nkpt
               jband=0
               ! NOTE: hexu: this doesn't seem right for nspinor=2
               ! NOTE: also nband size is (nsppol*nkpt)
               do iband=1,nband(ikpt)
                 if(band_in(iband,isppol)) then
                   jband=jband+1

                   !prod_real= cprj(iatom,index(iband,ikpt,isppol))%cp(1,ilmn) * int_rad(ln) * wan_lm_fac
                   !prod_imag= cprj(iatom,index(iband,ikpt,isppol))%cp(2,ilmn) * int_rad(ln) * wan_lm_fac
                   ! FIXME: here ispinor is set to 1
                   ! There should be a loop over ispinor
                   prod_real= mywfc%cprj_elem(1, 1, iband, ikpt, isppol, iatom, ilmn ) * int_rad(ln) * wan_lm_fac
                   prod_imag= mywfc%cprj_elem(2, 1, iband, ikpt, isppol, iatom, ilmn ) * int_rad(ln) * wan_lm_fac
                   prod=cmplx(prod_real,prod_imag)

                   A_paw(jband,iwan,ikpt,isppol)=A_paw(jband,iwan,ikpt,isppol)+prod
                 end if !band_in
               end do !iband
             end do !ikpt
!
           end if !lm<=lmax2
         end if  ! abs(wan_lm_fac) > 0.0d0
       end do !ilmn=1, lmn_size
       ABI_FREE(int_rad)
     end do !iatom
   end do !iwan
 end do !isppol

!Deallocate quantities
 ABI_FREE(ylmr_fac)

end subroutine mlwfovlp_projpaw
!!***

!!****f* m_mlwfovlp/mlwfovlp_radial
!! NAME
!! mlwfovlp_radial
!!
!! FUNCTION
!! Calculates the radial part of the initial functions given to Wannier90
!! as an starting point for the minimization.
!! The trial functions are a set of solutions to the radial part of the hydrogenic
!! Schrodinger equation as it is explained in Table 3.3 of the Wannier90 user guide.
!!
!! INPUTS
!!  alpha= Z/a = zona
!!  lmax= maximum value of l
!!  rvalue= integer defining the choice for radial functions R(r).
!!   It can take values from 1-3.
!!   It is associted to the radial part of the hydrogenic Schrodinger equation for l=0,
!!   See the manual of Wannier90 for more information. (www.wannier.org)
!!  xx= scalar number used to calculate the spherical bessel function. J_il(xx)
!!
!! OUTPUT
!!  mlwfovlp_radial= radial part for initial projections used to construct MLWF
!!
!! SIDE EFFECTS
!!  None
!!
!! NOTES
!!  Calculates the radial part of the initial functions given as an initial
!!  guess by the user to construct the MLWF.
!!
!! SOURCE

subroutine mlwfovlp_radial(alpha,lmax,lmax2,radial,rvalue,xx)

!Arguments ------------------------------------
!scalars
 integer,intent(in) :: lmax,lmax2,rvalue
 real(dp),intent(in) :: alpha,xx
!arrays
 real(dp),intent(out) :: radial(lmax2)

!Local variables
!scalars
 integer :: ir,ll,lm,mesh,mm
 real(dp),parameter :: dx=0.015d0,rmax=10.d0,xmin=0.d0
 real(dp) :: aa,ftmp,gauss,rtmp,x
 character(len=500) :: msg
!arrays
 real(dp),parameter :: dblefact(4)=(/1_dp,3_dp,15_dp,105_dp/)
 real(dp),allocatable :: aux(:),bes(:),cosr(:),func_r(:),r(:),rad_int(:),sinr(:)

! *************************************************************************

!Radial functions in the form of hydrogenic orbitals as defined in the
!wannier90 manual.
 if(( rvalue > 0 ).and.(rvalue < 4)) then

!  mesh
   mesh= nint((rmax - xmin ) / dx + 1)
   ABI_MALLOC( bes,(mesh))
   ABI_MALLOC(func_r,(mesh))
   ABI_MALLOC(r,(mesh))
   ABI_MALLOC(rad_int,(mesh))
   ABI_MALLOC( aux,(mesh))
   ABI_MALLOC(cosr,(mesh))
   ABI_MALLOC(sinr,(mesh))
   do ir=1, mesh
     x=xmin+DBLE(ir-1)*dx
     r(ir)=x
   end do   !ir

!  radial functions shown in table 3.3 of wannier90 manual
   if (rvalue==1) func_r(:) = 2.d0 * alpha**(3.d0/2.d0) * exp(-alpha*r(:))
   if (rvalue==2) func_r(:) = 1.d0/(2.d0*sqrt(2.d0))*alpha**(3.d0/2.d0) *&
&   (2.d0 - alpha*r(:))*exp(-alpha*r(:)/2.d0)
   if (rvalue==3) func_r(:) = sqrt(4.d0/27.d0)*alpha**(3.d0/2.d0)&
&   * (1.d0 - 2.d0*alpha*r(:)/3.d0 + 2.d0*alpha**2*r(:)**2/27.d0)&
&   * exp(-alpha * r(:)/3.d0)

!  compute spherical bessel functions
   cosr(:)=cos(xx*r(:))
   sinr(:)=sin(xx*r(:))
   lm=0
   do ll=0,lmax
     call besjm(xx,bes,cosr,ll,mesh,sinr,r)
     aux(:)=bes(:)*func_r(:)*r(:)
!    do ir=1,mesh
!    write(310,*) r(ir),bes(ir)
!    end do
     call simpson_int(mesh,dx,aux,rad_int)
     rtmp=rad_int(mesh)/mesh
     do mm=-ll,ll
       lm=lm+1
       radial(lm)=rtmp
     end do !mm
   end do !ll
   ABI_FREE(bes)
   ABI_FREE(func_r)
   ABI_FREE(r)
   ABI_FREE(aux)
   ABI_FREE(rad_int)
   ABI_FREE(cosr)
   ABI_FREE(sinr)

!  Radial part in the form of Gaussian functions of a given width
!  Taken by code of made by drh.
 elseif ( rvalue == 4) then
   aa=1._dp/alpha
   gauss=exp(-0.25_dp*(aa*xx)**2)
   lm=0
   do ll=0,lmax
     ftmp=(0.5_dp*pi)**(0.25_dp)*aa*sqrt(aa/dblefact(ll+1))*(aa*xx)**ll*gauss
     do mm=-ll,ll
       lm=lm+1
       radial(lm)=ftmp
     end do
   end do
 else ! rvalue < 0 of rvalue > 4
   write(msg,'(a,i6,5a)')&
   '  Radial function r=',rvalue,ch10,&
   '  is not defined',ch10,&
   '  Modify .win file',ch10
   ABI_BUG(msg)
 end if !rvalue

end subroutine mlwfovlp_radial
!!***

!!****f* m_mlwfovlp/mlwfovlp_ylmfac
!! NAME
!! mlwfovlp_ylmfac
!!
!! FUNCTION
!! Routine that produces a factor by which the initial
!! guess of functions will be multiplied for the Wannier90 interface.
!! It is just used if there are rotations, or if the functions required
!! are linear combinations of the ylm real functions.
!!
!! Example,
!! For a function G(r)= 1/2 s + 1/3 px - 1/2 pz
!!   it would produce a matrix of the following form:
!!   [1/2,-1/2,1/3,0,0...0]
!!
!! The real spherical harmonics are given as factors of complex spherical harmonics
!! The real spherical harmonics are given in table 3.1 of Wannier90 user guide.
!!
!! INPUTS
!!  lmax= maximum l value for spherical harmonics
!!  lmax2=number of ylm functions
!!  mband=maximum number of bands
!!  nwan = number of wannier functions
!!  proj_l(mband)= angular part of the projection function (quantum number l)
!!  proj_m(mband)= angular part of the projection function (quantum number m)
!!  proj_x(3,mband)= x axis for the projection.
!!  proj_z(3,mband)= z axis for the projection.
!!
!! OUTPUT
!!  ylmc_fac(lmax2,nwan)=matrix containig a factor for ylm hybrid orbitals
!!
!! SIDE EFFECTS
!!  (only writing, printing)
!!
!! NOTES
!!
!! SOURCE


subroutine mlwfovlp_ylmfac(ylmc_fac,lmax,lmax2,mband,nwan,proj_l,proj_m,proj_x,proj_z)

!Arguments ------------------------------------
 integer, intent(in):: lmax,lmax2,nwan,mband
! arrays
 integer,intent(in) :: proj_l(mband),proj_m(mband)
 real(dp),intent(in) :: proj_x(3,mband),proj_z(3,mband)
 complex(dp),intent(out)::ylmc_fac(lmax2,nwan)
!
!Local variables-------------------------------
!
 integer :: orb_idx(16)=(/1,3,4,2,7,8,6,9,5,13,14,12,15,11,16,10/) !Tab3.1 Wannier90 user guide
 integer :: idum,ii,info,inversion_flag
 integer :: ir,iwan,jj,ll,lm,lmc,mm,mr
 real(dp):: onem,test
! arrays
 integer:: ipiv(lmax2)
 real(dp)::r(3,lmax2),rp(3,lmax2)
 real(dp)::rs2,rs3,rs6,rs12,umat(3,3)
 complex(dp)::crot(lmax2,lmax2),ctor(lmax2,lmax2),orb_lm(lmax2,-5:3,7)
 complex(dp):: ylmcp(lmax2)
 complex(dp):: ylmc_rr(lmax2,lmax2),ylmc_rr_save(lmax2,lmax2)
 complex(dp):: ylmc_rrinv(lmax2,lmax2),ylmc_rp(lmax2,lmax2)
 complex(dp),parameter :: c0=(0._dp,0._dp),c1=(1._dp,0._dp),ci=(0._dp,1._dp)
 character(len=500) :: msg

! *************************************************************************


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!DEBUG
!write(std_out,*)'lmax ',lmax,'lmax2 ',lmax2
!write(std_out,*)'mband ',mband,'nwan ',nwan
!
!do iwan=1,nwan
!write(std_out,*)'iwan,proj_l, proj_m',proj_l(iwan),proj_m(iwan)
!write(std_out,*)'iwan,proj_x, proj_z',iwan,proj_x(:,iwan),proj_z(:,iwan)
!end do
!!END DEBUG

!constants for linear combinations of ylm's
 rs2=1._dp/sqrt(2._dp)
 rs3=1._dp/sqrt(3._dp)
 rs6=1._dp/sqrt(6._dp)
 rs12=1._dp/sqrt(12._dp)

!complex lm coefficients for real spherical harmonics in conventional order
!s, py,pz,px, dxy,dyz,dz2,dxz,dx2-y2, fy(3x2-y2),fxyz,fyz2,fz3,fxz2,
!fz(x2-y2),fx(x2-3y2)
 ctor(:,:)=c0
 do ll=0,lmax
   mm=0
   lm= ll**2+ll+mm+1
   ctor(lm,lm)=c1
   if(ll>0) then
     onem=one
     do mm=1,ll
       onem=-onem !(-1^mm)
       lm= ll**2+ll+mm+1
       lmc=ll**2+ll-mm+1
       ctor(lm ,lm )=rs2*c1
       ctor(lmc,lm )=onem*rs2*c1
       ctor(lm ,lmc)=rs2*ci
       ctor(lmc,lmc)=-onem*rs2*ci
     end do
   end if
 end do

 lm=0
 do ll=0,lmax
   do mm=-ll,ll
     lm=lm+1
     ctor(:,lm)=ctor(:,lm)*conjg(ci)**ll
   end do !mm
 end do !ll


!coefficients for basic wannier orbitals in Table 3.1 order
 orb_lm(:,:,:)=c0
 ii=0
 do ll=0,lmax
   do mr=1,2*ll+1
     ii=ii+1
     orb_lm(:,ll,mr)=ctor(:,orb_idx(ii))
   end do
 end do



!coefficients for linear combinations in table 3.2 order
 if(lmax>=1) then
!  s            px
   orb_lm(:,-1,1)=rs2*ctor(:,1)+rs2*ctor(:,4)
   orb_lm(:,-1,2)=rs2*ctor(:,1)-rs2*ctor(:,4)
!  s            px            py
   orb_lm(:,-2,1)=rs3*ctor(:,1)-rs6*ctor(:,4)+rs2*ctor(:,2)
   orb_lm(:,-2,2)=rs3*ctor(:,1)-rs6*ctor(:,4)-rs2*ctor(:,2)
   orb_lm(:,-2,3)=rs3*ctor(:,1)+2._dp*rs6*ctor(:,4)
!  s        px        py        pz
   orb_lm(:,-3,1)=half*(ctor(:,1)+ctor(:,4)+ctor(:,2)+ctor(:,3))
   orb_lm(:,-3,2)=half*(ctor(:,1)+ctor(:,4)-ctor(:,2)-ctor(:,3))
   orb_lm(:,-3,3)=half*(ctor(:,1)-ctor(:,4)+ctor(:,2)-ctor(:,3))
   orb_lm(:,-3,4)=half*(ctor(:,1)-ctor(:,4)-ctor(:,2)+ctor(:,3))
 end if
 if(lmax>=2) then
!  s            px            py
   orb_lm(:,-4,1)=rs3*ctor(:,1)-rs6*ctor(:,4)+rs2*ctor(:,2)
   orb_lm(:,-4,2)=rs3*ctor(:,1)-rs6*ctor(:,4)-rs2*ctor(:,2)
   orb_lm(:,-4,3)=rs3*ctor(:,1)+2._dp*rs6*ctor(:,4)
!  pz           dz2
   orb_lm(:,-4,4)= rs2*ctor(:,3)+rs2*ctor(:,7)
   orb_lm(:,-4,5)=-rs2*ctor(:,3)+rs2*ctor(:,7)
!  s            px            dz2         dx2-y2
   orb_lm(:,-5,1)=rs6*ctor(:,1)-rs2*ctor(:,4)-rs12*ctor(:,7)+half*ctor(:,9)
   orb_lm(:,-5,2)=rs6*ctor(:,1)+rs2*ctor(:,4)-rs12*ctor(:,7)+half*ctor(:,9)
!  s            py            dz2         dx2-y2
   orb_lm(:,-5,3)=rs6*ctor(:,1)-rs2*ctor(:,2)-rs12*ctor(:,7)-half*ctor(:,9)
   orb_lm(:,-5,4)=rs6*ctor(:,1)+rs2*ctor(:,2)-rs12*ctor(:,7)-half*ctor(:,9)
!  s            pz           dz2
   orb_lm(:,-5,5)=rs6*ctor(:,1)-rs2*ctor(:,3)+rs3*ctor(:,7)
   orb_lm(:,-5,6)=rs6*ctor(:,1)+rs2*ctor(:,3)+rs3*ctor(:,7)
 end if

!stuff complex wannier orbital coefficient array
 do iwan=1,nwan
   ylmc_fac(:,iwan)=orb_lm(:,proj_l(iwan),proj_m(iwan))
 end do


!setup to rotate ylmc_fac to new axes if called for
!skip if only s projectors are used
 if ( lmax>0 ) then
!  generate a set of nr=lmax2 random vectors
!  idum=123456
   do ir=1,lmax2
     do ii=1,3
       r(ii,ir) = uniformrandom(idum)-0.5d0
     end do !ii
     call ylm_cmplx(lmax,ylmcp,r(1,ir),r(2,ir),r(3,ir))
     ylmc_rr(ir,:)=conjg(ylmcp(:))
     ylmc_rr_save(ir,:)=conjg(ylmcp(:))
   end do !ir

   ylmc_rrinv(:,:)=c0
   do ii=1,lmax2
     ylmc_rrinv(ii,ii)=c1
   end do !ii
!  calculate inverse of ylmc(ir,lm) matrix
   call ZGESV(lmax2,lmax2,ylmc_rr,lmax2,ipiv,ylmc_rrinv,lmax2,info)

!  check that r points are independent (ie., that matrix inversion wasn't
!  too close to singular)
   ylmc_rr=matmul(ylmc_rrinv,ylmc_rr_save)
   test=zero
   do ii=1,lmax2
     ylmc_rr(ii,ii)=ylmc_rr(ii,ii)-c1
     do jj=1,lmax2
       test=max(abs(ylmc_rr(ii,jj)),test)
     end do !ii
   end do !jj
   if(test>tol8) then
     write(msg, '(5a)' )&
&     '  matrix inversion error for wannier rotations',ch10,&
&     '  random vectors r(j,1:nr) are not all independent !! ',ch10,&
&     '  Action : re-seed uniformrandom or maybe just try again'
     ABI_ERROR(msg)
   end if !test>tol8

!  end of the preliminaries, now to the rotations of the wannier orbitals
   do iwan=1,nwan
!    don't bother for s orbitals
     if(proj_l(iwan)==0) cycle
!    check for default axes and cycle if found
     if(proj_z(1,iwan)==zero .and. proj_z(2,iwan)==zero .and.&
&     proj_z(3,iwan)== one .and. proj_x(1,iwan)==one .and.&
&     proj_x(2,iwan)==zero .and. proj_x(3,iwan)==zero) cycle

!    get the u matrix that rotates the reference frame
     call rotmat(proj_x(:,iwan),proj_z(:,iwan),inversion_flag,umat)

!    find rotated r-vectors. Optional inversion
!    operation is an extension of the wannier90 axis-setting options
!    which only allow for proper axis rotations
     if(inversion_flag==1) then
       rp(:,:)= -matmul ( umat(:,:),  r(:,:) )
     else
       rp(:,:) = matmul ( umat(:,:) , r(:,:) )
     end if !inversion_flag

     do ir=1,lmax2
!      get the ylm representation of the rotated vectors
       call ylm_cmplx(lmax,ylmcp,rp(1,ir),rp(2,ir),rp(3,ir))
       ylmc_rp(ir,:)=conjg(ylmcp(:))
     end do !ir
!    the matrix product sum(ir) ylmc_rrinv(lm,ir)*ylmc_rp(ir,lm') gives the
!    the complex lmXlm matrix representation of the coordinate rotation
     crot(:,:)=matmul(ylmc_rrinv(:,:),ylmc_rp(:,:))

!    now rotate the current wannier orbital
     ylmcp(:)=matmul(crot(:,:),ylmc_fac(:,iwan))
     ylmc_fac(:,iwan)=ylmcp(:)

!    write(std_out,*)'ylmc_fac',ylmc_fac(:,iwan)
   end do !iwan
 end if !lmax>0

end subroutine mlwfovlp_ylmfac
!!***

!!****f* m_mlwfovlp/mlwfovlp_ylmfar
!! NAME
!! mlwfovlp_ylmfar
!!
!! FUNCTION
!! Routine that produces a fator by which the initial
!! guess of functions will be multiplied for the Wannier90 interface.
!! It is just used if there are rotations, or if the functions required
!! are linear combinations of the ylm real functions.
!!
!! Example,
!! For a function G(r)= 1/2 s + 1/3 px - 1/2 pz
!!   it would produce a matrix of the following form:
!!   [1/2,-1/2,1/3,0,0...0]
!!
!! This function is similar to mlwfovlp_ylmfac, but the factors it uses
!! real spherical harmonics instead of complex
!! spherical harmonics. Remember that real spherical harmonics
!! are linear combinations of complex
!! spherical harmonics
!!
!! INPUTS
!!  lmax= maximum l value for spherical harmonics
!!  lmax2=number of ylm functions
!!  mband=maximum number of bands
!!  nwan = number of wannier functions
!!  proj_l(mband)= angular part of the projection function (quantum number l)
!!  proj_m(mband)= angular part of the projection function (quantum number m)
!!  proj_x(3,mband)= x axis for the projection.
!!  proj_z(3,mband)= z axis for the projection.
!!
!! OUTPUT
!!  ylmc_fac(lmax2,nwan)=matrix containig a factor for ylm hybrid orbitals
!!
!! SOURCE

subroutine mlwfovlp_ylmfar(ylmr_fac,lmax,lmax2,mband,nwan,proj_l,proj_m,proj_x,proj_z)

!Arguments ------------------------------------
 integer, intent(in):: lmax,lmax2,nwan,mband
! arrays
 integer,intent(in) :: proj_l(mband),proj_m(mband)
 real(dp),intent(in) :: proj_x(3,mband),proj_z(3,mband)
 real(dp),intent(out)::ylmr_fac(lmax2,nwan)
!
!Local variables-------------------------------
!
 integer :: idum,ii,inversion_flag
 integer :: ir,iwan,jj,ll,lm,mm,mr
 real(dp) :: onem,test
! arrays
 real(dp),allocatable:: dummy(:,:),nrm(:)
 real(dp) :: r(3,lmax2),rp(3,lmax2)
 real(dp) :: rs2,rs3,rs6,rs12,umat(3,3)
 real(dp) :: rot(lmax2,lmax2),tor(lmax2,lmax2),orb_lm(lmax2,-5:3,7)
 real(dp) :: ylmrp(lmax2)
 real(dp) :: ylmr_rr(lmax2,lmax2),ylmr_rr_save(lmax2,lmax2)
 real(dp) :: ylmr_rrinv(lmax2,lmax2),ylmr_rp(lmax2,lmax2)
 character(len=500) :: msg                   ! to be uncommented, if needed
!integer :: orb_idx(16)=(/1,3,4,2,7,8,6,9,5,13,14,12,15,11,16,10/) !Tab3.1 Wannier90 user guide

! *************************************************************************

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!DEBUG
!write(std_out,*)'lmax ',lmax,'lmax2 ',lmax2
!write(std_out,*)'mband ',mband,'nwan ',nwan
!
!do iwan=1,nwan
!write(std_out,*)'iwan,proj_l, proj_m',proj_l(iwan),proj_m(iwan)
!write(std_out,*)'iwan,proj_x, proj_z',iwan,proj_x(:,iwan),proj_z(:,iwan)
!end do
!!END DEBUG

!constants for linear combinations of ylm's
 rs2=1._dp/sqrt(2._dp)
 rs3=1._dp/sqrt(3._dp)
 rs6=1._dp/sqrt(6._dp)
 rs12=1._dp/sqrt(12._dp)

!
!mapping lm coefficients for real spherical harmonics
!table 3.1 of Wannier90 user guide with real spherical harmonics in routine initylmr
!s, py,pz,px, dxy,dyz,dz2,dxz,dx2-y2, fy(3x2-y2),fxyz,fyz2,fz3,fxz2,
!fz(x2-y2),fx(x2-3y2)
!note: check ordering of f orbitals, it might be wrong

 tor(:,:)=0.d0
 lm=0
 do ll=0,lmax
   do mm=-ll,ll
     onem=(-1.d0)**mm
     lm=lm+1
     if(ll == 0) then
       tor(lm,lm)=1.d0
     else
       tor(lm,lm)=onem*1.d0
     end if
   end do !mm
 end do !ll
!do lm=1,16
!write(std_out,*)'tor lm=',lm,tor(:,lm)
!end do

!coefficients for basic wannier orbitals in Table 3.1 order
 orb_lm(:,:,:)=0.d0
 ii=0
 do ll=0,lmax
   do mr=1,2*ll+1
     ii=ii+1
     orb_lm(:,ll,mr)= tor(:,ii)
!    write(std_out,*)'ii',ii,'orb_lm',orb_lm(:,ll,mr)
   end do
 end do



!coefficients for linear combinations in table 3.2 order
 if(lmax>=1) then
!  s            px
   orb_lm(:,-1,1)=rs2*tor(:,1)+rs2*tor(:,4)
   orb_lm(:,-1,2)=rs2*tor(:,1)-rs2*tor(:,4)
!  s            px            py
   orb_lm(:,-2,1)=rs3*tor(:,1)-rs6*tor(:,4)+rs2*tor(:,2)
   orb_lm(:,-2,2)=rs3*tor(:,1)-rs6*tor(:,4)-rs2*tor(:,2)
   orb_lm(:,-2,3)=rs3*tor(:,1)+2._dp*rs6*tor(:,4)
!  s        px        py        pz
   orb_lm(:,-3,1)=half*(tor(:,1)+tor(:,4)+tor(:,2)+tor(:,3))
   orb_lm(:,-3,2)=half*(tor(:,1)+tor(:,4)-tor(:,2)-tor(:,3))
   orb_lm(:,-3,3)=half*(tor(:,1)-tor(:,4)+tor(:,2)-tor(:,3))
   orb_lm(:,-3,4)=half*(tor(:,1)-tor(:,4)-tor(:,2)+tor(:,3))
 end if
 if(lmax>=2) then
!  s            px            py
   orb_lm(:,-4,1)=rs3*tor(:,1)-rs6*tor(:,4)+rs2*tor(:,2)
   orb_lm(:,-4,2)=rs3*tor(:,1)-rs6*tor(:,4)-rs2*tor(:,2)
   orb_lm(:,-4,3)=rs3*tor(:,1)+2._dp*rs6*tor(:,4)
!  pz           dz2
   orb_lm(:,-4,4)= rs2*tor(:,3)+rs2*tor(:,7)
   orb_lm(:,-4,5)=-rs2*tor(:,3)+rs2*tor(:,7)
!  s            px            dz2         dx2-y2
   orb_lm(:,-5,1)=rs6*tor(:,1)-rs2*tor(:,4)-rs12*tor(:,7)+half*tor(:,9)
   orb_lm(:,-5,2)=rs6*tor(:,1)+rs2*tor(:,4)-rs12*tor(:,7)+half*tor(:,9)
!  s            py            dz2         dx2-y2
   orb_lm(:,-5,3)=rs6*tor(:,1)-rs2*tor(:,2)-rs12*tor(:,7)-half*tor(:,9)
   orb_lm(:,-5,4)=rs6*tor(:,1)+rs2*tor(:,2)-rs12*tor(:,7)-half*tor(:,9)
!  s            pz           dz2
   orb_lm(:,-5,5)=rs6*tor(:,1)-rs2*tor(:,3)+rs3*tor(:,7)
   orb_lm(:,-5,6)=rs6*tor(:,1)+rs2*tor(:,3)+rs3*tor(:,7)
 end if

!real wannier orbital coefficient array
 do iwan=1,nwan
   ylmr_fac(:,iwan)=orb_lm(:,proj_l(iwan),proj_m(iwan))
 end do


!setup to rotate ylmr_fac to new axes if called for
!skip if only s projetors are used
 if ( lmax>0 ) then
!  generate a set of nr=lmax2 random vectors
   idum=123456
   do ir=1,lmax2
     do ii=1,3
       r(ii,ir) = uniformrandom(idum)-0.5d0
     end do !ii
   end do !ir
   ABI_MALLOC(nrm,(lmax2))
   nrm(:)=sqrt(r(1,:)**2+r(2,:)**2+r(3,:)**2)**0.5
   call initylmr(lmax+1,1,lmax2,nrm,1,r(:,:),ylmr_rr_save(:,:),dummy)
   ylmr_rr(:,:)=ylmr_rr_save(:,:)
   do ir=1,lmax2
     ylmr_rr_save(ir,:)=ylmr_rr(:,ir)
   end do
   ABI_FREE(nrm)

   ylmr_rrinv(:,:)=0.d0
   do ii=1,lmax2
     ylmr_rrinv(ii,ii)=1.d0
   end do !ii
!  calculate inverse of ylmr(ir,lm) matrix
   ylmr_rrinv(:,:)=ylmr_rr_save(:,:)
   call matrginv(ylmr_rrinv,lmax2,lmax2)

!  check that r points are independent (ie., that matrix inversion wasn't too close to singular)
   ylmr_rr=matmul(ylmr_rrinv,ylmr_rr_save)
   test=0.d0
   do ii=1,lmax2
     ylmr_rr(ii,ii)=ylmr_rr(ii,ii)-1.d0
     do jj=1,lmax2
       test=max(abs(ylmr_rr(ii,jj)),test)
     end do !ii
   end do !jj
   if(test>tol8) then
     write(msg, '(5a)' )&
     '  matrix inversion error for wannier rotations',ch10,&
     '  random vectors r(j,1:nr) are not all independent !! ',ch10,&
     '  Action : re-seed uniformrandom or maybe just try again'
     ABI_ERROR(msg)
   end if !test>tol8

!  end of the preliminaries, now to the rotations of the wannier orbitals
   do iwan=1,nwan
!    don't bother for s orbitals
     if(proj_l(iwan)==0) cycle
!    check for default axes and cycle if found
     if(proj_z(1,iwan)==0.d0 .and. proj_z(2,iwan)==0.d0 .and.&
&     proj_z(3,iwan)== 1.d0 .and. proj_x(1,iwan)==1.d0 .and.&
&     proj_x(2,iwan)==0.d0 .and. proj_x(3,iwan)==0.d0) cycle

!    get the u matrix that rotates the reference frame
     call rotmat(proj_x(:,iwan),proj_z(:,iwan),inversion_flag,umat)
!
!    find rotated r-vectors. Optional inversion
!    operation is an extension of the wannier90 axis-setting options
!    which only allow for proper axis rotations
     if(inversion_flag==1) then
       rp(:,:)= -matmul ( umat(:,:),  r(:,:) )
     else
       rp(:,:) = matmul ( umat(:,:) , r(:,:) )
     end if !inversion_flag

!    get the ylm representation of the rotated vectors
     ABI_MALLOC(nrm,(lmax2))
     nrm(:)=sqrt(rp(1,:)**2+rp(2,:)**2+rp(3,:)**2)**0.5
     call initylmr(lmax+1,1,lmax2,nrm,1,rp(:,:),ylmr_rp(:,:),dummy)
     ylmr_rr(:,:)=ylmr_rp(:,:)
     do ir=1,lmax2
       ylmr_rp(ir,:)=ylmr_rr(:,ir)
     end do
     ABI_FREE(nrm)
!    the matrix product sum(ir) ylmr_rrinv(lm,ir)*ylmr_rp(ir,lm') gives the
!    the  lmXlm matrix representation of the coordinate rotation

     rot(:,:)=matmul(ylmr_rrinv(:,:),ylmr_rp(:,:))
!
!    now rotate the current wannier orbital
     ylmrp(:)=matmul(rot(:,:),ylmr_fac(:,iwan))
     ylmr_fac(:,iwan)=ylmrp(:)
   end do !iwan
 end if !lmax>0

end subroutine mlwfovlp_ylmfar
!!***

!!****f* m_mlwfovlp/wan_from_abiwan
!! NAME
!! wan_from_abiwan
!!
!! FUNCTION
!!  Initialize a wan_t instance from the ABIWAN.nc netcf file
!!
!! INPUTS
!!
!! OUTPUT
!!
!! SOURCE

subroutine wan_from_abiwan(wan, abiwan_filepath, spin, nsppol, keep_umats, out_prefix, comm)

!Arguments ------------------------------------
 class(wan_t),intent(out) :: wan
 character(len=*),intent(in) :: abiwan_filepath, out_prefix
 logical,intent(in) :: keep_umats
 integer,intent(in) :: spin, nsppol, comm

!Local variables-------------------------------
!scalars
 integer,parameter :: master = 0
 integer :: ncid, my_rank, ii, ir, ik, ib, jb, mb, nsppol_, mband, nwan, nkbz, ount, num_bands, nr_h, nextbands
 character(len=500) :: msg
 character(len=fnlen) :: out_path
 type(crystal_t) :: cryst
 complex(dp) :: ctmp
!arrays
 integer :: kptrlatt(3,3)
 integer,allocatable :: int_1d(:), int_2d(:,:) !, shiftk(:,:)
 real(dp),allocatable :: u_mat(:,:,:,:), u_mat_opt(:,:,:,:), et_opt(:,:)
 complex(dp),allocatable :: chs(:,:,:), chw(:,:,:)
!************************************************************************

 my_rank = xmpi_comm_rank(comm)

 !if (my_rank == master) then
 wan%spin = spin
 NCF_CHECK(nctk_open_read(ncid, abiwan_filepath, comm))

 ! Get dimensions.
 NCF_CHECK(nctk_get_dim(ncid, "number_of_spins", nsppol_))
 ABI_CHECK_IEQ(nsppol, nsppol_, "Inconsistent number of spins")

 call cryst%ncread(ncid)

 ! NB: mband is the value of nband and not the number of bands for Wannier that is called num_bands!
 NCF_CHECK(nctk_get_dim(ncid, "max_number_of_states", mband))
 NCF_CHECK(nctk_get_dim(ncid, "number_of_kpoints", nkbz))
 NCF_CHECK(nctk_get_dim(ncid, "nrpts", nr_h))
 NCF_CHECK(nctk_get_dim(ncid, "mwan", wan%max_nwan))
 NCF_CHECK(nf90_get_var(ncid, vid("nwan"), nwan, start=[spin]))
 wan%nkbz = nkbz; wan%nwan = nwan; wan%nr_h = nr_h

 ! Read variables for this spin.
 NCF_CHECK(nf90_get_var(ncid, vid("num_bands"), num_bands, start=[spin]))
 wan%num_bands = num_bands
 NCF_CHECK(nf90_get_var(ncid, nctk_idname(ncid, "spread"), wan%spread, start=[1,spin]))

 ABI_MALLOC(wan%kbz, (3, nkbz))
 NCF_CHECK(nf90_get_var(ncid, nctk_idname(ncid, "reduced_coordinates_of_kpoints"), wan%kbz))

 ABI_MALLOC(wan%r_h, (3, nr_h))
 ABI_MALLOC(wan%ndegen_h, (nr_h))
 NCF_CHECK(nf90_get_var(ncid, vid("irvec"), wan%r_h))
 NCF_CHECK(nf90_get_var(ncid, vid("ndegen"), wan%ndegen_h))
 NCF_CHECK(nf90_get_var(ncid, vid("have_disentangled_spin"), ii, start=[spin]))

 ABI_MALLOC(wan%exclude_bands, (mband))
 NCF_CHECK(nf90_get_var(ncid, vid("exclude_bands"), wan%exclude_bands, start=[1,spin]))

 wan%have_disentangled = (ii /= 0)

 ! Read U matrices using real arrays.
 ! TODO: This should be tested more carefully, especially when we are excluding bands.
 ABI_MALLOC(u_mat, (2, nwan, nwan, nkbz))
 NCF_CHECK(nf90_get_var(ncid, vid("U_matrix"), u_mat, start=[1,1,1,1,spin], count=[2, nwan, nwan, nkbz, 1]))
 ABI_MALLOC(u_mat_opt, (2, num_bands, nwan, nkbz))
 NCF_CHECK(nf90_get_var(ncid, vid("U_matrix_opt"), u_mat_opt, start=[1,1,1,1,spin], count=[2, num_bands, nwan, nkbz, 1]))

 ! Copy data: real --> complex
 ABI_MALLOC(wan%u_mat, (num_bands, nwan, nkbz))
 ABI_MALLOC(wan%u_mat_opt, (nwan, nwan, nkbz))
 wan%u_mat = u_mat(1,:,:,:) + j_dpc * u_mat(2,:,:,:)
 wan%u_mat_opt = u_mat_opt(1,:,:,:) + j_dpc * u_mat_opt(2,:,:,:)

 ABI_FREE(u_mat)
 ABI_FREE(u_mat_opt)

 ABI_MALLOC(wan%centres, (3, nwan))
 ABI_MALLOC(wan%spreads, (nwan))
 NCF_CHECK(nf90_get_var(ncid, vid("wann_centres"), wan%centres, start=[1,1,spin], count=[3, nwan, 1]))
 NCF_CHECK(nf90_get_var(ncid, vid("wann_spreads"), wan%spreads, start=[1,spin], count=[nwan]))

 ABI_MALLOC(int_1d, (num_bands))
 NCF_CHECK(nf90_get_var(ncid, vid("band_in_int"), int_1d, start=[1,spin], count=[num_bands, 1]))
 ABI_MALLOC(wan%band_in, (num_bands))
 wan%band_in = (int_1d /= 0)
 ABI_FREE(int_1d)

 ABI_MALLOC(int_2d, (num_bands, wan%nkbz))
 NCF_CHECK(nf90_get_var(ncid, vid("lwindow_int"), int_2d, start=[1,1,spin], count=[num_bands, nkbz, 1]))
 ABI_MALLOC(wan%lwindow, (wan%num_bands, wan%nkbz))
 wan%lwindow = (int_2d /= 0)
 ABI_FREE(int_2d)

 NCF_CHECK(nf90_get_var(ncid, vid("kptrlatt"), kptrlatt))
 ABI_CHECK(isdiagmat(kptrlatt), "kptrlatt should be diagonal! Please use nkgpt with one shift")
 wan%ngkpt = get_diag(kptrlatt)
 !NCF_CHECK(nctk_get_dim(ncid, "nshiftk", wan%nshiftk))
 !if nshiftk

 ! Read all KS eigenvalues and trasfer data to %eigs_w (note mband here)
 ABI_MALLOC(wan%all_eigens, (mband, nkbz))
 NCF_CHECK(nf90_get_var(ncid, vid("eigenvalues"), wan%all_eigens, start=[1,1,spin]))

 NCF_CHECK(nf90_close(ncid))
 !end if ! master

 ! Compute dimwin, winstart, bmin and bmax from lwindow.
 ABI_ICALLOC(wan%dimwin, (nkbz))
 ABI_ICALLOC(wan%winstart, (nkbz))

 wan%bmin = huge(1); wan%bmax = -1
 do ik=1,nkbz
   do ib=1,wan%num_bands
     if (wan%lwindow(ib, ik)) then
       wan%dimwin(ik) = wan%dimwin(ik) + 1
       if (wan%winstart(ik) == 0) wan%winstart(ik) = ib
       wan%bmin = min(wan%bmin, ib)
       wan%bmax = max(wan%bmax, ib)
     end if
   end do
 end do

 wan%krank = krank_from_kptrlatt(nkbz, wan%kbz, kptrlatt, compute_invrank=.True.)

 ABI_MALLOC(wan%rmod_h, (wan%nr_h))
 do ir=1,wan%nr_h
   wan%rmod_h(ir) = sqrt(dot_product(wan%r_h(:,ir), matmul(cryst%rmet, wan%r_h(:,ir))))
 end do

 ! Get total rotation matrix: the product of the optimal subspace x the rotation among the nwan Wannier functions.
 ii = maxval(wan%dimwin)
 ABI_CALLOC(wan%u_k, (ii, nwan, nkbz))
 do ik=1,nkbz
   wan%u_k(1:wan%dimwin(ik), 1:nwan, ik) = matmul(wan%u_mat_opt(1:wan%dimwin(ik), :, ik), wan%u_mat(:, 1:nwan, ik))
 end do

 wan%keep_umats = keep_umats
 if (.not. keep_umats) then
   ABI_FREE(wan%u_mat)
   ABI_FREE(wan%u_mat_opt)
 end if

 ! ====================================================
 ! Build the Hamiltonian in the Wannier representation
 ! ====================================================
 !call wan%get_window_eig(et_opt)
 nextbands = count(wan%exclude_bands /= 0)
 !REAL(KIND = DP) :: et_opt(nbndep, nks)
 ! KS eigenvalues within the outer window in the first dimwin(ik) entries
 ii = wan%num_bands ! TODO: Check
 ABI_MALLOC(et_opt, (ii ,nkbz))

 !print *, "nextbands", nextbands; print *, "exclude_bands:", wan%exclude_bands
 !print *, "band_in:", wan%band_in; print *, "lwindow:", wan%lwindow

 if (nextbands /= 0) then
   do ik=1,nkbz
     jb = 0; mb = 0
     do ib=1,wan%num_bands
       !if (wan%exclude_bands(ib) /= 0) cycle
       if (.not. wan%band_in(ib)) cycle
       jb = jb + 1
       if (wan%lwindow(jb, ik)) then
         mb = mb + 1; et_opt(mb, ik) = wan%all_eigens(ib, ik)
       end if
     end do
   end do

 else
   do ik=1,nkbz
     mb = 0
     do ib=1,wan%dimwin(ik)
       if (wan%lwindow(ib, ik)) then
         mb = mb + 1; et_opt(mb, ik) = wan%all_eigens(ib, ik)
       end if
     end do
   end do
 end if

 ABI_CALLOC(chs, (nwan, nwan, nkbz))

 do ik=1,nkbz
   do jb=1,nwan
     do ib=1,jb
       ctmp = czero
       do mb=1,wan%dimwin(ik)
         ctmp = ctmp + conjg(wan%u_k(mb, ib, ik)) * et_opt(mb, ik) * wan%u_k(mb, jb, ik)
       end do
       chs(ib, jb, ik) = ctmp
       chs(jb, ib, ik) = conjg(ctmp)
     end do
   end do
 end do ! ik
 ABI_FREE(et_opt)

 ABI_CALLOC(chw, (nwan, nwan, nr_h))
 do ir=1,nr_h
   do ik=1,nkbz
     chw(:,:,ir) = chw(:,:,ir) + chs(:,:,ik) * exp(-j_dpc * two_pi * dot_product(wan%kbz(:, ik), wan%r_h(:, ir))) / dble(nkbz)
   end do
 end do
 ABI_FREE(chs)

 ! Now rearrange the data to have R_e in the first dimension.
 ABI_CALLOC(wan%hwan_r, (nr_h, nwan, nwan))
 do ir=1,nr_h
   wan%hwan_r(ir,:,:) = chw(:,:,ir)
 end do
 ABI_FREE(chw)

 ! Write spatial decay to file.
 if (my_rank == master .and. len_trim(out_prefix) > 0) then
   out_path = strcat(out_prefix, "_spin", itoa(spin), "_HRWAN.txt")
   if (open_file(out_path, msg, newunit=ount, form="formatted", action="write", status='unknown') /= 0) then
     ABI_ERROR(msg)
   end if
   write(ount, "(a)")"# Decay of Hamiltonian in the Wannier representation"
   write(ount, "(a)")"# |R| [Bohr]                 Max_{m,n} |H(R,m,n)| [Ha]"
   do ir=1,nr_h
     write(ount, *) wan%rmod_h(ir), maxval(abs(wan%hwan_r(ir,:,:)))
   end do
   close(ount)
 end if

 call cryst%free()

contains
 integer function vid(var_name)
   character(len=*),intent(in) :: var_name
   vid = nctk_idname(ncid, var_name)
 end function vid

end subroutine wan_from_abiwan
!!***

!!****f* m_mlwfovlp/wan_print
!! NAME
!! wan_print
!!
!! FUNCTION
!!
!! SOURCE

subroutine wan_print(wan, units)

 use m_yaml

!Arguments ------------------------------------
 class(wan_t),intent(in) :: wan
 integer,intent(in) :: units(:)

!Local variables-------------------------------
 type(yamldoc_t) :: ydoc
!************************************************************************

 ydoc = yamldoc_open("WANNIER_PARAMS")

 !call ydoc%add_string("method", "Gaussian")
 call ydoc%add_ints("spin, nwan, num_bands, bmin, bmax, nkbz, nr_h, nr_e, nr_p", &
                    [wan%spin, wan%nwan, wan%num_bands, wan%bmin, wan%bmax, wan%nkbz, wan%nr_h, wan%nr_e, wan%nr_p])
 !call ydoc%add_reals("nelect, wan_mesh_step_eV", &
 !                   [wan%nelect, wan%step * Ha_eV])
 !call ydoc%add_real("", efermi * Ha_eV)

 ! Write header in Yaml format but prepend # so that one can still use tools such as gnuplot or xmgrace.
 call ydoc%write_units_and_free(units)

end subroutine wan_print
!!***

!!****f* m_mlwfovlp/wan_interp_ham
!! NAME
!! wan_interp_ham
!!
!! FUNCTION
!! Interpolate the Hamiltonian at an arbitray k-point
!! and return the rotation matrix.
!!
!! SOURCE

subroutine wan_interp_ham(wan, kpt, uk_wan, eigens)

!Arguments ------------------------------------
 class(wan_t),intent(in) :: wan
 real(dp),intent(in) :: kpt(3)
 real(dp),intent(out) :: eigens(wan%nwan)
 complex(dp),intent(out) :: uk_wan(wan%nwan, wan%nwan)

!Local variables-------------------------------
 integer :: ir
 complex(dp) :: eikr(wan%nr_h)
!************************************************************************

 do ir=1,wan%nr_h
   eikr(ir) = exp(j_dpc * two_pi * dot_product(kpt, wan%r_h(:, ir))) / wan%ndegen_h(ir)
 end do

 ! H_ij(k) = sum_R e^{+ik.R} * H_ij(R)
 call ZGEMV("T", wan%nr_h, wan%nwan**2, cone, wan%hwan_r, wan%nr_h, eikr, 1, czero, uk_wan, 1)

 ! Hermitianize and diagonalize.
 uk_wan = half * (uk_wan + transpose(conjg(uk_wan)))
 call xheev("N", "U", wan%nwan, uk_wan, eigens)

end subroutine wan_interp_ham
!!***

!!****f* m_mlwfovlp/wan_free
!! NAME
!! wan_free
!!
!! FUNCTION
!!  Free dynamic memory.
!!
!! SOURCE

subroutine wan_free(wan)

!Arguments ------------------------------------
 class(wan_t),intent(inout) :: wan
!************************************************************************

 ! integer
 ABI_SFREE(wan%dimwin)
 ABI_SFREE(wan%winstart)
 ABI_SFREE(wan%exclude_bands)
 ABI_SFREE(wan%r_h)
 ABI_SFREE(wan%r_e)
 ABI_SFREE(wan%r_p)

 ! real
 ABI_SFREE(wan%ndegen_h)
 ABI_SFREE(wan%ndegen_e)
 ABI_SFREE(wan%ndegen_p)
 ABI_SFREE(wan%rmod_h)
 ABI_SFREE(wan%rmod_e)
 ABI_SFREE(wan%rmod_p)
 ABI_SFREE(wan%all_eigens)
 ABI_SFREE(wan%centres)
 ABI_SFREE(wan%spreads)
 ABI_SFREE(wan%kbz)
 ABI_SFREE(wan%band_in)
 ABI_SFREE(wan%lwindow)

 ! Complex
 ABI_SFREE(wan%u_mat)
 ABI_SFREE(wan%u_mat_opt)
 ABI_SFREE(wan%u_k)
 ABI_SFREE(wan%hwan_r)
 ABI_SFREE(wan%grpe_wwp)

 call wan%krank%free()

end subroutine wan_free
!!***

!!****f* m_mlwfovlp/wan_setup_eph_ws_kq
!! NAME
!! wan_setup_eph_ws_kq
!!
!! FUNCTION
!!   Prepare the interpolation of the e-ph matrix elements.
!!
!! SOURCE

subroutine wan_setup_eph_ws_kq(wan, cryst, shiftk, kptrlatt, qptrlatt, my_pert_start, my_npert, pert_comm)

!Arguments ------------------------------------
 class(wan_t),intent(inout) :: wan
 type(crystal_t),intent(in) :: cryst
 real(dp),intent(in) :: shiftk(3)
 integer,intent(in) :: kptrlatt(3,3), qptrlatt(3,3), my_pert_start, my_npert
 type(xcomm_t),target,intent(in) :: pert_comm

!Local variables-------------------------------
 integer,parameter :: lmax(3) = [2,2,2]
 real(dp),parameter :: center(3) = zero
!************************************************************************

 ABI_UNUSED(shiftk)

 ! See Appendix A of Phys. Rev. Research 3, 043022 for possible improvements.
 if (.not. allocated(wan%r_h)) then
   call wigner_seitz(center, lmax, kptrlatt, cryst%rmet, wan%nr_h, wan%r_h, wan%ndegen_h, wan%rmod_h)
 end if

 if (.not. allocated(wan%r_e)) then
   call wigner_seitz(center, lmax, kptrlatt, cryst%rmet, wan%nr_e, wan%r_e, wan%ndegen_e, wan%rmod_e)
 end if

 if (.not. allocated(wan%r_p)) then
   call wigner_seitz(center, lmax, qptrlatt, cryst%rmet, wan%nr_p, wan%r_p, wan%ndegen_p, wan%rmod_p)
 end if

 if (wan%my_pert_start == -1) then
   wan%my_pert_start = my_pert_start; wan%my_npert = my_npert; wan%pert_comm => pert_comm
 else
   ABI_CHECK_IEQ(wan%my_pert_start, my_pert_start, "different values for my_pert_start")
   ABI_CHECK_IEQ(wan%my_npert, my_npert, "different values for my_npert")
   ABI_CHECK(associated(wan%pert_comm), "wan%per_comm is not associated!")
   ABI_CHECK_IEQ(wan%pert_comm%value, pert_comm%value, "different values for pert_comm%value")
 end if

 ! Allocate g in the Wannier representation.
 ABI_CALLOC(wan%grpe_wwp, (wan%nr_p, wan%nr_e, wan%nwan, wan%nwan, my_npert))

end subroutine wan_setup_eph_ws_kq
!!***

!!****f* m_mlwfovlp/wan_interp_eph_manyq
!! NAME
!! wan_interp_eph_manyq
!!
!! FUNCTION
!! Interpolate the e-ph matrix elements for one k-point and nq q-points.
!! Returns matrix elements in the atomic-representation.
!!
!! SOURCE

subroutine wan_interp_eph_manyq(wan, nq, qpts, kpt, g_atm)

!Arguments ------------------------------------
 class(wan_t),intent(in) :: wan
 integer,intent(in) :: nq
 real(dp),intent(in) :: qpts(3,nq), kpt(3)
 complex(dp),intent(out) :: g_atm(wan%nwan, wan%nwan, wan%my_npert, nq)

!Local variables-------------------------------
 integer :: ir, nr_e, nr_p, nwan, iq, my_npert, ipc, ncols_e, ncols_w
!arrays
 real(dp) :: kq(3), eigens_k(wan%nwan), eigens_kq(wan%nwan)
 complex(dp),allocatable :: eikr(:), eiqr(:), u_k(:,:), u_kq(:,:), cbuf_e(:,:,:,:), cbuf_w(:,:,:), cmat_w(:,:)

!************************************************************************

 ! TODO: Handle long-range part.
 nr_p = wan%nr_p; nr_e = wan%nr_e; nwan = wan%nwan; my_npert = wan%my_npert

 ABI_MALLOC(eikr, (nr_e))
 ABI_MALLOC(eiqr, (nr_p))
 ABI_MALLOC(u_k, (nwan, nwan))
 ABI_MALLOC(u_kq, (nwan, nwan))
 ABI_MALLOC(cmat_w, (nwan, nwan))

 !ABI_CHECK(allocated(wan%r_e), "wan%r_e is not allocated!")
 !ABI_CHECK(allocated(wan%ndegen_e), "wan%ndegen_e is not allocated!")
 !ABI_CHECK(allocated(wan%grpe_wwp), "wan%grpe_wwp is not allocated!")

 do ir=1,nr_e
   eikr(ir) = exp(+j_dpc * two_pi * dot_product(kpt, wan%r_e(:, ir))) / wan%ndegen_e(ir)
 end do
 call wan%interp_ham(kpt, u_k, eigens_k)

 ! grpe_wwp has shape: (nr_p, nr_e, nwan, nwan, my_npert))
 ncols_e = nr_e * nwan **2 * my_npert
 ABI_MALLOC(cbuf_e, (nr_e, nwan, nwan, my_npert))

 ncols_w = nwan ** 2 * my_npert
 ABI_MALLOC(cbuf_w, (nwan, nwan, my_npert))

 ! TODO: Recheck this part.
 do iq=1,nq
   kq = kpt + qpts(:,iq)
   call wan%interp_ham(kq, u_kq, eigens_kq)
   do ir=1,nr_p
     eiqr(ir) = exp(+j_dpc * two_pi * dot_product(qpts(:,iq), wan%r_p(:, ir))) / wan%ndegen_p(ir)
   end do

   ! Transform along r_p
   !  [Eqn. 22 of PRB 76, 165108 (2007)]
   !  g~(R_e,q') = 1/ndegen(R_p) sum_R_p e^{iq'R_p} g(R_e,R_p)
   call ZGEMV("T", nr_p, ncols_e, cone, wan%grpe_wwp, nr_p, eiqr, 1, czero, cbuf_e, 1)

   ! Transfor along r_e
   !  [Eqn. 22 of PRB 76, 165108 (2007)]
   !  g~(k',q') = 1/ndegen(R_e) sum_R_e e^{ik'R_e} g(R_e,q')
   call ZGEMV("T", nr_e, ncols_w, cone, cbuf_e, nr_e, eikr, 1, czero, cbuf_w, 1)

   ! Go to Bloch space.
   !  [Eqn. 22 of PRB 76, 165108 (2007)]
   !  g(k',q') = U(k'+q') * g~(k',q') * U(k')^\dagger
   !
   !  RM - this is what is calculated
   !  g(k',q') = U(k'+q')^\dagger * g~(k',q') * U(k')

   !  the two zgemm calls perform the following operations:
   !  epmatf  = [ cufkq * epmatf ] * cufkk^\dagger

   do ipc=1,my_npert
     call ZGEMM('N', 'N', nwan, nwan, nwan, cone, u_kq, nwan, cbuf_w(:,:,ipc), nwan, czero, cmat_w, nwan)
     call ZGEMM('N', 'C', nwan, nwan, nwan, cone, cmat_w, nwan, u_k, nwan, czero, g_atm(:,:,ipc,iq), nwan)
   end do
 end do ! iq

 ABI_FREE(cbuf_e)
 ABI_FREE(cbuf_w)
 ABI_FREE(eikr)
 ABI_FREE(eiqr)
 ABI_FREE(u_k)
 ABI_FREE(u_kq)
 ABI_FREE(cmat_w)

end subroutine wan_interp_eph_manyq
!!***

!!****f* m_mlwfovlp/wan_ncwrite_gwan
!! NAME
!! wan_ncwrite_gwan
!!
!! FUNCTION
!!  Write the e-ph matrix elements in the Wannier representation g(R_e,R_ph) to the GWAN.nc netcdf file.
!!
!! SOURCE

subroutine wan_ncwrite_gwan(wan, dtfil, cryst, ebands, pert_comm)

!Arguments ------------------------------------
 class(wan_t),target,intent(in) :: wan
 type(datafiles_type),intent(in) :: dtfil
 type(crystal_t),intent(in) :: cryst
 type(ebands_t),intent(in) :: ebands
 type(xcomm_t),intent(in) :: pert_comm

!Local variables-------------------------------
!scalars
 integer :: spin, root_ncid, spin_ncid, ncerr, natom3, ount, ir, var_id, units(2), batch_size, idat, ndat
 real(dp), ABI_CONTIGUOUS pointer :: rpt_d4(:,:,:,:), rpt_d6(:,:,:,:,:,:)
 character(len=fnlen) :: gwan_filepath, txt_path
 complex(dp),target,allocatable :: cbuf5(:,:,:,:,:)
 character(len=500) :: msg
!************************************************************************

 units = [std_out, ab_out]
 spin = wan%spin; natom3 = 3 * cryst%natom

 gwan_filepath = strcat(dtfil%filnam_ds(4), "_GWAN.nc")
 call wrtout(units, sjoin("- Writing e-ph matrix elements in the Wannier representation to file:", gwan_filepath))

 if (spin == 1) then
   NCF_CHECK(nctk_open_create(root_ncid, gwan_filepath, pert_comm%value))
   NCF_CHECK(cryst%ncwrite(root_ncid))
   !NCF_CHECK(hdr%ncwrite(root_ncid))
   NCF_CHECK(ebands%ncwrite(root_ncid))
 else
   NCF_CHECK(nctk_open_modify(root_ncid, gwan_filepath, pert_comm%value))
 end if

 ! Create group for this spin and define dimensions.
 NCF_CHECK(nf90_def_grp(root_ncid, strcat("gwan", "_spin", itoa(spin)), spin_ncid))

 ncerr = nctk_def_dims(spin_ncid, [ &
    nctkdim_t("nwan", wan%nwan), &
    nctkdim_t("natom3", natom3), &
    nctkdim_t("nr_h", wan%nr_h), &
    nctkdim_t("nr_e", wan%nr_e), &
    nctkdim_t("nr_p", wan%nr_p)  &
 ], defmode=.True.)
 NCF_CHECK(ncerr)

 ncerr = nctk_def_arrays(spin_ncid, [ &
   nctkarr_t("r_h", "dp", "three, nr_h"), &
   nctkarr_t("r_e", "dp", "three, nr_e"), &
   nctkarr_t("r_p", "dp", "three, nr_p"), &
   nctkarr_t("ndegen_h", "int", "nr_h"), &
   nctkarr_t("ndegen_e", "int", "nr_e"), &
   nctkarr_t("ndegen_p", "int", "nr_p"), &
   nctkarr_t("hwan_r", "dp", "two, nr_h, nwan, nwan"), &
   nctkarr_t("grpe_wwp", "dp", "two, nr_p, nr_e, nwan, nwan, natom3") &
 ])
 NCF_CHECK(ncerr)

 ! Write data.
 NCF_CHECK(nctk_set_datamode(spin_ncid))
 NCF_CHECK(nf90_put_var(spin_ncid, vid_spin("r_h"), wan%r_h))
 NCF_CHECK(nf90_put_var(spin_ncid, vid_spin("r_e"), wan%r_e))
 NCF_CHECK(nf90_put_var(spin_ncid, vid_spin("r_p"), wan%r_p))
 NCF_CHECK(nf90_put_var(spin_ncid, vid_spin("ndegen_h"), wan%ndegen_h))
 NCF_CHECK(nf90_put_var(spin_ncid, vid_spin("ndegen_e"), wan%ndegen_e))
 NCF_CHECK(nf90_put_var(spin_ncid, vid_spin("ndegen_p"), wan%ndegen_p))

 call c_f_pointer(c_loc(wan%hwan_r), rpt_d4, [2, wan%nr_h, wan%nwan, wan%nwan])
 NCF_CHECK(nf90_put_var(spin_ncid, vid_spin("hwan_r"), rpt_d4))

 ! Take into account that the array might be distributed over perturbations.
 !NCF_CHECK(nctk_set_collective(spin_ncid, vid_spin("foo")))
 call c_f_pointer(c_loc(wan%grpe_wwp), rpt_d6, [2, wan%nr_p, wan%nr_e, wan%nwan, wan%nwan, wan%my_npert])
 ncerr = nf90_put_var(spin_ncid, vid_spin("grpe_wwp"), rpt_d6, &
                      start=[1,1,1,1,1,wan%my_pert_start], &
                      count=[2, wan%nr_p, wan%nr_e, wan%nwan, wan%nwan, wan%my_npert])
 NCF_CHECK(ncerr)
 NCF_CHECK(nf90_close(root_ncid))

 ! Check spatial decay of the EP matrix elements in the wannier basis
 ! We plot: R_e, R_p, max_{m,n,nu} |g(m,n,nu;R_e,R_p)|
 if (pert_comm%me == 0) then
   NCF_CHECK(nctk_open_read(root_ncid, gwan_filepath, xmpi_comm_self))
   ! Get group for this spin.
   NCF_CHECK(nf90_inq_ncid(root_ncid, strcat("gwan", "_spin", itoa(spin)), spin_ncid))

   txt_path = strcat(dtfil%filnam_ds(4), "_spin", itoa(spin), "_GWAN.txt")
   if (open_file(txt_path, msg, newunit=ount, form="formatted", action="write", status='unknown') /= 0) then
     ABI_ERROR(msg)
   end if
   write(ount, '(a)') '#   R_e [Bohr]    max_{m,n,nu} |g(m,n,nu R_e,:)| [Ha/Bohr] '
   var_id = vid_spin("grpe_wwp")
   ! Perform IO in batches to keep memory at bay.
   ! FIXME
   batch_size = 1
   ABI_MALLOC(cbuf5, (wan%nr_p, batch_size, wan%nwan, wan%nwan, natom3))
   call c_f_pointer(c_loc(cbuf5), rpt_d6, [2, wan%nr_p, batch_size, wan%nwan, wan%nwan, natom3])

   do ir=1,wan%nr_e, batch_size
     ndat = blocked_loop(ir, wan%nr_e, batch_size)
     !nctkarr_t("grpe_wwp", "dp", "two, nr_p, nr_e, nwan, nwan, natom3") &
     ncerr = nf90_get_var(spin_ncid, var_id, rpt_d6, &
                          start=[1,1,ir,1,1,1], count=[2, wan%nr_p, batch_size, wan%nwan, wan%nwan, natom3])
     NCF_CHECK(ncerr)
     do idat=1,ndat
       write(ount, *) wan%rmod_e(ir+idat-1), maxval(abs(cbuf5(:,idat,:,:,:)))
     end do
   end do
   ABI_FREE(cbuf5)

   close(ount)
   NCF_CHECK(nf90_close(root_ncid))
 end if

contains
 integer function vid_spin(var_name)
   character(len=*),intent(in) :: var_name
   vid_spin = nctk_idname(spin_ncid, var_name)
 end function vid_spin

end subroutine wan_ncwrite_gwan
!!***

!!****f* m_mlwfovlp/wan_load_gwan
!! NAME
!! wan_load_gwan
!!
!! FUNCTION
!!  Read g(R_e, R_p) in the Wannier representation from an ABIWAN.nc file
!!
!! SOURCE

subroutine wan_load_gwan(wan, gwan_filepath, cryst, spin, nsppol, all_comm)

!Arguments ------------------------------------
 class(wan_t),target,intent(inout) :: wan
 character(len=*),intent(in) :: gwan_filepath
 integer,intent(in) :: spin, nsppol
 type(crystal_t),intent(in) :: cryst
 type(xcomm_t),intent(in) :: all_comm ! , pert_comm

!Local variables-------------------------------
!scalars
 integer :: root_ncid, spin_ncid, ncerr, units(2)
 logical,parameter :: keep_umats = .False.
 type(crystal_t) :: gwan_cryst
 real(dp), ABI_CONTIGUOUS pointer :: rpt_d6(:,:,:,:,:,:) !, rpt_d4(:,:,:,:)
!************************************************************************

 units = [std_out, ab_out]
 if (nsppol == 2) then
   call wrtout(units, sjoin(" Reading g(R_e, R_p) for spin:", itoa(spin), " from GWAN file:", gwan_filepath))
 else
   call wrtout(units, sjoin(" Reading g(R_e, R_p) from GWAN file:", gwan_filepath))
 end if

 NCF_CHECK(nctk_open_read(root_ncid, gwan_filepath, all_comm%value))

 call gwan_cryst%ncread(root_ncid)
 if (cryst%compare(gwan_cryst, header=" Comparing input crystal with GWAN crystal.") /= 0) then
   ABI_ERROR("Crystal structure from input and GWAN file do not agree! Check messages above!")
 end if
 call gwan_cryst%free()

 ! Get netcdf group for this spin.
 NCF_CHECK(nf90_inq_ncid(root_ncid, strcat("gwan", "_spin", itoa(spin)), spin_ncid))

 ! Read supercell lattice vectors.
 NCF_CHECK(nctk_get_dim(spin_ncid, "nr_e", wan%nr_e))
 NCF_CHECK(nctk_get_dim(spin_ncid, "nr_p", wan%nr_p))

 ABI_MALLOC(wan%r_e, (3, wan%nr_e))
 ABI_MALLOC(wan%r_p, (3, wan%nr_p))
 ABI_MALLOC(wan%ndegen_e, (wan%nr_e))
 ABI_MALLOC(wan%ndegen_p, (wan%nr_p))

 NCF_CHECK(nf90_get_var(spin_ncid, vid_spin("r_e"), wan%r_e))
 NCF_CHECK(nf90_get_var(spin_ncid, vid_spin("r_p"), wan%r_p))
 NCF_CHECK(nf90_get_var(spin_ncid, vid_spin("ndegen_e"), wan%ndegen_e))
 NCF_CHECK(nf90_get_var(spin_ncid, vid_spin("ndegen_p"), wan%ndegen_p))

 !call c_f_pointer(c_loc(wan%hwan_r), rpt_d4, [2, wan%nr_h, wan%nwan, wan%nwan])
 !NCF_CHECK(nf90_get_var(spin_ncid, vid_spin("hwan_r"), rpt_d4))

 ! TODO:
 ! Take into account that the array might be distributed over perturbations.
 !print *, "wan%nr_p, wan%nr_e, wan%nwan, wan%nwan, wan%my_npert", wan%nr_p, wan%nr_e, wan%nwan, wan%nwan, wan%my_npert
 ABI_MALLOC(wan%grpe_wwp, (wan%nr_p, wan%nr_e, wan%nwan, wan%nwan, wan%my_npert))
 call c_f_pointer(c_loc(wan%grpe_wwp), rpt_d6, [2, wan%nr_p, wan%nr_e, wan%nwan, wan%nwan, wan%my_npert])

 ! nctkarr_t("grpe_wwp", "dp", "two, nr_p, nr_e, nwan, nwan, natom3") &
 if (all_comm%nproc > 1) then
   NCF_CHECK(nctk_set_collective(spin_ncid, vid_spin("grpe_wwp")))
 end if
 ncerr = nf90_get_var(spin_ncid, vid_spin("grpe_wwp"), rpt_d6, &
                      start=[1, 1, 1, 1, 1, wan%my_pert_start], &
                      count=[2, wan%nr_p, wan%nr_e, wan%nwan, wan%nwan, wan%my_npert])
 NCF_CHECK(ncerr)

 NCF_CHECK(nf90_close(root_ncid))
 call wrtout(units, " Reading of GWAN.nc file completed.")

contains
 integer function vid_spin(var_name)
   character(len=*),intent(in) :: var_name
   vid_spin = nctk_idname(spin_ncid, var_name)
 end function vid_spin
end subroutine wan_load_gwan
!!***

!!****f* m_mlwfovlp/wan_interp_ebands
!! NAME
!! wan_interp_ebands
!!
!! FUNCTION
!!   Build new ebands_t object on a k-mesh via Wannier interpolation.
!!
!! INPUT
!!  cryst<crystal_t> = Crystalline structure.
!!  intp_kptrlatt(3,3) = New k-mesh
!!  intp_nshiftk= Number of shifts in new k-mesh.
!!  intp_shiftk(3,intp_nshiftk) = Shifts in new k-mesh.
!!  band_block(2)=Initial and final band index. If [0,0], all bands are used
!!
!! OUTPUT
!!  out_ebands: object with interpolated energies.
!!
!! NOTES
!!  Fermi level and occupation factors of the interpolated bands are not recomputed by this routine.
!!  Values are compied from in_ebands.
!!
!! SOURCE

subroutine wan_interp_ebands(wan_spin, cryst, in_ebands, intp_kptrlatt, intp_nshiftk, intp_shiftk, out_ebands, comm)

!Arguments ------------------------------------
 type(ebands_t),intent(in) :: in_ebands
 type(wan_t),intent(in) :: wan_spin(in_ebands%nsppol)
 type(crystal_t),intent(in) :: cryst
 integer,intent(in) :: intp_kptrlatt(3,3), intp_nshiftk, comm
 real(dp),intent(in) :: intp_shiftk(3,intp_nshiftk)
 type(ebands_t),intent(out) :: out_ebands

!Local variables-------------------------------
!scalars
 integer :: spin, ik, nwan, ierr, cnt, my_rank, nproc
!arrays
 integer :: band_block(2)
 real(dp) :: params(4)
 real(dp),allocatable :: eigens_k(:)
 complex(dp),allocatable :: u_k(:,:)
!************************************************************************

 my_rank = xmpi_comm_rank(comm); nproc = xmpi_comm_size(comm)

 ! Build new ebands object with memory to be filled.
 band_block(:) = [1, wan_spin(1)%max_nwan]
 out_ebands = in_ebands%interp_kmesh(cryst, params, intp_kptrlatt, intp_nshiftk, intp_shiftk, &
                                     band_block, comm, malloc_only=.True.)
 out_ebands%eig = zero

 do spin=1,in_ebands%nsppol
   associate (wan => wan_spin(spin))
   nwan = wan%nwan
   ABI_MALLOC(u_k, (nwan, nwan))
   ABI_MALLOC(eigens_k, (nwan))
   do ik=1,out_ebands%nkpt
     cnt = cnt + 1; if (mod(cnt, nproc) /= my_rank) cycle ! MPI parallelism inside comm.
     call wan%interp_ham(out_ebands%kptns(:,ik), u_k, eigens_k)
     out_ebands%eig(1:nwan, ik, spin) = eigens_k
   end do ! ik
   ABI_FREE(u_k)
   ABI_FREE(eigens_k)
   end associate
 end do ! spin

 call xmpi_sum(out_ebands%eig, comm, ierr)

end subroutine wan_interp_ebands
!!***

end module m_mlwfovlp
!!***
