#!/usr/bin/env python3

import sys
import re
import os
import os.path
import numpy as np # to install this lib in deb-type linux, run as root: apt-get install python3-numpy
import xyz_utils
from collections import namedtuple

def formatted_print(filen,nframe,comment,single,minatom,natom,manatom,lmin,lmax,lave,lstd):
    """Print a statistics report for a single frame or for a whole file.

    filen    -- file name shown in the header line
    nframe   -- frame index (single=True) or number of frames (single=False)
    comment  -- comment line of the frame; only inspected when single is True
    single   -- True: per-frame report; False: whole-file summary
    minatom, manatom -- minimum / maximum atoms per frame (summary header only)
    natom    -- atoms in the frame, or the per-frame average in the summary
    lmin, lmax, lave, lstd -- iterables of min / max / average / std values,
                              printed space-separated on one line each

    Returns None.
    """
    if single:
        # The time of a frame is occasionally written as the second word of
        # the comment.  str.split() discards leading whitespace, so the
        # "second word" is really the second word even for indented comments.
        words = comment.split() if comment else []
        try:
            time_frame = float(words[1])
        except (IndexError, ValueError):
            # fewer than two words, or the second word is not a number
            time_frame = "unset"

        print("File:  ",filen," frame: ",nframe," #atoms:",natom,"frame time:",time_frame)
        prestring = "  frame"
    else:
        print("File:  ",filen," Frames:",nframe," Min-Ave-Max#atoms/Frame: ",minatom,natom,manatom)
        prestring = "       "

    for label, values in (("min", lmin), ("max", lmax), ("ave", lave), ("std", lstd)):
        print(prestring, label + "(x,y,z):", ' '.join(map(str, values)))
    return None

def xyz_frame_statistics(snap):
    """Compute column-wise statistics of the coordinates of one frame.

    snap must provide `atoms` (its length is taken as the atom count) and
    `coords` (reduced along axis 0; presumably a NumPy array with one row
    per atom -- confirm against xyz_utils).

    Returns the tuple (natom, min, max, average, average of squares, std),
    where everything but natom is computed along axis 0 of snap.coords.
    """
    coords = snap.coords
    count = len(snap.atoms)

    col_min = np.min(coords, axis=0)
    col_max = np.max(coords, axis=0)
    col_ave = np.average(coords, axis=0)
    # mean of the squared coordinates, normalised by the number of atoms
    col_ave_sq = np.sum(coords ** 2, axis=0) / count
    col_std = np.std(coords, axis=0)

    return count, col_min, col_max, col_ave, col_ave_sq, col_std
        

def xyz_statistics(filen,print_allframes=False,print_finalreport=False):
    """Accumulate coordinate statistics over all frames of an xyz file.

    filen             -- path of the file to read, or "-" to read from stdin
    print_allframes   -- if True, print a report for each frame as it is read
    print_finalreport -- if True, print the whole-file summary at the end

    Returns (nframe, natom, tmin, tmax, tave, tstd): the number of frames
    read, the average number of atoms per frame, and length-3 arrays with
    the overall minimum, maximum, atom-weighted average and standard
    deviation of the coordinates.  When no frame can be read, nframe is 0,
    natom is 0.0 and the four arrays are filled with NaN.
    """
    f = sys.stdin if filen == "-" else open(filen, 'r')

    nframe = 0
    tatom = 0
    minatom = sys.maxsize
    manatom = 0
    tmin = np.full(3, sys.float_info.max)
    tmax = np.full(3, -sys.float_info.max)
    tave = np.zeros(3, dtype="float")
    tav2 = np.zeros(3, dtype="float")

    try:
        while True: # loop on all snapshots
            fr = xyz_utils.xyz_read_one_frame(f) # read one frame from file
            if fr.atoms is None:
                # a frame without atoms marks the end of the input
                break
            nframe += 1
            natom, lmin, lmax, lave, lav2, lstd = xyz_frame_statistics(fr)
            tatom += natom
            minatom = min(minatom, natom)
            manatom = max(manatom, natom)
            # Only the first three columns are accumulated.  fmin/fmax ignore
            # a NaN operand, so a NaN in a frame never overwrites the running
            # extrema (same outcome as an explicit "<" / ">" comparison).
            tmin = np.fmin(tmin, lmin[:3])
            tmax = np.fmax(tmax, lmax[:3])
            # weight each frame by its atom count so that the final values
            # are averages over atoms, not over frames
            tave += natom * lave[:3]
            tav2 += natom * lav2[:3]
            if print_allframes:
                formatted_print(filen,nframe,fr.comment,True,minatom,natom,manatom,lmin,lmax,lave,lstd)
    finally:
        # never close stdin: it does not belong to this function
        if f is not sys.stdin:
            f.close()

    if nframe == 0:
        # nothing was read: report "no data" instead of dividing by zero
        minatom = 0
        natom = 0.0
        tmin, tmax, tave, tstd = (np.full(3, np.nan) for _ in range(4))
    else:
        tave /= tatom
        tav2 /= tatom
        # <x^2> - <x>^2 can come out slightly negative through rounding
        # (e.g. when all values on an axis coincide); clamp before the sqrt
        tstd = np.sqrt(np.maximum(tav2 - tave * tave, 0.0))
        natom = float(tatom) / nframe

    if print_finalreport:
        formatted_print(filen,nframe,"",False,minatom,natom,manatom,tmin,tmax,tave,tstd)

    return nframe,natom,tmin,tmax,tave,tstd
        

# The following block is only executed when this code is run as a script; its purpose is to parse
# the command line and to pass the parsed arguments to the function that actually does the job:
if __name__ == "__main__":
    import sys
    import argparse

    commandname = sys.argv[0]
    # default values:
    filenames = []

    # Command-line interface: any number of file names (stdin when none is
    # given) plus the -a switch that enables the per-frame reports.
    parser = argparse.ArgumentParser(
        description="""print statistical info about the xyz file(s)""",
        epilog="""v2.0 by N. Manini, 14.04.2020""",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        'filenames',
        nargs='*',
        default=['-'],
        help='Files to be processed. If not given, stdin is used',
    )
    parser.add_argument(
        '-a',
        dest='allframes',
        action='store_true',
        help='print out statistics for all individual frames',
    )

    args = parser.parse_args()
    # the program name is not part of the parsed namespace: store it there too
    args.prog = parser.prog

    # Process each file in turn, always printing its final report.
    for filen in args.filenames:
        nframe, natom, tmin, tmax, tave, tstd = xyz_statistics(
            filen, args.allframes, True
        )
# now one might want to do something more with this information, e.g. averaging over multiple files...
