#!/bin/bash
#

# Input: N directory paths, one per site (each site must be in its own directory).  Subdirectories are not searched; use links to bring in files that live elsewhere.

# Earlier sed-based attempt, kept for reference but still not precise enough.  It strips the leading path (i.e. basename), then retains the 11-digit datestamp.  However, it also passes through filenames that don't match, which is bad for stability.
# A global sed substitution (s///g) could probably replace the tr here, but I don't know how to write it.
#tstampv(){ echo $@ | tr ' ' '\n' | sed -e 's|.*/||' -e 's|\([0-9]\{11\}\).*|\1|' ; } 
# The loop version below is more robust; use it until a nice vectorized version is working.
# tstampl PATH...
# Prints, one per line, the first 11 characters of each argument's basename
# (the leading timestamp of a data file name).  Names shorter than 11
# characters are printed whole.  Arguments are quoted throughout, so paths
# containing spaces are handled.
tstampl(){
   local n base
   for n in "$@"; do
      base=$(basename -- "$n")
      # printf rather than echo: a name such as "-n" must not be taken as an option
      printf '%s\n' "${base:0:11}"
   done
}
# wtime STAMP...
# Converts WERA timestamps to total minutes since year 0, one result per line.
# Each STAMP is read positionally as YYYYjjjHHMM: 4-digit year, 3-digit day of
# year, 2-digit hour, 2-digit minute.  Leap days of earlier years are counted,
# so the difference between two results is the true elapsed time in minutes
# even across a year boundary that follows a leap year.
wtime(){
   local n Y j H M days
   for n in "$@"; do
      # 10# forces base 10 so fields with leading zeros (e.g. 08) are not read as octal
      Y=$((10#${n:0:4})) # Year
      j=$((10#${n:4:3})) # day of year
      H=$((10#${n:7:2})) # Hour
      M=$((10#${n:9:2})) # Minute
      # 365 days per elapsed year, plus one per Gregorian leap year before Y, plus the day of year
      days=$(( Y * 365 + (Y - 1) / 4 - (Y - 1) / 100 + (Y - 1) / 400 + j ))
      echo $(( (days * 24 + H) * 60 + M ))
   done
}
# dx VALUE...
# Prints the successive differences VALUE[k+1] - VALUE[k], one per line,
# followed by a final 0.  With one or more arguments the output therefore has
# exactly as many lines as there were arguments, so it can be indexed in
# parallel with the input.  Values are read as base 10 (leading zeros are not
# treated as octal).
dx(){
   local i
   local -a x
   x=("$@")
   for ((i = 0; i < $# - 1; i++)); do
      echo $(( 10#${x[i+1]} - 10#${x[i]} ))
   done
   echo 0 # difference.  note the final 0
}

# ---------------------------------------------------------------------------
# Main script.
#   1. For every directory argument (one per site) list its direct entries and
#      keep those whose name looks like <11-digit timestamp>...SORT...
#   2. Derive each file's timestamp (tstampl) and time in minutes (wtime).
#   3. Sort all files from all sites by timestamp.
#   4. Print a histogram of the time differences between consecutive files
#      (dx), then one line per file: stamp, minutes, difference, site, path.
# Paths may contain spaces; paths containing tabs or newlines are not supported.
# ---------------------------------------------------------------------------
echo "executing $(basename -- "$0") ..."

Ns=$#  # number of sites

echo "$Ns source directories: $*"

# Pretty strict filtering for correct filename: the basename must start with
# the 11-digit timestamp (tstampl and wtime read it from there) and contain
# SORT, optionally followed by a suffix such as .gz.
pattern='(^|/)[0-9]{11}[^/]*SORT[^/]*$'
tab=$'\t'

files=()  # matching paths, all sites concatenated
sites=()  # numeric site index (position of the directory argument) per file

n=-1
# "$@" is different than $@.  Handles arguments with spaces, e.g. "path/bad file name.txt"
for src in "$@"; do
   n=$((n + 1))
   Nf=0
   while IFS= read -r f; do
      files+=("$f")
      sites+=("$n")
      Nf=$((Nf + 1))
   done < <(find "$src" -maxdepth 1 | grep -E "$pattern")
   echo "$Nf files in $src"
done

Nf=${#files[@]}
echo "$Nf files total"

# Timestamps and minute counts contain no whitespace, so word splitting the
# helper output into arrays is safe.
stamp=( $(tstampl "${files[@]}") )  # corresponding timestamps
times=( $(wtime "${stamp[@]}") )    # corresponding times in minutes

# The four arrays are indexed in parallel; these should all be equal.
if (( ${#stamp[@]} != Nf || ${#times[@]} != Nf )); then
   echo "$(basename -- "$0"): could not derive a timestamp for every file" >&2
   exit 1
fi

# Join stamp, time, site index and path into tab-separated records, sort them
# by timestamp (site index breaks ties), and read the sorted records back
# into the same arrays.
i=0
while IFS=$tab read -r s t k f; do
   stamp[i]=$s
   times[i]=$t
   sites[i]=$k
   files[i]=$f
   i=$((i + 1))
done < <(
   for ((m = 0; m < Nf; m++)); do
      printf '%s\t%s\t%s\t%s\n' "${stamp[m]}" "${times[m]}" "${sites[m]}" "${files[m]}"
   done | sort -t "$tab" -k1,1n -k3,3n
)

dt=( $(dx "${times[@]}") ) # time differences

echo 'Sampling intervals are {count} {difference}'
printf '%s\n' "${dt[@]}" | sort -n | uniq -c | sort -nr

# Show the organization: one record per line, fields separated by single spaces.
for ((m = 0; m < Nf; m++)); do
   printf '%s %s %s %s %s\n' "${stamp[m]}" "${times[m]}" "${dt[m]}" "${sites[m]}" "${files[m]}"
done

exit 0







