# LIGO Open Science Center Tutorial Step 2: What's in a LIGO Data File?
# https://losc.ligo.org/tutorial02/   copied 27 July 2018
# and modified by Eric Myers <EricMyers47@gmail.com> for Python 3
# 
#----------------------
# Import needed modules
#----------------------
import numpy as np
import matplotlib.pyplot as plt
import h5py

#-------------------------
# Open the File
#-------------------------
#fileName = 'H-H2_LOSC_4_V1-842694656-4096.hdf5'
#fileName = 'L-L1_LOSC_4_V1-842682368-4096.hdf5'
fileName = '../LIGOdata/H-H2_LOSC_4_V1-842690560-4096.hdf5'
dataFile = h5py.File(fileName, 'r')

#----------------------
# Explore the file
#----------------------
for key in dataFile.keys():
    print (key)

#---------------------
# Read in strain data
#---------------------
strain = dataFile['strain']['Strain'].value
ts = dataFile['strain']['Strain'].attrs['Xspacing']

#-----------------------
# Read in some meta data
#-----------------------
metaKeys = dataFile['meta'].keys()
meta = dataFile['meta']
print ("\n\n")
for key in metaKeys:
    print (key, meta[key].value)

gpsStart = meta['GPSstart'].value
duration = meta['Duration'].value
gpsEnd   = gpsStart + duration
detector = (meta['Detector'].value).decode('utf-8')
#print("Detector: ", detector)

#----------------------
# Statistics
#----------------------
import math

print("\n=Statistics=")
sum = 0.0
sumsq = 0.0
N = 0
MaxStrain = 0
for x in strain :
    if (math.isnan(x) ): continue
    if abs(x) > MaxStrain:
        MaxStrain = abs(x)
    sum += x
    sumsq += x*x
    N += 1

avg_strain = sum/N
var_strain = (sumsq - N*avg_strain*avg_strain) / (N-1)
stdev = math.sqrt(var_strain)

print("Average strain: ", avg_strain)
print("  from ", N, "data values (", N*100/len(strain),"%)")
print("  out of ", len(strain)," samples. ")
print("Maximum strain: ", MaxStrain)
print("Variance: ", var_strain)
print("Sample Standard Deviation: ", stdev)

#---------------------------
# Create a time vector
#---------------------------
time = np.arange(gpsStart, gpsEnd, ts)

#----------------------
# Plot the time series
#----------------------
numSamples = len(strain)  #10000
plt.plot(time[0:numSamples], strain[0:numSamples])
plt.xlabel('GPS Time (s) from '+str(gpsStart) )
plt.ylabel(detector+' Strain')
plt.show()


