def fdcmatch(df, site, begyear=1900, endyear=2015, normalizer=1, fun=1):
    '''
    * This function creates a flow duration curve (or its inverse) and then matches a natural logarithmic
      function (or its inverse - exp) to the flow duration curve
    * The flow duration curve is built from discharge averaged by day of year (1 to 366) over the selected
      period; the averages are sorted, divided by normalizer, and each is given the exceedance probability
      rank/(n+1), where the largest averaged discharge has rank 1
    PARAMETERS:
        df = pandas dataframe of interest; must have a date or date-time as the index
        site = pandas column containing discharge data; must be within df
        begyear = beginning year of analysis (January 1 of this year is included); defaults to 1900
        endyear = end year of analysis (only data before January 1 of this year are used); defaults to 2015
        normalizer = value the averaged discharge is divided by; defaults to 1 (no normalization)
        fun = 1 for probability as a function of discharge; any other value for discharge as a function
              of probability; default=1
            * 1 will choose:
                prob = a*ln(discharge*b+c)+d
            * 0 will choose:
                discharge = a*exp(prob*b+c)+d
    RETURNS:
        para, parb, parc, pard, r_squared_value, stderr
            par = modifying variables for functions = a,b,c,d
            r_squared_value = squared correlation between the observed and the modelled values,
                              rounded to 2 decimals
            stderr = standard error of the slope reported by linregress for the regression of the
                     modelled values on the observed values, rounded to 5 decimals
        All six values are NaN when fewer than four day-of-year averages are available (the model has
        four parameters) or when the curve fit or the regression fails.
    REQUIREMENTS:
        pandas, scipy, numpy
        (used through the module-level names pd, np, sp and op; sp must provide rankdata and linregress,
        op must provide curve_fit)
    '''
    n_params = 4
    no_fit = (np.nan, np.nan, np.nan, np.nan, np.nan, np.nan)

    # work on a copy of the discharge column so the caller's dataframe is never modified
    flow = df[site].copy()
    flow.index = pd.to_datetime(flow.index)

    # filter to the dates of interest: [Jan 1 begyear, Jan 1 endyear), then remove na values
    start = pd.Timestamp(year=begyear, month=1, day=1)
    end = pd.Timestamp(year=endyear, month=1, day=1)
    flow = flow[(flow.index >= start) & (flow.index < end)].dropna()
    if flow.empty:
        return no_fit

    # take average of each day of year (from 1 to 366) over the selected period of record
    dailyavg = flow.groupby(np.asarray(flow.index.dayofyear)).mean()

    # sorted (ascending) averaged discharge, scaled by the normalizer
    # (a z-score could be used here instead: (x - mean(x)) / std(x))
    discharge = np.sort(np.asarray(dailyavg, dtype=float)) / normalizer
    if discharge.size < n_params:
        # not enough points to determine four parameters
        return no_fit

    # rank from smallest to largest, then reverse so the largest discharge gets the lowest rank
    ranks = sp.rankdata(discharge, method='average')[::-1]
    # probability associated with each rank
    prob = ranks / (discharge.size + 1)

    # choose which function to use
    if fun == 1:
        # probability as a function of discharge
        def func(x, a, b, c, d):
            return a * np.log(x * b + c) + d
        x_obs, y_obs = discharge, prob
    else:
        # discharge as a function of probability
        def func(x, a, b, c, d):
            return a * np.exp(x * b + c) + d
        x_obs, y_obs = prob, discharge

    try:
        # matches func to data
        par, _ = op.curve_fit(func, x_obs, y_obs)
        # checks fit of curve match: regress modelled values on observed values
        _, _, r_value, _, stderr = sp.linregress(y_obs, func(x_obs, *par))
    except (RuntimeError, TypeError, ValueError):
        # RuntimeError: optimizer did not converge; TypeError: too few points;
        # ValueError: non-finite input or a degenerate (constant) series
        return no_fit

    # return parameters (a,b,c,d), r-squared of model fit, and standard error from the regression
    return par[0], par[1], par[2], par[3], round(r_value**2, 2), round(stderr, 5)