import numpy                        # numerics (matrix operations, etc.)
import scipy.optimize, scipy.stats  # fitting and statistics
import pylab                        # plotting

#Read in the data and check (print) that we read it in correctly
x, counts = numpy.loadtxt('counts.txt', unpack=True)
print(x)
print(counts)

#Because these are counts, we can use Poisson statistics to define
#the standard deviation
std = numpy.sqrt(counts)

#Plot data
pylab.errorbar(x, counts, std, fmt='o')
pylab.xlabel('x (units)')
pylab.ylabel('Counts')
pylab.savefig('data.png')
pylab.clf()

#Define the function that we want to fit: a Gaussian peak on a linear background
def gaussian_with_background(x, A, B, C, D, E):
    return A*numpy.exp(-(x - B)**2/(2.*C**2)) + D*x + E

#Call the non-linear (Levenberg-Marquardt-based) fitting routine
#scipy.optimize.curve_fit, which returns the best-fit parameters and
#their covariance matrix:
#http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html
ans, cov = scipy.optimize.curve_fit(gaussian_with_background, x, counts, sigma=std)
print("A=", ans[0], "+/-", numpy.sqrt(cov[0][0]))
print("B=", ans[1], "+/-", numpy.sqrt(cov[1][1]))
print("C=", ans[2], "+/-", numpy.sqrt(cov[2][2]))
print("D=", ans[3], "+/-", numpy.sqrt(cov[3][3]))
print("E=", ans[4], "+/-", numpy.sqrt(cov[4][4]))

#Write out the result for later reference
numpy.savetxt('counts_best_fit.txt', ans)
numpy.savetxt('counts_cov.txt', cov)

#Plot data, best-fit model, and residual
model = gaussian_with_background(x, ans[0], ans[1], ans[2], ans[3], ans[4])
pylab.plot(x, model, label='Model')
pylab.errorbar(x, counts, std, label='Data', fmt='o')
residual = counts - model
pylab.errorbar(x, residual, std, label='Residual', fmt='o')
pylab.legend()
pylab.xlabel('x (units)')
pylab.ylabel('Counts')
pylab.savefig('counts_with_fit.png')
pylab.clf()

#Compute the model chi-sq and its probability to exceed (PTE).
#scipy.stats.stats.chisqprob was removed from scipy; chi2.sf is the
#equivalent survival function.
chisq = (residual**2/std**2).sum()
dof = len(counts) - len(ans)
print('Chi-Sq =', chisq)
print('dof =', dof)
print('PTE =', scipy.stats.chi2.sf(chisq, dof))

#Monte Carlo: refit many Poisson realizations of the data
N_mc = 2000
mcs = []
while N_mc > 0:
    mc_counts = numpy.random.poisson(counts)
    mc_ans, mc_cov = scipy.optimize.curve_fit(gaussian_with_background, x,
                                              mc_counts, sigma=std, p0=ans)
    mcs.append(mc_ans)
    N_mc -= 1
mcs = numpy.array(mcs)
mc_mean = numpy.mean(mcs, axis=0)
mc_std = numpy.std(mcs, axis=0)
print("Monte-Carlo")
print("A=", mc_mean[0], "+/-", mc_std[0])
print("B=", mc_mean[1], "+/-", mc_std[1])
print("C=", mc_mean[2], "+/-", mc_std[2])
print("D=", mc_mean[3], "+/-", mc_std[3])
print("E=", mc_mean[4], "+/-", mc_std[4])

#Bootstrap Monte Carlo: refit resampled-with-replacement versions of the data.
#(numpy.random.random_integers was removed; randint's upper bound is exclusive,
#so the same index range is 0 to len(counts).)
N_mc = 2000
bootstraps = []
useErrorInformation = True
while N_mc > 0:
    inds = numpy.random.randint(0, len(counts), len(counts))
    bootstrap_counts = counts[inds]
    bootstrap_err = std[inds]
    if not useErrorInformation:
        bootstrap_err = None
    bootstrap_x = x[inds]
    mc_ans, mc_cov = scipy.optimize.curve_fit(gaussian_with_background,
                                              bootstrap_x, bootstrap_counts,
                                              sigma=bootstrap_err, p0=ans)
    bootstraps.append(mc_ans)
    N_mc -= 1
bootstraps = numpy.array(bootstraps)
bootstrap_mean = numpy.mean(bootstraps, axis=0)
bootstrap_std = numpy.std(bootstraps, axis=0)
print("Bootstrap Monte-Carlo")
print("A=", bootstrap_mean[0], "+/-", bootstrap_std[0])
print("B=", bootstrap_mean[1], "+/-", bootstrap_std[1])
print("C=", bootstrap_mean[2], "+/-", bootstrap_std[2])
print("D=", bootstrap_mean[3], "+/-", bootstrap_std[3])
print("E=", bootstrap_mean[4], "+/-", bootstrap_std[4])
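
#A minimal cross-check sketch (an addition, not part of the original
#analysis; assumes scipy >= 0.14): with Poisson errors, sigma should be
#treated as absolute one-sigma uncertainties. curve_fit's
#absolute_sigma=True does that; without it, the returned covariance is
#rescaled by the reduced chi-sq. Compare the three error estimates.
ans_abs, cov_abs = scipy.optimize.curve_fit(gaussian_with_background, x,
                                            counts, sigma=std, p0=ans,
                                            absolute_sigma=True)
curve_fit_std = numpy.sqrt(numpy.diag(cov_abs))
print("Parameter  curve_fit  Monte-Carlo  Bootstrap")
for i, name in enumerate("ABCDE"):
    print(name, curve_fit_std[i], mc_std[i], bootstrap_std[i])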

#Confidence intervals
#Investigate the degeneracy between the peak height A (param 0) and the
#constant background term E (param 4). Build the delta-chi-sq associated
#with deviations of these two parameters from the 2x2 sub-block of the
#covariance matrix.
subcov = numpy.array([[cov[0, 0], cov[0, 4]],
                      [cov[4, 0], cov[4, 4]]])
subalpha = numpy.linalg.inv(subcov)

def dchisq(dp0, dp4):
    """Compute the change in chi-sq for a change in the parameters:
    return dp . subcov^-1 . dp
    """
    dp = numpy.array([dp0, dp4])
    return numpy.dot(dp, numpy.dot(subalpha, dp))

#Parameter ranges
dp0 = numpy.arange(-20., 20., .1)
dp4 = numpy.arange(-10., 10., .05)

#Make a 2D array of the (integrated) chi-sq probability. The CDF of a
#chi-sq distribution with 2 degrees of freedom turns delta-chi-sq into
#an enclosed probability (equivalent to 1 - chisqprob in older scipy).
dchisq_2D = numpy.zeros((len(dp0), len(dp4)))
for i in range(len(dp0)):
    for j in range(len(dp4)):
        dchisq_2D[i, j] = dchisq(dp0[i], dp4[j])
params_dof = 2
chisqprob_2D = scipy.stats.chi2.cdf(dchisq_2D, params_dof)

#Plot confidence intervals and Monte Carlo points.
#Overplot the single-parameter 68% constraints with straight lines
pylab.hlines([ans[4]-numpy.sqrt(cov[4][4]), ans[4]+numpy.sqrt(cov[4][4])],
             ans[0]-20, ans[0]+20, linestyles='dotted')
pylab.vlines([ans[0]-numpy.sqrt(cov[0][0]), ans[0]+numpy.sqrt(cov[0][0])],
             ans[4]-10, ans[4]+10, linestyles='dotted')
#Plot contours at 68% and 95%
dp4_2D, dp0_2D = numpy.meshgrid(dp4, dp0)  #2D coordinate grids for the contour plot
cs = pylab.contour(ans[0]+dp0_2D, ans[4]+dp4_2D, chisqprob_2D,
                   [0.68, 0.95], linewidths=5)
pylab.clabel(cs)
pylab.xlim(62, 112)
pylab.ylim(23, 43)
pylab.xlabel('A (units)')
pylab.ylabel('E (units)')
pylab.savefig('confidence_peak_constant_background.png')
#Add the Monte Carlo points and save a second version of the figure
pylab.plot(mcs[:, 0], mcs[:, 4], '.', color='grey')
pylab.xlim(62, 112)
pylab.ylim(23, 43)
pylab.savefig('confidence_peak_constant_background_with_mc.png')
pylab.clf()

#Plot confidence intervals and bootstrap points.
#Overplot the single-parameter 68% constraints with straight lines
pylab.hlines([ans[4]-numpy.sqrt(cov[4][4]), ans[4]+numpy.sqrt(cov[4][4])],
             ans[0]-20, ans[0]+20, linestyles='dotted')
pylab.vlines([ans[0]-numpy.sqrt(cov[0][0]), ans[0]+numpy.sqrt(cov[0][0])],
             ans[4]-10, ans[4]+10, linestyles='dotted')
#Plot contours at 68% and 95%
dp4_2D, dp0_2D = numpy.meshgrid(dp4, dp0)  #2D coordinate grids for the contour plot
cs = pylab.contour(ans[0]+dp0_2D, ans[4]+dp4_2D, chisqprob_2D,
                   [0.68, 0.95], linewidths=5)
pylab.clabel(cs)
pylab.xlabel('A (units)')
pylab.ylabel('E (units)')
#Add the bootstrap points
pylab.plot(bootstraps[:, 0], bootstraps[:, 4], '.', color='grey')
pylab.xlim(62, 112)
pylab.ylim(23, 43)
pylab.savefig('confidence_peak_constant_background_with_bootstrap.png')
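
#A small follow-up sketch (an addition, not in the original script):
#quantify the A-E degeneracy seen in the contours by comparing the
#correlation coefficient implied by the fit covariance with the one
#measured directly from the Monte-Carlo and bootstrap samples.
r_fit = cov[0, 4]/numpy.sqrt(cov[0, 0]*cov[4, 4])
r_mc = numpy.corrcoef(mcs[:, 0], mcs[:, 4])[0, 1]
r_bootstrap = numpy.corrcoef(bootstraps[:, 0], bootstraps[:, 4])[0, 1]
print('A-E correlation: fit =', r_fit, ' MC =', r_mc, ' bootstrap =', r_bootstrap)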