import numpy as np
import pandas as pd
csvs = [pd.read_csv('4mu_2011.csv'), pd.read_csv('4e_2011.csv'), pd.read_csv('2e2mu_2011.csv')]
csvs += [pd.read_csv('4mu_2012.csv'), pd.read_csv('4e_2012.csv'), pd.read_csv('2e2mu_2012.csv')]
fourlep = pd.concat(csvs)
len(fourlep)
fourlep.head()
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import math
rmin = 70
rmax = 181
nbins = 37
M_hist = np.histogram(fourlep['M'], bins=nbins, range=(rmin,rmax))
hist, bins = M_hist
width = 1.0*(bins[1] - bins[0])
center = (bins[:-1] + bins[1:]) / 2
xerrs = [width*0.5 for i in range(0, nbins)]
yerrs = np.sqrt(hist)
plt.errorbar(center, hist, xerr=xerrs, yerr=yerrs, linestyle='None', color='black', marker='o')
plt.xlabel('4l invariant mass (GeV)', fontsize=15)
plt.ylabel('Events / 3 GeV', fontsize=15)
plt.ylim(0,25)
plt.xlim(rmin,rmax)
plt.show()
In order to compare with the predictions for signal and background we need to extract the contents of the MC histograms which are already weighted by luminosity, cross-section, and number of events. The results are below:
dy = np.array([0,0,0,0,0,0.354797,0.177398,2.60481,0,0,0,0,0,0,0,0,0,0.177398,0.177398,0,0.177398,0,0,0,0,0,0,0,0,0,0,0,0.177398,0,0,0,0])
ttbar = np.array([0.00465086,0,0.00465086,0,0,0,0,0,0,0,0.00465086,0,0,0,0,0,0.00465086,0,0,0,0,0.00465086,0.00465086,0,0,0.0139526,0,0,0.00465086,0,0,0,0.00465086,0.00465086,0.0139526,0,0])
zz = np.array([0.181215,0.257161,0.44846,0.830071,1.80272,4.57354,13.9677,14.0178,4.10974,1.58934,0.989974,0.839775,0.887188,0.967021,1.07882,1.27942,1.36681,1.4333,1.45141,1.41572,1.51464,1.45026,1.47328,1.42899,1.38757,1.33561,1.3075,1.29831,1.31402,1.30672,1.36442,1.39256,1.43472,1.58321,1.85313,2.19304,2.95083])
hzz = np.array([0.00340992,0.00450225,0.00808944,0.0080008,0.00801578,0.0108945,0.00794274,0.00950757,0.0130648,0.0163568,0.0233832,0.0334813,0.0427229,0.0738129,0.13282,0.256384,0.648352,2.38742,4.87193,0.944299,0.155005,0.0374193,0.0138906,0.00630364,0.00419265,0.00358719,0.00122527,0.000885718,0.000590479,0.000885718,0.000797085,8.86337e-05,0.000501845,8.86337e-05,0.000546162,4.43168e-05,8.86337e-05])
What do the individual background and signal contributions looks like?
#ttbar
plt.bar(center, ttbar, align='center', width=width, color='gray', linewidth=0, edgecolor='b', alpha=0.5)
plt.xlabel('4l invariant mass (GeV)', fontsize=15)
plt.ylabel('Events / 3 GeV', fontsize=15)
plt.ylim(0,0.1)
plt.xlim(rmin,rmax)
plt.show()
#DY
plt.bar(center, dy, align='center', width=width, color='g', linewidth=0, edgecolor='black', alpha=0.5)
plt.xlabel('4l invariant mass (GeV)', fontsize=15)
plt.ylabel('Events / 3 GeV', fontsize=15)
plt.ylim(0,25)
plt.xlim(rmin,rmax)
plt.show()
#ZZ
plt.bar(center, zz, align='center', width=width, color='b', linewidth=0, edgecolor='black', alpha=0.5)
plt.xlabel('4l invariant mass (GeV)', fontsize=15)
plt.ylabel('Events / 3 GeV', fontsize=15)
plt.ylim(0,25)
plt.xlim(rmin,rmax)
plt.show()
#HZZ
plt.bar(center, hzz, align='center', width=width, color='w', linewidth=1, edgecolor='r')
plt.xlabel('4l invariant mass (GeV)', fontsize=15)
plt.ylabel('Events / 3 GeV', fontsize=15)
plt.ylim(0,25)
plt.xlim(rmin,rmax)
plt.show()
Let's make a final histogram will all of the contributions along with the data. N.B. we make a stacked histogram.
#ttbar
ttbar_bar = plt.bar(center, ttbar, align='center', width=width, color='gray', linewidth=0, edgecolor='b', alpha=0.5)
#DY
dy_bar = plt.bar(center, dy, align='center', width=width, color='g', linewidth=0, edgecolor='black', alpha=0.5, bottom=ttbar)
#ZZ
zz_bar = plt.bar(center, zz, align='center', width=width, color='b', linewidth=0, edgecolor='black', alpha=0.5, bottom=ttbar+dy)
#HZZ
hzz_bar = plt.bar(center, hzz, align='center', width=width, color='w', linewidth=1, edgecolor='r', bottom=ttbar+dy+zz)
#data
data_bar = plt.errorbar(center, hist, xerr=xerrs, yerr=yerrs, linestyle='None', color='black', marker='o')
matplotlib.rcParams['text.usetex'] = True
matplotlib.rcParams['font.family'] = 'sans-serif'
matplotlib.rcParams['font.sans-serif'] = 'Helvetica'
plt.title(r'$\bf{\sqrt{s} = 7~TeV, L = 2.8~fb^{-1}; \sqrt{s} = 8~TeV, L = 11.6~fb^{-1}}$', fontsize=12)
plt.legend([ttbar_bar, dy_bar, zz_bar, hzz_bar, data_bar],
[r'$\bf{t\bar{t}}$', r'$\bf{Z/\gamma^{*} + X}$', r'$\bf{ZZ \rightarrow 4l}$', r'$\bf{m_{H} = 125~GeV}$', r'$\bf{Data}$'])
plt.xlabel(r'$\bf{m_{4l}}$ (GeV)', fontsize=15)
plt.ylabel(r'Events / 3 GeV', fontsize=15)
plt.ylim(0,25)
plt.xlim(rmin,rmax)
plt.show()
Not bad. One bad thing is that the way we have to stack the plots: the red line from the expected signal for H to 4l is spread over the whole range. Also, there should be some font fixes. Let's compare with the example analysis plot: