import matplotlib.pyplot as plt
import numpy as np
import scipy.stats
import warnings
import hist
errband_opts = {
"hatch": "////",
"facecolor": "none",
"lw": 0,
"color": "k",
"alpha": 0.4,
}
markers = [".", "o", "^", "s", "+", "x", "D", "*"]
color_map = {
# https://cms-analysis.docs.cern.ch/guidelines/plotting/colors/
"$t\\bar{t}$": "#86c8dd",
"Single top": "#578dff",
"tt+X": "#1845fb",
"VV": "#c91f16",
"W+jets": "#c849a9",
"Z+jets": "#ff5e02",
"DY": "#ff5e02",
"QCD": "#adad7d",
"QCD($\\mu$)": "#656364",
"Ph+jets": "#f2ab6d",
# https://btvweb.web.cern.ch/artworks/BTV_colorpalette/Palette.png
"udsg": "#15a3ef", # blue
"ud": "#15a3ef", # blue
"s": "#209A24", # green
"g": "#7a21dd", # purple
"pu": "#717581", # grey
"other": "#717581", # grey
"c": "#ed1e02", # red
"b": "#fcb302", # yellow
}
sample_mergemap = {
# ttbar
"$t\\bar{t}$": [
"TTto2L2Nu_TuneCP5_13p6TeV_powheg-pythia8",
"TTto4Q_TuneCP5_13p6TeV_powheg-pythia8",
"TTtoLNu2Q_TuneCP5_13p6TeV_powheg-pythia8",
"TTTo2J1L1Nu_CP5_13p6TeV_powheg-pythia8",
# Run 2 (13 TeV)
"TTTo2L2Nu_TuneCP5_13TeV-powheg-pythia8",
"TTToHadronic_TuneCP5_13TeV-powheg-pythia8",
"TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8",
],
# single top
"Single top": [
"TbarBQ_t-channel_4FS_CP5_13p6TeV_powheg-madspin-pythia8",
"TBbarQ_t-channel_4FS_CP5_13p6TeV_powheg-madspin-pythia8",
"TWminus_DR_AtLeastOneLepton_CP5_13p6TeV_powheg-pythia8",
"TbarBQ_t-channel_4FS_CP5_13p6TeV_powheg-madspin-pythia8",
"TbarWplus_DR_AtLeastOneLepton_CP5_13p6TeV_powheg-pythia8",
# decay
"TWminusto4Q_TuneCP5_13p6TeV_powheg-pythia8",
"TWminustoLNu2Q_TuneCP5_13p6TeV_powheg-pythia8",
"TWminusto2L2Nu_TuneCP5_13p6TeV_powheg-pythia8",
"TbarWplusto4Q_TuneCP5_13p6TeV_powheg-pythia8",
"TbarWplustoLNu2Q_TuneCP5_13p6TeV_powheg-pythia8",
"TbarWplusto2L2Nu_TuneCP5_13p6TeV_powheg-pythia8",
"TBbarQ_t-channel_4FS_TuneCP5_13p6TeV_powheg-madspin-pythia8",
"TbarBQ_t-channel_4FS_TuneCP5_13p6TeV_powheg-madspin-pythia8",
"TbarBQtoLNu-t-channel-4FS_TuneCP5_13p6TeV_powheg-madspin-pythia8",
"TBbarQtoLNu-t-channel-4FS_TuneCP5_13p6TeV_powheg-madspin-pythia8",
"TbarBtoLminusNuB-s-channel-4FS_TuneCP5_13p6TeV_amcatnlo-pythia8",
"TBbartoLplusNuBbar-s-channel-4FS_TuneCP5_13p6TeV_amcatnlo-pythia8",
# Run 2 (13 TeV)
"ST_tW_top_5f_inclusiveDecays_TuneCP5_13TeV-powheg-pythia8",
"ST_tW_antitop_5f_inclusiveDecays_TuneCP5_13TeV-powheg-pythia8",
"ST_t-channel_top_4f_InclusiveDecays_TuneCP5_13TeV-powheg-madspin-pythia8",
"ST_t-channel_antitop_4f_InclusiveDecays_TuneCP5_13TeV-powheg-madspin-pythia8",
"ST_s-channel_4f_leptonDecays_TuneCP5_13TeV-amcatnlo-pythia8",
"TBbartoLNu-s-channel_TuneCP5_13p6TeV_powheg-pythia8",
"TbarBtoLNu-s-channel_TuneCP5_13p6TeV_powheg-pythia8",
],
"tt+X": [
"TTLNu-1Jets_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
"TTW-WtoQQ-1Jets_TuneCP5_13p6TeV_amcatnloFXFXold-pythia8",
"TTNuNu_TuneCP5_13p6TeV_amcatnlo-pythia8",
"TTLL_Bin-MLL-4to50_TuneCP5_13p6TeV_amcatnlo-pythia8",
"TTLL_Bin-MLL-50_TuneCP5_13p6TeV_amcatnlo-pythia8",
"TTZ-ZtoQQ-1Jets_TuneCP5_13p6TeV_amcatnloFXFXold-pythia8",
],
# diboson
"VV": [
"WW_TuneCP5_13p6TeV-pythia8",
"WZ_TuneCP5_13p6TeV-pythia8",
"ZZ_TuneCP5_13p6TeV-pythia8",
"WW_TuneCP5_13p6TeV_pythia8",
"WZ_TuneCP5_13p6TeV_pythia8",
"ZZ_TuneCP5_13p6TeV_pythia8",
# decay
"ZZto2L2Q_TuneCP5_13p6TeV_powheg-pythia8",
"ZZto2Nu2Q_TuneCP5_13p6TeV_powheg-pythia8",
"ZZto4L_TuneCP5_13p6TeV_powheg-pythia8",
"ZZto2L2Nu_TuneCP5_13p6TeV_powheg-pythia8",
"WZtoLNu2Q_TuneCP5_13p6TeV_powheg-pythia8",
"WZToLNu2B-1Jets-4FS_TuneCP5_13p6TeV_amcatnloFXFX-madspin-pythia8",
"WZtoL3Nu-1Jets-4FS_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
"WZto2L2Q_TuneCP5_13p6TeV_powheg-pythia8",
"WZto3LNu_TuneCP5_13p6TeV_powheg-pythia8",
"WWto4Q_TuneCP5_13p6TeV_powheg-pythia8",
"WWtoLNu2Q_TuneCP5_13p6TeV_powheg-pythia8",
"WWto2L2Nu_TuneCP5_13p6TeV_powheg-pythia8",
],
"Z+jets": [
"DYto2L-2Jets_MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
"DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
"DYJetsToLL_M-10to50_TuneCP5_13p6TeV-madgraphMLM-pythia8",
"DYJetsToLL_M-50_TuneCP5_13p6TeV-madgraphMLM-pythia8",
"DYto2E-2Jets_Bin-MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
"DYto2Mu-2Jets_Bin-MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
"DYto2Tau-2Jets_Bin-MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
"DYto2E-2Jets_Bin-MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
"DYto2Mu-2Jets_Bin-MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
"DYto2Tau-2Jets_Bin-MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
],
"W+jets": [
"WtoENu-4Jets_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"WtoMuNu-4Jets_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"WtoTauNu-4Jets_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"WtoLNu-4Jets_Bin-1J_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"WtoLNu-4Jets_Bin-2J_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"WtoLNu-4Jets_Bin-3J_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"WtoLNu-4Jets_Bin-4J_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"WJetsToLNu_TuneCP5_13p6TeV-madgraphMLM-pythia8",
"WtoLNu-4Jets_TuneCP5_13p6TeV_madgraphMLM-pythia8",
# jet binned samples
"WtoLNu-4Jets_1J_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"WtoLNu-4Jets_2J_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"WtoLNu-4Jets_3J_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"WtoLNu-4Jets_Bin-1J_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"WtoLNu-4Jets_Bin-2J_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"WtoLNu-4Jets_Bin-3J_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"WtoLNu-4Jets_Bin-4J_TuneCP5_13p6TeV_madgraphMLM-pythia8",
# lepton binned samples
"WtoENu-4Jets_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"WtoMuNu-4Jets_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"WtoTauNu-4Jets_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"WtoENu-2Jets_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
"WtoMuNu-2Jets_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
"WtoTauNu-2Jets_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
],
"DY": [
"DYto2L-2Jets_Bin-1J-MLL-50-PTLL-40to100_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
"DYto2L-2Jets_Bin-1J-MLL-50-PTLL-100to200_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
"DYto2L-2Jets_Bin-1J-MLL-50-PTLL-200to400_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
"DYto2L-2Jets_Bin-1J-MLL-50-PTLL-400to600_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
"DYto2L-2Jets_Bin-1J-MLL-50-PTLL-600_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
"DYto2L-2Jets_Bin-2J-MLL-50-PTLL-40to100_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
"DYto2L-2Jets_Bin-2J-MLL-50-PTLL-100to200_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
"DYto2L-2Jets_Bin-2J-MLL-50-PTLL-200to400_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
"DYto2L-2Jets_Bin-2J-MLL-50-PTLL-400to600_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
"DYto2L-2Jets_Bin-2J-MLL-50-PTLL-600_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
"DYto2E-2Jets_Bin-MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
"DYto2E-2Jets_Bin-MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
"DYto2Mu-2Jets_Bin-MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
"DYto2Mu-2Jets_Bin-MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
"DYto2Tau-2Jets_Bin-MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
"DYto2Tau-2Jets_Bin-MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
],
# QCD
"QCD": [
"QCD_Bin-PT-30to50_Fil-MuEnriched_TuneCP5_13p6TeV_pythia8",
"QCD_Bin-PT-50to80_Fil-MuEnriched_TuneCP5_13p6TeV_pythia8",
"QCD_Bin-PT-80to120_Fil-MuEnriched_TuneCP5_13p6TeV_pythia8",
"QCD_Bin-PT-120to170_Fil-MuEnriched_TuneCP5_13p6TeV_pythia8",
"QCD_Bin-PT-170to300_Fil-MuEnriched_TuneCP5_13p6TeV_pythia8",
"QCD_Bin-PT-300to470_Fil-MuEnriched_TuneCP5_13p6TeV_pythia8",
"QCD_Bin-PT-470to600_Fil-MuEnriched_TuneCP5_13p6TeV_pythia8",
"QCD_Bin-PT-600to800_Fil-MuEnriched_TuneCP5_13p6TeV_pythia8",
"QCD_Bin-PT-800to1000_Fil-MuEnriched_TuneCP5_13p6TeV_pythia8",
"QCD_Bin-PT-1000_Fil-MuEnriched_TuneCP5_13p6TeV_pythia8",
"QCD-4Jets_Bin-HT-40to70_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"QCD-4Jets_Bin-HT-70to100_TuneCP5_13p6TeV_madgraphMLM-pythia",
"QCD-4Jets_Bin-HT-100to200_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"QCD-4Jets_Bin-HT-200to400_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"QCD-4Jets_Bin-HT-400to600_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"QCD-4Jets_Bin-HT-600to800_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"QCD-4Jets_Bin-HT-800to1000_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"QCD-4Jets_Bin-HT-1000to1200_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"QCD-4Jets_Bin-HT-1200to1500_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"QCD-4Jets_Bin-HT-1500to2000_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"QCD-4Jets_Bin-HT-2000_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"QCD_PT-15to30_TuneCP5_13p6TeV_pythia8",
"QCD_PT-30to50_TuneCP5_13p6TeV_pythia8",
"QCD_PT-50to80_TuneCP5_13p6TeV_pythia8",
"QCD_PT-80to120_TuneCP5_13p6TeV_pythia8",
"QCD_PT-120to170_TuneCP5_13p6TeV_pythia8",
"QCD_PT-170to300_TuneCP5_13p6TeV_pythia8",
"QCD_PT-300to470_TuneCP5_13p6TeV_pythia8",
"QCD_PT-470to600_TuneCP5_13p6TeV_pythia8",
"QCD_PT-600to800_TuneCP5_13p6TeV_pythia8",
"QCD_PT-800to1000_TuneCP5_13p6TeV_pythia8",
"QCD_PT-1000to1400_TuneCP5_13p6TeV_pythia8",
"QCD_PT-1400to1800_TuneCP5_13p6TeV_pythia8",
"QCD_PT-1800to2400_TuneCP5_13p6TeV_pythia8",
"QCD_PT-2400to3200_TuneCP5_13p6TeV_pythia8",
"QCD_PT-3200_TuneCP5_13p6TeV_pythia8",
"QCD_Bin-PT-15to20_TuneCP5_13p6TeV_pythia8",
"QCD_Bin-PT-20to30_TuneCP5_13p6TeV_pythia8",
"QCD_Bin-PT-30to50_TuneCP5_13p6TeV_pythia8",
"QCD_Bin-PT-50to80_TuneCP5_13p6TeV_pythia8",
"QCD_Bin-PT-80to120_TuneCP5_13p6TeV_pythia8",
"QCD_Bin-PT-120to170_TuneCP5_13p6TeV_pythia8",
"QCD_Bin-PT-170to300_TuneCP5_13p6TeV_pythia8",
"QCD_Bin-PT-300to470_TuneCP5_13p6TeV_pythia8",
"QCD_Bin-PT-470to600_TuneCP5_13p6TeV_pythia8",
"QCD_Bin-PT-600to800_TuneCP5_13p6TeV_pythia8",
"QCD_Bin-PT-800to1000_TuneCP5_13p6TeV_pythia8",
"QCD_Bin-PT-1000to1500_TuneCP5_13p6TeV_pythia8",
"QCD_Bin-PT-1500to2000_TuneCP5_13p6TeV_pythia8",
"QCD_Bin-PT-2000to2500_TuneCP5_13p6TeV_pythia8",
"QCD_Bin-PT-2500to3000_TuneCP5_13p6TeV_pythia8",
"QCD_Bin-PT-3000_TuneCP5_13p6TeV_pythia8",
],
# QCD muon enriched
"QCD($\\mu$)": [
"QCD_PT-15to20_MuEnrichedPt5_TuneCP5_13p6TeV_pythia8",
"QCD_PT-20to30_MuEnrichedPt5_TuneCP5_13p6TeV_pythia8",
"QCD_PT-30to50_MuEnrichedPt5_TuneCP5_13p6TeV_pythia8",
"QCD_PT-50to80_MuEnrichedPt5_TuneCP5_13p6TeV_pythia8",
"QCD_PT-80to120_MuEnrichedPt5_TuneCP5_13p6TeV_pythia8",
"QCD_PT-120to170_MuEnrichedPt5_TuneCP5_13p6TeV_pythia8",
"QCD_PT-170to300_MuEnrichedPt5_TuneCP5_13p6TeV_pythia8",
"QCD_PT-300to470_MuEnrichedPt5_TuneCP5_13p6TeV_pythia8",
"QCD_PT-470to600_MuEnrichedPt5_TuneCP5_13p6TeV_pythia8",
"QCD_PT-600to800_MuEnrichedPt5_TuneCP5_13p6TeV_pythia8",
"QCD_PT-800to1000_MuEnrichedPt5_TuneCP5_13p6TeV_pythia8",
"QCD_PT-1000_MuEnrichedPt5_TuneCP5_13p6TeV_pythia8",
],
"Ph+jets": [
"GJ-4Jets_HT-100to200_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"GJ-4Jets_HT-200to400_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"GJ-4Jets_HT-400to600_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"GJ-4Jets_HT-40to70_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"GJ-4Jets_HT-600_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"GJ-4Jets_HT-70to100_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"GJ-4Jets_dRGJ-0p25_PTG-10to100_HT-100to200_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"GJ-4Jets_dRGJ-0p25_PTG-10to100_HT-200to400_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"GJ-4Jets_dRGJ-0p25_PTG-10to100_HT-40to100_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"GJ-4Jets_dRGJ-0p25_PTG-10to100_HT-1000_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"GJ-4Jets_dRGJ-0p25_PTG-10to100_HT-400to600_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"GJ-4Jets_dRGJ-0p25_PTG-10to100_HT-600to1000_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"GJ-4Jets_dRGJ-0p25_PTG-100to200_HT-1000_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"GJ-4Jets_dRGJ-0p25_PTG-100to200_HT-200to400_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"GJ-4Jets_dRGJ-0p25_PTG-100to200_HT-400to600_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"GJ-4Jets_dRGJ-0p25_PTG-100to200_HT-40to200_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"GJ-4Jets_dRGJ-0p25_PTG-100to200_HT-600to1000_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"GJ-4Jets_dRGJ-0p25_PTG-200_HT-1000_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"GJ-4Jets_dRGJ-0p25_PTG-200_HT-400to600_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"GJ-4Jets_dRGJ-0p25_PTG-200_HT-40to400_TuneCP5_13p6TeV_madgraphMLM-pythia8",
"GJ-4Jets_dRGJ-0p25_PTG-200_HT-600to1000_TuneCP5_13p6TeV_madgraphMLM-pythia8",
],
}
### copy functions coffea.hist.plotratio https://github.com/CoffeaTeam/coffea/blob/master/coffea/hist/plot.py to boost-hist
################
## ratio plot ##
################
_coverage1sd = scipy.stats.norm.cdf(1) - scipy.stats.norm.cdf(-1)
[docs]
def compatible(self, other):
"""
Checks if this histogram is compatible with another, i.e. they have identical binning
"""
if len(self.axes) != len(other.axes):
return False
if set(self.axes.name) != set(other.axes.name):
return False
# if not all(d1 == d2 for d1, d2 in zip(self.dense_axes(), other.dense_axes())):
# return False
return True
[docs]
def poisson_interval(sumw, sumw2, coverage=_coverage1sd):
"""
Frequentist coverage interval for Poisson-distributed observations
Parameters
----------
sumw : numpy.ndarray
Sum of weights vector
sumw2 : numpy.ndarray
Sum weights squared vector
coverage : float, optional
Central coverage interval, defaults to 68%
Calculates the so-called 'Garwood' interval,
c.f. https://www.ine.pt/revstat/pdf/rs120203.pdf or
http://ms.mcmaster.ca/peter/s743/poissonalpha.html
For weighted data, this approximates the observed count by ``sumw**2/sumw2``, which
effectively scales the unweighted poisson interval by the average weight.
This may not be the optimal solution: see https://arxiv.org/pdf/1309.1287.pdf for a proper treatment.
When a bin is zero, the scale of the nearest nonzero bin is substituted to scale the nominal upper bound.
If all bins zero, a warning is generated and interval is set to ``sumw``.
"""
scale = np.empty_like(sumw)
scale[sumw != 0] = sumw2[sumw != 0] / sumw[sumw != 0]
if np.sum(sumw == 0) > 0:
missing = np.where(sumw == 0)
available = np.nonzero(sumw)
if len(available[0]) == 0:
warnings.warn(
"All sumw are zero! Cannot compute meaningful error bars",
RuntimeWarning,
)
return np.vstack([sumw, sumw])
nearest = sum(
[np.subtract.outer(d, d0) ** 2 for d, d0 in zip(available, missing)]
).argmin(axis=0)
argnearest = tuple(dim[nearest] for dim in available)
scale[missing] = scale[argnearest]
counts = sumw / scale
lo = scale * scipy.stats.chi2.ppf((1 - coverage) / 2, 2 * counts) / 2.0
hi = scale * scipy.stats.chi2.ppf((1 + coverage) / 2, 2 * (counts + 1)) / 2.0
interval = np.array([lo, hi])
interval[interval == np.nan] = 0.0 # chi2.ppf produces nan for counts=0
return interval
[docs]
def normal_interval(pw, tw, pw2, tw2, coverage=_coverage1sd):
"""
Compute errors based on the expansion of pass/(pass + fail), possibly weighted
Parameters
----------
pw : np.ndarray
Numerator, or number of (weighted) successes, vectorized
tw : np.ndarray
Denominator or number of (weighted) trials, vectorized
pw2 : np.ndarray
Numerator sum of weights squared, vectorized
tw2 : np.ndarray
Denominator sum of weights squared, vectorized
coverage : float, optional
Central coverage interval, defaults to 68%
c.f. https://root.cern.ch/doc/master/TEfficiency_8cxx_source.html#l02515
"""
eff = pw / tw
variance = (pw2 * (1 - 2 * eff) + tw2 * eff**2) / (tw**2)
sigma = np.sqrt(variance)
prob = 0.5 * (1 - coverage)
delta = np.zeros_like(sigma)
delta[sigma != 0] = scipy.stats.norm.ppf(prob, scale=sigma[sigma != 0])
lo = eff - np.minimum(eff + delta, np.ones_like(eff))
hi = np.maximum(eff - delta, np.zeros_like(eff)) - eff
return np.array([lo, hi])
[docs]
def clopper_pearson_interval(num, denom, coverage=_coverage1sd):
"""
Compute Clopper-Pearson coverage interval for a binomial distribution
Parameters
----------
num : numpy.ndarray
Numerator, or number of successes, vectorized
denom : numpy.ndarray
Denominator or number of trials, vectorized
coverage : float, optional
Central coverage interval, defaults to 68%
c.f. http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval
"""
if np.any(num > denom):
raise ValueError(
"Found numerator larger than denominator while calculating binomial uncertainty"
)
lo = scipy.stats.beta.ppf((1 - coverage) / 2, num, denom - num + 1)
hi = scipy.stats.beta.ppf((1 + coverage) / 2, num + 1, denom - num)
interval = np.array([lo, hi])
interval[:, num == 0.0] = 0.0
interval[1, num == denom] = 1.0
return interval
## ratioplot function
[docs]
def plotratio(
num,
denom,
ax=None,
clear=True,
flow=None,
xerr=False,
error_opts={},
denom_fill_opts={},
guide_opts={},
unc="num",
label=None,
ext_denom_error=None,
):
"""
Create a ratio plot, dividing two compatible histograms
Parameters
----------
num : Hist
Numerator, a single-axis histogram
denom : Hist
Denominator, a single-axis histogram
ax : matplotlib.axes.Axes, optional
Axes object (if None, one is created)
clear : bool, optional
Whether to clear Axes before drawing (if passed); if False, this function will skip drawing the legend
flow : str, optional {None, "show", "sum"}
Whether plot the under/overflow bin. If "show", add additional under/overflow bin. If "sum", add the under/overflow bin content to first/last bin.
xerr: bool, optional
If true, then error bars are drawn for x-axis to indicate the size of the bin.
error_opts : dict, optional
A dictionary of options to pass to the matplotlib
`ax.errorbar <https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.errorbar.html>`_ call
internal to this function. Leave blank for defaults. Some special options are interpreted by
this function and not passed to matplotlib: 'emarker' (default: '') specifies the marker type
to place at cap of the errorbar.
denom_fill_opts : dict, optional
A dictionary of options to pass to the matplotlib
`ax.fill_between <https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.axes.Axes.fill_between.html>`_ call
internal to this function, filling the denominator uncertainty band. Leave blank for defaults.
guide_opts : dict, optional
A dictionary of options to pass to the matplotlib
`ax.axhline <https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.axes.Axes.axhline.html>`_ call
internal to this function, to plot a horizontal guide line at ratio of 1. Leave blank for defaults.
unc : str, optional
Uncertainty calculation option: 'clopper-pearson' interval for efficiencies; 'poisson-ratio' interval
for ratio of poisson distributions; 'num' poisson interval of numerator scaled by denominator value
(common for data/mc, for better or worse).
label : str, optional
Associate a label to this entry (note: y axis label set by ``num.label``)
ext_denom_error: list of np.array[error_up,error_down], optional
External MC errors not stored in the original histogram
Returns
-------
ax : matplotlib.axes.Axes
A matplotlib `Axes <https://matplotlib.org/3.1.1/api/axes_api.html>`_ object
"""
if ax is None:
fig, ax = plt.subplots(1, 1)
else:
if not isinstance(ax, plt.Axes):
raise ValueError("ax must be a matplotlib Axes object")
if clear:
ax.clear()
if not compatible(num, denom):
raise ValueError(
"numerator and denominator histograms have incompatible axis definitions"
)
if len(num.axes) > 1:
raise ValueError("plotratio() can only support one-dimensional histograms")
if error_opts is None and denom_fill_opts is None and guide_opts is None:
error_opts = {}
denom_fill_opts = {}
axis = num.axes[0]
ax.set_xlabel(axis.label)
ax.set_ylabel(num.label)
edges = axis.edges
if flow == "show":
edges = np.array(
[
edges[0] - (edges[1] - edges[0]) * 3,
edges[0] - (edges[1] - edges[0]),
*edges,
edges[-1] + (edges[1] - edges[0]),
edges[-1] + (edges[1] - edges[0]) * 3,
]
)
centers = (edges[1:] + edges[:-1]) / 2
ranges = (edges[1:] - edges[:-1]) / 2 if xerr else None
sumw_num, sumw2_num = num.values(), num.variances()
sumw_denom, sumw2_denom = denom.values(), denom.variances()
if flow == "show":
print("Show under/overflow bin ")
sumw_num, sumw2_num = (
num.view(flow=True)["value"],
num.view(flow=True)["variance"],
)
sumw_num, sumw2_num = np.insert(sumw_num, -1, 0), np.insert(sumw2_num, -1, 0)
sumw_num, sumw2_num = np.insert(sumw_num, 1, 0), np.insert(sumw2_num, 1, 0)
sumw_denom, sumw2_denom = (
denom.view(flow=True)["value"],
denom.view(flow=True)["variance"],
)
sumw_denom, sumw2_denom = np.insert(sumw_denom, -1, 0), np.insert(
sumw2_denom, -1, 0
)
sumw_denom, sumw2_denom = np.insert(sumw_denom, 1, 0), np.insert(
sumw2_denom, 1, 0
)
elif flow == "sum":
print("Merge under/overflow bin to first/last bin")
sumw_num[0], sumw2_num[0] = (
sumw_num[0] + num.view(flow=True)["value"][0],
sumw2_num[0] + num.view(flow=True)["value"][0],
)
sumw_num[-1], sumw2_num[-1] = (
sumw_num[-1] + num.view(flow=True)["value"][-1],
sumw2_num[-1] + num.view(flow=True)["value"][-1],
)
sumw_denom[0], sumw2_denom[0] = (
sumw_denom[0] + denom.view(flow=True)["value"][0],
sumw2_denom[0] + denom.view(flow=True)["value"][0],
)
sumw_denom[-1], sumw2_denom[-1] = (
sumw_denom[-1] + denom.view(flow=True)["value"][-1],
sumw2_denom[-1] + denom.view(flow=True)["value"][-1],
)
rsumw = sumw_num / sumw_denom
if unc == "clopper-pearson":
rsumw_err = np.abs(clopper_pearson_interval(sumw_num, sumw_denom) - rsumw)
elif unc == "poisson-ratio":
# poisson ratio n/m is equivalent to binomial n/(n+m)
rsumw_err = np.abs(
clopper_pearson_interval(sumw_num, sumw_num + sumw_denom) - rsumw
)
elif unc == "num":
rsumw_err = np.abs(poisson_interval(rsumw, sumw2_num / sumw_denom**2) - rsumw)
elif unc == "efficiency":
rsumw_err = np.abs(
normal_interval(sumw_num, sumw_denom, sumw2_num, sumw2_denom)
)
else:
raise ValueError("Unrecognized uncertainty option: %r" % unc)
## if additional uncertainties
if ext_denom_error is not None:
if denom_fill_opts is {}:
print("suggest to use different style for additional error")
if np.shape(rsumw_err) != np.shape(ext_denom_error / sumw_denom):
raise ValueError("Imcompatible error length")
rsumw_err = np.sqrt(rsumw_err**2 + (ext_denom_error / sumw_denom) ** 2)
if error_opts is not None:
opts = {
"label": label,
"linestyle": "none",
"lw": 1,
"marker": "o",
"color": "k",
}
opts.update(error_opts)
emarker = opts.pop("emarker", "")
errbar = ax.errorbar(x=centers, y=rsumw, xerr=ranges, yerr=rsumw_err, **opts)
plt.setp(errbar[1], "marker", emarker)
if denom_fill_opts is not None:
unity = np.ones_like(sumw_denom)
denom_unc = poisson_interval(unity, sumw2_denom / sumw_denom**2)
opts = {
"hatch": "////",
"facecolor": "none",
"lw": 0,
"color": "k",
"alpha": 0.4,
}
if ext_denom_error is not None:
denom_unc[0] = (
unity[0]
- np.sqrt(
(denom_unc - unity) ** 2 + (ext_denom_error / sumw_denom) ** 2
)[0]
)
denom_unc[1] = (
unity[1]
+ np.sqrt(
(denom_unc - unity) ** 2 + (ext_denom_error / sumw_denom) ** 2
)[1]
)
opts = denom_fill_opts
ax.stairs(denom_unc[0], edges=edges, baseline=denom_unc[1], **opts)
if guide_opts is not None:
opts = {"linestyle": "--", "color": (0, 0, 0, 0.5), "linewidth": 1}
opts.update(guide_opts)
if clear is not False:
ax.axhline(1.0, **opts)
if clear:
ax.autoscale(axis="x", tight=True)
ax.set_ylim(0, None)
return ax
## Calculate SF error
[docs]
def SFerror(collated, discr, flow=None):
err_up = (
collated["mc"][discr][{"syst": "SFup", "flav": sum}].values()
- collated["mc"][discr][{"syst": "SF", "flav": sum}].values()
)
err_dn = (
collated["mc"][discr][{"syst": "SFdn", "flav": sum}].values()
- collated["mc"][discr][{"syst": "SF", "flav": sum}].values()
)
if flow is not None:
err_up = (
collated["mc"][discr][{"syst": "SFup", "flav": sum}].view(flow=True)[
"value"
]
- collated["mc"][discr][{"syst": "SF", "flav": sum}].view(flow=True)[
"value"
]
)
err_dn = (
collated["mc"][discr][{"syst": "SFdn", "flav": sum}].view(flow=True)[
"value"
]
- collated["mc"][discr][{"syst": "SF", "flav": sum}].view(flow=True)[
"value"
]
)
if "C" in discr: ## scale uncertainties for charm tagger by 2
err_up = np.sqrt(
np.add(
np.power(err_up, 2),
np.power(
2
* (
collated["mc"][discr][{"syst": "SFup", "flav": 4}].values()
- collated["mc"][discr][{"syst": "SF", "flav": 4}].values()
),
2,
),
)
)
err_dn = np.sqrt(
np.add(
np.power(err_dn, 2),
np.power(
2
* (
collated["mc"][discr][{"syst": "SFdn", "flav": 4}].values()
- collated["mc"][discr][{"syst": "SF", "flav": 4}].values()
),
2,
),
)
)
if flow:
err_up = np.sqrt(
np.add(
np.power(err_up, 2),
np.power(
2
* (
collated["mc"][discr][{"syst": "SFup", "flav": 4}].view(
flow=True
)["value"]
- collated["mc"][discr][{"syst": "SF", "flav": 4}].view(
flow=True
)["value"]
),
2,
),
)
)
err_dn = np.sqrt(
np.add(
np.power(err_dn, 2),
np.power(
2
* (
collated["mc"][discr][{"syst": "SFdn", "flav": 4}].view(
flow=True
)["value"]
- collated["mc"][discr][{"syst": "SF", "flav": 4}].view(
flow=True
)["value"]
),
2,
),
)
)
return np.array([err_dn, err_up])
[docs]
def autoranger(hist, flow=None):
val, axis = hist.values(), hist.axes[-1].edges
if flow == "show":
val = hist.view(flow=True)["value"]
axis = np.array(
[
axis[0] - (axis[1] - axis[0]) * 3,
axis[0] - (axis[1] - axis[0]),
*axis,
axis[-1] + (axis[1] - axis[0]),
axis[-1] + (axis[1] - axis[0]) * 3,
]
)
for i in range(len(val)):
if val[i] != 0:
mins = i
break
for i in reversed(range(len(val))):
if val[i] != 0:
maxs = i + 1
break
return axis[mins], axis[maxs]
[docs]
def MCerrorband(
hmc,
ax=None,
flow=None,
label="Stat. unc.",
fill_opts=None,
ext_error=None,
clear=False,
):
"""Create a ratio plot, dividing two compatible histograms
Parameters
----------
hmc : Hist
A single-axis histogram
ax : matplotlib.axes.Axes, optional
Axes object (if None, one is created)
flow : str, optional {None, "show", "sum"}
Whether plot the under/overflow bin. If "show", add additional under/overflow bin. If "sum", add the under/overflow bin content to first/last bin.
fill_opts : dict, optional
A dictionary of options to pass to the matplotlib
`ax.fill_between <https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.axes.Axes.fill_between.html>`_ call
internal to this function, filling the denominator uncertainty band. Leave blank for defaults.
label : str, optional
Associate a label to this entry (note: y axis label set by ``num.label``)
ext_error: list of np.array[error_up,error_down], optional
External MC errors not stored in the original histogram
clear : bool, optional
Whether to clear Axes before drawing (if passed); if False, this function will skip drawing the legend
Returns
-------
ax : matplotlib.axes.Axes
A matplotlib `Axes <https://matplotlib.org/3.1.1/api/axes_api.html>`_ object
"""
if ax is None:
fig, ax = plt.subplots(1, 1)
else:
if not isinstance(ax, plt.Axes):
raise ValueError("ax must be a matplotlib Axes object")
if clear:
ax.clear()
if ext_error is None:
values = hmc.values() + np.sqrt(hmc.variances())
baseline = hmc.values() - np.sqrt(hmc.variances())
edges = hmc.axes[0].edges
if flow == "show":
edges = np.array(
[
edges[0] - (edges[1] - edges[0]) * 2,
*edges,
edges[-1] + (edges[1] - edges[0]) * 2,
]
)
values = hmc.view(flow=True)["value"] + np.sqrt(
hmc.view(flow=True)["variance"]
)
baseline = hmc.view(flow=True)["value"] - np.sqrt(
hmc.view(flow=True)["variance"]
)
if flow == "sum":
values[0], values[-1] = (
hmc.view(flow=True)["value"] + np.sqrt(hmc.view(flow=True)["variance"])
)[0] + values[0], (
hmc.view(flow=True)["value"] + np.sqrt(hmc.view(flow=True)["variance"])
)[
-1
] + values[
-1
]
baseline[0], baseline[-1] = (
hmc.view(flow=True)["value"] - np.sqrt(hmc.view(flow=True)["variance"])
)[0] + baseline[0], (
hmc.view(flow=True)["value"] - np.sqrt(hmc.view(flow=True)["variance"])
)[
-1
] + baseline[
-1
]
ax.stairs(
values=values,
baseline=baseline,
edges=edges,
label=label,
**errband_opts,
)
else:
values = hmc.values() + ext_error[1]
baseline = hmc.values() - ext_error[0]
edges = hmc.axes[-1].edges
if flow == "show":
edges = np.array(
[
edges[0] - (edges[1] - edges[0]) * 2,
*edges,
edges[-1] + (edges[1] - edges[0]) * 2,
]
)
values = hmc.view(flow=True)["value"] + ext_error[1]
baseline = hmc.view(flow=True)["value"] - ext_error[0]
if flow == "sum":
values[0], values[-1] = (hmc.view(flow=True)["value"] + ext_error[1])[
0
] + values[0], (hmc.view(flow=True)["value"] + ext_error[0])[-1] + values[
-1
]
baseline[0], baseline[-1] = (hmc.view(flow=True)["value"] - ext_error[1])[
0
] + baseline[0], (hmc.view(flow=True)["value"] - ext_error[0])[
-1
] + baseline[
-1
]
ax.stairs(
values=values,
baseline=baseline,
edges=edges,
label=label,
**fill_opts,
)
## Very nice implementtion from Kenneth Long:
## https://gist.github.com/kdlong/d697ee691c696724fc656186c25f8814
[docs]
def rebin_hist(h, axis_name, edges):
if type(edges) == int:
return h[{axis_name: hist.rebin(edges)}]
ax = h.axes[axis_name]
ax_idx = [a.name for a in h.axes].index(axis_name)
if not all([np.isclose(x, ax.edges).any() for x in edges]):
raise ValueError(
f"Cannot rebin histogram due to incompatible edges for axis '{ax.name}'\n"
f"Edges of histogram are {ax.edges}, requested rebinning to {edges}"
)
# If you rebin to a subset of initial range, keep the overflow and underflow
overflow = ax.traits.overflow or (
edges[-1] < ax.edges[-1] and not np.isclose(edges[-1], ax.edges[-1])
)
underflow = ax.traits.underflow or (
edges[0] > ax.edges[0] and not np.isclose(edges[0], ax.edges[0])
)
flow = overflow or underflow
new_ax = hist.axis.Variable(
edges, name=ax.name, label=ax.label, overflow=overflow, underflow=underflow
)
axes = list(h.axes)
axes[ax_idx] = new_ax
hnew = hist.Hist(*axes, name=h.name, storage=h._storage_type())
# Offset from bin edge to avoid numeric issues
offset = 0.5 * np.min(ax.edges[1:] - ax.edges[:-1])
edges_eval = edges + offset
edge_idx = ax.index(edges_eval)
# Avoid going outside the range, reduceat will add the last index anyway
if edge_idx[-1] == ax.size + ax.traits.overflow:
edge_idx = edge_idx[:-1]
if underflow:
# Only if the original axis had an underflow should you offset
if ax.traits.underflow:
edge_idx += 1
edge_idx = np.insert(edge_idx, 0, 0)
# Take is used because reduceat sums i:len(array) for the last entry, in the case
# where the final bin isn't the same between the initial and rebinned histogram, you
# want to drop this value. Add tolerance of 1/2 min bin width to avoid numeric issues
hnew.values(flow=flow)[...] = np.add.reduceat(
h.values(flow=flow), edge_idx, axis=ax_idx
).take(indices=range(new_ax.size + underflow + overflow), axis=ax_idx)
if hnew._storage_type() == hist.storage.Weight():
hnew.variances(flow=flow)[...] = np.add.reduceat(
h.variances(flow=flow), edge_idx, axis=ax_idx
).take(indices=range(new_ax.size + underflow + overflow), axis=ax_idx)
return hnew