Source code for BTVNanoCommissioning.utils.plot_utils

import matplotlib.pyplot as plt
import numpy as np
import scipy.stats
import warnings
import hist

errband_opts = {
    "hatch": "////",
    "facecolor": "none",
    "lw": 0,
    "color": "k",
    "alpha": 0.4,
}
markers = [".", "o", "^", "s", "+", "x", "D", "*"]
color_map = {
    # https://cms-analysis.docs.cern.ch/guidelines/plotting/colors/
    "$t\\bar{t}$": "#86c8dd",
    "Single top": "#578dff",
    "tt+X": "#1845fb",
    "VV": "#c91f16",
    "W+jets": "#c849a9",
    "Z+jets": "#ff5e02",
    "DY": "#ff5e02",
    "QCD": "#adad7d",
    "QCD($\\mu$)": "#656364",
    "Ph+jets": "#f2ab6d",
    # https://btvweb.web.cern.ch/artworks/BTV_colorpalette/Palette.png
    "udsg": "#15a3ef",  # blue
    "ud": "#15a3ef",  # blue
    "s": "#209A24",  # green
    "g": "#7a21dd",  # purple
    "pu": "#717581",  # grey
    "other": "#717581",  # grey
    "c": "#ed1e02",  # red
    "b": "#fcb302",  # yellow
}
sample_mergemap = {
    # ttbar
    "$t\\bar{t}$": [
        "TTto2L2Nu_TuneCP5_13p6TeV_powheg-pythia8",
        "TTto4Q_TuneCP5_13p6TeV_powheg-pythia8",
        "TTtoLNu2Q_TuneCP5_13p6TeV_powheg-pythia8",
        "TTTo2J1L1Nu_CP5_13p6TeV_powheg-pythia8",
        # Run 2 (13 TeV)
        "TTTo2L2Nu_TuneCP5_13TeV-powheg-pythia8",
        "TTToHadronic_TuneCP5_13TeV-powheg-pythia8",
        "TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8",
    ],
    # single top
    "Single top": [
        "TbarBQ_t-channel_4FS_CP5_13p6TeV_powheg-madspin-pythia8",
        "TBbarQ_t-channel_4FS_CP5_13p6TeV_powheg-madspin-pythia8",
        "TWminus_DR_AtLeastOneLepton_CP5_13p6TeV_powheg-pythia8",
        "TbarBQ_t-channel_4FS_CP5_13p6TeV_powheg-madspin-pythia8",
        "TbarWplus_DR_AtLeastOneLepton_CP5_13p6TeV_powheg-pythia8",
        # decay
        "TWminusto4Q_TuneCP5_13p6TeV_powheg-pythia8",
        "TWminustoLNu2Q_TuneCP5_13p6TeV_powheg-pythia8",
        "TWminusto2L2Nu_TuneCP5_13p6TeV_powheg-pythia8",
        "TbarWplusto4Q_TuneCP5_13p6TeV_powheg-pythia8",
        "TbarWplustoLNu2Q_TuneCP5_13p6TeV_powheg-pythia8",
        "TbarWplusto2L2Nu_TuneCP5_13p6TeV_powheg-pythia8",
        "TBbarQ_t-channel_4FS_TuneCP5_13p6TeV_powheg-madspin-pythia8",
        "TbarBQ_t-channel_4FS_TuneCP5_13p6TeV_powheg-madspin-pythia8",
        "TbarBQtoLNu-t-channel-4FS_TuneCP5_13p6TeV_powheg-madspin-pythia8",
        "TBbarQtoLNu-t-channel-4FS_TuneCP5_13p6TeV_powheg-madspin-pythia8",
        "TbarBtoLminusNuB-s-channel-4FS_TuneCP5_13p6TeV_amcatnlo-pythia8",
        "TBbartoLplusNuBbar-s-channel-4FS_TuneCP5_13p6TeV_amcatnlo-pythia8",
        # Run 2 (13 TeV)
        "ST_tW_top_5f_inclusiveDecays_TuneCP5_13TeV-powheg-pythia8",
        "ST_tW_antitop_5f_inclusiveDecays_TuneCP5_13TeV-powheg-pythia8",
        "ST_t-channel_top_4f_InclusiveDecays_TuneCP5_13TeV-powheg-madspin-pythia8",
        "ST_t-channel_antitop_4f_InclusiveDecays_TuneCP5_13TeV-powheg-madspin-pythia8",
        "ST_s-channel_4f_leptonDecays_TuneCP5_13TeV-amcatnlo-pythia8",
        "TBbartoLNu-s-channel_TuneCP5_13p6TeV_powheg-pythia8",
        "TbarBtoLNu-s-channel_TuneCP5_13p6TeV_powheg-pythia8",
        "ST_t-channel_antitop_4f_InclusiveDecays_TuneCP5_13TeV-powheg-madspin-pythia8",
    ],
    "tt+X": [
        "TTLNu-1Jets_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "TTW-WtoQQ-1Jets_TuneCP5_13p6TeV_amcatnloFXFXold-pythia8",
        "TTNuNu_TuneCP5_13p6TeV_amcatnlo-pythia8",
        "TTLL_Bin-MLL-4to50_TuneCP5_13p6TeV_amcatnlo-pythia8",
        "TTLL_Bin-MLL-50_TuneCP5_13p6TeV_amcatnlo-pythia8",
        "TTZ-ZtoQQ-1Jets_TuneCP5_13p6TeV_amcatnloFXFXold-pythia8",
    ],
    # diboson
    "VV": [
        "WW_TuneCP5_13p6TeV-pythia8",
        "WZ_TuneCP5_13p6TeV-pythia8",
        "ZZ_TuneCP5_13p6TeV-pythia8",
        "WW_TuneCP5_13p6TeV_pythia8",
        "WZ_TuneCP5_13p6TeV_pythia8",
        "ZZ_TuneCP5_13p6TeV_pythia8",
        # decay
        "ZZto2L2Q_TuneCP5_13p6TeV_powheg-pythia8",
        "ZZto2Nu2Q_TuneCP5_13p6TeV_powheg-pythia8",
        "ZZto4L_TuneCP5_13p6TeV_powheg-pythia8",
        "ZZto2L2Nu_TuneCP5_13p6TeV_powheg-pythia8",
        "WZtoLNu2Q_TuneCP5_13p6TeV_powheg-pythia8",
        "WZToLNu2B-1Jets-4FS_TuneCP5_13p6TeV_amcatnloFXFX-madspin-pythia8",
        "WZtoL3Nu-1Jets-4FS_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "WZto2L2Q_TuneCP5_13p6TeV_powheg-pythia8",
        "WZto3LNu_TuneCP5_13p6TeV_powheg-pythia8",
        "WWto4Q_TuneCP5_13p6TeV_powheg-pythia8",
        "WWtoLNu2Q_TuneCP5_13p6TeV_powheg-pythia8",
        "WWto2L2Nu_TuneCP5_13p6TeV_powheg-pythia8",
    ],
    "Z+jets": [
        "DYto2L-2Jets_MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "DYto2L-2Jets_MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "DYJetsToLL_M-10to50_TuneCP5_13p6TeV-madgraphMLM-pythia8",
        "DYJetsToLL_M-50_TuneCP5_13p6TeV-madgraphMLM-pythia8",
        "DYto2E-2Jets_Bin-MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "DYto2Mu-2Jets_Bin-MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "DYto2Tau-2Jets_Bin-MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "DYto2E-2Jets_Bin-MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "DYto2Mu-2Jets_Bin-MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "DYto2Tau-2Jets_Bin-MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
    ],
    "W+jets": [
        "WtoENu-4Jets_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "WtoMuNu-4Jets_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "WtoTauNu-4Jets_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "WtoLNu-4Jets_Bin-1J_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "WtoLNu-4Jets_Bin-2J_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "WtoLNu-4Jets_Bin-3J_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "WtoLNu-4Jets_Bin-4J_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "WJetsToLNu_TuneCP5_13p6TeV-madgraphMLM-pythia8",
        "WtoLNu-4Jets_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        # jet binned samples
        "WtoLNu-4Jets_1J_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "WtoLNu-4Jets_2J_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "WtoLNu-4Jets_3J_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "WtoLNu-4Jets_Bin-1J_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "WtoLNu-4Jets_Bin-2J_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "WtoLNu-4Jets_Bin-3J_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "WtoLNu-4Jets_Bin-4J_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "WtoLNu-2Jets_0J_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "WtoLNu-2Jets_1J_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "WtoLNu-2Jets_2J_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        # lepton binned samples
        "WtoENu-4Jets_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "WtoMuNu-4Jets_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "WtoTauNu-4Jets_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "WtoENu-2Jets_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "WtoMuNu-2Jets_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "WtoTauNu-2Jets_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        # run2
        "WJetsToLNu_TuneCP5_13TeV-madgraphMLM-pythia8",
        # jet and lepton binned samples
        "WtoENu-2Jets_Bin-0J_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "WtoENu-2Jets_Bin-1J_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "WtoENu-2Jets_Bin-2J_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "WtoMuNu-2Jets_Bin-0J_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "WtoMuNu-2Jets_Bin-1J_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "WtoMuNu-2Jets_Bin-2J_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "WtoTauNu-2Jets_Bin-0J_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "WtoTauNu-2Jets_Bin-1J_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "WtoTauNu-2Jets_Bin-2J_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
    ],
    "DY": [
        "DYto2L-2Jets_Bin-1J-MLL-50-PTLL-40to100_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "DYto2L-2Jets_Bin-1J-MLL-50-PTLL-100to200_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "DYto2L-2Jets_Bin-1J-MLL-50-PTLL-200to400_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "DYto2L-2Jets_Bin-1J-MLL-50-PTLL-400to600_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "DYto2L-2Jets_Bin-1J-MLL-50-PTLL-600_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "DYto2L-2Jets_Bin-2J-MLL-50-PTLL-40to100_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "DYto2L-2Jets_Bin-2J-MLL-50-PTLL-100to200_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "DYto2L-2Jets_Bin-2J-MLL-50-PTLL-200to400_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "DYto2L-2Jets_Bin-2J-MLL-50-PTLL-400to600_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "DYto2L-2Jets_Bin-2J-MLL-50-PTLL-600_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "DYto2E-2Jets_Bin-MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "DYto2E-2Jets_Bin-MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "DYto2Mu-2Jets_Bin-MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "DYto2Mu-2Jets_Bin-MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "DYto2Tau-2Jets_Bin-MLL-10to50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        "DYto2Tau-2Jets_Bin-MLL-50_TuneCP5_13p6TeV_amcatnloFXFX-pythia8",
        # Run 2
        "DYJetsToLL_M-50_TuneCP5_13TeV-amcatnloFXFX-pythia8",
        "DYJetsToLL_M-10to50_TuneCP5_13TeV-amcatnloFXFX-pythia8",
    ],
    # QCD
    "QCD": [
        "QCD_Bin-PT-30to50_Fil-MuEnriched_TuneCP5_13p6TeV_pythia8",
        "QCD_Bin-PT-50to80_Fil-MuEnriched_TuneCP5_13p6TeV_pythia8",
        "QCD_Bin-PT-80to120_Fil-MuEnriched_TuneCP5_13p6TeV_pythia8",
        "QCD_Bin-PT-120to170_Fil-MuEnriched_TuneCP5_13p6TeV_pythia8",
        "QCD_Bin-PT-170to300_Fil-MuEnriched_TuneCP5_13p6TeV_pythia8",
        "QCD_Bin-PT-300to470_Fil-MuEnriched_TuneCP5_13p6TeV_pythia8",
        "QCD_Bin-PT-470to600_Fil-MuEnriched_TuneCP5_13p6TeV_pythia8",
        "QCD_Bin-PT-600to800_Fil-MuEnriched_TuneCP5_13p6TeV_pythia8",
        "QCD_Bin-PT-800to1000_Fil-MuEnriched_TuneCP5_13p6TeV_pythia8",
        "QCD_Bin-PT-1000_Fil-MuEnriched_TuneCP5_13p6TeV_pythia8",
        "QCD-4Jets_Bin-HT-40to70_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "QCD-4Jets_Bin-HT-70to100_TuneCP5_13p6TeV_madgraphMLM-pythia",
        "QCD-4Jets_Bin-HT-100to200_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "QCD-4Jets_Bin-HT-200to400_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "QCD-4Jets_Bin-HT-400to600_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "QCD-4Jets_Bin-HT-600to800_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "QCD-4Jets_Bin-HT-800to1000_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "QCD-4Jets_Bin-HT-1000to1200_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "QCD-4Jets_Bin-HT-1200to1500_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "QCD-4Jets_Bin-HT-1500to2000_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "QCD-4Jets_Bin-HT-2000_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "QCD_PT-15to30_TuneCP5_13p6TeV_pythia8",
        "QCD_PT-30to50_TuneCP5_13p6TeV_pythia8",
        "QCD_PT-50to80_TuneCP5_13p6TeV_pythia8",
        "QCD_PT-80to120_TuneCP5_13p6TeV_pythia8",
        "QCD_PT-120to170_TuneCP5_13p6TeV_pythia8",
        "QCD_PT-170to300_TuneCP5_13p6TeV_pythia8",
        "QCD_PT-300to470_TuneCP5_13p6TeV_pythia8",
        "QCD_PT-470to600_TuneCP5_13p6TeV_pythia8",
        "QCD_PT-600to800_TuneCP5_13p6TeV_pythia8",
        "QCD_PT-800to1000_TuneCP5_13p6TeV_pythia8",
        "QCD_PT-1000to1400_TuneCP5_13p6TeV_pythia8",
        "QCD_PT-1400to1800_TuneCP5_13p6TeV_pythia8",
        "QCD_PT-1800to2400_TuneCP5_13p6TeV_pythia8",
        "QCD_PT-2400to3200_TuneCP5_13p6TeV_pythia8",
        "QCD_PT-3200_TuneCP5_13p6TeV_pythia8",
        "QCD_Bin-PT-15to20_TuneCP5_13p6TeV_pythia8",
        "QCD_Bin-PT-20to30_TuneCP5_13p6TeV_pythia8",
        "QCD_Bin-PT-30to50_TuneCP5_13p6TeV_pythia8",
        "QCD_Bin-PT-50to80_TuneCP5_13p6TeV_pythia8",
        "QCD_Bin-PT-80to120_TuneCP5_13p6TeV_pythia8",
        "QCD_Bin-PT-120to170_TuneCP5_13p6TeV_pythia8",
        "QCD_Bin-PT-170to300_TuneCP5_13p6TeV_pythia8",
        "QCD_Bin-PT-300to470_TuneCP5_13p6TeV_pythia8",
        "QCD_Bin-PT-470to600_TuneCP5_13p6TeV_pythia8",
        "QCD_Bin-PT-600to800_TuneCP5_13p6TeV_pythia8",
        "QCD_Bin-PT-800to1000_TuneCP5_13p6TeV_pythia8",
        "QCD_Bin-PT-1000to1500_TuneCP5_13p6TeV_pythia8",
        "QCD_Bin-PT-1500to2000_TuneCP5_13p6TeV_pythia8",
        "QCD_Bin-PT-2000to2500_TuneCP5_13p6TeV_pythia8",
        "QCD_Bin-PT-2500to3000_TuneCP5_13p6TeV_pythia8",
        "QCD_Bin-PT-3000_TuneCP5_13p6TeV_pythia8",
        # Run 2:
        "QCD_Pt-30To50_MuEnrichedPt5_TuneCP5_13TeV-pythia8",
        "QCD_Pt-50To80_MuEnrichedPt5_TuneCP5_13TeV-pythia8",
        "QCD_Pt-80To120_MuEnrichedPt5_TuneCP5_13TeV-pythia8",
        "QCD_Pt-120To170_MuEnrichedPt5_TuneCP5_13TeV-pythia8",
        "QCD_Pt-170To300_MuEnrichedPt5_TuneCP5_13TeV-pythia8",
        "QCD_Pt-300To470_MuEnrichedPt5_TuneCP5_13TeV-pythia8",
        "QCD_Pt-470To600_MuEnrichedPt5_TuneCP5_13TeV-pythia8",
        "QCD_Pt-600To800_MuEnrichedPt5_TuneCP5_13TeV-pythia8",
        "QCD_Pt-800To1000_MuEnrichedPt5_TuneCP5_13TeV-pythia8",
        "QCD_Pt-1000_MuEnrichedPt5_TuneCP5_13TeV-pythia8",
    ],
    # QCD muon enriched
    "QCD($\\mu$)": [
        "QCD_PT-15to20_MuEnrichedPt5_TuneCP5_13p6TeV_pythia8",
        "QCD_PT-20to30_MuEnrichedPt5_TuneCP5_13p6TeV_pythia8",
        "QCD_PT-30to50_MuEnrichedPt5_TuneCP5_13p6TeV_pythia8",
        "QCD_PT-50to80_MuEnrichedPt5_TuneCP5_13p6TeV_pythia8",
        "QCD_PT-80to120_MuEnrichedPt5_TuneCP5_13p6TeV_pythia8",
        "QCD_PT-120to170_MuEnrichedPt5_TuneCP5_13p6TeV_pythia8",
        "QCD_PT-170to300_MuEnrichedPt5_TuneCP5_13p6TeV_pythia8",
        "QCD_PT-300to470_MuEnrichedPt5_TuneCP5_13p6TeV_pythia8",
        "QCD_PT-470to600_MuEnrichedPt5_TuneCP5_13p6TeV_pythia8",
        "QCD_PT-600to800_MuEnrichedPt5_TuneCP5_13p6TeV_pythia8",
        "QCD_PT-800to1000_MuEnrichedPt5_TuneCP5_13p6TeV_pythia8",
        "QCD_PT-1000_MuEnrichedPt5_TuneCP5_13p6TeV_pythia8",
    ],
    "Ph+jets": [
        "GJ-4Jets_HT-100to200_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "GJ-4Jets_HT-200to400_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "GJ-4Jets_HT-400to600_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "GJ-4Jets_HT-40to70_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "GJ-4Jets_HT-600_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "GJ-4Jets_HT-70to100_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "GJ-4Jets_dRGJ-0p25_PTG-10to100_HT-100to200_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "GJ-4Jets_dRGJ-0p25_PTG-10to100_HT-200to400_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "GJ-4Jets_dRGJ-0p25_PTG-10to100_HT-40to100_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "GJ-4Jets_dRGJ-0p25_PTG-10to100_HT-1000_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "GJ-4Jets_dRGJ-0p25_PTG-10to100_HT-400to600_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "GJ-4Jets_dRGJ-0p25_PTG-10to100_HT-600to1000_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "GJ-4Jets_dRGJ-0p25_PTG-100to200_HT-1000_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "GJ-4Jets_dRGJ-0p25_PTG-100to200_HT-200to400_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "GJ-4Jets_dRGJ-0p25_PTG-100to200_HT-400to600_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "GJ-4Jets_dRGJ-0p25_PTG-100to200_HT-40to200_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "GJ-4Jets_dRGJ-0p25_PTG-100to200_HT-600to1000_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "GJ-4Jets_dRGJ-0p25_PTG-200_HT-1000_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "GJ-4Jets_dRGJ-0p25_PTG-200_HT-400to600_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "GJ-4Jets_dRGJ-0p25_PTG-200_HT-40to400_TuneCP5_13p6TeV_madgraphMLM-pythia8",
        "GJ-4Jets_dRGJ-0p25_PTG-200_HT-600to1000_TuneCP5_13p6TeV_madgraphMLM-pythia8",
    ],
}
### copy functions coffea.hist.plotratio https://github.com/CoffeaTeam/coffea/blob/master/coffea/hist/plot.py to boost-hist
################
## ratio plot ##
################

_coverage1sd = scipy.stats.norm.cdf(1) - scipy.stats.norm.cdf(-1)



[docs]
def compatible(self, other):
    """
    Checks if this histogram is compatible with another, i.e. they have identical binning
    """

    if len(self.axes) != len(other.axes):
        return False
    if set(self.axes.name) != set(other.axes.name):
        return False
    # if not all(d1 == d2 for d1, d2 in zip(self.dense_axes(), other.dense_axes())):
    #     return False
    return True




[docs]
def poisson_interval(sumw, sumw2, coverage=_coverage1sd):
    """
    Frequentist coverage interval for Poisson-distributed observations
    Parameters
    ----------
        sumw : numpy.ndarray
            Sum of weights vector
        sumw2 : numpy.ndarray
            Sum weights squared vector
        coverage : float, optional
            Central coverage interval, defaults to 68%
    Calculates the so-called 'Garwood' interval,
    c.f. https://www.ine.pt/revstat/pdf/rs120203.pdf or
    http://ms.mcmaster.ca/peter/s743/poissonalpha.html
    For weighted data, this approximates the observed count by ``sumw**2/sumw2``, which
    effectively scales the unweighted poisson interval by the average weight.
    This may not be the optimal solution: see https://arxiv.org/pdf/1309.1287.pdf for a proper treatment.
    When a bin is zero, the scale of the nearest nonzero bin is substituted to scale the nominal upper bound.
    If all bins zero, a warning is generated and interval is set to ``sumw``.
    """

    scale = np.empty_like(sumw)
    scale[sumw != 0] = sumw2[sumw != 0] / sumw[sumw != 0]
    if np.sum(sumw == 0) > 0:
        missing = np.where(sumw == 0)
        available = np.nonzero(sumw)
        if len(available[0]) == 0:
            warnings.warn(
                "All sumw are zero!  Cannot compute meaningful error bars",
                RuntimeWarning,
            )
            return np.vstack([sumw, sumw])
        nearest = sum(
            [np.subtract.outer(d, d0) ** 2 for d, d0 in zip(available, missing)]
        ).argmin(axis=0)
        argnearest = tuple(dim[nearest] for dim in available)
        scale[missing] = scale[argnearest]
    counts = sumw / scale
    lo = scale * scipy.stats.chi2.ppf((1 - coverage) / 2, 2 * counts) / 2.0
    hi = scale * scipy.stats.chi2.ppf((1 + coverage) / 2, 2 * (counts + 1)) / 2.0
    interval = np.array([lo, hi])
    interval[interval == np.nan] = 0.0  # chi2.ppf produces nan for counts=0
    return interval




[docs]
def normal_interval(pw, tw, pw2, tw2, coverage=_coverage1sd):
    """
    Compute errors based on the expansion of pass/(pass + fail), possibly weighted
    Parameters
    ----------

    pw : np.ndarray
        Numerator, or number of (weighted) successes, vectorized
    tw : np.ndarray
        Denominator or number of (weighted) trials, vectorized
    pw2 : np.ndarray
        Numerator sum of weights squared, vectorized
    tw2 : np.ndarray
        Denominator sum of weights squared, vectorized
    coverage : float, optional
        Central coverage interval, defaults to 68%
    c.f. https://root.cern.ch/doc/master/TEfficiency_8cxx_source.html#l02515
    """

    eff = pw / tw

    variance = (pw2 * (1 - 2 * eff) + tw2 * eff**2) / (tw**2)
    sigma = np.sqrt(variance)

    prob = 0.5 * (1 - coverage)
    delta = np.zeros_like(sigma)
    delta[sigma != 0] = scipy.stats.norm.ppf(prob, scale=sigma[sigma != 0])

    lo = eff - np.minimum(eff + delta, np.ones_like(eff))
    hi = np.maximum(eff - delta, np.zeros_like(eff)) - eff

    return np.array([lo, hi])




[docs]
def clopper_pearson_interval(num, denom, coverage=_coverage1sd):
    """
    Compute Clopper-Pearson coverage interval for a binomial distribution
    Parameters
    ----------
        num : numpy.ndarray
            Numerator, or number of successes, vectorized
        denom : numpy.ndarray
            Denominator or number of trials, vectorized
        coverage : float, optional
            Central coverage interval, defaults to 68%
    c.f. http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval
    """

    if np.any(num > denom):
        raise ValueError(
            "Found numerator larger than denominator while calculating binomial uncertainty"
        )
    lo = scipy.stats.beta.ppf((1 - coverage) / 2, num, denom - num + 1)
    hi = scipy.stats.beta.ppf((1 + coverage) / 2, num + 1, denom - num)
    interval = np.array([lo, hi])
    interval[:, num == 0.0] = 0.0
    interval[1, num == denom] = 1.0
    return interval



## ratioplot function

[docs]
def plotratio(
    num,
    denom,
    ax=None,
    clear=True,
    flow=None,
    xerr=False,
    error_opts={},
    denom_fill_opts={},
    guide_opts={},
    unc="num",
    label=None,
    ext_denom_error=None,
):
    """
    Create a ratio plot, dividing two compatible histograms
    Parameters
    ----------
        num : Hist
            Numerator, a single-axis histogram
        denom : Hist
            Denominator, a single-axis histogram
        ax : matplotlib.axes.Axes, optional
            Axes object (if None, one is created)
        clear : bool, optional
            Whether to clear Axes before drawing (if passed); if False, this function will skip drawing the legend
        flow : str, optional {None, "show", "sum"}
            Whether plot the under/overflow bin. If "show", add additional under/overflow bin. If "sum", add the under/overflow bin content to first/last bin.
        xerr: bool, optional
            If true, then error bars are drawn for x-axis to indicate the size of the bin.
        error_opts : dict, optional
            A dictionary of options to pass to the matplotlib
            `ax.errorbar <https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.errorbar.html>`_ call
            internal to this function.  Leave blank for defaults.  Some special options are interpreted by
            this function and not passed to matplotlib: 'emarker' (default: '') specifies the marker type
            to place at cap of the errorbar.
        denom_fill_opts : dict, optional
            A dictionary of options to pass to the matplotlib
            `ax.fill_between <https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.axes.Axes.fill_between.html>`_ call
            internal to this function, filling the denominator uncertainty band.  Leave blank for defaults.
        guide_opts : dict, optional
            A dictionary of options to pass to the matplotlib
            `ax.axhline <https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.axes.Axes.axhline.html>`_ call
            internal to this function, to plot a horizontal guide line at ratio of 1.  Leave blank for defaults.
        unc : str, optional
            Uncertainty calculation option: 'clopper-pearson' interval for efficiencies; 'poisson-ratio' interval
            for ratio of poisson distributions; 'num' poisson interval of numerator scaled by denominator value
            (common for data/mc, for better or worse).
        label : str, optional
            Associate a label to this entry (note: y axis label set by ``num.label``)
        ext_denom_error: list of np.array[error_up,error_down], optional
            External MC errors not stored in the original histogram
    Returns
    -------
        ax : matplotlib.axes.Axes
            A matplotlib `Axes <https://matplotlib.org/3.1.1/api/axes_api.html>`_ object
    """
    if ax is None:
        fig, ax = plt.subplots(1, 1)
    else:
        if not isinstance(ax, plt.Axes):
            raise ValueError("ax must be a matplotlib Axes object")
        if clear:
            ax.clear()
    if not compatible(num, denom):
        raise ValueError(
            "numerator and denominator histograms have incompatible axis definitions"
        )
    if len(num.axes) > 1:
        raise ValueError("plotratio() can only support one-dimensional histograms")
    if error_opts is None and denom_fill_opts is None and guide_opts is None:
        error_opts = {}
        denom_fill_opts = {}

    axis = num.axes[0]

    ax.set_xlabel(axis.label)
    ax.set_ylabel(num.label)
    edges = axis.edges
    if flow == "show":
        edges = np.array(
            [
                edges[0] - (edges[1] - edges[0]) * 3,
                edges[0] - (edges[1] - edges[0]),
                *edges,
                edges[-1] + (edges[1] - edges[0]),
                edges[-1] + (edges[1] - edges[0]) * 3,
            ]
        )
    centers = (edges[1:] + edges[:-1]) / 2
    ranges = (edges[1:] - edges[:-1]) / 2 if xerr else None
    sumw_num, sumw2_num = num.values(), num.variances()
    sumw_denom, sumw2_denom = denom.values(), denom.variances()

    if flow == "show":
        print("Show under/overflow bin ")
        sumw_num, sumw2_num = (
            num.view(flow=True)["value"],
            num.view(flow=True)["variance"],
        )
        sumw_num, sumw2_num = np.insert(sumw_num, -1, 0), np.insert(sumw2_num, -1, 0)
        sumw_num, sumw2_num = np.insert(sumw_num, 1, 0), np.insert(sumw2_num, 1, 0)
        sumw_denom, sumw2_denom = (
            denom.view(flow=True)["value"],
            denom.view(flow=True)["variance"],
        )
        sumw_denom, sumw2_denom = np.insert(sumw_denom, -1, 0), np.insert(
            sumw2_denom, -1, 0
        )
        sumw_denom, sumw2_denom = np.insert(sumw_denom, 1, 0), np.insert(
            sumw2_denom, 1, 0
        )
    elif flow == "sum":
        print("Merge under/overflow bin to first/last bin")

        sumw_num[0], sumw2_num[0] = (
            sumw_num[0] + num.view(flow=True)["value"][0],
            sumw2_num[0] + num.view(flow=True)["value"][0],
        )
        sumw_num[-1], sumw2_num[-1] = (
            sumw_num[-1] + num.view(flow=True)["value"][-1],
            sumw2_num[-1] + num.view(flow=True)["value"][-1],
        )
        sumw_denom[0], sumw2_denom[0] = (
            sumw_denom[0] + denom.view(flow=True)["value"][0],
            sumw2_denom[0] + denom.view(flow=True)["value"][0],
        )
        sumw_denom[-1], sumw2_denom[-1] = (
            sumw_denom[-1] + denom.view(flow=True)["value"][-1],
            sumw2_denom[-1] + denom.view(flow=True)["value"][-1],
        )

    rsumw = sumw_num / sumw_denom

    if unc == "clopper-pearson":
        rsumw_err = np.abs(clopper_pearson_interval(sumw_num, sumw_denom) - rsumw)
    elif unc == "poisson-ratio":
        # poisson ratio n/m is equivalent to binomial n/(n+m)
        rsumw_err = np.abs(
            clopper_pearson_interval(sumw_num, sumw_num + sumw_denom) - rsumw
        )
    elif unc == "num":
        rsumw_err = np.abs(poisson_interval(rsumw, sumw2_num / sumw_denom**2) - rsumw)
    elif unc == "efficiency":
        rsumw_err = np.abs(
            normal_interval(sumw_num, sumw_denom, sumw2_num, sumw2_denom)
        )
    else:
        raise ValueError("Unrecognized uncertainty option: %r" % unc)

    ## if additional uncertainties
    if ext_denom_error is not None:
        if denom_fill_opts is {}:
            print("suggest to use different style for additional error")
        if np.shape(rsumw_err) != np.shape(ext_denom_error / sumw_denom):
            raise ValueError("Imcompatible error length")
        rsumw_err = np.sqrt(rsumw_err**2 + (ext_denom_error / sumw_denom) ** 2)

    if error_opts is not None:
        opts = {
            "label": label,
            "linestyle": "none",
            "lw": 1,
            "marker": "o",
            "color": "k",
        }
        opts.update(error_opts)
        emarker = opts.pop("emarker", "")
        errbar = ax.errorbar(x=centers, y=rsumw, xerr=ranges, yerr=rsumw_err, **opts)
        plt.setp(errbar[1], "marker", emarker)

    if denom_fill_opts is not None:
        unity = np.ones_like(sumw_denom)
        denom_unc = poisson_interval(unity, sumw2_denom / sumw_denom**2)
        opts = {
            "hatch": "////",
            "facecolor": "none",
            "lw": 0,
            "color": "k",
            "alpha": 0.4,
        }
        if ext_denom_error is not None:
            denom_unc[0] = (
                unity[0]
                - np.sqrt(
                    (denom_unc - unity) ** 2 + (ext_denom_error / sumw_denom) ** 2
                )[0]
            )
            denom_unc[1] = (
                unity[1]
                + np.sqrt(
                    (denom_unc - unity) ** 2 + (ext_denom_error / sumw_denom) ** 2
                )[1]
            )
            opts = denom_fill_opts
        ax.stairs(denom_unc[0], edges=edges, baseline=denom_unc[1], **opts)

    if guide_opts is not None:
        opts = {"linestyle": "--", "color": (0, 0, 0, 0.5), "linewidth": 1}
        opts.update(guide_opts)
        if clear is not False:
            ax.axhline(1.0, **opts)

    if clear:
        ax.autoscale(axis="x", tight=True)
        ax.set_ylim(0, None)

    return ax



## Calculate SF error

[docs]
def SFerror(collated, discr, flow=None):
    err_up = (
        collated["mc"][discr][{"syst": "SFup", "flav": sum}].values()
        - collated["mc"][discr][{"syst": "SF", "flav": sum}].values()
    )
    err_dn = (
        collated["mc"][discr][{"syst": "SFdn", "flav": sum}].values()
        - collated["mc"][discr][{"syst": "SF", "flav": sum}].values()
    )
    if flow is not None:
        err_up = (
            collated["mc"][discr][{"syst": "SFup", "flav": sum}].view(flow=True)[
                "value"
            ]
            - collated["mc"][discr][{"syst": "SF", "flav": sum}].view(flow=True)[
                "value"
            ]
        )
        err_dn = (
            collated["mc"][discr][{"syst": "SFdn", "flav": sum}].view(flow=True)[
                "value"
            ]
            - collated["mc"][discr][{"syst": "SF", "flav": sum}].view(flow=True)[
                "value"
            ]
        )

    if "C" in discr:  ## scale uncertainties for charm tagger by 2
        err_up = np.sqrt(
            np.add(
                np.power(err_up, 2),
                np.power(
                    2
                    * (
                        collated["mc"][discr][{"syst": "SFup", "flav": 4}].values()
                        - collated["mc"][discr][{"syst": "SF", "flav": 4}].values()
                    ),
                    2,
                ),
            )
        )
        err_dn = np.sqrt(
            np.add(
                np.power(err_dn, 2),
                np.power(
                    2
                    * (
                        collated["mc"][discr][{"syst": "SFdn", "flav": 4}].values()
                        - collated["mc"][discr][{"syst": "SF", "flav": 4}].values()
                    ),
                    2,
                ),
            )
        )
        if flow:
            err_up = np.sqrt(
                np.add(
                    np.power(err_up, 2),
                    np.power(
                        2
                        * (
                            collated["mc"][discr][{"syst": "SFup", "flav": 4}].view(
                                flow=True
                            )["value"]
                            - collated["mc"][discr][{"syst": "SF", "flav": 4}].view(
                                flow=True
                            )["value"]
                        ),
                        2,
                    ),
                )
            )
            err_dn = np.sqrt(
                np.add(
                    np.power(err_dn, 2),
                    np.power(
                        2
                        * (
                            collated["mc"][discr][{"syst": "SFdn", "flav": 4}].view(
                                flow=True
                            )["value"]
                            - collated["mc"][discr][{"syst": "SF", "flav": 4}].view(
                                flow=True
                            )["value"]
                        ),
                        2,
                    ),
                )
            )

    return np.array([err_dn, err_up])




[docs]
def autoranger(hist, flow=None):
    val, axis = hist.values(), hist.axes[-1].edges
    if flow == "show":
        val = hist.view(flow=True)["value"]
        axis = np.array(
            [
                axis[0] - (axis[1] - axis[0]) * 3,
                axis[0] - (axis[1] - axis[0]),
                *axis,
                axis[-1] + (axis[1] - axis[0]),
                axis[-1] + (axis[1] - axis[0]) * 3,
            ]
        )
    for i in range(len(val)):
        if val[i] != 0:
            mins = i
            break
    for i in reversed(range(len(val))):
        if val[i] != 0:
            maxs = i + 1
            break
    return axis[mins], axis[maxs]




[docs]
def MCerrorband(
    hmc,
    ax=None,
    flow=None,
    label="Stat. unc.",
    fill_opts=None,
    ext_error=None,
    clear=False,
):
    """Create a ratio plot, dividing two compatible histograms
    Parameters
    ----------
        hmc : Hist
            A single-axis histogram
        ax : matplotlib.axes.Axes, optional
            Axes object (if None, one is created)
        flow : str, optional {None, "show", "sum"}
            Whether plot the under/overflow bin. If "show", add additional under/overflow bin. If "sum", add the under/overflow bin content to first/last bin.
        fill_opts : dict, optional
            A dictionary of options to pass to the matplotlib
            `ax.fill_between <https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.axes.Axes.fill_between.html>`_ call
            internal to this function, filling the denominator uncertainty band.  Leave blank for defaults.
        label : str, optional
            Associate a label to this entry (note: y axis label set by ``num.label``)
        ext_error: list of np.array[error_up,error_down], optional
            External MC errors not stored in the original histogram
        clear : bool, optional
            Whether to clear Axes before drawing (if passed); if False, this function will skip drawing the legend

    Returns
    -------
        ax : matplotlib.axes.Axes
            A matplotlib `Axes <https://matplotlib.org/3.1.1/api/axes_api.html>`_ object
    """
    if ax is None:
        fig, ax = plt.subplots(1, 1)
    else:
        if not isinstance(ax, plt.Axes):
            raise ValueError("ax must be a matplotlib Axes object")
        if clear:
            ax.clear()

    if ext_error is None:
        values = hmc.values() + np.sqrt(hmc.variances())
        baseline = hmc.values() - np.sqrt(hmc.variances())
        edges = hmc.axes[0].edges

        if flow == "show":
            edges = np.array(
                [
                    edges[0] - (edges[1] - edges[0]) * 2,
                    *edges,
                    edges[-1] + (edges[1] - edges[0]) * 2,
                ]
            )
            values = hmc.view(flow=True)["value"] + np.sqrt(
                hmc.view(flow=True)["variance"]
            )
            baseline = hmc.view(flow=True)["value"] - np.sqrt(
                hmc.view(flow=True)["variance"]
            )
        if flow == "sum":
            values[0], values[-1] = (
                hmc.view(flow=True)["value"] + np.sqrt(hmc.view(flow=True)["variance"])
            )[0] + values[0], (
                hmc.view(flow=True)["value"] + np.sqrt(hmc.view(flow=True)["variance"])
            )[
                -1
            ] + values[
                -1
            ]
            baseline[0], baseline[-1] = (
                hmc.view(flow=True)["value"] - np.sqrt(hmc.view(flow=True)["variance"])
            )[0] + baseline[0], (
                hmc.view(flow=True)["value"] - np.sqrt(hmc.view(flow=True)["variance"])
            )[
                -1
            ] + baseline[
                -1
            ]

        ax.stairs(
            values=values,
            baseline=baseline,
            edges=edges,
            label=label,
            **errband_opts,
        )
    else:
        values = hmc.values() + ext_error[1]
        baseline = hmc.values() - ext_error[0]
        edges = hmc.axes[-1].edges

        if flow == "show":
            edges = np.array(
                [
                    edges[0] - (edges[1] - edges[0]) * 2,
                    *edges,
                    edges[-1] + (edges[1] - edges[0]) * 2,
                ]
            )
            values = hmc.view(flow=True)["value"] + ext_error[1]
            baseline = hmc.view(flow=True)["value"] - ext_error[0]
        if flow == "sum":
            values[0], values[-1] = (hmc.view(flow=True)["value"] + ext_error[1])[
                0
            ] + values[0], (hmc.view(flow=True)["value"] + ext_error[0])[-1] + values[
                -1
            ]
            baseline[0], baseline[-1] = (hmc.view(flow=True)["value"] - ext_error[1])[
                0
            ] + baseline[0], (hmc.view(flow=True)["value"] - ext_error[0])[
                -1
            ] + baseline[
                -1
            ]

        ax.stairs(
            values=values,
            baseline=baseline,
            edges=edges,
            label=label,
            **fill_opts,
        )



## Very nice implementtion from Kenneth Long:
## https://gist.github.com/kdlong/d697ee691c696724fc656186c25f8814

[docs]
def rebin_hist(h, axis_name, edges):
    if type(edges) == int:
        return h[{axis_name: hist.rebin(edges)}]

    ax = h.axes[axis_name]
    ax_idx = [a.name for a in h.axes].index(axis_name)
    if not all([np.isclose(x, ax.edges).any() for x in edges]):
        raise ValueError(
            f"Cannot rebin histogram due to incompatible edges for axis '{ax.name}'\n"
            f"Edges of histogram are {ax.edges}, requested rebinning to {edges}"
        )

    # If you rebin to a subset of initial range, keep the overflow and underflow
    overflow = ax.traits.overflow or (
        edges[-1] < ax.edges[-1] and not np.isclose(edges[-1], ax.edges[-1])
    )
    underflow = ax.traits.underflow or (
        edges[0] > ax.edges[0] and not np.isclose(edges[0], ax.edges[0])
    )
    flow = overflow or underflow
    new_ax = hist.axis.Variable(
        edges, name=ax.name, label=ax.label, overflow=overflow, underflow=underflow
    )
    axes = list(h.axes)
    axes[ax_idx] = new_ax

    hnew = hist.Hist(*axes, name=h.name, storage=h._storage_type())

    # Offset from bin edge to avoid numeric issues
    offset = 0.5 * np.min(ax.edges[1:] - ax.edges[:-1])
    edges_eval = edges + offset
    edge_idx = ax.index(edges_eval)
    # Avoid going outside the range, reduceat will add the last index anyway
    if edge_idx[-1] == ax.size + ax.traits.overflow:
        edge_idx = edge_idx[:-1]

    if underflow:
        # Only if the original axis had an underflow should you offset
        if ax.traits.underflow:
            edge_idx += 1
        edge_idx = np.insert(edge_idx, 0, 0)

    # Take is used because reduceat sums i:len(array) for the last entry, in the case
    # where the final bin isn't the same between the initial and rebinned histogram, you
    # want to drop this value. Add tolerance of 1/2 min bin width to avoid numeric issues
    hnew.values(flow=flow)[...] = np.add.reduceat(
        h.values(flow=flow), edge_idx, axis=ax_idx
    ).take(indices=range(new_ax.size + underflow + overflow), axis=ax_idx)
    if hnew._storage_type() == hist.storage.Weight():
        hnew.variances(flow=flow)[...] = np.add.reduceat(
            h.variances(flow=flow), edge_idx, axis=ax_idx
        ).take(indices=range(new_ax.size + underflow + overflow), axis=ax_idx)
    return hnew