analysis.py

import BatchEffects
import BiomartFunctions
import CCLETools
import Cellosaurus
import ENSEMBLTools
import GeneSetScoring
import GeneOntology
import HGNCFunctions
import LMMelTools
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.patheffects as PathEffects
import numpy as np
import os
import pandas as pd
import scipy.cluster.hierarchy as SciPyClus
import scipy.stats
from scipy.stats import gaussian_kde
from lifelines import CoxPHFitter
from lifelines.estimation import KaplanMeierFitter
from lifelines.statistics import logrank_test as KMlogRankTest

#   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
# Python script for the analysis and figures in the manuscript:
#       Cursons et al (2019), Cancer Immunology Research.
#       DOI: not-yet-known
#
# This script uses a collection of python libraries as well as a set of custom libraries generated by Joe Cursons. These
#   dependencies will be uploaded over the coming weeks, but for priority access to my private repository please feel
#   free to contact me directly:    cursons.j (at) wehi.edu.au
#                                   joe.cursons (at) gmail.com
#
# NB: file paths for data are hard coded below and users will be required to update these paths as appropriate for
#       their filesystem and data locations
#
# Single cell RNA-seq analyses were performed by Sepideh (Momeneh) Foroutan: momeneh.foroutan (at) unimelb.edu.au
#   Associated R scripts will be uploaded over the coming weeks (e.g. for the generation of UMAP plots in Fig. 3).
#
# For The Cancer Genome Atlas (TCGA) skin cutaneous melanoma (SKCM) data, users are directed towards the NIH/NCI genomic
#   data commons (GDC):   https://portal.gdc.cancer.gov/projects/TCGA-SKCM
# Data can be downloaded directly using the genomic data commons client (gdc-client), with a manifest generated from the
#   GDC website
# The authors thank TCGA for making these data freely available, and in particular we would like to acknowledge the
#   patients who generously donated samples for this project.
# For further details please refer to the original TCGA SKCM manuscript:
#       Genomic Classification of Cutaneous Melanoma. Cell. (2015). 161(7): pp. 1681-1696
#       https://www.cell.com/cell/fulltext/S0092-8674(15)00634-0
#       doi: 10.1016/j.cell.2015.05.044
#
# This script also uses data from a number of other studies:
#   The LM-MEL melanoma cell line panel
#   GSE60424
#   GSE24759
#
# This script has dependencies for a number of python libraries. Unfortunately due to space/reference limitations we
#   were unable to include the full set of citations within our manuscript, but we would like to acknowledge the
#   developers who work on these open source tools:
#       matplotlib: https://matplotlib.org/
#       scipy: https://www.scipy.org/
#       numpy: https://www.numpy.org/
#       lifelines: https://lifelines.readthedocs.io/en/latest/
#       pandas: https://pandas.pydata.org/
#
#   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
# This script has been split into three classes with corresponding functions:
#   PreProc: a series of functions which perform pre-processing on the different data sets, making it easier to
#               access the corresponding subsets of samples etc
#       * split_tcga_met_vs_pri()
#       * tcga_skcm_rna_data()
#       * tcga_skcm_data()
#       * tcga_skcm_classifications()
#       * tcga_skcm_met_sites()
#       * lm_mel_data()
#       * gse60424_data()
#       * gse24759_data()
#       * gse24759_subsets()
#       * tcga_histology()
#       * density_scatters()
#       * refine_NK_signature()
#   Analyse:
#       * split_one_marker_three_partitions()
#       * split_two_markers_four_partitions()
#   Plot:
#       * fig_one_and_supp_table_one()
#       * fig_two()
#       * fig_four()
#       * fig_five()
#		* supp_fig_one()
#		* supp_fig_three()
#		* supp_fig_four()
#		* supp_fig_five()
#		* supp_fig_six()
#		* supp_fig_seven()
#		* supp_fig_eight()
#		* supp_fig_nine()
#		* NB: figure 3 (UMAP plots of single cell RNA-seq data) were generated using associated R scripts
#		* NB: Supplementary Figure S2 (workflow figure) was created using a graphical editor
#
#   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
#                                           DATA PRE-PROCESSING FUNCTIONS
class PreProc:

    # Base folder which the pre-processing functions join with data file names (e.g. 'clinical.tsv'); defaults to the
    #   current working directory and should be updated to match the local filesystem
    strDataBaseLoc=os.getcwd() 
    # Second path setting, also defaulting to the current working directory
    #   NOTE(review): not referenced in the visible part of this file - confirm where it is used
    strExtraAnalysisPath = os.getcwd() 

    # Clinical metadata columns of interest; used as the default for listClinDataToMerge in tcga_skcm_data(), where
    #   they are selected from 'clinical.tsv'
    listClinDataOfInt = ['gender',
                         'age_at_diagnosis',
                         'vital_status',
                         'days_to_death',
                         'days_to_last_follow_up',
                         'site_of_resection_or_biopsy']

    # As part of the supplementary material we investigated survival effects associated with metastatic tumour site;
    #   to facilitate a statistical analysis a number of different regions were grouped based on similar locations
    #   according to this dictionary (detailed site label --> grouped label).
    #   NOTE(review): the keys are presumably values of the 'site_of_resection_or_biopsy' clinical column - confirm
    #   NOTE(review): the two lymph node entries map onto labels which look like site names rather than new group
    #       names ('Lymph nodes of inguinal region or leg', 'Lymph node, NOS') - presumably merging them into
    #       existing site categories; confirm against the code which consumes this dictionary
    dictMetSiteGroupings = {
        'Brain, NOS': 'CNS',
        'Frontal lobe':'CNS',
        'Spinal cord':'CNS',
        'Parietal lobe':'CNS',
        'Connective, subcutaneous and other soft tissues':'Connective',
        'Connective, subcutaneous and other soft tissues, NOS':'Connective',
        'Connective, subcutaneous and other soft tissues of lower limb and hip':'Connective',
        'Connective, subcutaneous and other soft tissues of upper limb and shoulder':'Connective',
        'Connective, subcutaneous and other soft tissues of thorax': 'Connective',
        'Connective, subcutaneous and other soft tissues of pelvis': 'Connective',
        'Connective, subcutaneous and other soft tissues of abdomen': 'Connective',
        'Connective, subcutaneous and other soft tissues of trunk, NOS': 'Connective',
        'Connective, subcutaneous and other soft tissues of head, face, and neck': 'Connective',
        'Peritoneum, NOS': 'Internal, other',
        'Thorax, NOS': 'Internal, other',
        'Abdomen, NOS': 'Internal, other',
        'Pelvis, NOS': 'Internal, other',
        'Liver': 'Internal organ',
        'Spleen': 'Internal organ',
        'Adrenal gland, NOS': 'Internal organ',
        'Parotid gland': 'Internal organ',
        'Pelvic lymph nodes': 'Lymph nodes of inguinal region or leg',
        'Intra-abdominal lymph nodes': 'Lymph node, NOS',
        'Small intestine, NOS': 'Epithelial tissue',
        'Vagina, NOS': 'Epithelial tissue',
        'Colon, NOS': 'Epithelial tissue',
        'Vulva, NOS': 'Epithelial tissue',
        'Skin of trunk': 'Skin',
        'Skin of lower limb and hip':'Skin',
        'Skin of upper limb and shoulder':'Skin',
        'Skin of scalp and neck':'Skin',
        'Skin, NOS':'Skin',
        'Upper lobe, lung':'Lung',
        'Lower lobe, lung':'Lung',
        'Lung, NOS':'Lung'}


    def split_tcga_met_vs_pri(flagResult=False):
        """Process the GDC sample sheet & clinical metadata file to label samples and split primary/metastatic tumours.

        This function processes the NIH/NCI genomic data commons (GDC) sample sheet for the TCGA SKCM cohort, together
        with the clinical metadata file, to identify patients split by the presence of primary/metastatic tumour
        samples and the availability of data on patient age.

        As outlined within the corresponding manuscript there are unexpected survival differences between patients
        with primary and metastatic tumours, and thus this analysis has focussed on the larger set of metastatic
        samples. Furthermore, to examine age-associated survival effects, samples where patient age and/or survival
        data are unavailable are dropped.

        The TCGA SKCM sample sheet and clinical data can be downloaded directly from the genomic data commons website
        (i.e. the gdc-client is not required):
            https://portal.gdc.cancer.gov/projects/TCGA-SKCM

        Both files ('gdc_sample_sheet.2018-03-26.tsv' and 'clinical.tsv') are read from PreProc.strDataBaseLoc.

        Args:
            flagResult: not used within this function; retained so that the call signature is unchanged.

        Returns:
            A dictionary of patient (case) ID lists:
                'MetOnlyPat': patients with metastatic but no primary tumour samples and usable survival data
                'PriOnlyPat': patients with primary but no metastatic tumour samples and usable survival data
                'MetOnlyPatWithAge': the subset of 'MetOnlyPat' where age_at_diagnosis is not '--'
            The lists follow the row order of the sample sheet, so a patient with several samples of the same type
            appears once per sample.
        """
        # Load the required TCGA SKCM meta-data
        dfSampMap = pd.read_table(os.path.join(PreProc.strDataBaseLoc, 'gdc_sample_sheet.2018-03-26.tsv'),
                                  sep='\t', header=0, index_col=None)
        dfClinData = pd.read_table(os.path.join(PreProc.strDataBaseLoc, 'clinical.tsv'),
                                   sep='\t', header=0, index_col=None)
        dfClinData.set_index('submitter_id', inplace=True)

        # identify samples with valid survival data; note that 'survival time' is split into two columns depending on
        #   whether the patient was deceased (days_to_death) or alive (days_to_last_follow_up) at data release, and
        #   that missing values are encoded as '--'
        # NB: the builtin bool is used for the casts because the np.bool alias has been removed from recent NumPy
        arrayIsDead = (dfClinData['vital_status'] == 'dead').values.astype(bool)
        arrayIsAlive = (dfClinData['vital_status'] == 'alive').values.astype(bool)
        arrayHasDeathTime = ~(dfClinData['days_to_death'] == '--').values.astype(bool)
        arrayHasFollowUpTime = ~(dfClinData['days_to_last_follow_up'] == '--').values.astype(bool)
        arraySurvDataAreClean = (arrayIsDead & arrayHasDeathTime) | (arrayIsAlive & arrayHasFollowUpTime)

        # extract the index labels (patient IDs) corresponding to these rows; held as a set for fast membership tests
        setSurvDataAreClean = set(dfClinData.index[arraySurvDataAreClean].tolist())

        # Identify target sample subsets with metastatic/primary tumours
        listPatsPriTum = dfSampMap['Case ID'][dfSampMap['Sample Type'] == 'Primary Tumor'].values.tolist()
        listPatsMetTum = dfSampMap['Case ID'][dfSampMap['Sample Type'] == 'Metastatic'].values.tolist()
        setPatsPriTum = set(listPatsPriTum)
        setPatsMetTum = set(listPatsMetTum)

        # identify the corresponding patient subsets (the lists are iterated to preserve sample sheet order)
        listPatsMetTumOnly = [strPatient
                              for strPatient in listPatsMetTum
                              if strPatient not in setPatsPriTum and strPatient in setSurvDataAreClean]
        listPatsPriTumOnly = [strPatient
                              for strPatient in listPatsPriTum
                              if strPatient not in setPatsMetTum and strPatient in setSurvDataAreClean]

        # NOTE(review): this assumes each submitter_id occurs once in clinical.tsv; for a duplicated ID .loc returns a
        #   Series and the comparison below raises a ValueError - confirm against the downloaded file
        seriesAge = dfClinData['age_at_diagnosis']
        listPatsWithMetTumAndAge = [strPatient
                                    for strPatient in listPatsMetTumOnly
                                    if seriesAge.loc[strPatient] != '--']

        # output as a dictionary
        return {'MetOnlyPat': listPatsMetTumOnly,
                'PriOnlyPat': listPatsPriTumOnly,
                'MetOnlyPatWithAge': listPatsWithMetTumAndAge}

    def tcga_skcm_rna_data(flagResult=False,
                           strMessRNADataFolder='mRNAseq_preproc',
                           strMessRNADataFilename='SKCM.uncv2.mRNAseq_RSEM_all.txt'):
        """Process the TCGA SKCM RSEM normalised RNA-seq data into a dataframe indexed by HGNC symbol.

        The tab-separated file is read from <PreProc.strDataBaseLoc>/<strMessRNADataFolder>/<strMessRNADataFilename>;
        its 'HYBRIDIZATION R' column is expected to hold identifiers of the form 'SYMBOL|EntrezID'.

        Args:
            flagResult: not used within this function; retained so that the call signature is unchanged.
            strMessRNADataFolder: sub-folder of PreProc.strDataBaseLoc which contains the RNA-seq data file.
            strMessRNADataFilename: name of the RNA-seq data file.

        Returns:
            A pandas DataFrame with the index named 'HGNC' (gene symbols) and the remaining columns of the input
            file; rows where the gene symbol is unknown ('?|...') are dropped.

        Raises:
            ValueError: if the identifier 'SLC35E2|728661' is not present within the 'HYBRIDIZATION R' column.
        """
        pathMessRNAData = os.path.join(PreProc.strDataBaseLoc, strMessRNADataFolder)
        dfRSEMData = pd.read_table(os.path.join(pathMessRNAData, strMessRNADataFilename),
                                   sep='\t', header=0, index_col=None)
        listGeneAndEntrez = dfRSEMData['HYBRIDIZATION R'].values.tolist()

        # For some reason there exist 2 entries for the gene SLC35E2, which has subsequently been split into SLC35E2A
        #  and SLC35E2B - the symbol attached to Entrez ID 728661 is corrected accordingly
        numRowMisLabelled = listGeneAndEntrez.index('SLC35E2|728661')
        listGeneAndEntrez[numRowMisLabelled] = 'SLC35E2B|728661'
        dfRSEMData['HYBRIDIZATION R'] = pd.Series(listGeneAndEntrez, index=dfRSEMData.index.tolist())

        # Drop all entries/rows where the HGNC identifier is unknown ('?|EntrezID'), as matching is performed on the
        #  HGNC symbol
        arrayRowsWithCleanGenes = np.where([not strGene.startswith('?|') for strGene in listGeneAndEntrez])[0]
        dfOut = dfRSEMData.iloc[arrayRowsWithCleanGenes, :].copy(deep=True)

        # Split the identifier column to extract the HGNC symbols, set this as the index, and remove the original
        #  identifier column
        dfOut['HGNC'] = [strRow.split('|')[0] for strRow in dfOut['HYBRIDIZATION R'].values.tolist()]
        dfOut.set_index('HGNC', inplace=True)
        dfOut.drop(columns=['HYBRIDIZATION R'], inplace=True)

        return dfOut

    def tcga_skcm_data(flagResult=False,
                       flagPerformExtraction=False,
                       strMergedFileName='merged_tcga_skcm.pickle',
                       listClinDataToMerge=listClinDataOfInt):
        """Combine the various TCGA SKCM data sets and perform gene set scoring.

        When the cached file strMergedFileName is absent (or flagPerformExtraction is True) the RNA-seq data are
        log10-transformed, every sample is scored against a collection of gene sets, the scores are merged with the
        curated clinical data, and the merged dataframe is pickled to strMergedFileName. Otherwise the cached
        dataframe is loaded from that file.

        NOTE(review): the extraction branch calls PreProc.tcga_skcm_classifications(), which (in the part of the file
            visible here) itself calls PreProc.tcga_skcm_data() with default arguments; if the cached pickle does not
            exist yet this looks like unbounded mutual recursion - confirm.

        Args:
            flagResult: not used within this function; retained so that the call signature is unchanged.
            flagPerformExtraction: if True, rebuild the merged dataframe even when the cached file exists.
            strMergedFileName: path of the pickle file used to cache the merged dataframe.
            listClinDataToMerge: columns of 'clinical.tsv' to carry over; the extraction code reads 'vital_status',
                'days_to_death' and 'days_to_last_follow_up' from this selection, so they must be included.

        Returns:
            A dictionary with the entries:
                'df': merged dataframe (samples as rows; log10 abundance, gene set score and clinical data columns)
                'listGenes': list of the gene symbols within the RNA-seq data
                'listClin': list of clinical data labels
        """
        # specify gene lists used for analysis
        listBottcherNKGenes = ['NCR3', 'KLRB1', 'PRF1', 'CD160', 'NCR1']
        listBottchercDC1Genes = ['XCR1', 'CLEC9A', 'CLNK', 'BATF3']

        # without a cached file the extraction must be performed regardless of the input flag
        if not os.path.exists(strMergedFileName):
            flagPerformExtraction = True

        if flagPerformExtraction:

            # Load a processed dictionary with the original TCGA 'Immune' gene set and classifications for immune
            #  high/low
            dictTCGAClassifications = PreProc.tcga_skcm_classifications()

            #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
            # Load molecular (RNA-seq) data
            dfRSEMData = PreProc.tcga_skcm_rna_data()

            # Process the sample names as a list
            listSamples = [strCol for strCol in dfRSEMData.columns.tolist() if strCol[0:4] == 'TCGA']
            numTotSamples = len(listSamples)

            # Pull out the genes within these data and create a set for later comparisons
            listTCGAGenes = dfRSEMData.index.tolist()
            setTCGAGenes = set(listTCGAGenes)

            #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
            # Extract and curate clinical data
            dfClinData = pd.read_table(os.path.join(PreProc.strDataBaseLoc, 'clinical.tsv'),
                                       sep='\t', header=0, index_col=None)
            dfClinData.set_index('submitter_id', inplace=True)
            # the case (patient) ID is the sample ID with its final '-' delimited field removed
            listSampleCaseID = [strSample.rpartition('-')[0] for strSample in listSamples]

            # take an explicit copy so that the column edits below act on an independent dataframe
            dfClinDataOut = dfClinData[listClinDataToMerge].loc[listSampleCaseID].copy()
            dfClinDataOut['SampleID'] = listSamples
            dfClinDataOut.set_index('SampleID', inplace=True)

            # identify samples where the patient is dead (i.e. where a death event has happened)
            # NB: the builtin bool/float are used throughout because the np.bool/np.float aliases have been removed
            #  from recent NumPy
            arrayPatientIsDead = np.array([strStatus == 'dead'
                                           for strStatus in dfClinDataOut['vital_status'].tolist()],
                                          dtype=bool)

            # find entries for dead patients where days_to_death is specified, and entries for living patients where
            #  days_to_last_follow_up is specified (missing values are encoded as '--')
            arrayDeathDataAreClean = np.bitwise_and(
                (dfClinDataOut['vital_status'] == 'dead').values.astype(bool),
                ~(dfClinDataOut['days_to_death'] == '--').values.astype(bool))
            arrayAliveDataAreClean = np.bitwise_and(
                (dfClinDataOut['vital_status'] == 'alive').values.astype(bool),
                ~(dfClinDataOut['days_to_last_follow_up'] == '--').values.astype(bool))

            # create a single vector of survival times which merges days to death/days to last follow up as required;
            #  samples with neither value remain as NaN
            arraySurvivalTimes = np.full(len(arrayPatientIsDead), np.nan, dtype=float)
            for iRow in range(len(arrayPatientIsDead)):
                if arrayDeathDataAreClean[iRow]:
                    arraySurvivalTimes[iRow] = float(dfClinDataOut['days_to_death'].iloc[iRow])
                elif arrayAliveDataAreClean[iRow]:
                    arraySurvivalTimes[iRow] = float(dfClinDataOut['days_to_last_follow_up'].iloc[iRow])

            # output within the clinical data dataframe, with survival time converted from days to months (30.5 days)
            dfClinDataOut['death_event'] = arrayPatientIsDead
            dfClinDataOut.drop(labels=['vital_status', 'days_to_death', 'days_to_last_follow_up'],
                               axis=1,
                               inplace=True)
            dfClinDataOut['surv_time'] = pd.Series(arraySurvivalTimes/30.5, index=dfClinDataOut.index.tolist())
            dfClinDataOut.rename(columns={'age_at_diagnosis': 'Age'},
                                 inplace=True)

            #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
            # Process molecular (RNA-seq) data and perform gene set scoring
            arrayRSEMData = dfRSEMData[listSamples].values.astype(float)

            # use the smallest positive abundance as an offset so that zero values can be log-transformed
            arrayFlatCleanData = np.ravel(np.nan_to_num(arrayRSEMData))
            numMinNonZeroVal = np.min(arrayFlatCleanData[arrayFlatCleanData > 0])

            dfRSEMOut = pd.DataFrame(data=np.log10(arrayRSEMData + numMinNonZeroVal),
                                     index=listTCGAGenes,
                                     columns=listSamples)
            dfRSEMOut = dfRSEMOut.transpose()

            # Extract the T cell signature and identify genes overlapping with the TCGA data
            dictTCellSig = GeneSetScoring.CuratedList.t_cells_in_tumour()
            listTCellUpGenesInTCGA = list(set(dictTCellSig['T cells']['UpGenes']).intersection(setTCGAGenes))

            # Extract the NK cell signature and identify genes overlapping with the TCGA data
            dfNKSigCuration = pd.read_table(os.path.join(Plot.strOutputFolder, 'NK_genes_curated.tsv'),
                                            sep='\t', header=0, index_col=0)
            # Specifically select the genes which have passed our filtering criteria (Fig. S2 of the manuscript)
            listLocalNKSigGenes = dfNKSigCuration[dfNKSigCuration['CursonsGuimaraes_sigGene']==True].index.tolist()
            listNKUpGenesInTCGA = list(set(listLocalNKSigGenes).intersection(setTCGAGenes))

            # Extract the Foroutan TGF-B EMT signature and identify genes overlapping with the TCGA data
            dictTGFBEMTScore = GeneSetScoring.ExtractList.foroutan2016_tgfb_mes_score()
            listTGFBEMTUp = list(set(dictTGFBEMTScore['Up']).intersection(setTCGAGenes))
            listTGFBEMTDn = list(set(dictTGFBEMTScore['Down']).intersection(setTCGAGenes))

            # Extract the Tan Epithelial & Mesenchymal signatures and identify genes overlapping with the TCGA data
            dictTanEMTSigs = GeneSetScoring.ExtractList.tan2012_tumour_genes()
            listEpiGenes = list(set(dictTanEMTSigs['epi_genes']).intersection(setTCGAGenes))
            listMesGenes = list(set(dictTanEMTSigs['mes_genes']).intersection(setTCGAGenes))

            # For Fig. S9 we compare signatures across the TCGA and LM-MEL data sets; to facilitate this we must first
            #  find the set of consistent genes
            #  --> extract the LM-MEL data and create a set of genes
            dfLMMEL = PreProc.lm_mel_data()
            setLMMELGenes = set(dfLMMEL.columns.tolist())

            listTCGAImmuneGenes = dictTCGAClassifications['listGenesToScore']

            # identify the intersection of the LM-MEL gene list with all other genes
            listTCGAGenesVsLMMEL = list(setTCGAGenes.intersection(setLMMELGenes))
            listEpiGenesVsLMMEL = list(set(listEpiGenes).intersection(setLMMELGenes))
            listMesGenesVsLMMEL = list(set(listMesGenes).intersection(setLMMELGenes))
            listTGFBEMTUpVsLMMEL = list(set(listTGFBEMTUp).intersection(setLMMELGenes))
            listTGFBEMTDnVsLMMEL = list(set(listTGFBEMTDn).intersection(setLMMELGenes))

            # Extract the log-transformed abundance matrices (samples x genes) once, rather than re-slicing and
            #  re-casting the dataframe for every scoring call within the loop below
            arrayLogAllGenes = dfRSEMOut.values.astype(float)
            arrayLogVsLMMEL = dfRSEMOut[listTCGAGenesVsLMMEL].values.astype(float)

            # Specify the gene sets to score as tuples of:
            #   (label, score against the LM-MEL-matched background?, up-gene list, down-gene list or None)
            listScoreSpecs = [
                ('NK', False, listNKUpGenesInTCGA, None),
                ('BottcherNK', False, listBottcherNKGenes, None),
                ('BottcherDC', False, listBottchercDC1Genes, None),
                ('TCell', False, listTCellUpGenesInTCGA, None),
                ('Immune', False, listTCGAImmuneGenes, None),
                ('Epi', False, listEpiGenes, None),
                ('Mes', False, listMesGenes, None),
                ('TGFBEMT', False, listTGFBEMTUp, listTGFBEMTDn),
                ('EpiVsLMMEL', True, listEpiGenesVsLMMEL, None),
                ('MesVsLMMEL', True, listMesGenesVsLMMEL, None),
                ('TGFBEMTVsLMMEL', True, listTGFBEMTUpVsLMMEL, listTGFBEMTDnVsLMMEL)]

            # Create a set of arrays for the output data;
            # NB: to avoid a number of pandas allocation warnings this is done as arrays which are later loaded into
            #  the output dataframe
            dictScores = {strLabel: np.zeros(numTotSamples, dtype=float)
                          for strLabel, _, _, _ in listScoreSpecs}

            # step through each sample and perform scoring for the appropriate gene sets
            print('Scoring TCGA samples for various gene sets/signatures')
            for iSample in range(numTotSamples):
                print('Sample ' + '{}'.format(iSample+1) + ' of ' + '{}'.format(numTotSamples))
                for strLabel, flagVsLMMEL, listUpGenes, listDownGenes in listScoreSpecs:
                    if flagVsLMMEL:
                        listBackgroundGenes = listTCGAGenesVsLMMEL
                        arraySampleAbund = arrayLogVsLMMEL[iSample]
                    else:
                        listBackgroundGenes = listTCGAGenes
                        arraySampleAbund = arrayLogAllGenes[iSample]

                    # a fresh copy of the abundance vector is passed to every call, so that the shared matrices
                    #  cannot be modified by the scoring function
                    dictScoreArgs = {'listAllGenes': listBackgroundGenes,
                                     'arrayTranscriptAbundance': np.array(arraySampleAbund, dtype=float),
                                     'listUpGenesToScore': listUpGenes,
                                     'flagApplyNorm': True}
                    # the down-gene argument is only supplied for the bi-directional signatures
                    if listDownGenes is not None:
                        dictScoreArgs['listDownGenesToScore'] = listDownGenes

                    dictScores[strLabel][iSample] = \
                        GeneSetScoring.FromInput.single_sample_rank_score(**dictScoreArgs)

            # Load the numpy arrays into the appropriate columns within the output dataframe
            # NOTE(review): the 'NK-Cytokine Score' and 'NK-Cytotoxic Score' columns are filled with the same values
            #  as 'NK Score' (as in the original code) - confirm whether separate signatures were intended
            dfRSEMOut['NK Score'] = dictScores['NK']
            dfRSEMOut['NK-Cytokine Score'] = dictScores['NK']
            dfRSEMOut['NK-Cytotoxic Score'] = dictScores['NK']

            dfRSEMOut['Bottcher NK Score'] = dictScores['BottcherNK']
            dfRSEMOut['Bottcher cDC1 Score'] = dictScores['BottcherDC']
            dfRSEMOut['T cell Score'] = dictScores['TCell']
            dfRSEMOut['TCGA Immune Score'] = dictScores['Immune']
            dfRSEMOut['TGF-B EMT Score'] = dictScores['TGFBEMT']
            dfRSEMOut['Epithelial Score'] = dictScores['Epi']
            dfRSEMOut['Mesenchymal Score'] = dictScores['Mes']
            dfRSEMOut['TGF-B EMT Score (vs. LM-MEL)'] = dictScores['TGFBEMTVsLMMEL']
            dfRSEMOut['Epithelial Score (vs. LM-MEL)'] = dictScores['EpiVsLMMEL']
            dfRSEMOut['Mesenchymal Score (vs. LM-MEL)'] = dictScores['MesVsLMMEL']

            # join the RNA & gene set score data to the clinical data
            dfMerged = dfRSEMOut.join(dfClinDataOut)

            # save the resulting dataframe for re-loading
            dfMerged.to_pickle(strMergedFileName)

        else:

            # load the pre-processed dataframe (a cache file written by the branch above)
            dfMerged = pd.read_pickle(strMergedFileName)

            # extract required information
            dfRSEMData = PreProc.tcga_skcm_rna_data()
            listTCGAGenes = dfRSEMData.index.tolist()

        # NOTE(review): 'age_at_diagnosis' is renamed to 'Age' within the extraction branch above, so this label may
        #  not match a column of the merged dataframe - confirm how callers use 'listClin' before changing it
        listClinDataOut = ['gender',
                           'age_at_diagnosis',
                           'death_event',
                           'surv_time']

        # return a dictionary with the required information
        return {'df': dfMerged,
                'listGenes': listTCGAGenes,
                'listClin': listClinDataOut}

    def tcga_skcm_classifications(flagResult=False,
                                  strGeneListFolder=os.getcwd()):
		"""Load supplementary data tables from the original TCGA SKCM manuscript including the immune gene signature and
		classifications of immune high/immune low samples"""
								  
        strTCGAGeneListFileName = 'TCGA_SKCM_SuppTable4.xlsx'
        strTCGASampleClassificationFileName = 'TCGA_SKCM_SuppTable1.xlsx'

        # load the TCGA data as a merged pandas dataframe
        dictTCGASKCM = PreProc.tcga_skcm_data()
        listTCGAGenes = dictTCGASKCM['listGenes']

		# load a dictionary which maps between HGNC synonyms using the downloadable HGNC database
        dictHGNCSynonyms = HGNCFunctions.Mapping.create_synonym_dict()
        # unfortunately I need to hard code a couple of synonyms which aren't being picked up from the database export
        dictHGNCSynonyms['TNFRSF14B'] = 'TNFRSF13B'
        # there are several synonyms for CT12.1: XAGE1A-XAGE1E; XAGE1D is the only one present within the TCGA SKCM data
        dictHGNCSynonyms['CT12.1'] = 'XAGE1D'
        # it seems that FDCSP is the accepted HGNC symbol now, but this is absent in the TCGA data, whereas a previous
        #  synonym C4orf7 is present
        dictHGNCSynonyms['FDCSP'] = 'C4orf7'
        # similar CT55 is the HGNC symbol but CXorf48 is present
        dictHGNCSynonyms['CT55'] = 'CXorf48'

        #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #
        # SPLIT SAMPLES ON IMMUNE SCORE AND EXAMINE SURVIVAL
        # there are a number of published immune signatures associated with survival in melanoma patients, here we use
        #  the genes which classify the 'immune' genes within the original TCGA SKCM paper:
        #     The Cancer Genome Atlas Network (2015). Genomic Classification of Cutaneous Melanoma. Cell, 161: 1681-1696
        #         doi: 10.1016/j.cell.2015.05.044

        dfTCGASampleClasses = pd.read_excel(os.path.join(strGeneListFolder, strTCGASampleClassificationFileName),
                                             sheet_name='Supplemental Table S1D', header=1,
                                             index_col=None)

        listTCGAClassifiedImmune = dfTCGASampleClasses['Name'][dfTCGASampleClasses['RNASEQ-CLUSTER_CONSENHIER']=='immune'].tolist()

        listOrigTCGASamples = dfTCGASampleClasses['Name'].tolist()

        dfTCGAGeneLists = pd.read_excel(os.path.join(strGeneListFolder, strTCGAGeneListFileName),
                                        sheet_name='Supplemental Table S4A', header=1,
                                        index_col=None)
        listClusterLabels = dfTCGAGeneLists['RNA Cluster'].tolist()

        # the original Excel table used merged rows which pandas doesn't like; replace NaNs with an appropriate 'fill down'
        # --> begin by finding all of the rows where there are NaNs
        arrayNaNClusterLabel = dfTCGAGeneLists['RNA Cluster'].isnull()
        # and extracting the indices which are not NaN
        arrayNotNaNClusterLabelIndices = np.where(dfTCGAGeneLists['RNA Cluster'].notnull())[0]
        # step through each row with a NaN cluster label
        for iRow in np.where(arrayNaNClusterLabel)[0]:
            # find the lowest non-NaN row (maximum index) which is above this
            numLabelIndex = arrayNotNaNClusterLabelIndices[np.max(np.where(iRow > arrayNotNaNClusterLabelIndices)[0])]
            # extract the string corresponding to this row
            strLabel = listClusterLabels[numLabelIndex]
            # and fill as necessary
            dfTCGAGeneLists['RNA Cluster'].iloc[iRow] = strLabel

        # identify all rows corresponding to the 'Immune' RNA cluster
        arrayRowsOfInt = np.where(dfTCGAGeneLists['RNA Cluster'] == 'Immune')[0]
        listImmuneGenesClean = []
        for iRow in arrayRowsOfInt:
            strGenes = dfTCGAGeneLists['Individual Genes'].iloc[iRow]
            # check that the entry is not NaN (one of the rows contains NaN..?!?)
            if strGenes == strGenes:
                # there are a few random semi-colons in place of commas as separators..
                strGenes = strGenes.replace(';', ',')
                # split on the commas
                arrayGenes = strGenes.split(', ')
                for strGene in arrayGenes:
                    if not '/' in strGene:
                        if len(strGene) > 0:
                            # ensure that the gene name is fully upper case; there is a 'CD1c' instead of 'CD1C'
                            listImmuneGenesClean.append(strGene.upper())
                    else:
                        # '79A/B' is listed in the B-cells row; I think it's a fair assumption that this is meant to be
                        #  'CD79A/B', i.e. components of the B-cell receptor
                        if strGene == '79A/B':
                            strGene = 'CD79A/B'
                        # a slightly different text pattern is used for showing synonyms of TBX21..
                        if strGene == 'TBX21/TBET':
                            strGene = 'TBX21(TBET)'
                        arraySplitGenes = strGene.split('/')
                        strFirstGene = arraySplitGenes[0]
                        if strFirstGene[0:2] == 'CD':
                            # I'm going to work on the assumption that all prefixes end at the last number
                            arrayIsNumeric = np.zeros(len(strFirstGene),
                                                      dtype=np.bool)
                            for iLetter in range(len(strFirstGene)):
                                arrayIsNumeric[iLetter] = strFirstGene[iLetter].isnumeric()
                            numLastNumberPos = np.max(np.where(arrayIsNumeric)[0])
                            strPrefix = strFirstGene[0:(numLastNumberPos+1)]
                        elif np.any([strFirstGene[0:2] == 'CT',
                                     strFirstGene[0:3] == 'CCL',
                                     strFirstGene[0:3] == 'XCL',
                                     strFirstGene[0:3] == 'CCR',
                                     strFirstGene[0:4] == 'CXCL',
                                     strFirstGene[0:4] == 'CXCR']):
                            # I'm going to work on the assumption that all prefixes end one prior to the first number
                            arrayIsNumeric = np.zeros(len(strFirstGene),
                                                      dtype=np.bool)
                            for iLetter in range(len(strFirstGene)):
                                arrayIsNumeric[iLetter] = strFirstGene[iLetter].isnumeric()
                            numLastNumberPos = np.min(np.where(arrayIsNumeric)[0])
                            strPrefix = strFirstGene[0:(numLastNumberPos)]
                        elif strFirstGene[0:3] == 'GZM':
                            # I'm just going to hard code this one..
                            strPrefix = 'GZM'
                        elif strFirstGene[0:4] == 'MAGE':
                            # I'm just going to hard code this one..
                            strPrefix = 'MAGE'

                        listImmuneGenesClean.append(strFirstGene)

                        for iSplit in range(1, len(arraySplitGenes)):
                            strCleanOut = strPrefix + arraySplitGenes[iSplit]
                            listImmuneGenesClean.append(strCleanOut)

        arrayHasSynonymIndices = np.where(['(' in strGene for strGene in listImmuneGenesClean])[0]
        for iRow in arrayHasSynonymIndices:
            strGene = listImmuneGenesClean[iRow]
            strGeneClean = strGene.partition('(')[0]
            listImmuneGenesClean[iRow] = strGeneClean

        # perform gene set scoring over this set of immune genes
        #  NB: in 'Genomic Classification of Cutaneous Melanoma' from which this gene list is derived, the authors state
        #       "A significant number of genes overexpressed in this subclass were associated with immune cell subsets
        #       (T cells, B cells and NK cells), immune signaling molecules, co-stimulatory and co-inhibitory immune
        #       checkpoint proteins, cytokines, chemokine, and corresponding receptors (Table S4A-S4B)."
        #      Accordingly, this gene set is simply being used as an 'expected up-regulated' gene list and I have not
        #       attempted to further refine this gene list
        listImmuneGenesMatched = []
        for strGene in listImmuneGenesClean:
            if strGene in listTCGAGenes:
                listImmuneGenesMatched.append(strGene)
            else:
                print('warning: ' + strGene + ' cannot be found in the TCGA SKCM mRNA abundance data')
                if strGene in dictHGNCSynonyms.keys():
                    strAlias = dictHGNCSynonyms[strGene]
                    print('\t\t\tmapping via ' + strAlias)
                    listImmuneGenesMatched.append(strAlias)
                elif strGene=='DC247':
                    print('\t\t\tmanually mapping to CD247, assuming this is typographical error;')
                    print('\t\t\tCD247 is also known as TCRzeta')
                    listImmuneGenesMatched.append('CD247')
                else:
                    print('\t\t\tcannot map to alias, please check for typographical errors')
                    print('\tWARNING: ' + strGene + ' is being discarded from the analysis')

        # convert to a unique list by creating a set (I know MS4A1 is present twice as 'CD20' was listed in the gene list
        #  together with MS4A1)
        listGenesToScore = list(set(listImmuneGenesMatched))

        return {'listGenesToScore': listGenesToScore,
                'listImmuneGenesClean': listImmuneGenesClean,
                'listTCGAClassifiedImmune': listTCGAClassifiedImmune,
                'listOrigTCGASamples': listOrigTCGASamples}

    def tcga_skcm_met_sites(flagResult=False,
                            flagProduceOutputTable=False):
        """Load detailed metastatic tumour sites from the TCGA SKCM data.

        The 'site_of_resection_or_biopsy' annotation of every metastatic-only sample is mapped through
        PreProc.dictMetSiteGroupings (sites without an entry are kept unchanged) and converted into a binary
        indicator table with one column per grouped site. 'Lymph node, NOS' is left out of the columns so that
        it acts as the baseline when the table is used to fit a Cox proportional hazards model.

        :param flagResult: not used within this function
        :param flagProduceOutputTable: when True, also write 'MetSiteGroupings.tsv' into Plot.strOutputFolder,
            listing each grouped site together with the original sites (and sample counts) it contains
        :return: pandas DataFrame indexed by metastatic sample ID; values are 1.0 where the sample was taken
            from the (grouped) site named by the column and 0.0 otherwise
        :raises ValueError: if no sample is annotated with the baseline site 'Lymph node, NOS'
        """
        strBaselineSite = 'Lymph node, NOS'

        # load the TCGA data as a merged pandas dataframe
        dfTCGA = PreProc.tcga_skcm_data(flagPerformExtraction=False)['df']

        listMetOnlyPatientsWithAge = PreProc.split_tcga_met_vs_pri()['MetOnlyPatWithAge']
        # append the '-06' suffix indicative of metastatic samples
        listMetOnlySamplesWithAge = [strPat + '-06' for strPat in listMetOnlyPatientsWithAge]

        dfTCGAMets = dfTCGA.loc[listMetOnlySamplesWithAge]

        # collapse the detailed site annotations into the coarser groupings (unlisted sites pass through as-is)
        dictSiteGroupings = PreProc.dictMetSiteGroupings
        listOrigMetSites = dfTCGAMets['site_of_resection_or_biopsy'].tolist()
        listMetSites = [dictSiteGroupings.get(strSite, strSite) for strSite in listOrigMetSites]

        # convert to a set to get a unique list
        #  NB: set ordering (and therefore the output column order) is not guaranteed to be stable between runs
        listUniqueMetSites = list(set(listMetSites))

        if flagProduceOutputTable:
            # Output the site groupings in a format that can easily be converted to a table for the final manuscript
            listOrigMetSiteForQuant = dfTCGAMets['site_of_resection_or_biopsy'].unique().tolist()
            listNumAtOrigMetSite = [np.sum([strMetSite == strObsSite for strObsSite in listOrigMetSites])
                                    for strMetSite in listOrigMetSiteForQuant]
            listGroupedMetSiteForQuant = [dictSiteGroupings.get(strMetSite, strMetSite)
                                          for strMetSite in listOrigMetSiteForQuant]

            with open(os.path.join(Plot.strOutputFolder, 'MetSiteGroupings.tsv'), 'w') as handFile:
                for strGroupedSite in listUniqueMetSites:
                    # rows of the per-original-site tallies which belong to this grouped site
                    listRowsForGroupedSite = [numRow
                                              for numRow, strTestSite in enumerate(listGroupedMetSiteForQuant)
                                              if strGroupedSite == strTestSite]
                    numTotalForSubset = sum(listNumAtOrigMetSite[numRow] for numRow in listRowsForGroupedSite)
                    strForSubsets = ', '.join(
                        '"' + listOrigMetSiteForQuant[numRow] + '" (n=' + '{}'.format(listNumAtOrigMetSite[numRow]) + ')'
                        for numRow in listRowsForGroupedSite)

                    print(strGroupedSite + ' (n=' + '{}'.format(numTotalForSubset) + ')\t' + strForSubsets,
                          file=handFile)

        # As we are examining metastatic melanoma we will use the 'Lymph node, NOS' corresponding to unspecified lymph
        #  nodes for the baseline hazard --> remove this from listUniqueMetSites which is used to generate the dataframe
        #  used in fitting the Cox proportional hazards model
        if strBaselineSite not in listUniqueMetSites:
            raise ValueError('baseline site "' + strBaselineSite + '" is not present in the metastatic samples')
        listUniqueMetSites.remove(strBaselineSite)

        # create a binary table which flags whether a tumor corresponds to a specific metastatic site
        arrayMetSiteAnnot = np.zeros((np.shape(dfTCGAMets)[0], len(listUniqueMetSites)),
                                     dtype=float)
        for iMetSite, strMetSite in enumerate(listUniqueMetSites):
            arrayPatientMetIsAtSite = np.array([strPatientSite == strMetSite for strPatientSite in listMetSites],
                                               dtype=bool)
            arrayMetSiteAnnot[arrayPatientMetIsAtSite, iMetSite] = 1.0

        dfMetSites = pd.DataFrame(data=arrayMetSiteAnnot,
                                  columns=listUniqueMetSites,
                                  index=dfTCGAMets.index.tolist())

        return dfMetSites

    def lm_mel_data(flagResult=False,
                    flagPerformExtraction=False,
                    strTempFileName='proc_LMMEL.pickle'):
        """Process the LM-MEL microarray data and return a dataframe for later indexing.

        The processed table is cached as a pickle at strTempFileName. Processing is (re-)run when
        flagPerformExtraction is True or when the cache file does not exist; otherwise the cache is loaded.

        :param flagResult: not used within this function
        :param flagPerformExtraction: force re-processing of the raw data even if the cache exists
        :param strTempFileName: path of the pickle used to cache the processed dataframe
        :return: pandas DataFrame with one row per LM-MEL cell line; the columns hold the per-gene abundance
            (median over the probes which passed filtering) followed by the gene set scores 'TGF-B EMT Score',
            'Epithelial Score' and 'Mesenchymal Score', each also as a '(vs. TCGA)' variant which is scored
            using only genes shared with the TCGA SKCM data
        """
        # specify some thresholds used for pre-processing of the LM-MEL data
        numLMMelDynValueThresh = 4.5
        numFractReqAboveThresh = 0.15

        if not os.path.exists(strTempFileName):
            flagPerformExtraction = True

        if not flagPerformExtraction:
            # load the pre-processed pickle
            #  NB: unpickling can execute arbitrary code, so strTempFileName should only point at a trusted file
            return pd.read_pickle(strTempFileName)

        # load the LM-MEL mRNA abundance data
        dfLMMELIn = LMMelTools.LMMelData.mrna_abundance()

        # identify the cell lines (rows whose index label starts with 'LM-MEL')
        listLMMELIndex = dfLMMELIn.index.tolist()
        listLMMELLines = [strIndex for strIndex in listLMMELIndex if strIndex[0:len('LM-MEL')] == 'LM-MEL']
        setLMMELLines = set(listLMMELLines)
        arraySampleRows = np.where([strRow in setLMMELLines for strRow in listLMMELIndex])[0]
        numCellLines = len(arraySampleRows)

        # identify the genes
        listLMMELGenes = dfLMMELIn.loc['Symbol'].tolist()

        # retain only genes which have a good dynamic range (range of abundances), and genes where expression exceeds a
        #  specified threshold within a certain subset of cells (numLMMelDynValueThresh & numFractReqAboveThresh,
        #  specified above)
        print('Cleaning LM-MEL mRNA transcript abundance data.. this may take some time')
        arrayNumLinesHaveGeneAboveThresh = \
            np.sum(dfLMMELIn.loc[listLMMELLines].values.astype(float) > numLMMelDynValueThresh,
                   axis=0)
        arrayGeneHasGoodDynRange = arrayNumLinesHaveGeneAboveThresh >= numFractReqAboveThresh*len(listLMMELLines)
        arrayGeneIndicesGoodDynRange = np.where(arrayGeneHasGoodDynRange)[0]

        # drop all columns which have low quality probes with a poor signal, together with probes that have no
        #  gene symbol (NaN); filtering with pd.notnull also copes with there being no NaN entry at all
        #  NB: the positional indices are used as column labels here, i.e. this assumes that the columns of the
        #      input dataframe are labelled 0..N-1 -- TODO confirm against LMMelTools
        listUniqueOutputGenes = [strGene
                                 for strGene in dfLMMELIn.loc['Symbol', arrayGeneIndicesGoodDynRange].unique().tolist()
                                 if pd.notnull(strGene)]
        setUniqueOutputGenes = set(listUniqueOutputGenes)

        # produce an output dataframe
        dfLMMEL = pd.DataFrame(data=np.zeros((len(listLMMELLines), len(listUniqueOutputGenes)),
                                             dtype=float),
                               index=listLMMELLines,
                               columns=listUniqueOutputGenes)

        # In Fig. S9 the TCGA and LM-MEL data are compared through gene set scoring thus a consistent set of shared genes
        #  must first be identified
        dictTCGASKCM = PreProc.tcga_skcm_data(flagPerformExtraction=False)
        setTCGAGenes = set(dictTCGASKCM['listGenes'])
        listUniqueOutputGenesVsTCGA = list(setUniqueOutputGenes.intersection(setTCGAGenes))

        # As there is some multimapping in the LM-MEL microarray data (multiple probes -> one gene), take the median value
        #  across any probes that passed earlier filtering
        #  --> index the retained probe columns by gene symbol once, rather than re-scanning every probe for every gene
        dictGeneToGoodColIndices = {}
        for numCol in arrayGeneIndicesGoodDynRange:
            dictGeneToGoodColIndices.setdefault(listLMMELGenes[numCol], []).append(numCol)
        for strGene in listUniqueOutputGenes:
            arrayColIndicesForGene = dictGeneToGoodColIndices.get(strGene, [])
            dfLMMEL[strGene] = \
                np.median(dfLMMELIn.iloc[arraySampleRows, arrayColIndicesForGene].values.astype(float), axis=1)

        # Extract the Foroutan TGF-B EMT gene signature
        dictTGFBEMTScore = GeneSetScoring.ExtractList.foroutan2016_tgfb_mes_score()
        listTGFBEMTUp = list(set(dictTGFBEMTScore['Up']).intersection(setUniqueOutputGenes))
        listTGFBEMTDn = list(set(dictTGFBEMTScore['Down']).intersection(setUniqueOutputGenes))
        listTGFBEMTUpVsTCGA = list(set(listTGFBEMTUp).intersection(setTCGAGenes))
        listTGFBEMTDnVsTCGA = list(set(listTGFBEMTDn).intersection(setTCGAGenes))

        # Extract the Tan Epithelial & Mesenchymal gene signatures
        dictTanEMTSigs = GeneSetScoring.ExtractList.tan2012_cell_line_genes()
        listEpiGenes = list(set(dictTanEMTSigs['epi_genes']).intersection(setUniqueOutputGenes))
        listMesGenes = list(set(dictTanEMTSigs['mes_genes']).intersection(setUniqueOutputGenes))
        listEpiGenesVsTCGA = list(set(listEpiGenes).intersection(setTCGAGenes))
        listMesGenesVsTCGA = list(set(listMesGenes).intersection(setTCGAGenes))

        # create arrays for the initial scoring to avoid a number of warnings from pandas
        arrayEpiScore = np.zeros(numCellLines, dtype=float)
        arrayMesScore = np.zeros(numCellLines, dtype=float)
        arrayTGFBEMTScore = np.zeros(numCellLines, dtype=float)
        arrayEpiScoreVsTCGA = np.zeros(numCellLines, dtype=float)
        arrayMesScoreVsTCGA = np.zeros(numCellLines, dtype=float)
        arrayTGFBEMTScoreVsTCGA = np.zeros(numCellLines, dtype=float)

        # extract the abundance matrices once (all retained genes; genes shared with TCGA) rather than slicing the
        #  dataframe for every sample; at this point dfLMMEL only contains the per-gene abundance columns
        arrayAbundAllGenes = dfLMMEL.values.astype(float)
        arrayAbundVsTCGA = dfLMMEL[listUniqueOutputGenesVsTCGA].values.astype(float)

        # step through each cell line and score for the required gene sets
        print('Scoring LM-MEL cell lines for various gene sets/signatures')
        for iSample in range(numCellLines):
            print('Sample ' + '{}'.format(iSample+1) + ' of ' + '{}'.format(numCellLines))

            # every call receives its own copy of the sample vector (as before), in case the scoring function
            #  modifies its input
            arrayEpiScore[iSample] = GeneSetScoring.FromInput.single_sample_rank_score(
                listAllGenes=listUniqueOutputGenes,
                arrayTranscriptAbundance=arrayAbundAllGenes[iSample, :].copy(),
                listUpGenesToScore=listEpiGenes,
                flagApplyNorm=True)
            arrayMesScore[iSample] = GeneSetScoring.FromInput.single_sample_rank_score(
                listAllGenes=listUniqueOutputGenes,
                arrayTranscriptAbundance=arrayAbundAllGenes[iSample, :].copy(),
                listUpGenesToScore=listMesGenes,
                flagApplyNorm=True)
            arrayTGFBEMTScore[iSample] = GeneSetScoring.FromInput.single_sample_rank_score(
                listAllGenes=listUniqueOutputGenes,
                arrayTranscriptAbundance=arrayAbundAllGenes[iSample, :].copy(),
                listUpGenesToScore=listTGFBEMTUp,
                listDownGenesToScore=listTGFBEMTDn,
                flagApplyNorm=True)

            arrayEpiScoreVsTCGA[iSample] = GeneSetScoring.FromInput.single_sample_rank_score(
                listAllGenes=listUniqueOutputGenesVsTCGA,
                arrayTranscriptAbundance=arrayAbundVsTCGA[iSample, :].copy(),
                listUpGenesToScore=listEpiGenesVsTCGA,
                flagApplyNorm=True)
            arrayMesScoreVsTCGA[iSample] = GeneSetScoring.FromInput.single_sample_rank_score(
                listAllGenes=listUniqueOutputGenesVsTCGA,
                arrayTranscriptAbundance=arrayAbundVsTCGA[iSample, :].copy(),
                listUpGenesToScore=listMesGenesVsTCGA,
                flagApplyNorm=True)
            arrayTGFBEMTScoreVsTCGA[iSample] = GeneSetScoring.FromInput.single_sample_rank_score(
                listAllGenes=listUniqueOutputGenesVsTCGA,
                arrayTranscriptAbundance=arrayAbundVsTCGA[iSample, :].copy(),
                listUpGenesToScore=listTGFBEMTUpVsTCGA,
                listDownGenesToScore=listTGFBEMTDnVsTCGA,
                flagApplyNorm=True)

        # map gene set scores to the appropriate columns within the output dataframe
        dfLMMEL['TGF-B EMT Score'] = arrayTGFBEMTScore
        dfLMMEL['Epithelial Score'] = arrayEpiScore
        dfLMMEL['Mesenchymal Score'] = arrayMesScore
        dfLMMEL['TGF-B EMT Score (vs. TCGA)'] = arrayTGFBEMTScoreVsTCGA
        dfLMMEL['Epithelial Score (vs. TCGA)'] = arrayEpiScoreVsTCGA
        dfLMMEL['Mesenchymal Score (vs. TCGA)'] = arrayMesScoreVsTCGA

        # save as a pickle for later re-use
        dfLMMEL.to_pickle(strTempFileName)

        return dfLMMEL

    def gse60424_data(flagResult=False,
                      strDataLoc=os.getcwd(),
                      strDataFile='bldCells_TPM.tsv',
                      strMetaDataFile='SraRunTable.txt'):
        """Process the Linsley et al data and return a dataframe for later indexing.

        Linsley PS, et al. (2014). Copy number loss of the interferon gene cluster in melanomas is
          linked to reduced T cell infiltrate and poor patient prognosis. PLoS One. 9(10):e109760.
          DOI: 10.1371/journal.pone.0109760

        :param flagResult: not used within this function
        :param strDataLoc: folder containing both input files; NB the default is the working directory at the
            time this function was defined (i.e. when the module was imported), not at the time of the call
        :param strDataFile: tab-separated abundance table (first column is used as the row index, and is
            looked up as an Entrez gene ID; first row holds the sample IDs)
        :param strMetaDataFile: tab-separated sample metadata table with 'Run_s' and 'celltype_s' columns
        :return: tuple of (dataframe of the input values with the rows relabelled by HGNC symbol where one is
            available and 'Entrez:<ID>' otherwise, dictionary mapping each sample ID to its cell type)
        :raises IndexError: if a sample (column) of the data file has no matching 'Run_s' entry in the metadata
        """
        # A TPM normalised version of the data from GSE60424 (Linsley et al) was prepared by M Foroutan (see
        #  corresponding R data)
        # --> load the gene expression data
        dfData = pd.read_table(os.path.join(strDataLoc, strDataFile), sep='\t', header=0, index_col=0)
        listGenesEntrez = dfData.index.tolist()
        listSamples = dfData.columns.tolist()

        # load a sample metadata table which maps sample IDs to labels
        dfMetaData = pd.read_table(os.path.join(strDataLoc, strMetaDataFile), sep='\t', header=0, index_col=None)

        # load a dictionary for mapping HGNC symbols to Entrez gene IDs and invert it (where several symbols share
        #  an Entrez ID the last one encountered is retained)
        dictHGNCToEntrez = BiomartFunctions.IdentMappers.defineHGNCSymbolToEntrezIDDict()
        dictEntrezToHGNC = {numEntrez: strSymbol for strSymbol, numEntrez in dictHGNCToEntrez.items()}

        # step through all genes in the data and convert to HGNC symbols where available (if absent retain the Entrez
        #  number as a string with the 'Entrez:' prefix)
        listGenesHGNC = []
        for numEntrez in listGenesEntrez:
            strHGNC = dictEntrezToHGNC.get(numEntrez)
            if not isinstance(strHGNC, str) and pd.isnull(strHGNC):
                # no entry, or the entry is a missing value (None/NaN) --> fall back to the prefixed Entrez ID
                strHGNC = 'Entrez:' + '{}'.format(numEntrez)
            listGenesHGNC.append(strHGNC)

        # map sample cell types from the metadata table (the first matching row is used for each sample) and use
        #  this to create a dictionary for later mapping
        seriesRunID = dfMetaData['Run_s']
        seriesCellType = dfMetaData['celltype_s']
        listSamplesCellType = [seriesCellType[seriesRunID == strSample].values[0]
                               for strSample in listSamples]
        dictSampleIDToType = dict(zip(listSamples, listSamplesCellType))

        # create an output dataframe
        dfOut = pd.DataFrame(data=dfData.values,
                             index=listGenesHGNC,
                             columns=listSamples)

        # return the dataframe together with a dictionary for mapping samples
        return dfOut, dictSampleIDToType


    def gse24759_data(flagResult=False,
                      strDataLoc=os.getcwd(),
                      strDataFile='GSE24759_series_matrix.txt',
                      strProbeMapFile='GPL4685-15513.txt',
                      flagPerformExtraction=False,
                      strProcDataFilename='GSE24759_proc.pickle',
                      strProcMetaDataFilename='GSE24759_metadata_proc.pickle'):
        """Process the Novershtern et al data and return a dataframe for later indexing.

        Novershtern N, et al. (2011). Densely interconnected transcriptional circuits control cell states
          in human hematopoiesis. Cell. 144(2): 296-309.
          DOI: 10.1016/j.cell.2011.01.004

        The processed abundance and metadata tables are cached as pickles within strDataLoc. Processing is
        (re-)run when flagPerformExtraction is True or when either cache file is missing.

        :param flagResult: not used within this function
        :param strDataLoc: folder holding the input files and the cache files; NB the default is the working
            directory at the time this function was defined (i.e. at import), not at the time of the call
        :param strDataFile: series matrix file; lines starting with '!' hold the metadata
        :param strProbeMapFile: tab-separated probe annotation table ('#' starts a comment) which is indexed by
            its first column and has a 'Gene Symbol' column
        :param flagPerformExtraction: force re-processing even if both cache files exist
        :param strProcDataFilename: file name of the cached abundance dataframe
        :param strProcMetaDataFilename: file name of the cached metadata dataframe
        :return: tuple of (dataframe of genes x samples holding the median abundance across all probes mapped
            to each gene, with unmapped probes kept as 'failed_map:<probe>'; metadata dataframe with the rows
            'Title' and 'Source' and one column per sample ID)
        """
        # number of leading lines of the series matrix file which are searched for the sample metadata
        numMetaDataLines = 56

        strProcDataPath = os.path.join(strDataLoc, strProcDataFilename)
        strProcMetaDataPath = os.path.join(strDataLoc, strProcMetaDataFilename)

        if not (os.path.exists(strProcDataPath) and os.path.exists(strProcMetaDataPath)):
            flagPerformExtraction = True

        if not flagPerformExtraction:
            # load the pre-processed transcript abundance and metadata
            #  NB: unpickling can execute arbitrary code, so the cache files should only come from a trusted source
            dfOut = pd.read_pickle(strProcDataPath)
            dfMetaData = pd.read_pickle(strProcMetaDataPath)
            return dfOut, dfMetaData

        strDataPath = os.path.join(strDataLoc, strDataFile)

        # extract the first 56 lines from the metadata file; due to the formatting this file has issues
        #  when trying to parse with pandas
        #  (the file is only read, so it is opened read-only and no write permission is required)
        listMetaData = []
        with open(strDataPath, 'r') as handFile:
            for numLine, strLine in enumerate(handFile, start=1):
                listMetaData.append(strLine)
                if numLine == numMetaDataLines:
                    break

        # step through the resulting list and identify specific rows
        numSampleIDRow = np.where(['!Series_sample_id' in strRow for strRow in listMetaData])[0][0]
        numSampleTitleRow = np.where(['!Sample_title' in strRow for strRow in listMetaData])[0][0]
        numSampleSourceRow = np.where(['!Sample_source_name' in strRow for strRow in listMetaData])[0][0]

        # Process the row containing sample IDs and produce a new list (listSampleIDs); the IDs are held within a
        #  single quoted field, separated (and followed) by a space
        strSampleIDsIn = listMetaData[numSampleIDRow]
        strIDsClean = strSampleIDsIn.split('!Series_sample_id\t"')[1]
        strIDsCleaner = strIDsClean.split(' "\n')[0]
        listSampleIDs = strIDsCleaner.split(' ')

        # Process the row containing sample titles and produce a new list (listSampleTitles); every title is held
        #  within its own quoted, tab-separated field
        strSampleTitlesIn = listMetaData[numSampleTitleRow]
        strTitlesClean = strSampleTitlesIn.split('!Sample_title\t"')[1]
        strTitlesCleaner = strTitlesClean.split('"\n')[0]
        listSampleTitles = strTitlesCleaner.split('"\t"')

        # Process the row containing sample sources and produce a new list (listSampleSources)
        strSampleSourcesIn = listMetaData[numSampleSourceRow]
        strSourcesClean = strSampleSourcesIn.split('!Sample_source_name_ch1\t"')[1]
        strSourcesCleaner = strSourcesClean.split('"\n')[0]
        listSampleSources = strSourcesCleaner.split('"\t"')

        # read in the expression/microarray data
        dfData = pd.read_table(strDataPath, sep='\t', header=0, index_col=0,
                               comment='!')
        listProbes = dfData.index.tolist()

        # create an output dataframe for the metadata
        dfMetaData = pd.DataFrame(data=[listSampleTitles, listSampleSources],
                                  index=['Title', 'Source'],
                                  columns=listSampleIDs)

        # read in the table which maps probes to genes
        dfProbeMap = pd.read_table(os.path.join(strDataLoc, strProbeMapFile),
                                   sep='\t', header=0, index_col=0,
                                   comment='#')

        # map probes to gene symbols if possible; the set of annotated probes is built once so that each
        #  membership test does not have to re-scan the whole annotation table
        seriesProbeToSymbol = dfProbeMap['Gene Symbol']
        setAnnotatedProbes = set(dfProbeMap.index.tolist())
        listProbesToGenes = []
        for strProbe in listProbes:
            strGene = 'failed_map:' + strProbe
            if strProbe in setAnnotatedProbes:
                strSymbol = seriesProbeToSymbol.loc[strProbe]
                # NaN is the only value which is not equal to itself, i.e. skip probes without a gene symbol
                if strSymbol == strSymbol:
                    strGene = strSymbol
            listProbesToGenes.append(strGene)

        # group the probe rows by gene in a single pass
        print('Merging genes across probes, this may take some time..')
        dictGeneToProbeRows = {}
        for numProbeRow, strGene in enumerate(listProbesToGenes):
            dictGeneToProbeRows.setdefault(strGene, []).append(numProbeRow)
        listOutGenes = sorted(dictGeneToProbeRows)

        # create an output array and populate with transcript abundance data (median across the probes of a gene)
        arrayInData = dfData.values
        arrayOutData = np.zeros((len(listOutGenes), np.shape(dfData)[1]),
                                dtype=float)
        for iGene, strGene in enumerate(listOutGenes):
            arrayOutData[iGene, :] = np.median(arrayInData[dictGeneToProbeRows[strGene], :], axis=0)

        # load into an output dataframe
        dfOut = pd.DataFrame(data=arrayOutData,
                             columns=dfData.columns,
                             index=listOutGenes)
        # save the transcript abundance and sample metadata to pickle for later re-use
        dfOut.to_pickle(strProcDataPath)
        dfMetaData.to_pickle(strProcMetaDataPath)

        return dfOut, dfMetaData

    def gse24759_subsets(flagResult=False):
        """Specify the samples & groups to be retained from the Novershtern et al data and return a dictionary.

        Novershtern N, et al. (2011). Densely interconnected transcriptional circuits control cell states
          in human hematopoiesis. Cell. 144(2): 296-309.
          DOI: 10.1016/j.cell.2011.01.004

        :param flagResult: not used within this function
        :return: dictionary with three parallel lists (one entry per cell type group):
            'listOfListsAllSamplesTypeGrouped' - every sample type, grouped by similar cell types
            'listOfListsSampleTypeGrouped' - the same groups restricted to the sample types which are retained
            'listSampleTypeLabels' - a display label for each group
        """
        # NOTE(review): 'NaÃ¯ve B-cells' looks like a mis-decoded 'Naive' with a diaeresis; it has been left
        #  untouched in case it is needed to match the labels as they are read from the data -- TODO confirm
        # NOTE(review): the naive T cell entries are spelt 'T-cell' in the first list but 'T cell' in the second
        #  list; presumably only one spelling matches the sample labels -- TODO confirm against the metadata

        # create a list of all samples grouped by similar cell types
        listOfListsAllSamplesTypeGrouped = [
            ['Pro B-cell', 'Early B-cell'],
            ['NaÃ¯ve B-cells', 'Mature B-cell class able to switch',
             'Mature B-cell class switched', 'Mature B-cells'],
            ['Basophils', 'Eosinophill'],
            ['Plasmacytoid Dendritic Cell'],
            ['Common myeloid progenitor',
             'Myeloid Dendritic Cell'],
            ['Erythroid_CD34+ CD71+ GlyA-', 'Erythroid_CD34- CD71+ GlyA+',
             'Erythroid_CD34- CD71+ GlyA-', 'Erythroid_CD34- CD71- GlyA+',
             'Erythroid_CD34- CD71lo GlyA+'],
            ['Colony Forming Unit-Granulocyte ',
             'Granulocyte (Neutrophil)',
             'Granulocyte (Neutrophilic Metamyelocyte)',
             'Granulocyte/monocyte progenitor'],
            ['Hematopoietic stem cell_CD133+ CD34dim',
             'Hematopoietic stem cell_CD38- CD34+'],
            ['Mature NK cell_CD56+ CD16+ CD3-',
             'Mature NK cell_CD56- CD16+ CD3-',
             'Mature NK cell_CD56- CD16- CD3-'],
            ['Megakaryocyte',
             'Megakaryocyte/ erythroid progenitor',
             'Colony Forming Unit-Megakaryocytic '],
            ['Colony Forming Unit-Monocyte ',
             'Monocyte'],
            ['Naive CD4+ T-cell',
             'CD4+ Central Memory',
             'CD4+ Effector Memory'],
            ['Naive CD8+ T-cell',
             'CD8+ Central Memory',
             'CD8+ Effector Memory',
             'CD8+ Effector Memory RA'],
            ['NKT'],
        ]

        # The following groups have been dropped:
        #   Drop all 'colony forming unit' as these imply cells which have undergone culture rather than primary
        #       cell types
        #     --> 'Colony Forming Unit-Granulocyte'
        #         'Colony Forming Unit-Megakaryocytic'
        #         'Colony Forming Unit-Monocyte'
        #
        #   Also drop the following cell subgroups which appear to have some batch effects
        #         'Granulocyte (Neutrophil)'
        #         'Mature NK cell_CD56- CD16+ CD3-'
        #         'Megakaryocyte'

        # create a list of output samples grouped by similar cell types
        listOfListsSampleTypeGrouped = [
            ['Pro B-cell', 'Early B-cell'],
            ['NaÃ¯ve B-cells', 'Mature B-cell class able to switch',
             'Mature B-cell class switched', 'Mature B-cells'],
            ['Basophils', 'Eosinophill'],
            ['Plasmacytoid Dendritic Cell'],
            ['Common myeloid progenitor', 'Myeloid Dendritic Cell'],
            ['Erythroid_CD34+ CD71+ GlyA-', 'Erythroid_CD34- CD71+ GlyA+',
             'Erythroid_CD34- CD71+ GlyA-', 'Erythroid_CD34- CD71- GlyA+',
             'Erythroid_CD34- CD71lo GlyA+'],
            ['Granulocyte (Neutrophilic Metamyelocyte)', 'Granulocyte/monocyte progenitor'],
            ['Hematopoietic stem cell_CD133+ CD34dim',
             'Hematopoietic stem cell_CD38- CD34+'],
            ['Mature NK cell_CD56+ CD16+ CD3-',
             'Mature NK cell_CD56- CD16- CD3-'],
            ['Megakaryocyte/ erythroid progenitor'],
            ['Monocyte'],
            ['Naive CD4+ T cell',
             'CD4+ Central Memory',
             'CD4+ Effector Memory'],
            ['Naive CD8+ T cell',
             'CD8+ Central Memory',
             'CD8+ Effector Memory',
             'CD8+ Effector Memory RA'],
            ['NKT'],
        ]

        # create a list which labels the groups of output samples (same order as the grouped lists above)
        listSampleTypeLabels = [
            'Immature B cells',
            'Mature B cells',
            'Baso/Eosinophils',
            'Plasmacytoid DCs',
            'Myeloid cells',
            'Erythroid cells',
            'Granulocytes',
            'HSCs',
            'NK cells',
            'Megakaryocytes',
            'Monocytes',
            'CD4$^{+}$ T cells',
            'CD8$^{+}$ T cells',
            'NK T cells',
        ]

        # return these lists as a dictionary
        return {'listOfListsSampleTypeGrouped': listOfListsSampleTypeGrouped,
                'listOfListsAllSamplesTypeGrouped': listOfListsAllSamplesTypeGrouped,
                'listSampleTypeLabels': listSampleTypeLabels
                }


    def density_scatters(flagResult=False,
                         arrayXIn=np.zeros(1, dtype=float),
                         arrayYIn=np.zeros(1, dtype=float)):
        """Estimate the local sample density of matched X/Y vectors for colouring scatter plots.

        A two-dimensional Gaussian kernel density estimate is fitted to the (x, y) samples and
        evaluated at every sample. The samples are then ordered by increasing density so that,
        when plotted in the returned order, points in dense regions are drawn last (on top).

        Args:
            flagResult: unused by this function; kept for interface compatibility.
            arrayXIn: 1D array-like of x coordinates.
            arrayYIn: 1D array-like of y coordinates, matched element-wise to arrayXIn.

        Returns:
            A tuple (arrayXToPlot, arrayYToPlot, arrayZForColor): the x and y coordinates
            re-ordered by increasing density, and the density estimate at each of those points.
        """
        # accept any array-like input (e.g. lists); numpy arrays pass through unchanged
        arrayXIn = np.asarray(arrayXIn)
        arrayYIn = np.asarray(arrayYIn)

        # use the np.vstack function to merge the x/y vectors into a 2D array (one column per sample)
        arrayJointDist = np.vstack([arrayXIn, arrayYIn])
        # perform Gaussian kernel density estimation across these data, evaluated at the samples themselves
        arrayJointProb = gaussian_kde(arrayJointDist)(arrayJointDist)

        # sort the samples by their density (z-position) so that sparse points cannot hide dense ones
        arrayIndexByZPos = arrayJointProb.argsort()

        # output (x,y) coordinates + a density array for color
        return (arrayXIn[arrayIndexByZPos],
                arrayYIn[arrayIndexByZPos],
                arrayJointProb[arrayIndexByZPos])

    def refine_NK_signature(flagResult=False):
        """Refine the candidate NK cell signature genes and write the curated gene table.

        The initial candidate list is the union of the CIBERSORT/LM22 (resting + activated NK),
        LM7 (NK) and Huntington NK 'UpGenes' lists. Each candidate is then annotated with:
          * the bulk sorted-cell differential expression results ('DEstat_sigCombined.txt');
          * a single cell test computed from per-cell-type summary statistics
            ('NK_CombinedSig_GeneSummary_Tirosh.txt');
          * a CCLE abundance test (median log2 abundance across non-haematopoietic/lymphoid cell
            lines below the 25th percentile of all non-zero CCLE values).

        Both input files are read from PreProc.strExtraAnalysisPath. Two tab-separated files are
        written to Plot.strOutputFolder: 'NK_genes_initial.tsv' (the candidate genes) and
        'NK_genes_curated.tsv' (candidates sorted by symbol with all annotations and the single
        cell summary statistics).

        Args:
            flagResult: value handed back to the caller unchanged.

        Returns:
            flagResult, unmodified.
        """

        # load a dictionary for mapping between ENSEMBL gene IDs and HGNC symbols and reverse this
        dictENSGToHGNC = ENSEMBLTools.Load.dict_ensg_to_hgnc()
        dictHGNCToENSG = dict(zip(dictENSGToHGNC.values(), dictENSGToHGNC.keys()))

        # CCLE data are mapped to GRCh37 and it appears that CCL4 and CCL5 have had their ENSG IDs updated:
        # CCL4: ENSG00000275302 was never present in GRCh37
        # CCL5: ENSG00000161570 --> ENSG00000271503
        dictHGNCToENSG['CCL5'] = 'ENSG00000161570'
        dictENSGToHGNC['ENSG00000161570'] = 'CCL5'

        # Load the Huntington NK gene lists
        dictHuntingtonGenes = GeneSetScoring.ExtractList.huntington_nk_markers()

        # Load the CIBERSORT/LM22 (Newmann et al) gene lists
        dictOfDictsCIBERSORTLists = GeneSetScoring.ExtractList.cibersort_genes()

        # Load the LM7 (Tosolini et al) gene lists
        dictOfDictsLM7Lists = GeneSetScoring.ExtractList.tosolini_lm7_subset()

        # create sets/unique lists
        setCIBERSORTRestingUp = set(dictOfDictsCIBERSORTLists['NK cells resting']['UpGenes'])
        setCIBERSORTActivatedUp = set(dictOfDictsCIBERSORTLists['NK cells activated']['UpGenes'])
        setCIBERSORTCombined = setCIBERSORTRestingUp.union(setCIBERSORTActivatedUp)
        setHuntingtonUp = set(dictHuntingtonGenes['UpGenes'])
        setLM7Up = set(dictOfDictsLM7Lists['NK']['UpGenes'])

        # create an 'initial' NK gene list from the union of the LM7, LM22 & Huntington NK gene lists
        listInitialNKMarkerHGNC = list(
            setCIBERSORTRestingUp.union(setCIBERSORTActivatedUp.union(setHuntingtonUp.union(setLM7Up))))

        # remove any missing (NaN/None) value which has crept in; filtering (rather than
        #  list.remove(np.nan)) does not depend on object identity and does not raise when
        #  no missing value is present
        listInitialNKMarkerHGNC = [strGene for strGene in listInitialNKMarkerHGNC if pd.notnull(strGene)]

        # create an output dataframe
        dfAllNKGenes = pd.DataFrame(data=[],
                                    index=listInitialNKMarkerHGNC)
        dfAllNKGenes.to_csv(os.path.join(Plot.strOutputFolder, 'NK_genes_initial.tsv'),
                            header=True,
                            index=True,
                            sep='\t')

        listInitialNKMarkerENSG = [dictHGNCToENSG[strGene]
                                   for strGene in listInitialNKMarkerHGNC if strGene in dictHGNCToENSG]

        # load the differential expression analysis results from the bulk sorted immune populations (see the
        #  corresponding R code associated with this repository)
        dfBulkRNATest = pd.read_table(os.path.join(PreProc.strExtraAnalysisPath, 'DEstat_sigCombined.txt'),
                                      sep='\t', header=0, index_col=None)
        listBulkRNAGenes = dfBulkRNATest['SYMBOL'].tolist()
        arrayBulkLogFC = dfBulkRNATest['logFC'].values.astype(float)
        arrayBulkAdjPVal = dfBulkRNATest['adj.P.Val'].values.astype(float)

        # map each gene symbol to the first row in which it occurs (mirrors list.index)
        dictBulkRNAGeneToRow = {}
        for numRow, strBulkGene in enumerate(listBulkRNAGenes):
            dictBulkRNAGeneToRow.setdefault(strBulkGene, numRow)

        # check for genes that are significantly differentially expressed with a logFC > 1 (i.e. apply the
        #  TREAT criteria)
        listGenesPassBulkRNATest = \
            dfBulkRNATest['SYMBOL'][np.bitwise_and(arrayBulkLogFC > 1,
                                                   arrayBulkAdjPVal < 0.05)].tolist()
        setGenesPassBulkRNATest = set(listGenesPassBulkRNATest)

        # load the summary statistics from the Tirosh et al single cell data (see the corresponding
        #  R code associated with this repository)
        dfSingleCellRNASummaryStats = \
            pd.read_table(os.path.join(PreProc.strExtraAnalysisPath,
                                       'NK_CombinedSig_GeneSummary_Tirosh.txt'),
                          sep='\t', header=0, index_col=None)

        numDropoutMedianThresh = 0.5
        numDropoutNKUpperQThresh = 2.67

        # extract the genes within this dataframe
        listGenesInSingleCellData = dfSingleCellRNASummaryStats['Genes'].unique().tolist()
        setGenesInSingleCellData = set(listGenesInSingleCellData)

        # extract the cell types within the dataframe excluding NK cells
        listNonNKCellTypesInSingleCellData = dfSingleCellRNASummaryStats['Non.malignant'].unique().tolist()
        listNonNKCellTypesInSingleCellData.remove('NK')

        # row masks which do not depend on the gene are calculated once, outside of the loop
        seriesIsNKRow = dfSingleCellRNASummaryStats['Non.malignant'] == 'NK'
        # NB: the joined cell type names are interpreted as a regular expression by str.contains
        seriesIsNonNKRow = dfSingleCellRNASummaryStats['Non.malignant'].str.contains(
            '|'.join(listNonNKCellTypesInSingleCellData))

        # step through each gene
        listGenesPassSingleCellTest = []
        for strGene in listGenesInSingleCellData:
            seriesIsGeneRow = dfSingleCellRNASummaryStats['Genes'] == strGene
            seriesIsGeneInNK = np.bitwise_and(seriesIsGeneRow, seriesIsNKRow)
            seriesIsGeneInOther = np.bitwise_and(seriesIsGeneRow, seriesIsNonNKRow)

            # extract the median abundance of all cells to check for extensive dropout
            arrayAllCellMedian = \
                dfSingleCellRNASummaryStats['median'][seriesIsGeneRow].values.astype(float)

            # extract the upper quartile/75th percentile & median value within non-NK cells
            arrayOtherCellUpperQ = \
                dfSingleCellRNASummaryStats['thirdQ'][seriesIsGeneInOther].values.astype(float)
            arrayOtherCellMedian = \
                dfSingleCellRNASummaryStats['median'][seriesIsGeneInOther].values.astype(float)

            if np.all(arrayAllCellMedian < numDropoutMedianThresh):
                # consider the gene as heavily influenced by dropout; examine the upper quartile of expression
                #  within NK cells
                numNKUpperQ = \
                    dfSingleCellRNASummaryStats['thirdQ'][seriesIsGeneInNK].values.astype(float)
                # compare the upper quartile from NK cells to all other cell types and check against a dropout
                #  threshold; a gene without an NK row does not pass
                if numNKUpperQ.size > 0 \
                        and np.all(numNKUpperQ > numDropoutNKUpperQThresh) \
                        and np.all(numNKUpperQ > arrayOtherCellUpperQ):
                    listGenesPassSingleCellTest.append(strGene)

            else:
                # consider the gene as not heavily influenced by dropout
                numNKLowerQ = \
                    dfSingleCellRNASummaryStats['firstQ'][seriesIsGeneInNK].values.astype(float)
                # compare the lower quartile from NK cells to the median from all other cell types
                if np.all(numNKLowerQ > arrayOtherCellMedian):
                    listGenesPassSingleCellTest.append(strGene)

        setGenesPassSingleCellTest = set(listGenesPassSingleCellTest)

        # load cell line mapping data from Cellosaurus
        dfCellosaurus = Cellosaurus.Load.parse_input_file()
        # drop the 'na' placeholder (filtering does not raise if the placeholder is absent)
        listCellosaurusCCLENames = [strCellLine
                                    for strCellLine in dfCellosaurus['CCLE_ID'].unique().tolist()
                                    if strCellLine != 'na']

        # identify cell lines which are derived from solid tumours (i.e. not haematopoeitic and lymphoid)
        setNonHaemAndLymphLines = {strCellLine
                                   for strCellLine in listCellosaurusCCLENames
                                   if 'HAEMATOPOIETIC_AND_LYMPHOID_TISSUE' not in strCellLine}
        arrayCellosaurusRowIsOfInt = np.array([strCellLine in setNonHaemAndLymphLines
                                               for strCellLine in dfCellosaurus['CCLE_ID'].values.tolist()],
                                              dtype=bool)
        # convert to CVCL IDs for indexing the CCLE data
        listAllCVCLs = dfCellosaurus['CVCL_ID'].values.tolist()
        listNonHaemAndLymphLineCVCLs = [listAllCVCLs[i] for i in np.where(arrayCellosaurusRowIsOfInt)[0]]

        # Load the CCLE RNA transcript abundance data
        dfCCLE, dictHeaderToCVCL = CCLETools.Load.rnaseq_transcript_abundance()
        # Extract CCLE cell lines
        listCCLEColumns = dfCCLE.columns.tolist()
        setCCLECVCLs = {dictHeaderToCVCL[strCellLine] for strCellLine in listCCLEColumns}
        # Extract CCLE genes
        listCCLEIndex = dfCCLE.index.tolist()
        setCCLEIndex = set(listCCLEIndex)
        listCCLEGenes = [strRow for strRow in listCCLEIndex if strRow[0:4] == 'ENSG']
        dictCVCLToCCLEHeader = dict(zip(dictHeaderToCVCL.values(),
                                        dictHeaderToCVCL.keys()))

        # Identify the 25th percentile of abundance from all non-zero values
        arrayFlatAllCCLEData = \
            np.nan_to_num(np.ravel(dfCCLE.reindex(listCCLEGenes).values.astype(float)))
        arrayLogNonZeroCCLEData = np.log2(arrayFlatAllCCLEData[arrayFlatAllCCLEData > 0] + 1)
        numCCLEAbundThresh = np.percentile(arrayLogNonZeroCCLEData, 25)

        listNonHaemAndLymphLineCVCLsInCCLE = [strCVCL for strCVCL in listNonHaemAndLymphLineCVCLs
                                              if strCVCL in setCCLECVCLs]

        listCCLEColumnsToPlot = [dictCVCLToCCLEHeader[strCVCL]
                                 for strCVCL in listNonHaemAndLymphLineCVCLsInCCLE]

        listInitialNKMarkerENSGInCCLE = [strGene for strGene in listInitialNKMarkerENSG
                                         if strGene in setCCLEIndex]

        # Determine the median abundance of all genes across the CCLE solid cancer cell lines and compare this to
        #  the 25th percentile threshold derived above
        arrayGeneAbund = \
            np.log2(np.nan_to_num(
                dfCCLE[listCCLEColumnsToPlot].reindex(listInitialNKMarkerENSGInCCLE).values.astype(float)) + 1)
        arrayMedianIsBelowThresh = np.median(arrayGeneAbund, axis=1) < numCCLEAbundThresh

        # identify genes which do not show high expression across the CCLE data
        listENSGMedianBelowCCLEThresh = [listInitialNKMarkerENSGInCCLE[i]
                                         for i in np.where(arrayMedianIsBelowThresh)[0]]
        setGeneMedianBelowCCLEThresh = {dictENSGToHGNC[strGene] for strGene in listENSGMedianBelowCCLEThresh}

        # create a series of lists with output gene properties
        listIsLM22 = []
        listIsLM7 = []
        listIsHuntington = []
        listPassesBulkTests = []
        listPassesSingCellTests = []
        listBulkTestLogFC = []
        listBulkTestPVal = []
        listPassesCCLETests = []
        listRetainedGene = []

        # step through each gene and populate the output lists; '-' marks a negative/missing entry
        for strGene in listInitialNKMarkerHGNC:
            listIsHuntington.append(True if strGene in setHuntingtonUp else '-')
            listIsLM7.append(True if strGene in setLM7Up else '-')
            listIsLM22.append(True if strGene in setCIBERSORTCombined else '-')

            flagSingleCellTest = strGene in setGenesPassSingleCellTest
            listPassesSingCellTests.append(True if flagSingleCellTest else '-')

            listPassesBulkTests.append(True if strGene in setGenesPassBulkRNATest else '-')

            # NOTE(review): the retention flag below only requires adj.P.Val < 0.05, whereas the
            #  'Pass_bulk_tests' column additionally requires logFC > 1 -- confirm this is intended
            flagBulkTest = False
            numBulkRNAIndex = dictBulkRNAGeneToRow.get(strGene)
            if numBulkRNAIndex is None:
                listBulkTestLogFC.append('-')
                listBulkTestPVal.append('-')
            else:
                listBulkTestLogFC.append(arrayBulkLogFC[numBulkRNAIndex])
                listBulkTestPVal.append(arrayBulkAdjPVal[numBulkRNAIndex])
                if arrayBulkAdjPVal[numBulkRNAIndex] < 0.05:
                    flagBulkTest = True

            flagCCLETest = strGene in setGeneMedianBelowCCLEThresh
            listPassesCCLETests.append(True if flagCCLETest else '-')

            # a gene is retained in the final signature only if it passes all three tests
            listRetainedGene.append(bool(flagSingleCellTest and flagBulkTest and flagCCLETest))

        # load the gene properties into the appropriate columns of an output dataframe
        dfAllNKGenes['Pass_bulk_tests'] = pd.Series(listPassesBulkTests, index=listInitialNKMarkerHGNC)
        dfAllNKGenes['Pass_scRNA_tests'] = pd.Series(listPassesSingCellTests, index=listInitialNKMarkerHGNC)
        dfAllNKGenes['Pass_CCLE_abund_tests'] = pd.Series(listPassesCCLETests, index=listInitialNKMarkerHGNC)
        dfAllNKGenes['CursonsGuimaraes_sigGene'] = pd.Series(listRetainedGene, index=listInitialNKMarkerHGNC)

        dfAllNKGenes['LM22'] = pd.Series(listIsLM22, index=listInitialNKMarkerHGNC)
        dfAllNKGenes['LM7'] = pd.Series(listIsLM7, index=listInitialNKMarkerHGNC)
        dfAllNKGenes['Huntington'] = pd.Series(listIsHuntington, index=listInitialNKMarkerHGNC)

        dfAllNKGenes['BulkRNA_logFC'] = pd.Series(listBulkTestLogFC,
                                                  index=listInitialNKMarkerHGNC)

        dfAllNKGenes['BulkRNA_adjPVal'] = pd.Series(listBulkTestPVal,
                                                    index=listInitialNKMarkerHGNC)

        # step through the processed single-cell data and calculate information for output
        listSingleCellStats = ['min', 'firstQ', 'median', 'mean', 'thirdQ', 'max']
        listCellTypes = dfSingleCellRNASummaryStats['Non.malignant'].unique().tolist()

        listOutColumns = [strCellType + '_' + strStat
                          for strCellType in listCellTypes
                          for strStat in listSingleCellStats]

        # create an output dataframe for the single-cell RNA-seq data (NaN where no value is available)
        dfSingleCellDataForOutput = \
            pd.DataFrame(data=np.full((len(listInitialNKMarkerHGNC), len(listOutColumns)), np.nan),
                         index=listInitialNKMarkerHGNC,
                         columns=listOutColumns)

        # index the row positions of every gene within the single cell summary table
        dictGeneToSingleCellRows = {}
        for numRow, strDataGene in enumerate(dfSingleCellRNASummaryStats['Genes'].tolist()):
            dictGeneToSingleCellRows.setdefault(strDataGene, []).append(numRow)

        for strCellType in listCellTypes:
            # NOTE(review): str.match treats the cell type as a regular expression anchored at the start of
            #  the string, so a cell type whose name is a prefix of another would match several rows per gene
            #  and that gene/cell type combination would be left as NaN below -- confirm against the data
            arrayIsCellType = \
                dfSingleCellRNASummaryStats['Non.malignant'].str.match(strCellType).values.astype(bool)

            for strGene in listInitialNKMarkerHGNC:
                if strGene not in setGenesInSingleCellData:
                    continue

                listRowsOfInt = [numRow for numRow in dictGeneToSingleCellRows.get(strGene, [])
                                 if arrayIsCellType[numRow]]
                # only populate the output when the gene/cell type combination is unambiguous
                if len(listRowsOfInt) != 1:
                    continue

                numRow = listRowsOfInt[0]
                for strStat in listSingleCellStats:
                    # a single .loc call (rather than chained indexing) guarantees that the value is written
                    #  into the dataframe itself and not into a temporary copy
                    dfSingleCellDataForOutput.loc[strGene, strCellType + '_' + strStat] = \
                        float(dfSingleCellRNASummaryStats[strStat].iloc[numRow])

        # combine the gene information with the single cell data
        dfAllNKGenes = dfAllNKGenes.join(dfSingleCellDataForOutput.reindex(dfAllNKGenes.index.tolist()))

        # Output a table with the curated gene list and all associated information (this corresponds to Table S1
        #  of the manuscript)
        dfAllNKGenes.reindex(sorted(listInitialNKMarkerHGNC)).to_csv(
            os.path.join(Plot.strOutputFolder, 'NK_genes_curated.tsv'),
            header=True,
            index=True,
            sep='\t')

        return flagResult

#   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
#                                                   ANALYSIS FUNCTIONS
class Analyse:
    """Survival analyses for patient groups defined by thresholding one or two markers.

    The functions take no ``self`` argument and are called on the class itself. The input
    dataframe must contain the columns 'surv_time' and 'death_event' in addition to the
    marker column(s) being split.
    """

    @staticmethod
    def _fit_kaplan_meier(dfForAnalysis, listPatients, strLabel):
        """Fit a lifelines KaplanMeierFitter to the listed patients and return it."""
        kmfGroup = KaplanMeierFitter()
        kmfGroup.fit(
            dfForAnalysis['surv_time'].reindex(listPatients).values.astype(float),
            event_observed=dfForAnalysis['death_event'].reindex(listPatients),
            label=strLabel)
        return kmfGroup

    @staticmethod
    def _log_rank_between(dfForAnalysis, listGroupA, listGroupB):
        """Run a log-rank test comparing the survival of two patient groups."""
        # NB: label-based selection with .loc replaces Series.ix, which was removed in pandas 1.0
        return KMlogRankTest(
            dfForAnalysis['surv_time'].loc[listGroupA],
            dfForAnalysis['surv_time'].loc[listGroupB],
            event_observed_A=dfForAnalysis['death_event'].loc[listGroupA],
            event_observed_B=dfForAnalysis['death_event'].loc[listGroupB])

    @staticmethod
    def split_one_marker_three_partitions(strMarkerToSplit='undefined',
                                          dfForAnalysis=pd.DataFrame(),
                                          flagShowSplitValues=False,
                                          flagExcludeMarkerInLabel=False):
        """Split a group of patients across a single marker (gene, gene set score) & examine survival.

        Args:
            strMarkerToSplit: name of the column in dfForAnalysis used to split the patients.
            dfForAnalysis: dataframe indexed by patient, containing the marker column together
                with 'surv_time' and 'death_event'.
            flagShowSplitValues: currently unused; kept for interface compatibility.
            flagExcludeMarkerInLabel: if True the marker name is left out of the curve labels.

        Returns:
            A dictionary with the fitted Kaplan-Meier objects ('kmfLow', 'kmfMed', 'kmfHigh') and
            the pairwise log-rank test results ('LowVsMed', 'LowVsHigh', 'MedVsHigh').
        """

        # Note that this function is hard coded to split patients at the 33rd and 66th percentile to produce
        #  3 groups of (approximately) equal size
        arrayMarker = dfForAnalysis[strMarkerToSplit].values.astype(float)
        numThreshOne = np.percentile(arrayMarker, 33)
        numThreshTwo = np.percentile(arrayMarker, 66)

        seriesMarker = dfForAnalysis[strMarkerToSplit]
        listPatGroupLow = dfForAnalysis[seriesMarker <= numThreshOne].index.tolist()
        listPatGroupMed = dfForAnalysis[np.bitwise_and(seriesMarker > numThreshOne,
                                                       seriesMarker <= numThreshTwo)].index.tolist()
        listPatGroupHigh = dfForAnalysis[seriesMarker > numThreshTwo].index.tolist()

        # build the curve labels, optionally including the marker name
        strMarkerInLabel = '' if flagExcludeMarkerInLabel else strMarkerToSplit + ' '
        strLowGroup = 'Low ' + strMarkerInLabel + '($n$=' + '{}'.format(len(listPatGroupLow)) + ')'
        strMedGroup = 'Med ' + strMarkerInLabel + '($n$=' + '{}'.format(len(listPatGroupMed)) + ')'
        strHighGroup = 'High ' + strMarkerInLabel + '($n$=' + '{}'.format(len(listPatGroupHigh)) + ')'

        # fit a Kaplan-Meier estimator to each group
        kmfLow = Analyse._fit_kaplan_meier(dfForAnalysis, listPatGroupLow, strLowGroup)
        kmfMed = Analyse._fit_kaplan_meier(dfForAnalysis, listPatGroupMed, strMedGroup)
        kmfHigh = Analyse._fit_kaplan_meier(dfForAnalysis, listPatGroupHigh, strHighGroup)

        # perform a log-rank test between each pair of groups
        structLowVsMedKMLogRank = Analyse._log_rank_between(dfForAnalysis, listPatGroupLow, listPatGroupMed)
        structLowVsHighKMLogRank = Analyse._log_rank_between(dfForAnalysis, listPatGroupLow, listPatGroupHigh)
        structMedVsHighKMLogRank = Analyse._log_rank_between(dfForAnalysis, listPatGroupMed, listPatGroupHigh)

        return {'kmfLow': kmfLow,
                'kmfMed': kmfMed,
                'kmfHigh': kmfHigh,
                'LowVsMed': structLowVsMedKMLogRank,
                'LowVsHigh': structLowVsHighKMLogRank,
                'MedVsHigh': structMedVsHighKMLogRank}

    @staticmethod
    def split_two_markers_four_partitions(strMarkerOneToSplit='undefined',
                                          strMarkerTwoToSplit='undefined',
                                          dfForAnalysis=pd.DataFrame()):
        """Split a group of patients across two markers (gene, gene set score) & examine survival.

        Args:
            strMarkerOneToSplit: name of the first marker column in dfForAnalysis.
            strMarkerTwoToSplit: name of the second marker column in dfForAnalysis.
            dfForAnalysis: dataframe indexed by patient, containing both marker columns together
                with 'surv_time' and 'death_event'.

        Returns:
            A dictionary with the fitted Kaplan-Meier objects ('kmfLoLo', 'kmfLoHi', 'kmfHiLo',
            'kmfHiHi') and the six pairwise log-rank test results (e.g. 'LoLoVsHiHi').
        """

        # NB: this is hard coded to split the groups by the median value across both markers; this can produce
        #  groups of uneven size when markers are correlated.
        arrayMarkerOne = dfForAnalysis[strMarkerOneToSplit].values.astype(float)
        arrayMarkerTwo = dfForAnalysis[strMarkerTwoToSplit].values.astype(float)
        numThreshVarOne = np.percentile(arrayMarkerOne, 50)
        numThreshVarTwo = np.percentile(arrayMarkerTwo, 50)

        arrayOneIsLo = arrayMarkerOne <= numThreshVarOne
        arrayOneIsHi = arrayMarkerOne > numThreshVarOne
        arrayTwoIsLo = arrayMarkerTwo <= numThreshVarTwo
        arrayTwoIsHi = arrayMarkerTwo > numThreshVarTwo

        # identify patient groups which are high/low for the two markers
        listLoLo = dfForAnalysis[np.bitwise_and(arrayOneIsLo, arrayTwoIsLo)].index.tolist()
        listLoHi = dfForAnalysis[np.bitwise_and(arrayOneIsLo, arrayTwoIsHi)].index.tolist()
        listHiLo = dfForAnalysis[np.bitwise_and(arrayOneIsHi, arrayTwoIsLo)].index.tolist()
        listHiHi = dfForAnalysis[np.bitwise_and(arrayOneIsHi, arrayTwoIsHi)].index.tolist()

        # create a Kaplan-Meier fitter object (from lifelines) for each group and then fit
        kmfLoLo = Analyse._fit_kaplan_meier(
            dfForAnalysis, listLoLo,
            strMarkerOneToSplit + '$^{Lo}$/' + strMarkerTwoToSplit +
            '$^{Lo}$ ($n$=' + '{}'.format(len(listLoLo)) + ')')

        kmfLoHi = Analyse._fit_kaplan_meier(
            dfForAnalysis, listLoHi,
            strMarkerOneToSplit + '$^{Lo}$/' + strMarkerTwoToSplit +
            '$^{Hi}$ ($n$=' + '{}'.format(len(listLoHi)) + ')')

        kmfHiLo = Analyse._fit_kaplan_meier(
            dfForAnalysis, listHiLo,
            strMarkerOneToSplit + '$^{Hi}$/' + strMarkerTwoToSplit +
            '$^{Lo}$ ($n$=' + '{}'.format(len(listHiLo)) + ')')

        kmfHiHi = Analyse._fit_kaplan_meier(
            dfForAnalysis, listHiHi,
            strMarkerOneToSplit + '$^{Hi}$/' + strMarkerTwoToSplit +
            '$^{Hi}$ ($n$=' + '{}'.format(len(listHiHi)) + ')')

        # perform a Kaplan-Meier log-rank test between each of the groups
        structLoLoVsLoHiKMLogRank = Analyse._log_rank_between(dfForAnalysis, listLoLo, listLoHi)
        structLoLoVsHiLoKMLogRank = Analyse._log_rank_between(dfForAnalysis, listLoLo, listHiLo)
        structLoLoVsHiHiKMLogRank = Analyse._log_rank_between(dfForAnalysis, listLoLo, listHiHi)
        structLoHiVsHiLoKMLogRank = Analyse._log_rank_between(dfForAnalysis, listLoHi, listHiLo)
        structLoHiVsHiHiKMLogRank = Analyse._log_rank_between(dfForAnalysis, listLoHi, listHiHi)
        structHiLoVsHiHiKMLogRank = Analyse._log_rank_between(dfForAnalysis, listHiLo, listHiHi)

        # return all of these results as a dictionary
        return {'kmfLoLo': kmfLoLo,
                'kmfLoHi': kmfLoHi,
                'kmfHiLo': kmfHiLo,
                'kmfHiHi': kmfHiHi,
                'LoLoVsLoHi': structLoLoVsLoHiKMLogRank,
                'LoLoVsHiLo': structLoLoVsHiLoKMLogRank,
                'LoLoVsHiHi': structLoLoVsHiHiKMLogRank,
                'LoHiVsHiLo': structLoHiVsHiLoKMLogRank,
                'LoHiVsHiHi': structLoHiVsHiHiKMLogRank,
                'HiLoVsHiHi': structHiLoVsHiHiKMLogRank}


#   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
#                                                   PLOTTING FUNCTIONS
class Plot:
    """Container for the figure-generating functions of the manuscript.

    The methods visible in this file are defined without a `self` parameter and read the shared settings below through
    the class name (e.g. `Plot.numFontSize`), so they are called on the class itself rather than on an instance.
    """

    # folder joined onto file names when figures/tables are written; evaluated once, when the class is defined
    strOutputFolder = os.getcwd()
    # base font size for figure text; plotting code scales it (e.g. *0.5, *0.7) for smaller annotations
    numFontSize = 10
    # file extensions that each figure is saved as
    listFileFormat=['png', 'pdf']

    def fig_one_and_supp_table_one(flagResult=False,
                strTempDataFile='per_gene_hazard_ratio.pickle',
                flagCalculateHRs=False):
        """Produce figure one within the corresponding manuscript, plus the Cox PH summary table.

        The function is defined without `self`; call it on the class: `Plot.fig_one_and_supp_table_one()`.

        Work performed, in order:
          * load the TCGA SKCM data and keep metastatic ('-06') samples from patients with age data
          * fit a Cox proportional hazards model with age, sex and metastatic site as covariates and write its
            summary to 'AgeGenderSite_CoxPH.tsv' in Plot.strOutputFolder
          * fit (or load from strTempDataFile) one Cox PH model per gene, with patient age as the only other covariate
          * panel (A): the (up to) 100 most significant genes with a negative hazard coefficient, ranked by
            coefficient, with confidence intervals and a heatmap of log10 p-values scaled by the number of
            protective genes (Bonferroni-like); genes annotated under GO:0002376 are shown in bold
          * panels (B)-(F): Kaplan-Meier curves for patients partitioned by the listed covariates, with asterisks
            marking significant log-rank comparisons (* p < 0.05, ** p < 1E-3, *** p < 1E-6)
          * save the figure as 'Fig1.<format>' for every format in Plot.listFileFormat

        The x-axis code treats 'surv_time' as months: ticks are placed every 60 units and labelled in 5-year steps.

        Args:
            flagResult: value handed back unchanged to the caller.
            strTempDataFile: path of the pickle used to cache the per-gene hazard ratio table.
            flagCalculateHRs: if True (or if strTempDataFile does not exist) the per-gene models are re-fitted and
                the cache file is overwritten; otherwise the cached table is loaded.

        Returns:
            flagResult, unchanged.
        """

        # specify some parameters for spacing sub-figures (in figure-fraction units)
        numXSpacerForSigAnnot = 0.009
        numYSpacerForSigAnnot = 0.023

        # specify an RNA abundance threshold to discard genes which have very low abundance/no signal for survival
        #  analysis
        numPercForGlobalThresh = 10

        # number of 'protective' genes displayed in panel (A)
        numTopGenesToShow = 100

        # specify co-variates for the survival plots
        listCoVarsToPlotSurvEffect = [['Age'], ['Age','IFNG'], ['Age','KLRD1'], ['Age','IL15'], ['Age','B2M']]
        listMarkerSubPlotLabels = ['(B)', '(C)', '(D)', '(E)', '(F)']
        numMarkerPanels = len(listCoVarsToPlotSurvEffect)

        # specify an array of x-positions used for labelling significance of the survival curve differences
        arrayStartXForSig = np.array([0.815, 0.80, 0.785, 0.80, 0.80], dtype=float)

        # for the figure all genes associated with "GO:0002376 (immune system process)" were bolded
        listGOForHazRatioDisp = ['GO:0002376']

        # log-rank comparisons to annotate, as (result key, level of upper bracket end, level of lower bracket end);
        #  a level is the position of the patient group within the legend, counted from the top
        listThreeWayComparisons = [('LowVsMed', 0, 1),
                                   ('LowVsHigh', 0, 2),
                                   ('MedVsHigh', 1, 2)]
        listFourWayComparisons = [('LoLoVsLoHi', 0, 1),
                                  ('LoLoVsHiLo', 0, 2),
                                  ('LoLoVsHiHi', 0, 3),
                                  ('LoHiVsHiLo', 1, 2),
                                  ('LoHiVsHiHi', 1, 3),
                                  ('HiLoVsHiHi', 2, 3)]

        def significance_stars(numPVal):
            """Convert a (significant) p-value into the asterisk string used on the figure."""
            if numPVal < 1E-6:
                return '***'
            if numPVal < 1E-3:
                return '**'
            return '*'

        def draw_significance_brackets(handFigIn, handAxIn, dictLogRank, listComparisons,
                                       numStartX, numStartY, numTextXOffset):
            """Draw one vertical bracket plus asterisks for every comparison with p < 0.05.

            Every comparison is tested independently of the others, and each bracket that is drawn is shifted one
            spacer further to the left than the previous one, so that skipped comparisons leave no gaps.
            """
            numDrawn = 0
            for strComparison, iUpperLevel, iLowerLevel in listComparisons:
                numPVal = dictLogRank[strComparison].p_value
                if numPVal < 5E-2:
                    numX = numStartX - (numDrawn*numXSpacerForSigAnnot)

                    handFigIn.text(numX + numTextXOffset,
                                   numStartY + 0.002 - iUpperLevel*numYSpacerForSigAnnot,
                                   significance_stars(numPVal),
                                   rotation=90,
                                   fontsize=Plot.numFontSize*0.5,
                                   ha='center', va='bottom')

                    # an 'arrow' without a head is used to draw the line in figure coordinates
                    handAxIn.annotate('',
                                      xy=[numX, numStartY - iUpperLevel*numYSpacerForSigAnnot],
                                      xycoords='figure fraction',
                                      xytext=[numX, numStartY - iLowerLevel*numYSpacerForSigAnnot],
                                      textcoords='figure fraction',
                                      annotation_clip=False,
                                      arrowprops=dict(facecolor='black',
                                                      linewidth=1,
                                                      arrowstyle='-'))

                    numDrawn = numDrawn + 1

        # without a cached result the per-gene models must be fitted
        if not os.path.exists(strTempDataFile):
            flagCalculateHRs = True

        # load the TCGA data as a merged pandas dataframe
        dictTCGASKCM = PreProc.tcga_skcm_data(flagPerformExtraction=False)
        dfTCGA = dictTCGASKCM['df']
        listTCGAGenes = dictTCGASKCM['listGenes']
        numTranscripts = len(listTCGAGenes)

        # Load the dictionary with patient groups
        dictPatGroups = PreProc.split_tcga_met_vs_pri()

        # select the subset of metastatic tumours samples where patient age data are available
        listMetOnlyPatientsWithAge = dictPatGroups['MetOnlyPatWithAge']
        # append the '-06' suffix indicative of metastatic samples
        listMetOnlySamplesWithAge = [strPat + '-06' for strPat in listMetOnlyPatientsWithAge]
        numPatients = len(listMetOnlySamplesWithAge)
        dfTCGAMets = dfTCGA.loc[listMetOnlySamplesWithAge]

        # determine a threshold for abundance: a percentile of all values which exceed the global minimum
        arrayFlatAllAbundData = np.ravel(np.nan_to_num(dfTCGAMets[listTCGAGenes].values.astype(float)))
        numMinVal = np.min(arrayFlatAllAbundData)
        numGlobalThresh = np.percentile(arrayFlatAllAbundData[arrayFlatAllAbundData > numMinVal],
                                        numPercForGlobalThresh)

        # convert patient age from days to years
        arrayPatientAge = dfTCGAMets['Age'].values.astype(float)
        dfTCGAMets['Age'] = arrayPatientAge/365.25

        # convert patient sex to a Boolean identifying female patients --> use males (larger group) as baseline hazard
        dfTCGAMets['isFemale'] = np.array([strSex == 'female' for strSex in dfTCGAMets['gender'].tolist()])

        # Load a dataframe containing the metastatic tumour sites
        dfMetSites = PreProc.tcga_skcm_met_sites()

        # Create a Cox proportional hazards model fitter instance (from lifelines)
        structAgeGenderSitePropHazard = CoxPHFitter()

        # Extract required data into a dataframe for the Cox PH model where age, gender & metastatic location are the
        #  only covariates
        dfForCoxPH = dfTCGAMets[['Age', 'isFemale', 'death_event', 'surv_time']].join(dfMetSites)
        # Fit the Cox PH model
        structAgeGenderSitePropHazard.fit(dfForCoxPH, duration_col='surv_time',
                                          event_col='death_event', step_size=0.1)

        # extract the Cox PH model summary as a dataframe and output
        dfHazardSummary = structAgeGenderSitePropHazard.summary
        dfHazardSummary.to_csv(os.path.join(Plot.strOutputFolder,'AgeGenderSite_CoxPH.tsv'),
                               sep='\t', header=True)

        if flagCalculateHRs:
            # step through every gene and produce a Cox PH model with patient age as the only other covariate
            listHRColumns = ['Coef', 'Lower CI', 'Upper CI', 'p-value']
            dfPerGeneHazRatio = pd.DataFrame(index=listTCGAGenes,
                                             columns=listHRColumns,
                                             data=np.zeros((numTranscripts, len(listHRColumns)), dtype=float))
            # results are written by (row, column) position: chained `df[col].iloc[i] = x` assignment may write to a
            #  temporary copy, and label-based writes would touch every row sharing a duplicated gene name
            iColCoef = listHRColumns.index('Coef')
            iColLowerCI = listHRColumns.index('Lower CI')
            iColUpperCI = listHRColumns.index('Upper CI')
            iColPVal = listHRColumns.index('p-value')

            for iTranscript, strGene in enumerate(listTCGAGenes):
                print('iTranscript {} of {}'.format(iTranscript, numTranscripts))

                # check that the gene passes transcript abundance thresholds
                arrayGeneData = dfTCGAMets[strGene].values.astype(float)
                numAbundRange = np.ptp(arrayGeneData)
                numFractAboveGlobalPercThresh = float(np.sum(arrayGeneData > numGlobalThresh))/float(numPatients)

                if numAbundRange > 0.5 and numFractAboveGlobalPercThresh > 0.20:
                    # intialise a Cox proportional hazards model, extract the data subset and perform fitting
                    structAgeGenePropHazard = CoxPHFitter()
                    dfForCoxPH = dfTCGAMets[['Age','death_event','surv_time', strGene]]
                    structAgeGenePropHazard.fit(dfForCoxPH, duration_col='surv_time', event_col='death_event')

                    # extract the corresponding parameters for the survival analysis
                    dfPerGeneHazRatio.iloc[iTranscript, iColCoef] = \
                        structAgeGenePropHazard.hazards_[strGene].loc['coef']
                    arrayConfInt = \
                        structAgeGenePropHazard.confidence_intervals_[strGene].loc[:].values.astype(float)
                    dfPerGeneHazRatio.iloc[iTranscript, iColLowerCI] = arrayConfInt[0]
                    dfPerGeneHazRatio.iloc[iTranscript, iColUpperCI] = arrayConfInt[1]
                    numGeneIndexInHR = structAgeGenePropHazard.hazards_.columns.tolist().index(strGene)
                    dfPerGeneHazRatio.iloc[iTranscript, iColPVal] = \
                        structAgeGenePropHazard._compute_p_values()[numGeneIndexInHR]
                else:
                    # genes which are not tested keep a coefficient of zero and are given a p-value of one
                    dfPerGeneHazRatio.iloc[iTranscript, iColPVal] = 1.0

            dfPerGeneHazRatio.to_pickle(strTempDataFile)

        else:
            # NOTE: unpickling executes arbitrary code; only load cache files written by this function
            dfPerGeneHazRatio = pd.read_pickle(strTempDataFile)

        # identify the most significant 'protective' genes, i.e. those with a negative hazard coefficient
        dfProtectiveGenes = dfPerGeneHazRatio[dfPerGeneHazRatio['Coef'] < 0].copy(deep=True)
        arrayTopGeneRows = np.argsort(dfProtectiveGenes['p-value'].values.astype(float))[0:numTopGenesToShow]

        # extract these into a dataframe
        dfTopGenes = dfProtectiveGenes.iloc[arrayTopGeneRows].copy(deep=True)
        # determine gene ranking by the average hazard ratio coefficient
        arrayTopGenesRankedByCoef = np.argsort(dfTopGenes['Coef'].values.astype(float))
        # may be fewer than numTopGenesToShow when few genes have a negative coefficient
        numTopGenes = len(arrayTopGenesRankedByCoef)
        listTopGeneNames = dfTopGenes.index.tolist()

        numMinCoefDisp = -2.3

        # identify all genes present within the specified GO categories (immune function) and children nodes/GO
        #  annotations; only membership is tested below, hence a set
        setGenesForHazRatioDisp = set()
        for strGOAnnot in listGOForHazRatioDisp:
            setGenesForHazRatioDisp.update(
                GeneOntology.Map.genes_with_starting_category(strStartingCategory=strGOAnnot))

        # create the output figure
        handFig = plt.figure()
        handFig.set_size_inches(w=5,h=7)

        handAx1 = handFig.add_axes([0.12, 0.07, 0.17, 0.90])
        structAx1Pos = handAx1.get_position()

        # each gene occupies one unit on the y-axis and is drawn at the centre of its row
        arrayGeneYPos = np.arange(numTopGenes) + 0.5

        # Plot the hazard ratio coefficients
        handAx1.plot(dfTopGenes['Coef'].iloc[arrayTopGenesRankedByCoef].values.astype(float),
                     arrayGeneYPos,
                     'o', color='r',
                     zorder=8, markersize=1.5)
        # Plot the confidence intervals for the hazard ratio coefficients
        for iGene, iRow in enumerate(arrayTopGenesRankedByCoef):
            handAx1.plot([float(dfTopGenes['Lower CI'].iloc[iRow]),
                          float(dfTopGenes['Upper CI'].iloc[iRow])],
                         [0.5+float(iGene), 0.5+float(iGene)],
                         '-', color='k', zorder=7, lw=1)

        # specify a number of axis/tick parameters for tidying up the figure
        handAx1.set_xticks([-2, -1, 0])
        handAx1.set_yticks([])
        handAx1.set_ylim([0, numTopGenes])
        handAx1.set_xlim([numMinCoefDisp-0.01, 0.1])
        handAx1.axvline(x=0, ymin=0, ymax=1, lw=0.5, c='0.5')
        handAx1.set_title('Hazard coefficient', fontsize=Plot.numFontSize)

        # step through each gene and if this is present within the GO annotated list weight as bold
        for iGene, iRow in enumerate(arrayTopGenesRankedByCoef):
            strGene = listTopGeneNames[iRow]
            dictLabelFormat = dict(fontsize=4.5, ha='right', va='center', style='italic')
            if strGene in setGenesForHazRatioDisp:
                dictLabelFormat['weight'] = 'bold'
            handAx1.text(numMinCoefDisp-0.1, float(iGene)+0.5, strGene, **dictLabelFormat)

        # draw in a horizontal line every fourth gene to improve readability
        for numY in np.arange(start=0, stop=numTopGenes-1, step=4):
            # property names are case-sensitive in current matplotlib, hence 'alpha' rather than 'ALPHA'
            handAx1.axhline(y=numY, xmin=0, xmax=1, lw=0.5, c='0.5', alpha=0.8, zorder=2)

        # extract the p-values from the data and multiply by the number of (protective) genes for the equivalent of
        #  a Bonferroni correction; the order is reversed because image rows are drawn from the top down
        arrayLogPVals = np.log10(
            dfTopGenes['p-value'].iloc[arrayTopGenesRankedByCoef[::-1]].values.astype(float)
            * np.shape(dfProtectiveGenes)[0])
        arrayLogPValsToPlot = arrayLogPVals.reshape((numTopGenes, 1))

        # create the axis for the p-values and plot as a heatmap
        handAx2 = handFig.add_axes([0.31, 0.07, 0.03, 0.90])
        handPVals = handAx2.imshow(
            arrayLogPValsToPlot,
            interpolation='nearest',
            cmap=plt.cm.Reds_r,
            vmin=-9,
            vmax=0,
            aspect='auto')
        handAx2.set_xticks([])
        handAx2.set_yticks([])

        # create an axis for the p-value heatmap colormap
        handAx2CMap = handFig.add_axes([0.39, 0.65, 0.02, 0.12])
        structCMapPos = handAx2CMap.get_position()
        handColorBarPVals = handFig.colorbar(handPVals,
                                             cax=handAx2CMap)
        handColorBarPVals.ax.tick_params(labelsize=Plot.numFontSize*0.7)

        # tidy up the tick locations
        handColorBarPVals.locator = plt.MaxNLocator(3)
        handColorBarPVals.update_ticks()
        handFig.text(structCMapPos.x0 - structCMapPos.width,
                     structCMapPos.y0 + 0.5*structCMapPos.height,
                     'log$_{10}$($p$-value)',
                     ha='center', va='center',
                     rotation=90,
                     fontsize=0.7*Plot.numFontSize)

        # label the sub-figure
        handFig.text(structAx1Pos.x0 - 0.48*structAx1Pos.width,
                     structAx1Pos.y0 + 1.02*structAx1Pos.height,
                     '(A)',
                     ha='center', va='center',
                     fontsize=Plot.numFontSize,
                     weight='bold')

        # use gridspec to layout the survival plots
        arrayGridSpec = gridspec.GridSpec(nrows=numMarkerPanels, ncols=1,
                                          left=0.62, right=0.97,
                                          bottom=0.07, top=0.97,
                                          wspace=0.4, hspace=0.45)

        # step through each specified marker
        for iMarker, listCoVars in enumerate(listCoVarsToPlotSurvEffect):

            if len(listCoVars) == 1:
                # if only a single marker is used to split the samples split into 3 partitions (Low/Med/High)
                dictKMFs = Analyse.split_one_marker_three_partitions(strMarkerToSplit=listCoVars[0],
                                                                     dfForAnalysis=dfTCGAMets)
                listKMFsToPlot = [dictKMFs['kmfLow'], dictKMFs['kmfMed'], dictKMFs['kmfHigh']]
                listComparisons = listThreeWayComparisons
                numSigStartYScale = 1.02
                numSigTextXOffset = 0.003

            elif len(listCoVars) == 2:
                # if two markers are given split the samples into 4 partitions (LoLo, LoHi, HiLo, HiHi)
                dictKMFs = Analyse.split_two_markers_four_partitions(strMarkerOneToSplit=listCoVars[0],
                                                                     strMarkerTwoToSplit=listCoVars[1],
                                                                     dfForAnalysis=dfTCGAMets)
                listKMFsToPlot = [dictKMFs['kmfLoLo'], dictKMFs['kmfLoHi'],
                                  dictKMFs['kmfHiLo'], dictKMFs['kmfHiHi']]
                listComparisons = listFourWayComparisons
                numSigStartYScale = 1.20
                numSigTextXOffset = 0.005

            else:
                raise ValueError('Survival plots require one or two co-variates, received: '
                                 + '{}'.format(listCoVars))

            handAx = handFig.add_subplot(arrayGridSpec[iMarker])
            structAxPos = handAx.get_position()

            # plot the Kaplan-Meier function for each patient subset
            for kmf in listKMFsToPlot:
                kmf.plot(ax=handAx)

            # tidy up the axis labeling, limits and tick marks; a tick every 60 months is labelled every 5 years
            handAx.set_ylim([0, 1])
            arrayXLim = handAx.get_xlim()
            handAx.set_xlim([0, arrayXLim[1]])
            arrayXTicksInMo = np.arange(start=0, stop=arrayXLim[1], step=60)
            arrayXTicksInYr = np.arange(start=0, stop=((arrayXTicksInMo[-1])/12)+1, step=5, dtype=int)
            handAx.set_xticks(arrayXTicksInMo)
            handAx.set_yticks([0, 0.5, 1.0])

            # for the final plot label the x-axis
            if iMarker == numMarkerPanels - 1:
                handAx.set_xlabel('Time (years)', fontsize=Plot.numFontSize)
                handAx.set_xticklabels(arrayXTicksInYr)
                handAx.tick_params(axis='x', labelsize=Plot.numFontSize)
            else:
                handAx.set_xlabel('')
                handAx.set_xticklabels([])

            # for all plots label the y-axis
            handAx.set_ylabel('Overall survival', fontsize=Plot.numFontSize)
            handAx.tick_params(axis='y', labelsize=Plot.numFontSize)

            # output the legend labelling each of the KM curves
            handAx.legend(loc='lower right',
                          bbox_to_anchor=(1.10, 0.62),
                          fontsize=Plot.numFontSize*0.5,
                          scatterpoints=1,
                          framealpha=1,
                          ncol=1)

            #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
            # Annotate the survival curves with significance
            #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
            # the Analyse.split_* functions also return the log-rank test results for each pair of groups
            draw_significance_brackets(handFig, handAx, dictKMFs, listComparisons,
                                       numStartX=arrayStartXForSig[iMarker],
                                       numStartY=structAxPos.y0 + numSigStartYScale*structAxPos.height,
                                       numTextXOffset=numSigTextXOffset)

            # label the sub-figure
            handFig.text(structAxPos.x0 - 0.40*structAxPos.width,
                         structAxPos.y0 + 1.10*structAxPos.height,
                         listMarkerSubPlotLabels[iMarker],
                         ha='center', va='center',
                         fontsize=Plot.numFontSize,
                         weight='bold')

            # hide the right and top spines
            handAx.spines['right'].set_visible(False)
            handAx.spines['top'].set_visible(False)
            # only show ticks on the left and bottom spines
            handAx.yaxis.set_ticks_position('left')
            handAx.xaxis.set_ticks_position('bottom')

        # the output format is passed explicitly ('ext' is not a savefig argument)
        for strFormat in Plot.listFileFormat:
            handFig.savefig(os.path.join(Plot.strOutputFolder, 'Fig1.' + strFormat), format=strFormat, dpi=300)

        plt.close(handFig)

        return flagResult

    def fig_two(flagResult=False):
        """Draw panels (B) and (C) of Fig. 2 and save them as 'Fig2_BC.<format>'.

        Panel (B) is a heat map of log2(x + 1) transformed GSE60424 values for
        the curated NK signature genes, with samples ordered by sorted cell
        type. Panel (C) is a heat map of the GSE24759 values for the same genes,
        with samples ordered by cell-type group. The '(A)' label is also placed
        here as an anchor because that panel is ported across from an R plot.

        One file is written to Plot.strOutputFolder for every entry of
        Plot.listFileFormat.

        :param flagResult: value handed straight back to the caller
        :return: flagResult, unchanged
        """

        dictGSE24759Subsets = PreProc.gse24759_subsets()
        listOfListsGSE24759SampleTypeGrouped = dictGSE24759Subsets['listOfListsSampleTypeGrouped']
        listGSE24759SampleTypeLabels = dictGSE24759Subsets['listSampleTypeLabels']
        # flatten the grouped cell types into a single ordered list
        listGSE24759SmpTypeOrder = [strCellType
                                    for listGroup in listOfListsGSE24759SampleTypeGrouped
                                    for strCellType in listGroup]

        # cell type keys (for matching samples) and the corresponding display labels
        listGSE60424TypeOrder = ['B-cells', 'CD4', 'CD8', 'Monocytes', 'NK', 'Neutrophils']
        listGSE60424TypeDisp = ['B-cells', 'CD4$^{+}$ T cells', 'CD8$^{+}$ T cells',
                                'Monocytes', 'NK cells', 'Neutrophils']

        dictPatGroups = PreProc.split_tcga_met_vs_pri()
        listPatsMetTumOnly = dictPatGroups['MetOnlyPat']
        # sample identifiers are the patient identifiers with a '-06' suffix
        listPatsMetTumOnlySamples = [strPat + '-06' for strPat in listPatsMetTumOnly]

        # load the TCGA data as a merged pandas dataframe
        dictTCGASKCM = PreProc.tcga_skcm_data(flagPerformExtraction=False)
        dfTCGA = dictTCGASKCM['df']
        listTCGAGenes = dictTCGASKCM['listGenes']

        dfTCGAMets = dfTCGA.loc[listPatsMetTumOnlySamples]

        # order the metastatic samples by increasing NK score
        # (builtin float replaces the np.float alias, which NumPy 1.24 removed)
        arraySortedByNKScore = np.argsort(dfTCGAMets['NK Score'].values.astype(float))
        listMetSampleIndex = dfTCGAMets.index.tolist()
        listMetsSortedByNKScore = [listMetSampleIndex[i] for i in arraySortedByNKScore]

        # the curated NK signature: rows flagged in the 'CursonsGuimaraes_sigGene' column
        dfNKSigCuration = pd.read_table(os.path.join(Plot.strOutputFolder, 'NK_genes_curated.tsv'),
                                        sep='\t', header=0, index_col=0)
        listLocalNKSigGenes = dfNKSigCuration[dfNKSigCuration['CursonsGuimaraes_sigGene'] == True].index.tolist()

        # restrict to signature genes which are also present in the TCGA gene list
        listNKUpGenesInTCGA = sorted(list(set(listLocalNKSigGenes).intersection(set(listTCGAGenes))))

        # only the row labels (gene names) of this frame are used below, for the shared gene labels
        dfTCGANKGenes = dfTCGAMets[listNKUpGenesInTCGA].loc[listMetsSortedByNKScore].transpose()
        numNKGenesInTCGA = len(dfTCGANKGenes.index)

        print('Loading sorted cell data..')
        dfGSE24759, dfGSE24759SmpToType = PreProc.gse24759_data()
        dfGSE60424, dictGSE60424SmpToType = PreProc.gse60424_data()

        # order the GSE60424 samples (columns) by cell type; note this is a sub-string match of the
        #  sample's type within the cell type key
        listGSE64024SmpOrder = []
        for strCellType in listGSE60424TypeOrder:
            for strSample in dfGSE60424.columns.tolist():
                if dictGSE60424SmpToType[strSample] in strCellType:
                    listGSE64024SmpOrder.append(strSample)

        dfGSE60424NKGenes = dfGSE60424[listGSE64024SmpOrder].loc[listNKUpGenesInTCGA]

        arrayLogGSE60424Data = np.log2(dfGSE60424NKGenes.values.astype(float) + 1)

        # NOTE(review): .index of a boolean comparison holds every label, so this line does not
        #  restrict the samples to 'peripheral blood'; the original behaviour is retained here so
        #  that the figure output is unchanged -- confirm whether filtering was intended
        listGSE24759SmpOfInt = (dfGSE24759SmpToType.loc['Source']=='peripheral blood').index.tolist()

        # order the GSE24759 samples by cell type (sub-string match against the sample 'Title') and
        #  count the number of samples for each cell type
        listGSE24759SmpOrder = []
        arrayGSE24759NumEachType = np.zeros(len(listGSE24759SmpTypeOrder), dtype=int)
        for strCellType in listGSE24759SmpTypeOrder:
            numOfType = 0
            for strSample in listGSE24759SmpOfInt:
                if strCellType in dfGSE24759SmpToType[strSample].loc['Title']:
                    listGSE24759SmpOrder.append(strSample)
                    numOfType = numOfType + 1
            arrayGSE24759NumEachType[listGSE24759SmpTypeOrder.index(strCellType)] = numOfType

        # score every ordered GSE24759 sample with the NK gene set
        # NOTE(review): these scores are not used further within this function
        arrayGSE24759NKScores = np.zeros(len(listGSE24759SmpOrder), dtype=float)
        for iSample in range(len(listGSE24759SmpOrder)):
            strSample = listGSE24759SmpOrder[iSample]
            arrayGSE24759NKScores[iSample] = GeneSetScoring.FromInput.single_sample_rank_score(
                listAllGenes=dfGSE24759.index.tolist(),
                arrayTranscriptAbundance=dfGSE24759[strSample].values.astype(float),
                listUpGenesToScore=listNKUpGenesInTCGA,
                flagApplyNorm=True)

        # gene names which are looked up under a different row label within the GSE24759 data
        dictGeneNameTCGAToGSE24759 = {'CD247': 'CD3Z',
                                      'IKZF3':'ZNFN1A3',
                                      'KLRC1': 'KLRC1 /// KLRC2'
                                      }

        # build the gene x sample array for panel (C); genes with neither a direct match nor an
        #  alias are filled with NaN
        dfGSE24759Ordered = dfGSE24759[listGSE24759SmpOrder]
        setGSE24759Genes = set(dfGSE24759.index.tolist())
        arrayGSE24759Out = np.zeros((len(listNKUpGenesInTCGA), len(listGSE24759SmpOrder)), dtype=float)
        for iGene, strGene in enumerate(listNKUpGenesInTCGA):
            if strGene in setGSE24759Genes:
                arrayGSE24759Out[iGene,:] = dfGSE24759Ordered.loc[strGene]
            elif strGene in dictGeneNameTCGAToGSE24759:
                strAlias = dictGeneNameTCGAToGSE24759[strGene]
                arrayGSE24759Out[iGene,:] = dfGSE24759Ordered.loc[strAlias]
            else:
                arrayGSE24759Out[iGene,:] = np.nan

        dfGSE24759Out = pd.DataFrame(data=arrayGSE24759Out,
                                     index=listNKUpGenesInTCGA,
                                     columns=listGSE24759SmpOrder)

        handFig = plt.figure()
        handFig.set_size_inches(w=6, h=4)

        # # # # # # # # # # # #
        # # GSE60424 - Transcript abundance data
        handAx = handFig.add_axes([0.35, 0.10, 0.12, 0.75])
        structAxPos = handAx.get_position()

        handGSE60424Abund = handAx.matshow(arrayLogGSE60424Data,
                                           cmap=plt.cm.viridis,
                                           vmin=np.min(np.ravel(arrayLogGSE60424Data)),
                                           vmax=np.max(np.ravel(arrayLogGSE60424Data)),
                                           aspect='auto')
        handAx.set_xticks([])
        handAx.set_yticks([])

        # the column boundaries below use an equal number of samples for every cell type
        numSampPerType = len(listGSE64024SmpOrder)/len(listGSE60424TypeOrder)
        for iCellType in range(len(listGSE60424TypeOrder)):

            numStartColIndex = int(iCellType*numSampPerType)
            numEndColIndex = int((iCellType+1)*numSampPerType)

            # NOTE(review): this condition is always true within the loop, so a separator is also
            #  drawn after the final cell type -- confirm whether len(...)-1 was intended
            if iCellType < len(listGSE60424TypeOrder):
                # black line with a thinner white line over the top, as the separator
                handAx.axvline(x=numEndColIndex-0.5,
                               ymin=0,
                               ymax=1,
                               color='k',
                               linewidth=0.75)
                handAx.axvline(x=numEndColIndex-0.5,
                               ymin=0,
                               ymax=1,
                               color='w',
                               linewidth=0.5)

            handAx.text(np.mean([numStartColIndex, numEndColIndex])-1.5,
                        -0.6,
                        listGSE60424TypeDisp[iCellType],
                        fontsize=Plot.numFontSize*0.5,
                        ha='left', va='bottom',
                        rotation=50)

        handFig.text(structAxPos.x0,
                     structAxPos.y0 + 1.16*structAxPos.height,
                     '(B)',
                     fontsize=Plot.numFontSize*0.9,
                     ha='center', va='center',
                     fontweight='bold')

        # use this to anchor the (A) subpanel label which is being ported across from an R plot
        handFig.text(0.018,
                     structAxPos.y0 + 1.16 * structAxPos.height,
                     '(A)',
                     fontsize=Plot.numFontSize * 0.9,
                     ha='center', va='center',
                     fontweight='bold')

        # color bar for panel (B)
        handGSE60424CMap = handFig.add_axes([0.35, 0.06, 0.07, 0.02])

        structAxPos = handGSE60424CMap.get_position()
        handColorBarPVals = handFig.colorbar(handGSE60424Abund,
                                             cax=handGSE60424CMap,
                                             ticks=[0, 4, 8, 12],
                                             orientation='horizontal')
        handColorBarPVals.ax.tick_params(labelsize=Plot.numFontSize * 0.5)

        handFig.text(structAxPos.x0 + 1.05 * structAxPos.width,
                     structAxPos.y0 + 0.05 * structAxPos.height,
                     'log(TPM+1)',
                     fontsize=Plot.numFontSize * 0.6,
                     ha='left', va='center')

        # # # # # # # # # # #
        # GSE24759 - Transcript abundance array
        handAx = handFig.add_axes([0.56, 0.10, 0.40, 0.75])
        structAxPos = handAx.get_position()

        # NaN entries become zero here, and the lower color limit is the smallest positive value
        arrayFlatGSE24759Out = np.ravel(np.nan_to_num(dfGSE24759Out.values))

        handGSE24759Abund = handAx.matshow(dfGSE24759Out.values,
                                           cmap=plt.cm.viridis,
                                           vmin=np.min(arrayFlatGSE24759Out[arrayFlatGSE24759Out > 0]),
                                           vmax=np.max(arrayFlatGSE24759Out),
                                           aspect='auto')
        handAx.set_xticks([])
        handAx.set_yticks([])

        # label each cell-type group and draw a separator after its final column
        numCounter = 0
        for iGroup in range(len(listOfListsGSE24759SampleTypeGrouped)):
            numInGroup = 0
            for strType in listOfListsGSE24759SampleTypeGrouped[iGroup]:
                numOutIndex = listGSE24759SmpTypeOrder.index(strType)
                numOfCellType = arrayGSE24759NumEachType[numOutIndex]
                numInGroup = numInGroup + numOfCellType

            handAx.text(x=float(numCounter)+(float(numInGroup)/2)-0.5-(float(numInGroup)/4),
                        y=-0.6,
                        s=listGSE24759SampleTypeLabels[iGroup],
                        fontsize=Plot.numFontSize*0.5,
                        rotation=50,
                        ha='left',
                        va='bottom')

            numCounter = numCounter + numInGroup

            # NOTE(review): always true within the loop, so the final group also gets a separator
            if iGroup < len(listOfListsGSE24759SampleTypeGrouped):
                handAx.axvline(x=(float(numCounter)-0.5),
                               ymin=0.0,
                               ymax=1.0,
                               c='k',
                               lw=0.75,
                               clip_on=False)
                handAx.axvline(x=(float(numCounter)-0.5),
                               ymin=0.0,
                               ymax=1.0,
                               c='w',
                               lw=0.5,
                               clip_on=False)

        # gene labels, placed between the two heat maps with one label per heat map row
        for iGene, strGene in enumerate(dfTCGANKGenes.index.tolist()):
            handFig.text(0.515, structAxPos.y0+(numNKGenesInTCGA-(iGene+0.5))*(structAxPos.height/numNKGenesInTCGA),
                         strGene,
                         fontsize=Plot.numFontSize*0.5,
                         style='italic',
                         ha='center', va='center')

        handFig.text(structAxPos.x0,
                     structAxPos.y0 + 1.16*structAxPos.height,
                     '(C)',
                     fontsize=Plot.numFontSize*0.9,
                     ha='center', va='center',
                     fontweight='bold')

        # color bar for panel (C)
        handGSE24759CMap = handFig.add_axes([0.56+(0.40/2)-0.13, 0.06, 0.15, 0.02])

        structAxPos = handGSE24759CMap.get_position()
        handColorBarPVals = handFig.colorbar(handGSE24759Abund,
                                             cax=handGSE24759CMap,
                                             orientation='horizontal')
        handColorBarPVals.ax.tick_params(labelsize=Plot.numFontSize * 0.5)

        handFig.text(structAxPos.x0 + 1.05 * structAxPos.width,
                     structAxPos.y0 + 0.05 * structAxPos.height,
                     'log(Abund.)',
                     fontsize=Plot.numFontSize * 0.6,
                     ha='left', va='center')

        # the output format is taken from the file name extension; the former 'ext' keyword is
        #  not a savefig argument and has been dropped
        for strFormat in Plot.listFileFormat:
            handFig.savefig(os.path.join(Plot.strOutputFolder, 'Fig2_BC.' + strFormat), dpi=300)

        plt.close(handFig)

        return flagResult

    def fig_four(flagResult=False):
        """Draw Fig. 4 (panels A-C) for the TCGA SKCM metastatic samples and save it.

        Panel (A): heat map of the curated NK signature genes with samples
        ordered by NK score, a single-column heat map of each gene's correlation
        with the NK score, and the sorted NK scores beneath.
        Panel (B): a 3x3 grid of survival curves, with samples partitioned on
        each entry of listMarkersToPlot and significant pairwise comparisons
        marked with asterisks.
        Panel (C): survival curves for XCL2 and GZMB restricted to the samples
        with an NK score above the upper tertile threshold.

        One file named 'Fig4.<format>' is written to Plot.strOutputFolder for
        every entry of Plot.listFileFormat.

        :param flagResult: value handed straight back to the caller
        :return: flagResult, unchanged
        """

        # markers for the panel (B) survival curves; one marker per subplot
        listMarkersToPlot=[['NK Score'],
                           ['CD3D'],
                           ['IL15'],
                           ['IL2RB'],
                           ['CD274'],
                           ['CCL5'],
                           ['XCL1'],
                           ['GZMB'],
                           ['FASLG']
                           ]
        # corresponding (row, column) positions within the 3x3 GridSpec
        listOfPlotPosTuples = [(0,0),
                               (0,1),
                               (0,2),
                               (1,0),
                               (1,1),
                               (1,2),
                               (2,0),
                               (2,1),
                               (2,2)]
        # hand-tuned x position (figure fraction) of the first significance bar for each subplot
        arrayStartXForSig = [0.46, 0.67, 0.875,
                             0.473, 0.665, 0.87,
                             0.476, 0.667, 0.865]

        numXSpacerForSigAnnot = 0.007
        numYSpacerForSigAnnot = 0.026

        # pairwise comparisons to annotate, with the top and bottom of the vertical bar given as a
        #  number of y-spacer steps below the annotation start position
        listSigComparisons = [('LowVsMed', 0, 1),
                              ('LowVsHigh', 0, 2),
                              ('MedVsHigh', 1, 2)]

        numLeftLimit = 0.07
        numHeatMapWidth = 0.19

        dictPatGroups = PreProc.split_tcga_met_vs_pri()
        listPatsMetTumOnly = dictPatGroups['MetOnlyPat']
        # sample identifiers are the patient identifiers with a '-06' suffix
        listPatsMetTumOnlySamples = [strPat + '-06' for strPat in listPatsMetTumOnly]

        # load the TCGA data as a merged pandas dataframe
        dictTCGASKCM = PreProc.tcga_skcm_data(flagPerformExtraction=False)
        dfTCGA = dictTCGASKCM['df']

        dfTCGAMets = dfTCGA.loc[listPatsMetTumOnlySamples]

        # (builtin float replaces the np.float alias, which NumPy 1.24 removed)
        arrayNKScore = dfTCGAMets['NK Score'].values.astype(float)

        # dfTCGAMets rows follow listPatsMetTumOnlySamples, so the argsort indices apply to the list
        arrayPatientsSortedByNKScore = np.argsort(arrayNKScore)
        listPatientsSortedByScore = [listPatsMetTumOnlySamples[i] for i in arrayPatientsSortedByNKScore]

        # the curated NK signature: rows flagged in the 'CursonsGuimaraes_sigGene' column
        dfNKSigCuration = pd.read_table(os.path.join(Plot.strOutputFolder, 'NK_genes_curated.tsv'),
                                        sep='\t', header=0, index_col=0)
        listLocalNKSigGenes = \
            sorted(dfNKSigCuration[dfNKSigCuration['CursonsGuimaraes_sigGene'] == True].index.tolist())

        # genes (rows) x samples sorted by NK score (columns)
        dfNKGenesInTCGA = dfTCGAMets[listLocalNKSigGenes].reindex(listPatientsSortedByScore).transpose().copy(deep=True)

        dfNKScoreCorr = dfTCGAMets[listLocalNKSigGenes].corrwith(dfTCGAMets['NK Score'])

        # tertile thresholds: the two interior points of linspace(0, 100, 4)
        arrayNKThresh = np.percentile(arrayNKScore, np.linspace(start=0, stop=100, num=4)[1:-1])

        # samples above the upper threshold are used for panel (C)
        listHighNKScorePatients = \
            dfTCGAMets[
                dfTCGAMets['NK Score'].values.astype(float) > arrayNKThresh[1]].index.tolist()

        dfHighNKMets = dfTCGAMets.reindex(listHighNKScorePatients).copy(deep=True)

        # layout for panel (B)
        arrayGridSpec = gridspec.GridSpec(nrows=3, ncols=3,
                                          left=0.43, right=0.98,
                                          bottom=0.36, top=0.94,
                                          wspace=0.25, hspace=0.35)

        # layout for panel (C)
        arrayGridSpecTwo = gridspec.GridSpec(nrows=1, ncols=2,
                                             left=0.50, right=0.90,
                                             bottom=0.07, top=0.26,
                                             wspace=0.70, hspace=0.30)

        handFig = plt.figure()
        handFig.set_size_inches(w=7, h=5)

        #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
        # Panel (A): NK signature gene heat map
        #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
        handAx = handFig.add_axes([numLeftLimit, 0.24, numHeatMapWidth, 0.67])
        handAbund = handAx.matshow(dfNKGenesInTCGA,
                                   cmap=plt.cm.viridis,
                                   aspect='auto')
        handAx.set_xticks([])
        handAx.set_yticks([])
        # gene names down the left hand side, one per heat map row
        for iGene, strGene in enumerate(dfNKGenesInTCGA.index.tolist()):
            handAx.text(-5, iGene,
                        strGene,
                        fontsize=Plot.numFontSize*0.7,
                        style='italic',
                        ha='right', va='center')

        handAbundCMapAx = handFig.add_axes([numLeftLimit, 0.96, numHeatMapWidth/2, 0.01])
        structAxPos = handAbundCMapAx.get_position()
        handColorBarPVals = handFig.colorbar(handAbund,
                                             cax=handAbundCMapAx,
                                             ticks=[-2, 0, 2, 4],
                                             orientation='horizontal')
        handColorBarPVals.ax.tick_params(labelsize=Plot.numFontSize * 0.7)

        handFig.text(structAxPos.x0 + 0.5 * structAxPos.width,
                     structAxPos.y0 + 1.04 * structAxPos.height,
                     'log$_{10}$(TPM)',
                     fontsize=Plot.numFontSize * 0.7,
                     ha='center', va='bottom')

        # single column heat map with the correlation between each gene and the NK score
        arrayCorrData = np.zeros((len(listLocalNKSigGenes), 1), dtype=float)
        arrayCorrData[:,0] = dfNKScoreCorr.iloc[:]
        handAx = handFig.add_axes([numLeftLimit+numHeatMapWidth+0.01,
                                   0.24, 0.02, 0.67])
        handCorr = handAx.matshow(arrayCorrData,
                                  cmap=plt.cm.PRGn,
                                  aspect='auto',
                                  vmin=-1.0,
                                  vmax=1.0)
        handAx.set_xticks([])
        handAx.set_yticks([])

        handCorrCMapAx = handFig.add_axes([numLeftLimit+numHeatMapWidth+0.015, 0.93, 0.01, 0.055])
        structAxPos = handCorrCMapAx.get_position()
        handColorBarPVals = handFig.colorbar(handCorr,
                                             cax=handCorrCMapAx,
                                             ticks=[-1, 0, 1])
        handColorBarPVals.ax.tick_params(labelsize=Plot.numFontSize * 0.7)

        handFig.text(structAxPos.x0 - 0.5 * structAxPos.width,
                     structAxPos.y0 + 0.5 * structAxPos.height,
                     '$r_{P}$(NK Score)',
                     fontsize=Plot.numFontSize*0.7,
                     ha='right', va='center')

        # sorted NK scores beneath the heat map
        numSortedPatients = len(listPatientsSortedByScore)
        handAx = handFig.add_axes([numLeftLimit, 0.01, numHeatMapWidth, 0.15])
        handAx.plot(np.linspace(start=1, stop=numSortedPatients+1, num=numSortedPatients),
                    dfTCGAMets['NK Score'].reindex(listPatientsSortedByScore),
                    'ko',
                    markersize=3)
        handAx.set_title('TCGA SKCM\nmetastatic tumors', fontsize=Plot.numFontSize*0.7)
        handAx.set_xlim([0, numSortedPatients+1])
        handAx.set_yticks([-0.25, 0, 0.25])
        # Tick.label1 is used because the Tick.label alias was removed from Matplotlib
        for handTick in handAx.yaxis.get_major_ticks():
            handTick.label1.set_fontsize(Plot.numFontSize*0.7)

        handAx.set_xticks([])
        handAx.set_ylim([-0.4, 0.4])
        structAxPos = handAx.get_position()
        handFig.text(0.01, structAxPos.y0 + 0.5*structAxPos.height,
                     'NK Score', rotation=90,
                     ha='center', va='center', fontsize=Plot.numFontSize*0.7)

        handAx.axhline(y=0.0, xmin=0.0, xmax=1.0,
                       linestyle='--',
                       color='0.5',
                       alpha=0.7,
                       linewidth=0.75)

        # dashed lines at one third and two thirds of the x-range, separating the three groups
        handAx.axvline(x=(1/3)*(numSortedPatients+1), ymin=0.0, ymax=1.0,
                       linestyle='--',
                       color='r',
                       alpha=0.7,
                       linewidth=0.75)

        handAx.axvline(x=(2/3)*(numSortedPatients+1), ymin=0.0, ymax=1.0,
                       linestyle='--',
                       color='r',
                       alpha=0.7,
                       linewidth=0.75)

        listToLabel = ['Low', 'Med' ,'High']
        for iCat in range(len(listToLabel)):
            handFig.text(structAxPos.x0 + ((1/6)+(iCat*(1/3)))*structAxPos.width,
                         structAxPos.y0 + 0.95*structAxPos.height,
                         listToLabel[iCat],
                         ha='center', va='top', fontsize=Plot.numFontSize*0.7)

        #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
        # Panel (B): survival curves for each marker
        #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
        for iMarker, listMarkers in enumerate(listMarkersToPlot):

            numPlotRow = listOfPlotPosTuples[iMarker][0]
            numPlotCol = listOfPlotPosTuples[iMarker][1]

            handAx = plt.subplot(arrayGridSpec[numPlotRow, numPlotCol])

            if len(listMarkers) == 1:

                strMarker = listMarkers[0]

                dictKMFs = Analyse.split_one_marker_three_partitions(strMarkerToSplit=strMarker,
                                                                     dfForAnalysis=dfTCGAMets,
                                                                     flagShowSplitValues=False)
                kmfLow = dictKMFs['kmfLow']
                kmfMed = dictKMFs['kmfMed']
                kmfHigh = dictKMFs['kmfHigh']

                for kmf in [kmfLow, kmfMed, kmfHigh]:
                    kmf.plot(ax=handAx)

            elif len(listMarkers) == 2:

                strMarkerOne = listMarkers[0]
                strMarkerTwo = listMarkers[1]

                dictKMFs = Analyse.split_two_markers_four_partitions(strMarkerOneToSplit=strMarkerOne,
                                                                     strMarkerTwoToSplit=strMarkerTwo,
                                                                     dfForAnalysis=dfTCGAMets)
                kmfLoLo = dictKMFs['kmfLoLo']
                kmfLoHi = dictKMFs['kmfLoHi']
                kmfHiLo = dictKMFs['kmfHiLo']
                kmfHiHi = dictKMFs['kmfHiHi']

                for kmf in [kmfLoLo, kmfLoHi, kmfHiLo, kmfHiHi]:
                    kmf.plot(ax=handAx)

            structAxPos = handAx.get_position()

            handAx.set_ylim([0, 1])
            arrayXLim = handAx.get_xlim()
            handAx.set_xlim([0, arrayXLim[1]])

            # ticks every 120 x-units, labelled in steps of 10 (i.e. the x-values divided by 12)
            arrayXTicksInMo = np.arange(start=0, stop=arrayXLim[1], step=120)
            arrayXTicksInYr = np.arange(start=0, stop=((arrayXTicksInMo[-1])/12)+1, step=10, dtype=int)

            handAx.set_xticks(arrayXTicksInMo)
            handAx.set_yticks([0, 0.5, 1.0])

            # only the bottom row shows x-axis labels
            if numPlotRow == 2:
                handAx.set_xlabel('Time (years)', fontsize=Plot.numFontSize*0.7)
                handAx.set_xticklabels(arrayXTicksInYr)
                for handTick in handAx.xaxis.get_major_ticks():
                    handTick.label1.set_fontsize(Plot.numFontSize*0.7)
            else:
                handAx.set_xlabel('')
                handAx.set_xticklabels([])

            # only the left column shows y-axis labels
            if numPlotCol == 0:
                handAx.set_ylabel('Overall survival', fontsize=Plot.numFontSize*0.7)
                for handTick in handAx.yaxis.get_major_ticks():
                    handTick.label1.set_fontsize(Plot.numFontSize*0.7)
            else:
                handAx.set_ylabel('')
                handAx.set_yticklabels([])

            # hide the right and top spines
            handAx.spines['right'].set_visible(False)
            handAx.spines['top'].set_visible(False)
            # only show ticks on the left and bottom spines
            handAx.yaxis.set_ticks_position('left')
            handAx.xaxis.set_ticks_position('bottom')

            plt.legend(loc='lower right',
                       bbox_to_anchor=(1.12, 0.71),
                       fontsize=Plot.numFontSize*0.45,
                       scatterpoints=1,
                       framealpha=1,
                       ncol=1)

            #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
            # Annotate the survival curves with significance
            #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
            if len(listMarkers) == 1:

                numSigAnnotStartY = structAxPos.y0 + 1.13 * structAxPos.height
                numSigAnnotStartX = arrayStartXForSig[iMarker]

                # each significant comparison is drawn one x-spacer to the left of the previous one
                iPlottedSig = 0

                for strComparison, numTopSteps, numBottomSteps in listSigComparisons:

                    numPVal = dictKMFs[strComparison].p_value
                    if not numPVal < 5E-2:
                        continue

                    if numPVal < 1E-6:
                        strToPlot = '***'
                    elif numPVal < 1E-3:
                        strToPlot = '**'
                    else:
                        strToPlot = '*'

                    numSigAnnotX = numSigAnnotStartX - (iPlottedSig * numXSpacerForSigAnnot)

                    # asterisks just above the top of the bar
                    handFig.text(numSigAnnotX + 0.003,
                                 numSigAnnotStartY + 0.002 - numTopSteps * numYSpacerForSigAnnot,
                                 strToPlot,
                                 rotation=90,
                                 fontsize=Plot.numFontSize * 0.5,
                                 ha='center', va='bottom')

                    # vertical bar (an arrow without a head) spanning the compared groups
                    handAx.annotate('',
                                    xy=[numSigAnnotX,
                                        numSigAnnotStartY - numTopSteps * numYSpacerForSigAnnot],
                                    xycoords='figure fraction',
                                    xytext=[numSigAnnotX,
                                            numSigAnnotStartY - numBottomSteps * numYSpacerForSigAnnot],
                                    textcoords='figure fraction',
                                    annotation_clip=False,
                                    arrowprops=dict(facecolor='black',
                                                    linewidth=1,
                                                    arrowstyle='-'))

                    iPlottedSig = iPlottedSig + 1

        #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
        # Panel (C): survival curves within the high NK score samples
        #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
        listOfGenesForSurvivalComp = ['XCL2', 'GZMB']
        for iGene, strGene in enumerate(listOfGenesForSurvivalComp):

            # tertile thresholds for this gene within the high NK score samples
            arrayHighNKGeneLoHiThresholds = \
                np.percentile(dfHighNKMets[strGene].values.astype(float),
                              np.linspace(start=0, stop=100, num=4)[1:-1])

            listHighNKLowExpr = dfHighNKMets[dfHighNKMets[strGene] < arrayHighNKGeneLoHiThresholds[0]].index.tolist()
            listHighNKHighExpr = dfHighNKMets[dfHighNKMets[strGene] > arrayHighNKGeneLoHiThresholds[1]].index.tolist()

            # log-rank test between the low and high expression groups; label-based .loc replaces
            #  the .ix indexer, which was removed in pandas 1.0
            structLowVsHighExprKMLogRank = KMlogRankTest(
                dfTCGA['surv_time'].loc[listHighNKLowExpr],
                dfTCGA['surv_time'].loc[listHighNKHighExpr],
                event_observed_A=dfTCGA['death_event'].loc[listHighNKLowExpr],
                event_observed_B=dfTCGA['death_event'].loc[listHighNKHighExpr])

            numHighNKKMLogRankPVal = structLowVsHighExprKMLogRank.p_value

            dictKMFs = Analyse.split_one_marker_three_partitions(
                strMarkerToSplit=strGene,
                dfForAnalysis=dfHighNKMets)

            # only the low and high groups are drawn for this panel
            kmfLow = dictKMFs['kmfLow']
            kmfHigh = dictKMFs['kmfHigh']

            handAx = plt.subplot(arrayGridSpecTwo[iGene])

            for kmf in [kmfLow, kmfHigh]:
                kmf.plot(ax=handAx)

            # hide the right and top spines
            handAx.spines['right'].set_visible(False)
            handAx.spines['top'].set_visible(False)
            # only show ticks on the left and bottom spines
            handAx.yaxis.set_ticks_position('left')
            handAx.xaxis.set_ticks_position('bottom')

            handAx.set_yticks([0, 0.5, 1.0])
            if iGene == 0:
                handAx.set_ylabel('Overall survival', fontsize=Plot.numFontSize*0.7)
                for handTick in handAx.yaxis.get_major_ticks():
                    handTick.label1.set_fontsize(Plot.numFontSize * 0.7)

            elif iGene > 0:
                handAx.set_yticklabels([])
                handAx.set_ylabel('')

            structAxPos = handAx.get_position()
            handFig.text(structAxPos.x0 + 0.85*structAxPos.width,
                         structAxPos.y0 + 0.80*structAxPos.height,
                         '$p$-value = ' + '{:03.2E}'.format(numHighNKKMLogRankPVal),
                         ha='center', va='top',
                         fontsize=Plot.numFontSize*0.5)

            plt.legend(loc='lower center',
                       bbox_to_anchor=(0.85, 0.80),
                       fontsize=Plot.numFontSize * 0.45,
                       scatterpoints=1,
                       framealpha=1,
                       ncol=1)

            # NOTE(review): arrayXLim still holds the x-limits read from the final panel (B)
            #  subplot rather than from this axes; retained so the tick positions are unchanged --
            #  confirm whether this was intended
            arrayXTicksInMo = np.arange(start=0, stop=arrayXLim[1], step=120)
            arrayXTicksInYr = np.arange(start=0, stop=((arrayXTicksInMo[-1]) / 12) + 1, step=10, dtype=int)

            handAx.set_xticks(arrayXTicksInMo)

            handAx.set_xlabel('Time (years)', fontsize=Plot.numFontSize*0.7)
            handAx.set_xticklabels(arrayXTicksInYr)
            for handTick in handAx.xaxis.get_major_ticks():
                handTick.label1.set_fontsize(Plot.numFontSize*0.7)

        # sub-panel labels
        handFig.text(0.01, 0.998, '(A)', fontsize=Plot.numFontSize*1.2, fontweight='bold', ha='left', va='top')
        handFig.text(0.35, 0.998, '(B)', fontsize=Plot.numFontSize*1.2, fontweight='bold', ha='center', va='top')

        handFig.text(0.35, 0.27, '(C)', fontsize=Plot.numFontSize*1.2, fontweight='bold', ha='center', va='top')
        handFig.text(0.38, 0.17,
                     'High\nNK score\ntumors',
                     fontsize=Plot.numFontSize*0.7,
                     fontweight='bold',
                     ha='center', va='center')

        # the output format is taken from the file name extension; the former 'ext' keyword is
        #  not a savefig argument and has been dropped
        for strFormat in Plot.listFileFormat:
            handFig.savefig(os.path.join(Plot.strOutputFolder, 'Fig4.' + strFormat), dpi=300)
        plt.close(handFig)

        return flagResult

    def fig_five(flagResult=False):
        """Generate Figure 5: gene set score scatter plots and age-stratified Kaplan-Meier curves.

        Panels (A)-(F) are density-coloured scatter plots of gene set score / age pairs for the
        TCGA SKCM samples from patients with metastatic tumours only; each panel title reports the
        Pearson and Spearman correlations. Panels (G) and (H) show Kaplan-Meier curves for patients
        below / not below the median age, each partitioned into four groups by NK score and
        TGF-B EMT score, with pairwise significance brackets drawn beside the legend.

        Side effects: writes 'SKCM-AllSamples-NKScore.csv', 'SKCM-AllSamples-GeneSetScores.csv' and
        'SKCM-MetOnlySamples.csv', plus 'Fig5.<format>' for every entry of Plot.listFileFormat, into
        Plot.strOutputFolder.

        :param flagResult: value handed back unchanged to the caller
        :return: flagResult
        """

        # create a list of marker pairs to plot for comparison across the scatter plots ([x, y])
        listMarkersToPlot = [['Epithelial Score', 'NK Score'],
                             ['Mesenchymal Score', 'NK Score'],
                             ['TGF-B EMT Score', 'NK Score'],
                             ['TGF-B EMT Score', 'Mesenchymal Score'],
                             ['Age (years)', 'NK Score'],
                             ['Age (years)', 'TGF-B EMT Score']]
        # create a corresponding list of tuples with the plot positions (for GridSpec)
        listOfPlotPosTuples = [(0, 0), (0, 1), (0, 2),
                               (1, 0), (1, 1), (1, 2)]
        # and a corresponding list of sub-figure labels
        listMarkerPairSubPlotLabels = ['(A)', '(B)', '(C)', '(D)', '(E)', '(F)']

        # the 'Age' column is divided by this value to obtain years (i.e. it is treated as days)
        numDaysPerYear = 365.25

        # p-value thresholds for the significance annotations on the Kaplan-Meier panels
        numOneStarPValThresh = 5E-2
        numTwoStarPValThresh = 1E-3
        numThreeStarPValThresh = 1E-6

        # layout (in figure fraction units) of the significance brackets drawn next to each legend
        arrayStartXForSig = [0.155, 0.65]
        numXSpacerForSigAnnot = 0.01
        numYSpacerForSigAnnot = 0.025
        numSigAnnotStartY = 0.09

        # (log-rank result key, upper level, lower level) for every pairwise comparison; the levels
        #  are multiples of numYSpacerForSigAnnot below numSigAnnotStartY
        # NOTE(review): the levels presumably line up with the four legend rows
        #  (LoLo, LoHi, HiLo, HiHi) - confirm against the rendered figure
        listSigComparisons = [('LoLoVsLoHi', 0, 1),
                              ('LoLoVsHiLo', 0, 2),
                              ('LoLoVsHiHi', 0, 3),
                              ('LoHiVsHiLo', 1, 2),
                              ('LoHiVsHiHi', 1, 3),
                              ('HiLoVsHiHi', 2, 3)]

        # load the dictionary for primary vs metastatic samples/patients
        dictPatGroups = PreProc.split_tcga_met_vs_pri()
        # for the patients with age and only metastatic tumours, create a list with the tumour sample IDs
        listPatsMetTumOnly = dictPatGroups['MetOnlyPatWithAge']
        listPatsMetTumOnlySamples = [strPat + '-06' for strPat in listPatsMetTumOnly]

        # load the TCGA data as a merged pandas dataframe
        dictTCGASKCM = PreProc.tcga_skcm_data(flagPerformExtraction=False)
        dfTCGA = dictTCGASKCM['df']

        # output the NK score for all samples
        # NOTE(review): index_label=True is preserved from the original call; pandas documents
        #  str / sequence / False for this argument - confirm the intended index header
        dfTCGA['NK Score'].to_csv(os.path.join(Plot.strOutputFolder, 'SKCM-AllSamples-NKScore.csv'),
                                  sep=',', index_label=True, header=True)
        # output the other gene set scores for all samples
        dfTCGA[['Epithelial Score', 'Mesenchymal Score',
                'TGF-B EMT Score', 'NK Score']].to_csv(
            os.path.join(Plot.strOutputFolder, 'SKCM-AllSamples-GeneSetScores.csv'),
            sep=',', index_label=True, header=True)

        # load data for the metastatic tumour samples; copy so that the 'Age (years)' column added
        #  below is written to an independent frame rather than a slice of dfTCGA
        dfTCGAMets = dfTCGA.loc[listPatsMetTumOnlySamples].copy()

        # output the samples used for this analysis
        dfTCGAPats = pd.DataFrame(data=listPatsMetTumOnlySamples,
                                  columns=['MetOnly.Samples'])
        dfTCGAPats.to_csv(os.path.join(Plot.strOutputFolder, 'SKCM-MetOnlySamples.csv'), sep=',',
                          index_label=None, index=False,
                          header=False)

        # load patient ages and determine the median value for thresholding
        arrayPatientAge = dfTCGAMets['Age'].values.astype(float)
        numAgeThresh = np.percentile(arrayPatientAge, 50)

        # split patients by below / not below this threshold (patients exactly at the median fall
        #  into the second group)
        arrayIsBelowAgeThresh = arrayPatientAge < numAgeThresh
        listPatientsBelowAgeThresh = dfTCGAMets.index[arrayIsBelowAgeThresh].tolist()
        listPatientsAboveAgeThresh = dfTCGAMets.index[~arrayIsBelowAgeThresh].tolist()

        # calculate patient age in years
        dfTCGAMets['Age (years)'] = arrayPatientAge/numDaysPerYear

        # age threshold in years for the panel titles; uses the same conversion as 'Age (years)'
        strAgeThreshInYr = '{:02.1f}'.format(numAgeThresh/numDaysPerYear)

        handFig = plt.figure()
        handFig.set_size_inches(w=6, h=6)

        # initialise the gridspec for the scatter plots
        arrayGridSpec = matplotlib.gridspec.GridSpec(nrows=2, ncols=3,
                                                     left=0.13, right=0.96,
                                                     bottom=0.48, top=0.96,
                                                     hspace=0.5, wspace=0.5)

        # for each marker pair
        for (strMarkerOne, strMarkerTwo), (iRow, iCol), strSubPlotLabel in zip(
                listMarkersToPlot, listOfPlotPosTuples, listMarkerPairSubPlotLabels):

            # apply the KDE coloring function
            arrayXToPlot, arrayYToPlot, arrayForColor = PreProc.density_scatters(
                arrayXIn=dfTCGAMets[strMarkerOne].values.astype(float),
                arrayYIn=dfTCGAMets[strMarkerTwo].values.astype(float))

            handAx = handFig.add_subplot(arrayGridSpec[iRow, iCol])
            # plot the scatter plot with markers colored according to sample density
            handAx.scatter(arrayXToPlot, arrayYToPlot,
                           c=arrayForColor, s=10, edgecolor='none', cmap=plt.cm.viridis)

            # calculate the Pearson's and Spearman's correlations
            structPearsCorr = scipy.stats.pearsonr(arrayXToPlot, arrayYToPlot)
            structSpearCorr = scipy.stats.spearmanr(arrayXToPlot, arrayYToPlot)

            # pad the axis limits by 5% of the data range on every side
            numXMin = np.min(arrayXToPlot)
            numXMax = np.max(arrayXToPlot)
            numXRange = np.ptp(arrayXToPlot)

            numYMin = np.min(arrayYToPlot)
            numYMax = np.max(arrayYToPlot)
            numYRange = np.ptp(arrayYToPlot)

            handAx.set_xlim([numXMin-0.05*numXRange, numXMax+0.05*numXRange])
            handAx.set_ylim([numYMin-0.05*numYRange, numYMax+0.05*numYRange])

            # resize the tick labels
            handAx.tick_params(axis='both', labelsize=Plot.numFontSize*0.7)

            # specify the axis labels
            handAx.set_xlabel(strMarkerOne, fontsize=Plot.numFontSize*0.7)
            handAx.set_ylabel(strMarkerTwo, fontsize=Plot.numFontSize*0.7)

            # put the Pearson's & Spearman's correlations into the plot title
            handAx.set_title('$r_{P}$ = ' + '{:03.2f}'.format(structPearsCorr[0]) +
                             '; $r_{S}$ = ' + '{:03.2f}'.format(structSpearCorr.correlation),
                             fontsize=Plot.numFontSize*0.5)

            # label the subplot
            structAxPos = handAx.get_position()
            handFig.text(structAxPos.x0 - 0.25*structAxPos.width,
                         structAxPos.y0 + 1.01*structAxPos.height,
                         strSubPlotLabel,
                         ha='center', va='center',
                         fontsize=Plot.numFontSize*0.8,
                         weight='bold')

        # create a GridSpec for the survival/Kaplan-Meier curves
        arrayGridSpecKM = matplotlib.gridspec.GridSpec(nrows=1, ncols=2,
                                                       left=0.13, right=0.96,
                                                       bottom=0.17, top=0.38,
                                                       hspace=0.5, wspace=0.5)

        def _annotate_significant_pairs(handAxIn, dictKMFsIn, numSigAnnotStartX):
            # draw a starred vertical bracket for every pairwise comparison with p < 0.05; each
            #  bracket that is drawn shifts the next one further to the left
            iPlottedSig = 0
            for strComparison, iTopLevel, iBottomLevel in listSigComparisons:
                numPVal = dictKMFsIn[strComparison].p_value
                if numPVal < numOneStarPValThresh:
                    if numPVal < numThreeStarPValThresh:
                        strToPlot = '***'
                    elif numPVal < numTwoStarPValThresh:
                        strToPlot = '**'
                    else:
                        strToPlot = '*'

                    numLineX = numSigAnnotStartX - (iPlottedSig * numXSpacerForSigAnnot)

                    # stars sit just above the upper end of the bracket
                    handFig.text(numLineX + 0.005,
                                 numSigAnnotStartY + 0.002 - iTopLevel * numYSpacerForSigAnnot,
                                 strToPlot,
                                 rotation=90,
                                 fontsize=Plot.numFontSize * 0.5,
                                 ha='center', va='bottom')

                    # an empty annotation with arrowstyle '-' draws a plain line between the levels
                    handAxIn.annotate('',
                                      xy=[numLineX,
                                          numSigAnnotStartY - iTopLevel * numYSpacerForSigAnnot],
                                      xycoords='figure fraction',
                                      xytext=[numLineX,
                                              numSigAnnotStartY - iBottomLevel * numYSpacerForSigAnnot],
                                      textcoords='figure fraction',
                                      annotation_clip=False,
                                      arrowprops=dict(facecolor='black',
                                                      linewidth=1,
                                                      arrowstyle='-'))

                    iPlottedSig = iPlottedSig + 1

        def _plot_km_panel(iCol, listSamplesInGroup, strTitlePrefix, strPanelLabel, numSigAnnotStartX):
            # draw one Kaplan-Meier panel (four NK score x TGF-B EMT score partitions) for a subset
            #  of samples, together with its legend, significance brackets and sub-figure label
            handAxKM = handFig.add_subplot(arrayGridSpecKM[0, iCol])
            structAxPosKM = handAxKM.get_position()

            # split by TGF-B EMT and NK score
            dictKMFs = Analyse.split_two_markers_four_partitions(
                strMarkerOneToSplit='NK Score',
                strMarkerTwoToSplit='TGF-B EMT Score',
                dfForAnalysis=dfTCGAMets.loc[listSamplesInGroup])

            # plot the Kaplan-Meier curve for each subset
            for strKMF in ['kmfLoLo', 'kmfLoHi', 'kmfHiLo', 'kmfHiHi']:
                dictKMFs[strKMF].plot(ax=handAxKM)

            # anchor both axes at zero
            handAxKM.set_ylim([0, 1])
            arrayXLim = handAxKM.get_xlim()
            handAxKM.set_xlim([0, arrayXLim[1]])

            # place x ticks every 60 time units and label them in years (assumes the survival
            #  times are in months, as implied by the division by 12)
            arrayXTicksInMo = np.arange(start=0, stop=arrayXLim[1], step=60)
            arrayXTicksInYr = (arrayXTicksInMo/12).astype(int)

            handAxKM.set_xticks(arrayXTicksInMo)
            handAxKM.set_yticks([0, 0.5, 1.0])

            # hide the right and top spines
            handAxKM.spines['right'].set_visible(False)
            handAxKM.spines['top'].set_visible(False)
            # only show ticks on the left and bottom spines
            handAxKM.yaxis.set_ticks_position('left')
            handAxKM.xaxis.set_ticks_position('bottom')

            # label the subplot with the patient age group
            handAxKM.set_title(strTitlePrefix + strAgeThreshInYr + ' y.o.',
                               fontsize=Plot.numFontSize*0.7)

            # label the axes
            handAxKM.set_xlabel('Time (years)', fontsize=Plot.numFontSize*0.7)
            handAxKM.set_xticklabels(arrayXTicksInYr)
            handAxKM.set_ylabel('Overall survival', fontsize=Plot.numFontSize*0.7)
            handAxKM.tick_params(axis='both', labelsize=Plot.numFontSize*0.7)

            # add a legend for the KM curves below the axes
            handAxKM.legend(loc='upper center',
                            bbox_to_anchor=(0.5, -0.31),
                            fontsize=Plot.numFontSize*0.5,
                            scatterpoints=1,
                            framealpha=1,
                            ncol=1)

            # annotate KM curves that are significantly different
            _annotate_significant_pairs(handAxKM, dictKMFs, numSigAnnotStartX)

            handFig.text(structAxPosKM.x0 - 0.2*structAxPosKM.width,
                         structAxPosKM.y0 + 1.05*structAxPosKM.height,
                         strPanelLabel,
                         ha='center', va='center',
                         fontsize=Plot.numFontSize*0.8,
                         weight='bold')

        # patients below the median age
        _plot_km_panel(iCol=0,
                       listSamplesInGroup=listPatientsBelowAgeThresh,
                       strTitlePrefix='Patients < ',
                       strPanelLabel='(G)',
                       numSigAnnotStartX=arrayStartXForSig[0])

        # patients at or above the median age
        _plot_km_panel(iCol=1,
                       listSamplesInGroup=listPatientsAboveAgeThresh,
                       strTitlePrefix='Patients > ',
                       strPanelLabel='(H)',
                       numSigAnnotStartX=arrayStartXForSig[1])

        # the output format is passed explicitly as well as through the file extension
        for strFormat in Plot.listFileFormat:
            handFig.savefig(os.path.join(Plot.strOutputFolder, 'Fig5.' + strFormat), format=strFormat, dpi=300)
        plt.close(handFig)

        return flagResult

    def supp_fig_one(flagResult=False):

        print('Loading sorted cell data..')
        dfGSE24759, dfGSE24759SmpToType = PreProc.gse24759_data()

        dictGSE24759Subsets = PreProc.gse24759_subsets()
        listOfListsGSE24759SampleTypeGrouped = dictGSE24759Subsets['listOfListsAllSamplesTypeGrouped']
        listGSE24759SampleTypeLabels = dictGSE24759Subsets['listSampleTypeLabels']

        dictSmpTypeToIndex = {}
        for iGroup in range(len(listOfListsGSE24759SampleTypeGrouped)):
            for iSmp in range(len(listOfListsGSE24759SampleTypeGrouped[iGroup])):
                dictSmpTypeToIndex[listOfListsGSE24759SampleTypeGrouped[iGroup][iSmp]] = iGroup

        # listSamples = dfGSE24759.columns.tolist()
        listGSE24759SmpOfInt = (dfGSE24759SmpToType.loc['Source']=='peripheral blood').index.tolist()

        listSamplesInCD56DimCD16HiNKSubset = []
        arrayGroupNumber = np.zeros(len(listGSE24759SmpOfInt), dtype=np.int)
        for iSmp in range(len(listGSE24759SmpOfInt)):
            strSmpBarcode = listGSE24759SmpOfInt[iSmp]
            strSmpTitle = dfGSE24759SmpToType[strSmpBarcode].loc['Title']
            strSmpType = strSmpTitle.split(',')[0]
            arrayGroupNumber[iSmp] = dictSmpTypeToIndex[strSmpType]
            if strSmpType == 'Mature NK cell_CD56- CD16+ CD3-':
                listSamplesInCD56DimCD16HiNKSubset.append(iSmp)

        arraySamplesInCD56DimCD16HiSubset = np.array(listSamplesInCD56DimCD16HiNKSubset, dtype=np.int)

        dataFrameTransp = dfGSE24759[listGSE24759SmpOfInt].transpose()

        numGroupsByName = len(listGSE24759SampleTypeLabels)

        numSamplesByNumber = len(arrayGroupNumber)


        arrayNumSamplesInEachGroup = np.zeros(len(listGSE24759SampleTypeLabels), dtype=np.int)
        for iGroup in range(len(listGSE24759SampleTypeLabels)):
            arrayNumSamplesInEachGroup[iGroup] = np.sum(arrayGroupNumber == iGroup)

        numSmallestGroup = np.min(arrayNumSamplesInEachGroup)

        arrayFlatData = np.ravel(np.nan_to_num(dfGSE24759.values.astype(np.float)))

        numAbundThresh = np.percentile(arrayFlatData, 10)

        dictPCA = BatchEffects.PrinComp.mean_centre_pca(
            dfGSE24759[listGSE24759SmpOfInt].transpose(),
            numPCs=5,
            flagFilterOnObs=True,
            numMinObsForFilter=numSmallestGroup,
            numValForFilter=numAbundThresh)

        listOfListsPCsToComp = [[1, 3],
                                [2, 4]]
        listPCCompSublabels = ['(B)', '(C)']

        numMaxYTicks = 5

        arrayColorNorm = matplotlib.colors.Normalize(vmin=0, vmax=20)
        arrayColorMap = matplotlib.cm.ScalarMappable(norm=arrayColorNorm, cmap=plt.cm.tab20)

        # calculate the median transcript abundance across all samples
        arrayMedianGeneAbund = np.median(dataFrameTransp.values, axis=0)

        # for every sample, calculate the relative log expression (the difference from each gene's
        #  median value)
        listDataToPlot = []
        for iSample in range(numSamplesByNumber):
            listDataToPlot.append(dataFrameTransp.iloc[iSample,:].values - arrayMedianGeneAbund)


        handFig = plt.figure()
        handFig.set_size_inches(w=6,
                                h=7)

        arrayGridSpec = gridspec.GridSpec(nrows=1,ncols=1,
                                          left=0.09, right=0.97,
                                          bottom=0.50, top=0.90)

        handAx = plt.subplot(arrayGridSpec[0])
        structAxPos = handAx.get_position()

        handBoxPlot = handAx.boxplot(listDataToPlot,
                                     notch=False,
                                     vert=True)
        plt.setp(handBoxPlot['boxes'],
                 color='0.3')

        plt.setp(handBoxPlot['whiskers'],
                 color='0.6',
                 linestyle='-')

        for numSample in range(numSamplesByNumber):
            plt.setp(handBoxPlot['medians'][numSample],
                     color=arrayColorMap.to_rgba(arrayGroupNumber[numSample]))

        plt.setp(handBoxPlot['fliers'],
                 mec='0.75', mfc='0.75',
                 marker='o',
                 markersize=0.5,
                 alpha=0.5)

        handAx.set_ylabel('Relative log difference')
        handAx.set_xlabel('Samples')

        handAx.set_xticks([])

        arrayYTickLoc = plt.MaxNLocator(numMaxYTicks)
        handAx.yaxis.set_major_locator(arrayYTickLoc)
        for handTick in handAx.yaxis.get_major_ticks():
            handTick.label.set_fontsize(Plot.numFontSize*0.6)

        arrayXLim = handAx.get_xlim()
        numXRange = np.max(arrayXLim) - np.min(arrayXLim)
        arrayYLim = handAx.get_ylim()

        for iGroup in range(numGroupsByName):
            plt.scatter(arrayXLim[0]-0.2*numXRange, arrayYLim[0],
                        c=arrayColorMap.to_rgba(iGroup),
                        label=listGSE24759SampleTypeLabels[iGroup])

        handAx.set_xlim(arrayXLim)

        plt.legend(loc='lower center',
                   bbox_to_anchor=(0.50, 0.99),
                   fontsize=Plot.numFontSize*0.6,
                   scatterpoints=1,
                   ncol=5,
                   fancybox=True,
                   shadow=True)

        handAx.axhline(y=0, xmin=0.0, xmax=1.0,
                       color='w',
                       linewidth=0.75,
                       alpha=0.5)
        handAx.axhline(y=0, xmin=0.0, xmax=1.0,
                       color='k',
                       linewidth=0.5,
                       alpha=0.5)

        handFig.text(structAxPos.x0-0.07*structAxPos.width,
                     structAxPos.y0 + 1.15*structAxPos.height,
                     '(A)',
                     ha='center', va='center',
                     fontsize=Plot.numFontSize,
                     weight='bold'
                     )

        arrayGridSpec = gridspec.GridSpec(nrows=1,ncols=2,
                                          left=0.15, right=0.97,
                                          bottom=0.20, top=0.44,
                                          wspace=0.7)

        for iComp in range(len(listOfListsPCsToComp)):
            handAx = plt.subplot(arrayGridSpec[iComp])
            structAxPos = handAx.get_position()

            numPCX = listOfListsPCsToComp[iComp][0]
            numPCY = listOfListsPCsToComp[iComp][1]

            for iGroup in range(len(listGSE24759SampleTypeLabels)):
                arraySampleIndices = np.where(arrayGroupNumber == iGroup)[0]

                if listGSE24759SampleTypeLabels[iGroup] == 'NK cells':
                    numZOrder = 12
                else:
                    numZOrder = 10

                handAx.scatter(dictPCA['arraySampleLoadings'][arraySampleIndices, numPCX - 1],
                               dictPCA['arraySampleLoadings'][arraySampleIndices, numPCY - 1],
                               label=listGSE24759SampleTypeLabels[iGroup],
                               color=arrayColorMap.to_rgba(iGroup),
                               zorder=numZOrder,
                               alpha=0.7)

                if listGSE24759SampleTypeLabels[iGroup] == 'NK cells':
                    handAx.scatter(dictPCA['arraySampleLoadings'][arraySamplesInCD56DimCD16HiSubset, numPCX - 1],
                                   dictPCA['arraySampleLoadings'][arraySamplesInCD56DimCD16HiSubset, numPCY - 1],
                                   label='CD56$^{Lo}$/CD16$^{Hi}$ NK cells',
                                   color=arrayColorMap.to_rgba(iGroup),
                                   marker='s',
                                   edgecolor='k',
                                   zorder=numZOrder)

            handAx.set_xlabel('PC' + '{}'.format(numPCX) + ' (' +
                              '{:02.1f}'.format(dictPCA['structPCA'].explained_variance_ratio_[numPCX - 1] * 100.0) +
                              '% explained)')
            handAx.set_ylabel('PC' + '{}'.format(numPCY) + ' (' +
                              '{:02.1f}'.format(dictPCA['structPCA'].explained_variance_ratio_[numPCY - 1] * 100.0) +
                              '% explained)')

            handFig.text(structAxPos.x0-0.40*structAxPos.width,
                         structAxPos.y0 + 1.03*structAxPos.height,
                         listPCCompSublabels[iComp],
                         ha='center', va='center',
                         fontsize=Plot.numFontSize,
                         weight='bold'
                         )

            if iComp == 1:
                plt.legend(loc='upper center',
                           bbox_to_anchor=(-0.3, -0.3),
                           fontsize=Plot.numFontSize*0.6,
                           scatterpoints=1,
                           ncol=4,
                           fancybox=True,
                           shadow=True)

        for strFormat in Plot.listFileFormat:
            handFig.savefig(os.path.join(Plot.strOutputFolder,
                                         'FigS1.' + strFormat), ext=strFormat, dpi=300)
        plt.close(handFig)


        return flagResult

    def supp_fig_three(flagResult=False):
        """Draw supplementary figure 3: Kaplan-Meier survival curves for TCGA SKCM patient subsets.

        The figure is a 2x2 grid of panels:
            (A) patients in the 'PriOnlyPat' group against patients in the 'MetOnlyPat' group (as returned by
                PreProc.split_tcga_met_vs_pri()) over the full study length, with a log-rank p-value;
            (B) the same two curves with the x-axis restricted to [0, 60];
            (C) the 'MetOnlyPatWithAge' patients split into low/medium/high 'Age' partitions by
                Analyse.split_one_marker_three_partitions(), with asterisks for pairwise log-rank p-values < 0.05;
            (D) the 'MetOnlyPatWithAge' patients split on the 'gender' column, with a log-rank p-value.

        Output is written to Plot.strOutputFolder as 'FigS3.<format>' for each entry of Plot.listFileFormat.

        NB: the x-axes assume that 'surv_time' is recorded in months (ticks every 60 units are labelled as 5 year
            steps); this is implied by the axis labels rather than checked here.

        :param flagResult: returned unchanged
        :return: flagResult
        """

        #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
        # Plotting helpers shared by the four panels
        #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #

        def _show_full_study_in_years(handAxIn):
            # start the x-axis at zero and label ticks placed every 60 units (months) as multiples of 5 years
            numXMax = handAxIn.get_xlim()[1]
            handAxIn.set_xlim([0, numXMax])

            arrayXTicksInMo = np.arange(start=0, stop=numXMax, step=60)
            # derive the labels from the tick positions so that the two arrays always have the same length
            arrayXTicksInYr = (arrayXTicksInMo // 12).astype(int)

            handAxIn.set_xticks(arrayXTicksInMo)
            handAxIn.set_xticklabels(arrayXTicksInYr)

        def _format_survival_axes(handAxIn, strTitle, strXLabel, tupleLegendAnchor):
            # apply the title, axis labels, y-axis range, tick label size, legend and spine styling common to
            #  every panel; must be called after the curves have been drawn so the legend picks up their labels
            handAxIn.set_title(strTitle, fontsize=Plot.numFontSize)
            handAxIn.set_ylabel('Overall survival', fontsize=Plot.numFontSize)
            handAxIn.set_xlabel(strXLabel, fontsize=Plot.numFontSize)
            handAxIn.set_ylim([0, 1])
            handAxIn.set_yticks([0, 0.5, 1.0])

            # tick_params is used rather than looping over Tick.label, which is no longer available in recent
            #  matplotlib releases
            handAxIn.tick_params(axis='both', which='major', labelsize=Plot.numFontSize)

            handAxIn.legend(loc='upper center',
                            bbox_to_anchor=tupleLegendAnchor,
                            fontsize=Plot.numFontSize*0.6,
                            scatterpoints=1,
                            ncol=1,
                            fancybox=True,
                            shadow=True)

            handAxIn.spines['top'].set_visible(False)
            handAxIn.spines['right'].set_visible(False)

            handAxIn.yaxis.set_ticks_position('left')
            handAxIn.xaxis.set_ticks_position('bottom')

        def _add_p_value_text(handAxIn, numPVal, numYFrac):
            # write a log-rank p-value at 70% of the displayed x-range (the horizontal legend anchor) and at
            #  numYFrac of the displayed y-range; the limits are read from this panel once they are final
            arrayXLim = handAxIn.get_xlim()
            arrayYLim = handAxIn.get_ylim()

            handAxIn.text(arrayXLim[0] + 0.70*(arrayXLim[1] - arrayXLim[0]),
                          arrayYLim[0] + numYFrac*(arrayYLim[1] - arrayYLim[0]),
                          '$p$-value = ' + '{:03.2E}'.format(numPVal),
                          ha='center', va='center',
                          fontsize=Plot.numFontSize*0.7)

        def _add_panel_label(handFigIn, structAxPosIn, strLabel):
            # write the bold panel label above and to the left of the axes, in figure coordinates
            handFigIn.text(structAxPosIn.x0 - 0.2*structAxPosIn.width,
                           structAxPosIn.y0 + 1.15*structAxPosIn.height,
                           strLabel,
                           fontsize=Plot.numFontSize,
                           weight='bold')

        #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
        # Load the data and define the patient/sample groups
        #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #

        # load the TCGA data as a merged pandas dataframe
        dictTCGASKCM = PreProc.tcga_skcm_data()
        dfTCGA = dictTCGASKCM['df']

        dictPatGroups = PreProc.split_tcga_met_vs_pri()
        listPatsPriTumOnly = dictPatGroups['PriOnlyPat']
        listPatsMetTumOnly = dictPatGroups['MetOnlyPat']
        listPatsWithMetTumAndAge = dictPatGroups['MetOnlyPatWithAge']

        # the dataframe is indexed by sample, so append the sample suffix used for each tumour type to the
        #  patient identifiers
        listPriTumOnlySamples = [strPatient + '-01' for strPatient in listPatsPriTumOnly]
        listMetTumOnlySamples = [strPatient + '-06' for strPatient in listPatsMetTumOnly]
        listMetOnlySamplesWithAge = [strPatient + '-06' for strPatient in listPatsWithMetTumAndAge]

        # work on a copy so that casting the age to a float does not modify the loaded dataframe
        dfMetOnly = dfTCGA.loc[listMetOnlySamplesWithAge].copy(deep=True)
        dfMetOnly['Age'] = dfMetOnly['Age'].values.astype(float)

        numPriTumPats = len(listPatsPriTumOnly)
        numMetTumPats = len(listPatsMetTumOnly)

        #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
        # Fit the survival curves and perform the associated log-rank tests
        #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #

        kmfPriTumour = KaplanMeierFitter()
        kmfPriTumour.fit(
            dfTCGA['surv_time'].loc[listPriTumOnlySamples],
            event_observed=dfTCGA['death_event'].loc[listPriTumOnlySamples],
            label='Primary Tumours\n(n=' + '{:d}'.format(numPriTumPats) + ')')

        kmfMetTumour = KaplanMeierFitter()
        kmfMetTumour.fit(
            dfTCGA['surv_time'].loc[listMetTumOnlySamples],
            event_observed=dfTCGA['death_event'].loc[listMetTumOnlySamples],
            label='Metastatic Tumours\n(n=' + '{:d}'.format(numMetTumPats) + ')')

        # perform a Kaplan-Meier log-rank test to calculate a p-value for survival curve differences
        structKMLogRankPriVsMet = KMlogRankTest(
            dfTCGA['surv_time'].loc[listPriTumOnlySamples],
            dfTCGA['surv_time'].loc[listMetTumOnlySamples],
            event_observed_A=dfTCGA['death_event'].loc[listPriTumOnlySamples],
            event_observed_B=dfTCGA['death_event'].loc[listMetTumOnlySamples])

        # age-stratified curves (and their pairwise log-rank tests) for patients with metastatic tumours
        dictKMFs = Analyse.split_one_marker_three_partitions(strMarkerToSplit='Age',
                                                             dfForAnalysis=dfMetOnly,
                                                             flagShowSplitValues=True)
        kmfMetLowAge = dictKMFs['kmfLow']
        kmfMetMedAge = dictKMFs['kmfMed']
        kmfMetHighAge = dictKMFs['kmfHigh']

        # identify target sample subsets for gender-stratified patients with metastatic tumours
        listMalePatsWithMet = dfMetOnly[dfMetOnly['gender'] == 'male'].index.tolist()
        listFemalePatsWithMet = dfMetOnly[dfMetOnly['gender'] == 'female'].index.tolist()

        numMalePatients = len(listMalePatsWithMet)
        numFemalePatients = len(listFemalePatsWithMet)

        kmfMetMaleTumour = KaplanMeierFitter()
        kmfMetMaleTumour.fit(
            dfMetOnly['surv_time'].loc[listMalePatsWithMet],
            event_observed=dfMetOnly['death_event'].loc[listMalePatsWithMet],
            label='Male (n=' + '{:d}'.format(numMalePatients) + ')')

        kmfMetFemaleTumour = KaplanMeierFitter()
        kmfMetFemaleTumour.fit(
            dfMetOnly['surv_time'].loc[listFemalePatsWithMet],
            event_observed=dfMetOnly['death_event'].loc[listFemalePatsWithMet],
            label='Female (n=' + '{:d}'.format(numFemalePatients) + ')')

        # perform a Kaplan-Meier log-rank test on the same dataframe that was used to fit the plotted curves
        structKMLogRankSexDiff = KMlogRankTest(
            dfMetOnly['surv_time'].loc[listFemalePatsWithMet],
            dfMetOnly['surv_time'].loc[listMalePatsWithMet],
            event_observed_A=dfMetOnly['death_event'].loc[listFemalePatsWithMet],
            event_observed_B=dfMetOnly['death_event'].loc[listMalePatsWithMet])

        #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
        # Create the output figure
        #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
        arrayGridSpec = gridspec.GridSpec(nrows=2, ncols=2,
                                          left=0.10, right=0.98,
                                          bottom=0.10, top=0.90,
                                          wspace=0.40, hspace=0.50)

        handFig = plt.figure()
        handFig.set_size_inches(w=6, h=6)

        #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
        # (A) Survival differences over the full study length
        handAx = plt.subplot(arrayGridSpec[0, 0])
        structAxPos = handAx.get_position()

        for kmfToPlot in (kmfPriTumour, kmfMetTumour):
            kmfToPlot.plot(ax=handAx)

        _show_full_study_in_years(handAx)
        _format_survival_axes(handAx,
                              strTitle='TCGA SKCM patient survival\nprimary vs metastatic tumours',
                              strXLabel='Time (years)',
                              tupleLegendAnchor=(0.70, 1.00))
        _add_p_value_text(handAx, structKMLogRankPriVsMet.p_value, numYFrac=0.67)
        _add_panel_label(handFig, structAxPos, '(A)')

        #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
        # (B) Survival differences over the first five years
        handAx = plt.subplot(arrayGridSpec[0, 1])
        structAxPos = handAx.get_position()

        for kmfToPlot in (kmfPriTumour, kmfMetTumour):
            kmfToPlot.plot(ax=handAx)

        handAx.set_xlim([0, 60])
        handAx.set_xticks(np.arange(start=0, stop=61, step=12))
        _format_survival_axes(handAx,
                              strTitle='TCGA SKCM patient survival\nprimary vs metastatic tumours',
                              strXLabel='Time (months)',
                              tupleLegendAnchor=(0.28, 0.30))
        _add_panel_label(handFig, structAxPos, '(B)')

        #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
        # (C) Survival differences associated with age in patients with metastatic tumours
        handAx = plt.subplot(arrayGridSpec[1, 0])
        structAxPos = handAx.get_position()

        for kmfToPlot in (kmfMetLowAge, kmfMetMedAge, kmfMetHighAge):
            kmfToPlot.plot(ax=handAx)

        _show_full_study_in_years(handAx)
        _format_survival_axes(handAx,
                              strTitle='TCGA SKCM patient survival\nmetastatic tumours split by age',
                              strXLabel='Time (years)',
                              tupleLegendAnchor=(0.70, 1.00))

        # significant pairwise differences are marked with a vertical bar and asterisks, positioned in figure
        #  coordinates; each plotted bar is shifted left of the previous one by numXSpacerForSigAnnot
        numXSpacerForSigAnnot = 0.01
        numYSpacerForSigAnnot = 0.028

        numSigAnnotStartY = structAxPos.y0 + 0.95 * structAxPos.height
        numSigAnnotStartX = 0.26

        # (comparison, top of bar, bottom of bar); bar ends are given as multiples of numYSpacerForSigAnnot
        #  below numSigAnnotStartY
        listSigComparisons = [('LowVsMed', 0, 1),
                              ('LowVsHigh', 0, 2),
                              ('MedVsHigh', 1, 2)]

        iPlottedSig = 0
        for strComparison, numTopOffset, numBottomOffset in listSigComparisons:
            numPVal = dictKMFs[strComparison].p_value
            if not numPVal < 5E-2:
                continue

            if numPVal < 1E-6:
                strToPlot = '***'
            elif numPVal < 1E-3:
                strToPlot = '**'
            else:
                strToPlot = '*'

            numBarX = numSigAnnotStartX - (iPlottedSig * numXSpacerForSigAnnot)
            numBarTopY = numSigAnnotStartY - numTopOffset * numYSpacerForSigAnnot
            numBarBottomY = numSigAnnotStartY - numBottomOffset * numYSpacerForSigAnnot

            handFig.text(numBarX + 0.003,
                         numBarTopY + 0.002,
                         strToPlot,
                         rotation=90,
                         fontsize=Plot.numFontSize * 0.5,
                         ha='center', va='bottom')

            # an arrow without a head is used to draw the bar
            handAx.annotate('',
                            xy=[numBarX, numBarTopY],
                            xycoords='figure fraction',
                            xytext=[numBarX, numBarBottomY],
                            textcoords='figure fraction',
                            annotation_clip=False,
                            arrowprops=dict(facecolor='black',
                                            linewidth=1,
                                            arrowstyle='-'))

            iPlottedSig = iPlottedSig + 1

        _add_panel_label(handFig, structAxPos, '(C)')

        #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
        # (D) Survival differences associated with sex in patients with metastatic tumours
        handAx = plt.subplot(arrayGridSpec[1, 1])
        structAxPos = handAx.get_position()

        for kmfToPlot in (kmfMetMaleTumour, kmfMetFemaleTumour):
            kmfToPlot.plot(ax=handAx)

        _show_full_study_in_years(handAx)
        _format_survival_axes(handAx,
                              strTitle='TCGA SKCM patient survival\nmetastatic tumours split by sex',
                              strXLabel='Time (years)',
                              tupleLegendAnchor=(0.70, 1.00))
        _add_p_value_text(handAx, structKMLogRankSexDiff.p_value, numYFrac=0.72)
        _add_panel_label(handFig, structAxPos, '(D)')

        #   #   #   #   #   #   #   #   #   #   #   #   #   #   #   #
        # Output in the specified formats and close
        for strFileType in Plot.listFileFormat:
            handFig.savefig(os.path.join(Plot.strOutputFolder, 'FigS3.' + strFileType),
                            format=strFileType, dpi=300)

        plt.close(handFig)

        return flagResult

    def supp_fig_four(flagResult=False,
                      strDataLoc=os.getcwd(),
                      strTempFile='CrossClusteredTranscripts.pickle'):
        """Draw the clustered gene-gene Pearson correlation heat map ('GeneCrossCorr.png' and '.pdf').

        Correlations are calculated between the genes in PreProc.tcga_skcm_data()['listGenes'] across the '-06'
        samples of patients in the 'MetOnlyPat' group, ordered by average-linkage hierarchical clustering, and
        cached as a pickled DataFrame at os.path.join(strDataLoc, strTempFile). If that file already exists it is
        loaded and the TCGA data are not read at all. Genes in listGenesForCrossCorrDisp which are present in the
        matrix are labelled down the left-hand side of the heat map.

        :param flagResult: returned unchanged
        :param strDataLoc: folder holding the cache file; NB the default is the working directory at the time this
                           function was defined, not at the time it is called
        :param strTempFile: file name of the cached, clustered correlation matrix
        :return: flagResult
        """

        listGenesForCrossCorrDisp = ['IFNG', 'CD274', 'KLRD1', 'NCR1', 'NCR3', 'PRF1', 'GZMA', 'GZMH',
                                     'MITF', 'SOX9', 'SOX10', 'CD3E', 'CD3B', 'CD247', 'MLANA', 'IL15',
                                     'IL12', 'CD4', 'CD19', 'IKZF1', 'IKZF2', 'IKZF3', 'PAX5']

        strTempPath = os.path.join(strDataLoc, strTempFile)

        if os.path.exists(strTempPath):
            # NB: loading a pickle can execute arbitrary code; only use cache files written by this function
            dfCrossClusteredMessRNAs = pd.read_pickle(strTempPath)

        else:
            dictTCGASKCM = PreProc.tcga_skcm_data(flagPerformExtraction=False)
            dfTCGA = dictTCGASKCM['df']
            listTCGAGenes = dictTCGASKCM['listGenes']

            dictPatGroups = PreProc.split_tcga_met_vs_pri()
            listPatsMetTumOnlySamples = [strPat + '-06' for strPat in dictPatGroups['MetOnlyPat']]

            print('calculating cross correlations..')
            dfCrossCorr = dfTCGA[listTCGAGenes].reindex(listPatsMetTumOnlySamples).corr(method='pearson')
            # take the gene names from the correlation matrix itself so they always match its rows
            listCrossCorrGenes = dfCrossCorr.index.tolist()

            print('clustering cross correlations..')
            # undefined correlations (NaN) are set to zero before clustering
            arrayCrossCorrForClustering = np.nan_to_num(dfCrossCorr.values.astype(float))

            arrayCrossCorrLink = SciPyClus.linkage(arrayCrossCorrForClustering, method='average')
            arrayCrossCorrGeneOrder = SciPyClus.leaves_list(arrayCrossCorrLink)

            # apply the same leaf ordering to the columns and then the rows
            arrayCrossCorrClustered = \
                arrayCrossCorrForClustering[:, arrayCrossCorrGeneOrder][arrayCrossCorrGeneOrder, :]
            listGenesClustered = [listCrossCorrGenes[i] for i in arrayCrossCorrGeneOrder]

            dfCrossClusteredMessRNAs = pd.DataFrame(data=arrayCrossCorrClustered,
                                                    index=listGenesClustered,
                                                    columns=listGenesClustered)
            dfCrossClusteredMessRNAs.to_pickle(strTempPath)

        handFig = plt.figure()
        handFig.set_size_inches(w=6, h=6)

        handAx = handFig.add_axes([0.20, 0.20, 0.70, 0.70])
        handCrossCorr = handAx.imshow(
                dfCrossClusteredMessRNAs.values.astype(float),
                interpolation='nearest',
                cmap=plt.cm.PRGn,
                vmin=-1.0,
                vmax=1.0,
                aspect='auto')

        handAx.set_xticks([])
        handAx.set_yticks([])

        # the gene order is read from the clustered matrix, so labelling works for a cached matrix as well as a
        #  freshly calculated one
        listGenesInPlotOrder = dfCrossClusteredMessRNAs.index.tolist()
        setGenesToLabel = set(listGenesForCrossCorrDisp)

        # labels are spread evenly down the full height of the heat map and joined to their row with a line
        arrayLabelledGenePos = np.linspace(1, len(listGenesInPlotOrder), len(listGenesForCrossCorrDisp))
        numLabGenesOut = 0
        for iGene, strGene in enumerate(listGenesInPlotOrder):
            if strGene in setGenesToLabel:
                numTextYPos = arrayLabelledGenePos[numLabGenesOut]
                handAx.text(-800, numTextYPos, strGene,
                            fontsize=4.5, ha='right', va='center',)
                handAx.annotate("",
                                xy=(-1, iGene), xycoords='data',
                                xytext=(-800, numTextYPos), textcoords='data',
                                arrowprops=dict(arrowstyle='-',
                                                connectionstyle="arc,angleA=0,angleB=0,armA=10,armB=-15,rad=0.0",
                                                linewidth=0.5)
                                )
                numLabGenesOut = numLabGenesOut + 1

        handAxCMap = handFig.add_axes([0.92, 0.55, 0.03, 0.20])
        handColorBarPVals = handFig.colorbar(handCrossCorr,
                                             cax=handAxCMap,
                                             ticks=[-1, 0, 1])
        handColorBarPVals.ax.tick_params(labelsize=Plot.numFontSize*0.7)

        handAxCMap.set_title('r$_{P}$', fontsize=Plot.numFontSize*0.7)

        handFig.savefig(os.path.join(Plot.strOutputFolder, 'GeneCrossCorr.png'), format='png', dpi=300)
        handFig.savefig(os.path.join(Plot.strOutputFolder, 'GeneCrossCorr.pdf'), format='pdf', dpi=300)
        plt.close(handFig)

        return flagResult

    def supp_fig_five(flagResult=False):

        numImmScoreThresh = -0.10

		# Load a processed dictionary with the original TCGA 'Immune' gene set and classifications for immune high/low
        dictTCGAClassifications = PreProc.tcga_skcm_classifications()

        # load the TCGA data as a merged pandas dataframe
        dictTCGASKCM = PreProc.tcga_skcm_data(flagPerformExtraction=False)
        dfTCGA = dictTCGASKCM['df']

        listTCGASamples = dfTCGA.index.tolist()
        numTCGASamples = len(listTCGASamples)

        listTCGAClassifiedImmune = dictTCGAClassifications['listTCGAClassifiedImmune']
        listOrigTCGASamples = dictTCGAClassifications['listOrigTCGASamples']

        arrayTCGADataFrameSampleInImmuneSubset = np.zeros(numTCGASamples, dtype=np.bool)
        for iSample in range(numTCGASamples):
            strSample = listTCGASamples[iSample]
            if strSample in listTCGAClassifiedImmune:
                arrayTCGADataFrameSampleInImmuneSubset[iSample]=True

        listOrigTCGASamplesNotImmFlag = list(set(listOrigTCGASamples).difference(set(listTCGAClassifiedImmune)))
        listNewTGCASamples = set(listTCGASamples).difference(set(listOrigTCGASamples))

        sliceOrigTCGAImmScore = dfTCGA['TCGA Immune Score'].reindex(listOrigTCGASamples)
        sliceOrigTCGAImmScoreHigh = dfTCGA['TCGA Immune Score'].reindex(listOrigTCGASamplesNotImmFlag)
        sliceOrigTCGAImmScoreNotHigh = dfTCGA['TCGA Immune Score'].reindex(listTCGAClassifiedImmune)

        sliceNewTCGAImmSore = dfTCGA['TCGA Immune Score'].reindex(listNewTGCASamples)

        listAllTCGAImmuneHigh = dfTCGA['TCGA Immune Score'][
            np.bitwise_and(dfTCGA['TCGA Immune Score'].notnull().values.astype(np.bool),
                           dfTCGA['TCGA Immune Score'].values.astype(np.float) > numImmScoreThresh)].index.tolist()
        listAllTCGANotImmuneHigh = dfTCGA['TCGA Immune Score'][
            np.bitwise_and(dfTCGA['TCGA Immune Score'].notnull().values.astype(np.bool),
                           dfTCGA['TCGA Immune Score'].values.astype(np.float) <= numImmScoreThresh)].index.tolist()

        listHasValidSurvivalData = \
            dfTCGA[np.bitwise_and(dfTCGA['death_event'].notnull().values.astype(np.bool),
                                  dfTCGA['surv_time'].notnull().values.astype(np.bool))].index.tolist()

        listTCGAImmuneHighWithValidSurv = [strSample
                                           for strSample in listAllTCGAImmuneHigh
                                           if strSample in listHasValidSurvivalData]

        listAllTCGANotImmuneHighWithValidSurv = [strSample
                                           for strSample in listAllTCGANotImmuneHigh
                                           if strSample in listHasValidSurvivalData]


        dfContTable = pd.DataFrame(data=np.zeros((3,3), dtype=np.int),
                                   columns=['Score Low','Score High', 'Total'],
                                   index=['TCGA Imm. Low', 'TCGA Imm. High', 'Total'])

        dfContTable['Score Low'].loc['TCGA Imm. Low'] = len(set(listOrigTCGASamplesNotImmFlag).intersection(set(listAllTCGANotImmuneHigh)))
        dfContTable['Score Low'].loc['TCGA Imm. High'] = len(set(listTCGAClassifiedImmune).intersection(set(listAllTCGANotImmuneHigh)))
        dfContTable['Score High'].loc['TCGA Imm. Low'] = len(set(listOrigTCGASamplesNotImmFlag).intersection(set(listAllTCGAImmuneHigh)))
        dfContTable['Score High'].loc['TCGA Imm. High'] = len(set(listTCGAClassifiedImmune).intersection(set(listAllTCGAImmuneHigh)))

        numTCGAImmLowTotal = np.sum(dfContTable[['Score Low', 'Score High']].loc['TCGA Imm. Low'].values.astype(np.float))
        dfContTable['Total'].loc['TCGA Imm. Low'] = numTCGAImmLowTotal
        numTCGAImmHighTotal = np.sum(dfContTable[['Score Low', 'Score High']].loc['TCGA Imm. High'].values.astype(np.float))
        dfContTable['Total'].loc['TCGA Imm. High'] = numTCGAImmHighTotal

        numScoreLowTotal = np.sum(dfContTable['Score Low'].reindex(['TCGA Imm. Low', 'TCGA Imm. High']).values.astype(np.float))
        dfContTable['Score Low'].loc['Total'] = numScoreLowTotal
        numScoreHighTotal = np.sum(dfContTable['Score High'].reindex(['TCGA Imm. Low', 'TCGA Imm. High']).values.astype(np.float))
        dfContTable['Score High'].loc['Total'] = numScoreHighTotal

        dfContTable.to_csv(os.path.join(Plot.strOutputFolder, 'FigS3_contTable.csv'), sep=',')

        kmfImmHigh = KaplanMeierFitter()
        kmfImmHigh.fit(
            dfTCGA['surv_time'].reindex(listTCGAImmuneHighWithValidSurv).values.astype(np.float),
            event_observed=dfTCGA['death_event'].reindex(listTCGAImmuneHighWithValidSurv),
            label='Immune high (n=' + '{}'.format(len(listTCGAImmuneHighWithValidSurv)) + ')')

        kmfNotImmHigh = KaplanMeierFitter()
        kmfNotImmHigh.fit(
            dfTCGA['surv_time'].reindex(listAllTCGANotImmuneHighWithValidSurv).values.astype(np.float),
            event_observed=dfTCGA['death_event'].reindex(listAllTCGANotImmuneHighWithValidSurv),
            label='Not immune high (n=' + '{}'.format(len(listAllTCGANotImmuneHighWithValidSurv)) + ')')


        structLowVsHighImmuneKMLogRank = KMlogRankTest(
            dfTCGA['surv_time'].ix[listAllTCGANotImmuneHighWithValidSurv],
            dfTCGA['surv_time'].ix[listTCGAImmuneHighWithValidSurv],
            event_observed_A=dfTCGA['death_event'].ix[listAllTCGANotImmuneHighWithValidSurv],
            event_observed_B=dfTCGA['death_event'].ix[listTCGAImmuneHighWithValidSurv])

        numLowVsHighImmuneKMLogRankPVal = structLowVsHighImmuneKMLogRank.p_value

        #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #
        # CREATE THE OUTPUT FIGURE

        handFig = plt.figure()
        handFig.set_size_inches(w=2.5,h=6)

        arrayGridSpec = gridspec.GridSpec(nrows=3, ncols=1,
                                          bottom=0.15, top=0.95,
                                          left=0.20, right=0.90,
                                          hspace=0.6)

        numMinScore = np.min(dfTCGA['TCGA Immune Score'][dfTCGA['TCGA Immune Score'].notnull()].values.astype(np.float))
        numMaxScore = np.max(dfTCGA['TCGA Immune Score'][dfTCGA['TCGA Immune Score'].notnull()].values.astype(np.float))

        arrayBins = np.linspace(start=numMinScore-0.01, stop=numMaxScore+0.01, num=40)

        handAx = plt.subplot(arrayGridSpec[0,0])
        structAxPos = handAx.get_position()

        plt.hist([sliceOrigTCGAImmScoreNotHigh[sliceOrigTCGAImmScoreNotHigh.notnull()].values.astype(np.float),
                  sliceOrigTCGAImmScoreHigh[sliceOrigTCGAImmScoreHigh.notnull()].values.astype(np.float)],
                 bins=arrayBins, stacked=True,
                 color=[(0.3, 0.7, 0.3), (0.5, 0.5, 0.5)],
                 label=['Immune High', 'Not immune high'])

        plt.legend(loc='upper right',
                       bbox_to_anchor=(1.05, 1.01),
                       fontsize=Plot.numFontSize*0.55,
                       scatterpoints=1,
                       framealpha=1,
                       ncol=1)

        handAx.set_xlabel('Immune Score', fontsize=Plot.numFontSize*0.8)
        handAx.set_title('Original TCGA SKCM cohort', fontsize=Plot.numFontSize)
        handAx.set_ylabel('Frequency', fontsize=Plot.numFontSize*0.8)
        handAx.set_xlim([numMinScore-0.02, numMaxScore+0.02])

        handAx.axvline(x=numImmScoreThresh,
                       ymin=0.0, ymax=1.0,
                       color='w', linewidth=1.25)
        handAx.axvline(x=numImmScoreThresh,
                       ymin=0.0, ymax=1.0,
                       color='r', linewidth=0.75)

        handFig.text(x=structAxPos.x0 - 0.2*structAxPos.width,
                     y=structAxPos.y0 + 1.08*structAxPos.height,
                     s='(A)',
                     ha='center', va='center',
                     fontsize=Plot.numFontSize*1.1,
                     weight='bold')

        handAx = plt.subplot(arrayGridSpec[1,0])
        structAxPos = handAx.get_position()

        plt.hist([sliceOrigTCGAImmScore[sliceOrigTCGAImmScore.notnull()].values.astype(np.float),
                  sliceNewTCGAImmSore[sliceNewTCGAImmSore.notnull()].values.astype(np.float)],
                 bins=arrayBins, stacked=True,
                 color=[(0.7, 0.7, 0.7),(0.3, 0.3, 0.3)],
                 label=['Original cohort', 'New samples'])

        plt.legend(loc='upper right',
                       bbox_to_anchor=(1.05, 1.01),
                       fontsize=Plot.numFontSize*0.55,
                       scatterpoints=1,
                       framealpha=1,
                       ncol=1)

        handAx.set_xlabel('Immune Score', fontsize=Plot.numFontSize*0.8)
        handAx.set_title('All TCGA SKCM samples', fontsize=Plot.numFontSize)
        handAx.set_ylabel('Frequency', fontsize=Plot.numFontSize*0.8)
        handAx.set_xlim([numMinScore-0.02, numMaxScore+0.02])

        handAx.axvline(x=numImmScoreThresh,
                       ymin=0.0, ymax=1.0,
                       color='w', linewidth=1.25)
        handAx.axvline(x=numImmScoreThresh,
                       ymin=0.0, ymax=1.0,
                       color='r', linewidth=0.75)

        handFig.text(x=structAxPos.x0 - 0.2*structAxPos.width,
                     y=structAxPos.y0 + 1.08*structAxPos.height,
                     s='(B)',
                     ha='center', va='center',
                     fontsize=Plot.numFontSize*1.1,
                     weight='bold')

        handAx = plt.subplot(arrayGridSpec[2,0])
        structAxPos = handAx.get_position()

        for kmf in [kmfImmHigh, kmfNotImmHigh]:
            kmf.plot(ax=handAx)


        arrayXLim = handAx.get_xlim()
        handAx.set_xlim([0, arrayXLim[1]])

        arrayXTicksInMo = np.arange(start=0, stop=arrayXLim[1], step=60)
        arrayXTicksInYr = np.arange(start=0, stop=((arrayXTicksInMo[-1])/12)+1, step=5, dtype=np.int)

        handAx.set_xticks(arrayXTicksInMo)
        handAx.set_yticks([0, 0.5, 1.0])

        handAx.set_xlabel('Time (years)', fontsize=Plot.numFontSize*0.8)
        handAx.set_xticklabels(arrayXTicksInYr)
        for handTick in handAx.xaxis.get_major_ticks():
            handTick.label.set_fontsize(Plot.numFontSize)

        handAx.set_ylabel('Overall survival', fontsize=Plot.numFontSize*0.8)
        for handTick in handAx.yaxis.get_major_ticks():
            handTick.label.set_fontsize(Plot.numFontSize)

        handAx.spines['top'].set_visible(False)
        handAx.spines['right'].set_visible(False)

        handAx.yaxis.set_ticks_position('left')
        handAx.xaxis.set_ticks_position('bottom')
        handAx.text(arrayXLim[0] + 0.70*np.ptp(arrayXLim),
                    0.85,
                    '$p$-value = ' + '{:03.2E}'.format(numLowVsHighImmuneKMLogRankPVal),
                    ha='center', va='center', fontsize=Plot.numFontSize*0.75)


        handAx.set_title('All TCGA SKCM samples', fontsize=Plot.numFontSize)

        handFig.text(x=structAxPos.x0 - 0.2*structAxPos.width,
                     y=structAxPos.y0 + 1.08*structAxPos.height,
                     s='(C)',
                     ha='center', va='center',
                     fontsize=Plot.numFontSize*1.1,
                     weight='bold')

        plt.legend(loc='upper center',
                       bbox_to_anchor=(0.5, -0.35),
                       fontsize=Plot.numFontSize*0.7,
                       scatterpoints=1,
                       framealpha=1,
                       ncol=1)

        for strFormat in Plot.listFileFormat:
            handFig.savefig(os.path.join(Plot.strOutputFolder, 'FigS5.' + strFormat),
                            ext=strFormat, dpi=300)
        plt.close(handFig)

        return flagResult

    def supp_fig_six(flagResult=False):
        """Draw supplementary figure S6 and save it as 'FigS6.<format>'.

        The figure has one row per gene in listOfGenesForSurvivalComp and three columns:
          * column 1: density-coloured scatter of 'NK Score' against the gene, with the NK score
            tercile thresholds and the per-group gene tercile thresholds overlaid;
          * column 2: Kaplan-Meier curves (low vs high gene partition) for the samples with an NK
            score below the lower tercile threshold;
          * column 3: the same for the samples with an NK score above the upper tercile threshold.
        The p-value in each survival panel title is a log-rank test between the samples below the
        lower gene tercile and those above the upper gene tercile within the respective NK group.

        Samples are the '-06' samples of the patients listed under 'MetOnlyPat' by
        PreProc.split_tcga_met_vs_pri(). One file is written to Plot.strOutputFolder for every
        entry of Plot.listFileFormat.

        :param flagResult: value handed straight back to the caller
        :return: flagResult, unchanged
        """

        listOfGenesForSurvivalComp = ['IL15', 'XCL1', 'XCL2',
                                      'CCL5', 'FLT3LG', 'GZMA',
                                      'GZMB', 'FASLG']
        numGenes = len(listOfGenesForSurvivalComp)

        # the 33.3rd and 66.7th percentiles; shared by every low/high tercile split below
        arrayTercilePercentiles = np.linspace(start=0, stop=100, num=4)[1:-1]

        dictPatGroups = PreProc.split_tcga_met_vs_pri()
        listPatsMetTumOnly = dictPatGroups['MetOnlyPat']
        listPatsMetTumOnlySamples = [strPat + '-06' for strPat in listPatsMetTumOnly]

        # load the TCGA data as a merged pandas dataframe
        dictTCGASKCM = PreProc.tcga_skcm_data(flagPerformExtraction=False)
        dfTCGA = dictTCGASKCM['df']

        dfTCGAMets = dfTCGA.loc[listPatsMetTumOnlySamples]

        arrayNKScore = dfTCGAMets['NK Score'].values.astype(float)
        arrayNKThresh = np.percentile(arrayNKScore, arrayTercilePercentiles)

        # strict inequalities: samples sitting exactly on a threshold fall into neither group
        listLowNKScorePatients = dfTCGAMets[arrayNKScore < arrayNKThresh[0]].index.tolist()
        listHighNKScorePatients = dfTCGAMets[arrayNKScore > arrayNKThresh[1]].index.tolist()

        dfLowNKMets = dfTCGAMets.reindex(listLowNKScorePatients).copy(deep=True)
        dfHighNKMets = dfTCGAMets.reindex(listHighNKScorePatients).copy(deep=True)

        # left-hand column: NK score vs gene scatter plots
        arrayGridSpecOne = matplotlib.gridspec.GridSpec(nrows=numGenes, ncols=1,
                                                        hspace=0.5, wspace=0.3,
                                                        left=0.16, right=0.32,
                                                        bottom=0.04, top=0.95)

        # right-hand columns: survival curves for the NK-low and NK-high groups
        arrayGridSpecTwo = matplotlib.gridspec.GridSpec(nrows=numGenes, ncols=2,
                                                        hspace=0.5, wspace=0.6,
                                                        left=0.45, right=0.90,
                                                        bottom=0.04, top=0.95)

        handFig = plt.figure()
        handFig.set_size_inches(w=5, h=8.5)

        for iGene, strGene in enumerate(listOfGenesForSurvivalComp):
            flagIsLastRow = (iGene == numGenes - 1)

            arrayGeneData = np.nan_to_num(dfTCGAMets[strGene].values.astype(float))

            # gene expression terciles within the NK-low group
            arrayLowNKGeneLoHiThresholds = \
                np.percentile(dfLowNKMets[strGene].values.astype(float),
                              arrayTercilePercentiles)

            listLowNKLowExpr = dfLowNKMets[dfLowNKMets[strGene] < arrayLowNKGeneLoHiThresholds[0]].index.tolist()
            listLowNKHighExpr = dfLowNKMets[dfLowNKMets[strGene] > arrayLowNKGeneLoHiThresholds[1]].index.tolist()

            # gene expression terciles within the NK-high group
            arrayHighNKGeneLoHiThresholds = \
                np.percentile(dfHighNKMets[strGene].values.astype(float),
                              arrayTercilePercentiles)

            listHighNKLowExpr = dfHighNKMets[dfHighNKMets[strGene] < arrayHighNKGeneLoHiThresholds[0]].index.tolist()
            listHighNKHighExpr = dfHighNKMets[dfHighNKMets[strGene] > arrayHighNKGeneLoHiThresholds[1]].index.tolist()

            #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #
            # column 1: NK score vs gene scatter with the partition thresholds overlaid

            handAx = plt.subplot(arrayGridSpecOne[iGene])
            structAxPos = handAx.get_position()

            arrayXToPlot, arrayYToPlot, arrayColor = PreProc.density_scatters(
                arrayXIn=arrayNKScore, arrayYIn=arrayGeneData)

            handAx.scatter(arrayXToPlot, arrayYToPlot,
                           cmap=plt.cm.magma,
                           c=arrayColor,
                           alpha=0.8,
                           linewidth=0.0)

            handAx.set_xlim([-0.35, 0.21])

            arrayXLim = handAx.get_xlim()
            arrayYLim = handAx.get_ylim()

            # axhline takes xmin/xmax as axes fractions, so convert the NK thresholds accordingly
            numLowNKThreshAxFrac = (arrayNKThresh[0]-arrayXLim[0])/np.ptp(arrayXLim)
            numHighNKThreshAxFrac = (arrayNKThresh[1]-arrayXLim[0])/np.ptp(arrayXLim)

            # NK-low group: gene thresholds, with the excluded middle band greyed out
            for numGeneThresh in arrayLowNKGeneLoHiThresholds:
                handAx.axhline(y=numGeneThresh,
                               xmin=0.0, xmax=numLowNKThreshAxFrac,
                               color='c', linestyle='--', linewidth=1)
            handAx.add_patch(
                matplotlib.patches.Rectangle([arrayXLim[0], arrayLowNKGeneLoHiThresholds[0]],
                                             arrayNKThresh[0]-arrayXLim[0], np.ptp(arrayLowNKGeneLoHiThresholds),
                                             edgecolor='0.7', lw=1,
                                             facecolor='0.7', fill=True, alpha=0.5))

            # samples between the two NK thresholds are not used; grey out the whole band
            handAx.axvline(x=arrayNKThresh[0], ymin=0.0, ymax=1.0,
                           color='c', linestyle='--', linewidth=1)
            handAx.add_patch(
                matplotlib.patches.Rectangle([arrayNKThresh[0], arrayYLim[0]],
                                             np.ptp(arrayNKThresh), np.ptp(arrayYLim),
                                             edgecolor='0.7', lw=1,
                                             facecolor='0.7', fill=True, alpha=0.5))

            # NK-high group: gene thresholds, with the excluded middle band greyed out
            for numGeneThresh in arrayHighNKGeneLoHiThresholds:
                handAx.axhline(y=numGeneThresh,
                               xmin=numHighNKThreshAxFrac, xmax=1.0,
                               color='c', linestyle='--', linewidth=1)
            handAx.add_patch(
                matplotlib.patches.Rectangle([arrayNKThresh[1], arrayHighNKGeneLoHiThresholds[0]],
                                             arrayXLim[1]-arrayNKThresh[1], np.ptp(arrayHighNKGeneLoHiThresholds),
                                             edgecolor='0.7', lw=1,
                                             facecolor='0.7', fill=True, alpha=0.5))

            handAx.axvline(x=arrayNKThresh[1], ymin=0.0, ymax=1.0,
                           color='c', linestyle='--', linewidth=1)

            # gene name as a rotated row label
            handFig.text(structAxPos.x0-0.4*structAxPos.width,
                         structAxPos.y0 + 0.5*structAxPos.height,
                         strGene,
                         ha='center', va='center', fontstyle='italic',
                         rotation=90, fontsize=Plot.numFontSize*0.7)

            # Tick.label1 replaces the Tick.label alias dropped by recent matplotlib releases
            for handTick in handAx.yaxis.get_major_ticks():
                handTick.label1.set_fontsize(Plot.numFontSize * 0.7)

            # functional group labels (and vertical bracket lines) down the far left of the figure,
            #   anchored on the row of one gene from each group
            if strGene == 'IL15':
                handFig.text(0.03,
                             structAxPos.y0 + 0.5 * structAxPos.height,
                             'NK (and T) cell\nactivity',
                             ha='center', va='center', rotation=90, fontsize=Plot.numFontSize*0.7)
            elif strGene == 'CCL5':
                handFig.text(0.03,
                             structAxPos.y0 + 1.2 * structAxPos.height,
                             'NK mediated DC recruitment',
                             ha='center', va='center', rotation=90, fontsize=Plot.numFontSize*0.7)
                handAx.annotate('',
                                xy=[structAxPos.x0 - structAxPos.width * 0.55,
                                    structAxPos.y0 + structAxPos.height * 3.8],
                                xycoords='figure fraction',
                                xytext=[structAxPos.x0 - structAxPos.width * 0.55,
                                        structAxPos.y0 - structAxPos.height * 1.4],
                                textcoords='figure fraction',
                                annotation_clip=False,
                                arrowprops=dict(facecolor='black',
                                                linewidth=2,
                                                arrowstyle='-'))
            elif strGene == 'GZMB':
                handFig.text(0.03,
                             structAxPos.y0 + 0.5 * structAxPos.height,
                             'NK mediated\ncytotoxicity',
                             ha='center', va='center', rotation=90, fontsize=Plot.numFontSize*0.7)
                handAx.annotate('',
                                xy=[structAxPos.x0 - structAxPos.width * 0.55,
                                    structAxPos.y0 + structAxPos.height * 2.4],
                                xycoords='figure fraction',
                                xytext=[structAxPos.x0 - structAxPos.width * 0.55,
                                        structAxPos.y0 - structAxPos.height * 1.4],
                                textcoords='figure fraction',
                                annotation_clip=False,
                                arrowprops=dict(facecolor='black',
                                                linewidth=2,
                                                arrowstyle='-'))

            handAx.set_xticks([-0.20, 0, 0.20])

            # only the bottom row carries x axis labelling
            if flagIsLastRow:
                handAx.set_xlabel('NK Score', fontsize = Plot.numFontSize*0.7)
                for handTick in handAx.xaxis.get_major_ticks():
                    handTick.label1.set_fontsize(Plot.numFontSize*0.7)
            else:
                handAx.set_xlabel('')
                handAx.set_xticklabels([])

            #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #
            # column 2: survival curves within the NK-low group

            handAx = plt.subplot(arrayGridSpecTwo[iGene, 0])

            dictKMFs = Analyse.split_one_marker_three_partitions(
                strMarkerToSplit=strGene,
                dfForAnalysis=dfLowNKMets,
                flagExcludeMarkerInLabel=True)

            kmfLow = dictKMFs['kmfLow']
            kmfHigh = dictKMFs['kmfHigh']

            # label based selection; Series.ix no longer exists in current pandas
            structLowVsHighExprKMLogRank = KMlogRankTest(
                dfTCGA['surv_time'].loc[listLowNKLowExpr],
                dfTCGA['surv_time'].loc[listLowNKHighExpr],
                event_observed_A=dfTCGA['death_event'].loc[listLowNKLowExpr],
                event_observed_B=dfTCGA['death_event'].loc[listLowNKHighExpr])

            numLowNKKMLogRankPVal = structLowVsHighExprKMLogRank.p_value

            for kmf in [kmfLow, kmfHigh]:
                kmf.plot(ax=handAx)

            arrayXLim = handAx.get_xlim()

            # ticks every 120 x-units, labelled in steps of 10 years (i.e. x is treated as months)
            arrayXTicksInMo = np.arange(start=0, stop=arrayXLim[1], step=120)
            arrayXTicksInYr = np.arange(start=0, stop=((arrayXTicksInMo[-1])/12)+1, step=10, dtype=int)

            handAx.set_xticks(arrayXTicksInMo)
            if flagIsLastRow:
                handAx.set_xticklabels(arrayXTicksInYr, fontsize=Plot.numFontSize * 0.7)
                handAx.set_xlabel('Time (years)', fontsize=Plot.numFontSize*0.7)
            else:
                handAx.set_xticklabels([])
                handAx.set_xlabel('')

            for handTick in handAx.yaxis.get_major_ticks():
                handTick.label1.set_fontsize(Plot.numFontSize * 0.7)

            handAx.set_title('$p$-value = ' + '{:03.2E}'.format(numLowNKKMLogRankPVal), fontsize=Plot.numFontSize*0.7)

            handAx.set_yticks([0, 0.5, 1.0])
            handAx.set_ylabel('Overall survival', fontsize=Plot.numFontSize*0.7)

            plt.legend(loc='upper right',
                       bbox_to_anchor=(1.40, 1.10),
                       fontsize=Plot.numFontSize * 0.5,
                       scatterpoints=1,
                       framealpha=1,
                       ncol=1)

            # column header, drawn once above the top row
            if strGene == 'IL15':
                structAxPos = handAx.get_position()
                handFig.text(structAxPos.x0 + 0.5*structAxPos.width,
                             structAxPos.y0 + 1.4*structAxPos.height,
                             'NK score low',
                             ha='center', va='center',
                             fontsize=Plot.numFontSize*0.7,
                             fontweight='bold')

            #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #
            # column 3: survival curves within the NK-high group

            handAx = plt.subplot(arrayGridSpecTwo[iGene, 1])

            structLowVsHighExprKMLogRank = KMlogRankTest(
                dfTCGA['surv_time'].loc[listHighNKLowExpr],
                dfTCGA['surv_time'].loc[listHighNKHighExpr],
                event_observed_A=dfTCGA['death_event'].loc[listHighNKLowExpr],
                event_observed_B=dfTCGA['death_event'].loc[listHighNKHighExpr])

            numHighNKKMLogRankPVal = structLowVsHighExprKMLogRank.p_value

            dictKMFs = Analyse.split_one_marker_three_partitions(
                strMarkerToSplit=strGene,
                dfForAnalysis=dfHighNKMets,
                flagExcludeMarkerInLabel=True)

            kmfLow = dictKMFs['kmfLow']
            kmfHigh = dictKMFs['kmfHigh']

            handAx.set_title('$p$-value = ' + '{:03.2E}'.format(numHighNKKMLogRankPVal),
                             fontsize=Plot.numFontSize*0.7)

            for kmf in [kmfLow, kmfHigh]:
                kmf.plot(ax=handAx)

            handAx.set_yticks([0, 0.5, 1.0])
            handAx.set_yticklabels([])

            # NOTE(review): arrayXLim still holds the x limits of the NK-low panel here, so both
            #   survival columns share the same tick positions - confirm that this is intended
            arrayXTicksInMo = np.arange(start=0, stop=arrayXLim[1], step=120)
            arrayXTicksInYr = np.arange(start=0, stop=((arrayXTicksInMo[-1])/12)+1, step=10, dtype=int)

            handAx.set_xticks(arrayXTicksInMo)
            if flagIsLastRow:
                handAx.set_xticklabels(arrayXTicksInYr, fontsize=Plot.numFontSize * 0.7)
                handAx.set_xlabel('Time (years)', fontsize=Plot.numFontSize*0.7)
            else:
                handAx.set_xticklabels([])
                handAx.set_xlabel('')

            plt.legend(loc='upper right',
                       bbox_to_anchor=(1.40, 1.10),
                       fontsize=Plot.numFontSize * 0.5,
                       scatterpoints=1,
                       framealpha=1,
                       ncol=1)

            # column header, drawn once above the top row
            if strGene == 'IL15':
                structAxPos = handAx.get_position()
                handFig.text(structAxPos.x0 + 0.5*structAxPos.width,
                             structAxPos.y0 + 1.4*structAxPos.height,
                             'NK score high',
                             ha='center', va='center',
                             fontsize=Plot.numFontSize*0.7,
                             fontweight='bold')

        # 'format' is the keyword savefig understands for the output type
        for strFormat in Plot.listFileFormat:
            handFig.savefig(os.path.join(Plot.strOutputFolder, 'FigS6.'+strFormat),
                            format=strFormat, dpi=300)
        # release the figure, as the other figure functions do
        plt.close(handFig)

        return flagResult

    def supp_fig_seven(flagResult=False):
        """Draw supplementary figure S7 and save it as 'FigS7.<format>'.

        For each marker pair in listMarkersToPlot, two panels are drawn on a 4x2 grid:
          * an association panel: density-coloured scatter of the first marker (x) against the
            second marker (y);
          * a survival panel directly beneath it: Kaplan-Meier curves for the low/medium/high
            partitions of the second marker, as returned by
            Analyse.split_one_marker_three_partitions(), with star annotations for every pairwise
            comparison whose p-value is below 0.05.

        Samples are the '-06' samples of the patients listed under 'MetOnlyPatWithAge' by
        PreProc.split_tcga_met_vs_pri(). One file is written to Plot.strOutputFolder for every
        entry of Plot.listFileFormat.

        :param flagResult: value handed straight back to the caller
        :return: flagResult, unchanged
        """

        def _significance_stars(numPVal):
            # star string for a p-value that has already passed the 0.05 cut-off
            if numPVal < 1E-6:
                return '***'
            if numPVal < 1E-3:
                return '**'
            return '*'

        listMarkersToPlot=[['NK Score', 'TCGA Immune Score'],
                           ['NK Score', 'T cell Score'],
                           ['NK Score', 'Bottcher NK Score'],
                           ['NK Score', 'Bottcher cDC1 Score']
                           ]
        # (row, column) grid positions and panel letters for the scatter panels ...
        listOfAssocPlotPosTuples = [(0,0),
                                    (0,1),
                                    (2,0),
                                    (2,1)]
        listAssocPlotSubPlotLabels = ['(A)', '(B)',
                                      '(E)', '(F)']
        # ... and for the survival panel drawn beneath each of them
        listOfSurvPlotPosTuples = [(1,0),
                                   (1,1),
                                   (3,0),
                                   (3,1)]
        listSurvPlotSubPlotLabels = ['(C)', '(D)',
                                     '(G)', '(H)']

        # hand-tuned placement (figure fraction) of the significance markers for each survival panel
        arrayStartXForSig = [0.185, 0.745, 0.2, 0.675]
        numXSpacerForSigAnnot = 0.01
        numYSpacerForSigAnnot = 0.02

        # (comparison key, top of the marker line, bottom of the marker line); the line ends are
        #   given as multiples of numYSpacerForSigAnnot below the annotation start height
        listSigComparisons = [('LowVsMed', 0, 1),
                              ('LowVsHigh', 0, 2),
                              ('MedVsHigh', 1, 2)]

        dictPatGroups = PreProc.split_tcga_met_vs_pri()
        listPatsMetTumOnly = dictPatGroups['MetOnlyPatWithAge']

        listPatsMetTumOnlySamples = [strPat + '-06' for strPat in listPatsMetTumOnly]

        # load the TCGA data as a merged pandas dataframe
        dictTCGASKCM = PreProc.tcga_skcm_data(flagPerformExtraction=False)
        dfTCGA = dictTCGASKCM['df']

        dfTCGAMets = dfTCGA.reindex(listPatsMetTumOnlySamples)

        arrayGridSpec = gridspec.GridSpec(nrows=4, ncols=2,
                                          left=0.14, right=0.94,
                                          bottom=0.07, top=0.97,
                                          wspace=0.65, hspace=0.65)

        handFig = plt.figure()
        handFig.set_size_inches(w=4, h=6)

        for iMarker, (strMarkerOne, strMarkerTwo) in enumerate(listMarkersToPlot):

            #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #
            # association (scatter) panel

            numAssocRow, numAssocCol = listOfAssocPlotPosTuples[iMarker]
            handAx = plt.subplot(arrayGridSpec[numAssocRow, numAssocCol])
            structAxPos = handAx.get_position()

            # missing values are replaced with zero before the density estimate
            arrayXToPlot, arrayYToPlot, arrayForColor = PreProc.density_scatters(
                arrayXIn=np.nan_to_num(dfTCGAMets[strMarkerOne].values.astype(float)),
                arrayYIn=np.nan_to_num(dfTCGAMets[strMarkerTwo].values.astype(float)))
            # 'none' is the documented way to suppress marker edges
            handAx.scatter(arrayXToPlot, arrayYToPlot,
                           c=arrayForColor,
                           s=10,
                           edgecolor='none',
                           cmap=plt.cm.viridis)

            numXMin = np.min(arrayXToPlot)
            numXMax = np.max(arrayXToPlot)
            numXRange = np.ptp(arrayXToPlot)

            numYMin = np.min(arrayYToPlot)
            numYMax = np.max(arrayYToPlot)
            numYRange = np.ptp(arrayYToPlot)

            # pad the axis limits by 5% of the data range on every side
            handAx.set_xlim([numXMin-0.05*numXRange, numXMax+0.05*numXRange])
            handAx.set_ylim([numYMin-0.05*numYRange, numYMax+0.05*numYRange])

            # Tick.label1 replaces the Tick.label alias dropped by recent matplotlib releases
            for handTick in handAx.xaxis.get_major_ticks():
                handTick.label1.set_fontsize(Plot.numFontSize*0.60)

            for handTick in handAx.yaxis.get_major_ticks():
                handTick.label1.set_fontsize(Plot.numFontSize*0.60)

            handAx.set_xlabel(strMarkerOne, fontsize=Plot.numFontSize*0.60)
            handAx.set_ylabel(strMarkerTwo, fontsize=Plot.numFontSize*0.60)

            handFig.text(x=structAxPos.x0 - 0.30*structAxPos.width,
                         y=structAxPos.y0 + 1.08*structAxPos.height,
                         s=listAssocPlotSubPlotLabels[iMarker],
                         ha='center', va='center',
                         fontsize=Plot.numFontSize*0.8,
                         weight='bold')

            #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #  -  #
            # survival panel for the second marker of the pair

            dictKMFs = Analyse.split_one_marker_three_partitions(strMarkerToSplit=strMarkerTwo,
                                                                 dfForAnalysis=dfTCGAMets,
                                                                 flagShowSplitValues=False)
            kmfLow = dictKMFs['kmfLow']
            kmfMed = dictKMFs['kmfMed']
            kmfHigh = dictKMFs['kmfHigh']

            numSurvRow, numSurvCol = listOfSurvPlotPosTuples[iMarker]
            handAx = plt.subplot(arrayGridSpec[numSurvRow, numSurvCol])
            structAxPos = handAx.get_position()
            for kmf in [kmfLow, kmfMed, kmfHigh]:
                kmf.plot(ax=handAx)

            handAx.set_ylim([0, 1])
            arrayXLim = handAx.get_xlim()
            handAx.set_xlim([0, arrayXLim[1]])

            # ticks every 60 x-units, labelled in steps of 5 years (i.e. x is treated as months)
            arrayXTicksInMo = np.arange(start=0, stop=arrayXLim[1], step=60)
            arrayXTicksInYr = np.arange(start=0, stop=((arrayXTicksInMo[-1])/12)+1, step=5, dtype=int)

            handAx.set_xticks(arrayXTicksInMo)
            handAx.set_yticks([0, 0.5, 1.0])

            handAx.set_xlabel('Time (years)', fontsize=Plot.numFontSize*0.60)
            handAx.set_xticklabels(arrayXTicksInYr)
            for handTick in handAx.xaxis.get_major_ticks():
                handTick.label1.set_fontsize(Plot.numFontSize*0.60)

            handAx.set_ylabel('Overall survival', fontsize=Plot.numFontSize*0.60)
            for handTick in handAx.yaxis.get_major_ticks():
                handTick.label1.set_fontsize(Plot.numFontSize*0.60)

            numSigAnnotStartY = structAxPos.y0 + 1.15 * structAxPos.height
            numSigAnnotStartX = arrayStartXForSig[iMarker]

            # each significant comparison gets a short vertical line with rotated stars above it;
            #   successive markers are shifted left by numXSpacerForSigAnnot
            iPlottedSig = 0
            for strComparison, numTopSteps, numBottomSteps in listSigComparisons:
                numPVal = dictKMFs[strComparison].p_value
                if not numPVal < 5E-2:
                    continue

                numMarkerX = numSigAnnotStartX - (iPlottedSig * numXSpacerForSigAnnot)
                numLineTopY = numSigAnnotStartY - numTopSteps * numYSpacerForSigAnnot
                numLineBottomY = numSigAnnotStartY - numBottomSteps * numYSpacerForSigAnnot

                handFig.text(numMarkerX + 0.003,
                             numSigAnnotStartY + 0.002 - numTopSteps * numYSpacerForSigAnnot,
                             _significance_stars(numPVal),
                             rotation=90,
                             fontsize=Plot.numFontSize * 0.5,
                             ha='center', va='bottom')

                handAx.annotate('',
                                xy=[numMarkerX, numLineTopY],
                                xycoords='figure fraction',
                                xytext=[numMarkerX, numLineBottomY],
                                textcoords='figure fraction',
                                annotation_clip=False,
                                arrowprops=dict(facecolor='black',
                                                linewidth=1,
                                                arrowstyle='-'))

                iPlottedSig = iPlottedSig + 1

            plt.legend(loc='lower right',
                       bbox_to_anchor=(1.17, 0.82),
                       fontsize=Plot.numFontSize*0.4,
                       scatterpoints=1,
                       framealpha=1,
                       ncol=1)

            handFig.text(x=structAxPos.x0 - 0.30*structAxPos.width,
                         y=structAxPos.y0 + 1.08*structAxPos.height,
                         s=listSurvPlotSubPlotLabels[iMarker],
                         ha='center', va='center',
                         fontsize=Plot.numFontSize*0.8,
                         weight='bold')

            handAx.spines['top'].set_visible(False)
            handAx.spines['right'].set_visible(False)

            handAx.yaxis.set_ticks_position('left')
            handAx.xaxis.set_ticks_position('bottom')

        # 'format' is the keyword savefig understands for the output type
        for strFormat in Plot.listFileFormat:
            handFig.savefig(os.path.join(Plot.strOutputFolder, 'FigS7.' + strFormat),
                            format=strFormat, dpi=300)
        plt.close(handFig)

        return flagResult

    def supp_fig_eight(flagResult=False):
        """Draw supplementary figure 8 and save it as 'FigS8.<format>'.

        Three heatmaps of the T cell signature genes are drawn side by side, each with a strip of T cell scores
        underneath:
          * TCGA SKCM metastatic tumours ('-06' samples), ordered by their 'T cell Score' column;
          * GSE60424 samples, ordered by the cell types in listGSE60424TypeOrder;
          * GSE24759 samples, ordered by the grouped cell types from PreProc.gse24759_subsets().

        The signature is the 'UpGenes' list of the 'T cells' entry returned by
        GeneSetScoring.CuratedList.t_cells_in_tumour(), restricted to genes present in the TCGA gene list. Scores
        for the two sorted-cell data sets are calculated here with
        GeneSetScoring.FromInput.single_sample_rank_score().

        The figure is written to Plot.strOutputFolder once per entry of Plot.listFileFormat.

        :param flagResult: value handed straight back to the caller
        :return: flagResult, unchanged
        """

        dictGSE24759Subsets = PreProc.gse24759_subsets()
        listOfListsGSE24759SampleTypeGrouped = dictGSE24759Subsets['listOfListsSampleTypeGrouped']
        listGSE24759SampleTypeLabels = dictGSE24759Subsets['listSampleTypeLabels']
        # flatten the grouped cell types into a single display order
        listGSE24759SmpTypeOrder = [strCellType
                                    for listGroup in listOfListsGSE24759SampleTypeGrouped
                                    for strCellType in listGroup]

        listGSE60424TypeOrder = ['B-cells', 'CD4', 'CD8', 'Monocytes', 'NK', 'Neutrophils']
        listGSE60424TypeDisp = ['B-cells', 'CD4$^{+}$ T cells', 'CD8$^{+}$ T cells',
                                'Monocytes', 'NK cells', 'Neutrophils']

        dictPatGroups = PreProc.split_tcga_met_vs_pri()
        listPatsMetTumOnlySamples = [strPat + '-06' for strPat in dictPatGroups['MetOnlyPatWithAge']]

        # load the TCGA data as a merged pandas dataframe
        dictTCGASKCM = PreProc.tcga_skcm_data(flagPerformExtraction=False)
        dfTCGA = dictTCGASKCM['df']
        listTCGAGenes = dictTCGASKCM['listGenes']

        dfTCGAMets = dfTCGA.reindex(listPatsMetTumOnlySamples)

        # order the metastatic samples by increasing T cell score
        # (builtin float/int are used throughout: the np.float/np.int aliases no longer exist in current NumPy)
        arraySortedByTCellScore = np.argsort(dfTCGAMets['T cell Score'].values.astype(float))
        listMetSamples = dfTCGAMets.index.tolist()
        listMetsSortedByTCellScore = [listMetSamples[i] for i in arraySortedByTCellScore]

        dictLocalTCellSig = GeneSetScoring.CuratedList.t_cells_in_tumour()
        listTCellsUpGenesInTCGA = sorted(set(dictLocalTCellSig['T cells']['UpGenes']).intersection(listTCGAGenes))

        # genes x samples, with the samples in score order
        dfTCGATCellGenes = dfTCGAMets[listTCellsUpGenesInTCGA].reindex(listMetsSortedByTCellScore).transpose()

        arrayFlatAbundData = np.ravel(np.nan_to_num(dfTCGATCellGenes.values.astype(float)))

        print('Loading sorted cell data..')
        dfGSE24759, dfGSE24759SmpToType = PreProc.gse24759_data()
        dfGSE60424, dictGSE60424SmpToType = PreProc.gse60424_data()

        # # # # # # # # # # # #
        # # GSE60424 - sample ordering, abundance data and scores
        # a sample is assigned to a cell type when its annotated type is a substring of the cell type label
        listGSE60424SmpOrder = []
        for strCellType in listGSE60424TypeOrder:
            for strSample in dfGSE60424.columns.tolist():
                if dictGSE60424SmpToType[strSample] in strCellType:
                    listGSE60424SmpOrder.append(strSample)

        dfGSE60424TCellGenes = dfGSE60424[listGSE60424SmpOrder].reindex(listTCellsUpGenesInTCGA)
        arrayGSE60424Abund = dfGSE60424TCellGenes.values.astype(float)
        arrayFlatGSE60424 = np.ravel(np.nan_to_num(arrayGSE60424Abund))
        # the smallest non-zero value is used as a pseudo-count so that zeros survive the log transform
        numMinNonZeroGSE60424 = np.min(arrayFlatGSE60424[arrayFlatGSE60424 > 0])
        arrayLogGSE60424Data = np.log10(arrayGSE60424Abund + numMinNonZeroGSE60424)

        arrayGSE60424TCellScores = np.zeros(len(listGSE60424SmpOrder), dtype=float)
        for iSample, strSample in enumerate(listGSE60424SmpOrder):
            arrayGSE60424TCellScores[iSample] = GeneSetScoring.FromInput.single_sample_rank_score(
                listAllGenes=dfGSE60424.index.tolist(),
                arrayTranscriptAbundance=dfGSE60424[strSample].values.astype(float),
                listUpGenesToScore=listTCellsUpGenesInTCGA,
                flagApplyNorm=True)

        # # # # # # # # # # # #
        # # GSE24759 - sample ordering, scores and abundance data
        # NOTE(review): the comparison below builds a boolean mask but only its index is used, so ALL samples are
        #   retained irrespective of 'Source'. This looks like an unapplied filter; it is deliberately left as-is
        #   because applying it would change which samples appear in the figure -- confirm the intent before
        #   changing it.
        listGSE24759SmpOfInt = (dfGSE24759SmpToType.loc['Source'] == 'peripheral blood').index.tolist()

        listGSE24759SmpOrder = []
        arrayGSE24759NumEachType = np.zeros(len(listGSE24759SmpTypeOrder), dtype=int)
        for iCellType, strCellType in enumerate(listGSE24759SmpTypeOrder):
            numOfType = 0
            for strSample in listGSE24759SmpOfInt:
                # cell types are matched as substrings of the sample title
                if strCellType in dfGSE24759SmpToType[strSample].loc['Title']:
                    listGSE24759SmpOrder.append(strSample)
                    numOfType = numOfType + 1
            arrayGSE24759NumEachType[iCellType] = numOfType

        arrayGSE24759TCellScores = np.zeros(len(listGSE24759SmpOrder), dtype=float)
        for iSample, strSample in enumerate(listGSE24759SmpOrder):
            arrayGSE24759TCellScores[iSample] = GeneSetScoring.FromInput.single_sample_rank_score(
                listAllGenes=dfGSE24759.index.tolist(),
                arrayTranscriptAbundance=dfGSE24759[strSample].values.astype(float),
                listUpGenesToScore=listTCellsUpGenesInTCGA,
                flagApplyNorm=True)

        # genes x samples matrix; signature genes which are absent from GSE24759 are shown as NaN rows
        dfGSE24759Ordered = dfGSE24759[listGSE24759SmpOrder]
        setGSE24759Genes = set(dfGSE24759.index.tolist())
        arrayGSE24759Out = np.zeros((len(listTCellsUpGenesInTCGA), len(listGSE24759SmpOrder)), dtype=float)
        for iGene, strGene in enumerate(listTCellsUpGenesInTCGA):
            if strGene in setGSE24759Genes:
                arrayGSE24759Out[iGene, :] = dfGSE24759Ordered.loc[strGene]
            else:
                arrayGSE24759Out[iGene, :] = np.nan

        handFig = plt.figure()
        handFig.set_size_inches(w=6, h=9)

        # # # # # # # # # # # #
        # # TCGA SKCM - Transcript abundance data
        handAx = handFig.add_axes([0.07, 0.13, 0.33, 0.75])

        handAx.matshow(dfTCGATCellGenes.values,
                       interpolation='none',
                       cmap=plt.cm.magma,
                       vmin=np.min(arrayFlatAbundData),
                       vmax=np.max(arrayFlatAbundData),
                       aspect='auto')

        structAxPos = handAx.get_position()
        handAx.set_xticks([])
        handAx.set_yticks([])

        handFig.text(x=structAxPos.x0 + 0.5*structAxPos.width,
                     y=structAxPos.y0 + 1.01*structAxPos.height,
                     s='TCGA SKCM metastatic tumours',
                     fontsize=Plot.numFontSize*0.8,
                     ha='center',
                     va='bottom')

        # gene names to the left of the first heatmap (shared row order across all three heatmaps)
        for iGene, strGene in enumerate(dfTCGATCellGenes.index.tolist()):
            handAx.text(-2, iGene,
                        strGene,
                        fontsize=Plot.numFontSize*0.4,
                        style='italic',
                        ha='right', va='center')

        # # # # # # # # # # # #
        # # TCGA SKCM - T cell scores
        handAx = handFig.add_axes([0.07, 0.03, 0.33, 0.08])
        arraySortedTCellScores = dfTCGAMets['T cell Score'].reindex(listMetsSortedByTCellScore).values.astype(float)

        handAx.plot(np.arange(start=0, stop=len(listMetsSortedByTCellScore), step=1),
                    arraySortedTCellScores,
                    '-',
                    color='k',
                    lw=0.5)

        handAx.set_xlim([0, len(listMetsSortedByTCellScore)])

        handAx.set_ylim([-0.5, 0.5])

        structMajorTickLoc = matplotlib.ticker.FixedLocator(np.linspace(-0.5, 0.5, 3))
        structMinorTickLoc = matplotlib.ticker.FixedLocator(np.linspace(-0.5, 0.5, 5))
        handAx.yaxis.set_major_locator(structMajorTickLoc)
        handAx.yaxis.set_minor_locator(structMinorTickLoc)
        for handTick in handAx.yaxis.get_major_ticks():
            # Tick.label was an alias of Tick.label1 and is no longer available in recent matplotlib
            handTick.label1.set_fontsize(Plot.numFontSize*0.5)

        handAx.set_xticks([])

        handAx.text(float(len(listMetsSortedByTCellScore))/2,
                    -0.5,
                    'T cell score',
                    fontsize=Plot.numFontSize*0.5,
                    ha='center', va='center',
                    path_effects=[PathEffects.withStroke(linewidth=2, foreground="w")])

        # # # # # # # # # # # #
        # # GSE60424 - Transcript abundance data
        handAx = handFig.add_axes([0.42, 0.13, 0.12, 0.75])

        handAx.matshow(arrayLogGSE60424Data,
                       cmap=plt.cm.viridis,
                       vmin=np.min(np.ravel(np.nan_to_num(arrayLogGSE60424Data))),
                       vmax=np.max(np.ravel(np.nan_to_num(arrayLogGSE60424Data))),
                       aspect='auto')
        handAx.set_xticks([])
        handAx.set_yticks([])

        # the column blocks below assume the same number of samples for every cell type
        numSampPerType = len(listGSE60424SmpOrder)/len(listGSE60424TypeOrder)
        for iCellType in range(len(listGSE60424TypeOrder)):

            numStartColIndex = int(iCellType*numSampPerType)
            numEndColIndex = int((iCellType+1)*numSampPerType)

            # black-under-white divider after every cell type, including the last one (the guard that used to
            #  wrap these calls was always true inside this loop)
            handAx.axvline(x=numEndColIndex-0.5,
                           ymin=0,
                           ymax=1,
                           color='k',
                           linewidth=0.75)
            handAx.axvline(x=numEndColIndex-0.5,
                           ymin=0,
                           ymax=1,
                           color='w',
                           linewidth=0.5)

            handAx.text(np.mean([numStartColIndex, numEndColIndex])-1.5,
                        -0.6,
                        listGSE60424TypeDisp[iCellType],
                        fontsize=Plot.numFontSize*0.5,
                        ha='left', va='bottom',
                        rotation=60)

        # # # # # # # # # # # #
        # # GSE60424 - T cell scores
        handAx = handFig.add_axes([0.42, 0.03, 0.12, 0.08])

        handAx.set_xlim([0, len(listGSE60424TypeOrder)])

        handAx.set_ylim([-0.50, 0.50])

        structMajorTickLoc = matplotlib.ticker.FixedLocator(np.linspace(-0.5, 0.5, 3))
        structMinorTickLoc = matplotlib.ticker.FixedLocator(np.linspace(-0.5, 0.5, 5))
        handAx.yaxis.set_major_locator(structMajorTickLoc)
        handAx.yaxis.set_minor_locator(structMinorTickLoc)

        handAx.set_yticklabels([])
        handAx.set_xticks([])

        # one unit of x per cell type; the samples of a type are spread across the middle of that unit
        arrayXSpacer = np.linspace(start=0.2, stop=0.8, num=int(numSampPerType))
        for iCellType in range(len(listGSE60424TypeOrder)):
            numStartColIndex = int(iCellType*numSampPerType)

            for iObs in range(int(numSampPerType)):
                handAx.plot(float(iCellType) + arrayXSpacer[iObs],
                            arrayGSE60424TCellScores[numStartColIndex+iObs],
                            'o',
                            markersize=0.5,
                            color='k',
                            alpha=0.7)

        handAx.text(float(len(listGSE60424TypeOrder))/2,
                    -0.5,
                    'T cell score',
                    fontsize=Plot.numFontSize*0.5,
                    ha='center', va='center',
                    path_effects=[PathEffects.withStroke(linewidth=2, foreground="w")])

        # # # # # # # # # # # #
        # # GSE24759 - Transcript abundance array
        handAx = handFig.add_axes([0.56, 0.13, 0.40, 0.75])

        arrayFlatGSE24759Out = np.ravel(np.nan_to_num(arrayGSE24759Out))

        handAx.matshow(arrayGSE24759Out,
                       cmap=plt.cm.viridis,
                       vmin=np.min(arrayFlatGSE24759Out[arrayFlatGSE24759Out > 0]),
                       vmax=np.max(arrayFlatGSE24759Out),
                       aspect='auto')
        handAx.set_xticks([])
        handAx.set_yticks([])

        # numCounter tracks the first column of the current group of cell types
        numCounter = 0
        for iGroup, listGroupTypes in enumerate(listOfListsGSE24759SampleTypeGrouped):
            numInGroup = 0
            for strType in listGroupTypes:
                numInGroup = numInGroup + arrayGSE24759NumEachType[listGSE24759SmpTypeOrder.index(strType)]

            handAx.text(x=float(numCounter)+(float(numInGroup)/2)-0.5-(float(numInGroup)/4),
                        y=-0.6,
                        s=listGSE24759SampleTypeLabels[iGroup],
                        fontsize=Plot.numFontSize*0.5,
                        rotation=60,
                        ha='left',
                        va='bottom')

            numCounter = numCounter + numInGroup

            # black-under-white divider after every group, including the last one (the guard that used to wrap
            #  these calls was always true inside this loop)
            handAx.axvline(x=(float(numCounter)-0.5),
                           ymin=0.0,
                           ymax=1.0,
                           c='k',
                           lw=0.75,
                           clip_on=False)
            handAx.axvline(x=(float(numCounter)-0.5),
                           ymin=0.0,
                           ymax=1.0,
                           c='w',
                           lw=0.5,
                           clip_on=False)

        # # # # # # # # # # # #
        # # GSE24759 - T cell scores
        handAx = handFig.add_axes([0.56, 0.03, 0.40, 0.08])

        handAx.set_xlim([0, len(listGSE24759SmpOrder)])

        handAx.set_ylim([-0.50, 0.50])

        structMajorTickLoc = matplotlib.ticker.FixedLocator(np.linspace(-0.5, 0.5, 3))
        structMinorTickLoc = matplotlib.ticker.FixedLocator(np.linspace(-0.5, 0.5, 5))
        handAx.yaxis.set_major_locator(structMajorTickLoc)
        handAx.yaxis.set_minor_locator(structMinorTickLoc)

        handAx.set_yticklabels([])
        handAx.set_xticks([])

        handAx.plot(np.arange(start=0, stop=len(listGSE24759SmpOrder), step=1),
                    arrayGSE24759TCellScores,
                    'o',
                    markersize=0.5,
                    color='k',
                    alpha=0.7)

        handAx.text(float(len(listGSE24759SmpOrder))/2,
                    -0.5,
                    'T cell score',
                    fontsize=Plot.numFontSize*0.5,
                    ha='center', va='center',
                    path_effects=[PathEffects.withStroke(linewidth=2, foreground="w")])

        # 'format' is the keyword savefig documents for the output type; the previous 'ext' keyword is not a
        #  savefig argument and is rejected by recent matplotlib releases
        for strFormat in Plot.listFileFormat:
            handFig.savefig(os.path.join(Plot.strOutputFolder, 'FigS8.' + strFormat),
                            format=strFormat, dpi=300)

        plt.close(handFig)

        return flagResult

    def supp_fig_nine(flagResult=False):
        """Draw supplementary figure 9 and save it as 'FigS9.<format>'.

        Panels:
          (A)-(C) hexbin plots of pairs of epithelial / mesenchymal / TGF-B EMT scores for the TCGA SKCM
                  metastatic tumours ('-06' samples), with the LM-MEL cell lines overlaid as scatter points;
          (D)     Kaplan-Meier curves for the TCGA subsets defined by the mesenchymal and TGF-B EMT scores, drawn
                  only for subsets with more than 20 patients;
          (E)     heatmap of per-gene z-scored LM-MEL values for the cell lines in the plotted subsets, over the
                  selected markers plus the (up to) 40 genes most correlated and most anti-correlated with the
                  TCGA 'NK Score' which also pass the LM-MEL abundance filter.

        The figure is written to Plot.strOutputFolder once per entry of Plot.listFileFormat.

        :param flagResult: value handed straight back to the caller
        :return: flagResult, unchanged
        """

        numMaxXTicks = 5
        numMaxYTicks = 5

        # a gene passes the abundance filter when more than this fraction of LM-MEL lines exceed the threshold
        numLMMELHighAbundThresh = 6.0
        numFractLMMELLinesReqAboveAbundThresh = 0.35

        # number of genes retained in each of the correlated / anti-correlated groups
        numGenesPerCorrGroup = 40

        dfLMMEL = PreProc.lm_mel_data(flagPerformExtraction=False)
        listLMMELLines = dfLMMEL.index.tolist()
        listLMMELGenes = dfLMMEL.columns.tolist()

        listSelectedMarkers = ['THBS1', 'MITF', 'CDH1']

        listMarkersToPlot = [['Epithelial Score', 'Mesenchymal Score'],
                             ['Epithelial Score', 'TGF-B EMT Score'],
                             ['TGF-B EMT Score', 'Mesenchymal Score']
                             ]
        listOfPlotPosTuples = [(0, 0),
                               (0, 1),
                               (1, 0)]
        listMarkerPairSubPlotLabels = ['(A)', '(B)', '(C)']

        # (builtin float/int are used throughout: the np.float/np.int aliases no longer exist in current NumPy)
        arrayCellLinesAboveThreshPerGene = np.sum(dfLMMEL.values.astype(float) > numLMMELHighAbundThresh, axis=0)
        arrayGeneIndicesPassAbundThresh = \
            np.where(arrayCellLinesAboveThreshPerGene > numFractLMMELLinesReqAboveAbundThresh*len(listLMMELLines))[0]

        # a set gives constant-time membership tests in the gene selection below
        setGenesGoodDynRange = set(listLMMELGenes[i] for i in arrayGeneIndicesPassAbundThresh)

        dictPatGroups = PreProc.split_tcga_met_vs_pri()
        listPatsMetTumOnlySamples = [strPat + '-06' for strPat in dictPatGroups['MetOnlyPat']]

        # load the TCGA data as a merged pandas dataframe
        dictTCGASKCM = PreProc.tcga_skcm_data(flagPerformExtraction=False)
        dfTCGA = dictTCGASKCM['df']
        listTCGAGenes = dictTCGASKCM['listGenes']

        dfTCGAMets = dfTCGA.reindex(listPatsMetTumOnlySamples)

        # rank the genes by their correlation with the NK score across the metastatic tumours
        dfCorrWithScore = dfTCGAMets[listTCGAGenes].corrwith(dfTCGAMets['NK Score'])
        dfCorrWithScoreClean = dfCorrWithScore[dfCorrWithScore.notnull()]

        arrayGeneRanksCorrWithScore = np.argsort(dfCorrWithScoreClean.values.astype(float))
        listRankedGenes = dfCorrWithScoreClean.index.tolist()

        def _first_genes_passing_filter(arrayOrderedIndices):
            # walk the genes in the given order, keeping those which pass the LM-MEL abundance filter, and stop
            #  as soon as the group is full
            listGenesOut = []
            for i in arrayOrderedIndices:
                if len(listGenesOut) >= numGenesPerCorrGroup:
                    break
                if listRankedGenes[i] in setGenesGoodDynRange:
                    listGenesOut.append(listRankedGenes[i])
            return listGenesOut

        listAntiCorrGenes = _first_genes_passing_filter(arrayGeneRanksCorrWithScore)
        listCorrGenes = _first_genes_passing_filter(arrayGeneRanksCorrWithScore[::-1])

        arrayColorNorm = matplotlib.colors.Normalize(vmin=0,
                                                     vmax=10)
        arrayColorsForMap = matplotlib.cm.ScalarMappable(norm=arrayColorNorm,
                                                         cmap=matplotlib.cm.tab10)

        listAllMarkersToPlot = listSelectedMarkers + listCorrGenes + listAntiCorrGenes
        listOfListsMarkersToPlot = [listSelectedMarkers, listCorrGenes, listAntiCorrGenes]
        listToDispMarkerSubGroups = ['Selected\nmarkers', 'NK score\ncorrelated', 'NK score\nanti-correlated']

        numMSUpperThresh = 0.07
        numMSLowerThresh = 0.01

        # minimum difference between the mesenchymal and TGF-B EMT scores for a sample to be assigned to a subset
        numXYDiff = 0.03

        # NOTE(review): subsets 0 and 2 are both selected with (mesenchymal score > TGF-B EMT score + numXYDiff),
        #   yet subset 0 is labelled 'TGF-B EMT High' while subset 2 is labelled 'TGF-B EMT Low' (and likewise
        #   for subsets 1 and 3 with the opposite inequality). The labels for subsets 0 and 1 may be swapped --
        #   confirm against the intended definitions; both the labels and the selections are left unchanged here.
        listSubsetLabels = ['Mesenchymal Low,\nTGF-B EMT High',
                            'Mesenchymal Low,\nTGF-B EMT Low',
                            'Mesenchymal High,\nTGF-B EMT Low',
                            'Mesenchymal High,\nTGF-B EMT High']
        listCellLinesToPlotDisp = ['Mes. Lo &\nTGF-B EMT Hi',
                                   'Mes. Lo &\nTGF-B EMT Lo',
                                   'Mes. Hi &\nTGF-B EMT Lo',
                                   'Mes. Hi &\nTGF-B EMT Hi']

        strMesLMMEL = 'Mesenchymal Score (vs. TCGA)'
        strTGFBLMMEL = 'TGF-B EMT Score (vs. TCGA)'
        strMesTCGA = 'Mesenchymal Score (vs. LM-MEL)'
        strTGFBTCGA = 'TGF-B EMT Score (vs. LM-MEL)'

        listMesLowLines = dfLMMEL[dfLMMEL[strMesLMMEL] < numMSLowerThresh].index.tolist()
        listMesHighLines = dfLMMEL[dfLMMEL[strMesLMMEL] > numMSUpperThresh].index.tolist()
        sliceMesLowLines = dfLMMEL.reindex(listMesLowLines)
        sliceMesHighLines = dfLMMEL.reindex(listMesHighLines)

        listMesLowPatients = dfTCGAMets[dfTCGAMets[strMesTCGA] < numMSLowerThresh].index.tolist()
        listMesHighPatients = dfTCGAMets[dfTCGAMets[strMesTCGA] > numMSUpperThresh].index.tolist()
        sliceMesLowPatients = dfTCGAMets.reindex(listMesLowPatients)
        sliceMesHighPatients = dfTCGAMets.reindex(listMesHighPatients)

        # the four subsets, in the same order as listSubsetLabels
        listLMMELInSubsets = [
            sliceMesLowLines[sliceMesLowLines[strMesLMMEL] >
                             sliceMesLowLines[strTGFBLMMEL]+numXYDiff].index.tolist(),
            sliceMesLowLines[sliceMesLowLines[strMesLMMEL] <
                             sliceMesLowLines[strTGFBLMMEL]-numXYDiff].index.tolist(),
            sliceMesHighLines[sliceMesHighLines[strMesLMMEL] >
                              sliceMesHighLines[strTGFBLMMEL]+numXYDiff].index.tolist(),
            sliceMesHighLines[sliceMesHighLines[strMesLMMEL] <
                              sliceMesHighLines[strTGFBLMMEL]-numXYDiff].index.tolist()]

        listTCGAInSubsets = [
            sliceMesLowPatients[sliceMesLowPatients[strMesTCGA] >
                                sliceMesLowPatients[strTGFBTCGA]+numXYDiff].index.tolist(),
            sliceMesLowPatients[sliceMesLowPatients[strMesTCGA] <
                                sliceMesLowPatients[strTGFBTCGA]-numXYDiff].index.tolist(),
            sliceMesHighPatients[sliceMesHighPatients[strMesTCGA] >
                                 sliceMesHighPatients[strTGFBTCGA]+numXYDiff].index.tolist(),
            sliceMesHighPatients[sliceMesHighPatients[strMesTCGA] <
                                 sliceMesHighPatients[strTGFBTCGA]-numXYDiff].index.tolist()]

        # fit a Kaplan-Meier estimator for every TCGA subset; only subsets with more than 20 patients are plotted
        #  and only the cell lines from those subsets are highlighted (None marks a subset which is not shown)
        listOfKMFs = []
        listCellLinesToPlot = []
        listOfListsCellLinesToPlot = [None]*len(listTCGAInSubsets)
        listGroupsToPlot = []
        for iSubset, listSubsetPatients in enumerate(listTCGAInSubsets):
            kmfForSubset = KaplanMeierFitter()
            listOfKMFs.append(kmfForSubset.fit(
                dfTCGAMets['surv_time'].reindex(listSubsetPatients).values.astype(float),
                event_observed=dfTCGAMets['death_event'].reindex(listSubsetPatients),
                label=listSubsetLabels[iSubset] + ' (n=' + '{}'.format(len(listSubsetPatients)) + ')'))
            if len(listSubsetPatients) > 20:
                listCellLinesToPlot = listCellLinesToPlot + listLMMELInSubsets[iSubset]
                listOfListsCellLinesToPlot[iSubset] = listLMMELInSubsets[iSubset]
                listGroupsToPlot.append(iSubset)

        # z-score every marker across all LM-MEL lines for the heatmap
        dfSelectedMarkers = dfLMMEL[listAllMarkersToPlot].copy(deep=True)
        arraySelMarkers = dfSelectedMarkers.values.astype(float)
        arraySelMarkersNorm = np.zeros(np.shape(arraySelMarkers), dtype=float)
        for iGene in range(np.shape(arraySelMarkers)[1]):
            numMean = np.mean(arraySelMarkers[:, iGene])
            numStDev = np.std(arraySelMarkers[:, iGene])
            arraySelMarkersNorm[:, iGene] = (arraySelMarkers[:, iGene] - numMean)/numStDev

        dfSelectedMarkersToPlot = pd.DataFrame(data=arraySelMarkersNorm,
                                               columns=listAllMarkersToPlot,
                                               index=dfLMMEL.index.tolist())

        handFig = plt.figure()
        handFig.set_size_inches(w=6, h=9)

        # # # # # # # # # # # #
        # # (A)-(C) score-vs-score panels
        arrayGridSpec = matplotlib.gridspec.GridSpec(nrows=2, ncols=2,
                                                     left=0.14, right=0.95,
                                                     bottom=0.51, top=0.97,
                                                     hspace=0.35, wspace=0.4)

        # cell lines which are not in any plotted subset are drawn in grey in every panel
        listLinesNotHighlighted = list(set(listLMMELLines).difference(set(listCellLinesToPlot)))

        for iPlot, (strMarkerOne, strMarkerTwo) in enumerate(listMarkersToPlot):

            handAx = plt.subplot(arrayGridSpec[listOfPlotPosTuples[iPlot][0], listOfPlotPosTuples[iPlot][1]])
            structAxPos = handAx.get_position()

            # the score columns carry a '(vs. ...)' suffix which differs between the two data sets, so they are
            #  located by substring; each result is a list of matching column names
            strMarkerOneTCGA = [strCol for strCol in dfTCGA.columns.tolist()
                                if strMarkerOne in strCol and 'vs.' in strCol]
            strMarkerTwoTCGA = [strCol for strCol in dfTCGA.columns.tolist()
                                if strMarkerTwo in strCol and 'vs.' in strCol]
            strMarkerOneLMMEL = [strCol for strCol in dfLMMEL.columns.tolist()
                                 if strMarkerOne in strCol and 'vs.' in strCol]
            strMarkerTwoLMMEL = [strCol for strCol in dfLMMEL.columns.tolist()
                                 if strMarkerTwo in strCol and 'vs.' in strCol]

            arrayTCGAX = dfTCGAMets[strMarkerOneTCGA].values.astype(float)
            arrayTCGAY = dfTCGAMets[strMarkerTwoTCGA].values.astype(float)
            arrayLMMELX = dfLMMEL[strMarkerOneLMMEL].values.astype(float)
            arrayLMMELY = dfLMMEL[strMarkerTwoLMMEL].values.astype(float)

            # use a common (square) extent so that both data sets fit and the axes share a scale
            numXRange = np.max([np.ptp(arrayTCGAX), np.ptp(arrayLMMELX)])
            numXMid = np.median([np.min(arrayTCGAX), np.min(arrayLMMELX)]) + numXRange/2
            numYRange = np.max([np.ptp(arrayTCGAY), np.ptp(arrayLMMELY)])
            numYMid = np.median([np.min(arrayTCGAY), np.min(arrayLMMELY)]) + numYRange/2

            numMaxRange = np.max([numXRange, numYRange])
            arrayExtent = [numXMid-0.65*numMaxRange, numXMid+0.65*numMaxRange,
                           numYMid-0.65*numMaxRange, numYMid+0.65*numMaxRange]

            handAx.hexbin(arrayTCGAX,
                          arrayTCGAY,
                          bins='log',
                          cmap=plt.cm.magma,
                          gridsize=30,
                          extent=arrayExtent,
                          alpha=0.70)
            handAx.set_xlabel(strMarkerOne, fontsize=Plot.numFontSize)
            handAx.set_ylabel(strMarkerTwo, fontsize=Plot.numFontSize)

            handAx.scatter(dfLMMEL[strMarkerOneLMMEL].reindex(listLinesNotHighlighted).values.astype(float),
                           dfLMMEL[strMarkerTwoLMMEL].reindex(listLinesNotHighlighted).values.astype(float),
                           edgecolor='w',
                           color='0.7',
                           s=15)

            # colours are assigned by order of appearance among the plotted subsets
            numOutSubset = 0
            for listSubgroupLines in listOfListsCellLinesToPlot:
                if listSubgroupLines is None:
                    continue
                handAx.scatter(
                    dfLMMEL[strMarkerOneLMMEL].reindex(listSubgroupLines).values.astype(float),
                    dfLMMEL[strMarkerTwoLMMEL].reindex(listSubgroupLines).values.astype(float),
                    edgecolor='w',
                    color=arrayColorsForMap.to_rgba(numOutSubset),
                    s=15)
                numOutSubset = numOutSubset + 1

            handAx.set_xlim([numXMid-0.60*numMaxRange, numXMid+0.60*numMaxRange])
            handAx.set_ylim([numYMid-0.60*numMaxRange, numYMid+0.60*numMaxRange])

            # Tick.label was an alias of Tick.label1 and is no longer available in recent matplotlib
            arrayXTickLoc = plt.MaxNLocator(numMaxXTicks)
            handAx.xaxis.set_major_locator(arrayXTickLoc)
            for handTick in handAx.xaxis.get_major_ticks():
                handTick.label1.set_fontsize(Plot.numFontSize)

            arrayYTickLoc = plt.MaxNLocator(numMaxYTicks)
            handAx.yaxis.set_major_locator(arrayYTickLoc)
            for handTick in handAx.yaxis.get_major_ticks():
                handTick.label1.set_fontsize(Plot.numFontSize)

            handFig.text(structAxPos.x0 - 0.24*structAxPos.width,
                         structAxPos.y0 + 1.04*structAxPos.height,
                         listMarkerPairSubPlotLabels[iPlot],
                         fontsize=Plot.numFontSize,
                         ha='center', va='center',
                         weight='bold')

        # # # # # # # # # # # #
        # # (D) Kaplan-Meier curves
        handAx = plt.subplot(arrayGridSpec[1, 1])
        structAxPos = handAx.get_position()
        # listGroupsToPlot only holds subsets which already passed the patient-number threshold
        for iSubset in listGroupsToPlot:
            listOfKMFs[iSubset].plot(ax=handAx)

        handAx.set_ylabel('Overall survival', fontsize=Plot.numFontSize)
        handAx.set_xlabel('Time (years)', fontsize=Plot.numFontSize)
        handAx.set_ylim([0, 1])

        arrayXLim = handAx.get_xlim()
        handAx.set_xlim([0, arrayXLim[1]])

        # ticks are placed every 60 x-axis units and labelled in steps of 5, i.e. the axis data are treated as
        #  months and the labels as years
        arrayXTicksInMo = np.arange(start=0, stop=arrayXLim[1], step=60)
        arrayXTicksInYr = np.arange(start=0, stop=((arrayXTicksInMo[-1])/12)+1, step=5, dtype=int)

        handAx.set_xticks(arrayXTicksInMo)
        handAx.set_xticklabels(arrayXTicksInYr)

        for handTick in handAx.xaxis.get_major_ticks():
            handTick.label1.set_fontsize(Plot.numFontSize)

        handAx.set_yticks([0, 0.5, 1.0])
        for handTick in handAx.yaxis.get_major_ticks():
            handTick.label1.set_fontsize(Plot.numFontSize)

        plt.legend(loc='upper right',
                   bbox_to_anchor=(1.05, 1.05),
                   fontsize=Plot.numFontSize*0.6,
                   scatterpoints=1,
                   ncol=1,
                   fancybox=True,
                   shadow=True)

        handAx.spines['top'].set_visible(False)
        handAx.spines['right'].set_visible(False)

        handAx.yaxis.set_ticks_position('left')
        handAx.xaxis.set_ticks_position('bottom')

        handFig.text(structAxPos.x0 - 0.24*structAxPos.width,
                     structAxPos.y0 + 1.04*structAxPos.height,
                     '(D)',
                     fontsize=Plot.numFontSize,
                     ha='center', va='center',
                     weight='bold')

        # # # # # # # # # # # #
        # # (E) marker heatmap across the highlighted cell lines
        arrayGridSpec = matplotlib.gridspec.GridSpec(nrows=1, ncols=1,
                                                     left=0.14, right=0.95,
                                                     bottom=0.11, top=0.44,
                                                     hspace=0.35, wspace=0.4)
        handAx = plt.subplot(arrayGridSpec[0])
        structAxPos = handAx.get_position()
        handAx.matshow(dfSelectedMarkersToPlot.reindex(listCellLinesToPlot).values.astype(float),
                       cmap=plt.cm.PRGn,
                       vmin=-3,
                       vmax=3,
                       aspect='auto')

        # horizontal dividers between the cell line subsets (none after the final subset position)
        numTot = 0
        for iSubgroup, listSubgroupLines in enumerate(listOfListsCellLinesToPlot):
            if listSubgroupLines is None:
                continue
            numTot = numTot + len(listSubgroupLines)
            if iSubgroup < len(listOfListsCellLinesToPlot)-1:
                handAx.axhline(y=numTot-0.5, xmin=0.0, xmax=1.0,
                               c='w',
                               lw=1.25)
                handAx.axhline(y=numTot-0.5, xmin=0.0, xmax=1.0,
                               c='k',
                               lw=0.75)

        # vertical dividers between the marker groups, with a group label centred beneath each one
        numTot = 0
        for iSubgroup, listSubgroupMarkers in enumerate(listOfListsMarkersToPlot):
            numGenes = len(listSubgroupMarkers)
            numTot = numTot + numGenes
            if iSubgroup < len(listOfListsMarkersToPlot)-1:
                handAx.axvline(x=numTot-0.5, ymin=0.0, ymax=1.0,
                               c='w',
                               lw=1.25)
                handAx.axvline(x=numTot-0.5, ymin=0.0, ymax=1.0,
                               c='k',
                               lw=0.75)
            numRelXPos = (numTot - numGenes/2)/len(listAllMarkersToPlot)
            handFig.text(structAxPos.x0 + numRelXPos*structAxPos.width,
                         0.01,
                         listToDispMarkerSubGroups[iSubgroup],
                         ha='center', va='bottom',
                         fontsize=Plot.numFontSize)

        # cell line names (coloured to match the scatter plots) and a rotated label for every subset
        numOutSubset = 0
        numOutLine = 0
        for iSubgroup, listSubgroupLines in enumerate(listOfListsCellLinesToPlot):
            if listSubgroupLines is None:
                continue
            for strCellLine in listSubgroupLines:
                handAx.text(-0.7, numOutLine,
                            strCellLine,
                            fontsize=Plot.numFontSize*0.7,
                            ha='right', va='center',
                            color=arrayColorsForMap.to_rgba(numOutSubset))
                numOutLine = numOutLine + 1
            numOutSubset = numOutSubset + 1

            # the heatmap rows run top-to-bottom, hence (1 - relative position) in figure coordinates
            numRelYPos = (numOutLine-0.5*len(listSubgroupLines))/len(listCellLinesToPlot)
            handFig.text(0.02, structAxPos.y0 + (1-numRelYPos)*structAxPos.height,
                         listCellLinesToPlotDisp[iSubgroup],
                         fontsize=Plot.numFontSize*0.7,
                         rotation=90,
                         ha='center', va='center')

        for iGene, strGene in enumerate(listAllMarkersToPlot):
            handAx.text(iGene, len(listCellLinesToPlot)+0.1,
                        strGene,
                        fontsize=Plot.numFontSize*0.5,
                        ha='center', va='top', rotation=90,
                        style='italic')

        handAx.set_xticks([])
        handAx.set_yticks([])

        handFig.text(structAxPos.x0 - 0.10*structAxPos.width,
                     structAxPos.y0 + 1.03*structAxPos.height,
                     '(E)',
                     fontsize=Plot.numFontSize,
                     ha='center', va='center',
                     weight='bold')

        # 'format' is the keyword savefig documents for the output type; the previous 'ext' keyword is not a
        #  savefig argument and is rejected by recent matplotlib releases
        for strFormat in Plot.listFileFormat:
            handFig.savefig(os.path.join(Plot.strOutputFolder, 'FigS9.' + strFormat),
                            format=strFormat, dpi=300)
        plt.close(handFig)

        return flagResult


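# Execute the associated functions as required. Every call below is commented out by default, so running this script
#   unmodified performs no analysis; uncomment the relevant line(s) to run that pre-processing step or figure function.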
# _ = PreProc.refine_NK_signature()
#
#
# _ = Plot.fig_one_and_supp_table_one()
# _ = Plot.fig_two()
#
# _ = Plot.fig_four()
# _ = Plot.fig_five()
#
#
# _ = Plot.supp_fig_one()
#
# _ = Plot.supp_fig_three()
# _ = Plot.supp_fig_four()
# _ = Plot.supp_fig_five()
# _ = Plot.supp_fig_six()
# _ = Plot.supp_fig_seven()
# _ = Plot.supp_fig_nine()