3.1.1_analyze_enpact_SPrediXcan_results

Author

Saideep Gona

Published

December 6, 2023

Context

Need some unified functions for analyzing SPrediXcan results.

Format should be a dictionary with the following tiers:

GWAS trait
Model type
Dataframe with SPrediXcan results

Specify inputs

Code

import os,sys
import mixbox as mx

color_pal = {
    "conditions": {
        "Flu": (255, 0, 0),
        "NI": (0, 0, 255),
    },
    "models": {
        "EnPACT":(255, 255, 255),
        "Standard":(0,0,0),
        "GTeX_WB":(0,64,0)
    },
    "subsets": {
        "immune": (255, 165, 0) 
    }
}

input_files = {
    "T2D":{
        "Flu_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/Flu_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto/intermediates/TWAS/T2D/T2D.txt",
        "NI_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/NI_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto/intermediates/TWAS/T2D/T2D.txt",
        "Flu_ws4_ns_3_7_lnormTRUE_beforeTRUE_scale1":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/Flu_ws4_ns_3_7_lnormTRUE_beforeTRUE_scale1/intermediates/TWAS/T2D/T2D.txt",
        "NI_ws4_ns_3_7_lnormTRUE_beforeTRUE_scale1":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/NI_ws4_ns_3_7_lnormTRUE_beforeTRUE_scale1/intermediates/TWAS/T2D/T2D.txt",
        "Flu_ws4_ns_4_4_lnormTRUE_beforeTRUE_scale1":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/Flu_ws4_ns_4_4_lnormTRUE_beforeTRUE_scale1/intermediates/TWAS/T2D/T2D.txt",
        "NI_ws4_ns_4_4_lnormTRUE_beforeTRUE_scale1":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/NI_ws4_ns_4_4_lnormTRUE_beforeTRUE_scale1/intermediates/TWAS/T2D/T2D.txt",
        "Flu_standard":"/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/standard_predictDB_aracena/Flu_T2D_filtered.txt",
        "NI_standard":"/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/standard_predictDB_aracena/NI_T2D_filtered.txt", 
        "GTeX_WB": "/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/GTex_WB/T2D_SPrediXcan_results_GTex_WB_trimmed.txt"
    },
    "AE": {
        "Flu_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/Flu_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto/intermediates/TWAS/Allergy_Eczema/Allergy_Eczema.txt",
        "NI_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/NI_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto/intermediates/TWAS/Allergy_Eczema/Allergy_Eczema.txt",
        "Flu_ws4_ns_3_7_lnormTRUE_beforeTRUE_scale1":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/Flu_ws4_ns_3_7_lnormTRUE_beforeTRUE_scale1/intermediates/TWAS/Allergy_Eczema/Allergy_Eczema.txt",
        "NI_ws4_ns_3_7_lnormTRUE_beforeTRUE_scale1":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/NI_ws4_ns_3_7_lnormTRUE_beforeTRUE_scale1/intermediates/TWAS/Allergy_Eczema/Allergy_Eczema.txt",
        "Flu_ws4_ns_4_4_lnormTRUE_beforeTRUE_scale1":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/Flu_ws4_ns_4_4_lnormTRUE_beforeTRUE_scale1/intermediates/TWAS/Allergy_Eczema/Allergy_Eczema.txt",
        "NI_ws4_ns_4_4_lnormTRUE_beforeTRUE_scale1":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/NI_ws4_ns_4_4_lnormTRUE_beforeTRUE_scale1/intermediates/TWAS/Allergy_Eczema/Allergy_Eczema.txt",
        "Flu_standard":"/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/standard_predictDB_aracena/Flu_AE_Farreira_filtered.txt",
        "NI_standard":"/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/standard_predictDB_aracena/NI_AE_Farreira_filtered.txt",
        "GTeX_WB":"/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/GTex_WB/AE_SPrediXcan_results_GTex_WB_trimmed.txt"
    }
}

# "GTeX_WB", 

# gwas_traits = ["T2D", "AE"]
# expression_models = ["Flu_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto", 
#                      "NI_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto", "GTeX_WB"]
# path_to_results = "/beagle3/haky/users/saideep/projects/Con_EnPACT/SPrediXcan_results"

# for trait in gwas_traits:
#     for model in expression_models:
#         input_file = os.path.join(path_to_results, trait, model, f"{trait}_SPrediXcan_results_{model}.csv")
#         if not os.path.exists(input_file):
#             print(f"File {input_file} does not exist")
#             sys.exit(1)

Plotting functions

Code


import pandas as pd
import scipy.stats as stats

def to_latent(rgb_non_latent, alpha=0.5):
    return [x/255.0 for x in rgb_non_latent] + [alpha]

def load_results(input_files, tag=False, only_tag=False, model_contains_string=[""], model_excludes_string=[""]):

    output_dict = {}
    for gwas_type in input_files.keys():
        output_dict[gwas_type] = {}
        for model_type in input_files[gwas_type].keys():

            # Check if model name contains all substrings
            does_mcs_all = True
            if type(model_contains_string) == str:
                model_contains_string = [model_contains_string]
            for mcs in model_contains_string:
                if mcs not in model_type:
                    does_mcs_all = False
                    break
            if not does_mcs_all:
                print("Does not contain all substrings")
                continue

            # Check if model name contains any exclusion substrings
            does_mes_any = False
            if type(model_excludes_string) == str:
                model_excludes_string = [model_excludes_string]
            for mes in model_excludes_string:
                if mes == "":
                    continue
                if mes in model_type:
                    does_mes_any = True
                    break
            if does_mes_any:
                print("Contains exclusion substring")
                continue


            # Check if tagged datasets should be loaded
            if only_tag:
                pass
            else:
                cur_df = pd.read_csv(input_files[gwas_type][model_type], sep=",")
                output_dict[gwas_type][model_type] = cur_df
            
            if tag:
                if type(tag) == str:
                    cur_df = pd.read_csv(input_files[gwas_type][model_type].replace(".txt", "_"+tag+".txt"), sep=",")
                    cur_df_no_na = cur_df.dropna(subset=["pvalue"])
                    output_dict[gwas_type][model_type+"_"+tag] = cur_df_no_na
                else:
                    for t in tag:
                        cur_df = pd.read_csv(input_files[gwas_type][model_type].replace(".txt", "_"+t+".txt"), sep=",")
                        cur_df_no_na = cur_df.dropna(subset=["pvalue"])
                        output_dict[gwas_type][model_type+"_"+t] = cur_df_no_na
            
    return output_dict

# Create function for creating subsetted tables 

def create_gene_subsets_SPrediXcan_results(gene_table, tag, SPrediXcan_results_files, gene_table_col="ensembl"):

    loaded_results = load_results(SPrediXcan_results_files)

    for gwas in SPrediXcan_results_files.keys():
        for model in SPrediXcan_results_files[gwas].keys():
            cur_df = loaded_results[gwas][model]
            cur_df_subset = cur_df[cur_df['gene'].isin(gene_table[gene_table_col])]
            # print("subsetting")
            # print(cur_df_subset.shape)
            # print(sum(cur_df_subset['gene'].isin(gene_table[gene_table_col])))
            if not SPrediXcan_results_files[gwas][model].endswith(".txt"):
                print("Error: File does not end with .txt")
                return
            cur_df_subset.to_csv(SPrediXcan_results_files[gwas][model].replace(".txt", "_"+tag+".txt"), sep=",", index=False)

Code

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import beta

import upsetplot

def qqunif_maxp(p, label, BF=False, CI=False, mlog10_p_thres=30, maxp=1, color=False, lw=1.0):
    nn = len(p)
    xx = -np.log10((np.arange(1, nn + 1) * maxp) / (nn + 1))

    if not color:
        plt.scatter(xx, -np.sort(np.log10(p)), s = 8, label=label)
        plt.plot(xx, -np.sort(np.log10(p)), lw=lw)
    else:
        if "secondary" in color:
            plt.plot(xx, -np.sort(np.log10(p)), lw=lw, label=label, color=to_latent(color["secondary"]))
        else:
            plt.plot(xx, -np.sort(np.log10(p)), lw=lw, color=to_latent(color["primary"]))
        plt.scatter(xx, -np.sort(np.log10(p)), s = 8, label=label, color=to_latent(color["primary"]))

    if BF:
        plt.axhline(y=-np.log10(0.05/nn), linestyle=':', color='grey')

    if CI:
        c95 = np.zeros(nn)
        c05 = np.zeros(nn)

        for i in range(1, nn + 1):
            c95[i - 1] = beta.ppf(0.95, i, nn - i + 1) * maxp
            c05[i - 1] = beta.ppf(0.05, i, nn - i + 1) * maxp

        plt.plot(xx, -np.log10(c95))
        plt.plot(xx, -np.log10(c05))


def qq_plot_unif(S_results, GWAS, save_dir, colors=False, BF=False, CI=False, title_tag = "", plot_filename="default"):

    plt.figure(figsize=(10, 6))
    # plt.axhline(-np.log10(0.05), color='red', linestyle='-', label='FDR = 0.05')
    # plt.axhline(-np.log10(0.10), color='orange', linestyle='--', label='FDR = 0.10')
    # plt.axhline(-np.log10(0.25), color='yellow', linestyle='-.', label='FDR = 0.25')

    for S_result in S_results.keys():
        if not colors:
            qqunif_maxp(S_results[S_result].pvalue, label=f'{S_result}', BF=BF, CI=CI)
        else:
            qqunif_maxp(S_results[S_result].pvalue, label=f'{S_result}', BF=BF, CI=CI, color=colors[S_result])
        
    plt.xlabel("Expected -log10(p)")
    plt.ylabel("Observed -log10(p)")
    plt.title(f"QQ-unif plot for gwas: {GWAS}, "+title_tag)
    plt.legend()

    if plot_filename == "default":
        plt.savefig(os.path.join(save_dir,f'{GWAS}_qqplot.png'))
    else:
        plt.savefig(os.path.join(save_dir,plot_filename))
    plt.show()


def hist_plot(S_results, GWAS, model, save_dir):
    plt.figure(figsize=(10, 6))

    plt.hist(S_results.pvalue, bins=100)
    plt.xlabel("Expected -log10(p)")
    plt.ylabel("Observed -log10(p)")
    plt.title(f"QQ-unif plot for {GWAS},{model}")
    plt.legend()

    plt.savefig(os.path.join(save_dir,f'{GWAS}_{model}_histogram.png'))
    plt.show()

def scatter_plots(S_results):
    # Assumes equal rows in all dataframes
    for S_result in S_results.keys():
        dict_for_plot = {}
        for model in S_results[S_result].keys():
            sorted_cur_df = S_results[S_result][model].sort_values(by=["gene"])
            dict_for_plot[model] = list(sorted_cur_df["pvalue"])
        df_for_plot = pd.DataFrame(dict_for_plot)
        pd.plotting.scatter_matrix(df_for_plot, figsize=(len(S_results[S_result].keys())*5,len(S_results[S_result].keys())*5), diagonal='kde')



def create_subset_df(list_of_dfs, subset_col="gene", join="inner"):
    subset_df = list_of_dfs[0].copy()
    for df in list_of_dfs[1:]:
        subset_df = subset_df.merge(df, how=join,on=subset_col)
    return subset_df

def print_dims(results_dict):
    for gwas_type in results_dict.keys():
        for model_type in results_dict[gwas_type].keys():
            print(f"{gwas_type} {model_type} : {results_dict[gwas_type][model_type].shape}")

def upset_plot(loaded_results, pval_thresh=0.05, substrings=[""], exclude_substrings=[""], percent=True, colors=False):
    contents = {}
    for gwas_type in loaded_results.keys():
        for model_type in loaded_results[gwas_type].keys():

            # Check if substrings are present in keywords
            all_substrings_present = True
            for substring in substrings:
                if substring not in (model_type+"_"+gwas_type):
                    all_substrings_present = False
            if not all_substrings_present:
                continue

            # Check if substrings for exclusion are present in keywords
            exclude_substring = False
            for exclude_sub in exclude_substrings:
                if exclude_sub == "":
                    continue
                if exclude_sub in (model_type+"_"+gwas_type):
                    exclude_substring = True
            if exclude_substring:
                continue        

            # Filter by pvalue
            cur_df = loaded_results[gwas_type][model_type]
            cur_df_filt = cur_df.loc[cur_df["pvalue"]<pval_thresh]

            cur_gene_list = list(cur_df_filt["gene"])
            contents[model_type+"_"+gwas_type] = cur_gene_list

    contents_for_plot = upsetplot.from_contents(contents)

    try:
        if percent:
            if not colors:
                u = upsetplot.UpSet(contents_for_plot, orientation='horizontal', 
                        sort_by='cardinality',
                        show_percentages=True,
                        )
            else:
                u = upsetplot.UpSet(contents_for_plot, orientation='horizontal', 
                        sort_by='cardinality',
                        show_percentages=True
                        )
                for twas_type in contents.keys():
                    twas_type_wo_gwas = "_".join(twas_type.split("_")[0:-1])
                    u.style_categories(twas_type, shading_facecolor=to_latent(colors[twas_type_wo_gwas]["primary"]))

        else:
            if not colors:
                u = upsetplot.UpSet(contents_for_plot, orientation='horizontal', 
                        sort_by='cardinality',
                        show_counts=True,
                        )
            else:
                u = upsetplot.UpSet(contents_for_plot, orientation='horizontal', 
                        sort_by='cardinality',
                        show_counts=True,
                        )
                for twas_type in contents.keys():
                    twas_type_wo_gwas = "_".join(twas_type.split("_")[0:-1])
                    u.style_categories(twas_type, shading_facecolor=to_latent(colors[twas_type_wo_gwas]["primary"]))
                    
        u.plot()
        plt.suptitle("Gene overlap, pvalue < "+str(pval_thresh))
        plt.show()
    except Exception as e:
        print(e)


def scatter_plot(first_df, second_df, ax, x_lab="x", y_lab="y", sort_by="gene", bf=True, colors=False):

    common_genes = list(set(first_df["gene"]).intersection(set(second_df["gene"])))
    first_df_subset = first_df[first_df["gene"].isin(common_genes)].sort_values(by=[sort_by])
    second_df_subset = second_df[second_df["gene"].isin(common_genes)].sort_values(by=[sort_by])


    nan_filt = np.logical_not(np.logical_or(np.isnan(-np.log10(list(first_df_subset["pvalue"]))), np.isnan(-np.log10(list(second_df_subset["pvalue"])))))

    r,p = stats.pearsonr(x=-np.log10(first_df_subset[nan_filt]["pvalue"]),y= -np.log10(second_df_subset[nan_filt]["pvalue"]))
    n = len(first_df_subset[nan_filt]["pvalue"])
    
    ax.scatter(-np.log10(first_df_subset[nan_filt]["pvalue"]), -np.log10(second_df_subset[nan_filt]["pvalue"]), s=8)
    if bf:
        ax.axhline(y=-np.log10(0.05/n), linestyle=':', color='grey')
        ax.axvline(x=-np.log10(0.05/n), linestyle=':', color='grey')
    ax.text(.05, .8, "Pearson's r ={:.2f}".format(r), transform=ax.transAxes)
    ax.text(.05, .7, f"n ={n}", transform=ax.transAxes)
    ax.set_xlabel(x_lab, fontsize=12, weight = 'bold')
    ax.set_ylabel(y_lab, fontsize=12, weight = 'bold')
    if not colors:
        pass
    else:
        ax.xaxis.label.set_color(to_latent(colors["_".join(x_lab.split("_")[0:-1])]["primary"], alpha=1.0))
        ax.yaxis.label.set_color(to_latent(colors["_".join(y_lab.split("_")[0:-1])]["primary"], alpha=1.0))
    ax.axline((0, 0), slope=1)

def scatter_plot_all(results_dict, plot_title= "-log10 pvalue for pair-wise intersected genes", colors=False):

    results_count = 0
    for gwas_type in results_dict.keys():
        for model_type in results_dict[gwas_type].keys():
            results_count += 1

    fig = plt.figure(figsize=(4*results_count, 4*results_count), layout="constrained")
    fig.suptitle(plot_title, fontsize=16)
    spec = fig.add_gridspec(ncols=results_count, nrows=results_count)

    ind1 = 0
    ind2 = 0
    for gwas_type1 in results_dict.keys():
        for model_type1 in results_dict[gwas_type].keys():
            ind2 = 0
            for gwas_type2 in results_dict.keys():
                for model_type2 in results_dict[gwas_type].keys():

                    ax = fig.add_subplot(spec[ind1, ind2])
                    scatter_plot(results_dict[gwas_type1][model_type1], results_dict[gwas_type2][model_type2], ax,
                                 x_lab=model_type1+"_"+gwas_type1, y_lab=model_type2+"_"+gwas_type2, colors=colors)

                    ind2 += 1
            ind2 = 0
            ind1 += 1

Plots

Code

# loaded_results = load_results(input_files)

# print_dims(loaded_results)

Code

# upset_plot(loaded_results, pval_thresh=0.05)

Generate Histograms

Code

# for gwas in loaded_results.keys():
#     for model in loaded_results[gwas].keys():
#         pass
        # hist_plot(loaded_results[gwas][model], gwas, model, "/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-12-06-analyze_SPrediXcan_results/plots")

Generate qqplots

Investigating immune-related genes

The gene list here is from the IRIS database of immune-related genes: https://pubmed.ncbi.nlm.nih.gov/15780753/ and downloaded from: https://www.innatedb.com/moleculeSearch.do

Code

# Load in immune gene list

path_to_immune_genes = "/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-12-06-analyze_SPrediXcan_results/InnateDB_genes.csv"

immune_genes = pd.read_csv(path_to_immune_genes, sep=",")
immune_genes.head()

# Also look at common Flu-NI genes

# flu_genes = loaded_results['T2D']['Enpact_Flu']
# ni_genes = loaded_results['T2D']['Enpact_NI']
# list_of_dfs = [flu_genes, ni_genes]

# flu_ni_subset = create_subset_df([flu_genes, ni_genes], subset_col="gene")
# print(flu_genes.shape, ni_genes.shape)
# flu_ni_subset.shape

	id	species	taxonId	ensembl	entrez	name	fullname	synonym	signature	chromStart	...	goFunctions	goLocalizations	cerebralLocalization	nrIntxsValidated	nrIntxsPredicted	transcripts	humanOrthologs	mouseOrthologs	bovineOrthologs	lastModified
0	285	Homo sapiens	9606	ENSG00000158270	81035	COLEC12	collectin sub-family member 12	NaN	COLEC12\|IDBG-285\|ENSG00000158270\|81035	319361	...	GO:0005044: scavenger receptor activity#GO:000...	GO:0005044: scavenger receptor activity,GO:000...	Plasma membrane	2	0	228706	NaN	Colec12\|IDBG-128247\|ENSMUSG00000036103\|140792	COLEC12\|IDBG-644123\|ENSBTAG00000007705\|504741	2017-02-20
1	885	Homo sapiens	9606	ENSG00000177663	23765	IL17RA	interleukin 17 receptor A	CANDF5\|CD217\|CDw217\|hIL-17R\|IL-17RA\|IL17R	IL17RA\|IDBG-885\|ENSG00000177663\|23765	17084954	...	GO:0005515: protein binding#GO:0030368: interl...	GO:0005515: protein binding,GO:0030368: interl...	Extracellular	21	14	887\|810276	NaN	Il17ra\|IDBG-183749\|ENSMUSG00000002897\|16172	IL17RA\|IDBG-641259\|ENSBTAG00000011067\|525040	2017-02-20
2	1072	Homo sapiens	9606	ENSG00000154721	58494	JAM2	junctional adhesion molecule 2	NaN	JAM2\|IDBG-1072\|ENSG00000154721\|58494	25639272	...	GO:0005515: protein binding#GO:0046982: protei...	GO:0005515: protein binding,GO:0046982: protei...	Cell surface	11	0	1074\|227998\|480678\|586152	NaN	Jam2\|IDBG-172706\|ENSMUSG00000053062\|67374	JAM2\|IDBG-630318\|ENSBTAG00000000603\|538846	2017-02-20
3	1100	Homo sapiens	9606	ENSG00000142192	351	APP	amyloid beta (A4) precursor protein	AAA\|ABETA\|ABPP\|AD1\|APPI\|CTFgamma\|CVAP\|PN-II\|PN2	APP\|IDBG-1100\|ENSG00000142192\|351	25880550	...	GO:0003677: DNA binding#GO:0004867: serine-typ...	GO:0003677: DNA binding,GO:0004867: serine-typ...	Nucleus	2628	31	1102\|1104\|1106\|1108\|1110\|1112\|361147\|361148\|36...	NaN	App\|IDBG-172954\|ENSMUSG00000022892\|11820	APP\|IDBG-630293\|ENSBTAG00000017753\|280722	2017-02-20
4	1597	Homo sapiens	9606	ENSG00000156299	102724932,7074	TIAM1	T-cell lymphoma invasion and metastasis 1	NaN	TIAM1\|IDBG-1597\|ENSG00000156299\|102724932,7074	31118416	...	GO:0005057: receptor signaling protein activit...	GO:0005057: receptor signaling protein activit...	Cell surface	41	5	1599\|361857\|361858\|597056	NaN	Tiam1\|IDBG-174466\|ENSMUSG00000002489\|21844	BT.74139\|IDBG-630064\|ENSBTAG00000017839\|	2017-02-20

5 rows × 26 columns

Code

create_gene_subsets_SPrediXcan_results(immune_genes,"immune", input_files)
# create_gene_subsets_SPrediXcan_results(flu_ni_subset,"flu_ni", input_files, gene_table_col="gene")

Code

# Reload with filtered results

# loaded_results = load_results(input_files, tag="immune")

# print_dims(loaded_results)

Code


# print(loaded_results['T2D']['Enpact_Flu'].shape)
# print(loaded_results['T2D']['Enpact_Flu_immune'].shape)
# print(loaded_results['T2D']['GTeX_WB_immune'].shape)

How does the new normalization (with isoform quantification) compare with the old in Flu Condition, Immune GWAS?

Code


# input_files_compare_normalization = {
#     "AE": {
#         "Flu_sourcekallisto":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/Flu_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto/intermediates/TWAS/Allergy_Eczema/Allergy_Eczema.txt",
#         "Flu_gene_length":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/Flu_ws4_ns_3_7_lnormTRUE_beforeTRUE_scale1/intermediates/TWAS/Allergy_Eczema/Allergy_Eczema.txt"
#     }
# }
# loaded_results_compare_normalization = load_results(input_files_compare_normalization, tag="immune")
# gwas ="AE"


# qq_plot_unif(loaded_results_compare_normalization[gwas], gwas, "/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-12-06-analyze_SPrediXcan_results/plots",
#                 plot_filename=f"compare_normalization_qqplot.png", BF=True, title_tag="EnPACT gene length vs. isoform length normalization")


# upset_plot(loaded_results_compare_normalization, pval_thresh=1.0, exclude_substrings=["immune"])
# upset_plot(loaded_results_compare_normalization, pval_thresh=1.0, substrings=["immune"])

# upset_plot(loaded_results_compare_normalization, pval_thresh=0.05, exclude_substrings=["immune"])
# upset_plot(loaded_results_compare_normalization, pval_thresh=0.05, substrings=["immune"])


# scatter_plot_all(load_results(input_files_compare_normalization))

How does EnPACT compare between Flu, NI in Allergy Eczema GWAS?

Code

input_files_compare_fluni = {
    "AE": {
        "Flu_sourcekallisto":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/Flu_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto/intermediates/TWAS/Allergy_Eczema/Allergy_Eczema.txt",
        "NI_sourcekallisto":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/NI_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto/intermediates/TWAS/Allergy_Eczema/Allergy_Eczema.txt"
    }
}
loaded_results_compare_fluni = load_results(input_files_compare_fluni, tag="immune")
gwas ="AE"

color_pal_compare_fluni = {
    "Flu_sourcekallisto": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["Flu"], 
                                              0.5)},
    "NI_sourcekallisto": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["NI"], 
                                              0.5)},
    "Flu_sourcekallisto_immune": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["Flu"], 
                                              0.5),
                                    "secondary": color_pal["subsets"]["immune"]},
    "NI_sourcekallisto_immune": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["NI"], 
                                              0.5),
                                    "secondary": color_pal["subsets"]["immune"]},                                              
}

qq_plot_unif(loaded_results_compare_fluni[gwas], gwas, "/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-12-06-analyze_SPrediXcan_results/plots",
                plot_filename=f"compare_fluni_{gwas}_qqplot.png", BF=True, title_tag="EnPACT Flu vs. NI", colors=color_pal_compare_fluni)


scatter_plot_all(load_results(input_files_compare_fluni), colors=color_pal_compare_fluni, 
                 plot_title=f"-log10 pvalue for pair-wise intersected genes for EnPACT Flu vs. NI in {gwas}")
scatter_plot_all(loaded_results_compare_fluni, colors=color_pal_compare_fluni,
                 plot_title=f"-log10 pvalue for pair-wise intersected genes for EnPACT Flu vs. NI in {gwas}")

upset_plot(loaded_results_compare_fluni, pval_thresh=1.0, exclude_substrings=["immune"], colors=color_pal_compare_fluni)
upset_plot(loaded_results_compare_fluni, pval_thresh=1.0, substrings=["immune"], colors=color_pal_compare_fluni)

upset_plot(loaded_results_compare_fluni, pval_thresh=0.05, exclude_substrings=["immune"], colors=color_pal_compare_fluni)
upset_plot(loaded_results_compare_fluni, pval_thresh=0.05, substrings=["immune"], colors=color_pal_compare_fluni)

How does EnPACT compare between Flu, NI in T2D GWAS?

Code

input_files_compare_fluni = {
    "T2D": {
        "Flu_sourcekallisto":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/Flu_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto/intermediates/TWAS/T2D/T2D.txt",
        "NI_sourcekallisto":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/NI_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto/intermediates/TWAS/T2D/T2D.txt"
    }
}
loaded_results_compare_fluni = load_results(input_files_compare_fluni, tag="immune")
gwas ="T2D"

color_pal_compare_fluni = {
    "Flu_sourcekallisto": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["Flu"], 
                                              0.5)},
    "NI_sourcekallisto": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["NI"], 
                                              0.5)},
    "Flu_sourcekallisto_immune": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["Flu"], 
                                              0.5),
                                    "secondary": color_pal["subsets"]["immune"]},
    "NI_sourcekallisto_immune": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["NI"], 
                                              0.5),
                                    "secondary": color_pal["subsets"]["immune"]},                                              
}

qq_plot_unif(loaded_results_compare_fluni[gwas], gwas, "/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-12-06-analyze_SPrediXcan_results/plots",
                plot_filename=f"compare_fluni_{gwas}_qqplot.png", BF=True, title_tag="EnPACT Flu vs. NI", colors=color_pal_compare_fluni)

scatter_plot_all(load_results(input_files_compare_fluni), colors=color_pal_compare_fluni, 
                 plot_title=f"-log10 pvalue for pair-wise intersected genes for EnPACT Flu vs. NI in {gwas}")
scatter_plot_all(loaded_results_compare_fluni, colors=color_pal_compare_fluni,
                 plot_title=f"-log10 pvalue for pair-wise intersected genes for EnPACT Flu vs. NI in {gwas}")

upset_plot(loaded_results_compare_fluni, pval_thresh=1.0, exclude_substrings=["immune"], colors=color_pal_compare_fluni)
upset_plot(loaded_results_compare_fluni, pval_thresh=1.0, substrings=["immune"], colors=color_pal_compare_fluni)

upset_plot(loaded_results_compare_fluni, pval_thresh=0.05, exclude_substrings=["immune"], colors=color_pal_compare_fluni)
upset_plot(loaded_results_compare_fluni, pval_thresh=0.05, substrings=["immune"], colors=color_pal_compare_fluni)

How does EnPACT compare with GTEx?

Code


input_files_compare_gtex = {
    "AE": {
        "Flu_sourcekallisto":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/Flu_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto/intermediates/TWAS/Allergy_Eczema/Allergy_Eczema.txt",
        "GTeX_WB":"/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/GTex_WB/AE_SPrediXcan_results_GTex_WB_trimmed.txt"
    }
}
gwas = "AE"
loaded_results_compare_gtex = load_results(input_files_compare_gtex, tag="immune")

color_pal_compare_gtex = {
    "Flu_sourcekallisto": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["Flu"], 
                                              0.5)},
    "GTeX_WB": {"primary": color_pal["models"]["GTeX_WB"]},
    "Flu_sourcekallisto_immune": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["Flu"], 
                                              0.5),
                                    "secondary": color_pal["subsets"]["immune"]},
    "GTeX_WB_immune": {"primary": color_pal["models"]["GTeX_WB"],
                                    "secondary": color_pal["subsets"]["immune"]},                                              
}

qq_plot_unif(loaded_results_compare_gtex[gwas], gwas, "/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-12-06-analyze_SPrediXcan_results/plots",
                plot_filename=f"compare_gtex_{gwas}_qqplot.png", BF=True, title_tag="EnPACT Flu vs. GTeX WB", colors=color_pal_compare_gtex)


scatter_plot_all(load_results(input_files_compare_gtex), colors=color_pal_compare_gtex, 
                 plot_title=f"-log10 pvalue for pair-wise intersected genes for EnPACT Flu vs. GTeX WB in {gwas}")
scatter_plot_all(loaded_results_compare_gtex, colors=color_pal_compare_gtex,
                 plot_title=f"-log10 pvalue for pair-wise intersected genes for EnPACT Flu vs. GTeX WB in {gwas}")

upset_plot(loaded_results_compare_gtex, pval_thresh=1.0, exclude_substrings=["immune"], colors=color_pal_compare_gtex)
upset_plot(loaded_results_compare_gtex, pval_thresh=1.0, substrings=["immune"], colors=color_pal_compare_gtex)

upset_plot(loaded_results_compare_gtex, pval_thresh=0.05, exclude_substrings=["immune"], colors=color_pal_compare_gtex)
upset_plot(loaded_results_compare_gtex, pval_thresh=0.05, substrings=["immune"], colors=color_pal_compare_gtex)

                                               id
Flu_sourcekallisto_AE GTeX_WB_AE                 
True                  True        ENSG00000174123
                      True        ENSG00000134987
                      True        ENSG00000197712
                      True        ENSG00000115604
                      True        ENSG00000172057
...                                           ...
False                 True        ENSG00000250889
                      True        ENSG00000237372
                      True        ENSG00000272255
                      True        ENSG00000273759
                      True        ENSG00000253214

[19601 rows x 1 columns]
                                                             id
Flu_sourcekallisto_immune_AE GTeX_WB_immune_AE                 
True                         True               ENSG00000174123
                             True               ENSG00000115604
                             True               ENSG00000115602
                             False              ENSG00000123384
                             True               ENSG00000174130
...                                                         ...
False                        True               ENSG00000129450
                             True               ENSG00000205220
                             True               ENSG00000121933
                             True               ENSG00000221995
                             True               ENSG00000173486

[1445 rows x 1 columns]
                                               id
Flu_sourcekallisto_AE GTeX_WB_AE                 
True                  True        ENSG00000174123
                      True        ENSG00000134987
                      True        ENSG00000197712
                      False       ENSG00000115604
                      True        ENSG00000172057
...                                           ...
False                 True        ENSG00000140455
                      True        ENSG00000244509
                      True        ENSG00000135722
                      True        ENSG00000122376
                      True        ENSG00000184574

[2866 rows x 1 columns]
                                                             id
Flu_sourcekallisto_immune_AE GTeX_WB_immune_AE                 
True                         True               ENSG00000174123
                             False              ENSG00000115604
                             True               ENSG00000115602
                             False              ENSG00000123384
                             True               ENSG00000174130
...                                                         ...
False                        True               ENSG00000213949
                             True               ENSG00000229314
                             True               ENSG00000168811
                             True               ENSG00000197885
                             True               ENSG00000177575

[292 rows x 1 columns]

How does standard PrediXcan Flu compare with standard PrediXcan NI in Allergy Eczema GWAS?

Code

input_files_compare_standard = {
    "AE": {
        "Flu_standard":"/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/standard_predictDB_aracena/Flu_AE_Farreira_filtered.txt",
        "NI_standard":"/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/standard_predictDB_aracena/NI_AE_Farreira_filtered.txt",
    }
}
gwas = "AE"

color_pal_compare_standard = {
    "Flu_standard":  {"primary": mx.lerp(color_pal["models"]["Standard"], 
                                              color_pal["conditions"]["Flu"], 
                                              0.5)},
    "NI_standard":  {"primary": mx.lerp(color_pal["models"]["Standard"], 
                                              color_pal["conditions"]["NI"], 
                                              0.5)},
    "Flu_standard_immune": {"primary": mx.lerp(color_pal["models"]["Standard"], 
                                              color_pal["conditions"]["Flu"], 
                                              0.5),
                                    "secondary": color_pal["subsets"]["immune"]},
    "NI_standard_immune": {"primary": mx.lerp(color_pal["models"]["Standard"], 
                                              color_pal["conditions"]["NI"], 
                                              0.5),
                                    "secondary": color_pal["subsets"]["immune"]},                                          
}


loaded_results_compare_standard = load_results(input_files_compare_standard, tag=["immune"])

qq_plot_unif(loaded_results_compare_standard[gwas], gwas, "/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-12-06-analyze_SPrediXcan_results/plots",
                plot_filename=f"compare_standard_Flu_NI_{gwas}_qqplot.png", BF=True, title_tag="Standard Flu vs. Standard NI", 
                colors=color_pal_compare_standard)

scatter_plot_all(load_results(input_files_compare_standard), colors=color_pal_compare_standard, 
                 plot_title=f"-log10 pvalue for pair-wise intersected genes for Standard Flu vs. Standard NI in {gwas}")
scatter_plot_all(loaded_results_compare_standard, colors=color_pal_compare_standard,
                 plot_title=f"-log10 pvalue for pair-wise intersected genes for Standard Flu vs. Standard NI in {gwas}")

upset_plot(loaded_results_compare_standard, pval_thresh=1.0, substrings=["immune"], percent=False, colors=color_pal_compare_standard)
upset_plot(loaded_results_compare_standard, pval_thresh=0.05, exclude_substrings=["immune"], percent=False, colors=color_pal_compare_standard)
upset_plot(loaded_results_compare_standard, pval_thresh=0.05, substrings=["immune"], percent=False, colors=color_pal_compare_standard)

How does EnPACT compare with Standard PrediXcan in Flu condition?

Code

input_files_compare_standard = {
    "AE": {
        "Flu_sourcekallisto":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/Flu_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto/intermediates/TWAS/Allergy_Eczema/Allergy_Eczema.txt",
        "Flu_standard":"/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/standard_predictDB_aracena/Flu_AE_Farreira_filtered.txt",
        "NI_sourcekallisto":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/NI_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto/intermediates/TWAS/Allergy_Eczema/Allergy_Eczema.txt",
        "NI_standard":"/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/standard_predictDB_aracena/NI_AE_Farreira_filtered.txt",
    }
}
gwas = "AE"

color_pal_compare_standard = {
    "Flu_sourcekallisto": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["Flu"], 
                                              0.5)},
    "Flu_standard":  {"primary": mx.lerp(color_pal["models"]["Standard"], 
                                              color_pal["conditions"]["Flu"], 
                                              0.5)},
    "NI_sourcekallisto": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["NI"], 
                                              0.5)}, 
    "NI_standard":  {"primary": mx.lerp(color_pal["models"]["Standard"], 
                                              color_pal["conditions"]["NI"], 
                                              0.5)},
    "Flu_sourcekallisto_immune": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["Flu"], 
                                              0.5),
                                    "secondary": color_pal["subsets"]["immune"]},
    "Flu_standard_immune": {"primary": mx.lerp(color_pal["models"]["Standard"], 
                                              color_pal["conditions"]["Flu"], 
                                              0.5),
                                    "secondary": color_pal["subsets"]["immune"]},
    "NI_sourcekallisto_immune": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["NI"], 
                                              0.5),
                                    "secondary": color_pal["subsets"]["immune"]},
    "NI_standard_immune": {"primary": mx.lerp(color_pal["models"]["Standard"], 
                                              color_pal["conditions"]["NI"], 
                                              0.5),
                                    "secondary": color_pal["subsets"]["immune"]},                                          
}


loaded_results_compare_standard_Flu = load_results(input_files_compare_standard, tag=["immune"], 
                                                   model_excludes_string="NI")

qq_plot_unif(loaded_results_compare_standard_Flu[gwas], gwas, "/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-12-06-analyze_SPrediXcan_results/plots",
                plot_filename=f"compare_standard_Flu_{gwas}_qqplot.png", BF=True, title_tag="EnPACT Flu vs. Standard", 
                colors=color_pal_compare_standard)

scatter_plot_all(load_results(input_files_compare_standard, model_excludes_string="NI"), colors=color_pal_compare_standard, 
                 plot_title=f"-log10 pvalue for pair-wise intersected genes for EnPACT Flu vs. standard Flu in {gwas}")
scatter_plot_all(loaded_results_compare_standard_Flu, colors=color_pal_compare_standard,
                 plot_title=f"-log10 pvalue for pair-wise intersected genes for EnPACT Flu vs. standard Flu in {gwas}")

upset_plot(loaded_results_compare_standard_Flu, pval_thresh=1.0, substrings=["immune", "Flu"], percent=False, colors=color_pal_compare_standard)
upset_plot(loaded_results_compare_standard_Flu, pval_thresh=0.05, substrings=["Flu"], exclude_substrings=["immune"], percent=False, colors=color_pal_compare_standard)
upset_plot(loaded_results_compare_standard_Flu, pval_thresh=0.05, substrings=["immune", "Flu"], percent=False, colors=color_pal_compare_standard)

Contains exclusion substring
Contains exclusion substring
Contains exclusion substring
Contains exclusion substring
Flu_sourcekallisto_immune_AE Flu_sourcekallisto_immune
Flu_standard_immune_AE Flu_standard_immune
Flu_sourcekallisto_AE Flu_sourcekallisto
Flu_standard_AE Flu_standard
Flu_sourcekallisto_immune_AE Flu_sourcekallisto_immune
Flu_standard_immune_AE Flu_standard_immune

How does EnPACT compare with Standard PrediXcan in NI condition?

Code


loaded_results_compare_standard_NI = load_results(input_files_compare_standard, tag=["immune"], 
                                                   model_excludes_string="Flu")

qq_plot_unif(loaded_results_compare_standard_NI[gwas], gwas, "/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-12-06-analyze_SPrediXcan_results/plots",
                plot_filename=f"compare_standard_qqplot.png", BF=True, title_tag="EnPACT NI vs. Standard NI",
                colors=color_pal_compare_standard)

scatter_plot_all(load_results(input_files_compare_standard, model_excludes_string="Flu"), colors=color_pal_compare_standard, 
                 plot_title=f"-log10 pvalue for pair-wise intersected genes for EnPACT NI vs. standard NI in {gwas}")
scatter_plot_all(loaded_results_compare_standard_NI, colors=color_pal_compare_standard,
                 plot_title=f"-log10 pvalue for pair-wise intersected genes for EnPACT NI vs. standard NI in {gwas}")

upset_plot(loaded_results_compare_standard_NI, pval_thresh=1.0, substrings=["immune", "NI"], percent=False, colors=color_pal_compare_standard)
upset_plot(loaded_results_compare_standard_NI, pval_thresh=0.05, substrings=["NI"], exclude_substrings=["immune"], percent=False, colors=color_pal_compare_standard)
upset_plot(loaded_results_compare_standard_NI, pval_thresh=0.05, substrings=["immune", "NI"], percent=False, colors=color_pal_compare_standard)

Contains exclusion substring
Contains exclusion substring
Contains exclusion substring
Contains exclusion substring
NI_sourcekallisto_immune_AE NI_sourcekallisto_immune
NI_standard_immune_AE NI_standard_immune
NI_sourcekallisto_AE NI_sourcekallisto
NI_standard_AE NI_standard
NI_sourcekallisto_immune_AE NI_sourcekallisto_immune
NI_standard_immune_AE NI_standard_immune

We can try subsetting to genes which pass the PredictDB threshold in the Standard case

Code

# First look at the Flu case


standard_flu_genes = loaded_results_compare_standard_Flu["AE"]["Flu_standard"]
create_gene_subsets_SPrediXcan_results(standard_flu_genes,
                                        "standard_Flu", input_files_compare_standard,
                                        gene_table_col="gene")

standard_flu_immune_genes = standard_flu_genes.loc[standard_flu_genes['gene'].isin(immune_genes['ensembl'])] 
create_gene_subsets_SPrediXcan_results(standard_flu_immune_genes,
                                        "standard_Flu_immune", input_files_compare_standard,
                                        gene_table_col="gene")


loaded_results_compare_standard_Flu = load_results(input_files_compare_standard, tag=["standard_Flu","standard_Flu_immune"], 
                                                   only_tag=True,
                                                   model_excludes_string="NI")

qq_plot_unif(loaded_results_compare_standard_Flu[gwas], gwas, "/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-12-06-analyze_SPrediXcan_results/plots",
                plot_filename=f"compare_standard_Flu_qqplot.png", BF=True)
upset_plot(loaded_results_compare_standard_Flu, pval_thresh=0.05, substrings=["immune", "standard_Flu"], exclude_substrings=["NI"], percent=False)
upset_plot(loaded_results_compare_standard_Flu, pval_thresh=0.05, substrings=["standard_Flu"], exclude_substrings=["immune","NI"], percent=False)

subsetting
(1196, 12)
1196
subsetting
(1202, 12)
1202
subsetting
(1197, 12)
1197
subsetting
(248, 12)
248
subsetting
(119, 12)
119
subsetting
(121, 12)
121
subsetting
(120, 12)
120
subsetting
(24, 12)
24
Contains exclusion substring
Contains exclusion substring

Code

# Next, the NI case


standard_ni_genes = loaded_results_compare_standard_NI["AE"]["NI_standard"]
create_gene_subsets_SPrediXcan_results(standard_ni_genes,
                                        "standard_NI", input_files_compare_standard,
                                        gene_table_col="gene")

standard_ni_immune_genes = standard_ni_genes.loc[standard_ni_genes['gene'].isin(immune_genes['ensembl'])] 
create_gene_subsets_SPrediXcan_results(standard_ni_immune_genes,
                                        "standard_NI_immune", input_files_compare_standard,
                                        gene_table_col="gene")


loaded_results_compare_standard_NI = load_results(input_files_compare_standard, tag=["standard_NI","standard_NI_immune"], 
                                                   only_tag=True,
                                                   model_excludes_string="Flu")

qq_plot_unif(loaded_results_compare_standard_NI[gwas], gwas, "/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-12-06-analyze_SPrediXcan_results/plots",
                plot_filename=f"compare_standard_NI_qqplot.png", BF=True)
upset_plot(loaded_results_compare_standard_NI, pval_thresh=0.05, substrings=["immune", "standard_NI"], exclude_substrings=["Flu"], percent=False)
upset_plot(loaded_results_compare_standard_NI, pval_thresh=0.05, substrings=["standard_NI"], exclude_substrings=["immune","Flu"], percent=False)

subsetting
(1094, 12)
1094
subsetting
(248, 12)
248
subsetting
(1095, 12)
1095
subsetting
(1102, 12)
1102
subsetting
(94, 12)
94
subsetting
(24, 12)
24
subsetting
(94, 12)
94
subsetting
(96, 12)
96
Contains exclusion substring
Contains exclusion substring