3.1.1_analyze_enpact_SPrediXcan_results

Author

Saideep Gona

Published

December 6, 2023

Context

Need some unified functions for analyzing SPrediXcan results.

Format should be a dictionary with the following tiers:

  1. GWAS trait
  2. Model type
  3. Dataframe with SPrediXcan results

Specify inputs

Code
import os,sys
import mixbox as mx

color_pal = {
    "conditions": {
        "Flu": (255, 0, 0),
        "NI": (0, 0, 255),
    },
    "models": {
        "EnPACT":(255, 255, 255),
        "Standard":(0,0,0),
        "GTeX_WB":(0,64,0)
    },
    "subsets": {
        "immune": (255, 165, 0) 
    }
}

input_files = {
    "T2D":{
        "Flu_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/Flu_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto/intermediates/TWAS/T2D/T2D.txt",
        "NI_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/NI_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto/intermediates/TWAS/T2D/T2D.txt",
        "Flu_ws4_ns_3_7_lnormTRUE_beforeTRUE_scale1":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/Flu_ws4_ns_3_7_lnormTRUE_beforeTRUE_scale1/intermediates/TWAS/T2D/T2D.txt",
        "NI_ws4_ns_3_7_lnormTRUE_beforeTRUE_scale1":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/NI_ws4_ns_3_7_lnormTRUE_beforeTRUE_scale1/intermediates/TWAS/T2D/T2D.txt",
        "Flu_ws4_ns_4_4_lnormTRUE_beforeTRUE_scale1":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/Flu_ws4_ns_4_4_lnormTRUE_beforeTRUE_scale1/intermediates/TWAS/T2D/T2D.txt",
        "NI_ws4_ns_4_4_lnormTRUE_beforeTRUE_scale1":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/NI_ws4_ns_4_4_lnormTRUE_beforeTRUE_scale1/intermediates/TWAS/T2D/T2D.txt",
        "Flu_standard":"/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/standard_predictDB_aracena/Flu_T2D_filtered.txt",
        "NI_standard":"/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/standard_predictDB_aracena/NI_T2D_filtered.txt", 
        "GTeX_WB": "/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/GTex_WB/T2D_SPrediXcan_results_GTex_WB_trimmed.txt"
    },
    "AE": {
        "Flu_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/Flu_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto/intermediates/TWAS/Allergy_Eczema/Allergy_Eczema.txt",
        "NI_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/NI_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto/intermediates/TWAS/Allergy_Eczema/Allergy_Eczema.txt",
        "Flu_ws4_ns_3_7_lnormTRUE_beforeTRUE_scale1":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/Flu_ws4_ns_3_7_lnormTRUE_beforeTRUE_scale1/intermediates/TWAS/Allergy_Eczema/Allergy_Eczema.txt",
        "NI_ws4_ns_3_7_lnormTRUE_beforeTRUE_scale1":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/NI_ws4_ns_3_7_lnormTRUE_beforeTRUE_scale1/intermediates/TWAS/Allergy_Eczema/Allergy_Eczema.txt",
        "Flu_ws4_ns_4_4_lnormTRUE_beforeTRUE_scale1":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/Flu_ws4_ns_4_4_lnormTRUE_beforeTRUE_scale1/intermediates/TWAS/Allergy_Eczema/Allergy_Eczema.txt",
        "NI_ws4_ns_4_4_lnormTRUE_beforeTRUE_scale1":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/NI_ws4_ns_4_4_lnormTRUE_beforeTRUE_scale1/intermediates/TWAS/Allergy_Eczema/Allergy_Eczema.txt",
        "Flu_standard":"/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/standard_predictDB_aracena/Flu_AE_Farreira_filtered.txt",
        "NI_standard":"/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/standard_predictDB_aracena/NI_AE_Farreira_filtered.txt",
        "GTeX_WB":"/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/GTex_WB/AE_SPrediXcan_results_GTex_WB_trimmed.txt"
    }
}

# "GTeX_WB", 

# gwas_traits = ["T2D", "AE"]
# expression_models = ["Flu_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto", 
#                      "NI_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto", "GTeX_WB"]
# path_to_results = "/beagle3/haky/users/saideep/projects/Con_EnPACT/SPrediXcan_results"

# for trait in gwas_traits:
#     for model in expression_models:
#         input_file = os.path.join(path_to_results, trait, model, f"{trait}_SPrediXcan_results_{model}.csv")
#         if not os.path.exists(input_file):
#             print(f"File {input_file} does not exist")
#             sys.exit(1)

Plotting functions

Code

import pandas as pd
import scipy.stats as stats

def to_latent(rgb_non_latent, alpha=0.5):
    return [x/255.0 for x in rgb_non_latent] + [alpha]

def load_results(input_files, tag=False, only_tag=False, model_contains_string=[""], model_excludes_string=[""]):

    output_dict = {}
    for gwas_type in input_files.keys():
        output_dict[gwas_type] = {}
        for model_type in input_files[gwas_type].keys():

            # Check if model name contains all substrings
            does_mcs_all = True
            if type(model_contains_string) == str:
                model_contains_string = [model_contains_string]
            for mcs in model_contains_string:
                if mcs not in model_type:
                    does_mcs_all = False
                    break
            if not does_mcs_all:
                print("Does not contain all substrings")
                continue

            # Check if model name contains any exclusion substrings
            does_mes_any = False
            if type(model_excludes_string) == str:
                model_excludes_string = [model_excludes_string]
            for mes in model_excludes_string:
                if mes == "":
                    continue
                if mes in model_type:
                    does_mes_any = True
                    break
            if does_mes_any:
                print("Contains exclusion substring")
                continue


            # Check if tagged datasets should be loaded
            if only_tag:
                pass
            else:
                cur_df = pd.read_csv(input_files[gwas_type][model_type], sep=",")
                output_dict[gwas_type][model_type] = cur_df
            
            if tag:
                if type(tag) == str:
                    cur_df = pd.read_csv(input_files[gwas_type][model_type].replace(".txt", "_"+tag+".txt"), sep=",")
                    cur_df_no_na = cur_df.dropna(subset=["pvalue"])
                    output_dict[gwas_type][model_type+"_"+tag] = cur_df_no_na
                else:
                    for t in tag:
                        cur_df = pd.read_csv(input_files[gwas_type][model_type].replace(".txt", "_"+t+".txt"), sep=",")
                        cur_df_no_na = cur_df.dropna(subset=["pvalue"])
                        output_dict[gwas_type][model_type+"_"+t] = cur_df_no_na
            
    return output_dict

# Create function for creating subsetted tables 

def create_gene_subsets_SPrediXcan_results(gene_table, tag, SPrediXcan_results_files, gene_table_col="ensembl"):

    loaded_results = load_results(SPrediXcan_results_files)

    for gwas in SPrediXcan_results_files.keys():
        for model in SPrediXcan_results_files[gwas].keys():
            cur_df = loaded_results[gwas][model]
            cur_df_subset = cur_df[cur_df['gene'].isin(gene_table[gene_table_col])]
            # print("subsetting")
            # print(cur_df_subset.shape)
            # print(sum(cur_df_subset['gene'].isin(gene_table[gene_table_col])))
            if not SPrediXcan_results_files[gwas][model].endswith(".txt"):
                print("Error: File does not end with .txt")
                return
            cur_df_subset.to_csv(SPrediXcan_results_files[gwas][model].replace(".txt", "_"+tag+".txt"), sep=",", index=False)
Code
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import beta

import upsetplot

def qqunif_maxp(p, label, BF=False, CI=False, mlog10_p_thres=30, maxp=1, color=False, lw=1.0):
    nn = len(p)
    xx = -np.log10((np.arange(1, nn + 1) * maxp) / (nn + 1))

    if not color:
        plt.scatter(xx, -np.sort(np.log10(p)), s = 8, label=label)
        plt.plot(xx, -np.sort(np.log10(p)), lw=lw)
    else:
        if "secondary" in color:
            plt.plot(xx, -np.sort(np.log10(p)), lw=lw, label=label, color=to_latent(color["secondary"]))
        else:
            plt.plot(xx, -np.sort(np.log10(p)), lw=lw, color=to_latent(color["primary"]))
        plt.scatter(xx, -np.sort(np.log10(p)), s = 8, label=label, color=to_latent(color["primary"]))

    if BF:
        plt.axhline(y=-np.log10(0.05/nn), linestyle=':', color='grey')

    if CI:
        c95 = np.zeros(nn)
        c05 = np.zeros(nn)

        for i in range(1, nn + 1):
            c95[i - 1] = beta.ppf(0.95, i, nn - i + 1) * maxp
            c05[i - 1] = beta.ppf(0.05, i, nn - i + 1) * maxp

        plt.plot(xx, -np.log10(c95))
        plt.plot(xx, -np.log10(c05))


def qq_plot_unif(S_results, GWAS, save_dir, colors=False, BF=False, CI=False, title_tag = "", plot_filename="default"):

    plt.figure(figsize=(10, 6))
    # plt.axhline(-np.log10(0.05), color='red', linestyle='-', label='FDR = 0.05')
    # plt.axhline(-np.log10(0.10), color='orange', linestyle='--', label='FDR = 0.10')
    # plt.axhline(-np.log10(0.25), color='yellow', linestyle='-.', label='FDR = 0.25')

    for S_result in S_results.keys():
        if not colors:
            qqunif_maxp(S_results[S_result].pvalue, label=f'{S_result}', BF=BF, CI=CI)
        else:
            qqunif_maxp(S_results[S_result].pvalue, label=f'{S_result}', BF=BF, CI=CI, color=colors[S_result])
        
    plt.xlabel("Expected -log10(p)")
    plt.ylabel("Observed -log10(p)")
    plt.title(f"QQ-unif plot for gwas: {GWAS}, "+title_tag)
    plt.legend()

    if plot_filename == "default":
        plt.savefig(os.path.join(save_dir,f'{GWAS}_qqplot.png'))
    else:
        plt.savefig(os.path.join(save_dir,plot_filename))
    plt.show()


def hist_plot(S_results, GWAS, model, save_dir):
    plt.figure(figsize=(10, 6))

    plt.hist(S_results.pvalue, bins=100)
    plt.xlabel("Expected -log10(p)")
    plt.ylabel("Observed -log10(p)")
    plt.title(f"QQ-unif plot for {GWAS},{model}")
    plt.legend()

    plt.savefig(os.path.join(save_dir,f'{GWAS}_{model}_histogram.png'))
    plt.show()

def scatter_plots(S_results):
    # Assumes equal rows in all dataframes
    for S_result in S_results.keys():
        dict_for_plot = {}
        for model in S_results[S_result].keys():
            sorted_cur_df = S_results[S_result][model].sort_values(by=["gene"])
            dict_for_plot[model] = list(sorted_cur_df["pvalue"])
        df_for_plot = pd.DataFrame(dict_for_plot)
        pd.plotting.scatter_matrix(df_for_plot, figsize=(len(S_results[S_result].keys())*5,len(S_results[S_result].keys())*5), diagonal='kde')



def create_subset_df(list_of_dfs, subset_col="gene", join="inner"):
    subset_df = list_of_dfs[0].copy()
    for df in list_of_dfs[1:]:
        subset_df = subset_df.merge(df, how=join,on=subset_col)
    return subset_df

def print_dims(results_dict):
    for gwas_type in results_dict.keys():
        for model_type in results_dict[gwas_type].keys():
            print(f"{gwas_type} {model_type} : {results_dict[gwas_type][model_type].shape}")

def upset_plot(loaded_results, pval_thresh=0.05, substrings=[""], exclude_substrings=[""], percent=True, colors=False):
    contents = {}
    for gwas_type in loaded_results.keys():
        for model_type in loaded_results[gwas_type].keys():

            # Check if substrings are present in keywords
            all_substrings_present = True
            for substring in substrings:
                if substring not in (model_type+"_"+gwas_type):
                    all_substrings_present = False
            if not all_substrings_present:
                continue

            # Check if substrings for exclusion are present in keywords
            exclude_substring = False
            for exclude_sub in exclude_substrings:
                if exclude_sub == "":
                    continue
                if exclude_sub in (model_type+"_"+gwas_type):
                    exclude_substring = True
            if exclude_substring:
                continue        

            # Filter by pvalue
            cur_df = loaded_results[gwas_type][model_type]
            cur_df_filt = cur_df.loc[cur_df["pvalue"]<pval_thresh]

            cur_gene_list = list(cur_df_filt["gene"])
            contents[model_type+"_"+gwas_type] = cur_gene_list

    contents_for_plot = upsetplot.from_contents(contents)

    try:
        if percent:
            if not colors:
                u = upsetplot.UpSet(contents_for_plot, orientation='horizontal', 
                        sort_by='cardinality',
                        show_percentages=True,
                        )
            else:
                u = upsetplot.UpSet(contents_for_plot, orientation='horizontal', 
                        sort_by='cardinality',
                        show_percentages=True
                        )
                for twas_type in contents.keys():
                    twas_type_wo_gwas = "_".join(twas_type.split("_")[0:-1])
                    u.style_categories(twas_type, shading_facecolor=to_latent(colors[twas_type_wo_gwas]["primary"]))

        else:
            if not colors:
                u = upsetplot.UpSet(contents_for_plot, orientation='horizontal', 
                        sort_by='cardinality',
                        show_counts=True,
                        )
            else:
                u = upsetplot.UpSet(contents_for_plot, orientation='horizontal', 
                        sort_by='cardinality',
                        show_counts=True,
                        )
                for twas_type in contents.keys():
                    twas_type_wo_gwas = "_".join(twas_type.split("_")[0:-1])
                    u.style_categories(twas_type, shading_facecolor=to_latent(colors[twas_type_wo_gwas]["primary"]))
                    
        u.plot()
        plt.suptitle("Gene overlap, pvalue < "+str(pval_thresh))
        plt.show()
    except Exception as e:
        print(e)


def scatter_plot(first_df, second_df, ax, x_lab="x", y_lab="y", sort_by="gene", bf=True, colors=False):

    common_genes = list(set(first_df["gene"]).intersection(set(second_df["gene"])))
    first_df_subset = first_df[first_df["gene"].isin(common_genes)].sort_values(by=[sort_by])
    second_df_subset = second_df[second_df["gene"].isin(common_genes)].sort_values(by=[sort_by])


    nan_filt = np.logical_not(np.logical_or(np.isnan(-np.log10(list(first_df_subset["pvalue"]))), np.isnan(-np.log10(list(second_df_subset["pvalue"])))))

    r,p = stats.pearsonr(x=-np.log10(first_df_subset[nan_filt]["pvalue"]),y= -np.log10(second_df_subset[nan_filt]["pvalue"]))
    n = len(first_df_subset[nan_filt]["pvalue"])
    
    ax.scatter(-np.log10(first_df_subset[nan_filt]["pvalue"]), -np.log10(second_df_subset[nan_filt]["pvalue"]), s=8)
    if bf:
        ax.axhline(y=-np.log10(0.05/n), linestyle=':', color='grey')
        ax.axvline(x=-np.log10(0.05/n), linestyle=':', color='grey')
    ax.text(.05, .8, "Pearson's r ={:.2f}".format(r), transform=ax.transAxes)
    ax.text(.05, .7, f"n ={n}", transform=ax.transAxes)
    ax.set_xlabel(x_lab, fontsize=12, weight = 'bold')
    ax.set_ylabel(y_lab, fontsize=12, weight = 'bold')
    if not colors:
        pass
    else:
        ax.xaxis.label.set_color(to_latent(colors["_".join(x_lab.split("_")[0:-1])]["primary"], alpha=1.0))
        ax.yaxis.label.set_color(to_latent(colors["_".join(y_lab.split("_")[0:-1])]["primary"], alpha=1.0))
    ax.axline((0, 0), slope=1)

def scatter_plot_all(results_dict, plot_title= "-log10 pvalue for pair-wise intersected genes", colors=False):

    results_count = 0
    for gwas_type in results_dict.keys():
        for model_type in results_dict[gwas_type].keys():
            results_count += 1

    fig = plt.figure(figsize=(4*results_count, 4*results_count), layout="constrained")
    fig.suptitle(plot_title, fontsize=16)
    spec = fig.add_gridspec(ncols=results_count, nrows=results_count)

    ind1 = 0
    ind2 = 0
    for gwas_type1 in results_dict.keys():
        for model_type1 in results_dict[gwas_type].keys():
            ind2 = 0
            for gwas_type2 in results_dict.keys():
                for model_type2 in results_dict[gwas_type].keys():

                    ax = fig.add_subplot(spec[ind1, ind2])
                    scatter_plot(results_dict[gwas_type1][model_type1], results_dict[gwas_type2][model_type2], ax,
                                 x_lab=model_type1+"_"+gwas_type1, y_lab=model_type2+"_"+gwas_type2, colors=colors)

                    ind2 += 1
            ind2 = 0
            ind1 += 1

Plots

Code
# loaded_results = load_results(input_files)

# print_dims(loaded_results)
Code
# upset_plot(loaded_results, pval_thresh=0.05)

Generate Histograms

Code
# for gwas in loaded_results.keys():
#     for model in loaded_results[gwas].keys():
#         pass
        # hist_plot(loaded_results[gwas][model], gwas, model, "/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-12-06-analyze_SPrediXcan_results/plots")

Generate qqplots

How does the new normalization (with isoform quantification) compare with the old in Flu Condition, Immune GWAS?

Code

# input_files_compare_normalization = {
#     "AE": {
#         "Flu_sourcekallisto":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/Flu_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto/intermediates/TWAS/Allergy_Eczema/Allergy_Eczema.txt",
#         "Flu_gene_length":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/Flu_ws4_ns_3_7_lnormTRUE_beforeTRUE_scale1/intermediates/TWAS/Allergy_Eczema/Allergy_Eczema.txt"
#     }
# }
# loaded_results_compare_normalization = load_results(input_files_compare_normalization, tag="immune")
# gwas ="AE"


# qq_plot_unif(loaded_results_compare_normalization[gwas], gwas, "/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-12-06-analyze_SPrediXcan_results/plots",
#                 plot_filename=f"compare_normalization_qqplot.png", BF=True, title_tag="EnPACT gene length vs. isoform length normalization")


# upset_plot(loaded_results_compare_normalization, pval_thresh=1.0, exclude_substrings=["immune"])
# upset_plot(loaded_results_compare_normalization, pval_thresh=1.0, substrings=["immune"])

# upset_plot(loaded_results_compare_normalization, pval_thresh=0.05, exclude_substrings=["immune"])
# upset_plot(loaded_results_compare_normalization, pval_thresh=0.05, substrings=["immune"])


# scatter_plot_all(load_results(input_files_compare_normalization))

How does EnPACT compare between Flu, NI in Allergy Eczema GWAS?

Code
input_files_compare_fluni = {
    "AE": {
        "Flu_sourcekallisto":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/Flu_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto/intermediates/TWAS/Allergy_Eczema/Allergy_Eczema.txt",
        "NI_sourcekallisto":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/NI_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto/intermediates/TWAS/Allergy_Eczema/Allergy_Eczema.txt"
    }
}
loaded_results_compare_fluni = load_results(input_files_compare_fluni, tag="immune")
gwas ="AE"

color_pal_compare_fluni = {
    "Flu_sourcekallisto": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["Flu"], 
                                              0.5)},
    "NI_sourcekallisto": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["NI"], 
                                              0.5)},
    "Flu_sourcekallisto_immune": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["Flu"], 
                                              0.5),
                                    "secondary": color_pal["subsets"]["immune"]},
    "NI_sourcekallisto_immune": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["NI"], 
                                              0.5),
                                    "secondary": color_pal["subsets"]["immune"]},                                              
}

qq_plot_unif(loaded_results_compare_fluni[gwas], gwas, "/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-12-06-analyze_SPrediXcan_results/plots",
                plot_filename=f"compare_fluni_{gwas}_qqplot.png", BF=True, title_tag="EnPACT Flu vs. NI", colors=color_pal_compare_fluni)


scatter_plot_all(load_results(input_files_compare_fluni), colors=color_pal_compare_fluni, 
                 plot_title=f"-log10 pvalue for pair-wise intersected genes for EnPACT Flu vs. NI in {gwas}")
scatter_plot_all(loaded_results_compare_fluni, colors=color_pal_compare_fluni,
                 plot_title=f"-log10 pvalue for pair-wise intersected genes for EnPACT Flu vs. NI in {gwas}")

upset_plot(loaded_results_compare_fluni, pval_thresh=1.0, exclude_substrings=["immune"], colors=color_pal_compare_fluni)
upset_plot(loaded_results_compare_fluni, pval_thresh=1.0, substrings=["immune"], colors=color_pal_compare_fluni)

upset_plot(loaded_results_compare_fluni, pval_thresh=0.05, exclude_substrings=["immune"], colors=color_pal_compare_fluni)
upset_plot(loaded_results_compare_fluni, pval_thresh=0.05, substrings=["immune"], colors=color_pal_compare_fluni)

How does EnPACT compare between Flu, NI in T2D GWAS?

Code
input_files_compare_fluni = {
    "T2D": {
        "Flu_sourcekallisto":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/Flu_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto/intermediates/TWAS/T2D/T2D.txt",
        "NI_sourcekallisto":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/NI_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto/intermediates/TWAS/T2D/T2D.txt"
    }
}
loaded_results_compare_fluni = load_results(input_files_compare_fluni, tag="immune")
gwas ="T2D"

color_pal_compare_fluni = {
    "Flu_sourcekallisto": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["Flu"], 
                                              0.5)},
    "NI_sourcekallisto": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["NI"], 
                                              0.5)},
    "Flu_sourcekallisto_immune": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["Flu"], 
                                              0.5),
                                    "secondary": color_pal["subsets"]["immune"]},
    "NI_sourcekallisto_immune": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["NI"], 
                                              0.5),
                                    "secondary": color_pal["subsets"]["immune"]},                                              
}

qq_plot_unif(loaded_results_compare_fluni[gwas], gwas, "/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-12-06-analyze_SPrediXcan_results/plots",
                plot_filename=f"compare_fluni_{gwas}_qqplot.png", BF=True, title_tag="EnPACT Flu vs. NI", colors=color_pal_compare_fluni)

scatter_plot_all(load_results(input_files_compare_fluni), colors=color_pal_compare_fluni, 
                 plot_title=f"-log10 pvalue for pair-wise intersected genes for EnPACT Flu vs. NI in {gwas}")
scatter_plot_all(loaded_results_compare_fluni, colors=color_pal_compare_fluni,
                 plot_title=f"-log10 pvalue for pair-wise intersected genes for EnPACT Flu vs. NI in {gwas}")

upset_plot(loaded_results_compare_fluni, pval_thresh=1.0, exclude_substrings=["immune"], colors=color_pal_compare_fluni)
upset_plot(loaded_results_compare_fluni, pval_thresh=1.0, substrings=["immune"], colors=color_pal_compare_fluni)

upset_plot(loaded_results_compare_fluni, pval_thresh=0.05, exclude_substrings=["immune"], colors=color_pal_compare_fluni)
upset_plot(loaded_results_compare_fluni, pval_thresh=0.05, substrings=["immune"], colors=color_pal_compare_fluni)

How does EnPACT compare with GTEx?

Code

input_files_compare_gtex = {
    "AE": {
        "Flu_sourcekallisto":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/Flu_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto/intermediates/TWAS/Allergy_Eczema/Allergy_Eczema.txt",
        "GTeX_WB":"/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/GTex_WB/AE_SPrediXcan_results_GTex_WB_trimmed.txt"
    }
}
gwas = "AE"
loaded_results_compare_gtex = load_results(input_files_compare_gtex, tag="immune")

color_pal_compare_gtex = {
    "Flu_sourcekallisto": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["Flu"], 
                                              0.5)},
    "GTeX_WB": {"primary": color_pal["models"]["GTeX_WB"]},
    "Flu_sourcekallisto_immune": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["Flu"], 
                                              0.5),
                                    "secondary": color_pal["subsets"]["immune"]},
    "GTeX_WB_immune": {"primary": color_pal["models"]["GTeX_WB"],
                                    "secondary": color_pal["subsets"]["immune"]},                                              
}

qq_plot_unif(loaded_results_compare_gtex[gwas], gwas, "/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-12-06-analyze_SPrediXcan_results/plots",
                plot_filename=f"compare_gtex_{gwas}_qqplot.png", BF=True, title_tag="EnPACT Flu vs. GTeX WB", colors=color_pal_compare_gtex)


scatter_plot_all(load_results(input_files_compare_gtex), colors=color_pal_compare_gtex, 
                 plot_title=f"-log10 pvalue for pair-wise intersected genes for EnPACT Flu vs. GTeX WB in {gwas}")
scatter_plot_all(loaded_results_compare_gtex, colors=color_pal_compare_gtex,
                 plot_title=f"-log10 pvalue for pair-wise intersected genes for EnPACT Flu vs. GTeX WB in {gwas}")

upset_plot(loaded_results_compare_gtex, pval_thresh=1.0, exclude_substrings=["immune"], colors=color_pal_compare_gtex)
upset_plot(loaded_results_compare_gtex, pval_thresh=1.0, substrings=["immune"], colors=color_pal_compare_gtex)

upset_plot(loaded_results_compare_gtex, pval_thresh=0.05, exclude_substrings=["immune"], colors=color_pal_compare_gtex)
upset_plot(loaded_results_compare_gtex, pval_thresh=0.05, substrings=["immune"], colors=color_pal_compare_gtex)

                                               id
Flu_sourcekallisto_AE GTeX_WB_AE                 
True                  True        ENSG00000174123
                      True        ENSG00000134987
                      True        ENSG00000197712
                      True        ENSG00000115604
                      True        ENSG00000172057
...                                           ...
False                 True        ENSG00000250889
                      True        ENSG00000237372
                      True        ENSG00000272255
                      True        ENSG00000273759
                      True        ENSG00000253214

[19601 rows x 1 columns]
                                                             id
Flu_sourcekallisto_immune_AE GTeX_WB_immune_AE                 
True                         True               ENSG00000174123
                             True               ENSG00000115604
                             True               ENSG00000115602
                             False              ENSG00000123384
                             True               ENSG00000174130
...                                                         ...
False                        True               ENSG00000129450
                             True               ENSG00000205220
                             True               ENSG00000121933
                             True               ENSG00000221995
                             True               ENSG00000173486

[1445 rows x 1 columns]
                                               id
Flu_sourcekallisto_AE GTeX_WB_AE                 
True                  True        ENSG00000174123
                      True        ENSG00000134987
                      True        ENSG00000197712
                      False       ENSG00000115604
                      True        ENSG00000172057
...                                           ...
False                 True        ENSG00000140455
                      True        ENSG00000244509
                      True        ENSG00000135722
                      True        ENSG00000122376
                      True        ENSG00000184574

[2866 rows x 1 columns]
                                                             id
Flu_sourcekallisto_immune_AE GTeX_WB_immune_AE                 
True                         True               ENSG00000174123
                             False              ENSG00000115604
                             True               ENSG00000115602
                             False              ENSG00000123384
                             True               ENSG00000174130
...                                                         ...
False                        True               ENSG00000213949
                             True               ENSG00000229314
                             True               ENSG00000168811
                             True               ENSG00000197885
                             True               ENSG00000177575

[292 rows x 1 columns]

How does standard PrediXcan Flu compare with standard PrediXcan NI in Allergy Eczema GWAS?

Code
input_files_compare_standard = {
    "AE": {
        "Flu_standard":"/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/standard_predictDB_aracena/Flu_AE_Farreira_filtered.txt",
        "NI_standard":"/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/standard_predictDB_aracena/NI_AE_Farreira_filtered.txt",
    }
}
gwas = "AE"

color_pal_compare_standard = {
    "Flu_standard":  {"primary": mx.lerp(color_pal["models"]["Standard"], 
                                              color_pal["conditions"]["Flu"], 
                                              0.5)},
    "NI_standard":  {"primary": mx.lerp(color_pal["models"]["Standard"], 
                                              color_pal["conditions"]["NI"], 
                                              0.5)},
    "Flu_standard_immune": {"primary": mx.lerp(color_pal["models"]["Standard"], 
                                              color_pal["conditions"]["Flu"], 
                                              0.5),
                                    "secondary": color_pal["subsets"]["immune"]},
    "NI_standard_immune": {"primary": mx.lerp(color_pal["models"]["Standard"], 
                                              color_pal["conditions"]["NI"], 
                                              0.5),
                                    "secondary": color_pal["subsets"]["immune"]},                                          
}


loaded_results_compare_standard = load_results(input_files_compare_standard, tag=["immune"])

qq_plot_unif(loaded_results_compare_standard[gwas], gwas, "/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-12-06-analyze_SPrediXcan_results/plots",
                plot_filename=f"compare_standard_Flu_NI_{gwas}_qqplot.png", BF=True, title_tag="Standard Flu vs. Standard NI", 
                colors=color_pal_compare_standard)

scatter_plot_all(load_results(input_files_compare_standard), colors=color_pal_compare_standard, 
                 plot_title=f"-log10 pvalue for pair-wise intersected genes for Standard Flu vs. Standard NI in {gwas}")
scatter_plot_all(loaded_results_compare_standard, colors=color_pal_compare_standard,
                 plot_title=f"-log10 pvalue for pair-wise intersected genes for Standard Flu vs. Standard NI in {gwas}")

upset_plot(loaded_results_compare_standard, pval_thresh=1.0, substrings=["immune"], percent=False, colors=color_pal_compare_standard)
upset_plot(loaded_results_compare_standard, pval_thresh=0.05, exclude_substrings=["immune"], percent=False, colors=color_pal_compare_standard)
upset_plot(loaded_results_compare_standard, pval_thresh=0.05, substrings=["immune"], percent=False, colors=color_pal_compare_standard)

How does EnPACT compare with Standard PrediXcan in Flu condition?

Code
input_files_compare_standard = {
    "AE": {
        "Flu_sourcekallisto":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/Flu_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto/intermediates/TWAS/Allergy_Eczema/Allergy_Eczema.txt",
        "Flu_standard":"/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/standard_predictDB_aracena/Flu_AE_Farreira_filtered.txt",
        "NI_sourcekallisto":"/beagle3/haky/users/saideep/projects/Con_EnPACT/models/NI_ws4_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto/intermediates/TWAS/Allergy_Eczema/Allergy_Eczema.txt",
        "NI_standard":"/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/standard_predictDB_aracena/NI_AE_Farreira_filtered.txt",
    }
}
gwas = "AE"

color_pal_compare_standard = {
    "Flu_sourcekallisto": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["Flu"], 
                                              0.5)},
    "Flu_standard":  {"primary": mx.lerp(color_pal["models"]["Standard"], 
                                              color_pal["conditions"]["Flu"], 
                                              0.5)},
    "NI_sourcekallisto": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["NI"], 
                                              0.5)}, 
    "NI_standard":  {"primary": mx.lerp(color_pal["models"]["Standard"], 
                                              color_pal["conditions"]["NI"], 
                                              0.5)},
    "Flu_sourcekallisto_immune": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["Flu"], 
                                              0.5),
                                    "secondary": color_pal["subsets"]["immune"]},
    "Flu_standard_immune": {"primary": mx.lerp(color_pal["models"]["Standard"], 
                                              color_pal["conditions"]["Flu"], 
                                              0.5),
                                    "secondary": color_pal["subsets"]["immune"]},
    "NI_sourcekallisto_immune": {"primary": mx.lerp(color_pal["models"]["EnPACT"], 
                                              color_pal["conditions"]["NI"], 
                                              0.5),
                                    "secondary": color_pal["subsets"]["immune"]},
    "NI_standard_immune": {"primary": mx.lerp(color_pal["models"]["Standard"], 
                                              color_pal["conditions"]["NI"], 
                                              0.5),
                                    "secondary": color_pal["subsets"]["immune"]},                                          
}


loaded_results_compare_standard_Flu = load_results(input_files_compare_standard, tag=["immune"], 
                                                   model_excludes_string="NI")

qq_plot_unif(loaded_results_compare_standard_Flu[gwas], gwas, "/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-12-06-analyze_SPrediXcan_results/plots",
                plot_filename=f"compare_standard_Flu_{gwas}_qqplot.png", BF=True, title_tag="EnPACT Flu vs. Standard", 
                colors=color_pal_compare_standard)

scatter_plot_all(load_results(input_files_compare_standard, model_excludes_string="NI"), colors=color_pal_compare_standard, 
                 plot_title=f"-log10 pvalue for pair-wise intersected genes for EnPACT Flu vs. standard Flu in {gwas}")
scatter_plot_all(loaded_results_compare_standard_Flu, colors=color_pal_compare_standard,
                 plot_title=f"-log10 pvalue for pair-wise intersected genes for EnPACT Flu vs. standard Flu in {gwas}")

upset_plot(loaded_results_compare_standard_Flu, pval_thresh=1.0, substrings=["immune", "Flu"], percent=False, colors=color_pal_compare_standard)
upset_plot(loaded_results_compare_standard_Flu, pval_thresh=0.05, substrings=["Flu"], exclude_substrings=["immune"], percent=False, colors=color_pal_compare_standard)
upset_plot(loaded_results_compare_standard_Flu, pval_thresh=0.05, substrings=["immune", "Flu"], percent=False, colors=color_pal_compare_standard)

Contains exclusion substring
Contains exclusion substring
Contains exclusion substring
Contains exclusion substring
Flu_sourcekallisto_immune_AE Flu_sourcekallisto_immune
Flu_standard_immune_AE Flu_standard_immune
Flu_sourcekallisto_AE Flu_sourcekallisto
Flu_standard_AE Flu_standard
Flu_sourcekallisto_immune_AE Flu_sourcekallisto_immune
Flu_standard_immune_AE Flu_standard_immune

How does EnPACT compare with Standard PrediXcan in NI condition?

Code

loaded_results_compare_standard_NI = load_results(input_files_compare_standard, tag=["immune"], 
                                                   model_excludes_string="Flu")

qq_plot_unif(loaded_results_compare_standard_NI[gwas], gwas, "/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-12-06-analyze_SPrediXcan_results/plots",
                plot_filename=f"compare_standard_qqplot.png", BF=True, title_tag="EnPACT NI vs. Standard NI",
                colors=color_pal_compare_standard)

scatter_plot_all(load_results(input_files_compare_standard, model_excludes_string="Flu"), colors=color_pal_compare_standard, 
                 plot_title=f"-log10 pvalue for pair-wise intersected genes for EnPACT NI vs. standard NI in {gwas}")
scatter_plot_all(loaded_results_compare_standard_NI, colors=color_pal_compare_standard,
                 plot_title=f"-log10 pvalue for pair-wise intersected genes for EnPACT NI vs. standard NI in {gwas}")

upset_plot(loaded_results_compare_standard_NI, pval_thresh=1.0, substrings=["immune", "NI"], percent=False, colors=color_pal_compare_standard)
upset_plot(loaded_results_compare_standard_NI, pval_thresh=0.05, substrings=["NI"], exclude_substrings=["immune"], percent=False, colors=color_pal_compare_standard)
upset_plot(loaded_results_compare_standard_NI, pval_thresh=0.05, substrings=["immune", "NI"], percent=False, colors=color_pal_compare_standard)
Contains exclusion substring
Contains exclusion substring
Contains exclusion substring
Contains exclusion substring
NI_sourcekallisto_immune_AE NI_sourcekallisto_immune
NI_standard_immune_AE NI_standard_immune
NI_sourcekallisto_AE NI_sourcekallisto
NI_standard_AE NI_standard
NI_sourcekallisto_immune_AE NI_sourcekallisto_immune
NI_standard_immune_AE NI_standard_immune

We can try subsetting to genes which pass the PredictDB threshold in the Standard case

Code
# First look at the Flu case


standard_flu_genes = loaded_results_compare_standard_Flu["AE"]["Flu_standard"]
create_gene_subsets_SPrediXcan_results(standard_flu_genes,
                                        "standard_Flu", input_files_compare_standard,
                                        gene_table_col="gene")

standard_flu_immune_genes = standard_flu_genes.loc[standard_flu_genes['gene'].isin(immune_genes['ensembl'])] 
create_gene_subsets_SPrediXcan_results(standard_flu_immune_genes,
                                        "standard_Flu_immune", input_files_compare_standard,
                                        gene_table_col="gene")


loaded_results_compare_standard_Flu = load_results(input_files_compare_standard, tag=["standard_Flu","standard_Flu_immune"], 
                                                   only_tag=True,
                                                   model_excludes_string="NI")

qq_plot_unif(loaded_results_compare_standard_Flu[gwas], gwas, "/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-12-06-analyze_SPrediXcan_results/plots",
                plot_filename=f"compare_standard_Flu_qqplot.png", BF=True)
upset_plot(loaded_results_compare_standard_Flu, pval_thresh=0.05, substrings=["immune", "standard_Flu"], exclude_substrings=["NI"], percent=False)
upset_plot(loaded_results_compare_standard_Flu, pval_thresh=0.05, substrings=["standard_Flu"], exclude_substrings=["immune","NI"], percent=False)
subsetting
(1196, 12)
1196
subsetting
(1202, 12)
1202
subsetting
(1197, 12)
1197
subsetting
(248, 12)
248
subsetting
(119, 12)
119
subsetting
(121, 12)
121
subsetting
(120, 12)
120
subsetting
(24, 12)
24
Contains exclusion substring
Contains exclusion substring

Code
# Next, the NI case


standard_ni_genes = loaded_results_compare_standard_NI["AE"]["NI_standard"]
create_gene_subsets_SPrediXcan_results(standard_ni_genes,
                                        "standard_NI", input_files_compare_standard,
                                        gene_table_col="gene")

standard_ni_immune_genes = standard_ni_genes.loc[standard_ni_genes['gene'].isin(immune_genes['ensembl'])] 
create_gene_subsets_SPrediXcan_results(standard_ni_immune_genes,
                                        "standard_NI_immune", input_files_compare_standard,
                                        gene_table_col="gene")


loaded_results_compare_standard_NI = load_results(input_files_compare_standard, tag=["standard_NI","standard_NI_immune"], 
                                                   only_tag=True,
                                                   model_excludes_string="Flu")

qq_plot_unif(loaded_results_compare_standard_NI[gwas], gwas, "/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-12-06-analyze_SPrediXcan_results/plots",
                plot_filename=f"compare_standard_NI_qqplot.png", BF=True)
upset_plot(loaded_results_compare_standard_NI, pval_thresh=0.05, substrings=["immune", "standard_NI"], exclude_substrings=["Flu"], percent=False)
upset_plot(loaded_results_compare_standard_NI, pval_thresh=0.05, substrings=["standard_NI"], exclude_substrings=["immune","Flu"], percent=False)
subsetting
(1094, 12)
1094
subsetting
(248, 12)
248
subsetting
(1095, 12)
1095
subsetting
(1102, 12)
1102
subsetting
(94, 12)
94
subsetting
(24, 12)
24
subsetting
(94, 12)
94
subsetting
(96, 12)
96
Contains exclusion substring
Contains exclusion substring