2.1.2_check_predictDB_standard.qmd

Author

Saideep Gona

Published

January 24, 2024

Context

I seem to be getting strangely low gene counts past filtering after running PredictDB. Need to investigate this.

Code
library(ggplot2)

Load summary standard

Code
path_pred_summ <- "/beagle3/haky/users/saideep/projects/aracena_modeling/linearization/standard_predictDB_aracena_Flu/database/Model_summary.txt"

pred_summ <- read.table(path_pred_summ, header = TRUE, sep = "\t")
print(dim(pred_summ))
[1] 10487    24
Code
hist(pred_summ$rho_avg, breaks = 20, main = "Standard PredictDB")

Code
hist(pred_summ$zscore_pval, breaks = 20, main = "Standard PredictDB")

Code
pred_summ_filt <- pred_summ[(pred_summ$rho_avg > 0.1) & (pred_summ$zscore_pval < 0.05),]

print(dim(pred_summ_filt))
[1] 230  24

Check for eGenes from Aracena et al.

Code
# Load eGenes table

path_to_egenes <- "/beagle3/haky/users/saideep/projects/aracena_modeling/Inputs/QTL_mapping/eqtls.csv"
egenes_table <- read.table(path_to_egenes, header = TRUE, sep = ",")

hist(egenes_table$pvalue_Flu, main = "eGenes pvalues")

Code
dim(pred_summ)
[1] 10487    24
Code
dim(egenes_table)
[1] 13680    13
Code
subset_to_egenes <- merge(egenes_table, pred_summ, by.x = "feature", by.y = "gene")
dim(subset_to_egenes)
[1] 10487    36
Code
ggplot(subset_to_egenes, aes(x = pvalue_Flu, y = zscore_pval)) + geom_point() 

Code
spearcor <- cor(subset_to_egenes$pvalue_Flu, subset_to_egenes$zscore_pval, method = "spearman")

spearcor
[1] 0.1050551

Load summary Enpact

Code
path_pred_summ <- "/beagle3/haky/users/saideep/projects/aracena_modeling/linearization/predixcan_RNAseq_Flu_from_HDF5_mean_log_revised/database/Model_summary.txt"

pred_summ <- read.table(path_pred_summ, header = TRUE, sep = "\t")
print(dim(pred_summ))
[1] 18714    24
Code
hist(pred_summ$rho_avg, breaks = 20, main = "Enpact PredictDB")

Code
hist(pred_summ$zscore_pval, breaks = 20, main = "Enpact PredictDB")

Code
pred_summ_filt <- pred_summ[(pred_summ$rho_avg > 0.1) & (pred_summ$zscore_pval < 0.05),]

print(dim(pred_summ_filt))
[1] 18695    24
Code
path_pred_summ <- "/beagle3/haky/users/charles/project/singleXcanDL/PredicDB/OneK1K/PrediXcan/work_dir/results/database/Model_summary.txt"

pred_summ <- read.table(path_pred_summ, header = TRUE, sep = "\t")
print(dim(pred_summ))
[1] 18082    24
Code
hist(pred_summ$rho_avg, breaks = 20, main = "Charles PredictDB")

Code
hist(pred_summ$zscore_pval, breaks = 20, main = "Charles PredictDB")

Code
pred_summ_filt <- pred_summ[(pred_summ$rho_avg > 0.1) & (pred_summ$zscore_pval < 0.05),]

print(dim(pred_summ_filt))
[1] 2992   24