compare_elastic_net_EnPACT_models

Author

Saideep Gona

Published

May 16, 2024

Context

This notebook is used to compare the number of non-zero coefficients in the elastic net models trained by EnPACT for different modalities and conditions. This gives some idea of how “complex” each modality is with respect to the Enformer predictions.

Code
# Sanity check on a single model: load one trained elastic net fit
# (Flu, ATAC, window size 8) and count its non-zero coefficients.
en_net_path <- "/beagle3/haky/users/saideep/projects/Con_EnPACT/models/Flu_ATAC_ws8/intermediates/train_enpact/trained_enpact_eln_Flu.linear.rds"
loaded_en_net <- readRDS(en_net_path)

# Extract the fitted coefficients from the model object.
coefs <- predict(loaded_en_net, type = "coef")

# Logical mask of coefficients that are not exactly zero; summing it
# gives the number of non-zero coefficients.
nonzero_coefs <- coefs != 0
sum(nonzero_coefs)
[1] 2749
Code
# Root directory holding one trained-model directory per
# (modality, condition, window size) combination.
proj_path <- "/beagle3/haky/users/saideep/projects/Con_EnPACT/models"

# Window size recorded as optimal for each modality. It is only used to
# build the "<modality>_<optimal ws>" label stored in `optws_modalities`.
optimal_window_sizes <- list(
    "H3K27ac" = 8,
    "H3K27me3" = 64,
    "H3K4me1" = 32,
    "H3K4me3" = 8,
    "ATAC" = 8,
    "RNAseq" = 4
)

# Grid of conditions and window sizes for which a model is loaded.
conditions_to_compare <- c("Flu", "NI")
window_sizes_to_compare <- c(2, 4, 8, 16, 32, 64, 128)

# Preallocate one slot per model instead of growing vectors with c()
# inside the loop.
n_models <- length(optimal_window_sizes) *
    length(conditions_to_compare) *
    length(window_sizes_to_compare)

modalities <- character(n_models)
conditions <- character(n_models)
window_sizes <- character(n_models)
nonzero_count <- integer(n_models)
optws_modalities <- character(n_models)

row_idx <- 0L

# Iterate over the names directly: safe when the list is empty, unlike
# 1:length(...), which would yield c(1, 0).
for (modality in names(optimal_window_sizes)) {

    # Label is constant for a modality, so compute it once per modality.
    optws_label <- paste0(
        modality, "_", as.character(optimal_window_sizes[[modality]])
    )

    for (condition in conditions_to_compare) {
        for (ws in window_sizes_to_compare) {

            row_idx <- row_idx + 1L

            # RNAseq models live under a differently named directory than
            # the other modalities.
            if (modality == "RNAseq") {
                path_to_en_net <- glue("{proj_path}/{condition}_ws{ws}_ns_3_7_lnormFALSE_beforeFALSE_scale1_sourcekallisto/intermediates/train_enpact/trained_enpact_eln_{condition}.linear.rds")
            } else {
                path_to_en_net <- glue("{proj_path}/{condition}_{modality}_ws{ws}/intermediates/train_enpact/trained_enpact_eln_{condition}.linear.rds")
            }

            cur_en_net <- readRDS(path_to_en_net)

            # NOTE(review): if these are glmnet / cv.glmnet fits, the
            # coefficient vector includes the intercept, so the count
            # below may include it as well -- confirm.
            coefs <- predict(cur_en_net, type = "coef")
            nonzero_coefs <- coefs != 0

            modalities[row_idx] <- modality
            window_sizes[row_idx] <- as.character(ws)
            conditions[row_idx] <- condition
            nonzero_count[row_idx] <- sum(nonzero_coefs)
            optws_modalities[row_idx] <- optws_label
        }
    }
}

# One row per model; column names come from the vector names.
en_summary_df <- data.frame(modalities, conditions, nonzero_count, window_sizes, optws_modalities)

# Order window sizes numerically (not alphabetically) for plotting.
en_summary_df$window_sizes <- factor(
    en_summary_df$window_sizes,
    levels = as.character(window_sizes_to_compare)
)
Code
# Print the full summary table: one row per (modality, condition, window size).
en_summary_df
   modalities conditions nonzero_count window_sizes optws_modalities
1     H3K27ac        Flu          2669            2        H3K27ac_8
2     H3K27ac        Flu          2401            4        H3K27ac_8
3     H3K27ac        Flu          2230            8        H3K27ac_8
4     H3K27ac        Flu          2211           16        H3K27ac_8
5     H3K27ac        Flu          2213           32        H3K27ac_8
6     H3K27ac        Flu          1312           64        H3K27ac_8
7     H3K27ac        Flu           336          128        H3K27ac_8
8     H3K27ac         NI          1974            2        H3K27ac_8
9     H3K27ac         NI          2047            4        H3K27ac_8
10    H3K27ac         NI          2032            8        H3K27ac_8
11    H3K27ac         NI          1939           16        H3K27ac_8
12    H3K27ac         NI          2056           32        H3K27ac_8
13    H3K27ac         NI          1849           64        H3K27ac_8
14    H3K27ac         NI           467          128        H3K27ac_8
15   H3K27me3        Flu          1522            2      H3K27me3_64
16   H3K27me3        Flu          1276            4      H3K27me3_64
17   H3K27me3        Flu          1414            8      H3K27me3_64
18   H3K27me3        Flu          1386           16      H3K27me3_64
19   H3K27me3        Flu          1477           32      H3K27me3_64
20   H3K27me3        Flu          1698           64      H3K27me3_64
21   H3K27me3        Flu          1384          128      H3K27me3_64
22   H3K27me3         NI          1464            2      H3K27me3_64
23   H3K27me3         NI          1261            4      H3K27me3_64
24   H3K27me3         NI          1383            8      H3K27me3_64
25   H3K27me3         NI          1333           16      H3K27me3_64
26   H3K27me3         NI          1421           32      H3K27me3_64
27   H3K27me3         NI          1679           64      H3K27me3_64
28   H3K27me3         NI          1397          128      H3K27me3_64
29    H3K4me1        Flu          3621            2       H3K4me1_32
30    H3K4me1        Flu          3214            4       H3K4me1_32
31    H3K4me1        Flu          2917            8       H3K4me1_32
32    H3K4me1        Flu          2715           16       H3K4me1_32
33    H3K4me1        Flu          2507           32       H3K4me1_32
34    H3K4me1        Flu          2223           64       H3K4me1_32
35    H3K4me1        Flu          1480          128       H3K4me1_32
36    H3K4me1         NI          3750            2       H3K4me1_32
37    H3K4me1         NI          3205            4       H3K4me1_32
38    H3K4me1         NI          2931            8       H3K4me1_32
39    H3K4me1         NI          2720           16       H3K4me1_32
40    H3K4me1         NI          2630           32       H3K4me1_32
41    H3K4me1         NI          2406           64       H3K4me1_32
42    H3K4me1         NI          1576          128       H3K4me1_32
43    H3K4me3        Flu          1422            2        H3K4me3_8
44    H3K4me3        Flu          1187            4        H3K4me3_8
45    H3K4me3        Flu          1232            8        H3K4me3_8
46    H3K4me3        Flu          1040           16        H3K4me3_8
47    H3K4me3        Flu           769           32        H3K4me3_8
48    H3K4me3        Flu           562           64        H3K4me3_8
49    H3K4me3        Flu           667          128        H3K4me3_8
50    H3K4me3         NI          1223            2        H3K4me3_8
51    H3K4me3         NI          1255            4        H3K4me3_8
52    H3K4me3         NI          1395            8        H3K4me3_8
53    H3K4me3         NI          1137           16        H3K4me3_8
54    H3K4me3         NI          1107           32        H3K4me3_8
55    H3K4me3         NI          1646           64        H3K4me3_8
56    H3K4me3         NI           656          128        H3K4me3_8
57       ATAC        Flu          2674            2           ATAC_8
58       ATAC        Flu          2575            4           ATAC_8
59       ATAC        Flu          2749            8           ATAC_8
60       ATAC        Flu          2168           16           ATAC_8
61       ATAC        Flu          2054           32           ATAC_8
62       ATAC        Flu          1234           64           ATAC_8
63       ATAC        Flu          1330          128           ATAC_8
64       ATAC         NI          3254            2           ATAC_8
65       ATAC         NI          3374            4           ATAC_8
66       ATAC         NI          2887            8           ATAC_8
67       ATAC         NI          2666           16           ATAC_8
68       ATAC         NI          2332           32           ATAC_8
69       ATAC         NI          1714           64           ATAC_8
70       ATAC         NI          1556          128           ATAC_8
71     RNAseq        Flu           327            2         RNAseq_4
72     RNAseq        Flu           311            4         RNAseq_4
73     RNAseq        Flu           343            8         RNAseq_4
74     RNAseq        Flu           319           16         RNAseq_4
75     RNAseq        Flu           466           32         RNAseq_4
76     RNAseq        Flu           419           64         RNAseq_4
77     RNAseq        Flu           232          128         RNAseq_4
78     RNAseq         NI           548            2         RNAseq_4
79     RNAseq         NI           532            4         RNAseq_4
80     RNAseq         NI           585            8         RNAseq_4
81     RNAseq         NI           621           16         RNAseq_4
82     RNAseq         NI           615           32         RNAseq_4
83     RNAseq         NI           547           64         RNAseq_4
84     RNAseq         NI           460          128         RNAseq_4
Code
# Bar chart of non-zero coefficient counts per modality, with bars
# dodged side by side by condition.
# NOTE(review): every window size is plotted here and window size is not
# mapped to an aesthetic, so the bars for the different window sizes of a
# given modality/condition appear to be drawn at the same position --
# confirm this is the intended view.
ggplot(en_summary_df) +
    geom_bar(
        aes(x = modalities, y = nonzero_count, fill = conditions),
        stat = "identity",
        position = "dodge"
    ) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

Interestingly, RNAseq seems the “least” complex by this metric.

Code
# Bar chart of non-zero coefficient counts for the Flu condition only:
# one group of bars per modality (labelled with its optimal window size),
# one bar per trained window size.
# The filter uses the exact column name `conditions`; `$condition` only
# worked through partial matching, which returns NULL on tibbles and
# becomes ambiguous if another column starting with "condition" is added.
ggplot(en_summary_df[en_summary_df$conditions == "Flu", ]) +
    geom_bar(
        aes(x = optws_modalities, y = nonzero_count, fill = window_sizes),
        stat = "identity",
        position = "dodge"
    ) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))