predict_for_linearization

Author

Saideep Gona

Published

May 20, 2024

Code
library(glue)
library(R.utils)
Loading required package: R.oo
Loading required package: R.methodsS3
R.methodsS3 v1.8.2 (2022-06-13 22:00:14 UTC) successfully loaded. See ?R.methodsS3 for help.
R.oo v1.25.0 (2022-06-12 02:20:02 UTC) successfully loaded. See ?R.oo for help.

Attaching package: 'R.oo'
The following object is masked from 'package:R.methodsS3':

    throw
The following object is masked from 'package:glue':

    trim
The following objects are masked from 'package:methods':

    getClasses, getMethods
The following objects are masked from 'package:base':

    attach, detach, load, save
R.utils v2.12.3 (2023-11-18 01:00:02 UTC) successfully loaded. See ?R.utils for help.

Attaching package: 'R.utils'
The following object is masked from 'package:utils':

    timestamp
The following objects are masked from 'package:base':

    cat, commandArgs, getOption, isOpen, nullfile, parse, warnings
Code
library(data.table)
library(glmnet)
Loading required package: Matrix
Loaded glmnet 4.1-8
Code
library(parallel)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.3     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::between()     masks data.table::between()
✖ tidyr::expand()      masks Matrix::expand()
✖ tidyr::extract()     masks R.utils::extract()
✖ dplyr::filter()      masks stats::filter()
✖ dplyr::first()       masks data.table::first()
✖ lubridate::hour()    masks data.table::hour()
✖ lubridate::isoweek() masks data.table::isoweek()
✖ dplyr::lag()         masks stats::lag()
✖ dplyr::last()        masks data.table::last()
✖ lubridate::mday()    masks data.table::mday()
✖ lubridate::minute()  masks data.table::minute()
✖ lubridate::month()   masks data.table::month()
✖ tidyr::pack()        masks Matrix::pack()
✖ lubridate::quarter() masks data.table::quarter()
✖ lubridate::second()  masks data.table::second()
✖ purrr::transpose()   masks data.table::transpose()
✖ tidyr::unpack()      masks Matrix::unpack()
✖ lubridate::wday()    masks data.table::wday()
✖ lubridate::week()    masks data.table::week()
✖ lubridate::yday()    masks data.table::yday()
✖ lubridate::year()    masks data.table::year()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Code
library(ggplot2)
Code
# glmnet_models_path <- "/beagle3/haky/users/saideep/projects/Con_EnPACT/models/"

# collected_preds_dir <- "/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/asthma_eczema_farreira_susie/enformer_preds/collected_preds"

# enpact_preds_linear_dir <- "/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/asthma_eczema_farreira_susie/enformer_preds/enpact_linear_preds"

# inds_path <- "/beagle3/haky/users/saideep/projects/enformer_all_geuvadis/inputs/all_inds.txt"
# inds <- c()
# inds <- readLines(inds_path)

# optimal_window_sizes = list(
#     "H3K27ac"=8,
#     "H3K27me3"=64,
#     "H3K4me1"=32,
#     "H3K4me3"=8,
#     "ATAC"=8
# )
Code
# for (ind in inds) {
#     for (x in 1:length(optimal_window_sizes)) {

#         cur_modality <- names(optimal_window_sizes)[x]
#         cur_window_size <- optimal_window_sizes[[cur_modality]]

#         print(cur_modality)
#         print(cur_window_size)

#         X_path <- glue("{collected_preds_dir}/{ind}_{cur_window_size}.txt")

#         if (file.exists(X_path)) {

#             skip_to_next <- FALSE
#             tryCatch(
#                 X <- as.matrix(read.table(X_path, row.names = 1, header = F, sep = "\t")),
#                 error = function(e) { skip_to_next <- TRUE}
#             )
#             if(skip_to_next) { next }

#             for (cond in c("Flu", "NI")) {

#                 out_path = glue("{enpact_preds_linear_dir}/{ind}_{cur_modality}_{cond}.txt")
#                 if (file.exists(out_path)) {
#                     print(glue("{ind}_{cur_modality}_{cond} already exists"))
#                     next
#                 }

#                 glmnet_model_path <- glue("{glmnet_models_path}/{cond}_{cur_modality}_ws{cur_window_size}/intermediates/train_enpact/trained_enpact_eln_{cond}.linear.rds")

#                 print(glmnet_model_path)

#                 glmnet_model <- readRDS(glmnet_model_path)

#                 glmnet_predictions <- as.data.frame(predict(glmnet_model, X, s = "lambda.min", type="response"))
#                 rownames(glmnet_predictions) <- rownames(X)

#                 print(glmnet_predictions)

#                 write.table(glmnet_predictions, glue("{enpact_preds_linear_dir}/{ind}_{cur_modality}_{cond}.txt"),row.names = T, sep="\t")

#                 }
#         } else {

#             print(glue({"{ind} missing"}))

#         }
#     }
# }