1.3.2_predict_for_personalized_predictions

Author

Saideep Gona

Published

January 10, 2024

Context

In order to linearize, we first need to predict on the

Code
library(data.table)
library(glmnet)
library(glue)
library(tidyverse)

Test on a single individual

Code
# X_path <- "/beagle3/haky/users/saideep/projects/aracena_modeling/linearization/HG00096.txt"

# X <- as.matrix(data.table::fread(X_path))

# glmnet_model_path <- "/beagle3/haky/users/saideep/projects/aracena_modeling/elastic_net/trained_eln_RNAseq_Flu_from_HDF5_mean_log_revised.linear.rds"
# glmnet_model <- readRDS(glmnet_model_path)

# glmnet_predictions <- predict(glmnet_model, X, s = "lambda.min", type="response")
Code
vcf_inds <- strsplit("Epi_AF08_NI_WGS_1_Epi_AF08_NI_WGS_1     Epi_AF10_NI_WGS_1_Epi_AF10_NI_WGS_1 Epi_AF12_Flu_WGS_1_Epi_AF12_Flu_WGS_1   Epi_AF14_Mock_WGS_1_Epi_AF14_Mock_WGS_1 Epi_AF16_NI_WGS_1_Epi_AF16_NI_WGS_1 Epi_AF18_NI_WGS_1_Epi_AF18_NI_WGS_1     Epi_AF20_NI_WGS_1_Epi_AF20_NI_WGS_1     Epi_AF22_Flu_WGS_1_Epi_AF22_Flu_WGS_1       Epi_AF24_Flu_WGS_1_Epi_AF24_Flu_WGS_1   Epi_AF26_NI_WGS_1_Epi_AF26_NI_WGS_1Epi_AF28_NI_WGS_1_Epi_AF28_NI_WGS_1      Epi_AF30_NI_WGS_1_Epi_AF30_NI_WGS_1     Epi_AF34_NI_WGS_1_Epi_AF34_NI_WGS_1 Epi_AF36_Flu_WGS_1_Epi_AF36_Flu_WGS_1   Epi_AF38_NI_WGS_1_Epi_AF38_NI_WGS_1     Epi_EU03_NI_WGS_1_Epi_EU03_NI_WGS_1 Epi_EU05_NI_WGS_1_Epi_EU05_NI_WGS_1     Epi_EU07_NI_WGS_1_Epi_EU07_NI_WGS_1     Epi_EU09_Mock_WGS_1_Epi_EU09_Mock_WGS_1     Epi_EU13_NI_WGS_1_Epi_EU13_NI_WGS_1     Epi_EU15_NI_WGS_1_Epi_EU15_NI_WGS_1Epi_EU17_NI_WGS_1_Epi_EU17_NI_WGS_1      Epi_EU19_NI_WGS_1_Epi_EU19_NI_WGS_1     Epi_EU21_Flu_WGS_1_Epi_EU21_Flu_WGS_1       Epi_EU25_NI_WGS_1_Epi_EU25_NI_WGS_1     Epi_EU27_NI_WGS_1_Epi_EU27_NI_WGS_1     Epi_EU29_NI_WGS_1_Epi_EU29_NI_WGS_1 Epi_EU33_Flu_WGS_1_Epi_EU33_Flu_WGS_1   Epi_EU37_Flu_WGS_1_Epi_EU37_Flu_WGS_1   Epi_EU39_NI_WGS_1_Epi_EU39_NI_WGS_1 Epi_EU41_NI_WGS_1_Epi_EU41_NI_WGS_1     Epi_EU43_NI_WGS_1_Epi_EU43_NI_WGS_1Epi_EU47_NI_WGS_1_Epi_EU47_NI_WGS_1","\\s+")

Flu expression

Code
glmnet_model_path <- "/beagle3/haky/users/saideep/projects/aracena_modeling/elastic_net/trained_eln_RNAseq_Flu_normalized_covariates.linear.rds"
glmnet_model <- readRDS(glmnet_model_path)

collected_preds_dir <- "/beagle3/haky/users/saideep/projects/aracena_modeling/enformer_predictions/aracena_preds/collected_preds"

enpact_preds_dir <- "/beagle3/haky/users/saideep/projects/aracena_modeling/enformer_predictions/aracena_preds/enpact_preds"

for (ind in vcf_inds[[1]]) {
    for (hap in c("haplo1","haplo2")) {
        X_path <- glue("{collected_preds_dir}/{ind}_{hap}.txt")
        if (file.exists(X_path)) {

            X <- as.matrix(read.table(X_path, row.names = 1, header = F, sep = "\t"))
            glmnet_predictions <- as.data.frame(predict(glmnet_model, X, s = "lambda.min", type="response"))
            rownames(glmnet_predictions) <- rownames(X)

            write.table(glmnet_predictions, glue("{enpact_preds_dir}/{ind}_{hap}_preds_RNAseq_Flu_personalized.txt"),row.names = T, sep="\t")

        } else {
            print(glue({"{ind}_{hap} missing"}))
        }
    }
    
}

NI expression

Code
glmnet_model_path <- "/beagle3/haky/users/saideep/projects/aracena_modeling/elastic_net/trained_eln_RNAseq_NI_normalized_covariates.linear.rds"
glmnet_model <- readRDS(glmnet_model_path)

collected_preds_dir <- "/beagle3/haky/users/saideep/projects/aracena_modeling/enformer_predictions/aracena_preds/collected_preds"

enpact_preds_dir <- "/beagle3/haky/users/saideep/projects/aracena_modeling/enformer_predictions/aracena_preds/enpact_preds"

for (ind in vcf_inds) {
    for (hap in c("haplo1","haplo2")) {
        X_path <- glue("{collected_preds_dir}/{ind}_{hap}.txt")
        if (file.exists(X_path)) {

            X <- as.matrix(data.table::fread(X_path))
            glmnet_predictions <- as.data.frame(predict(glmnet_model, X, s = "lambda.min", type="response"))

            write_delim(glmnet_predictions, glue("{enpact_preds_dir}/{ind}_{hap}_preds_RNAseq_NI_personalized.txt"), delim = "\t")

        } else {
            print(glue({"{ind}_{hap} missing"}))
        }
    }
}