Code
library(data.table)
library(glmnet)
library(glue)
library(tidyverse)
Saideep Gona
January 10, 2024
In order to linearize, we first need to predict on the
Test on a single individual
# X_path <- "/beagle3/haky/users/saideep/projects/aracena_modeling/linearization/HG00096.txt"
# X <- as.matrix(data.table::fread(X_path))
# glmnet_model_path <- "/beagle3/haky/users/saideep/projects/aracena_modeling/elastic_net/trained_eln_RNAseq_Flu_from_HDF5_mean_log_revised.linear.rds"
# glmnet_model <- readRDS(glmnet_model_path)
# glmnet_predictions <- predict(glmnet_model, X, s = "lambda.min", type="response")
vcf_inds <- strsplit("Epi_AF08_NI_WGS_1_Epi_AF08_NI_WGS_1 Epi_AF10_NI_WGS_1_Epi_AF10_NI_WGS_1 Epi_AF12_Flu_WGS_1_Epi_AF12_Flu_WGS_1 Epi_AF14_Mock_WGS_1_Epi_AF14_Mock_WGS_1 Epi_AF16_NI_WGS_1_Epi_AF16_NI_WGS_1 Epi_AF18_NI_WGS_1_Epi_AF18_NI_WGS_1 Epi_AF20_NI_WGS_1_Epi_AF20_NI_WGS_1 Epi_AF22_Flu_WGS_1_Epi_AF22_Flu_WGS_1 Epi_AF24_Flu_WGS_1_Epi_AF24_Flu_WGS_1 Epi_AF26_NI_WGS_1_Epi_AF26_NI_WGS_1Epi_AF28_NI_WGS_1_Epi_AF28_NI_WGS_1 Epi_AF30_NI_WGS_1_Epi_AF30_NI_WGS_1 Epi_AF34_NI_WGS_1_Epi_AF34_NI_WGS_1 Epi_AF36_Flu_WGS_1_Epi_AF36_Flu_WGS_1 Epi_AF38_NI_WGS_1_Epi_AF38_NI_WGS_1 Epi_EU03_NI_WGS_1_Epi_EU03_NI_WGS_1 Epi_EU05_NI_WGS_1_Epi_EU05_NI_WGS_1 Epi_EU07_NI_WGS_1_Epi_EU07_NI_WGS_1 Epi_EU09_Mock_WGS_1_Epi_EU09_Mock_WGS_1 Epi_EU13_NI_WGS_1_Epi_EU13_NI_WGS_1 Epi_EU15_NI_WGS_1_Epi_EU15_NI_WGS_1Epi_EU17_NI_WGS_1_Epi_EU17_NI_WGS_1 Epi_EU19_NI_WGS_1_Epi_EU19_NI_WGS_1 Epi_EU21_Flu_WGS_1_Epi_EU21_Flu_WGS_1 Epi_EU25_NI_WGS_1_Epi_EU25_NI_WGS_1 Epi_EU27_NI_WGS_1_Epi_EU27_NI_WGS_1 Epi_EU29_NI_WGS_1_Epi_EU29_NI_WGS_1 Epi_EU33_Flu_WGS_1_Epi_EU33_Flu_WGS_1 Epi_EU37_Flu_WGS_1_Epi_EU37_Flu_WGS_1 Epi_EU39_NI_WGS_1_Epi_EU39_NI_WGS_1 Epi_EU41_NI_WGS_1_Epi_EU41_NI_WGS_1 Epi_EU43_NI_WGS_1_Epi_EU43_NI_WGS_1Epi_EU47_NI_WGS_1_Epi_EU47_NI_WGS_1","\\s+")
glmnet_model_path <- "/beagle3/haky/users/saideep/projects/aracena_modeling/elastic_net/trained_eln_RNAseq_Flu_normalized_covariates.linear.rds"
glmnet_model <- readRDS(glmnet_model_path)
collected_preds_dir <- "/beagle3/haky/users/saideep/projects/aracena_modeling/enformer_predictions/aracena_preds/collected_preds"
enpact_preds_dir <- "/beagle3/haky/users/saideep/projects/aracena_modeling/enformer_predictions/aracena_preds/enpact_preds"
for (ind in vcf_inds[[1]]) {
for (hap in c("haplo1","haplo2")) {
X_path <- glue("{collected_preds_dir}/{ind}_{hap}.txt")
if (file.exists(X_path)) {
X <- as.matrix(read.table(X_path, row.names = 1, header = F, sep = "\t"))
glmnet_predictions <- as.data.frame(predict(glmnet_model, X, s = "lambda.min", type="response"))
rownames(glmnet_predictions) <- rownames(X)
write.table(glmnet_predictions, glue("{enpact_preds_dir}/{ind}_{hap}_preds_RNAseq_Flu_personalized.txt"),row.names = T, sep="\t")
} else {
print(glue({"{ind}_{hap} missing"}))
}
}
}
glmnet_model_path <- "/beagle3/haky/users/saideep/projects/aracena_modeling/elastic_net/trained_eln_RNAseq_NI_normalized_covariates.linear.rds"
glmnet_model <- readRDS(glmnet_model_path)
collected_preds_dir <- "/beagle3/haky/users/saideep/projects/aracena_modeling/enformer_predictions/aracena_preds/collected_preds"
enpact_preds_dir <- "/beagle3/haky/users/saideep/projects/aracena_modeling/enformer_predictions/aracena_preds/enpact_preds"
for (ind in vcf_inds) {
for (hap in c("haplo1","haplo2")) {
X_path <- glue("{collected_preds_dir}/{ind}_{hap}.txt")
if (file.exists(X_path)) {
X <- as.matrix(data.table::fread(X_path))
glmnet_predictions <- as.data.frame(predict(glmnet_model, X, s = "lambda.min", type="response"))
write_delim(glmnet_predictions, glue("{enpact_preds_dir}/{ind}_{hap}_preds_RNAseq_NI_personalized.txt"), delim = "\t")
} else {
print(glue({"{ind}_{hap} missing"}))
}
}
}
---
title: "1.3.2_predict_for_personalized_predictions"
author: "Saideep Gona"
date: "2024-1-10"
format:
html:
code-fold: true
code-summary: "Show the code"
execute:
freeze: true
warning: false
---
# Context
In order to linearize, we first need to predict on the
```{r}
library(data.table)
library(glmnet)
library(glue)
library(tidyverse)
```
Test on a single individual
```{r}
# X_path <- "/beagle3/haky/users/saideep/projects/aracena_modeling/linearization/HG00096.txt"
# X <- as.matrix(data.table::fread(X_path))
# glmnet_model_path <- "/beagle3/haky/users/saideep/projects/aracena_modeling/elastic_net/trained_eln_RNAseq_Flu_from_HDF5_mean_log_revised.linear.rds"
# glmnet_model <- readRDS(glmnet_model_path)
# glmnet_predictions <- predict(glmnet_model, X, s = "lambda.min", type="response")
```
```{r }
#| eval: false
vcf_inds <- strsplit("Epi_AF08_NI_WGS_1_Epi_AF08_NI_WGS_1 Epi_AF10_NI_WGS_1_Epi_AF10_NI_WGS_1 Epi_AF12_Flu_WGS_1_Epi_AF12_Flu_WGS_1 Epi_AF14_Mock_WGS_1_Epi_AF14_Mock_WGS_1 Epi_AF16_NI_WGS_1_Epi_AF16_NI_WGS_1 Epi_AF18_NI_WGS_1_Epi_AF18_NI_WGS_1 Epi_AF20_NI_WGS_1_Epi_AF20_NI_WGS_1 Epi_AF22_Flu_WGS_1_Epi_AF22_Flu_WGS_1 Epi_AF24_Flu_WGS_1_Epi_AF24_Flu_WGS_1 Epi_AF26_NI_WGS_1_Epi_AF26_NI_WGS_1Epi_AF28_NI_WGS_1_Epi_AF28_NI_WGS_1 Epi_AF30_NI_WGS_1_Epi_AF30_NI_WGS_1 Epi_AF34_NI_WGS_1_Epi_AF34_NI_WGS_1 Epi_AF36_Flu_WGS_1_Epi_AF36_Flu_WGS_1 Epi_AF38_NI_WGS_1_Epi_AF38_NI_WGS_1 Epi_EU03_NI_WGS_1_Epi_EU03_NI_WGS_1 Epi_EU05_NI_WGS_1_Epi_EU05_NI_WGS_1 Epi_EU07_NI_WGS_1_Epi_EU07_NI_WGS_1 Epi_EU09_Mock_WGS_1_Epi_EU09_Mock_WGS_1 Epi_EU13_NI_WGS_1_Epi_EU13_NI_WGS_1 Epi_EU15_NI_WGS_1_Epi_EU15_NI_WGS_1Epi_EU17_NI_WGS_1_Epi_EU17_NI_WGS_1 Epi_EU19_NI_WGS_1_Epi_EU19_NI_WGS_1 Epi_EU21_Flu_WGS_1_Epi_EU21_Flu_WGS_1 Epi_EU25_NI_WGS_1_Epi_EU25_NI_WGS_1 Epi_EU27_NI_WGS_1_Epi_EU27_NI_WGS_1 Epi_EU29_NI_WGS_1_Epi_EU29_NI_WGS_1 Epi_EU33_Flu_WGS_1_Epi_EU33_Flu_WGS_1 Epi_EU37_Flu_WGS_1_Epi_EU37_Flu_WGS_1 Epi_EU39_NI_WGS_1_Epi_EU39_NI_WGS_1 Epi_EU41_NI_WGS_1_Epi_EU41_NI_WGS_1 Epi_EU43_NI_WGS_1_Epi_EU43_NI_WGS_1Epi_EU47_NI_WGS_1_Epi_EU47_NI_WGS_1","\\s+")
```
### Flu expression
```{r}
#| eval: false
glmnet_model_path <- "/beagle3/haky/users/saideep/projects/aracena_modeling/elastic_net/trained_eln_RNAseq_Flu_normalized_covariates.linear.rds"
glmnet_model <- readRDS(glmnet_model_path)
collected_preds_dir <- "/beagle3/haky/users/saideep/projects/aracena_modeling/enformer_predictions/aracena_preds/collected_preds"
enpact_preds_dir <- "/beagle3/haky/users/saideep/projects/aracena_modeling/enformer_predictions/aracena_preds/enpact_preds"
for (ind in vcf_inds[[1]]) {
for (hap in c("haplo1","haplo2")) {
X_path <- glue("{collected_preds_dir}/{ind}_{hap}.txt")
if (file.exists(X_path)) {
X <- as.matrix(read.table(X_path, row.names = 1, header = F, sep = "\t"))
glmnet_predictions <- as.data.frame(predict(glmnet_model, X, s = "lambda.min", type="response"))
rownames(glmnet_predictions) <- rownames(X)
write.table(glmnet_predictions, glue("{enpact_preds_dir}/{ind}_{hap}_preds_RNAseq_Flu_personalized.txt"),row.names = T, sep="\t")
} else {
print(glue({"{ind}_{hap} missing"}))
}
}
}
```
### NI expression
```{r}
#| eval: false
glmnet_model_path <- "/beagle3/haky/users/saideep/projects/aracena_modeling/elastic_net/trained_eln_RNAseq_NI_normalized_covariates.linear.rds"
glmnet_model <- readRDS(glmnet_model_path)
collected_preds_dir <- "/beagle3/haky/users/saideep/projects/aracena_modeling/enformer_predictions/aracena_preds/collected_preds"
enpact_preds_dir <- "/beagle3/haky/users/saideep/projects/aracena_modeling/enformer_predictions/aracena_preds/enpact_preds"
for (ind in vcf_inds) {
for (hap in c("haplo1","haplo2")) {
X_path <- glue("{collected_preds_dir}/{ind}_{hap}.txt")
if (file.exists(X_path)) {
X <- as.matrix(data.table::fread(X_path))
glmnet_predictions <- as.data.frame(predict(glmnet_model, X, s = "lambda.min", type="response"))
write_delim(glmnet_predictions, glue("{enpact_preds_dir}/{ind}_{hap}_preds_RNAseq_NI_personalized.txt"), delim = "\t")
} else {
print(glue({"{ind}_{hap} missing"}))
}
}
}
```