Code
library(data.table)
library(glmnet)
library(glue)
library(tidyverse)
Saideep Gona
November 22, 2023
# X_path <- "/beagle3/haky/users/saideep/projects/aracena_modeling/linearization/HG00096.txt"
# X <- as.matrix(data.table::fread(X_path))
# glmnet_model_path <- "/beagle3/haky/users/saideep/projects/aracena_modeling/elastic_net/trained_eln_RNAseq_Flu_from_HDF5_mean_log_revised.linear.rds"
# glmnet_model <- readRDS(glmnet_model_path)
# glmnet_predictions <- predict(glmnet_model, X, s = "lambda.min", type="response")
# inds <- read_file("/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-11-16-linearization/individuals.txt")
# inds <- gsub("\n", "", inds)
# inds <- strsplit(inds,split="\t")
# linear_dir <- "/beagle3/haky/users/saideep/projects/aracena_modeling/linearization"
# for (ind in inds[[1]]) {
# X_path <- glue("{linear_dir}/{ind}.txt")
# print(ind)
# if (file.exists(X_path)) {
# X <- as.matrix(data.table::fread(X_path))
# glmnet_model_path <- "/beagle3/haky/users/saideep/projects/aracena_modeling/elastic_net/trained_eln_RNAseq_Flu_from_HDF5_mean_log_revised.linear.rds"
# glmnet_model <- readRDS(glmnet_model_path)
# glmnet_predictions <- as.data.frame(predict(glmnet_model, X, s = "lambda.min", type="response"))
# write_delim(glmnet_predictions, glue("{linear_dir}/{ind}_preds_RNAseq_Flu_from_HDF5_mean_log_revised.txt"), delim = "\t")
# } else {
# print(glue({"{ind} missing"}))
# }
# }
---
title: "1.4.2_predict_for_linearization"
author: "Saideep Gona"
date: "2023-11-22"
format:
html:
code-fold: true
code-summary: "Show the code"
execute:
freeze: true
warning: false
---
# Context
In order to linearize, we first need to predict on the
```{r}
library(data.table)
library(glmnet)
library(glue)
library(tidyverse)
```
Test on a single individual
```{r}
# X_path <- "/beagle3/haky/users/saideep/projects/aracena_modeling/linearization/HG00096.txt"
# X <- as.matrix(data.table::fread(X_path))
# glmnet_model_path <- "/beagle3/haky/users/saideep/projects/aracena_modeling/elastic_net/trained_eln_RNAseq_Flu_from_HDF5_mean_log_revised.linear.rds"
# glmnet_model <- readRDS(glmnet_model_path)
# glmnet_predictions <- predict(glmnet_model, X, s = "lambda.min", type="response")
```
### Flu expression
```{r}
inds <- read_file("/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-11-16-linearization/individuals.txt")
inds <- gsub("\n", "", inds)
inds <- strsplit(inds,split="\t")
glmnet_model_path <- "/beagle3/haky/users/saideep/projects/aracena_modeling/elastic_net/trained_eln_RNAseq_Flu_normalized_covariates.linear.rds"
glmnet_model <- readRDS(glmnet_model_path)
linear_dir <- "/beagle3/haky/users/saideep/projects/aracena_modeling/linearization"
for (ind in inds[[1]]) {
X_path <- glue("{linear_dir}/{ind}.txt")
print(ind)
if (file.exists(X_path)) {
if (file.exists(glue("{linear_dir}/{ind}_preds_RNAseq_Flu_from_HDF5_mean_log_revised.txt"))) {
print(glue("{ind} already exists"))
next
}
X <- as.matrix(data.table::fread(X_path))
glmnet_predictions <- as.data.frame(predict(glmnet_model, X, s = "lambda.min", type="response"))
write_delim(glmnet_predictions, glue("{linear_dir}/{ind}_preds_RNAseq_Flu_from_HDF5_mean_log_revised.txt"), delim = "\t")
} else {
print(glue({"{ind} missing"}))
}
}
```
### NI expression
```{r}
inds <- read_file("/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-11-16-linearization/individuals.txt")
inds <- gsub("\n", "", inds)
inds <- strsplit(inds,split="\t")
glmnet_model_path <- "/beagle3/haky/users/saideep/projects/aracena_modeling/elastic_net/trained_eln_RNAseq_NI_normalized_covariates.linear.rds"
glmnet_model <- readRDS(glmnet_model_path)
linear_dir <- "/beagle3/haky/users/saideep/projects/aracena_modeling/linearization"
for (ind in inds[[1]]) {
X_path <- glue("{linear_dir}/{ind}.txt")
print(ind)
if (file.exists(X_path)) {
if (file.exists(glue("{linear_dir}/{ind}_preds_RNAseq_NI_from_HDF5_mean_log_revised.txt"))) {
print(glue("{ind} already exists"))
next
}
X <- as.matrix(data.table::fread(X_path))
glmnet_predictions <- as.data.frame(predict(glmnet_model, X, s = "lambda.min", type="response"))
write_delim(glmnet_predictions, glue("{linear_dir}/{ind}_preds_RNAseq_NI_from_HDF5_mean_log_revised.txt"), delim = "\t")
} else {
print(glue({"{ind} missing"}))
}
}
```