---
title: "make_aracena_enformer_preds"
author: "Saideep Gona"
date: "2023-10-26"
format:
html:
code-fold: true
code-summary: "Show the code"
execute:
freeze: true
warning: false
---
```{r}
#| label: Set up box storage directory
suppressMessages (library (tidyverse))
suppressMessages (library (glue))
PRE = "/Users/saideepgona/Library/CloudStorage/Box-Box/imlab-data/data-Github/Daily-Blog-Sai"
## COPY THE DATE AND SLUG fields FROM THE HEADER
SLUG= "make_aracena_enformer_preds" ## copy the slug from the header
bDATE= '2023-10-26' ## copy the date from the blog's header here
DATA = glue ("{PRE}/{bDATE}-{SLUG}" )
if (! file.exists (DATA)) system (glue:: glue ("mkdir {DATA}" ))
WORK= DATA
```
# Context
Made Enformer predictions on a set of topQTLs, now I can predict using my trained models and try to infer variant effects
```{r}
library (httpgd)
library (glmnet)
library (glue)
library (data.table)
library (neuralnet)
library (dplyr)
library (ggplot2)
library (patchwork)
library (reshape2)
library (stringr)
```
```{r}
make_predictions <- function (X, glmnet_model_path){
glmnet_model <- readRDS (glmnet_model_path)
# nnmodel <- readRDS(nnmodel_path)
glmnet_predictions <- predict (glmnet_model, X, s = "lambda.min" , type= "response" )
# predictions_nn <- predict(nnmodel, data_frame_for_training)
# return_list = list()
# return_list[["elastic_net_predictions"]] <- glmnet_predictions
# return_list[["nn_predictions"]] <- predictions_n
return (glmnet_predictions)
}
```
```{r}
glmnet_model_path <- "/beagle3/haky/users/saideep/projects/aracena_modeling/elastic_net/trained_eln_RNAseq_from_HDF5_mean.linear.rds"
ref_inputs_path <- "/beagle3/haky/users/saideep/projects/aracena_modeling/mutagenesis_hdf5/ref_preds.csv"
ref_inputs <- as.matrix (t (data.table:: fread (ref_inputs_path)))
ref_mono_track <- ref_inputs[,4766 ]
ref_preds <- make_predictions (ref_inputs, glmnet_model_path)
alt_inputs_path <- "/beagle3/haky/users/saideep/projects/aracena_modeling/mutagenesis_hdf5/alt_preds.csv"
alt_inputs <- as.matrix (t (data.table:: fread (alt_inputs_path)))
alt_mono_track <- alt_inputs[,4766 ]
alt_preds <- make_predictions (alt_inputs, glmnet_model_path)
muta_df <- data.frame (ref_mono_track, alt_mono_track, ref_allele_pred= as.numeric (ref_preds), alt_allele_pred= as.numeric (alt_preds))
```
```{r}
qtl_meta <- data.table:: fread ("/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-10-26-make_aracena_enformer_preds/meta_table.csv" )
print (dim (qtl_meta))
print (dim (muta_df))
muta_df$ qtl_beta <- qtl_meta$ effect_size
```
```{r}
colnames (muta_df)
# muta_df$mono_dif_ <- log2(muta_df$alt_mono_track / muta_df$ref_mono_track)
# muta_df$pred_dif <- log2(muta_df$alt_allele_pred / muta_df$ref_allele_pred)
muta_df$ mono_dif <- muta_df$ alt_mono_track - muta_df$ ref_mono_track
muta_df$ pred_dif <- muta_df$ alt_allele_pred - muta_df$ ref_allele_pred
muta_df$ mono_dif_abs <- abs (muta_df$ mono_dif)
muta_df$ pred_dif_abs <- abs (muta_df$ pred_dif)
muta_df$ qtl_beta_abs <- abs (muta_df$ qtl_beta)
muta_df_es <- muta_df %>% select (mono_dif, pred_dif, qtl_beta)
pearson_cor_mat <- melt (cor (muta_df_es, method= "pearson" ))
png ("/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-10-26-make_aracena_enformer_preds/figures/pearson_cor_mat.png" , width= 500 , height= 400 )
ggplot (data = pearson_cor_mat, aes (x= Var1, y= Var2,
fill= value)) + geom_tile ()
dev.off ()
spearman_cor_mat <- melt (cor (muta_df_es, method= "spearman" ))
png ("/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-10-26-make_aracena_enformer_preds/figures/spearman_cor_mat.png" , width= 500 , height= 400 )
ggplot (data = spearman_cor_mat, aes (x= Var1, y= Var2,
fill= value)) + geom_tile ()
dev.off ()
muta_df_es_abs <- muta_df %>% select (mono_dif_abs, pred_dif_abs, qtl_beta_abs)
pearson_cor_mat_abs <- melt (cor (muta_df_es_abs, method= "pearson" ))
png ("/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-10-26-make_aracena_enformer_preds/figures/pearson_cor_mat_abs.png" , width= 500 , height= 400 )
ggplot (data = pearson_cor_mat_abs, aes (x= Var1, y= Var2,
fill= value)) + geom_tile ()
dev.off ()
spearman_cor_mat_abs <- melt (cor (muta_df_es_abs, method= "spearman" ))
png ("/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-10-26-make_aracena_enformer_preds/figures/spearman_cor_mat_abs.png" , width= 500 , height= 400 )
ggplot (data = spearman_cor_mat_abs, aes (x= Var1, y= Var2,
fill= value)) + geom_tile ()
dev.off ()
```
```{r}
```
```{r}
# png("/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-10-26-make_aracena_enformer_preds/figures/ref_track_vs_predicted_effect_sizes.png", width=400, height=400)
# ggplot(muta_df, aes(x=mono_effect_size, y=pred_effect_size)) + geom_point()
# dev.off()
png ("/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-10-26-make_aracena_enformer_preds/figures/ref_vs_predicted_dif.png" , width= 400 , height= 400 )
ggplot (muta_df, aes (x= mono_dif, y= pred_dif)) + geom_point ()
dev.off ()
png ("/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-10-26-make_aracena_enformer_preds/figures/predicted_dif_vs_qtl.png" , width= 400 , height= 400 )
ggplot (muta_df, aes (x= pred_dif, y= qtl_beta)) + geom_point () + geom_smooth (method= "lm" ) + xlab ("Predicted effect size" ) + ylab ("QTL beta" )
dev.off ()
png ("/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-10-26-make_aracena_enformer_preds/figures/predicted_dif_vs_qtl_abs.png" , width= 400 , height= 400 )
ggplot (muta_df, aes (x= pred_dif_abs, y= qtl_beta_abs)) + geom_point ()
dev.off ()
png ("/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-10-26-make_aracena_enformer_preds/figures/ref_dif_vs_qtl.png" , width= 400 , height= 400 )
ggplot (muta_df, aes (x= mono_dif, y= qtl_beta)) + geom_point ()
dev.off ()
png ("/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-10-26-make_aracena_enformer_preds/figures/ref_dif_vs_qtl_abs.png" , width= 400 , height= 400 )
ggplot (muta_df, aes (x= mono_dif_abs, y= qtl_beta_abs)) + geom_point ()
dev.off ()
```
```{r}
```