make_aracena_enformer_preds

Author

Saideep Gona

Published

October 26, 2023

Code

suppressMessages(library(tidyverse))
suppressMessages(library(glue))
PRE = "/Users/saideepgona/Library/CloudStorage/Box-Box/imlab-data/data-Github/Daily-Blog-Sai"

## COPY THE DATE AND SLUG fields FROM THE HEADER
SLUG="make_aracena_enformer_preds" ## copy the slug from the header
bDATE='2023-10-26' ## copy the date from the blog's header here
DATA = glue("{PRE}/{bDATE}-{SLUG}")
if(!file.exists(DATA)) system(glue::glue("mkdir {DATA}"))
WORK=DATA

Context

Made Enformer predictions on a set of topQTLs, now I can predict using my trained models and try to infer variant effects

Code

library(httpgd)
library(glmnet)
library(glue)
library(data.table)
library(neuralnet)
library(dplyr)
library(ggplot2)
library(patchwork)
library(reshape2)
library(stringr)

Code

make_predictions <- function(X, glmnet_model_path){

  glmnet_model <- readRDS(glmnet_model_path)
  # nnmodel <- readRDS(nnmodel_path)

  glmnet_predictions <- predict(glmnet_model, X, s = "lambda.min", type="response")
  # predictions_nn <- predict(nnmodel, data_frame_for_training)

  # return_list = list()
  # return_list[["elastic_net_predictions"]] <- glmnet_predictions
  # return_list[["nn_predictions"]] <- predictions_n

  return(glmnet_predictions)
}

Code

glmnet_model_path <- "/beagle3/haky/users/saideep/projects/aracena_modeling/elastic_net/trained_eln_RNAseq_from_HDF5_mean.linear.rds"


ref_inputs_path <- "/beagle3/haky/users/saideep/projects/aracena_modeling/mutagenesis_hdf5/ref_preds.csv"
ref_inputs <- as.matrix(t(data.table::fread(ref_inputs_path)))
ref_mono_track <-ref_inputs[,4766]
ref_preds <- make_predictions(ref_inputs, glmnet_model_path)

alt_inputs_path <- "/beagle3/haky/users/saideep/projects/aracena_modeling/mutagenesis_hdf5/alt_preds.csv"
alt_inputs <- as.matrix(t(data.table::fread(alt_inputs_path)))
alt_mono_track <-alt_inputs[,4766]
alt_preds <- make_predictions(alt_inputs, glmnet_model_path)

muta_df <- data.frame(ref_mono_track, alt_mono_track, ref_allele_pred=as.numeric(ref_preds), alt_allele_pred=as.numeric(alt_preds))

Code

qtl_meta <- data.table::fread("/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-10-26-make_aracena_enformer_preds/meta_table.csv")
print(dim(qtl_meta))

[1] 491   5

Code

print(dim(muta_df))

[1] 491   4

Code

muta_df$qtl_beta <- qtl_meta$effect_size

Code

colnames(muta_df)

[1] "ref_mono_track"  "alt_mono_track"  "ref_allele_pred" "alt_allele_pred"
[5] "qtl_beta"

Code

# muta_df$mono_dif_ <- log2(muta_df$alt_mono_track / muta_df$ref_mono_track)
# muta_df$pred_dif <- log2(muta_df$alt_allele_pred / muta_df$ref_allele_pred)

muta_df$mono_dif <- muta_df$alt_mono_track - muta_df$ref_mono_track
muta_df$pred_dif <- muta_df$alt_allele_pred - muta_df$ref_allele_pred

muta_df$mono_dif_abs <- abs(muta_df$mono_dif)
muta_df$pred_dif_abs <- abs(muta_df$pred_dif)
muta_df$qtl_beta_abs <- abs(muta_df$qtl_beta)

muta_df_es <- muta_df %>% select(mono_dif, pred_dif, qtl_beta)

pearson_cor_mat <- melt(cor(muta_df_es, method="pearson"))
png("/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-10-26-make_aracena_enformer_preds/figures/pearson_cor_mat.png", width=500, height=400)
ggplot(data = pearson_cor_mat, aes(x=Var1, y=Var2,
                                   fill=value)) + geom_tile()
dev.off()

svg 
  2

Code

spearman_cor_mat <- melt(cor(muta_df_es, method="spearman"))
png("/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-10-26-make_aracena_enformer_preds/figures/spearman_cor_mat.png", width=500, height=400)
ggplot(data = spearman_cor_mat, aes(x=Var1, y=Var2,
                                   fill=value)) + geom_tile()
dev.off()

svg 
  2

Code

muta_df_es_abs <- muta_df %>% select(mono_dif_abs, pred_dif_abs, qtl_beta_abs)

pearson_cor_mat_abs <- melt(cor(muta_df_es_abs, method="pearson"))
png("/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-10-26-make_aracena_enformer_preds/figures/pearson_cor_mat_abs.png", width=500, height=400)
ggplot(data = pearson_cor_mat_abs, aes(x=Var1, y=Var2,
                                   fill=value)) + geom_tile()
dev.off()

svg 
  2

Code

spearman_cor_mat_abs <- melt(cor(muta_df_es_abs, method="spearman"))
png("/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-10-26-make_aracena_enformer_preds/figures/spearman_cor_mat_abs.png", width=500, height=400)
ggplot(data = spearman_cor_mat_abs, aes(x=Var1, y=Var2,
                                   fill=value)) + geom_tile()
dev.off()

svg 
  2

Code

# png("/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-10-26-make_aracena_enformer_preds/figures/ref_track_vs_predicted_effect_sizes.png", width=400, height=400)
# ggplot(muta_df, aes(x=mono_effect_size, y=pred_effect_size)) + geom_point()
# dev.off()

png("/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-10-26-make_aracena_enformer_preds/figures/ref_vs_predicted_dif.png", width=400, height=400)
ggplot(muta_df, aes(x=mono_dif, y=pred_dif)) + geom_point()
dev.off()

svg 
  2

Code

png("/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-10-26-make_aracena_enformer_preds/figures/predicted_dif_vs_qtl.png", width=400, height=400)
ggplot(muta_df, aes(x=pred_dif, y=qtl_beta)) + geom_point() + geom_smooth(method="lm") + xlab("Predicted effect size") + ylab("QTL beta")
dev.off()

svg 
  2

Code

png("/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-10-26-make_aracena_enformer_preds/figures/predicted_dif_vs_qtl_abs.png", width=400, height=400)
ggplot(muta_df, aes(x=pred_dif_abs, y=qtl_beta_abs)) + geom_point()
dev.off()

svg 
  2

Code

png("/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-10-26-make_aracena_enformer_preds/figures/ref_dif_vs_qtl.png", width=400, height=400)
ggplot(muta_df, aes(x=mono_dif, y=qtl_beta)) + geom_point()
dev.off()

svg 
  2

Code

png("/beagle3/haky/users/saideep/github_repos/Daily-Blog-Sai/posts/2023-10-26-make_aracena_enformer_preds/figures/ref_dif_vs_qtl_abs.png", width=400, height=400)
ggplot(muta_df, aes(x=mono_dif_abs, y=qtl_beta_abs)) + geom_point()
dev.off()

svg 
  2