suppressMessages(library(tidyverse))suppressMessages(library(glue))PRE ="/Users/saideepgona/Library/CloudStorage/Box-Box/imlab-data/data-Github/Daily-Blog-Sai"## COPY THE DATE AND SLUG fields FROM THE HEADERSLUG="running_Karl_geneset_enrichment"## copy the slug from the headerbDATE='2023-04-12'## copy the date from the blog's header hereDATA =glue("{PRE}/{bDATE}-{SLUG}")if(!file.exists(DATA)) system(glue::glue("mkdir {DATA}"))WORK=DATA
Context
Karl Tayeb in Mathew Stephens’ lab has been working on an improved geneset enrichment software.
Replicating Karl’s code
Code
# Need to install gfortran for VEB.Boostdevtools::install_github('stephenslab/VEB.Boost')devtools::install_github('karltayeb/gseasusie')devtools::install_github('karltayeb/logisticsusie@s3')library(gseasusie)library(dplyr)path <-"/Users/saideepgona/Library/CloudStorage/Box-Box/imlab-data/data-Github/analysis-Sai/enformer_geuvadis/enformer_epistasis/0.1_background_geneset_enformer_epistasis.csv"data <-read.table(path, sep =',', header = T)hallmark <- gseasusie::load_msigdb_geneset_x(db='H')rownames(data) <- data$gene_namesgs_background <-rownames(hallmark$X)# subset genes to those shared in gene sets and experimentdata2 <-coerce_scores(data, gs_background, 'SYMBOL', 'ENTREZID') %>%rename('ENTREZID'= to)y <-as.integer(data2$nested_bh_pval <0.5)X <- hallmark$X[data2$ENTREZID,]# setup data for logistic susie, fit modeldata <- logisticsusie::binsusie_prep_data(X, y, 1, NULL)fit <- logisticsusie::data_initialize_binsusie(data, L=5)fit <- logisticsusie::fit_model(fit, data)# there weren't any strong enrichments,# lets look at the marginal enrichments# first, using univariate logistic regression, closer to SuSiEser <- logisticsusie::data_initialize_uvbser(data)ser <- logisticsusie::fit_model(ser, data)# "Over-represntation analysis" p-values for the 2x2 contingency table# via Fishers exact test, hypergeometric testora <- gseasusie::fit_ora(X, y)ora$logBF <- ser$lbf # also add BFs from univariate regressionsaveRDS(ora, 'enformer_epistasis_enrichment.Rds')
Source Code
---title: "running_Karl_geneset_enrichment"author: "Saideep Gona"date: "2023-04-12"format: html: code-fold: true code-summary: "Show the code"execute: freeze: true warning: false---```{r}#| label: Set up box storage directorysuppressMessages(library(tidyverse))suppressMessages(library(glue))PRE ="/Users/saideepgona/Library/CloudStorage/Box-Box/imlab-data/data-Github/Daily-Blog-Sai"## COPY THE DATE AND SLUG fields FROM THE HEADERSLUG="running_Karl_geneset_enrichment"## copy the slug from the headerbDATE='2023-04-12'## copy the date from the blog's header hereDATA =glue("{PRE}/{bDATE}-{SLUG}")if(!file.exists(DATA)) system(glue::glue("mkdir {DATA}"))WORK=DATA```## ContextKarl Tayeb in Mathew Stephens' lab has been working on an improved geneset enrichment software.### Replicating Karl's code```{r eval=FALSE}# Need to install gfortran for VEB.Boostdevtools::install_github('stephenslab/VEB.Boost')devtools::install_github('karltayeb/gseasusie')devtools::install_github('karltayeb/logisticsusie@s3')library(gseasusie)library(dplyr)path <-"/Users/saideepgona/Library/CloudStorage/Box-Box/imlab-data/data-Github/analysis-Sai/enformer_geuvadis/enformer_epistasis/0.1_background_geneset_enformer_epistasis.csv"data <-read.table(path, sep =',', header = T)hallmark <- gseasusie::load_msigdb_geneset_x(db='H')rownames(data) <- data$gene_namesgs_background <-rownames(hallmark$X)# subset genes to those shared in gene sets and experimentdata2 <-coerce_scores(data, gs_background, 'SYMBOL', 'ENTREZID') %>%rename('ENTREZID'= to)y <-as.integer(data2$nested_bh_pval <0.5)X <- hallmark$X[data2$ENTREZID,]# setup data for logistic susie, fit modeldata <- logisticsusie::binsusie_prep_data(X, y, 1, NULL)fit <- logisticsusie::data_initialize_binsusie(data, L=5)fit <- logisticsusie::fit_model(fit, data)# there weren't any strong enrichments,# lets look at the marginal enrichments# first, using univariate logistic regression, closer to SuSiEser <- logisticsusie::data_initialize_uvbser(data)ser <- logisticsusie::fit_model(ser, data)# "Over-represntation analysis" p-values for the 2x2 contingency table# via Fishers exact test, hypergeometric testora <- gseasusie::fit_ora(X, y)ora$logBF <- ser$lbf # also add BFs from univariate regressionsaveRDS(ora, 'enformer_epistasis_enrichment.Rds')```