Code
library(tidyverse)
library(qqman)
Saideep Gona
May 20, 2024
path = "/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/Asthma_eczema_farreira_FORMATTED_hg19.chromb37.txt"
allSumstats <- data.table::fread(path)
# add zscore
# allSumstats$zscore <- allSumstats$beta / allSumstats$standard_error
# remove nonSNPs
cleanSumstats <- allSumstats[nchar(allSumstats$effect_allele) == 1 & nchar(allSumstats$non_effect_allele) == 1, ]
dim(allSumstats)
dim(cleanSumstats)
rm(allSumstats)
# add gwas_significant column
cleanSumstats$gwas_significant <- ifelse(cleanSumstats$pvalue < 5e-08, "YES","NO")
table(cleanSumstats$gwas_significant)
write.table(cleanSumstats, file = "/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/Asthma_eczema_farreira_FORMATTED_hg19.chromb37.cleaned.txt", sep = "\t", row.names = F,
col.names = T, quote = F)
cleanSumstats$chr_numeric <- as.numeric(str_replace_all(cleanSumstats$chromosome,"chr",""))
table(cleanSumstats$chr_numeric)
png("/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/Asthma_eczema_farreira_FORMATTED_hg19.chromb37.cleaned.png")
manhattan(na.omit(cleanSumstats), chr="chr_numeric", bp="position", p="pvalue", snp="panel_variant_id")
dev.off()
Reformat summ stats for susieR
sumstats <- data.table::fread("/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/Asthma_eczema_farreira_FORMATTED.txt.chromb38")
sumstats$gwas_significant <- ifelse(sumstats$pvalue < 5e-08, "YES","NO")
colnames(sumstats) <- c("chrom", "rsid", "ref", "old_effect_allele", "alt", "beta", "pval", "pos", "frequency", "zscore", "se", "sample_size", "gwas_significant")
write.table(sumstats, file = '/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/Asthma_eczema_farreira_FORMATTED.txt.chromb38.susie.txt', quote = F, row.names = F, sep = '\t')
Split the sumstats by chromosome
indir <- '/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/'
setwd(indir)
sumstats <- data.table::fread('Asthma_eczema_farreira_FORMATTED.txt.chromb38.susie.txt')
sumstats$chrom <- gsub("chr", "", sumstats$chrom)
sumstats$chrom <- as.numeric(as.character(sumstats$chrom))
nas <- sumstats[is.na(sumstats$chrom), ]
dim(nas[nas$gwas_significant == "YES"]) # 0 13
filtered_sumstats <- sumstats[!is.na(sumstats$chrom), ]
unique_chroms <- unique(filtered_sumstats$chrom)
for (c in unique_chroms){
print(c)
chrom_to_compare <- as.numeric(c)
chr_filt <- filtered_sumstats[filtered_sumstats$chrom == chrom_to_compare,]
print(dim(chr_filt))
write.table(chr_filt, file = paste0(indir, 'asthma_eczema_farreira_susie/chr', c, '_snps.txt'), sep = '\t', row.names = F, quote = F)
}
module load python
source activate /beagle3/haky/users/shared_software/TFXcan-pipeline-tools/
cd /beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/asthma_eczema_farreira_susie
mkdir susieR_out
for chr in {1..22}; do
echo "chromosome $chr"
if [ ! -f chr${chr}_snps.txt.gz ]; then
gzip chr${chr}_snps.txt
fi
/beagle3/haky/users/shared_software/TFXcan-pipeline-tools/bin/Rscript /beagle3/haky/users/saideep/software/susieR/run_susieR.R \
--chromosome $chr \
--sumstats chr${chr}_snps.txt.gz \
--LDBlocks_info /beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/LD_data/hg38_fourier_ls-all.bed \
--genotypes_dosages_pattern /beagle3/haky/users/saideep/software/susieR/genotype_dosage_patterns/ALL.chr${chr}.shapeit2_integrated_SNPs_v2a_27022019.GRCh38.phased.geno.txt.gz \
--output_folder susieR_out \
--phenotype OC \
--n 360838 \
--diagnostics_file susieR_out/diagnostics_susie.txt
done
suppressPackageStartupMessages(library(tidyverse))
chrom_count = 0
df_list <- list()
gwas_list <- list()
indir <- '/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/asthma_eczema_farreira_susie/susieR_out/'
for (chr in 1:22){
file_exists <- file.exists(paste0(indir, 'OC.chr',chr,'.filteredGWAS.txt.gz'))
print(file_exists)
if (file_exists == TRUE){
# # print(paste('chromosome',chr))
chrom_count = chrom_count + 1
# # Read in data
chr_susie <- readRDS(paste0(indir, 'OC.chr',chr,'.susie.RDS'))
# #print(chr_susie)
chr_filt_gwas <- data.table::fread(paste0(indir, 'OC.chr',chr,'.filteredGWAS.txt.gz'))
# Get which SNPs belong to each LD block
isNullList <- all(rapply(chr_susie, length) == 0)
if(isNullList == FALSE){
# filter lCausalSnps
filteredSNPs <- lapply(chr_susie, function(x){
if(is.null(x$loci)){
return(NULL)
}
dt <- as.data.frame(x$loci)
if(nrow(dt) == 0){
return(NULL)
}
return(dt)
})
filteredSNPs <- Filter(Negate(is.null), filteredSNPs) %>%
do.call(rbind, .)
row.names(filteredSNPs) <- NULL
# print(head(filteredSNPs, 3))
df_list[[chrom_count]] <- filteredSNPs
gwas_list[[chrom_count]] <- chr_filt_gwas
}
}
}
all_SNPs_susie_filt <- do.call(rbind, df_list)
write.table(all_SNPs_susie_filt, file = '/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/asthma_eczema_farreira_susie/all_SNPs_susie_filt.txt', quote = F, row.names = F, sep = '\t')
fine_mapped_gwas_filt <- do.call(rbind, gwas_list)
print(paste(length(all_SNPs_susie_filt$SNP), "SNPs were finemapped"))
print(paste(length(levels(as.factor(all_SNPs_susie_filt$ldBlock))), "LD blocks had finemapped SNPs"))
suppressPackageStartupMessages(library(ggplot2))
finemapped_gwas_hg38_filt <- cbind(fine_mapped_gwas_filt, all_SNPs_susie_filt[,-c(1:2, 4)])
finemapped_gwas_hg38_filt$ldBlock <- as.factor(finemapped_gwas_hg38_filt$ldBlock)
finemapped_gwas_hg38_filt <- finemapped_gwas_hg38_filt %>%
arrange(desc(PIP)) %>%
mutate(topPIP = row_number() <= 5)
ggplot(finemapped_gwas_hg38_filt, aes(x = factor(ldBlock), y = PIP)) +
geom_point() + # Plot all points
labs(x = "LD block", y = "PIP", title = "PIP by LD Block") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
cs_per_ld <- finemapped_gwas_hg38_filt %>%
group_by(ldBlock) %>%
summarise(count = n_distinct(cs))
ggplot(cs_per_ld, aes(x = ldBlock, y = count)) +
geom_bar(stat = "identity", position = "dodge") + # Plot all points
labs(x = "LD block", y = "Num of credible sets", title = "Fine mapped credible sets per LDblock") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
---
title: "Linearize_peak_EnPACT"
author: "Saideep Gona"
date: "2024-05-20"
format:
html:
code-fold: true
code-summary: "Show the code"
execute:
freeze: true
warning: false
eval: false
---
```{r}
library(tidyverse)
library(qqman)
```
## Explore summary statistics
```{r}
path = "/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/Asthma_eczema_farreira_FORMATTED_hg19.chromb37.txt"
allSumstats <- data.table::fread(path)
# add zscore
# allSumstats$zscore <- allSumstats$beta / allSumstats$standard_error
# remove nonSNPs
cleanSumstats <- allSumstats[nchar(allSumstats$effect_allele) == 1 & nchar(allSumstats$non_effect_allele) == 1, ]
dim(allSumstats)
dim(cleanSumstats)
rm(allSumstats)
# add gwas_significant column
cleanSumstats$gwas_significant <- ifelse(cleanSumstats$pvalue < 5e-08, "YES","NO")
table(cleanSumstats$gwas_significant)
write.table(cleanSumstats, file = "/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/Asthma_eczema_farreira_FORMATTED_hg19.chromb37.cleaned.txt", sep = "\t", row.names = F,
col.names = T, quote = F)
cleanSumstats$chr_numeric <- as.numeric(str_replace_all(cleanSumstats$chromosome,"chr",""))
table(cleanSumstats$chr_numeric)
png("/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/Asthma_eczema_farreira_FORMATTED_hg19.chromb37.cleaned.png")
manhattan(na.omit(cleanSumstats), chr="chr_numeric", bp="position", p="pvalue", snp="panel_variant_id")
dev.off()
```
Reformat summ stats for susieR
```{r}
sumstats <- data.table::fread("/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/Asthma_eczema_farreira_FORMATTED.txt.chromb38")
sumstats$gwas_significant <- ifelse(sumstats$pvalue < 5e-08, "YES","NO")
colnames(sumstats) <- c("chrom", "rsid", "ref", "old_effect_allele", "alt", "beta", "pval", "pos", "frequency", "zscore", "se", "sample_size", "gwas_significant")
write.table(sumstats, file = '/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/Asthma_eczema_farreira_FORMATTED.txt.chromb38.susie.txt', quote = F, row.names = F, sep = '\t')
```
## Running fine-mapping
Split the sumstats by chromosome
```{r}
indir <- '/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/'
setwd(indir)
sumstats <- data.table::fread('Asthma_eczema_farreira_FORMATTED.txt.chromb38.susie.txt')
sumstats$chrom <- gsub("chr", "", sumstats$chrom)
sumstats$chrom <- as.numeric(as.character(sumstats$chrom))
nas <- sumstats[is.na(sumstats$chrom), ]
dim(nas[nas$gwas_significant == "YES"]) # 0 13
filtered_sumstats <- sumstats[!is.na(sumstats$chrom), ]
unique_chroms <- unique(filtered_sumstats$chrom)
for (c in unique_chroms){
print(c)
chrom_to_compare <- as.numeric(c)
chr_filt <- filtered_sumstats[filtered_sumstats$chrom == chrom_to_compare,]
print(dim(chr_filt))
write.table(chr_filt, file = paste0(indir, 'asthma_eczema_farreira_susie/chr', c, '_snps.txt'), sep = '\t', row.names = F, quote = F)
}
```
### Run susieR
```{bash}
module load python
source activate /beagle3/haky/users/shared_software/TFXcan-pipeline-tools/
cd /beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/asthma_eczema_farreira_susie
mkdir susieR_out
for chr in {1..22}; do
echo "chromosome $chr"
if [ ! -f chr${chr}_snps.txt.gz ]; then
gzip chr${chr}_snps.txt
fi
/beagle3/haky/users/shared_software/TFXcan-pipeline-tools/bin/Rscript /beagle3/haky/users/saideep/software/susieR/run_susieR.R \
--chromosome $chr \
--sumstats chr${chr}_snps.txt.gz \
--LDBlocks_info /beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/LD_data/hg38_fourier_ls-all.bed \
--genotypes_dosages_pattern /beagle3/haky/users/saideep/software/susieR/genotype_dosage_patterns/ALL.chr${chr}.shapeit2_integrated_SNPs_v2a_27022019.GRCh38.phased.geno.txt.gz \
--output_folder susieR_out \
--phenotype OC \
--n 360838 \
--diagnostics_file susieR_out/diagnostics_susie.txt
done
```
### Collect Susie results and analyze
```{r}
suppressPackageStartupMessages(library(tidyverse))
chrom_count = 0
df_list <- list()
gwas_list <- list()
indir <- '/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/asthma_eczema_farreira_susie/susieR_out/'
for (chr in 1:22){
file_exists <- file.exists(paste0(indir, 'OC.chr',chr,'.filteredGWAS.txt.gz'))
print(file_exists)
if (file_exists == TRUE){
# # print(paste('chromosome',chr))
chrom_count = chrom_count + 1
# # Read in data
chr_susie <- readRDS(paste0(indir, 'OC.chr',chr,'.susie.RDS'))
# #print(chr_susie)
chr_filt_gwas <- data.table::fread(paste0(indir, 'OC.chr',chr,'.filteredGWAS.txt.gz'))
# Get which SNPs belong to each LD block
isNullList <- all(rapply(chr_susie, length) == 0)
if(isNullList == FALSE){
# filter lCausalSnps
filteredSNPs <- lapply(chr_susie, function(x){
if(is.null(x$loci)){
return(NULL)
}
dt <- as.data.frame(x$loci)
if(nrow(dt) == 0){
return(NULL)
}
return(dt)
})
filteredSNPs <- Filter(Negate(is.null), filteredSNPs) %>%
do.call(rbind, .)
row.names(filteredSNPs) <- NULL
# print(head(filteredSNPs, 3))
df_list[[chrom_count]] <- filteredSNPs
gwas_list[[chrom_count]] <- chr_filt_gwas
}
}
}
all_SNPs_susie_filt <- do.call(rbind, df_list)
write.table(all_SNPs_susie_filt, file = '/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/asthma_eczema_farreira_susie/all_SNPs_susie_filt.txt', quote = F, row.names = F, sep = '\t')
fine_mapped_gwas_filt <- do.call(rbind, gwas_list)
print(paste(length(all_SNPs_susie_filt$SNP), "SNPs were finemapped"))
```
```{r}
print(paste(length(levels(as.factor(all_SNPs_susie_filt$ldBlock))), "LD blocks had finemapped SNPs"))
suppressPackageStartupMessages(library(ggplot2))
finemapped_gwas_hg38_filt <- cbind(fine_mapped_gwas_filt, all_SNPs_susie_filt[,-c(1:2, 4)])
finemapped_gwas_hg38_filt$ldBlock <- as.factor(finemapped_gwas_hg38_filt$ldBlock)
finemapped_gwas_hg38_filt <- finemapped_gwas_hg38_filt %>%
arrange(desc(PIP)) %>%
mutate(topPIP = row_number() <= 5)
ggplot(finemapped_gwas_hg38_filt, aes(x = factor(ldBlock), y = PIP)) +
geom_point() + # Plot all points
labs(x = "LD block", y = "PIP", title = "PIP by LD Block") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
```
```{r}
cs_per_ld <- finemapped_gwas_hg38_filt %>%
group_by(ldBlock) %>%
summarise(count = n_distinct(cs))
ggplot(cs_per_ld, aes(x = ldBlock, y = count)) +
geom_bar(stat = "identity", position = "dodge") + # Plot all points
labs(x = "LD block", y = "Num of credible sets", title = "Fine mapped credible sets per LDblock") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
```