Linearize_peak_EnPACT

Author

Saideep Gona

Published

May 20, 2024

Code
library(tidyverse)
library(qqman)

Explore summary statistics

Code
path = "/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/Asthma_eczema_farreira_FORMATTED_hg19.chromb37.txt"
allSumstats <- data.table::fread(path)


# add zscore
# allSumstats$zscore <- allSumstats$beta / allSumstats$standard_error

# remove nonSNPs
cleanSumstats <- allSumstats[nchar(allSumstats$effect_allele) == 1 & nchar(allSumstats$non_effect_allele) == 1, ]

dim(allSumstats)
dim(cleanSumstats)

rm(allSumstats)
# add gwas_significant column

cleanSumstats$gwas_significant <- ifelse(cleanSumstats$pvalue < 5e-08, "YES","NO")

table(cleanSumstats$gwas_significant)

write.table(cleanSumstats, file = "/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/Asthma_eczema_farreira_FORMATTED_hg19.chromb37.cleaned.txt", sep = "\t", row.names = F,
            col.names = T,  quote = F)

cleanSumstats$chr_numeric <- as.numeric(str_replace_all(cleanSumstats$chromosome,"chr",""))
table(cleanSumstats$chr_numeric)

png("/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/Asthma_eczema_farreira_FORMATTED_hg19.chromb37.cleaned.png")
manhattan(na.omit(cleanSumstats), chr="chr_numeric", bp="position", p="pvalue", snp="panel_variant_id")
dev.off()

Reformat summ stats for susieR

Code
sumstats <- data.table::fread("/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/Asthma_eczema_farreira_FORMATTED.txt.chromb38")

sumstats$gwas_significant <- ifelse(sumstats$pvalue < 5e-08, "YES","NO")

colnames(sumstats) <- c("chrom", "rsid", "ref", "old_effect_allele", "alt", "beta", "pval", "pos", "frequency", "zscore", "se", "sample_size", "gwas_significant")

write.table(sumstats, file = '/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/Asthma_eczema_farreira_FORMATTED.txt.chromb38.susie.txt', quote = F, row.names = F, sep = '\t')

Running fine-mapping

Split the sumstats by chromosome

Code
indir <- '/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/'
setwd(indir)

sumstats <- data.table::fread('Asthma_eczema_farreira_FORMATTED.txt.chromb38.susie.txt')

sumstats$chrom <- gsub("chr", "", sumstats$chrom)
sumstats$chrom <- as.numeric(as.character(sumstats$chrom))
nas <- sumstats[is.na(sumstats$chrom), ]
dim(nas[nas$gwas_significant == "YES"]) # 0 13

filtered_sumstats <- sumstats[!is.na(sumstats$chrom), ] 
unique_chroms <- unique(filtered_sumstats$chrom)
for (c in unique_chroms){
  print(c)
  chrom_to_compare <- as.numeric(c)
  chr_filt <- filtered_sumstats[filtered_sumstats$chrom == chrom_to_compare,]
  print(dim(chr_filt))
  write.table(chr_filt, file = paste0(indir, 'asthma_eczema_farreira_susie/chr', c, '_snps.txt'), sep = '\t', row.names = F, quote = F)
}

Run susieR

Code

module load python
source activate /beagle3/haky/users/shared_software/TFXcan-pipeline-tools/

cd /beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/asthma_eczema_farreira_susie
mkdir susieR_out
for chr in {1..22}; do
echo "chromosome $chr"

if [ ! -f chr${chr}_snps.txt.gz ]; then
gzip chr${chr}_snps.txt
fi

/beagle3/haky/users/shared_software/TFXcan-pipeline-tools/bin/Rscript /beagle3/haky/users/saideep/software/susieR/run_susieR.R \
--chromosome $chr \
--sumstats chr${chr}_snps.txt.gz \
--LDBlocks_info /beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/LD_data/hg38_fourier_ls-all.bed \
--genotypes_dosages_pattern /beagle3/haky/users/saideep/software/susieR/genotype_dosage_patterns/ALL.chr${chr}.shapeit2_integrated_SNPs_v2a_27022019.GRCh38.phased.geno.txt.gz \
--output_folder susieR_out \
--phenotype OC \
--n 360838 \
--diagnostics_file susieR_out/diagnostics_susie.txt
done

Collect Susie results and analyze

Code
suppressPackageStartupMessages(library(tidyverse))

chrom_count = 0
df_list <- list()
gwas_list <- list()
indir <- '/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/asthma_eczema_farreira_susie/susieR_out/'
for (chr in 1:22){
  file_exists <- file.exists(paste0(indir, 'OC.chr',chr,'.filteredGWAS.txt.gz'))
  print(file_exists)
  if (file_exists == TRUE){
  #   # print(paste('chromosome',chr))
     chrom_count = chrom_count + 1
  #   # Read in data
     chr_susie <- readRDS(paste0(indir, 'OC.chr',chr,'.susie.RDS'))
  #   #print(chr_susie)
     chr_filt_gwas <- data.table::fread(paste0(indir, 'OC.chr',chr,'.filteredGWAS.txt.gz'))

    # Get which SNPs belong to each LD block

    isNullList <- all(rapply(chr_susie, length) == 0)
    if(isNullList == FALSE){
      # filter lCausalSnps
      filteredSNPs <- lapply(chr_susie, function(x){
        if(is.null(x$loci)){
          return(NULL)
        }
        dt <- as.data.frame(x$loci)
        if(nrow(dt) == 0){
          return(NULL)
        }
        return(dt)
      })
      filteredSNPs <- Filter(Negate(is.null), filteredSNPs) %>%
        do.call(rbind, .)
      row.names(filteredSNPs) <- NULL

      # print(head(filteredSNPs, 3))
      df_list[[chrom_count]] <- filteredSNPs
      gwas_list[[chrom_count]] <- chr_filt_gwas
    }
  }
}


all_SNPs_susie_filt <- do.call(rbind, df_list)
write.table(all_SNPs_susie_filt, file = '/beagle3/haky/users/saideep/projects/aracena_modeling/SPrediXcan/sumstats_formatted/asthma_eczema_farreira_susie/all_SNPs_susie_filt.txt', quote = F, row.names = F, sep = '\t')
fine_mapped_gwas_filt <- do.call(rbind, gwas_list)

print(paste(length(all_SNPs_susie_filt$SNP), "SNPs were finemapped"))
Code
print(paste(length(levels(as.factor(all_SNPs_susie_filt$ldBlock))), "LD blocks had finemapped SNPs"))

suppressPackageStartupMessages(library(ggplot2))
finemapped_gwas_hg38_filt <- cbind(fine_mapped_gwas_filt, all_SNPs_susie_filt[,-c(1:2, 4)])

finemapped_gwas_hg38_filt$ldBlock <- as.factor(finemapped_gwas_hg38_filt$ldBlock)

finemapped_gwas_hg38_filt <- finemapped_gwas_hg38_filt %>%
  arrange(desc(PIP)) %>%
  mutate(topPIP = row_number() <= 5)
ggplot(finemapped_gwas_hg38_filt, aes(x = factor(ldBlock), y = PIP)) +
  geom_point() +  # Plot all points
  labs(x = "LD block", y = "PIP", title = "PIP by LD Block") +  
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) 
Code
cs_per_ld <- finemapped_gwas_hg38_filt %>%
  group_by(ldBlock) %>%
  summarise(count = n_distinct(cs))


ggplot(cs_per_ld, aes(x = ldBlock, y = count)) +
  geom_bar(stat = "identity", position = "dodge") +  # Plot all points
  labs(x = "LD block", y = "Num of credible sets", title = "Fine mapped credible sets per LDblock") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))