This notebook tests the mutagenesis pipeline for Kircher LDLR, including whether the reverse-complement quantification is working.

In [ ]:
import h5py
import numpy as np

# Presumably the output bins overlapping the LDLR TSS (TODO confirm); not used in
# this cell, kept for ad-hoc inspection of the loaded tracks.
tss_bins = [570, 276, 277, 406, 278, 404, 280, 281, 282, 405, 439, 568, 304, 305, 306, 315, 435, 436, 437, 438, 380, 569, 314, 379, 316, 381]
individual="NA18520"

# Folder holding the predictions for variant chr19_11089231_A_T, one sub-folder per haplotype.
PREDICTIONS_DIR = "/projects/covid-ct/imlab/users/saideep/kircher_mutagenesis/LDLR/predictions_folder/saideepDataset_kircher_LDLR/predictions_2023-03-10/predictions/chr19_11089231_A_T"
# Region identifier; used both in the HDF5 file name and as the dataset key inside it.
LDLR_REGION = "chr19_11089462_11133820"


def _load_prediction_track(haplotype_dir):
    """Return the whole prediction dataset for ``LDLR_REGION`` as a NumPy array.

    ``haplotype_dir`` is the sub-folder of ``PREDICTIONS_DIR`` to read from
    (e.g. ``"haplotype2"`` or ``"haplotype2_rc"``).

    Raises ``KeyError`` if the file does not contain the expected dataset.
    """
    path = PREDICTIONS_DIR + "/" + haplotype_dir + "/" + LDLR_REGION + "_predictions.h5"
    with h5py.File(path, 'r') as h:
        # Index with h[key] rather than h.get(key): a missing dataset then raises a
        # clear KeyError instead of a TypeError from slicing None.
        return h[LDLR_REGION][:]


# Forward-strand predictions and their reverse-complement counterpart.
ref = _load_prediction_track("haplotype2")
ref_rc = _load_prediction_track("haplotype2_rc")

print(ref.shape)
print(ref_rc.shape)
(896, 1)
(896, 1)

Plotting the reference and reverse-complement tracks one above the other should show roughly mirror-image signals.

In [ ]:
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import kipoiseq

def plot_tracks(tracks, chr, interval_start, interval_end, height=1.5):
    """Draw each track as a filled area plot, one vertically stacked panel per track.

    Parameters
    ----------
    tracks : dict
        Maps a panel title to a 1-D sequence of values to plot.
    chr : str
        Chromosome name; only used to build the x-axis label.
    interval_start, interval_end : int
        Coordinates over which each track's values are spread evenly on the
        shared x axis.
    height : float, optional
        Height of each panel in inches (the figure width is fixed at 20).

    Raises
    ------
    ValueError
        If ``tracks`` is empty.
    """
    if not tracks:
        raise ValueError("tracks must contain at least one track")

    # squeeze=False always returns a 2-D array of Axes, so a single track works
    # too (by default plt.subplots returns a bare Axes when nrows == ncols == 1).
    fig, axes = plt.subplots(len(tracks), 1, figsize=(20, height * len(tracks)),
                             sharex=True, squeeze=False)
    panels = axes[:, 0]

    for ax, (title, y) in zip(panels, tracks.items()):
        ax.fill_between(np.linspace(interval_start, interval_end, num=len(y)), y)
        ax.set_title(title)

    # Despine every panel of this figure once, instead of once per loop iteration.
    sns.despine(fig=fig, top=True, right=True, bottom=True)
    # Only the bottom panel carries the x label, since the x axis is shared.
    panels[-1].set_xlabel(chr+":"+str(interval_start)+"-"+str(interval_end))
    plt.tight_layout()
    
# First column of the forward and reverse-complement predictions, keyed by panel title.
tracks = {"Reference_LDLR": ref[:, 0],
          "Reference_LDLR_RC": ref_rc[:, 0]}

# Interval that was queried for LDLR.
LDLR_interval = kipoiseq.Interval("chr19", 11089462, 11133820)

# The tracks hold 896 bins; at 128 bp per bin (presumably Enformer output -- TODO
# confirm) they span 114,688 bp around the interval centre, not the 44,358 bp
# query interval. Plot over the resized window so the x-axis coordinates line up
# with the bins.
LDLR_interval_resized = LDLR_interval.resize(114688)
plot_tracks(tracks, "chr19", LDLR_interval_resized.start, LDLR_interval_resized.end)