0.2_split_data

Author

Saideep Gona

Published

February 12, 2024

Code
import pandas as pd

# Space-delimited expression matrix; the first column (gene IDs) becomes the
# row index and every remaining column is one sample named <sample>_<condition>.
path_to_expression = "/beagle3/haky/users/saideep/projects/aracena_modeling/Inputs/counts_matrices/fully_preprocessed_expression_3_7_1milscale.txt"
expression = pd.read_csv(path_to_expression, sep=" ", index_col=0)

print(expression.head())

# Group sample columns by condition. Match on the "_<condition>" suffix rather
# than a bare substring so a sample ID that merely contains the letters "NI"
# or "Flu" cannot be assigned to the wrong (or to both) condition groups.
expression_cols = list(expression.columns)
expression_cols_flu = [x for x in expression_cols if x.endswith("_Flu")]
expression_cols_ni = [x for x in expression_cols if x.endswith("_NI")]
print(expression_cols)
print(expression_cols_flu)

# Condition label -> list of sample columns belonging to that condition.
expression_cols_conditions = {
    "Flu": expression_cols_flu,
    "NI": expression_cols_ni,
}
                  AF04_NI   AF06_NI   AF08_NI   AF10_NI   AF12_NI   AF14_NI  \
ENSG00000000419  3.832487  3.720580  3.820378  3.794796  3.774970  3.825794   
ENSG00000000457  3.455694  3.322571  3.321409  3.430186  3.399893  3.284082   
ENSG00000000460  2.760727  2.817853  2.746570  2.886356  2.569840  3.053475   
ENSG00000000938  4.708793  4.764273  5.018582  5.004831  5.359564  4.568625   
ENSG00000000971  3.008845  2.247455  3.118099  3.043985  3.123934  1.943336   

                  AF16_NI   AF18_NI   AF20_NI   AF24_NI  ...  EU27_Flu  \
ENSG00000000419  3.997539  3.860062  3.804748  3.787164  ...  3.748346   
ENSG00000000457  3.379018  3.394553  3.476632  3.379672  ...  2.984355   
ENSG00000000460  2.904467  2.881541  2.783070  2.654901  ... -0.025036   
ENSG00000000938  4.590085  4.999869  5.026941  5.351475  ...  4.734660   
ENSG00000000971  2.762410  2.821370  3.201145  2.423992  ...  2.827839   

                 EU29_Flu  EU33_Flu  EU36_Flu  EU37_Flu  EU38_Flu  EU39_Flu  \
ENSG00000000419  3.989536  3.164942  3.879116  3.964150  3.830327  3.746049   
ENSG00000000457  3.184036  3.048213  3.184669  3.270105  3.126126  3.289271   
ENSG00000000460 -0.032677 -0.656847 -0.356527 -0.387754 -0.144380  0.073510   
ENSG00000000938  4.412698  4.909094  4.743519  4.739156  4.346217  4.325185   
ENSG00000000971  2.406308  2.525052  2.235784  3.060054  2.815563  2.922801   

                 EU41_Flu  EU43_Flu  EU47_Flu  
ENSG00000000419  3.753978  3.531744  4.139349  
ENSG00000000457  2.943629  2.970591  3.210895  
ENSG00000000460 -0.120135 -0.217167 -0.053922  
ENSG00000000938  4.600541  4.603586  4.272110  
ENSG00000000971  2.785935  2.496299  2.544744  

[5 rows x 70 columns]
['AF04_NI', 'AF06_NI', 'AF08_NI', 'AF10_NI', 'AF12_NI', 'AF14_NI', 'AF16_NI', 'AF18_NI', 'AF20_NI', 'AF24_NI', 'AF26_NI', 'AF28_NI', 'AF30_NI', 'AF34_NI', 'EU03_NI', 'EU05_NI', 'EU07_NI', 'EU09_NI', 'EU13_NI', 'EU15_NI', 'EU17_NI', 'EU19_NI', 'EU21_NI', 'EU22_NI', 'EU25_NI', 'EU27_NI', 'EU29_NI', 'EU33_NI', 'EU36_NI', 'EU37_NI', 'EU38_NI', 'EU39_NI', 'EU41_NI', 'EU43_NI', 'EU47_NI', 'AF04_Flu', 'AF06_Flu', 'AF08_Flu', 'AF10_Flu', 'AF12_Flu', 'AF14_Flu', 'AF16_Flu', 'AF18_Flu', 'AF20_Flu', 'AF24_Flu', 'AF26_Flu', 'AF28_Flu', 'AF30_Flu', 'AF34_Flu', 'EU03_Flu', 'EU05_Flu', 'EU07_Flu', 'EU09_Flu', 'EU13_Flu', 'EU15_Flu', 'EU17_Flu', 'EU19_Flu', 'EU21_Flu', 'EU22_Flu', 'EU25_Flu', 'EU27_Flu', 'EU29_Flu', 'EU33_Flu', 'EU36_Flu', 'EU37_Flu', 'EU38_Flu', 'EU39_Flu', 'EU41_Flu', 'EU43_Flu', 'EU47_Flu']
['AF04_Flu', 'AF06_Flu', 'AF08_Flu', 'AF10_Flu', 'AF12_Flu', 'AF14_Flu', 'AF16_Flu', 'AF18_Flu', 'AF20_Flu', 'AF24_Flu', 'AF26_Flu', 'AF28_Flu', 'AF30_Flu', 'AF34_Flu', 'EU03_Flu', 'EU05_Flu', 'EU07_Flu', 'EU09_Flu', 'EU13_Flu', 'EU15_Flu', 'EU17_Flu', 'EU19_Flu', 'EU21_Flu', 'EU22_Flu', 'EU25_Flu', 'EU27_Flu', 'EU29_Flu', 'EU33_Flu', 'EU36_Flu', 'EU37_Flu', 'EU38_Flu', 'EU39_Flu', 'EU41_Flu', 'EU43_Flu', 'EU47_Flu']
Code
# Write one expression table per condition alongside the combined matrix.
# NOTE(review): to_csv uses its default comma delimiter here, whereas the
# input was read as space-delimited -- confirm downstream readers expect that.
for cond in ("Flu", "NI"):
    out_path = f"/beagle3/haky/users/saideep/projects/aracena_modeling/Inputs/counts_matrices/fully_preprocessed_expression_3_7_1milscale_{cond}.txt"
    condition_columns = expression_cols_conditions[cond]
    expression[condition_columns].to_csv(out_path)