{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import shap\n", "import os\n", "import pickle" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Set working dir\n", "# NOTE: absolute, machine-specific path; the relative data paths used below are resolved against it\n", "os.chdir('/users/rg/dgarrido/PhD/projects/side_tasks/chrom_bea/nf/ml-nf/')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 ENSG00000078808\n", "1 ENSG00000008130\n", "2 ENSG00000157933\n", "3 ENSG00000157916\n", "4 ENSG00000157873\n", " ... \n", "2098 ENSG00000160211\n", "2099 ENSG00000269335\n", "2100 ENSG00000160219\n", "2101 ENSG00000185515\n", "2102 ENSG00000124333\n", "Name: 3, Length: 2103, dtype: object" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Gene subset of interest\n", "sub_f = \"data/subset/upregulated.bed\"\n", "# Headerless, tab-separated file: keep only the column at position 3 (the IDs)\n", "sub = pd.read_csv(sub_f, sep = \"\\t\", header = None)[3]\n", "# Drop everything from the first '.' onwards in each ID.\n", "# regex must be explicit: pandas >= 2.0 defaults to regex=False, which would\n", "# treat the pattern as a literal string and leave the IDs unchanged.\n", "sub = sub.str.replace(r\"\\..*\", \"\", regex = True)\n", "sub" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | H000 | \n", "H003 | \n", "H006 | \n", "H009 | \n", "H012 | \n", "H018 | \n", "H024 | \n", "H036 | \n", "H048 | \n", "H072 | \n", "H120 | \n", "H168 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|
ENSG00000000419 | \n", "6.14 | \n", "6.11 | \n", "6.18 | \n", "6.29 | \n", "6.29 | \n", "6.15 | \n", "6.22 | \n", "6.17 | \n", "6.42 | \n", "6.24 | \n", "5.74 | \n", "5.51 | \n", "
ENSG00000000457 | \n", "2.37 | \n", "2.21 | \n", "2.59 | \n", "2.63 | \n", "2.59 | \n", "2.38 | \n", "2.45 | \n", "2.49 | \n", "2.64 | \n", "2.74 | \n", "2.69 | \n", "2.85 | \n", "
ENSG00000000460 | \n", "3.91 | \n", "3.47 | \n", "3.72 | \n", "4.05 | \n", "3.94 | \n", "3.95 | \n", "3.95 | \n", "3.99 | \n", "3.87 | \n", "2.84 | \n", "2.29 | \n", "2.48 | \n", "
ENSG00000000938 | \n", "0.66 | \n", "0.51 | \n", "0.38 | \n", "0.31 | \n", "0.40 | \n", "0.84 | \n", "1.34 | \n", "4.10 | \n", "5.29 | \n", "5.93 | \n", "5.99 | \n", "5.26 | \n", "
ENSG00000000971 | \n", "0.44 | \n", "0.45 | \n", "0.72 | \n", "0.91 | \n", "1.40 | \n", "1.96 | \n", "2.54 | \n", "2.77 | \n", "3.15 | \n", "3.87 | \n", "3.57 | \n", "3.38 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
ENSGR0000196433 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "
ENSGR0000197976 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "
ENSGR0000198223 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "
ENSGR0000205755 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "
ENSGR0000214717 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "
12248 rows × 12 columns
\n", "\n", " | H3K4me3_H000 | \n", "H3K4me3_H003 | \n", "H3K4me3_H006 | \n", "H3K4me3_H009 | \n", "H3K4me3_H012 | \n", "H3K4me3_H018 | \n", "H3K4me3_H024 | \n", "H3K4me3_H036 | \n", "H3K4me3_H048 | \n", "H3K4me3_H072 | \n", "... | \n", "H4K20me1_H006 | \n", "H4K20me1_H009 | \n", "H4K20me1_H012 | \n", "H4K20me1_H018 | \n", "H4K20me1_H024 | \n", "H4K20me1_H036 | \n", "H4K20me1_H048 | \n", "H4K20me1_H072 | \n", "H4K20me1_H120 | \n", "H4K20me1_H168 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
ENSG00000000005 | \n", "0.06 | \n", "0.06 | \n", "0.06 | \n", "0.06 | \n", "0.06 | \n", "0.07 | \n", "0.07 | \n", "0.05 | \n", "0.06 | \n", "0.06 | \n", "... | \n", "0.09 | \n", "0.08 | \n", "0.08 | \n", "0.10 | \n", "0.08 | \n", "0.11 | \n", "0.11 | \n", "0.09 | \n", "0.08 | \n", "0.08 | \n", "
ENSG00000000419 | \n", "3.75 | \n", "3.72 | \n", "3.87 | \n", "4.10 | \n", "3.92 | \n", "3.84 | \n", "3.62 | \n", "3.97 | \n", "3.72 | \n", "3.14 | \n", "... | \n", "0.45 | \n", "0.44 | \n", "0.39 | \n", "0.44 | \n", "0.42 | \n", "0.39 | \n", "0.36 | \n", "0.39 | \n", "0.42 | \n", "0.43 | \n", "
ENSG00000000457 | \n", "2.96 | \n", "2.87 | \n", "2.94 | \n", "2.68 | \n", "2.81 | \n", "3.20 | \n", "2.69 | \n", "2.84 | \n", "2.69 | \n", "2.83 | \n", "... | \n", "0.10 | \n", "0.11 | \n", "0.12 | \n", "0.14 | \n", "0.13 | \n", "0.13 | \n", "0.12 | \n", "0.12 | \n", "0.24 | \n", "0.35 | \n", "
ENSG00000000460 | \n", "2.90 | \n", "3.16 | \n", "3.43 | \n", "3.24 | \n", "3.28 | \n", "3.08 | \n", "3.13 | \n", "2.98 | \n", "3.17 | \n", "2.94 | \n", "... | \n", "0.06 | \n", "0.07 | \n", "0.07 | \n", "0.07 | \n", "0.07 | \n", "0.06 | \n", "0.06 | \n", "0.06 | \n", "0.07 | \n", "0.10 | \n", "
ENSG00000000938 | \n", "0.99 | \n", "1.11 | \n", "0.99 | \n", "1.15 | \n", "1.09 | \n", "1.07 | \n", "1.09 | \n", "1.19 | \n", "1.59 | \n", "2.64 | \n", "... | \n", "0.08 | \n", "0.08 | \n", "0.07 | \n", "0.07 | \n", "0.06 | \n", "0.07 | \n", "0.08 | \n", "0.30 | \n", "0.26 | \n", "0.14 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
ENSGR0000196433 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "... | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "
ENSGR0000197976 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "... | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "
ENSGR0000198223 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "... | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "
ENSGR0000205755 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "... | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "
ENSGR0000214717 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "... | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "0.00 | \n", "
12248 rows × 108 columns
\n", "