{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Loading Fake Timeseries Surface Data\n",
"\n",
"This notebook is designed to explore some functionality with loading DataFiles and using Loaders.\n",
"\n",
"This example will require some extra optional libraries, including nibabel and nilearn! Note: while nilearn is not imported, when trying to import SingleConnectivityMeasure, if nilearn is not installed, this will give an ImportError.\n",
"\n",
"We will also use fake data for this example - so no special datasets required!"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import BPt as bp\n",
"import nibabel as nib\n",
"import numpy as np\n",
"import pandas as pd\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def save_fake_timeseries_data():\n",
" '''Save fake timeseries and fake surface data.'''\n",
" \n",
" X = np.random.random(size = (20, 100, 10242))\n",
" os.makedirs('fake_time_data', exist_ok=True)\n",
" \n",
" for x in range(len(X)):\n",
" np.save('fake_time_data/' + str(x) + '_lh', X[x])\n",
" for x in range(len(X)):\n",
" np.save('fake_time_data/' + str(x) + '_rh', X[x])\n",
" \n",
"save_fake_timeseries_data()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Init a Dataset\n",
"data = bp.Dataset()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next, we are interested in loading in the files to the dataset as data files. There are a few different ways to do this, but we will use the method add_data_files. We will try and load the timeseries data first.\n",
"\n",
"First we need a dictionary mapping desired column name to location or a file glob (which is easier so let's use that)."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"
Data
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" timeseries_lh | \n",
" timeseries_rh | \n",
"
\n",
" \n",
" \n",
" \n",
" | 13_lh | \n",
" Loc(0) | \n",
" nan | \n",
"
\n",
" \n",
" | 9_lh | \n",
" Loc(1) | \n",
" nan | \n",
"
\n",
" \n",
" | 8_lh | \n",
" Loc(2) | \n",
" nan | \n",
"
\n",
" \n",
" | 2_lh | \n",
" Loc(3) | \n",
" nan | \n",
"
\n",
" \n",
" | 16_lh | \n",
" Loc(4) | \n",
" nan | \n",
"
\n",
" \n",
" | 11_lh | \n",
" Loc(5) | \n",
" nan | \n",
"
\n",
" \n",
" | 6_lh | \n",
" Loc(6) | \n",
" nan | \n",
"
\n",
" \n",
" | 7_lh | \n",
" Loc(7) | \n",
" nan | \n",
"
\n",
" \n",
" | 1_lh | \n",
" Loc(8) | \n",
" nan | \n",
"
\n",
" \n",
" | 17_lh | \n",
" Loc(9) | \n",
" nan | \n",
"
\n",
" \n",
" | 19_lh | \n",
" Loc(10) | \n",
" nan | \n",
"
\n",
" \n",
" | 15_lh | \n",
" Loc(11) | \n",
" nan | \n",
"
\n",
" \n",
" | 10_lh | \n",
" Loc(12) | \n",
" nan | \n",
"
\n",
" \n",
" | 3_lh | \n",
" Loc(13) | \n",
" nan | \n",
"
\n",
" \n",
" | 14_lh | \n",
" Loc(14) | \n",
" nan | \n",
"
\n",
" \n",
" | 0_lh | \n",
" Loc(15) | \n",
" nan | \n",
"
\n",
" \n",
" | 18_lh | \n",
" Loc(16) | \n",
" nan | \n",
"
\n",
" \n",
" | 5_lh | \n",
" Loc(17) | \n",
" nan | \n",
"
\n",
" \n",
" | 4_lh | \n",
" Loc(18) | \n",
" nan | \n",
"
\n",
" \n",
" | 12_lh | \n",
" Loc(19) | \n",
" nan | \n",
"
\n",
" \n",
" | 11_rh | \n",
" nan | \n",
" Loc(20) | \n",
"
\n",
" \n",
" | 10_rh | \n",
" nan | \n",
" Loc(21) | \n",
"
\n",
" \n",
" | 12_rh | \n",
" nan | \n",
" Loc(22) | \n",
"
\n",
" \n",
" | 3_rh | \n",
" nan | \n",
" Loc(23) | \n",
"
\n",
" \n",
" | 0_rh | \n",
" nan | \n",
" Loc(24) | \n",
"
\n",
" \n",
" | 18_rh | \n",
" nan | \n",
" Loc(25) | \n",
"
\n",
" \n",
" | 1_rh | \n",
" nan | \n",
" Loc(26) | \n",
"
\n",
" \n",
" | 9_rh | \n",
" nan | \n",
" Loc(27) | \n",
"
\n",
" \n",
" | 14_rh | \n",
" nan | \n",
" Loc(28) | \n",
"
\n",
" \n",
" | 6_rh | \n",
" nan | \n",
" Loc(29) | \n",
"
\n",
" \n",
" | 15_rh | \n",
" nan | \n",
" Loc(30) | \n",
"
\n",
" \n",
" | 7_rh | \n",
" nan | \n",
" Loc(31) | \n",
"
\n",
" \n",
" | 4_rh | \n",
" nan | \n",
" Loc(32) | \n",
"
\n",
" \n",
" | 19_rh | \n",
" nan | \n",
" Loc(33) | \n",
"
\n",
" \n",
" | 5_rh | \n",
" nan | \n",
" Loc(34) | \n",
"
\n",
" \n",
" | 2_rh | \n",
" nan | \n",
" Loc(35) | \n",
"
\n",
" \n",
" | 13_rh | \n",
" nan | \n",
" Loc(36) | \n",
"
\n",
" \n",
" | 8_rh | \n",
" nan | \n",
" Loc(37) | \n",
"
\n",
" \n",
" | 16_rh | \n",
" nan | \n",
" Loc(38) | \n",
"
\n",
" \n",
" | 17_rh | \n",
" nan | \n",
" Loc(39) | \n",
"
\n",
" \n",
"
\n",
"
\n"
],
"text/plain": [
" timeseries_lh timeseries_rh\n",
"13_lh 0.0 NaN\n",
"9_lh 1.0 NaN\n",
"8_lh 2.0 NaN\n",
"2_lh 3.0 NaN\n",
"16_lh 4.0 NaN\n",
"11_lh 5.0 NaN\n",
"6_lh 6.0 NaN\n",
"7_lh 7.0 NaN\n",
"1_lh 8.0 NaN\n",
"17_lh 9.0 NaN\n",
"19_lh 10.0 NaN\n",
"15_lh 11.0 NaN\n",
"10_lh 12.0 NaN\n",
"3_lh 13.0 NaN\n",
"14_lh 14.0 NaN\n",
"0_lh 15.0 NaN\n",
"18_lh 16.0 NaN\n",
"5_lh 17.0 NaN\n",
"4_lh 18.0 NaN\n",
"12_lh 19.0 NaN\n",
"11_rh NaN 20.0\n",
"10_rh NaN 21.0\n",
"12_rh NaN 22.0\n",
"3_rh NaN 23.0\n",
"0_rh NaN 24.0\n",
"18_rh NaN 25.0\n",
"1_rh NaN 26.0\n",
"9_rh NaN 27.0\n",
"14_rh NaN 28.0\n",
"6_rh NaN 29.0\n",
"15_rh NaN 30.0\n",
"7_rh NaN 31.0\n",
"4_rh NaN 32.0\n",
"19_rh NaN 33.0\n",
"5_rh NaN 34.0\n",
"2_rh NaN 35.0\n",
"13_rh NaN 36.0\n",
"8_rh NaN 37.0\n",
"16_rh NaN 38.0\n",
"17_rh NaN 39.0"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The *'s just mean wildcard\n",
"files = {'timeseries_lh': 'fake_time_data/*_lh*',\n",
" 'timeseries_rh': 'fake_time_data/*_rh*'}\n",
"\n",
"# Now let's try loading with 'auto' as the file to subject function\n",
"data.add_data_files(files, 'auto')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can see 'auto' doesn't work for us, so we can try writing our own function instead."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
Data
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" timeseries_lh | \n",
" timeseries_rh | \n",
"
\n",
" \n",
" \n",
" \n",
" | 13 | \n",
" Loc(0) | \n",
" Loc(36) | \n",
"
\n",
" \n",
" | 9 | \n",
" Loc(1) | \n",
" Loc(27) | \n",
"
\n",
" \n",
" | 8 | \n",
" Loc(2) | \n",
" Loc(37) | \n",
"
\n",
" \n",
" | 2 | \n",
" Loc(3) | \n",
" Loc(35) | \n",
"
\n",
" \n",
" | 16 | \n",
" Loc(4) | \n",
" Loc(38) | \n",
"
\n",
" \n",
" | 11 | \n",
" Loc(5) | \n",
" Loc(20) | \n",
"
\n",
" \n",
" | 6 | \n",
" Loc(6) | \n",
" Loc(29) | \n",
"
\n",
" \n",
" | 7 | \n",
" Loc(7) | \n",
" Loc(31) | \n",
"
\n",
" \n",
" | 1 | \n",
" Loc(8) | \n",
" Loc(26) | \n",
"
\n",
" \n",
" | 17 | \n",
" Loc(9) | \n",
" Loc(39) | \n",
"
\n",
" \n",
" | 19 | \n",
" Loc(10) | \n",
" Loc(33) | \n",
"
\n",
" \n",
" | 15 | \n",
" Loc(11) | \n",
" Loc(30) | \n",
"
\n",
" \n",
" | 10 | \n",
" Loc(12) | \n",
" Loc(21) | \n",
"
\n",
" \n",
" | 3 | \n",
" Loc(13) | \n",
" Loc(23) | \n",
"
\n",
" \n",
" | 14 | \n",
" Loc(14) | \n",
" Loc(28) | \n",
"
\n",
" \n",
" | 0 | \n",
" Loc(15) | \n",
" Loc(24) | \n",
"
\n",
" \n",
" | 18 | \n",
" Loc(16) | \n",
" Loc(25) | \n",
"
\n",
" \n",
" | 5 | \n",
" Loc(17) | \n",
" Loc(34) | \n",
"
\n",
" \n",
" | 4 | \n",
" Loc(18) | \n",
" Loc(32) | \n",
"
\n",
" \n",
" | 12 | \n",
" Loc(19) | \n",
" Loc(22) | \n",
"
\n",
" \n",
"
\n",
"
\n"
],
"text/plain": [
" timeseries_lh timeseries_rh\n",
"13 0.0 36.0\n",
"9 1.0 27.0\n",
"8 2.0 37.0\n",
"2 3.0 35.0\n",
"16 4.0 38.0\n",
"11 5.0 20.0\n",
"6 6.0 29.0\n",
"7 7.0 31.0\n",
"1 8.0 26.0\n",
"17 9.0 39.0\n",
"19 10.0 33.0\n",
"15 11.0 30.0\n",
"10 12.0 21.0\n",
"3 13.0 23.0\n",
"14 14.0 28.0\n",
"0 15.0 24.0\n",
"18 16.0 25.0\n",
"5 17.0 34.0\n",
"4 18.0 32.0\n",
"12 19.0 22.0"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def file_to_subj(loc):\n",
" return loc.split('/')[-1].split('_')[0]\n",
"\n",
"# Actually load it this time\n",
"data = data.add_data_files(files, file_to_subj)\n",
"data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"What's this though? Why are the files showing up as Loc(int). Whats going on is that the data files are really stored as just integers, see:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"13 0.0\n",
"9 1.0\n",
"8 2.0\n",
"2 3.0\n",
"16 4.0\n",
"11 5.0\n",
"6 6.0\n",
"7 7.0\n",
"1 8.0\n",
"17 9.0\n",
"19 10.0\n",
"15 11.0\n",
"10 12.0\n",
"3 13.0\n",
"14 14.0\n",
"0 15.0\n",
"18 16.0\n",
"5 17.0\n",
"4 18.0\n",
"12 19.0\n",
"Name: timeseries_lh, dtype: float64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['timeseries_lh']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"They correspond to locations in a stored file mapping (note: you don't need to worry about any of this most of the time)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(DataFile(loc='/home/sage/BPt/Examples/Short_Examples/fake_time_data/13_lh.npy'),\n",
" DataFile(loc='/home/sage/BPt/Examples/Short_Examples/fake_time_data/9_lh.npy'),\n",
" DataFile(loc='/home/sage/BPt/Examples/Short_Examples/fake_time_data/8_lh.npy'))"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.file_mapping[0], data.file_mapping[1], data.file_mapping[2] "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's add a fake target to our dataset now"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
Data
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" timeseries_lh | \n",
" timeseries_rh | \n",
"
\n",
" \n",
" \n",
" \n",
" | 13 | \n",
" Loc(0) | \n",
" Loc(36) | \n",
"
\n",
" \n",
" | 9 | \n",
" Loc(1) | \n",
" Loc(27) | \n",
"
\n",
" \n",
" | 8 | \n",
" Loc(2) | \n",
" Loc(37) | \n",
"
\n",
" \n",
" | 2 | \n",
" Loc(3) | \n",
" Loc(35) | \n",
"
\n",
" \n",
" | 16 | \n",
" Loc(4) | \n",
" Loc(38) | \n",
"
\n",
" \n",
" | 11 | \n",
" Loc(5) | \n",
" Loc(20) | \n",
"
\n",
" \n",
" | 6 | \n",
" Loc(6) | \n",
" Loc(29) | \n",
"
\n",
" \n",
" | 7 | \n",
" Loc(7) | \n",
" Loc(31) | \n",
"
\n",
" \n",
" | 1 | \n",
" Loc(8) | \n",
" Loc(26) | \n",
"
\n",
" \n",
" | 17 | \n",
" Loc(9) | \n",
" Loc(39) | \n",
"
\n",
" \n",
" | 19 | \n",
" Loc(10) | \n",
" Loc(33) | \n",
"
\n",
" \n",
" | 15 | \n",
" Loc(11) | \n",
" Loc(30) | \n",
"
\n",
" \n",
" | 10 | \n",
" Loc(12) | \n",
" Loc(21) | \n",
"
\n",
" \n",
" | 3 | \n",
" Loc(13) | \n",
" Loc(23) | \n",
"
\n",
" \n",
" | 14 | \n",
" Loc(14) | \n",
" Loc(28) | \n",
"
\n",
" \n",
" | 0 | \n",
" Loc(15) | \n",
" Loc(24) | \n",
"
\n",
" \n",
" | 18 | \n",
" Loc(16) | \n",
" Loc(25) | \n",
"
\n",
" \n",
" | 5 | \n",
" Loc(17) | \n",
" Loc(34) | \n",
"
\n",
" \n",
" | 4 | \n",
" Loc(18) | \n",
" Loc(32) | \n",
"
\n",
" \n",
" | 12 | \n",
" Loc(19) | \n",
" Loc(22) | \n",
"
\n",
" \n",
"
\n",
"
\n",
"\n",
"
Target
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" t | \n",
"
\n",
" \n",
" \n",
" \n",
" | 13 | \n",
" 0.656648 | \n",
"
\n",
" \n",
" | 9 | \n",
" 0.298354 | \n",
"
\n",
" \n",
" | 8 | \n",
" 0.495359 | \n",
"
\n",
" \n",
" | 2 | \n",
" 0.414660 | \n",
"
\n",
" \n",
" | 16 | \n",
" 0.606687 | \n",
"
\n",
" \n",
" | 11 | \n",
" 0.453163 | \n",
"
\n",
" \n",
" | 6 | \n",
" 0.853856 | \n",
"
\n",
" \n",
" | 7 | \n",
" 0.044329 | \n",
"
\n",
" \n",
" | 1 | \n",
" 0.916036 | \n",
"
\n",
" \n",
" | 17 | \n",
" 0.865733 | \n",
"
\n",
" \n",
" | 19 | \n",
" 0.015055 | \n",
"
\n",
" \n",
" | 15 | \n",
" 0.082130 | \n",
"
\n",
" \n",
" | 10 | \n",
" 0.731628 | \n",
"
\n",
" \n",
" | 3 | \n",
" 0.074572 | \n",
"
\n",
" \n",
" | 14 | \n",
" 0.589903 | \n",
"
\n",
" \n",
" | 0 | \n",
" 0.768409 | \n",
"
\n",
" \n",
" | 18 | \n",
" 0.536750 | \n",
"
\n",
" \n",
" | 5 | \n",
" 0.401537 | \n",
"
\n",
" \n",
" | 4 | \n",
" 0.580557 | \n",
"
\n",
" \n",
" | 12 | \n",
" 0.508457 | \n",
"
\n",
" \n",
"
\n",
"
\n"
],
"text/plain": [
" timeseries_lh timeseries_rh t\n",
"13 0.0 36.0 0.656648\n",
"9 1.0 27.0 0.298354\n",
"8 2.0 37.0 0.495359\n",
"2 3.0 35.0 0.414660\n",
"16 4.0 38.0 0.606687\n",
"11 5.0 20.0 0.453163\n",
"6 6.0 29.0 0.853856\n",
"7 7.0 31.0 0.044329\n",
"1 8.0 26.0 0.916036\n",
"17 9.0 39.0 0.865733\n",
"19 10.0 33.0 0.015055\n",
"15 11.0 30.0 0.082130\n",
"10 12.0 21.0 0.731628\n",
"3 13.0 23.0 0.074572\n",
"14 14.0 28.0 0.589903\n",
"0 15.0 24.0 0.768409\n",
"18 16.0 25.0 0.536750\n",
"5 17.0 34.0 0.401537\n",
"4 18.0 32.0 0.580557\n",
"12 19.0 22.0 0.508457"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['t'] = np.random.random(len(data))\n",
"data.set_target('t', inplace=True)\n",
"data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next we will generate a Loader to apply a parcellation, then extract a measure of connectivity."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"from BPt.extensions import SurfLabels\n",
"\n",
"lh_parc = SurfLabels(labels='data/lh.aparc.annot', vectorize=False)\n",
"rh_parc = SurfLabels(labels='data/rh.aparc.annot', vectorize=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can see how this object works on example data first."
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(100, 10242)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ex_lh = data.file_mapping[0].load()\n",
"ex_lh.shape"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(100, 35)"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"trans = lh_parc.fit_transform(ex_lh)\n",
"trans.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We essentially get a reduction from 10242 features to 35."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next, we want to transform the matrix into a correlation matrix."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"from BPt.extensions import SingleConnectivityMeasure\n",
"scm = SingleConnectivityMeasure(kind='covariance', discard_diagonal=True, vectorize=True)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(595,)"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scm.fit_transform(trans).shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The single connectivity measure is just a wrapper designed to let the ConnectivityMeasure from nilearn work with a single subject's data at a time.\n",
"\n",
"Next, let's use the input special Pipe wrapper to compose these two objects into their own pipeline"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"lh_loader = bp.Loader(bp.Pipe([lh_parc, scm]), scope='_lh')\n",
"rh_loader = bp.Loader(bp.Pipe([rh_parc, scm]), scope='_rh')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Define a simple pipeline with just our loader steps, then evaluate with mostly default settings."
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ee8b7bc1991f4995a160f3aa2aa124ce",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Folds: 0%| | 0/5 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"BPtEvaluator\n",
"------------\n",
"mean_scores = {'explained_variance': -0.3492082271322736, 'neg_mean_squared_error': -0.08532586202634963}\n",
"std_scores = {'explained_variance': 0.37944917198666483, 'neg_mean_squared_error': 0.025409784568717956}\n",
"\n",
"Saved Attributes: ['estimators', 'preds', 'timing', 'train_subjects', 'val_subjects', 'feat_names', 'ps', 'mean_scores', 'std_scores', 'weighted_mean_scores', 'scores', 'fis_', 'coef_']\n",
"\n",
"Available Methods: ['get_preds_dfs', 'get_fis', 'get_coef_', 'permutation_importance']\n",
"\n",
"Evaluated with:\n",
"ProblemSpec(problem_type='regression',\n",
" scorer={'explained_variance': make_scorer(explained_variance_score),\n",
" 'neg_mean_squared_error': make_scorer(mean_squared_error, greater_is_better=False)},\n",
" subjects='all', target='t')"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pipeline = bp.Pipeline([lh_loader, rh_loader, bp.Model('linear')])\n",
"\n",
"results = bp.evaluate(pipeline, data)\n",
"results"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Don't be discouraged that this didn't work, we are after all trying to predict random noise with random noise ... "
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"loader_pipe0\n",
"loader_pipe1\n",
"linear regressor\n"
]
}
],
"source": [
"# These are the steps of the pipeline\n",
"fold0_pipeline = results.estimators[0]\n",
"for step in fold0_pipeline.steps:\n",
" print(step[0])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can investigate pieces, or use special functions like"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" timeseries_rh_0 | \n",
" timeseries_rh_1 | \n",
" timeseries_rh_2 | \n",
" timeseries_rh_3 | \n",
" timeseries_rh_4 | \n",
" timeseries_rh_5 | \n",
" timeseries_rh_6 | \n",
" timeseries_rh_7 | \n",
" timeseries_rh_8 | \n",
" timeseries_rh_9 | \n",
" ... | \n",
" timeseries_lh_585 | \n",
" timeseries_lh_586 | \n",
" timeseries_lh_587 | \n",
" timeseries_lh_588 | \n",
" timeseries_lh_589 | \n",
" timeseries_lh_590 | \n",
" timeseries_lh_591 | \n",
" timeseries_lh_592 | \n",
" timeseries_lh_593 | \n",
" timeseries_lh_594 | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" -0.000165 | \n",
" 0.000046 | \n",
" -0.000077 | \n",
" -0.000075 | \n",
" 0.000074 | \n",
" -0.000011 | \n",
" -0.000049 | \n",
" 0.000047 | \n",
" -0.000024 | \n",
" -0.000024 | \n",
" ... | \n",
" -8.290498e-06 | \n",
" -0.000006 | \n",
" -0.000023 | \n",
" 1.610693e-06 | \n",
" 0.000015 | \n",
" -0.000006 | \n",
" 4.867083e-06 | \n",
" -1.215231e-04 | \n",
" -0.000140 | \n",
" -0.000048 | \n",
"
\n",
" \n",
" | 1 | \n",
" 0.000051 | \n",
" 0.000027 | \n",
" -0.000011 | \n",
" -0.000003 | \n",
" 0.000022 | \n",
" 0.000033 | \n",
" 0.000049 | \n",
" 0.000072 | \n",
" 0.000010 | \n",
" -0.000014 | \n",
" ... | \n",
" 9.147214e-06 | \n",
" -0.000033 | \n",
" -0.000015 | \n",
" 4.817195e-06 | \n",
" 0.000001 | \n",
" 0.000009 | \n",
" -3.010718e-05 | \n",
" 5.807162e-05 | \n",
" -0.000070 | \n",
" 0.000016 | \n",
"
\n",
" \n",
" | 2 | \n",
" -0.000019 | \n",
" -0.000024 | \n",
" -0.000004 | \n",
" 0.000027 | \n",
" -0.000054 | \n",
" 0.000013 | \n",
" 0.000064 | \n",
" -0.000118 | \n",
" -0.000065 | \n",
" 0.000063 | \n",
" ... | \n",
" -8.021237e-06 | \n",
" -0.000059 | \n",
" 0.000004 | \n",
" -1.018778e-05 | \n",
" -0.000026 | \n",
" -0.000003 | \n",
" 1.120659e-05 | \n",
" -3.874970e-05 | \n",
" 0.000057 | \n",
" -0.000008 | \n",
"
\n",
" \n",
" | 3 | \n",
" 0.000037 | \n",
" 0.000027 | \n",
" 0.000050 | \n",
" 0.000080 | \n",
" 0.000038 | \n",
" 0.000009 | \n",
" -0.000094 | \n",
" -0.000117 | \n",
" 0.000056 | \n",
" -0.000005 | \n",
" ... | \n",
" 2.637188e-07 | \n",
" -0.000015 | \n",
" -0.000011 | \n",
" -6.939784e-06 | \n",
" 0.000022 | \n",
" 0.000005 | \n",
" -2.519195e-05 | \n",
" 1.219129e-04 | \n",
" 0.000021 | \n",
" 0.000074 | \n",
"
\n",
" \n",
" | 4 | \n",
" -0.000030 | \n",
" 0.000013 | \n",
" -0.000048 | \n",
" -0.000002 | \n",
" 0.000043 | \n",
" -0.000021 | \n",
" -0.000021 | \n",
" 0.000045 | \n",
" 0.000015 | \n",
" -0.000008 | \n",
" ... | \n",
" -4.193627e-05 | \n",
" -0.000005 | \n",
" -0.000038 | \n",
" -1.579288e-05 | \n",
" -0.000010 | \n",
" 0.000007 | \n",
" -2.074608e-05 | \n",
" 1.288912e-04 | \n",
" 0.000048 | \n",
" 0.000015 | \n",
"
\n",
" \n",
" | 5 | \n",
" -0.000027 | \n",
" 0.000012 | \n",
" 0.000049 | \n",
" -0.000040 | \n",
" 0.000137 | \n",
" -0.000020 | \n",
" 0.000023 | \n",
" 0.000057 | \n",
" 0.000020 | \n",
" 0.000018 | \n",
" ... | \n",
" -2.317345e-05 | \n",
" 0.000047 | \n",
" -0.000021 | \n",
" -3.256373e-06 | \n",
" 0.000013 | \n",
" 0.000006 | \n",
" -2.017995e-05 | \n",
" 3.174790e-05 | \n",
" -0.000044 | \n",
" -0.000050 | \n",
"
\n",
" \n",
" | 6 | \n",
" -0.000003 | \n",
" 0.000011 | \n",
" 0.000037 | \n",
" -0.000007 | \n",
" 0.000026 | \n",
" 0.000034 | \n",
" 0.000007 | \n",
" -0.000071 | \n",
" -0.000019 | \n",
" -0.000004 | \n",
" ... | \n",
" 1.230251e-05 | \n",
" 0.000065 | \n",
" 0.000008 | \n",
" 8.041033e-07 | \n",
" 0.000001 | \n",
" -0.000026 | \n",
" -1.401379e-05 | \n",
" 2.662647e-05 | \n",
" -0.000020 | \n",
" 0.000032 | \n",
"
\n",
" \n",
" | 7 | \n",
" 0.000038 | \n",
" 0.000019 | \n",
" 0.000006 | \n",
" 0.000017 | \n",
" -0.000173 | \n",
" 0.000027 | \n",
" -0.000058 | \n",
" 0.000120 | \n",
" 0.000028 | \n",
" -0.000029 | \n",
" ... | \n",
" -2.762708e-05 | \n",
" 0.000019 | \n",
" 0.000015 | \n",
" -5.296039e-06 | \n",
" -0.000021 | \n",
" 0.000017 | \n",
" -3.512035e-06 | \n",
" -1.743649e-04 | \n",
" 0.000015 | \n",
" 0.000002 | \n",
"
\n",
" \n",
" | 8 | \n",
" -0.000009 | \n",
" 0.000007 | \n",
" 0.000034 | \n",
" -0.000002 | \n",
" 0.000032 | \n",
" -0.000011 | \n",
" -0.000021 | \n",
" -0.000113 | \n",
" 0.000040 | \n",
" 0.000024 | \n",
" ... | \n",
" -1.286571e-06 | \n",
" -0.000022 | \n",
" -0.000027 | \n",
" 2.031265e-05 | \n",
" -0.000008 | \n",
" 0.000035 | \n",
" -5.331094e-06 | \n",
" -5.483645e-05 | \n",
" 0.000103 | \n",
" -0.000014 | \n",
"
\n",
" \n",
" | 9 | \n",
" 0.000062 | \n",
" -0.000022 | \n",
" 0.000060 | \n",
" 0.000010 | \n",
" -0.000017 | \n",
" 0.000012 | \n",
" -0.000019 | \n",
" 0.000093 | \n",
" -0.000002 | \n",
" 0.000028 | \n",
" ... | \n",
" -1.272615e-05 | \n",
" 0.000027 | \n",
" -0.000015 | \n",
" -1.022682e-05 | \n",
" -0.000044 | \n",
" -0.000006 | \n",
" 4.879025e-06 | \n",
" 3.508208e-07 | \n",
" -0.000069 | \n",
" -0.000002 | \n",
"
\n",
" \n",
" | 10 | \n",
" 0.000019 | \n",
" 0.000110 | \n",
" 0.000062 | \n",
" -0.000019 | \n",
" 0.000011 | \n",
" -0.000007 | \n",
" -0.000059 | \n",
" -0.000056 | \n",
" 0.000022 | \n",
" -0.000041 | \n",
" ... | \n",
" -1.971200e-05 | \n",
" 0.000055 | \n",
" 0.000020 | \n",
" -5.049802e-06 | \n",
" 0.000014 | \n",
" 0.000014 | \n",
" -4.576251e-07 | \n",
" -3.902154e-05 | \n",
" 0.000023 | \n",
" -0.000025 | \n",
"
\n",
" \n",
" | 11 | \n",
" 0.000013 | \n",
" -0.000036 | \n",
" -0.000063 | \n",
" -0.000026 | \n",
" -0.000008 | \n",
" -0.000007 | \n",
" 0.000029 | \n",
" -0.000117 | \n",
" 0.000052 | \n",
" 0.000013 | \n",
" ... | \n",
" 4.998446e-07 | \n",
" -0.000018 | \n",
" -0.000016 | \n",
" -1.614390e-05 | \n",
" 0.000006 | \n",
" -0.000006 | \n",
" 1.069373e-05 | \n",
" -6.800519e-06 | \n",
" 0.000029 | \n",
" -0.000103 | \n",
"
\n",
" \n",
" | 12 | \n",
" -0.000033 | \n",
" -0.000027 | \n",
" 0.000066 | \n",
" 0.000013 | \n",
" 0.000021 | \n",
" -0.000012 | \n",
" 0.000061 | \n",
" 0.000105 | \n",
" 0.000020 | \n",
" 0.000022 | \n",
" ... | \n",
" -3.358210e-06 | \n",
" -0.000003 | \n",
" -0.000018 | \n",
" 2.135645e-05 | \n",
" 0.000009 | \n",
" 0.000002 | \n",
" -1.748675e-05 | \n",
" 2.181139e-04 | \n",
" 0.000018 | \n",
" -0.000078 | \n",
"
\n",
" \n",
" | 13 | \n",
" 0.000080 | \n",
" -0.000046 | \n",
" -0.000040 | \n",
" 0.000033 | \n",
" -0.000092 | \n",
" 0.000013 | \n",
" -0.000005 | \n",
" -0.000085 | \n",
" 0.000020 | \n",
" 0.000096 | \n",
" ... | \n",
" -3.432920e-06 | \n",
" 0.000038 | \n",
" 0.000048 | \n",
" 5.295833e-06 | \n",
" 0.000013 | \n",
" 0.000030 | \n",
" 5.164307e-06 | \n",
" -9.442774e-05 | \n",
" -0.000010 | \n",
" -0.000014 | \n",
"
\n",
" \n",
" | 14 | \n",
" 0.000077 | \n",
" -0.000009 | \n",
" -0.000118 | \n",
" 0.000056 | \n",
" -0.000049 | \n",
" 0.000021 | \n",
" -0.000036 | \n",
" 0.000130 | \n",
" -0.000081 | \n",
" 0.000017 | \n",
" ... | \n",
" -9.383758e-06 | \n",
" -0.000027 | \n",
" -0.000019 | \n",
" -2.622800e-06 | \n",
" 0.000005 | \n",
" 0.000009 | \n",
" -1.135353e-05 | \n",
" 1.509882e-05 | \n",
" -0.000070 | \n",
" -0.000058 | \n",
"
\n",
" \n",
" | 15 | \n",
" -0.000113 | \n",
" -0.000045 | \n",
" 0.000040 | \n",
" 0.000020 | \n",
" -0.000040 | \n",
" -0.000010 | \n",
" -0.000081 | \n",
" 0.000031 | \n",
" -0.000066 | \n",
" 0.000002 | \n",
" ... | \n",
" 1.523565e-05 | \n",
" -0.000071 | \n",
" 0.000031 | \n",
" -6.086060e-06 | \n",
" -0.000013 | \n",
" 0.000003 | \n",
" 1.540947e-06 | \n",
" 1.604218e-04 | \n",
" 0.000140 | \n",
" 0.000034 | \n",
"
\n",
" \n",
"
\n",
"
16 rows × 1190 columns
\n",
"
"
],
"text/plain": [
" timeseries_rh_0 timeseries_rh_1 timeseries_rh_2 timeseries_rh_3 \\\n",
"0 -0.000165 0.000046 -0.000077 -0.000075 \n",
"1 0.000051 0.000027 -0.000011 -0.000003 \n",
"2 -0.000019 -0.000024 -0.000004 0.000027 \n",
"3 0.000037 0.000027 0.000050 0.000080 \n",
"4 -0.000030 0.000013 -0.000048 -0.000002 \n",
"5 -0.000027 0.000012 0.000049 -0.000040 \n",
"6 -0.000003 0.000011 0.000037 -0.000007 \n",
"7 0.000038 0.000019 0.000006 0.000017 \n",
"8 -0.000009 0.000007 0.000034 -0.000002 \n",
"9 0.000062 -0.000022 0.000060 0.000010 \n",
"10 0.000019 0.000110 0.000062 -0.000019 \n",
"11 0.000013 -0.000036 -0.000063 -0.000026 \n",
"12 -0.000033 -0.000027 0.000066 0.000013 \n",
"13 0.000080 -0.000046 -0.000040 0.000033 \n",
"14 0.000077 -0.000009 -0.000118 0.000056 \n",
"15 -0.000113 -0.000045 0.000040 0.000020 \n",
"\n",
" timeseries_rh_4 timeseries_rh_5 timeseries_rh_6 timeseries_rh_7 \\\n",
"0 0.000074 -0.000011 -0.000049 0.000047 \n",
"1 0.000022 0.000033 0.000049 0.000072 \n",
"2 -0.000054 0.000013 0.000064 -0.000118 \n",
"3 0.000038 0.000009 -0.000094 -0.000117 \n",
"4 0.000043 -0.000021 -0.000021 0.000045 \n",
"5 0.000137 -0.000020 0.000023 0.000057 \n",
"6 0.000026 0.000034 0.000007 -0.000071 \n",
"7 -0.000173 0.000027 -0.000058 0.000120 \n",
"8 0.000032 -0.000011 -0.000021 -0.000113 \n",
"9 -0.000017 0.000012 -0.000019 0.000093 \n",
"10 0.000011 -0.000007 -0.000059 -0.000056 \n",
"11 -0.000008 -0.000007 0.000029 -0.000117 \n",
"12 0.000021 -0.000012 0.000061 0.000105 \n",
"13 -0.000092 0.000013 -0.000005 -0.000085 \n",
"14 -0.000049 0.000021 -0.000036 0.000130 \n",
"15 -0.000040 -0.000010 -0.000081 0.000031 \n",
"\n",
" timeseries_rh_8 timeseries_rh_9 ... timeseries_lh_585 \\\n",
"0 -0.000024 -0.000024 ... -8.290498e-06 \n",
"1 0.000010 -0.000014 ... 9.147214e-06 \n",
"2 -0.000065 0.000063 ... -8.021237e-06 \n",
"3 0.000056 -0.000005 ... 2.637188e-07 \n",
"4 0.000015 -0.000008 ... -4.193627e-05 \n",
"5 0.000020 0.000018 ... -2.317345e-05 \n",
"6 -0.000019 -0.000004 ... 1.230251e-05 \n",
"7 0.000028 -0.000029 ... -2.762708e-05 \n",
"8 0.000040 0.000024 ... -1.286571e-06 \n",
"9 -0.000002 0.000028 ... -1.272615e-05 \n",
"10 0.000022 -0.000041 ... -1.971200e-05 \n",
"11 0.000052 0.000013 ... 4.998446e-07 \n",
"12 0.000020 0.000022 ... -3.358210e-06 \n",
"13 0.000020 0.000096 ... -3.432920e-06 \n",
"14 -0.000081 0.000017 ... -9.383758e-06 \n",
"15 -0.000066 0.000002 ... 1.523565e-05 \n",
"\n",
" timeseries_lh_586 timeseries_lh_587 timeseries_lh_588 \\\n",
"0 -0.000006 -0.000023 1.610693e-06 \n",
"1 -0.000033 -0.000015 4.817195e-06 \n",
"2 -0.000059 0.000004 -1.018778e-05 \n",
"3 -0.000015 -0.000011 -6.939784e-06 \n",
"4 -0.000005 -0.000038 -1.579288e-05 \n",
"5 0.000047 -0.000021 -3.256373e-06 \n",
"6 0.000065 0.000008 8.041033e-07 \n",
"7 0.000019 0.000015 -5.296039e-06 \n",
"8 -0.000022 -0.000027 2.031265e-05 \n",
"9 0.000027 -0.000015 -1.022682e-05 \n",
"10 0.000055 0.000020 -5.049802e-06 \n",
"11 -0.000018 -0.000016 -1.614390e-05 \n",
"12 -0.000003 -0.000018 2.135645e-05 \n",
"13 0.000038 0.000048 5.295833e-06 \n",
"14 -0.000027 -0.000019 -2.622800e-06 \n",
"15 -0.000071 0.000031 -6.086060e-06 \n",
"\n",
" timeseries_lh_589 timeseries_lh_590 timeseries_lh_591 \\\n",
"0 0.000015 -0.000006 4.867083e-06 \n",
"1 0.000001 0.000009 -3.010718e-05 \n",
"2 -0.000026 -0.000003 1.120659e-05 \n",
"3 0.000022 0.000005 -2.519195e-05 \n",
"4 -0.000010 0.000007 -2.074608e-05 \n",
"5 0.000013 0.000006 -2.017995e-05 \n",
"6 0.000001 -0.000026 -1.401379e-05 \n",
"7 -0.000021 0.000017 -3.512035e-06 \n",
"8 -0.000008 0.000035 -5.331094e-06 \n",
"9 -0.000044 -0.000006 4.879025e-06 \n",
"10 0.000014 0.000014 -4.576251e-07 \n",
"11 0.000006 -0.000006 1.069373e-05 \n",
"12 0.000009 0.000002 -1.748675e-05 \n",
"13 0.000013 0.000030 5.164307e-06 \n",
"14 0.000005 0.000009 -1.135353e-05 \n",
"15 -0.000013 0.000003 1.540947e-06 \n",
"\n",
" timeseries_lh_592 timeseries_lh_593 timeseries_lh_594 \n",
"0 -1.215231e-04 -0.000140 -0.000048 \n",
"1 5.807162e-05 -0.000070 0.000016 \n",
"2 -3.874970e-05 0.000057 -0.000008 \n",
"3 1.219129e-04 0.000021 0.000074 \n",
"4 1.288912e-04 0.000048 0.000015 \n",
"5 3.174790e-05 -0.000044 -0.000050 \n",
"6 2.662647e-05 -0.000020 0.000032 \n",
"7 -1.743649e-04 0.000015 0.000002 \n",
"8 -5.483645e-05 0.000103 -0.000014 \n",
"9 3.508208e-07 -0.000069 -0.000002 \n",
"10 -3.902154e-05 0.000023 -0.000025 \n",
"11 -6.800519e-06 0.000029 -0.000103 \n",
"12 2.181139e-04 0.000018 -0.000078 \n",
"13 -9.442774e-05 -0.000010 -0.000014 \n",
"14 1.509882e-05 -0.000070 -0.000058 \n",
"15 1.604218e-04 0.000140 0.000034 \n",
"\n",
"[16 rows x 1190 columns]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results.get_X_transform_df(data, fold=0)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.1 64-bit ('bpt': conda)",
"language": "python",
"name": "python39164bitbptconda7805b3f5d58e4b658b79cb94739371e6"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}