{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Loading Fake Timeseries Surface Data\n",
"\n",
"This notebook is designed to explore some functionality with loading DataFiles and using Loaders.\n",
"\n",
"This example will require some extra optional libraries, including nibabel and nilearn! Note: while nilearn is not imported, when trying to import SingleConnectivityMeasure, if nilearn is not installed, this will give an ImportError.\n",
"\n",
"We will also use fake data for this example - so no special datasets required!"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import BPt as bp\n",
"import nibabel as nib\n",
"import numpy as np\n",
"import pandas as pd\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def save_fake_timeseries_data():\n",
" '''Save fake timeseries and fake surface data.'''\n",
" \n",
" X = np.random.random(size = (20, 100, 10242))\n",
" os.makedirs('fake_time_data', exist_ok=True)\n",
" \n",
" for x in range(len(X)):\n",
" np.save('fake_time_data/' + str(x) + '_lh', X[x])\n",
" for x in range(len(X)):\n",
" np.save('fake_time_data/' + str(x) + '_rh', X[x])\n",
" \n",
"save_fake_timeseries_data()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Init a Dataset\n",
"data = bp.Dataset()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next, we are interested in loading in the files to the dataset as data files. There are a few different ways to do this, but we will use the method add_data_files. We will try and load the timeseries data first.\n",
"\n",
"First we need a dictionary mapping desired column name to location or a file glob (which is easier so let's use that)."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"
Data
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" timeseries_lh | \n",
" timeseries_rh | \n",
"
\n",
" \n",
" \n",
" \n",
" 13_lh | \n",
" Loc(0) | \n",
" nan | \n",
"
\n",
" \n",
" 9_lh | \n",
" Loc(1) | \n",
" nan | \n",
"
\n",
" \n",
" 8_lh | \n",
" Loc(2) | \n",
" nan | \n",
"
\n",
" \n",
" 2_lh | \n",
" Loc(3) | \n",
" nan | \n",
"
\n",
" \n",
" 16_lh | \n",
" Loc(4) | \n",
" nan | \n",
"
\n",
" \n",
" 11_lh | \n",
" Loc(5) | \n",
" nan | \n",
"
\n",
" \n",
" 6_lh | \n",
" Loc(6) | \n",
" nan | \n",
"
\n",
" \n",
" 7_lh | \n",
" Loc(7) | \n",
" nan | \n",
"
\n",
" \n",
" 1_lh | \n",
" Loc(8) | \n",
" nan | \n",
"
\n",
" \n",
" 17_lh | \n",
" Loc(9) | \n",
" nan | \n",
"
\n",
" \n",
" 19_lh | \n",
" Loc(10) | \n",
" nan | \n",
"
\n",
" \n",
" 15_lh | \n",
" Loc(11) | \n",
" nan | \n",
"
\n",
" \n",
" 10_lh | \n",
" Loc(12) | \n",
" nan | \n",
"
\n",
" \n",
" 3_lh | \n",
" Loc(13) | \n",
" nan | \n",
"
\n",
" \n",
" 14_lh | \n",
" Loc(14) | \n",
" nan | \n",
"
\n",
" \n",
" 0_lh | \n",
" Loc(15) | \n",
" nan | \n",
"
\n",
" \n",
" 18_lh | \n",
" Loc(16) | \n",
" nan | \n",
"
\n",
" \n",
" 5_lh | \n",
" Loc(17) | \n",
" nan | \n",
"
\n",
" \n",
" 4_lh | \n",
" Loc(18) | \n",
" nan | \n",
"
\n",
" \n",
" 12_lh | \n",
" Loc(19) | \n",
" nan | \n",
"
\n",
" \n",
" 11_rh | \n",
" nan | \n",
" Loc(20) | \n",
"
\n",
" \n",
" 10_rh | \n",
" nan | \n",
" Loc(21) | \n",
"
\n",
" \n",
" 12_rh | \n",
" nan | \n",
" Loc(22) | \n",
"
\n",
" \n",
" 3_rh | \n",
" nan | \n",
" Loc(23) | \n",
"
\n",
" \n",
" 0_rh | \n",
" nan | \n",
" Loc(24) | \n",
"
\n",
" \n",
" 18_rh | \n",
" nan | \n",
" Loc(25) | \n",
"
\n",
" \n",
" 1_rh | \n",
" nan | \n",
" Loc(26) | \n",
"
\n",
" \n",
" 9_rh | \n",
" nan | \n",
" Loc(27) | \n",
"
\n",
" \n",
" 14_rh | \n",
" nan | \n",
" Loc(28) | \n",
"
\n",
" \n",
" 6_rh | \n",
" nan | \n",
" Loc(29) | \n",
"
\n",
" \n",
" 15_rh | \n",
" nan | \n",
" Loc(30) | \n",
"
\n",
" \n",
" 7_rh | \n",
" nan | \n",
" Loc(31) | \n",
"
\n",
" \n",
" 4_rh | \n",
" nan | \n",
" Loc(32) | \n",
"
\n",
" \n",
" 19_rh | \n",
" nan | \n",
" Loc(33) | \n",
"
\n",
" \n",
" 5_rh | \n",
" nan | \n",
" Loc(34) | \n",
"
\n",
" \n",
" 2_rh | \n",
" nan | \n",
" Loc(35) | \n",
"
\n",
" \n",
" 13_rh | \n",
" nan | \n",
" Loc(36) | \n",
"
\n",
" \n",
" 8_rh | \n",
" nan | \n",
" Loc(37) | \n",
"
\n",
" \n",
" 16_rh | \n",
" nan | \n",
" Loc(38) | \n",
"
\n",
" \n",
" 17_rh | \n",
" nan | \n",
" Loc(39) | \n",
"
\n",
" \n",
"
\n",
"
\n"
],
"text/plain": [
" timeseries_lh timeseries_rh\n",
"13_lh 0.0 NaN\n",
"9_lh 1.0 NaN\n",
"8_lh 2.0 NaN\n",
"2_lh 3.0 NaN\n",
"16_lh 4.0 NaN\n",
"11_lh 5.0 NaN\n",
"6_lh 6.0 NaN\n",
"7_lh 7.0 NaN\n",
"1_lh 8.0 NaN\n",
"17_lh 9.0 NaN\n",
"19_lh 10.0 NaN\n",
"15_lh 11.0 NaN\n",
"10_lh 12.0 NaN\n",
"3_lh 13.0 NaN\n",
"14_lh 14.0 NaN\n",
"0_lh 15.0 NaN\n",
"18_lh 16.0 NaN\n",
"5_lh 17.0 NaN\n",
"4_lh 18.0 NaN\n",
"12_lh 19.0 NaN\n",
"11_rh NaN 20.0\n",
"10_rh NaN 21.0\n",
"12_rh NaN 22.0\n",
"3_rh NaN 23.0\n",
"0_rh NaN 24.0\n",
"18_rh NaN 25.0\n",
"1_rh NaN 26.0\n",
"9_rh NaN 27.0\n",
"14_rh NaN 28.0\n",
"6_rh NaN 29.0\n",
"15_rh NaN 30.0\n",
"7_rh NaN 31.0\n",
"4_rh NaN 32.0\n",
"19_rh NaN 33.0\n",
"5_rh NaN 34.0\n",
"2_rh NaN 35.0\n",
"13_rh NaN 36.0\n",
"8_rh NaN 37.0\n",
"16_rh NaN 38.0\n",
"17_rh NaN 39.0"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The *'s just mean wildcard\n",
"files = {'timeseries_lh': 'fake_time_data/*_lh*',\n",
" 'timeseries_rh': 'fake_time_data/*_rh*'}\n",
"\n",
"# Now let's try loading with 'auto' as the file to subject function\n",
"data.add_data_files(files, 'auto')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can see 'auto' doesn't work for us, so we can try writing our own function instead."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
Data
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" timeseries_lh | \n",
" timeseries_rh | \n",
"
\n",
" \n",
" \n",
" \n",
" 13 | \n",
" Loc(0) | \n",
" Loc(36) | \n",
"
\n",
" \n",
" 9 | \n",
" Loc(1) | \n",
" Loc(27) | \n",
"
\n",
" \n",
" 8 | \n",
" Loc(2) | \n",
" Loc(37) | \n",
"
\n",
" \n",
" 2 | \n",
" Loc(3) | \n",
" Loc(35) | \n",
"
\n",
" \n",
" 16 | \n",
" Loc(4) | \n",
" Loc(38) | \n",
"
\n",
" \n",
" 11 | \n",
" Loc(5) | \n",
" Loc(20) | \n",
"
\n",
" \n",
" 6 | \n",
" Loc(6) | \n",
" Loc(29) | \n",
"
\n",
" \n",
" 7 | \n",
" Loc(7) | \n",
" Loc(31) | \n",
"
\n",
" \n",
" 1 | \n",
" Loc(8) | \n",
" Loc(26) | \n",
"
\n",
" \n",
" 17 | \n",
" Loc(9) | \n",
" Loc(39) | \n",
"
\n",
" \n",
" 19 | \n",
" Loc(10) | \n",
" Loc(33) | \n",
"
\n",
" \n",
" 15 | \n",
" Loc(11) | \n",
" Loc(30) | \n",
"
\n",
" \n",
" 10 | \n",
" Loc(12) | \n",
" Loc(21) | \n",
"
\n",
" \n",
" 3 | \n",
" Loc(13) | \n",
" Loc(23) | \n",
"
\n",
" \n",
" 14 | \n",
" Loc(14) | \n",
" Loc(28) | \n",
"
\n",
" \n",
" 0 | \n",
" Loc(15) | \n",
" Loc(24) | \n",
"
\n",
" \n",
" 18 | \n",
" Loc(16) | \n",
" Loc(25) | \n",
"
\n",
" \n",
" 5 | \n",
" Loc(17) | \n",
" Loc(34) | \n",
"
\n",
" \n",
" 4 | \n",
" Loc(18) | \n",
" Loc(32) | \n",
"
\n",
" \n",
" 12 | \n",
" Loc(19) | \n",
" Loc(22) | \n",
"
\n",
" \n",
"
\n",
"
\n"
],
"text/plain": [
" timeseries_lh timeseries_rh\n",
"13 0.0 36.0\n",
"9 1.0 27.0\n",
"8 2.0 37.0\n",
"2 3.0 35.0\n",
"16 4.0 38.0\n",
"11 5.0 20.0\n",
"6 6.0 29.0\n",
"7 7.0 31.0\n",
"1 8.0 26.0\n",
"17 9.0 39.0\n",
"19 10.0 33.0\n",
"15 11.0 30.0\n",
"10 12.0 21.0\n",
"3 13.0 23.0\n",
"14 14.0 28.0\n",
"0 15.0 24.0\n",
"18 16.0 25.0\n",
"5 17.0 34.0\n",
"4 18.0 32.0\n",
"12 19.0 22.0"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def file_to_subj(loc):\n",
" return loc.split('/')[-1].split('_')[0]\n",
"\n",
"# Actually load it this time\n",
"data = data.add_data_files(files, file_to_subj)\n",
"data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"What's this though? Why are the files showing up as Loc(int). Whats going on is that the data files are really stored as just integers, see:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"13 0.0\n",
"9 1.0\n",
"8 2.0\n",
"2 3.0\n",
"16 4.0\n",
"11 5.0\n",
"6 6.0\n",
"7 7.0\n",
"1 8.0\n",
"17 9.0\n",
"19 10.0\n",
"15 11.0\n",
"10 12.0\n",
"3 13.0\n",
"14 14.0\n",
"0 15.0\n",
"18 16.0\n",
"5 17.0\n",
"4 18.0\n",
"12 19.0\n",
"Name: timeseries_lh, dtype: float64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['timeseries_lh']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"They correspond to locations in a stored file mapping (note: you don't need to worry about any of this most of the time)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(DataFile(loc='/home/sage/BPt/Examples/Short_Examples/fake_time_data/13_lh.npy'),\n",
" DataFile(loc='/home/sage/BPt/Examples/Short_Examples/fake_time_data/9_lh.npy'),\n",
" DataFile(loc='/home/sage/BPt/Examples/Short_Examples/fake_time_data/8_lh.npy'))"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.file_mapping[0], data.file_mapping[1], data.file_mapping[2] "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's add a fake target to our dataset now"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
Data
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" timeseries_lh | \n",
" timeseries_rh | \n",
"
\n",
" \n",
" \n",
" \n",
" 13 | \n",
" Loc(0) | \n",
" Loc(36) | \n",
"
\n",
" \n",
" 9 | \n",
" Loc(1) | \n",
" Loc(27) | \n",
"
\n",
" \n",
" 8 | \n",
" Loc(2) | \n",
" Loc(37) | \n",
"
\n",
" \n",
" 2 | \n",
" Loc(3) | \n",
" Loc(35) | \n",
"
\n",
" \n",
" 16 | \n",
" Loc(4) | \n",
" Loc(38) | \n",
"
\n",
" \n",
" 11 | \n",
" Loc(5) | \n",
" Loc(20) | \n",
"
\n",
" \n",
" 6 | \n",
" Loc(6) | \n",
" Loc(29) | \n",
"
\n",
" \n",
" 7 | \n",
" Loc(7) | \n",
" Loc(31) | \n",
"
\n",
" \n",
" 1 | \n",
" Loc(8) | \n",
" Loc(26) | \n",
"
\n",
" \n",
" 17 | \n",
" Loc(9) | \n",
" Loc(39) | \n",
"
\n",
" \n",
" 19 | \n",
" Loc(10) | \n",
" Loc(33) | \n",
"
\n",
" \n",
" 15 | \n",
" Loc(11) | \n",
" Loc(30) | \n",
"
\n",
" \n",
" 10 | \n",
" Loc(12) | \n",
" Loc(21) | \n",
"
\n",
" \n",
" 3 | \n",
" Loc(13) | \n",
" Loc(23) | \n",
"
\n",
" \n",
" 14 | \n",
" Loc(14) | \n",
" Loc(28) | \n",
"
\n",
" \n",
" 0 | \n",
" Loc(15) | \n",
" Loc(24) | \n",
"
\n",
" \n",
" 18 | \n",
" Loc(16) | \n",
" Loc(25) | \n",
"
\n",
" \n",
" 5 | \n",
" Loc(17) | \n",
" Loc(34) | \n",
"
\n",
" \n",
" 4 | \n",
" Loc(18) | \n",
" Loc(32) | \n",
"
\n",
" \n",
" 12 | \n",
" Loc(19) | \n",
" Loc(22) | \n",
"
\n",
" \n",
"
\n",
"
\n",
"\n",
"
Target
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" t | \n",
"
\n",
" \n",
" \n",
" \n",
" 13 | \n",
" 0.656648 | \n",
"
\n",
" \n",
" 9 | \n",
" 0.298354 | \n",
"
\n",
" \n",
" 8 | \n",
" 0.495359 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.414660 | \n",
"
\n",
" \n",
" 16 | \n",
" 0.606687 | \n",
"
\n",
" \n",
" 11 | \n",
" 0.453163 | \n",
"
\n",
" \n",
" 6 | \n",
" 0.853856 | \n",
"
\n",
" \n",
" 7 | \n",
" 0.044329 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.916036 | \n",
"
\n",
" \n",
" 17 | \n",
" 0.865733 | \n",
"
\n",
" \n",
" 19 | \n",
" 0.015055 | \n",
"
\n",
" \n",
" 15 | \n",
" 0.082130 | \n",
"
\n",
" \n",
" 10 | \n",
" 0.731628 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.074572 | \n",
"
\n",
" \n",
" 14 | \n",
" 0.589903 | \n",
"
\n",
" \n",
" 0 | \n",
" 0.768409 | \n",
"
\n",
" \n",
" 18 | \n",
" 0.536750 | \n",
"
\n",
" \n",
" 5 | \n",
" 0.401537 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.580557 | \n",
"
\n",
" \n",
" 12 | \n",
" 0.508457 | \n",
"
\n",
" \n",
"
\n",
"
\n"
],
"text/plain": [
" timeseries_lh timeseries_rh t\n",
"13 0.0 36.0 0.656648\n",
"9 1.0 27.0 0.298354\n",
"8 2.0 37.0 0.495359\n",
"2 3.0 35.0 0.414660\n",
"16 4.0 38.0 0.606687\n",
"11 5.0 20.0 0.453163\n",
"6 6.0 29.0 0.853856\n",
"7 7.0 31.0 0.044329\n",
"1 8.0 26.0 0.916036\n",
"17 9.0 39.0 0.865733\n",
"19 10.0 33.0 0.015055\n",
"15 11.0 30.0 0.082130\n",
"10 12.0 21.0 0.731628\n",
"3 13.0 23.0 0.074572\n",
"14 14.0 28.0 0.589903\n",
"0 15.0 24.0 0.768409\n",
"18 16.0 25.0 0.536750\n",
"5 17.0 34.0 0.401537\n",
"4 18.0 32.0 0.580557\n",
"12 19.0 22.0 0.508457"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data['t'] = np.random.random(len(data))\n",
"data.set_target('t', inplace=True)\n",
"data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next we will generate a Loader to apply a parcellation, then extract a measure of connectivity."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"from BPt.extensions import SurfLabels\n",
"\n",
"lh_parc = SurfLabels(labels='data/lh.aparc.annot', vectorize=False)\n",
"rh_parc = SurfLabels(labels='data/rh.aparc.annot', vectorize=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can see how this object works on example data first."
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(100, 10242)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ex_lh = data.file_mapping[0].load()\n",
"ex_lh.shape"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(100, 35)"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"trans = lh_parc.fit_transform(ex_lh)\n",
"trans.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We essentially get a reduction from 10242 features to 35."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next, we want to transform the matrix into a correlation matrix."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"from BPt.extensions import SingleConnectivityMeasure\n",
"scm = SingleConnectivityMeasure(kind='covariance', discard_diagonal=True, vectorize=True)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(595,)"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scm.fit_transform(trans).shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The single connectivity measure is just a wrapper designed to let the ConnectivityMeasure from nilearn work with a single subject's data at a time.\n",
"\n",
"Next, let's use the input special Pipe wrapper to compose these two objects into their own pipeline"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"lh_loader = bp.Loader(bp.Pipe([lh_parc, scm]), scope='_lh')\n",
"rh_loader = bp.Loader(bp.Pipe([rh_parc, scm]), scope='_rh')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Define a simple pipeline with just our loader steps, then evaluate with mostly default settings."
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "ee8b7bc1991f4995a160f3aa2aa124ce",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Folds: 0%| | 0/5 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"BPtEvaluator\n",
"------------\n",
"mean_scores = {'explained_variance': -0.3492082271322736, 'neg_mean_squared_error': -0.08532586202634963}\n",
"std_scores = {'explained_variance': 0.37944917198666483, 'neg_mean_squared_error': 0.025409784568717956}\n",
"\n",
"Saved Attributes: ['estimators', 'preds', 'timing', 'train_subjects', 'val_subjects', 'feat_names', 'ps', 'mean_scores', 'std_scores', 'weighted_mean_scores', 'scores', 'fis_', 'coef_']\n",
"\n",
"Available Methods: ['get_preds_dfs', 'get_fis', 'get_coef_', 'permutation_importance']\n",
"\n",
"Evaluated with:\n",
"ProblemSpec(problem_type='regression',\n",
" scorer={'explained_variance': make_scorer(explained_variance_score),\n",
" 'neg_mean_squared_error': make_scorer(mean_squared_error, greater_is_better=False)},\n",
" subjects='all', target='t')"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pipeline = bp.Pipeline([lh_loader, rh_loader, bp.Model('linear')])\n",
"\n",
"results = bp.evaluate(pipeline, data)\n",
"results"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Don't be discouraged that this didn't work, we are after all trying to predict random noise with random noise ... "
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"loader_pipe0\n",
"loader_pipe1\n",
"linear regressor\n"
]
}
],
"source": [
"# These are the steps of the pipeline\n",
"fold0_pipeline = results.estimators[0]\n",
"for step in fold0_pipeline.steps:\n",
" print(step[0])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can investigate pieces, or use special functions like"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" timeseries_rh_0 | \n",
" timeseries_rh_1 | \n",
" timeseries_rh_2 | \n",
" timeseries_rh_3 | \n",
" timeseries_rh_4 | \n",
" timeseries_rh_5 | \n",
" timeseries_rh_6 | \n",
" timeseries_rh_7 | \n",
" timeseries_rh_8 | \n",
" timeseries_rh_9 | \n",
" ... | \n",
" timeseries_lh_585 | \n",
" timeseries_lh_586 | \n",
" timeseries_lh_587 | \n",
" timeseries_lh_588 | \n",
" timeseries_lh_589 | \n",
" timeseries_lh_590 | \n",
" timeseries_lh_591 | \n",
" timeseries_lh_592 | \n",
" timeseries_lh_593 | \n",
" timeseries_lh_594 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" -0.000165 | \n",
" 0.000046 | \n",
" -0.000077 | \n",
" -0.000075 | \n",
" 0.000074 | \n",
" -0.000011 | \n",
" -0.000049 | \n",
" 0.000047 | \n",
" -0.000024 | \n",
" -0.000024 | \n",
" ... | \n",
" -8.290498e-06 | \n",
" -0.000006 | \n",
" -0.000023 | \n",
" 1.610693e-06 | \n",
" 0.000015 | \n",
" -0.000006 | \n",
" 4.867083e-06 | \n",
" -1.215231e-04 | \n",
" -0.000140 | \n",
" -0.000048 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.000051 | \n",
" 0.000027 | \n",
" -0.000011 | \n",
" -0.000003 | \n",
" 0.000022 | \n",
" 0.000033 | \n",
" 0.000049 | \n",
" 0.000072 | \n",
" 0.000010 | \n",
" -0.000014 | \n",
" ... | \n",
" 9.147214e-06 | \n",
" -0.000033 | \n",
" -0.000015 | \n",
" 4.817195e-06 | \n",
" 0.000001 | \n",
" 0.000009 | \n",
" -3.010718e-05 | \n",
" 5.807162e-05 | \n",
" -0.000070 | \n",
" 0.000016 | \n",
"
\n",
" \n",
" 2 | \n",
" -0.000019 | \n",
" -0.000024 | \n",
" -0.000004 | \n",
" 0.000027 | \n",
" -0.000054 | \n",
" 0.000013 | \n",
" 0.000064 | \n",
" -0.000118 | \n",
" -0.000065 | \n",
" 0.000063 | \n",
" ... | \n",
" -8.021237e-06 | \n",
" -0.000059 | \n",
" 0.000004 | \n",
" -1.018778e-05 | \n",
" -0.000026 | \n",
" -0.000003 | \n",
" 1.120659e-05 | \n",
" -3.874970e-05 | \n",
" 0.000057 | \n",
" -0.000008 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.000037 | \n",
" 0.000027 | \n",
" 0.000050 | \n",
" 0.000080 | \n",
" 0.000038 | \n",
" 0.000009 | \n",
" -0.000094 | \n",
" -0.000117 | \n",
" 0.000056 | \n",
" -0.000005 | \n",
" ... | \n",
" 2.637188e-07 | \n",
" -0.000015 | \n",
" -0.000011 | \n",
" -6.939784e-06 | \n",
" 0.000022 | \n",
" 0.000005 | \n",
" -2.519195e-05 | \n",
" 1.219129e-04 | \n",
" 0.000021 | \n",
" 0.000074 | \n",
"
\n",
" \n",
" 4 | \n",
" -0.000030 | \n",
" 0.000013 | \n",
" -0.000048 | \n",
" -0.000002 | \n",
" 0.000043 | \n",
" -0.000021 | \n",
" -0.000021 | \n",
" 0.000045 | \n",
" 0.000015 | \n",
" -0.000008 | \n",
" ... | \n",
" -4.193627e-05 | \n",
" -0.000005 | \n",
" -0.000038 | \n",
" -1.579288e-05 | \n",
" -0.000010 | \n",
" 0.000007 | \n",
" -2.074608e-05 | \n",
" 1.288912e-04 | \n",
" 0.000048 | \n",
" 0.000015 | \n",
"
\n",
" \n",
" 5 | \n",
" -0.000027 | \n",
" 0.000012 | \n",
" 0.000049 | \n",
" -0.000040 | \n",
" 0.000137 | \n",
" -0.000020 | \n",
" 0.000023 | \n",
" 0.000057 | \n",
" 0.000020 | \n",
" 0.000018 | \n",
" ... | \n",
" -2.317345e-05 | \n",
" 0.000047 | \n",
" -0.000021 | \n",
" -3.256373e-06 | \n",
" 0.000013 | \n",
" 0.000006 | \n",
" -2.017995e-05 | \n",
" 3.174790e-05 | \n",
" -0.000044 | \n",
" -0.000050 | \n",
"
\n",
" \n",
" 6 | \n",
" -0.000003 | \n",
" 0.000011 | \n",
" 0.000037 | \n",
" -0.000007 | \n",
" 0.000026 | \n",
" 0.000034 | \n",
" 0.000007 | \n",
" -0.000071 | \n",
" -0.000019 | \n",
" -0.000004 | \n",
" ... | \n",
" 1.230251e-05 | \n",
" 0.000065 | \n",
" 0.000008 | \n",
" 8.041033e-07 | \n",
" 0.000001 | \n",
" -0.000026 | \n",
" -1.401379e-05 | \n",
" 2.662647e-05 | \n",
" -0.000020 | \n",
" 0.000032 | \n",
"
\n",
" \n",
" 7 | \n",
" 0.000038 | \n",
" 0.000019 | \n",
" 0.000006 | \n",
" 0.000017 | \n",
" -0.000173 | \n",
" 0.000027 | \n",
" -0.000058 | \n",
" 0.000120 | \n",
" 0.000028 | \n",
" -0.000029 | \n",
" ... | \n",
" -2.762708e-05 | \n",
" 0.000019 | \n",
" 0.000015 | \n",
" -5.296039e-06 | \n",
" -0.000021 | \n",
" 0.000017 | \n",
" -3.512035e-06 | \n",
" -1.743649e-04 | \n",
" 0.000015 | \n",
" 0.000002 | \n",
"
\n",
" \n",
" 8 | \n",
" -0.000009 | \n",
" 0.000007 | \n",
" 0.000034 | \n",
" -0.000002 | \n",
" 0.000032 | \n",
" -0.000011 | \n",
" -0.000021 | \n",
" -0.000113 | \n",
" 0.000040 | \n",
" 0.000024 | \n",
" ... | \n",
" -1.286571e-06 | \n",
" -0.000022 | \n",
" -0.000027 | \n",
" 2.031265e-05 | \n",
" -0.000008 | \n",
" 0.000035 | \n",
" -5.331094e-06 | \n",
" -5.483645e-05 | \n",
" 0.000103 | \n",
" -0.000014 | \n",
"
\n",
" \n",
" 9 | \n",
" 0.000062 | \n",
" -0.000022 | \n",
" 0.000060 | \n",
" 0.000010 | \n",
" -0.000017 | \n",
" 0.000012 | \n",
" -0.000019 | \n",
" 0.000093 | \n",
" -0.000002 | \n",
" 0.000028 | \n",
" ... | \n",
" -1.272615e-05 | \n",
" 0.000027 | \n",
" -0.000015 | \n",
" -1.022682e-05 | \n",
" -0.000044 | \n",
" -0.000006 | \n",
" 4.879025e-06 | \n",
" 3.508208e-07 | \n",
" -0.000069 | \n",
" -0.000002 | \n",
"
\n",
" \n",
" 10 | \n",
" 0.000019 | \n",
" 0.000110 | \n",
" 0.000062 | \n",
" -0.000019 | \n",
" 0.000011 | \n",
" -0.000007 | \n",
" -0.000059 | \n",
" -0.000056 | \n",
" 0.000022 | \n",
" -0.000041 | \n",
" ... | \n",
" -1.971200e-05 | \n",
" 0.000055 | \n",
" 0.000020 | \n",
" -5.049802e-06 | \n",
" 0.000014 | \n",
" 0.000014 | \n",
" -4.576251e-07 | \n",
" -3.902154e-05 | \n",
" 0.000023 | \n",
" -0.000025 | \n",
"
\n",
" \n",
" 11 | \n",
" 0.000013 | \n",
" -0.000036 | \n",
" -0.000063 | \n",
" -0.000026 | \n",
" -0.000008 | \n",
" -0.000007 | \n",
" 0.000029 | \n",
" -0.000117 | \n",
" 0.000052 | \n",
" 0.000013 | \n",
" ... | \n",
" 4.998446e-07 | \n",
" -0.000018 | \n",
" -0.000016 | \n",
" -1.614390e-05 | \n",
" 0.000006 | \n",
" -0.000006 | \n",
" 1.069373e-05 | \n",
" -6.800519e-06 | \n",
" 0.000029 | \n",
" -0.000103 | \n",
"
\n",
" \n",
" 12 | \n",
" -0.000033 | \n",
" -0.000027 | \n",
" 0.000066 | \n",
" 0.000013 | \n",
" 0.000021 | \n",
" -0.000012 | \n",
" 0.000061 | \n",
" 0.000105 | \n",
" 0.000020 | \n",
" 0.000022 | \n",
" ... | \n",
" -3.358210e-06 | \n",
" -0.000003 | \n",
" -0.000018 | \n",
" 2.135645e-05 | \n",
" 0.000009 | \n",
" 0.000002 | \n",
" -1.748675e-05 | \n",
" 2.181139e-04 | \n",
" 0.000018 | \n",
" -0.000078 | \n",
"
\n",
" \n",
" 13 | \n",
" 0.000080 | \n",
" -0.000046 | \n",
" -0.000040 | \n",
" 0.000033 | \n",
" -0.000092 | \n",
" 0.000013 | \n",
" -0.000005 | \n",
" -0.000085 | \n",
" 0.000020 | \n",
" 0.000096 | \n",
" ... | \n",
" -3.432920e-06 | \n",
" 0.000038 | \n",
" 0.000048 | \n",
" 5.295833e-06 | \n",
" 0.000013 | \n",
" 0.000030 | \n",
" 5.164307e-06 | \n",
" -9.442774e-05 | \n",
" -0.000010 | \n",
" -0.000014 | \n",
"
\n",
" \n",
" 14 | \n",
" 0.000077 | \n",
" -0.000009 | \n",
" -0.000118 | \n",
" 0.000056 | \n",
" -0.000049 | \n",
" 0.000021 | \n",
" -0.000036 | \n",
" 0.000130 | \n",
" -0.000081 | \n",
" 0.000017 | \n",
" ... | \n",
" -9.383758e-06 | \n",
" -0.000027 | \n",
" -0.000019 | \n",
" -2.622800e-06 | \n",
" 0.000005 | \n",
" 0.000009 | \n",
" -1.135353e-05 | \n",
" 1.509882e-05 | \n",
" -0.000070 | \n",
" -0.000058 | \n",
"
\n",
" \n",
" 15 | \n",
" -0.000113 | \n",
" -0.000045 | \n",
" 0.000040 | \n",
" 0.000020 | \n",
" -0.000040 | \n",
" -0.000010 | \n",
" -0.000081 | \n",
" 0.000031 | \n",
" -0.000066 | \n",
" 0.000002 | \n",
" ... | \n",
" 1.523565e-05 | \n",
" -0.000071 | \n",
" 0.000031 | \n",
" -6.086060e-06 | \n",
" -0.000013 | \n",
" 0.000003 | \n",
" 1.540947e-06 | \n",
" 1.604218e-04 | \n",
" 0.000140 | \n",
" 0.000034 | \n",
"
\n",
" \n",
"
\n",
"
16 rows × 1190 columns
\n",
"
"
],
"text/plain": [
" timeseries_rh_0 timeseries_rh_1 timeseries_rh_2 timeseries_rh_3 \\\n",
"0 -0.000165 0.000046 -0.000077 -0.000075 \n",
"1 0.000051 0.000027 -0.000011 -0.000003 \n",
"2 -0.000019 -0.000024 -0.000004 0.000027 \n",
"3 0.000037 0.000027 0.000050 0.000080 \n",
"4 -0.000030 0.000013 -0.000048 -0.000002 \n",
"5 -0.000027 0.000012 0.000049 -0.000040 \n",
"6 -0.000003 0.000011 0.000037 -0.000007 \n",
"7 0.000038 0.000019 0.000006 0.000017 \n",
"8 -0.000009 0.000007 0.000034 -0.000002 \n",
"9 0.000062 -0.000022 0.000060 0.000010 \n",
"10 0.000019 0.000110 0.000062 -0.000019 \n",
"11 0.000013 -0.000036 -0.000063 -0.000026 \n",
"12 -0.000033 -0.000027 0.000066 0.000013 \n",
"13 0.000080 -0.000046 -0.000040 0.000033 \n",
"14 0.000077 -0.000009 -0.000118 0.000056 \n",
"15 -0.000113 -0.000045 0.000040 0.000020 \n",
"\n",
" timeseries_rh_4 timeseries_rh_5 timeseries_rh_6 timeseries_rh_7 \\\n",
"0 0.000074 -0.000011 -0.000049 0.000047 \n",
"1 0.000022 0.000033 0.000049 0.000072 \n",
"2 -0.000054 0.000013 0.000064 -0.000118 \n",
"3 0.000038 0.000009 -0.000094 -0.000117 \n",
"4 0.000043 -0.000021 -0.000021 0.000045 \n",
"5 0.000137 -0.000020 0.000023 0.000057 \n",
"6 0.000026 0.000034 0.000007 -0.000071 \n",
"7 -0.000173 0.000027 -0.000058 0.000120 \n",
"8 0.000032 -0.000011 -0.000021 -0.000113 \n",
"9 -0.000017 0.000012 -0.000019 0.000093 \n",
"10 0.000011 -0.000007 -0.000059 -0.000056 \n",
"11 -0.000008 -0.000007 0.000029 -0.000117 \n",
"12 0.000021 -0.000012 0.000061 0.000105 \n",
"13 -0.000092 0.000013 -0.000005 -0.000085 \n",
"14 -0.000049 0.000021 -0.000036 0.000130 \n",
"15 -0.000040 -0.000010 -0.000081 0.000031 \n",
"\n",
" timeseries_rh_8 timeseries_rh_9 ... timeseries_lh_585 \\\n",
"0 -0.000024 -0.000024 ... -8.290498e-06 \n",
"1 0.000010 -0.000014 ... 9.147214e-06 \n",
"2 -0.000065 0.000063 ... -8.021237e-06 \n",
"3 0.000056 -0.000005 ... 2.637188e-07 \n",
"4 0.000015 -0.000008 ... -4.193627e-05 \n",
"5 0.000020 0.000018 ... -2.317345e-05 \n",
"6 -0.000019 -0.000004 ... 1.230251e-05 \n",
"7 0.000028 -0.000029 ... -2.762708e-05 \n",
"8 0.000040 0.000024 ... -1.286571e-06 \n",
"9 -0.000002 0.000028 ... -1.272615e-05 \n",
"10 0.000022 -0.000041 ... -1.971200e-05 \n",
"11 0.000052 0.000013 ... 4.998446e-07 \n",
"12 0.000020 0.000022 ... -3.358210e-06 \n",
"13 0.000020 0.000096 ... -3.432920e-06 \n",
"14 -0.000081 0.000017 ... -9.383758e-06 \n",
"15 -0.000066 0.000002 ... 1.523565e-05 \n",
"\n",
" timeseries_lh_586 timeseries_lh_587 timeseries_lh_588 \\\n",
"0 -0.000006 -0.000023 1.610693e-06 \n",
"1 -0.000033 -0.000015 4.817195e-06 \n",
"2 -0.000059 0.000004 -1.018778e-05 \n",
"3 -0.000015 -0.000011 -6.939784e-06 \n",
"4 -0.000005 -0.000038 -1.579288e-05 \n",
"5 0.000047 -0.000021 -3.256373e-06 \n",
"6 0.000065 0.000008 8.041033e-07 \n",
"7 0.000019 0.000015 -5.296039e-06 \n",
"8 -0.000022 -0.000027 2.031265e-05 \n",
"9 0.000027 -0.000015 -1.022682e-05 \n",
"10 0.000055 0.000020 -5.049802e-06 \n",
"11 -0.000018 -0.000016 -1.614390e-05 \n",
"12 -0.000003 -0.000018 2.135645e-05 \n",
"13 0.000038 0.000048 5.295833e-06 \n",
"14 -0.000027 -0.000019 -2.622800e-06 \n",
"15 -0.000071 0.000031 -6.086060e-06 \n",
"\n",
" timeseries_lh_589 timeseries_lh_590 timeseries_lh_591 \\\n",
"0 0.000015 -0.000006 4.867083e-06 \n",
"1 0.000001 0.000009 -3.010718e-05 \n",
"2 -0.000026 -0.000003 1.120659e-05 \n",
"3 0.000022 0.000005 -2.519195e-05 \n",
"4 -0.000010 0.000007 -2.074608e-05 \n",
"5 0.000013 0.000006 -2.017995e-05 \n",
"6 0.000001 -0.000026 -1.401379e-05 \n",
"7 -0.000021 0.000017 -3.512035e-06 \n",
"8 -0.000008 0.000035 -5.331094e-06 \n",
"9 -0.000044 -0.000006 4.879025e-06 \n",
"10 0.000014 0.000014 -4.576251e-07 \n",
"11 0.000006 -0.000006 1.069373e-05 \n",
"12 0.000009 0.000002 -1.748675e-05 \n",
"13 0.000013 0.000030 5.164307e-06 \n",
"14 0.000005 0.000009 -1.135353e-05 \n",
"15 -0.000013 0.000003 1.540947e-06 \n",
"\n",
" timeseries_lh_592 timeseries_lh_593 timeseries_lh_594 \n",
"0 -1.215231e-04 -0.000140 -0.000048 \n",
"1 5.807162e-05 -0.000070 0.000016 \n",
"2 -3.874970e-05 0.000057 -0.000008 \n",
"3 1.219129e-04 0.000021 0.000074 \n",
"4 1.288912e-04 0.000048 0.000015 \n",
"5 3.174790e-05 -0.000044 -0.000050 \n",
"6 2.662647e-05 -0.000020 0.000032 \n",
"7 -1.743649e-04 0.000015 0.000002 \n",
"8 -5.483645e-05 0.000103 -0.000014 \n",
"9 3.508208e-07 -0.000069 -0.000002 \n",
"10 -3.902154e-05 0.000023 -0.000025 \n",
"11 -6.800519e-06 0.000029 -0.000103 \n",
"12 2.181139e-04 0.000018 -0.000078 \n",
"13 -9.442774e-05 -0.000010 -0.000014 \n",
"14 1.509882e-05 -0.000070 -0.000058 \n",
"15 1.604218e-04 0.000140 0.000034 \n",
"\n",
"[16 rows x 1190 columns]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results.get_X_transform_df(data, fold=0)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.1 64-bit ('bpt': conda)",
"language": "python",
"name": "python39164bitbptconda7805b3f5d58e4b658b79cb94739371e6"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}