Homework: DataFrames and Plotting¶
In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format='retina' # for Mac retina displays
In [2]:
import seaborn as sns

# Fetch seaborn's example dataset named 'fmri' as a DataFrame.
# NOTE(review): load_dataset pulls from seaborn's online data repository
# (cached after the first download), so a fresh run needs network access.
dat = sns.load_dataset(name='fmri')
DataFrames¶
In [3]:
# Report the frame's (rows, columns) first, then show its leading ten rows.
print(dat.shape)
dat.iloc[:10]
(1064, 5)
Out[3]:
subject | timepoint | event | region | signal | |
---|---|---|---|---|---|
0 | s13 | 18 | stim | parietal | -0.017552 |
1 | s5 | 14 | stim | parietal | -0.080883 |
2 | s12 | 18 | stim | parietal | -0.081033 |
3 | s11 | 18 | stim | parietal | -0.046134 |
4 | s10 | 18 | stim | parietal | -0.037970 |
5 | s9 | 18 | stim | parietal | -0.103513 |
6 | s8 | 18 | stim | parietal | -0.064408 |
7 | s7 | 18 | stim | parietal | -0.060526 |
8 | s6 | 18 | stim | parietal | -0.007029 |
9 | s5 | 18 | stim | parietal | -0.040557 |
In [4]:
# List the distinct values of each grouping column, one array per line.
for column in ('subject', 'region'):
    print(dat[column].unique())
['s13' 's5' 's12' 's11' 's10' 's9' 's8' 's7' 's6' 's4' 's3' 's2' 's1' 's0'] ['parietal' 'frontal']
In [5]:
# Summarise the signal per (region, subject): mean and standard deviation.
# reset_index turns the group keys back into ordinary columns.
tab = (
    dat.groupby(['region', 'subject'])
    .agg({'signal': ['mean', 'std']})
    .reset_index()
)

# Signal-to-noise ratio as |mean| / std. The aggregated columns live under a
# two-level header, so they are addressed with ('signal', <stat>) tuples.
tab['SNR'] = tab[('signal', 'mean')].abs() / tab[('signal', 'std')]
tab
Out[5]:
region | subject | signal | SNR | ||
---|---|---|---|---|---|
mean | std | ||||
0 | frontal | s0 | 0.003771 | 0.037305 | 0.101093 |
1 | frontal | s1 | 0.006105 | 0.110323 | 0.055338 |
2 | frontal | s10 | 0.003949 | 0.033367 | 0.118338 |
3 | frontal | s11 | -0.005611 | 0.074502 | 0.075311 |
4 | frontal | s12 | -0.001854 | 0.043914 | 0.042211 |
5 | frontal | s13 | 0.000796 | 0.072732 | 0.010939 |
6 | frontal | s2 | -0.003060 | 0.053104 | 0.057629 |
7 | frontal | s3 | 0.006597 | 0.101186 | 0.065198 |
8 | frontal | s4 | 0.011415 | 0.145245 | 0.078589 |
9 | frontal | s5 | 0.000352 | 0.074300 | 0.004738 |
10 | frontal | s6 | 0.004165 | 0.087947 | 0.047355 |
11 | frontal | s7 | 0.005201 | 0.057982 | 0.089693 |
12 | frontal | s8 | -0.004920 | 0.048267 | 0.101934 |
13 | frontal | s9 | -0.008732 | 0.043089 | 0.202640 |
14 | parietal | s0 | 0.003882 | 0.067340 | 0.057645 |
15 | parietal | s1 | 0.013851 | 0.196472 | 0.070497 |
16 | parietal | s10 | 0.009539 | 0.091296 | 0.104485 |
17 | parietal | s11 | 0.000262 | 0.098102 | 0.002672 |
18 | parietal | s12 | 0.003913 | 0.083983 | 0.046596 |
19 | parietal | s13 | 0.006910 | 0.092687 | 0.074553 |
20 | parietal | s2 | -0.000792 | 0.073499 | 0.010781 |
21 | parietal | s3 | 0.008900 | 0.129123 | 0.068930 |
22 | parietal | s4 | 0.011836 | 0.149921 | 0.078945 |
23 | parietal | s5 | -0.002032 | 0.087987 | 0.023094 |
24 | parietal | s6 | -0.000320 | 0.092333 | 0.003469 |
25 | parietal | s7 | 0.004221 | 0.071113 | 0.059354 |
26 | parietal | s8 | 0.009619 | 0.108204 | 0.088893 |
27 | parietal | s9 | 0.011153 | 0.125803 | 0.088650 |
In [6]:
# Rank the (region, subject) rows from highest to lowest SNR.
tab.sort_values(by='SNR', ascending=False)
Out[6]:
region | subject | signal | SNR | ||
---|---|---|---|---|---|
mean | std | ||||
13 | frontal | s9 | -0.008732 | 0.043089 | 0.202640 |
2 | frontal | s10 | 0.003949 | 0.033367 | 0.118338 |
16 | parietal | s10 | 0.009539 | 0.091296 | 0.104485 |
12 | frontal | s8 | -0.004920 | 0.048267 | 0.101934 |
0 | frontal | s0 | 0.003771 | 0.037305 | 0.101093 |
11 | frontal | s7 | 0.005201 | 0.057982 | 0.089693 |
26 | parietal | s8 | 0.009619 | 0.108204 | 0.088893 |
27 | parietal | s9 | 0.011153 | 0.125803 | 0.088650 |
22 | parietal | s4 | 0.011836 | 0.149921 | 0.078945 |
8 | frontal | s4 | 0.011415 | 0.145245 | 0.078589 |
3 | frontal | s11 | -0.005611 | 0.074502 | 0.075311 |
19 | parietal | s13 | 0.006910 | 0.092687 | 0.074553 |
15 | parietal | s1 | 0.013851 | 0.196472 | 0.070497 |
21 | parietal | s3 | 0.008900 | 0.129123 | 0.068930 |
7 | frontal | s3 | 0.006597 | 0.101186 | 0.065198 |
25 | parietal | s7 | 0.004221 | 0.071113 | 0.059354 |
14 | parietal | s0 | 0.003882 | 0.067340 | 0.057645 |
6 | frontal | s2 | -0.003060 | 0.053104 | 0.057629 |
1 | frontal | s1 | 0.006105 | 0.110323 | 0.055338 |
10 | frontal | s6 | 0.004165 | 0.087947 | 0.047355 |
18 | parietal | s12 | 0.003913 | 0.083983 | 0.046596 |
4 | frontal | s12 | -0.001854 | 0.043914 | 0.042211 |
23 | parietal | s5 | -0.002032 | 0.087987 | 0.023094 |
5 | frontal | s13 | 0.000796 | 0.072732 | 0.010939 |
20 | parietal | s2 | -0.000792 | 0.073499 | 0.010781 |
9 | frontal | s5 | 0.000352 | 0.074300 | 0.004738 |
24 | parietal | s6 | -0.000320 | 0.092333 | 0.003469 |
17 | parietal | s11 | 0.000262 | 0.098102 | 0.002672 |
Plotting¶
Matplotlib:
In [7]:
# Keep only frontal-region rows recorded for the 'stim' event, ordered by
# subject and then by timepoint so each subject's rows are in time order.
frontal = (
    dat.loc[(dat['region'] == 'frontal') & (dat['event'] == 'stim')]
    .sort_values(by=['subject', 'timepoint'])
)
frontal.head(n=10)
Out[7]:
subject | timepoint | event | region | signal | |
---|---|---|---|---|---|
67 | s0 | 0 | stim | frontal | -0.021452 |
280 | s0 | 1 | stim | frontal | -0.021054 |
294 | s0 | 2 | stim | frontal | -0.009038 |
308 | s0 | 3 | stim | frontal | 0.026727 |
322 | s0 | 4 | stim | frontal | 0.070558 |
351 | s0 | 5 | stim | frontal | 0.085387 |
349 | s0 | 6 | stim | frontal | 0.059436 |
363 | s0 | 7 | stim | frontal | 0.007993 |
377 | s0 | 8 | stim | frontal | -0.042793 |
391 | s0 | 9 | stim | frontal | -0.070102 |
In [8]:
# Draw one signal-vs-timepoint line per subject on a single set of axes.
# groupby hands back each subject's rows directly, avoiding a boolean-mask
# scan of the whole frame per subject; sort=False keeps first-appearance
# order, matching what iterating over .unique() produced.
for sub, this_dat in frontal.groupby('subject', sort=False):
    plt.plot(this_dat.timepoint, this_dat.signal, label=sub)

# Label the axes so the figure can be read on its own.
plt.xlabel('timepoint')
plt.ylabel('signal')

# The label= values above are only shown once a legend is drawn; anchor it
# outside the axes so the many subject entries do not cover the lines.
# Trailing ';' suppresses the Legend repr in the cell output.
plt.legend(title='subject', loc='upper left', bbox_to_anchor=(1, 1));
Seaborn:
In [9]:
# Same per-subject plot via seaborn: hue='subject' draws one coloured line
# per subject.
sns.lineplot(
    data=frontal,
    x='timepoint',
    y='signal',
    hue='subject',
)
Out[9]:
<Axes: xlabel='timepoint', ylabel='signal'>
In [10]:
# Per-subject seaborn line plot, customised through the returned Axes.
ax = sns.lineplot(data=frontal, x='timepoint', y='signal', hue='subject')

# Axis labels and title set in one call on the Axes object.
ax.set(xlabel='time', ylabel='BOLD signal', title='Frontal stim response')

# Move the legend outside the plotting area (anchored at the top-right corner
# of the axes) so it does not cover the lines.
sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1))

# Faint horizontal reference line at y = 0, drawn beneath the data lines
# (negative zorder).
ax.axhline(y=0.0, color='gray', zorder=-3, alpha=0.5)
Out[10]:
<matplotlib.lines.Line2D at 0x7fd1bfdfe920>
In [ ]: