Fitting a psychometric function at the subject level#
Author: Nicolas Legrand nicolas.legrand@cas.au.dk
import pytensor.tensor as pt
import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import norm
import pymc as pm
sns.set_context('talk')
WARNING (pytensor.tensor.blas): Using NumPy C-API based implementation for BLAS functions.
---------------------------------------------------------------------------
ImportError Traceback (most recent call last)
Cell In[1], line 2
1 import pytensor.tensor as pt
----> 2 import arviz as az
3 import matplotlib.pyplot as plt
4 import numpy as np
File /opt/hostedtoolcache/Python/3.9.19/x64/lib/python3.9/site-packages/arviz/__init__.py:34
30 _log = Logger("arviz")
33 from .data import *
---> 34 from .plots import *
35 from .plots.backends import *
36 from .stats import *
File /opt/hostedtoolcache/Python/3.9.19/x64/lib/python3.9/site-packages/arviz/plots/__init__.py:2
1 """Plotting functions."""
----> 2 from .autocorrplot import plot_autocorr
3 from .bpvplot import plot_bpv
4 from .bfplot import plot_bf
File /opt/hostedtoolcache/Python/3.9.19/x64/lib/python3.9/site-packages/arviz/plots/autocorrplot.py:7
5 from ..rcparams import rcParams
6 from ..utils import _var_names
----> 7 from .plot_utils import default_grid, filter_plotters_list, get_plotting_function
10 def plot_autocorr(
11 data,
12 var_names=None,
(...)
24 show=None,
25 ):
26 r"""Bar plot of the autocorrelation function (ACF) for a sequence of data.
27
28 The ACF plots are helpful as a convergence diagnostic for posteriors from MCMC
(...)
117 >>> az.plot_autocorr(data, var_names=['mu', 'tau'], max_lag=200, combined=True)
118 """
File /opt/hostedtoolcache/Python/3.9.19/x64/lib/python3.9/site-packages/arviz/plots/plot_utils.py:15
11 from scipy.interpolate import CubicSpline
14 from ..rcparams import rcParams
---> 15 from ..stats.density_utils import kde
16 from ..stats import hdi
18 KwargSpec = Dict[str, Any]
File /opt/hostedtoolcache/Python/3.9.19/x64/lib/python3.9/site-packages/arviz/stats/__init__.py:3
1 # pylint: disable=wildcard-import
2 """Statistical tests and diagnostics for ArviZ."""
----> 3 from .density_utils import *
4 from .diagnostics import *
5 from .stats import *
File /opt/hostedtoolcache/Python/3.9.19/x64/lib/python3.9/site-packages/arviz/stats/density_utils.py:8
6 from scipy.fftpack import fft
7 from scipy.optimize import brentq
----> 8 from scipy.signal import convolve, convolve2d, gaussian # pylint: disable=no-name-in-module
9 from scipy.sparse import coo_matrix
10 from scipy.special import ive # pylint: disable=no-name-in-module
ImportError: cannot import name 'gaussian' from 'scipy.signal' (/opt/hostedtoolcache/Python/3.9.19/x64/lib/python3.9/site-packages/scipy/signal/__init__.py)
In this example, we are going to fit a cummulative normal function to decision responses made during the Heart Rate Discrimination task. We are going to use the data from the HRD method paper [Legrand et al., 2022] and analyse the responses from one participant from the second session.
# Load data frame
psychophysics_df = pd.read_csv('https://github.com/embodied-computation-group/CardioceptionPaper/raw/main/data/Del2_merged.txt')
First, let’s filter this data frame so we only keep subject 19 (sub_0019
label) and the interoceptive condition (Extero
label).
this_df = psychophysics_df[(psychophysics_df.Modality == 'Extero') & (psychophysics_df.Subject == 'sub_0019')]
this_df.head()
TrialType | Condition | Modality | StairCond | Decision | DecisionRT | Confidence | ConfidenceRT | Alpha | listenBPM | ... | EstimatedThreshold | EstimatedSlope | StartListening | StartDecision | ResponseMade | RatingStart | RatingEnds | endTrigger | HeartRateOutlier | Subject | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | psi | Less | Extero | psi | Less | 2.216429 | 59.0 | 1.632995 | -0.5 | 78.0 | ... | 22.805550 | 12.549457 | 1.603353e+09 | 1.603354e+09 | 1.603354e+09 | 1.603354e+09 | 1.603354e+09 | 1.603354e+09 | False | sub_0019 |
3 | psiCatchTrial | Less | Extero | psiCatchTrial | Less | 1.449154 | 100.0 | 0.511938 | -30.0 | 82.0 | ... | NaN | NaN | 1.603354e+09 | 1.603354e+09 | 1.603354e+09 | 1.603354e+09 | 1.603354e+09 | 1.603354e+09 | False | sub_0019 |
6 | psi | More | Extero | psi | More | 1.182666 | 95.0 | 0.606786 | 22.5 | 69.0 | ... | 10.001882 | 12.884902 | 1.603354e+09 | 1.603354e+09 | 1.603354e+09 | 1.603354e+09 | 1.603354e+09 | 1.603354e+09 | False | sub_0019 |
10 | psi | More | Extero | psi | More | 1.848141 | 24.0 | 1.448969 | 10.5 | 62.0 | ... | 0.998384 | 13.044744 | 1.603354e+09 | 1.603354e+09 | 1.603354e+09 | 1.603354e+09 | 1.603354e+09 | 1.603354e+09 | False | sub_0019 |
11 | psiCatchTrial | More | Extero | psiCatchTrial | More | 1.349469 | 75.0 | 0.561820 | 10.0 | 72.0 | ... | NaN | NaN | 1.603354e+09 | 1.603354e+09 | 1.603354e+09 | 1.603354e+09 | 1.603354e+09 | 1.603354e+09 | False | sub_0019 |
5 rows × 25 columns
This data frame contain a large number of columns, but here we will be interested in the Alpha
column (the intensity value) and the Decision
column (the response made by the participant).
this_df = this_df[['Alpha', 'Decision']]
this_df.head()
Alpha | Decision | |
---|---|---|
1 | -0.5 | Less |
3 | -30.0 | Less |
6 | 22.5 | More |
10 | 10.5 | More |
11 | 10.0 | More |
These two columns are enought for us to extract the 3 vectors of interest to fit a psychometric function:
The intensity vector, listing all the tested intensities values
The total number of trials for each tested intensity value
The number of “correct” response (here, when the decision == ‘More’).
Let’s take a look at the data. This function will plot the proportion of “Faster” responses depending on the intensity value of the trial stimuli (expressed in BPM). Here, the size of the circle represent the number of trials that were presented for each intensity values.
fig, axs = plt.subplots(figsize=(8, 5))
for ii, intensity in enumerate(np.sort(this_df.Alpha.unique())):
resp = sum((this_df.Alpha == intensity) & (this_df.Decision == 'More'))
total = sum(this_df.Alpha == intensity)
axs.plot(intensity, resp/total, 'o', alpha=0.5, color='#4c72b0',
markeredgecolor='k', markersize=total*5)
plt.ylabel('P$_{(Response = More|Intensity)}$')
plt.xlabel('Intensity ($\Delta$ BPM)')
plt.tight_layout()
sns.despine()
Model#
The model was defined as follows:
Where \(erf\) denotes the error functions and \(\phi\) is the cumulative normal function.
Let’s create our own cumulative normal distribution function here using pytensor.
def cumulative_normal(x, alpha, beta):
# Cumulative distribution function for the standard normal distribution
return 0.5 + 0.5 * pt.erf((x - alpha) / (beta * pt.sqrt(2)))
We preprocess the data to extract the intensity \(x\), the number or trials \(n\) and number of hit responses \(r\).
x, n, r = np.zeros(163), np.zeros(163), np.zeros(163)
for ii, intensity in enumerate(np.arange(-40.5, 41, 0.5)):
x[ii] = intensity
n[ii] = sum(this_df.Alpha == intensity)
r[ii] = sum((this_df.Alpha == intensity) & (this_df.Decision == "More"))
# remove no responses trials
validmask = n != 0
xij, nij, rij = x[validmask], n[validmask], r[validmask]
Create the model.
with pm.Model() as subject_psychophysics:
alpha = pm.Uniform("alpha", lower=-40.5, upper=40.5)
beta = pm.HalfNormal("beta", 10)
thetaij = pm.Deterministic(
"thetaij", cumulative_normal(xij, alpha, beta)
)
rij_ = pm.Binomial("rij", p=thetaij, n=nij, observed=rij)
pm.model_to_graphviz(subject_psychophysics)
with subject_psychophysics:
idata = pm.sample(chains=4, cores=4)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [alpha, beta]
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 2 seconds.
az.plot_trace(idata, var_names=['alpha', 'beta']);
stats = az.summary(idata, ["alpha", "beta"])
stats
mean | sd | hdi_3% | hdi_97% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
---|---|---|---|---|---|---|---|---|---|
alpha | 2.197 | 1.576 | -0.664 | 5.143 | 0.029 | 0.021 | 3149.0 | 2042.0 | 1.0 |
beta | 7.438 | 1.843 | 4.424 | 10.866 | 0.037 | 0.026 | 2712.0 | 2349.0 | 1.0 |
Hint
Here, \(\alpha\) refers to the threshold value (also the point of subjective equality for this design). This participant had a threshold at estimated at 2.25, which is just slightly positively biased. The \(\beta\) value refers to the slope. A higher value means lower precision. Here, the slope is estimated to be around 7.46 for this participant.
Plotting#
Extrace the last 10 sample of each chain (here we have 4).
alpha_samples = idata["posterior"]["alpha"].values[:, -10:].flatten()
beta_samples = idata["posterior"]["beta"].values[:, -10:].flatten()
fig, axs = plt.subplots(figsize=(8, 5))
# Draw some sample from the traces
for a, b in zip(alpha_samples, beta_samples):
axs.plot(
np.linspace(-40, 40, 500),
(norm.cdf(np.linspace(-40, 40, 500), loc=a, scale=b)),
color='k', alpha=.08, linewidth=2
)
# Plot psychometric function with average parameters
slope = stats['mean']['beta']
threshold = stats['mean']['alpha']
axs.plot(np.linspace(-40, 40, 500),
(norm.cdf(np.linspace(-40, 40, 500), loc=threshold, scale=slope)),
color='#4c72b0', linewidth=4)
# Draw circles showing response proportions
for ii, intensity in enumerate(np.sort(this_df.Alpha.unique())):
resp = sum((this_df.Alpha == intensity) & (this_df.Decision == 'More'))
total = sum(this_df.Alpha == intensity)
axs.plot(intensity, resp/total, 'o', alpha=0.5, color='#4c72b0',
markeredgecolor='k', markersize=total*5)
plt.ylabel('P$_{(Response = More|Intensity)}$')
plt.xlabel('Intensity ($\Delta$ BPM)')
plt.tight_layout()
sns.despine()
System configuration#
%load_ext watermark
%watermark -n -u -v -iv -w -p pymc,arviz,pytensor
Last updated: Fri Nov 10 2023
Python implementation: CPython
Python version : 3.9.18
IPython version : 8.16.1
pymc : 5.9.0
arviz : 0.16.1
pytensor: 2.17.2
pytensor : 2.17.2
pymc : 5.9.0
seaborn : 0.13.0
matplotlib: 3.8.0
pandas : 2.0.3
numpy : 1.22.0
arviz : 0.16.1
Watermark: 2.4.3