# -*- coding: utf-8 -*-
import warnings
import tempfile
from AFQ.definitions.mapping import SynMap
from AFQ.definitions.utils import Definition
import AFQ.api.bundle_dict as abd
warnings.simplefilter(action='ignore', category=FutureWarning) # noqa
import logging
from AFQ.api.participant import ParticipantAFQ
from AFQ.api.utils import (
check_attribute, AFQclass_doc,
export_all_helper, valid_exports_string)
import AFQ.utils.streamlines as aus
from AFQ.viz.utils import get_eye
from dipy.utils.parallel import paramap
from dipy.io.stateful_tractogram import StatefulTractogram, Space
import dipy.tracking.streamlinespeed as dps
import dipy.tracking.streamline as dts
from dipy.io.streamline import save_tractogram
from AFQ.version import version as pyafq_version
from AFQ.viz.utils import trim
import pandas as pd
import pydra
import numpy as np
import os
import os.path as op
from tqdm import tqdm
import json
import s3fs
from time import time
import nibabel as nib
from PIL import Image
from s3bids.utils import S3BIDSStudy
import glob
from bids.layout import BIDSLayout, BIDSLayoutIndexer
try:
import afqbrowser as afqb
using_afqb = True
except ImportError:
using_afqb = False
__all__ = ["GroupAFQ", "get_afq_bids_entities_fname"]
logger = logging.getLogger('AFQ')
logger.setLevel(logging.INFO)
# get rid of unnecessary columns in df
def clean_pandas_df(df):
df = df.reset_index(drop=True)
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
return df
def get_afq_bids_entities_fname():
return op.dirname(op.dirname(op.abspath(
aus.__file__))) + "/afq_bids_entities.json"
class _ParticipantAFQInputs:
def __init__(
self, dwi_data_file, bval_file, bvec_file, results_dir,
kwargs):
self.dwi_data_file = dwi_data_file
self.bval_file = bval_file
self.bvec_file = bvec_file
self.results_dir = results_dir
self.kwargs = kwargs
class GroupAFQ(object):
f"""{AFQclass_doc}"""
def __init__(self,
bids_path,
bids_filters={"suffix": "dwi"},
preproc_pipeline="all",
participant_labels=None,
output_dir=None,
parallel_params={"engine": "serial"},
bids_layout_kwargs={},
**kwargs):
'''
Initialize a GroupAFQ object from a BIDS dataset.
Parameters
----------
bids_path : str
The path to preprocessed diffusion data organized in a BIDS
dataset. This should contain a BIDS derivative dataset with
preprocessed dwi/bvals/bvecs.
bids_filters : dict
Filter to pass to bids_layout.get when finding DWI files.
Default: {"suffix": "dwi"}
preproc_pipeline : str, optional.
The name of the pipeline used to preprocess the DWI data.
Default: "all".
participant_labels : list or None, optional
List of participant labels (subject IDs) to perform
processing on. If None, all subjects are used.
Default: None
output_dir : str or None, optional
Path to output directory. If None, outputs are put
in a AFQ pipeline folder in the derivatives folder of
the BIDS directory. pyAFQ will use existing derivatives
from the output directory if they exist, instead of recalculating
them (this means you need to clear the output folder if you want
to recalculate a derivative).
Default: None
parallel_params : dict, optional
Parameters to pass to paramap in AFQ.utils.parallel,
to parallelize computations across subjects and sessions.
Set "n_jobs" to -1 to automatically parallelize as
the number of cpus. Here is an example for how to do
multiprocessing with 4 cpus:
{"n_jobs": 4, "engine": "joblib", "backend": "loky"}
Default: {"engine": "serial"}
bids_layout_kwargs: dict, optional
Additional arguments to give to BIDSLayout from pybids.
For large datasets, try:
{"validate": False, "index_metadata": False}
Default: {}
kwargs : additional optional parameters
You can set additional parameters for any step
of the process. See :ref:`usage/kwargs` for more details.
Examples
--------
api.GroupAFQ(my_path, csd_sh_order=4)
api.GroupAFQ(
my_path,
reg_template_spec="mni_t2", reg_subject_spec="b0")
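
        A sketch of parallelizing across subjects with joblib (the
        number of jobs is a placeholder; see ``parallel_params`` above):
        api.GroupAFQ(
            my_path,
            parallel_params={"n_jobs": 4, "engine": "joblib",
                             "backend": "loky"})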
'''
if not isinstance(bids_path, str):
raise TypeError("bids_path must be a string")
if not op.exists(bids_path):
raise ValueError("bids_path not found")
if not op.exists(op.join(bids_path, "dataset_description.json")):
raise ValueError("There must be a dataset_description.json"
+ " in bids_path")
if not isinstance(bids_filters, dict):
raise TypeError("bids_filters must be a dict")
# preproc_pipeline typechecking handled by pyBIDS
if participant_labels is not None\
and not isinstance(participant_labels, list):
raise TypeError(
"participant_labels must be either a list or None")
if output_dir is not None\
and not isinstance(output_dir, str):
raise TypeError(
"output_dir must be either a str or None")
if not isinstance(parallel_params, dict):
raise TypeError("parallel_params must be a dict")
if not isinstance(bids_layout_kwargs, dict):
raise TypeError("bids_layout_kwargs must be a dict")
self.logger = logger
self.parallel_params = parallel_params
self.wf_dict = {}
# validate input and fail early
if not op.exists(bids_path):
raise ValueError(f'Unable to locate BIDS dataset in: {bids_path}')
# This is where all the outputs will go:
if output_dir is None:
self.afq_path = op.join(bids_path, 'derivatives', 'afq')
self.afqb_path = op.join(bids_path, 'derivatives', 'afq_browser')
else:
self.afq_path = output_dir
self.afqb_path = op.join(output_dir, 'afq_browser')
# Create it as needed:
os.makedirs(self.afq_path, exist_ok=True)
bids_indexer = BIDSLayoutIndexer(**bids_layout_kwargs)
bids_layout = BIDSLayout(
bids_path, derivatives=True, indexer=bids_indexer)
bids_description = bids_layout.description
# check that any files exist in the derivatives folder,
# not including the dataset_description.json files
# the second check may be particularly useful in checking
# that the derivatives folder is well-defined
if len(bids_layout.get())\
- len(bids_layout.get(extension="json")) < 1:
raise ValueError(
f"No non-json files recognized by pyBIDS in {bids_path}")
if len(bids_layout.get(scope=preproc_pipeline))\
- len(bids_layout.get(
scope=preproc_pipeline,
extension="json")) < 1:
raise ValueError((
f"No non-json files recognized by "
f"pyBIDS in the pipeline: {preproc_pipeline}"))
# Add required metadata file at top level (inheriting as needed):
pipeline_description = {
"Name": bids_description["Name"],
"BIDSVersion": bids_description["BIDSVersion"],
"PipelineDescription": {"Name": "pyAFQ",
"Version": pyafq_version},
"GeneratedBy": [{"Name": op.basename(self.afq_path),
"Version": pyafq_version}]}
pl_desc_file = op.join(self.afq_path, 'dataset_description.json')
with open(pl_desc_file, 'w') as outfile:
json.dump(pipeline_description, outfile)
self.subjects = bids_layout.get(return_type='id', target='subject')
if not len(self.subjects):
raise ValueError(
"`bids_path` contains no subjects in derivatives folders."
+ " This could be caused by derivatives folders not following"
+ " the BIDS format.")
if participant_labels is not None:
filtered_subjects = []
subjects_found_printed = False
for subjectID in participant_labels:
subjectID = str(subjectID)
if subjectID not in self.subjects:
self.logger.warning((
f"Subject {subjectID} specified in "
f"`participant_labels` but not found "
f"in BIDS derivatives folders"))
if not subjects_found_printed:
subjects_found_printed = True
self.logger.warning((
f"Only these subjects found in BIDS "
f"derivatives folders: {self.subjects}"))
else:
filtered_subjects.append(subjectID)
self.subjects = filtered_subjects
if not len(self.subjects):
raise ValueError(
"No subjects specified in `participant_labels` "
+ " found in BIDS derivatives folders."
+ " See above warnings.")
sessions = bids_layout.get(return_type='id', target='session')
self.sessions = sessions if len(sessions) else [None]
# do not bother to parallelize if less than 2 subject-sessions
if len(self.sessions) * len(self.subjects) < 2:
self.parallel_params["engine"] = "serial"
# do not parallelize segmentation if parallelizing across
# subject-sessions
if self.parallel_params["engine"] != "serial":
if "segmentation_params" not in kwargs:
kwargs["segmentation_params"] = {}
if "parallel_segmentation" not in kwargs["segmentation_params"]:
kwargs["segmentation_params"]["parallel_segmentation"] = {}
kwargs["segmentation_params"]["parallel_segmentation"]["engine"] =\
"serial"
self.valid_sub_list = []
self.valid_ses_list = []
self.pAFQ_list = []
self.pAFQ_inputs_list = []
for subject in self.subjects:
self.wf_dict[subject] = {}
for session in self.sessions:
this_kwargs = kwargs.copy()
results_dir = op.join(self.afq_path, 'sub-' + subject)
if session is not None:
results_dir = op.join(results_dir, 'ses-' + session)
dwi_bids_filters = {
"subject": subject,
"session": session,
"return_type": "filename",
"scope": preproc_pipeline,
"extension": "nii.gz",
"suffix": "dwi",
}
dwi_bids_filters.update(bids_filters)
dwi_files = bids_layout.get(**dwi_bids_filters)
if (not len(dwi_files)):
self.logger.warning(
f"No dwi found for subject {subject} and session "
f"{session}. Skipping.")
continue
os.makedirs(results_dir, exist_ok=True)
dwi_data_file = dwi_files[0]
# For bvals and bvecs, use ``get_bval()`` and ``get_bvec()`` to
# walk up the file tree and inherit the closest bval and bvec
# files. Maintain input ``bids_filters`` in case user wants to
# specify acquisition labels, but pop suffix since it is
# already specified inside ``get_bvec()`` and ``get_bval()``
suffix = bids_filters.pop("suffix", None)
bvec_file = bids_layout.get_bvec(
dwi_data_file,
**bids_filters)
bval_file = bids_layout.get_bval(
dwi_data_file,
**bids_filters)
if suffix is not None:
bids_filters["suffix"] = suffix
# Call find path for all definitions
for key, value in this_kwargs.items():
if key == "scalars":
for scalar in this_kwargs["scalars"]:
if isinstance(scalar, Definition):
scalar_found = scalar.find_path(
bids_layout,
dwi_data_file,
subject,
session,
required=False)
if scalar_found is False:
this_kwargs["scalars"].remove(scalar)
elif key == "import_tract":
if isinstance(this_kwargs["import_tract"], dict):
it_res = \
bids_layout.get(
subject=subject,
session=session,
extension=[
'.trk',
'.tck',
'.vtk',
'.fib',
'.dpy'],
return_type='filename',
**this_kwargs["import_tract"])
if len(it_res) < 1:
raise ValueError((
"No custom tractography found for"
f" subject {subject}"
" and session "
f"{session}."))
elif len(it_res) > 1:
this_kwargs["import_tract"] = it_res[0]
logger.warning((
f"Multiple viable custom tractographies found for"
f" subject "
f"{subject} and session "
f"{session}. Will use: {it_res[0]}"))
else:
this_kwargs["import_tract"] = it_res[0]
elif isinstance(value, dict):
for _, subvalue in value.items():
if isinstance(subvalue, Definition):
subvalue.find_path(
bids_layout,
dwi_data_file,
subject,
session)
elif isinstance(value, Definition):
value.find_path(
bids_layout,
dwi_data_file,
subject,
session)
# call find path for all ROIs
if "bundle_info" in this_kwargs and isinstance(
this_kwargs["bundle_info"], abd.BundleDict):
for b_name in this_kwargs["bundle_info"].bundle_names:
this_kwargs["bundle_info"].apply_to_rois(
b_name,
this_kwargs["bundle_info"]._use_bids_info,
bids_layout, bids_path, subject, session,
dry_run=False)
self.valid_sub_list.append(subject)
self.valid_ses_list.append(str(session))
this_pAFQ_inputs = _ParticipantAFQInputs(
dwi_data_file,
bval_file, bvec_file,
results_dir,
this_kwargs)
this_pAFQ = ParticipantAFQ(
this_pAFQ_inputs.dwi_data_file,
this_pAFQ_inputs.bval_file,
this_pAFQ_inputs.bvec_file,
this_pAFQ_inputs.results_dir,
**this_pAFQ_inputs.kwargs)
self.wf_dict[subject][str(session)] = this_pAFQ.wf_dict
self.pAFQ_list.append(this_pAFQ)
self.pAFQ_inputs_list.append(this_pAFQ_inputs)
    def combine_profiles(self):
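        """Combine tract profiles from all subjects/sessions into one CSV.

        Writes ``tract_profiles.csv`` to the AFQ output folder and returns
        the combined pandas DataFrame.
        """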
tract_profiles_dict = self.export("profiles")
if len(self.sessions) > 1:
tract_profiles_list = []
for _, subject_dict in tract_profiles_dict.items():
tract_profiles_list.extend(subject_dict.values())
else:
tract_profiles_list = list(tract_profiles_dict.values())
_df = combine_list_of_profiles(tract_profiles_list)
out_file = op.abspath(op.join(
self.afq_path, "tract_profiles.csv"))
os.makedirs(op.dirname(out_file), exist_ok=True)
_df = clean_pandas_df(_df)
_df.to_csv(out_file, index=False)
return _df
    def get_streamlines_json(self):
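        """Generate the streamlines JSON file used by AFQ-Browser.

        For each bundle, streamlines from a representative subject/session
        are subsampled to at most 100 streamlines of 100 points each,
        moved to template space, and written to ``afqb_streamlines.json``
        in the AFQ output folder.
        """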
sls_json_fname = op.abspath(op.join(
self.afq_path, "afqb_streamlines.json"))
if not op.exists(sls_json_fname):
subses_info = []
def load_next_subject():
subses_idx = len(subses_info)
sub = self.valid_sub_list[subses_idx]
ses = self.valid_ses_list[subses_idx]
this_bundles_file = self.export(
"bundles", collapse=False)[sub][ses]
this_mapping = self.export("mapping", collapse=False)[sub][ses]
this_img = self.export(
"dwi", collapse=False)[sub][ses]
seg_sft = aus.SegmentedSFT.fromfile(
this_bundles_file,
this_img)
seg_sft.sft.to_rasmm()
subses_info.append((seg_sft, this_mapping))
bundle_dict = self.export("bundle_dict", collapse=False)[
self.valid_sub_list[0]][self.valid_ses_list[0]]
sls_dict = {}
load_next_subject() # load first subject
for b in bundle_dict.keys():
if b != "whole_brain":
for i in range(len(self.valid_sub_list)):
seg_sft, mapping = subses_info[i]
idx = seg_sft.bundle_idxs[b]
# use the first subses that works
# otherwise try each successive subses
if len(idx) == 0:
# break if we run out of subses
if i + 1 >= len(self.valid_sub_list):
break
# load subses if not already loaded
if i + 1 >= len(subses_info):
load_next_subject()
continue
if len(idx) > 100:
idx = np.random.choice(
idx, size=100, replace=False)
these_sls = seg_sft.sft.streamlines[idx]
these_sls = dps.set_number_of_points(these_sls, 100)
tg = StatefulTractogram(
these_sls,
seg_sft.sft,
Space.RASMM)
delta = dts.values_from_volume(
mapping.forward,
tg.streamlines, np.eye(4))
moved_sl = dts.Streamlines(
[d + s for d, s in zip(delta, tg.streamlines)])
moved_sl = np.asarray(moved_sl)
median_sl = np.median(moved_sl, axis=0)
sls_dict[b] = {"coreFiber": median_sl.tolist()}
for ii, sl_idx in enumerate(idx):
sls_dict[b][str(sl_idx)] = moved_sl[ii].tolist()
break
with open(sls_json_fname, 'w') as fp:
json.dump(sls_dict, fp)
return sls_json_fname
    def export(self, attr_name="help", collapse=True):
f"""
Export a specific output. To print a list of available outputs,
call export without arguments.
{valid_exports_string}
Parameters
----------
attr_name : str
Name of the output to export. Default: "help"
collapse : bool
Whether to collapse session dimension if there is only 1 session.
Default: True
Returns
-------
output : dict
The specific output as a dictionary. Keys are subjects.
Values are dictionaries with keys of sessions
if multiple sessions are used. Otherwise, values are
the output.
None if called without arguments.
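
        Examples
        --------
        # A hedged sketch: export recognized bundles for every
        # subject/session; keys of the returned dict are subject IDs.
        bundle_files = my_afq.export("bundles")
        my_afq.export()  # print the list of available outputs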
"""
section = check_attribute(attr_name)
# iterate over subjects / sessions,
# decide if they need to be calculated or not
in_list = []
to_calc_list = []
results = {}
for ii, subject in enumerate(self.valid_sub_list):
if subject not in results:
results[subject] = {}
session = self.valid_ses_list[ii]
wf_dict = self.wf_dict[subject][str(session)]
if section is not None:
wf_dict = wf_dict[section]
if ((self.parallel_params.get("engine", False) != "serial")
and (hasattr(wf_dict, "efferents"))
and (attr_name not in wf_dict.efferents)):
in_list.append((wf_dict))
to_calc_list.append((subject, session))
else:
results[subject][session] = wf_dict[attr_name]
# if some need to be calculated, do those in parallel
if to_calc_list:
par_results = paramap(
lambda wf, attr: wf[attr], in_list,
func_args=[attr_name],
**self.parallel_params)
for i, subses in enumerate(to_calc_list):
subject, session = subses
results[subject][session] = par_results[i]
# If only one session, collapse session dimension
if len(self.sessions) == 1 and collapse:
for subject in self.valid_sub_list:
results[subject] = results[subject][self.valid_ses_list[0]]
return results
    def export_up_to(self, attr_name="help"):
f"""
Export all derivatives necessary for a specific output.
To print a list of available outputs,
call export_up_to without arguments.
{valid_exports_string}
Parameters
----------
attr_name : str
Name of the output to export up to. Default: "help"
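
        Examples
        --------
        # A hedged sketch: compute everything "profiles" depends on,
        # without computing "profiles" itself.
        my_afq.export_up_to("profiles")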
"""
section = check_attribute(attr_name)
wf_dict = self.wf_dict[
self.valid_sub_list[0]][self.valid_ses_list[0]]
if section is not None:
wf_dict = wf_dict[section]
for dependent in wf_dict.plan.dependencies[attr_name]:
self.export(dependent)
    def export_all(self, viz=True, afqbrowser=True, xforms=True,
indiv=True):
""" Exports all the possible outputs
Parameters
----------
viz : bool
Whether to output visualizations. This includes tract profile
plots, a figure containing all bundles, and, if using the AFQ
segmentation algorithm, individual bundle figures.
Default: True
afqbrowser : bool
Whether to output an AFQ-Browser from this AFQ instance.
Default: True
xforms : bool
Whether to output the reg_template image in subject space and,
depending on if it is possible based on the mapping used, to
output the b0 in template space.
Default: True
indiv : bool
Whether to output individual bundles in their own files, in
addition to the one file containing all bundles. If using
the AFQ segmentation algorithm, individual ROIs are also
output.
Default: True
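
        Examples
        --------
        # A minimal sketch: export everything, but skip the AFQ-Browser
        # output.
        my_afq.export_all(afqbrowser=False)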
"""
start_time = time()
seg_params = self.export("segmentation_params", collapse=False)[
self.valid_sub_list[0]][self.valid_ses_list[0]]
seg_algo = seg_params.get("seg_algo", "AFQ")
export_all_helper(self, seg_algo, xforms, indiv, viz)
self.combine_profiles()
if afqbrowser:
self.assemble_AFQ_browser()
self.logger.info(
f"Time taken for export all: {str(time() - start_time)}")
    def cmd_outputs(self, cmd="rm", dependent_on=None, exceptions=[],
suffix=""):
"""
        Perform a command on some or all outputs of pyAFQ.
        This is useful if you change a parameter and need
        to recalculate derivatives that depend on it.
        Some example commands: cp, mv, rm.
        -r will be automatically added when necessary.
Parameters
----------
cmd : str
Command to run on outputs. Default: 'rm'
dependent_on : str or None
            Which derivatives to perform the command on.
If None, perform on all.
If "track", perform on all derivatives that depend on the
tractography.
If "recog", perform on all derivatives that depend on the
bundle recognition.
Default: None
exceptions : list of str
            Names of outputs that the command should not be applied to.
Default: []
suffix : str
            Parts of the command that come after the filename.
Default: ""
Example
-------
        # This command would copy all derivatives that are
        # dependent on the tractography into 'my_other_folder'
myafq.cmd_outputs(
"cp",
dependent_on="track",
suffix="~/my_other_folder/")
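
        # A second sketch: delete everything that depends on bundle
        # recognition, so it is recalculated on the next run.
        myafq.cmd_outputs("rm", dependent_on="recog")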
"""
for pAFQ in self.pAFQ_list:
pAFQ.cmd_outputs(cmd, dependent_on, exceptions, suffix=suffix)
    clobber = cmd_outputs  # alias for default of cmd_outputs
    def make_all_participant_montages(self, images_per_row=2):
"""
        Generate a montage of all bundles for all subjects.
Parameters
----------
images_per_row : int
Number of bundle images per row in output file.
Default: 2
Returns
-------
filename of montage images
"""
for pAFQ in self.pAFQ_list:
pAFQ.participant_montage(images_per_row=images_per_row)
    def group_montage(self, bundle_name, size, view, direc, slice_pos=None):
"""
Generate montage file(s) of a given bundle at a given angle.
Parameters
----------
bundle_name : str
Name of bundle to visualize, should be the same as in the
bundle dictionary.
size : tuple of int
The number of columns and rows for each file.
view : str
Which view to display. Can be one of sagittal, coronal, or axial.
direc : str
            Which direction to view. Can be one of left, right, top,
            bottom, front, back.
slice_pos : float, or None
If float, indicates the fractional position along the
perpendicular axis to the slice. Currently only works with plotly.
If None, no slice is displayed.
Returns
-------
list of filenames of montage images
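
        Examples
        --------
        # A hedged sketch; the bundle name is a placeholder and must match
        # a key in the bundle dictionary.
        myafq.group_montage("Left Arcuate", (3, 2), "sagittal", "left")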
"""
tdir = tempfile.gettempdir()
best_scalar = self.export("best_scalar", collapse=False)[
self.valid_sub_list[0]][self.valid_ses_list[0]]
viz_backend_dict = self.export("viz_backend", collapse=False)
b0_backend_dict = self.export("b0", collapse=False)
dwi_affine_dict = self.export("dwi_affine", collapse=False)
bundles_dict = self.export("bundles", collapse=False)
best_scalar_dict = self.export(best_scalar, collapse=False)
all_fnames = []
self.logger.info("Generating Montage...")
for ii in tqdm(range(len(self.valid_ses_list))):
this_sub = self.valid_sub_list[ii]
this_ses = self.valid_ses_list[ii]
viz_backend = viz_backend_dict[this_sub][this_ses]
b0 = b0_backend_dict[this_sub][this_ses]
dwi_affine = dwi_affine_dict[this_sub][this_ses]
bundles = bundles_dict[this_sub][this_ses]
best_scalar = best_scalar_dict[this_sub][this_ses]
flip_axes = [False, False, False]
for i in range(3):
flip_axes[i] = (dwi_affine[i, i] < 0)
if slice_pos is not None:
slice_kwargs = {}
if view == "sagittal":
slice_kwargs["x_pos"] = slice_pos
slice_kwargs["y_pos"] = None
slice_kwargs["z_pos"] = None
elif view == "coronal":
slice_kwargs["x_pos"] = None
slice_kwargs["y_pos"] = slice_pos
slice_kwargs["z_pos"] = None
elif view == "axial":
slice_kwargs["x_pos"] = None
slice_kwargs["y_pos"] = None
slice_kwargs["z_pos"] = slice_pos
figure = viz_backend.visualize_volume(
best_scalar,
opacity=1.0,
flip_axes=flip_axes,
interact=False,
inline=False,
**slice_kwargs)
else:
figure = None
figure = viz_backend.visualize_bundles(
bundles,
shade_by_volume=best_scalar,
flip_axes=flip_axes,
bundle=bundle_name,
interact=False,
inline=False,
figure=figure)
eye = get_eye(view, direc)
this_fname = tdir + f"/t{ii}.png"
if "plotly" in viz_backend.backend:
figure.update_layout(scene_camera=dict(
projection=dict(type="orthographic"),
up={"x": 0, "y": 0, "z": 1},
eye=eye,
center=dict(x=0, y=0, z=0)))
figure.write_image(this_fname)
# temporary fix for memory leak
import plotly.io as pio
pio.kaleido.scope._shutdown_kaleido()
else:
from dipy.viz import window
direc = np.fromiter(eye.values(), dtype=int)
data_shape = np.asarray(nib.load(b0).get_fdata().shape)
figure.set_camera(
position=direc * data_shape,
focal_point=data_shape // 2,
view_up=(0, 0, 1))
figure.zoom(0.5)
window.snapshot(figure, fname=this_fname, size=(600, 600))
def _save_file(curr_img, curr_file_num):
save_path = op.abspath(op.join(
self.afq_path,
(f"bundle-{bundle_name}_view-{view}"
f"_idx-{curr_file_num}_montage.png")))
curr_img.save(save_path)
all_fnames.append(save_path)
this_img_trimmed = {}
max_height = 0
max_width = 0
for ii in range(len(self.valid_ses_list)):
this_img = Image.open(tdir + f"/t{ii}.png")
try:
this_img_trimmed[ii] = trim(trim(this_img))
except IndexError: # this_img is a picture of nothing
this_img_trimmed[ii] = this_img
if this_img_trimmed[ii].size[0] > max_width:
max_width = this_img_trimmed[ii].size[0]
if this_img_trimmed[ii].size[1] > max_height:
max_height = this_img_trimmed[ii].size[1]
curr_img = Image.new(
'RGB',
(max_width * size[0], max_height * size[1]),
color="white")
curr_file_num = 0
for ii in range(len(self.valid_ses_list)):
x_pos = ii % size[0]
_ii = ii // size[0]
y_pos = _ii % size[1]
_ii = _ii // size[1]
file_num = _ii
if file_num != curr_file_num:
_save_file(curr_img, curr_file_num)
curr_img = Image.new(
'RGB',
(max_width * size[0], max_height * size[1]),
color="white")
curr_file_num = file_num
curr_img.paste(
this_img_trimmed[ii],
(x_pos * max_width, y_pos * max_height))
_save_file(curr_img, curr_file_num)
return all_fnames
    def combine_bundle(self, bundle_name):
"""
        Transform a given bundle to reg_template space for all subjects,
        then merge them into one trk file.
Useful for visualizing the variability in the bundle across subjects.
Note: currently only implemented using built-in SynMap
Parameters
----------
bundle_name : str
Name of the bundle to transform, should be one of the bundles in
bundle_dict.
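
        Examples
        --------
        # A hedged sketch; the bundle name is a placeholder.
        myafq.combine_bundle("Left Arcuate")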
"""
reference_wf_dict = self.wf_dict[
self.valid_sub_list[0]][self.valid_ses_list[0]]
if "mapping_definition" in reference_wf_dict:
mapping_definition = reference_wf_dict["mapping_definition"]
if mapping_definition is not None and not isinstance(
mapping_definition, SynMap):
raise NotImplementedError((
"combine_bundle not implemented for mapping_definition "
"other than SynMap"))
reg_template = self.export("reg_template", collapse=False)[
self.valid_sub_list[0]][self.valid_ses_list[0]]
bundles_dict = self.export("bundles", collapse=False)
mapping_dict = self.export("mapping", collapse=False)
sls_mni = []
self.logger.info("Combining Bundles...")
for ii in tqdm(range(len(self.valid_ses_list))):
this_sub = self.valid_sub_list[ii]
this_ses = self.valid_ses_list[ii]
seg_sft = aus.SegmentedSFT.fromfile(bundles_dict[
this_sub][this_ses])
seg_sft.sft.to_vox()
sls = seg_sft.get_bundle(bundle_name).streamlines
mapping = mapping_dict[this_sub][this_ses]
if len(sls) > 0:
delta = dts.values_from_volume(
mapping.forward,
sls, np.eye(4))
sls_mni.extend([d + s for d, s in zip(delta, sls)])
moved_sft = StatefulTractogram(
sls_mni,
reg_template,
Space.VOX)
save_tractogram(
moved_sft,
op.abspath(op.join(
self.afq_path,
f"bundle-{bundle_name}_subjects-all_MNI.trk")),
bbox_valid_check=False)
    def upload_to_s3(self, s3fs, remote_path):
""" Upload entire AFQ derivatives folder to S3"""
s3fs.put(self.afq_path, remote_path, recursive=True)
if op.exists(self.afqb_path):
s3fs.put(self.afqb_path, remote_path, recursive=True)
    def export_group_density(self, boolify=True):
"""
Generate a group density map by combining single subject density maps.
Parameters
----------
boolify : bool
Whether to turn subject streamline count images into booleans
before adding them into the group density map.
        Returns
        -------
Path to density nifti file.
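
        Examples
        --------
        # A minimal sketch: accumulate raw streamline counts instead of
        # booleans.
        density_file = myafq.export_group_density(boolify=False)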
"""
densities = self.export("density_maps", collapse=False)
ex_density_init = nib.load(densities[
self.valid_sub_list[0]][
self.valid_ses_list[0]]) # for shape and header
group_density = np.zeros_like(ex_density_init.get_fdata())
self.logger.info("Generating Group Density...")
for ii in tqdm(range(len(self.valid_ses_list))):
this_sub = self.valid_sub_list[ii]
this_ses = self.valid_ses_list[ii]
this_density = nib.load(densities[this_sub][this_ses]).get_fdata()
if boolify:
this_density = this_density.astype(bool)
group_density = group_density + this_density
group_density = group_density / len(self.valid_sub_list)
group_density = nib.Nifti1Image(
group_density,
ex_density_init.affine,
header=ex_density_init.header
)
out_fname = op.abspath(op.join(
self.afq_path,
f"desc-density_subjects-all_space-MNI_dwi.nii.gz"))
nib.save(group_density, out_fname)
return out_fname
    def assemble_AFQ_browser(self, output_path=None, metadata=None,
page_title="AFQ Browser", page_subtitle="",
page_title_link="", page_subtitle_link=""):
"""
Assembles an instance of the AFQ-Browser from this AFQ instance.
First, we generate the combined tract profile if it is not already
generated. This includes running the full AFQ pipeline if it has not
already run. The combined tract profile is one of the outputs of
export_all.
Second, we generate a streamlines.json file from the bundle
recognized in the first subject's first session.
Third, we call AFQ-Browser's assemble to assemble an AFQ-Browser
instance in output_path.
Parameters
----------
output_path : str
Path to location to create this instance of the browser in.
Called "target" in AFQ Browser API. If None,
bids_path/derivatives/afq_browser is used.
Default: None
metadata : str
            Path to subject metadata csv file. If None, a metadata file
containing only subject ID is created. This file requires a
"subjectID" column to work.
Default: None
page_title : str
Page title. If None, prompt is sent to command line.
Default: "AFQ Browser"
page_subtitle : str
Page subtitle. If None, prompt is sent to command line.
Default: ""
page_title_link : str
Title hyperlink (including http(s)://).
If None, prompt is sent to command line.
Default: ""
page_subtitle_link : str
Subtitle hyperlink (including http(s)://).
If None, prompt is sent to command line.
Default: ""
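
        Examples
        --------
        # A minimal sketch; the page title is a placeholder.
        myafq.assemble_AFQ_browser(page_title="My Study")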
"""
if not using_afqb:
self.logger.warning((
"AFQ Browser is not installed, so AFQ Browser instance "
"cannot be assembled. AFQ Browser can be installed with: "
"`pip install pyAFQ[afqbrowser]` or "
"`pip install AFQ-Browser>=0.3`"))
return
n_points_profile = self.export("n_points_profile", collapse=False)[
self.valid_sub_list[0]][
self.valid_ses_list[0]]
if n_points_profile != 100:
self.logger.warning((
"AFQ Browser requires 100 points per tract profile, "
"so AFQ Browser instance cannot be assembled."))
return
if output_path is None:
output_path = self.afqb_path
os.makedirs(self.afqb_path, exist_ok=True)
# generate combined profiles csv
self.combine_profiles()
# generate streamlines.json file
sls_json_fname = self.get_streamlines_json()
afqb.assemble(
op.abspath(op.join(self.afq_path, "tract_profiles.csv")),
target=output_path,
metadata=metadata,
streamlines=sls_json_fname,
title=page_title,
subtitle=page_subtitle,
link=page_title_link,
sublink=page_subtitle_link)
class ParallelGroupAFQ():
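    """Parallel version of GroupAFQ.

    Builds a GroupAFQ instance to resolve inputs, then runs each
    participant/session as a separate pydra task when exporting.
    """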
def __init__(self, *args, **kwargs):
orig = GroupAFQ(*args, **kwargs)
orig.parallel_params["submitter_params"] = \
orig.parallel_params.get("submitter_params", {"plugin": "cf"})
orig.parallel_params["cache_dir"] = \
orig.parallel_params.get("cache_dir", None)
self.parallel_params = orig.parallel_params
self.pAFQ_kwargs = orig.pAFQ_inputs_list
self.finishing_params = dict()
self.finishing_params["args"] = args
self.finishing_params["kwargs"] = kwargs
self.finishing_params["output_dirs"] = [pAFQ.kwargs["output_dir"]
for pAFQ in orig.pAFQ_list]
def _submit_pydra(self, runnable):
try:
with pydra.Submitter(
**self.parallel_params["submitter_params"],
) as sub:
sub(runnable=runnable)
# Addresses https://github.com/nipype/pydra/issues/630
except AttributeError as e:
if "'NoneType' object has no attribute 'replace'" not in str(e):
raise
def export(self, attr_name="help", collapse=True):
f"""
Export a specific output. To print a list of available outputs,
call export without arguments.
{valid_exports_string}
Parameters
----------
attr_name : str
Name of the output to export. Default: "help"
collapse : bool
Whether to collapse session dimension if there is only 1 session.
Default: True
Returns
-------
output : dict
The specific output as a dictionary. Keys are subjects.
Values are dictionaries with keys of sessions
if multiple sessions are used. Otherwise, values are
the output.
None if called without arguments.
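
        Examples
        --------
        # A hedged sketch: compute recognized bundles for each
        # subject/session as separate pydra tasks.
        pafq.export("bundles")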
"""
@pydra.mark.task
def export_sub(pAFQ_kwargs, attr_name):
pAFQ = ParticipantAFQ(
pAFQ_kwargs.dwi_data_file,
pAFQ_kwargs.bval_file,
pAFQ_kwargs.bvec_file,
pAFQ_kwargs.results_dir,
**pAFQ_kwargs.kwargs)
pAFQ.export(attr_name)
# Submit to pydra
export_sub_task = export_sub(
attr_name=attr_name,
cache_dir=self.parallel_params["cache_dir"]
).split("pAFQ_kwargs", pAFQ_kwargs=self.pAFQ_kwargs)
self._submit_pydra(export_sub_task)
def export_all(self, viz=True, afqbrowser=True, xforms=True, indiv=True):
""" Exports all the possible outputs
Parameters
----------
viz : bool
Whether to output visualizations. This includes tract profile
plots, a figure containing all bundles, and, if using the AFQ
segmentation algorithm, individual bundle figures.
Default: True
afqbrowser : bool
Whether to output an AFQ-Browser from this AFQ instance.
Default: True
xforms : bool
Whether to output the reg_template image in subject space and,
depending on if it is possible based on the mapping used, to
output the b0 in template space.
Default: True
indiv : bool
Whether to output individual bundles in their own files, in
addition to the one file containing all bundles. If using
the AFQ segmentation algorithm, individual ROIs are also
output.
Default: True
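
        Examples
        --------
        # A minimal sketch: run the full pipeline per subject in parallel;
        # group-level outputs are assembled once all profiles exist.
        pafq.export_all()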
"""
@pydra.mark.task
def export_sub(
pAFQ_kwargs,
finishing_params,
viz,
afqbrowser,
xforms,
indiv
):
pAFQ = ParticipantAFQ(
pAFQ_kwargs.dwi_data_file,
pAFQ_kwargs.bval_file,
pAFQ_kwargs.bvec_file,
pAFQ_kwargs.results_dir,
**pAFQ_kwargs.kwargs)
pAFQ.export_all(viz, xforms, indiv)
for dir in finishing_params["output_dirs"]:
if not glob.glob(op.join(dir, "*_desc-profiles_dwi.csv")):
return
gAFQ = GroupAFQ(*finishing_params["args"],
**finishing_params["kwargs"])
gAFQ.export_all(viz, afqbrowser, xforms, indiv)
# Submit to pydra
export_sub_task = export_sub(
finishing_params=self.finishing_params,
viz=viz,
afqbrowser=afqbrowser,
xforms=xforms,
indiv=indiv,
cache_dir=self.parallel_params["cache_dir"]
).split("pAFQ_kwargs", pAFQ_kwargs=self.pAFQ_kwargs)
self._submit_pydra(export_sub_task)
def download_and_combine_afq_profiles(bucket,
study_s3_prefix="", deriv_name=None,
out_file=None,
upload=False, session=None,
**kwargs):
"""
Download and combine tract profiles from different subjects / sessions
on an s3 bucket into one CSV.
Parameters
----------
bucket : str
The S3 bucket that contains the study data.
study_s3_prefix : str
The S3 prefix common to all of the study objects on S3.
    deriv_name : str, optional
        If deriv_name is not None, it should be a string that specifies
        which derivatives folder to download and combine profiles from.
    out_file : filename, optional
        Filename for the combined output CSV.
upload : bool or str, optional
If True, upload the combined CSV to Amazon S3 at
bucket/study_s3_prefix/derivatives/afq. If a string,
assume string is an Amazon S3 URI and upload there.
        Default: False
session : str, optional
Session to get CSVs from. If None, all sessions are used.
Default: None
kwargs : optional
Optional arguments to pass to S3BIDSStudy.
Returns
-------
    Output CSV as a pandas DataFrame.
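
    Examples
    --------
    # A hedged sketch; the bucket name and prefix are placeholders:
    df = download_and_combine_afq_profiles(
        "my-bucket", study_s3_prefix="my-study",
        out_file="combined_tract_profiles.csv")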
"""
if "subjects" not in kwargs:
kwargs["subjects"] = "all"
if "anon" not in kwargs:
kwargs["anon"] = False
if deriv_name is None:
deriv_name = True
with nib.tmpdirs.InTemporaryDirectory() as t_dir:
remote_study = S3BIDSStudy(
"get_profiles",
bucket,
study_s3_prefix,
**kwargs)
remote_study.download(
t_dir,
include_modality_agnostic=False,
include_derivs=deriv_name,
include_derivs_dataset_description=True,
suffix="profiles.csv")
temp_study = BIDSLayout(t_dir, validate=False, derivatives=True)
if session is None:
profiles = temp_study.get(
extension='csv',
suffix='profiles',
return_type='filename')
else:
profiles = temp_study.get(
session=session,
extension='csv',
suffix='profiles',
return_type='filename')
df = combine_list_of_profiles(profiles)
df.to_csv("tmp.csv", index=False)
if upload is True:
bids_prefix = "/".join([bucket, study_s3_prefix]).rstrip("/")
fs = s3fs.S3FileSystem()
fs.put(
"tmp.csv",
"/".join([
bids_prefix,
"derivatives",
"afq",
"combined_tract_profiles.csv"
]))
elif isinstance(upload, str):
fs = s3fs.S3FileSystem()
fs.put("tmp.csv", upload.replace("s3://", ""))
if out_file is not None:
out_file = op.abspath(out_file)
os.makedirs(op.dirname(out_file), exist_ok=True)
df = clean_pandas_df(df)
df.to_csv(out_file, index=False)
return df
def combine_list_of_profiles(profile_fnames):
"""
Combine tract profiles from different subjects / sessions
into one CSV.
Parameters
----------
profile_fnames : list of str
List of csv filenames.
Returns
-------
    Output CSV as a pandas DataFrame.
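
    Examples
    --------
    # A hedged sketch; filenames are placeholders and must contain 'sub-':
    df = combine_list_of_profiles(
        ["derivatives/afq/sub-01/sub-01_desc-profiles_dwi.csv",
         "derivatives/afq/sub-02/sub-02_desc-profiles_dwi.csv"])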
"""
dfs = []
for fname in profile_fnames:
profiles = pd.read_csv(fname)
profiles['subjectID'] = fname.split('sub-')[1].split('/')[0]
if 'ses-' in fname:
session_name = fname.split('ses-')[1].split('/')[0]
else:
session_name = 'unknown'
profiles['sessionID'] = session_name
dfs.append(profiles)
return clean_pandas_df(pd.concat(dfs))