#!/bin/sh
# Some sort of component filtering algorithm needs to go here. For now,
# we will do them all, or just one.
# This section will extract max voxel coordinates of all signicant clusters
# We will maintain the numbering schema of the components for now, but in the
# future it might make sense to link this with an ontology to provide a more
# meaningful name for the functional network. We will then write the coordinates
# to text file in the format:
# num x y z
#
# We want one text file per dual regression significant result. Each
# text file will be used to extract coordinates, and query subject raw data.
#
# We can extract the timeseries for each coordinate for each subject,
# and we want one text file per max voxel, OR one text file per component,
# containing multiple max voxels
# Each group of max voxel timeseries will be used as the group of predictors
# (for one subject) and the subject's ADHD diagnosis used as the decision for
# the training set of the data
# The goal at the end of the day is to predict ADHD diagnosis based on raw
# resting data (although it is motion corrected, registered to MNI)
# Submit on command line: melodic_ts.sh /path/to/ica/experiment name.gica dual_regression_name
TSOUT="negts"
EXPERIMENT=$1 # This is the top level, experimental directory, no slash on the end
GICA=$2 # The name of the gica run (NAME.gica)
DRRUN=$3 # The name of the dual regression run
COMPS=( {0..24} ) # List of components to extract timeseries for, number of thresh_zstat#.nii.gz under stats
# For a range, do ( {1..30) ) for a custom list do ( {1 3 5} ) and for just one, do ( 4 )
# Make sure that we have group ICA folder
if [ ! -e "$EXPERIMENT/gica/$GICA/groupmelodic.ica" ]; then
echo "Cannot find groupmelodic.ica directory " $GICA " under " $EXPERIMENT"/gica. Exiting."
exit 32
else
ICADIR=$EXPERIMENT/gica/$GICA
fi
# Make sure that we have dr run specified...
if [ ! -d "$EXPERIMENT/dr/$DRRUN/result" ]; then
echo "Cannot find dual regression directory " $DRRUN " under " $EXPERIMENT"/dr Exiting."
exit 32
else
DRRUN=$EXPERIMENT/dr/$DRRUN/result
fi
# Read in filelist from group run, check that we have all single subject input
FILELIST=`cat $ICADIR/.filelist`
echo "Filelist is " $FILELIST
INPUTS=( $FILELIST )
echo "Inputs are " $INPUTS
for i in ${INPUTS[@]} ; do
if [ ! -e "$i.nii.gz" ]; then
echo "Cannot find single subject input " $i " Exiting."
exit 32
fi
done
# Create output ts folder
if [ ! -e "$EXPERIMENT/ts" ]; then
echo "Creating ts (timeseries) directory under " $EXPERIMENT "..."
mkdir -p $EXPERIMENT/ts
fi
if [ ! -e "$EXPERIMENT/ts/$TSOUT" ]; then
echo "Creating " $TSOUT " under ts..."
mkdir -p $EXPERIMENT/ts/$TSOUT
else
echo $TSOUT " already exists! Choose a different name. Exiting."
exit 32
fi
OUTPUT=$EXPERIMENT/ts/$TSOUT
# Create output log folder
if [ ! -e "$EXPERIMENT/ts/$TSOUT/log" ]; then
echo "Creating ts/$TSOUT/log output folder..."
mkdir -p $OUTPUT/log
fi
# FILTER OUT BAD ONES SOMEHOW...
# ALSO NEED TO CHECK FOR EMPTY IMAGES and REMOVE
# FOR NOW WE WILL USE COMPONENTS SPECIFIED BY USER (UNDER COMPS)
# Get list of output dr images, add to list if specified
DRRES=''
COUNT=0
for temp in `ls $DRRUN/dr_stage3_ic*_tfce_corrp_tstat1.nii.gz`; do
for n in "${COMPS[@]}"; do
if [ "$n" -eq "$COUNT" ] ; then
echo "Adding " $temp " to extract timeseries from..."
DRRES[$COUNT]=$temp;
fi
done
let COUNT=$COUNT+1;
done
# GET MAX VOXEL COORDINATES FOR EACH
# Go to output directory
cd $OUTPUT
ZVALSORT=''
COUNT=0
for IMAGE in ${DRRES[@]}; do
echo "Getting local maximum voxel coordinates for component " ${COMPS[$COUNT]}
echo "Image is: " $IMAGE"..."
# This is the text file to save the local maxima list, with a header plus (cluster_index) zval xvox yvox zvox
OLFILE=lomax${COMPS[$COUNT]}.txt
echo "OLFILE is " $OLFILE
# We run cluster, with the dual regression corrp, thresholded image as input
cluster --in=$IMAGE.nii.gz --thresh=0 --olmax=$OLFILE --no_table
# We read in the output table from file
lines=( $(cat $OLFILE) )
let uprange=${#lines[@]}-5
echo "Uprange is " $uprange
# Iterate through subjects, and extract timeseries for each one, print to file
for SUB in ${INPUTS[@]} ; do
# WE SHOULD KEEP TRACK OF ZVAL - TO PRESENT LIST OF MOST SIGNIFICANT DIFFERENCES TO USER, IN ORDER?
for (( cnt=6; cnt <= $uprange; cnt+=5 )); do
CLUSINDEX=${lines[$cnt]}
ZVAL=${lines[$cnt+1]}
XVOX=${lines[$cnt+2]}
YVOX=${lines[$cnt+3]}
ZVOX=${lines[$cnt+4]}
echo "Extracting for XYZ: " $XVOX " " $YVOX " " $ZVOX
# Go through the individual subject ICA directories, extract each set for each subject, into one long row, print to file
# one row per subject!
TS=`fslmeants -i $SUB.nii.gz -c $XVOX $YVOX $ZVOX --transpose`
echo -ne $TS" " >> ${COMPS[$COUNT]}_ts.txt
done
echo "Finished with subject " $SUB
echo "" >> ${COMPS[$COUNT]}_ts.txt
done
# PREPARE LIST ZVALUES - GREATEST TO LEAST
# Here we look through the list of the top Z value for each component and print a file with these ordered from largest --> smallest
# Cycle again through the lines and grab the ZVAL and CLUSTINDEX
for (( cnt=6; cnt <=$uprange; cnt+=5 )); do
CLUSINDEX=${lines[$cnt]}
ZVAL=${lines[$cnt+1]}
echo "Adding ZVAL " $ZVAL " to list for later sorting"
ZVALSORT=$ZVALSORT" "$ZVAL-${COMPS[$COUNT]}-$CLUSINDEX
done
let COUNT=$COUNT+1
done
# Now print the string of Zvalues to file
echo "zval-component-cluster#" >> zval_sort.txt
for zv in `echo $ZVALSORT`; do
echo $zv
done | sort -r >> zval_sort.txt
# Here is an example of the training data. There is one row per subject with a list of the timeseries
# for each max voxel. To start, this will be done for the component that is clearly the attention
# network. In the future, I'd like to figure out a way to first filter out bad components, and then
# choose out the "most meaningful" based on the disorder in question (or perhaps the most deviant from
# the norm?) This method would be used to come up with an algorithm that can make an ADHD diagnosis based
# on the extracted raw data from these timepoints during a resting bold scan.
# These would also need to come with labels for the coordinates, so you don't forget where the data
# has come from, and can reproduce the values for testing data.
# TRAINING DATA EXAMPLE
# ADHD_rX TIMESERIES (start with for just one component - the attention one?)
# Subject : [_TP, _TP, ,TP..._TP, _TP, _TP...n_TP, n_TP, n_TP]
# Subject : 0 [_TP, _TP, ,TP..._TP, _TP, _TP...n_TP, n_TP, n_TP]
# Subject : [_TP, _TP, ,TP..._TP, _TP, _TP...n_TP, n_TP, n_TP]