====== PleioFDR - Multiple runs on cluster (ABEL/TSD) ======


Pleiotropy-informed conditional and conjunctional false discovery rate ([[https://github.com/precimed/pleiofdr|pleiofdr]]) is used quite intensively to allow loci discovery in related phenotypes. You need a bash script like the following if you have 10 primary traits and 10 secondary traits, then you want to run condFDR and conjFDR in all combinations (total 200 condFDR runs, 100 conjFDR). You may have less traits to calculate, but following script is submitted only once, and you get all results in separate folders.\\

You need to have following items:
  - A sub-directory called "traitFolder" in your woring direcotry, such as [[https://github.com/precimed/pleiofdr|pleiofdr]]. Primary phenotypes (.mat files) are put in the sub-directory "trait1"  and secondary phenotypes (.mat files) in sub-directory "trait2" in the traitFolder.
  - A subdirectory [[https://github.com/precimed/python_convert| python_convert ]] in the working directory 
  - Submit the job (if the following script is in myJob.sh) as __sbatch myJob.sh__ on ABEL. TSD has similar submission criteria with little change in account.
<code>
#!/bin/bash

# **************************************************************
# STEP 1: ABEL specifications for job submission 
# **************************************************************

# Job name:
#SBATCH --job-name=myJob
#
# Project:
#SBATCH --account=nn9114k
#
# Wall clock limit:
#SBATCH --time=09:59:59
#
#SBATCH --cpus-per-task=16

# Max memory usage:
#SBATCH --mem-per-cpu=3828M

## Set up job environment:
source /cluster/bin/jobsetup
module purge   # clear any inherited modules
#set -o errexit # exit on errors

# **************************************************************
# STEP 2: module file loading.My pown built module,
#  e.g., Anaconda/2 will also be loaded.
# **************************************************************
module use --append $HOME/modulefiles
module load Anaconda/2
module load matlab/R2018a
module load plink/1.90b5.2

# **************************************************************
# STEP 3: reading trait files 
# **************************************************************

# python_covert is directory located as shown below.
# the directory contains several utilities inclding sumstat.py. fdrmat2csv.py etc.

python_convert=./python_convert


#reading trait1
trait1f=./traitFolder/trait1/
trait1n=$(ls $trait1f)

# 1st for loop starts here
for t1 in $trait1n
do
trait1nb=$(basename "$t1")
IFS='_' read -r -a trait1_array <<< "$trait1nb"
trait1name=${trait1_array[1]}


# reading trait2
trait2f=./traitFolder/trait2/
trait2n=$(ls $trait2f)

# 2nd loop starts here
for t2 in $trait2n

do
trait2nb=$(basename "$t2")
IFS='_' read -r -a trait2_array <<< "$trait2nb"
trait2name=${trait2_array[1]}

# Replace the variables
sed -i -e "/traitfolder=/c\traitfolder=./traitFolder"\
    -i -e "/traitfile1=/c\traitfile1="/trait1/"$trait1nb" \
    -i -e "/traitname1=/c\traitname1=$trait1name"\
    -i -e "/traitfiles=/c\traitfiles={\'/trait2/$trait2nb\'}"\
    -i -e "/traitnames=/c\traitnames={\'$trait2name\'}" config.txt


# **************************************************************
# CONDITIONAL FDR (cond FDR) -1st directtion
# **************************************************************

# Output directory
cond_out="results_"$trait1name"_"$trait2name"_condfdr"

# replace varibles even more
sed -i -e "/outputdir=/c\outputdir=$cond_out/" \
    -i -e "/stattype=/c\stattype=condfdr" \
    -i -e "/fdrthresh=/c\fdrthresh=0.01" config.txt

# Run Matlab now for cond_FDR
matlab -nodesktop -r "run runme.m; exit"

if [  -d  $cond_out  ]; then

# The output of the above MATLAB run results in "result.mat" file.
# Now this file will be converted to a CSV file ("result.mat.csv")
# by a python script, named fdrmat2csv.py.

# After conversion the CLUMPing is performed. 

# First check if the result file was created 

cd $cond_out
python $python_convert/fdrmat2csv.py result.mat $python_convert/9279485_ref.bim

# clumping
python $python_convert/sumstats.py clump --clump-field FDR --force  --sumstats result.mat.csv --bfile-chr $python_convert/plink_503eur/chr@  --exclude-ranges 6:25119106-33854733 8:7200000-12500000 19:42000000-47000000 --clump-p1 0.01 --out "results_${trait1name}_${trait2name}_condFDR_clump"

awk 'BEGIN{print "SNP", "P-val"} NR>1 {print $3, '0.0000000005'}' results_${trait1name}_${trait2name}_condFDR_clump.loci.csv > results_${trait1name}_${trait2name}_condFDR_clump.FUMA.loci.csv

python $python_convert/manhattan.py result.mat.csv --lead results_${trait1name}_${trait2name}_condFDR_clump.lead.csv --indep results_${trait1name}_${trait2name}_condFDR_clump.indep.csv --striped-background --no-legend --p FDR --y-label FDR --p-thresh 0.01 --out ${trait1name}_${trait2name}_condFDR_manhattan.png


### NOW IMPLEMENTED
matlab -nodesktop -r "tt=openfig('${trait1name}_${trait2name}_condfdr_0.01_manhattan.fig'); set(tt,'PaperUnits','inches','PaperPosition',[0 0 20 15]); saveas(tt, '${trait1name}_${trait2name}_condfdr_0.01_manhattan.png'); exit"

cd ../

fi
# **************************************************************
# CONDITIONAL FDR (cond FDR) -2nd directtion
# **************************************************************

# Output directory
cond_out="results_"$trait2name"_"$trait1name"_condfdr"
# Replace the variables
sed -i -e "/traitfolder=/c\traitfolder=./traitFolder"\
    -i -e "/traitfile1=/c\traitfile1="/trait2/"$trait2nb" \
    -i -e "/traitname1=/c\traitname1=$trait2name"\
    -i -e "/traitfiles=/c\traitfiles={\'/trait1/$trait1nb\'}"\
    -i -e "/traitnames=/c\traitnames={\'$trait1name\'}" config.txt

# replace varibles even more
sed -i -e "/outputdir=/c\outputdir=$cond_out/" \
    -i -e "/stattype=/c\stattype=condfdr" \
    -i -e "/fdrthresh=/c\fdrthresh=0.01" config.txt

# Run Matlab now for cond_FDR
matlab -nodesktop -r "run runme.m; exit"

if [  -d  $cond_out  ]; then

# The output of the above MATLAB run results in "result.mat" file.
# Now this file will be converted to a CSV file ("result.mat.csv")
# by a python script, named fdrmat2csv.py.

# After conversion the CLUMPing is performed.

# First check if the result file was created

cd $cond_out
python $python_convert/fdrmat2csv.py result.mat $python_convert/9279485_ref.bim

# clumping
python $python_convert/sumstats.py clump --clump-field FDR --force  --sumstats result.mat.csv --bfile-chr $python_convert/plink_503eur/chr@  --exclude-ranges 6:25119106-33854733 8:7200000-12500000 19:42000000-47000000 --clump-p1 0.01 --out "results_${trait2name}_${trait1name}_condFDR_clump"

awk 'BEGIN{print "SNP", "P-val"} NR>1 {print $3, '0.0000000005'}' results_${trait2name}_${trait1name}_condFDR_clump.loci.csv > results_${trait2name}_${trait1name}_condFDR_clump.FUMA.loci.csv

python $python_convert/manhattan.py result.mat.csv --lead results_${trait2name}_${trait1name}_condFDR_clump.lead.csv --indep results_${trait2name}_${trait1name}_condFDR_clump.indep.csv --striped-background --no-legend --p FDR --y-label FDR --p-thresh 0.01 --out ${trait2name}_${trait1name}_condFDR_manhattan.png


### NOW IMPLEMENTED
matlab -nodesktop -r "tt=openfig('${trait2name}_${trait1name}_condfdr_0.01_manhattan.fig'); set(tt,'PaperUnits','inches','PaperPosition',[0 0 20 15]); saveas(tt, '${trait2name}_${trait1name}_condfdr_0.01_manhattan.png'); exit"

cd ../

fi
# **************************************************************
# CONJUNCTIONAL FDR (conj FDR)
# **************************************************************

# Output directory
conj_out="results_"$trait1name"_"$trait2name"_conjfdr"
# Replace the variables
sed -i -e "/traitfolder=/c\traitfolder=./traitFolder"\
    -i -e "/traitfile1=/c\traitfile1="/trait1/"$trait1nb" \
    -i -e "/traitname1=/c\traitname1=$trait1name"\
    -i -e "/traitfiles=/c\traitfiles={\'/trait2/$trait2nb\'}"\
    -i -e "/traitnames=/c\traitnames={\'$trait2name\'}" config.txt

# variable change for cnj_fdr 
sed -i -e "/outputdir=/c\outputdir=$conj_out/"\
    -i -e "/stattype=/c\stattype=conjfdr"\
    -i -e "/fdrthresh=/c\fdrthresh=0.05" config.txt

# Run Matlab now for conj_FDR


matlab -nodesktop -r "run runme.m; exit"

if [  -d  $conj_out  ]; then

# The output of the above MATLAB run results in "result.mat" file.
# Now this file will be converted to a CSV file ("result.mat.csv")
# by a python script, named fdrmat2csv.py.

# After conversion the CLUMPing is performed.

# First check if the result file was created

cd $conj_out
python $python_convert/fdrmat2csv.py result.mat $python_convert/9279485_ref.bim
# clump
python $python_convert/sumstats.py clump --clump-field FDR --force  --sumstats result.mat.csv --bfile-chr $python_convert/plink_503eur/chr@  --exclude-ranges 6:25119106-33854733 8:7200000-12500000 19:42000000-47000000 --clump-p1 0.05 --out "results_${trait1name}_${trait2name}_conjFDR_clump" 

awk 'BEGIN{print "SNP", "P-val"} NR>1 {print $3, '0.0000000005'}' results_${trait1name}_${trait2name}_conjFDR_clump.loci.csv > results_${trait1name}_${trait2name}_conjFDR_clump.FUMA.loci.csv

python $python_convert/manhattan.py result.mat.csv --lead results_${trait1name}_${trait2name}_conjFDR_clump.lead.csv --indep results_${trait1name}_${trait2name}_conjFDR_clump.indep.csv --striped-background --no-legend --p FDR --y-label FDR --p-thresh 0.05 --out ${trait1name}_${trait2name}_conjFDR_manhattan.png


matlab -nodesktop -r "tt=openfig('${trait1name}_${trait2name}_conjfdr_0.05_manhattan.fig'); set(tt,'PaperUnits','inches','PaperPosition',[0 0 20 15]); saveas(tt, '${trait1name}_${trait2name}_conjfdr_0.05_manhattan.png'); exit"
cd ../

fi

done
done


</code>