#!/bin/bash
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=2
#SBATCH --mem=16gb
#SBATCH --time=15:00:00
#SBATCH --partition=Orion

# Make bed files with junctions from spliced alignments, as the
# old tophat program used to do.
# The score field in the output bed file is the number of spliced
# read alignments that support the junction.

# Work from the directory the job was submitted from. When the variable is
# unset (script run by hand, outside Slurm) stay in the current directory
# instead of letting a bare "cd" jump to $HOME. Abort if the directory
# cannot be entered so later steps never run in the wrong place.
cd "${SLURM_SUBMIT_DIR:-.}" || {
    echo "ERROR: cannot change to directory '${SLURM_SUBMIT_DIR:-.}'" >&2
    exit 1
}

# -u - use singly-mapped reads only
# -f - require this many bases on flanking exon
# assumes bam files in the same directory
# requires 2bit file for genome
# If working with IGB-related genomes, you can "wget" these 2bit files from
# https://lorainelab-quickload.scidas.org/quickload
# For example, to get the SL5.0 tomato genome file, run:
#   wget https://lorainelab-quickload.scidas.org/quickload/S_lycopersicum_Jun_2022/S_lycopersicum_Jun_2022.2bit

# J: jar file handed to "java -jar" further down.
# T: 2bit genome sequence file handed to the jar's -b option.
# Both are relative names, so they are looked up in the working directory.
J="find-junctions-1.0.0-jar-with-dependencies.jar"
T="S_lycopersicum_Jun_2022.2bit"

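# S (output file prefix) and F (input BAM file) are passed in through the
# sbatch --export option (the Slurm equivalent of qsub -v), for example:
#   sbatch --export=ALL,S=sample1,F=sample1.bam <this script>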

# Java8 or higher must be in PATH

# bgzip and tabix are expected to be on PATH after this module load.
# NOTE(review): both tools ship with htslib; confirm that the cluster's
# samtools module provides them.
module load samtools

# Make the sort | bgzip pipeline report a failure in either stage.
set -o pipefail

# Both variables must be supplied by the submitter; without S the script
# would silently operate on a file literally named ".FJ.bed".
: "${S:?S (output file prefix) must be set}"
: "${F:?F (input BAM file) must be set}"

bed="$S.FJ.bed"
bed_gz="$bed.gz"
bed_gz_tmp="$bed_gz.tmp"

# Skip the work when a non-empty compressed junction file already exists.
if [ ! -s "$bed_gz" ]; then
    # The heap limit is kept below the 16gb requested with "#SBATCH --mem" so
    # the JVM collects garbage (or fails with OutOfMemoryError) instead of the
    # scheduler killing the job for exceeding its memory allocation.
    # $F is intentionally unquoted so that it may carry more than one
    # space-separated input file.
    # shellcheck disable=SC2086
    if ! java -Xmx14g -jar "$J" -u -f 5 -b "$T" -o "$bed" $F; then
        echo "ERROR: find-junctions failed for $S" >&2
        exit 1
    fi
    if [ -s "$bed" ]; then
        # Compress into a temporary name and rename only on success: an
        # interrupted job then never leaves a partial .gz that the check
        # above would mistake for a finished result, and the uncompressed
        # bed file is removed only once the compressed copy is complete.
        if sort -k1,1 -k2,2n "$bed" | bgzip > "$bed_gz_tmp" &&
            mv "$bed_gz_tmp" "$bed_gz"; then
            rm "$bed"
            tabix -s 1 -b 2 -e 3 -f -0 "$bed_gz"
        else
            echo "ERROR: sorting/compressing $bed failed; keeping $bed" >&2
            rm -f "$bed_gz_tmp"
            exit 1
        fi
    fi
fi
