######################################################################

# Building the drBlastTab for the Ensembl v89 release
# (DONE - 2017-09-07 - Hiram)

######################################################################
######################################################################

# All of the alignments are run here, even though only three of the
# resulting tables are needed in the end.

######################################################################
# Set up the work directory and symlink the protein faa files into it.
#
# runDir is the absolute path of that work directory.  The loops further
# down expand ${runDir} while they write each config.ra and run script,
# so it has to be set in this shell before they are used.

runDir=/hive/data/genomes/danRer10/bed/ensGene.89/blastTab
mkdir -p "${runDir}"
cd "${runDir}"

# one symlink per species, named <db>.<geneSet>.faa as the loops below expect
ln -s /hive/data/genomes/ce11/bed/ws245Genes/ws245Pep.faa ce11.ws245Pep.faa
# NOTE(review): this danRer10 link points at ensGene.86 although the work
# directory is ensGene.89 — confirm that is the intended protein set.
ln -s /hive/data/genomes/danRer10/bed/ensGene.86/ensembl.faa danRer10.ensembl.faa
ln -s /hive/data/genomes/dm6/bed/ensGene.86/ensembl.faa dm6.ensembl.faa
ln -s /hive/data/genomes/mm10/bed/ucsc.16.1/ucscGenes.faa mm10.ucscGenes.faa
ln -s /hive/data/genomes/rn6/bed/ensGene.86/ensembl.faa rn6.ensembl.faa
ln -s /hive/data/genomes/sacCer3/bed/sgdAnnotations/blastTab/sacCer3.sgd.faa sacCer3.sgd.faa
ln -s /cluster/data/hg38/bed/ucsc.18.1/ucscGenes.faa hg38.ucscGenes.faa

######################################################################
# set up a sub-directory for each target database against which the
#	protein alignments are going to be run

############# rn6 #########################
# rn6 target (ensembl.faa protein set).
# Writes rn6/rn6.config.ra and an executable rn6/run.rn6.sh that changes
# into the build directory and runs doHgNearBlastp.pl on that config,
# sending all output to do.log.
#
# ${runDir} is expanded while the files are written.  The relative ${D}/
# paths used here only line up with the absolute ${runDir}/${D} paths when
# runDir is the current directory, so default to that if it is not set.
: "${runDir:=${PWD}}"
for D in rn6
do
  mkdir -p "${D}"
  # make the target faa reachable from inside the build directory as well
  ln -s "../${D}.ensembl.faa" "./${D}"

  cat > "${D}/${D}.config.ra" <<EOF
targetGenesetPrefix ensGene
targetDb ${D}
queryDbs danRer10
${D}Fa ${runDir}/${D}.ensembl.faa
danRer10Fa ${runDir}/danRer10.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp
EOF

  # '\\' keeps a real line continuation in the generated script
  cat > "${D}/run.${D}.sh" <<EOF
#!/bin/bash
set -beEu -o pipefail
cd "${runDir}/${D}"
time (doHgNearBlastp.pl -noLoad -workhorse=hgwdev \\
        -noSelf -clusterHub=ku ${D}.config.ra) > do.log 2>&1
EOF
  chmod +x "${D}/run.${D}.sh"
done

############# sacCer3 #########################

# sacCer3 target (sgd gene tables, sacCer3.sgd.faa protein set).
# Writes sacCer3/sacCer3.config.ra and an executable sacCer3/run.sacCer3.sh
# that changes into the build directory and runs doHgNearBlastp.pl on that
# config, sending all output to do.log.
#
# ${runDir} is expanded while the files are written.  The relative ${D}/
# paths used here only line up with the absolute ${runDir}/${D} paths when
# runDir is the current directory, so default to that if it is not set.
: "${runDir:=${PWD}}"
for D in sacCer3
do
  mkdir -p "${D}"

  cat > "${D}/${D}.config.ra" <<EOF
targetGenesetPrefix sgd
targetDb ${D}
queryDbs danRer10
${D}Fa ${runDir}/${D}.sgd.faa
danRer10Fa ${runDir}/danRer10.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp
EOF

  # '\\' keeps a real line continuation in the generated script
  cat > "${D}/run.${D}.sh" <<EOF
#!/bin/bash
set -beEu -o pipefail
cd "${runDir}/${D}"
time (doHgNearBlastp.pl -noLoad -workhorse=hgwdev \\
        -noSelf -clusterHub=ku ${D}.config.ra) > do.log 2>&1
EOF
  chmod +x "${D}/run.${D}.sh"
done

############# hg38, mm10 #########################

# hg38 and mm10 targets (ucscGenes gene tables, <db>.ucscGenes.faa).
# For each database, writes <db>/<db>.config.ra and an executable
# <db>/run.<db>.sh that changes into the build directory and runs
# doHgNearBlastp.pl on that config, sending all output to do.log.
#
# ${runDir} is expanded while the files are written.  The relative ${D}/
# paths used here only line up with the absolute ${runDir}/${D} paths when
# runDir is the current directory, so default to that if it is not set.
: "${runDir:=${PWD}}"
for D in hg38 mm10
do
  mkdir -p "${D}"

  cat > "${D}/${D}.config.ra" <<EOF
targetGenesetPrefix ucscGenes
targetDb ${D}
queryDbs danRer10
${D}Fa ${runDir}/${D}.ucscGenes.faa
danRer10Fa ${runDir}/danRer10.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp
EOF

  # '\\' keeps a real line continuation in the generated script
  cat > "${D}/run.${D}.sh" <<EOF
#!/bin/bash
set -beEu -o pipefail
cd "${runDir}/${D}"
time (doHgNearBlastp.pl -noLoad -workhorse=hgwdev \\
        -noSelf -clusterHub=ku ${D}.config.ra) > do.log 2>&1
EOF
  chmod +x "${D}/run.${D}.sh"
done

############# ce11 #########################

# ce11 target (ws245 table, ce11.ws245Pep.faa protein set).
# Writes ce11/ce11.config.ra and an executable ce11/run.ce11.sh that
# changes into the build directory and runs doHgNearBlastp.pl on that
# config, sending all output to do.log.
#
# ${runDir} is expanded while the files are written.  The relative ${D}/
# paths used here only line up with the absolute ${runDir}/${D} paths when
# runDir is the current directory, so default to that if it is not set.
: "${runDir:=${PWD}}"
for D in ce11
do
  mkdir -p "${D}"

  cat > "${D}/${D}.config.ra" <<EOF
targetGenesetPrefix ws245
targetDb ${D}
queryDbs danRer10
${D}Fa ${runDir}/${D}.ws245Pep.faa
danRer10Fa ${runDir}/danRer10.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp
EOF

  # '\\' keeps a real line continuation in the generated script
  cat > "${D}/run.${D}.sh" <<EOF
#!/bin/bash
set -beEu -o pipefail
cd "${runDir}/${D}"
time (doHgNearBlastp.pl -noLoad -workhorse=hgwdev \\
        -noSelf -clusterHub=ku ${D}.config.ra) > do.log 2>&1
EOF
  chmod +x "${D}/run.${D}.sh"
done

############# dm6 #########################

# dm6 target (ensembl table, dm6.ensembl.faa protein set).
# Writes dm6/dm6.config.ra and an executable dm6/run.dm6.sh that changes
# into the build directory and runs doHgNearBlastp.pl on that config,
# sending all output to do.log.
# NOTE(review): the rn6 section uses targetGenesetPrefix 'ensGene' for its
# ensembl.faa input while this one uses 'ensembl' — confirm which is wanted.
#
# ${runDir} is expanded while the files are written.  The relative ${D}/
# paths used here only line up with the absolute ${runDir}/${D} paths when
# runDir is the current directory, so default to that if it is not set.
: "${runDir:=${PWD}}"
for D in dm6
do
  mkdir -p "${D}"

  cat > "${D}/${D}.config.ra" <<EOF
targetGenesetPrefix ensembl
targetDb ${D}
queryDbs danRer10
${D}Fa ${runDir}/${D}.ensembl.faa
danRer10Fa ${runDir}/danRer10.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp
EOF

  # '\\' keeps a real line continuation in the generated script
  cat > "${D}/run.${D}.sh" <<EOF
#!/bin/bash
set -beEu -o pipefail
cd "${runDir}/${D}"
time (doHgNearBlastp.pl -noLoad -workhorse=hgwdev \\
        -noSelf -clusterHub=ku ${D}.config.ra) > do.log 2>&1
EOF
  chmod +x "${D}/run.${D}.sh"
done

######################################################################
# Run the protein alignments: for every target database in turn, execute
# its run.<db>.sh from inside its build directory, then show the tail of
# the do.log found there.

blastTabDir=/hive/data/genomes/danRer10/bed/ensGene.89/blastTab
for db in ce11 dm6 hg38 mm10 rn6 sacCer3
do
  # If the build directory is missing, skip this database instead of
  # carrying on in whatever directory the previous iteration left us in
  # (which would tail the wrong do.log).  cd prints its own error.
  cd "${blastTabDir}/${db}" || continue
  "./run.${db}.sh"
  tail do.log
done

# elapsed 'real' time reported in each do.log:

# ce11/do.log
# real    65m9.385s

# dm6/do.log
# real    101m12.864s

# hg38/do.log
# real    241m19.969s

# mm10/do.log
# real    161m18.066s

# rn6/do.log
# real    102m2.132s

# sacCer3/do.log
# real    19m21.203s

######################################################################
