# for emacs: -*- mode: sh; -*-

# running up the 160-way on this reference: KM034562.1
# DONE - 2014-09-18 - Hiram

# Work area for the 160-way lastz run.  -p lets a re-run tolerate an
# existing directory; stop if the cd fails so that the rm -fr below can
# never run in some other directory.
mkdir -p /hive/data/genomes/eboVir3/bed/lastz160way
cd /hive/data/genomes/eboVir3/bed/lastz160way || exit 1

# always start from an empty query2Bits/ directory
rm -fr query2Bits
mkdir query2Bits

# one .2bit per eboVir3 sequence file, named after the .fa.gz file
for F in /hive/data/genomes/eboVir3/ucsc/*.fa.gz
do
  # an unmatched glob is left as the literal pattern; skip it
  [ -e "$F" ] || continue
  # basename removes exactly the trailing ".fa.gz"; an unanchored sed
  # pattern with bare dots could also match elsewhere in the name
  C=$(basename "$F" .fa.gz)
  echo "$C"
  faToTwoBit "$F" "query2Bits/${C}.2bit"
done

# and including the Marburg sequences:
# Column 1 of the marVir1 chrom.sizes supplies the sequence names; each
# one is converted from ucsc/<name>.fa to query2Bits/marVir1.<name>.2bit.
# read -r keeps any backslash in a name intact.
cut -f1 ../../../marVir1/chrom.sizes | while read -r C
do
  echo "marVir1.${C}"
  faToTwoBit "../../../marVir1/ucsc/${C}.fa" "query2Bits/marVir1.${C}.2bit"
done

# Per-reference work directory.  Stop if it cannot be entered, otherwise
# mkRunDirs.sh and pairs/ would be created relative to the wrong place.
mkdir -p KM034562v1
cd KM034562v1 || exit 1

# Write mkRunDirs.sh into the current directory.  The delimiter is quoted,
# so this shell expands nothing in the body: every $variable, backquote
# and backslash below is stored literally and is only interpreted when
# mkRunDirs.sh itself is run.  The generated script exports referenceSP,
# which its mkDef and mkRunSh functions read.
cat << '_EOF_' > mkRunDirs.sh
#!/bin/sh

export referenceSP="KM034562v1"

# mkDef query
#   Print on stdout the DEF parameter file used by doBlastzChainNet.pl
#   for the alignment of ${referenceSP} (target) against the given query.
#   Globals: reads referenceSP; assigns query.  The assignment is a plain
#            one (no 'local'), so query keeps its value after the call.
#   Output:  one printf argument per line of the DEF file; the empty
#            arguments produce the blank separator lines.
mkDef() {
  query=$1
  printf '%s\n' \
    "# Ebola virus ${referenceSP} vs $query" \
    "BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz" \
    "BLASTZ_H=2000" \
    "BLASTZ_Y=3400" \
    "BLASTZ_L=4000" \
    "BLASTZ_K=2200" \
    "BLASTZ_Q=/cluster/data/blastz/HoxD55.q" \
    "" \
    "# TARGET - eboVir3 ${referenceSP}" \
    "SEQ1_DIR=/hive/data/genomes/eboVir3/bed/lastz160way/${referenceSP}/${referenceSP}.2bit" \
    "SEQ1_LEN=/hive/data/genomes/eboVir3/bed/lastz160way/${referenceSP}/${referenceSP}.chrom.sizes" \
    "SEQ1_CHUNK=200000" \
    "SEQ1_LAP=0" \
    "" \
    "# QUERY - $query" \
    "SEQ2_DIR=/hive/data/genomes/eboVir3/bed/lastz160way/query2Bits/${query}.2bit" \
    "SEQ2_LEN=/hive/data/genomes/eboVir3/bed/lastz160way/${referenceSP}/pairs/${query}/${query}.chrom.sizes" \
    "SEQ2_CHUNK=200000" \
    "SEQ2_LAP=0" \
    "" \
    "BASE=/hive/data/genomes/eboVir3/bed/lastz160way/${referenceSP}/pairs/${query}" \
    "TMPDIR=/dev/shm"
}

# mkRunSh query
#   Print on stdout the runLz.sh script for one query: it changes into
#   the pairs/<query> directory of ${referenceSP} and runs
#   doBlastzChainNet.pl on the DEF file found there, logging to do.log.
#   Globals: reads referenceSP; assigns query (plain assignment, not local).
#   Output:  the single-quoted arguments are emitted verbatim, so the
#            backquoted pwd and the trailing backslashes reach the
#            generated script unexpanded; only the cd line is expanded.
mkRunSh() {
  query=$1
  printf '%s\n' \
    '#!/bin/sh' \
    '' \
    "cd /hive/data/genomes/eboVir3/bed/lastz160way/${referenceSP}/pairs/${query}" \
    '' \
    'time (doBlastzChainNet.pl `pwd`/DEF \' \
    '    -ignoreSelf -stop=net -noDbNameCheck -fileServer=hgwdev \' \
    '    -tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \' \
    '    -chainMinScore=10 -chainLinearGap=loose \' \
    '    -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku) > do.log 2>&1'
}

# Make the reference sequence and its chrom.sizes available in this
# directory under the names the generated DEF files point at.
ln -s "../query2Bits/${referenceSP}.2bit" "${referenceSP}.2bit"

twoBitInfo "../query2Bits/${referenceSP}.2bit" "${referenceSP}.chrom.sizes"

# One pairs/<name>/ directory per query .2bit, each holding DEF,
# <name>.chrom.sizes and an executable runLz.sh.
for F in ../query2Bits/*.2bit
do
  # an unmatched glob is left as the literal pattern; skip it
  [ -e "$F" ] || continue
  # basename removes exactly the trailing ".2bit"
  B=$(basename "$F" .2bit)
  mkdir -p "pairs/${B}"
  mkDef "${B}" > "pairs/${B}/DEF"
  # name the sizes file from ${B} itself rather than from whatever
  # variable the previous function call happened to leave behind
  twoBitInfo "../query2Bits/${B}.2bit" "pairs/${B}/${B}.chrom.sizes"
  mkRunSh "${B}" > "pairs/${B}/runLz.sh"
  chmod +x "pairs/${B}/runLz.sh"
done
_EOF_
# The terminator above has to be the bare word.  sh compares each input
# line with the delimiter after quote removal, so a line that still
# carries the quotes does not end the here-document and everything after
# it would be written into mkRunDirs.sh instead of being executed.

chmod +x mkRunDirs.sh
./mkRunDirs.sh

# verify correct files are in some of the pairs/*/ directories
# (each should hold DEF, <query>.chrom.sizes and an executable runLz.sh;
#  the commented lines below are the listing recorded at the time)
ls -ogrt pairs/NC_002549v1
# total 16
# -rw-rw-r-- 1 748 Sep 18 09:12 DEF
# -rw-rw-r-- 1  18 Sep 18 09:12 NC_002549v1.chrom.sizes
# -rwxrwxr-x 1 370 Sep 18 09:12 runLz.sh

# running 6 at a time
#   (got into some kind of ssh limit trouble running more than that)
#
# The loop walks the glob directly rather than reading names from a pipe.
# A while-loop on the receiving end of a pipe runs in a subshell, so the
# jobs it starts are not children of this shell and the closing wait
# would return at once, leaving the last partial batch unattended.
# Output of the loop (progress messages and anything the jobs print
# before their own redirection) goes to do.log in this directory.

export N=0
for P in pairs/*/runLz.sh
do
  # an unmatched glob is left as the literal pattern; skip it
  [ -e "$P" ] || continue
  N=$((N + 1))
  echo "running $P"
  # stdin from /dev/null so a background job never reads this shell's input
  "$P" < /dev/null &
  if [ "$N" -eq 6 ]; then
    echo "waiting"
    # barrier: let the whole batch of six finish before starting more
    wait
    N=0
  fi
done > do.log 2>&1
echo "final wait"
# picks up the remaining jobs when the total is not a multiple of six
wait


# verify all results are present, note sizes of results:

# page through every net MAF ordered numerically, largest first, on
# field 3, which in ls -og output is the file size
ls -ogrt pairs/*/mafNet/*.maf.gz | sort -k3nr | less
# and count should be 159
ls -ogrt pairs/*/mafNet/*.maf.gz | wc -l
# 159
