# See ../../crisprTrack/README.txt (Done. 2016-09-15 max)
# Arguments are hg38 and knownGene — presumably the assembly and the gene
# track the guides are built around; confirm against the README.
doCrispr.sh hg38 knownGene

###############################################################################
# with 10K shoulders (working - 2018-06-06 - Hiram)

# Dedicated build directory for this run.  The three step-wise doCrispr.pl
# calls that follow pass -buildDir=`pwd`, so they are meant to be started
# from this directory.
mkdir /hive/data/genomes/hg38/bed/crispr.10K
cd /hive/data/genomes/hg38/bed/crispr.10K

# Run doCrispr.pl from its start with -stop=ranges; the script's stdout and
# stderr are captured in ranges.log.
time (/cluster/home/hiram/kent/src/hg/utils/automation/doCrispr.pl \
   -buildDir=`pwd` -bigClusterHub=ku -smallClusterHub=ku \
        -workhorse=hgwdev -stop=ranges hg38 knownGene) > ranges.log 2>&1
# real    0m52.339s

# Resume at the guides step and stop at the same step
# (-continue=guides -stop=guides), i.e. presumably just that one step;
# stdout and stderr are captured in guides.log.
time (/cluster/home/hiram/kent/src/hg/utils/automation/doCrispr.pl \
   -buildDir=`pwd` -bigClusterHub=ku -smallClusterHub=ku \
        -continue=guides -stop=guides -workhorse=hgwdev hg38 knownGene) \
	> guides.log 2>&1
# real    12m40.910s

# Resume at the specScores step and stop at the same step
# (-continue=specScores -stop=specScores); stdout and stderr are captured in
# specScores.log.  This invocation did not complete: the failure is recorded
# in the comment lines immediately after it.
time (/cluster/home/hiram/kent/src/hg/utils/automation/doCrispr.pl \
   -buildDir=`pwd` -bigClusterHub=ku -smallClusterHub=ku \
        -continue=specScores -stop=specScores -workhorse=hgwdev hg38 \
	knownGene) > specScores.log 2>&1
# this failed when trying to run on ku:
# Traceback (most recent call last):
#   File "/hive/data/outside/crisprTrack/scripts/splitGuidesSpecScore.py", line 39, in <module>
#     guideCounts[guide[:20]]+=1
# MemoryError
# Command failed:
# ssh -x -o 'StrictHostKeyChecking = no' -o 'BatchMode = yes' ku nice /hive/data/genomes/hg38/bed/crispr.10K/specScores/runSpecScores.bash

# real    22m46.921s

    # run this command on hgwdev
    # (manual re-run of the splitGuidesSpecScore.py step that raised
    # MemoryError on ku, see the traceback above; the relative paths suggest
    # it is run from the specScores directory — confirm)
    time /cluster/software/bin/python /hive/data/outside/crisprTrack/scripts/splitGuidesSpecScore.py ../allGuides.txt tmp/inFa jobNames.txt

# Writing tmp/inFa/1781201.fa
# Writing tmp/inFa/1781202.fa
# 124684218 sequences written
# 3368831 sequences removed as they are non-unique
# 0 sequences removed as they were already done before

# real    35m17.468s

    # then, continuing the kluster run:
    # jobNames.txt is the file named on the splitGuidesSpecScore.py command
    # line above; gensub2 is given it together with 'single' and 'gsub'
    # (presumably already present in specScores — confirm) and the resulting
    # jobList is what para create loads.
    ssh ku
    cd /hive/data/genomes/hg38/bed/crispr.10K/specScores
    gensub2 jobNames.txt single gsub jobList
    time para create jobList

#     1781204 jobs written to /hive/data/genomes/hg38/bed/crispr.10K/specScores/batch

# real    15m24.369s

    # hive cleaning 2021-04-27
    # Resumes doCrispr.pl at the cleanup step (-continue=cleanup); stdout and
    # stderr are captured in cleanup.log.
    # NOTE(review): unlike the earlier calls this one does not pass the
    # knownGene argument, and no cd is recorded before it, so the directory
    # that `pwd` resolved to is not documented here — presumably the
    # crispr.10K build directory; confirm before re-running.
    time (~/kent/src/hg/utils/automation/doCrispr.pl -verbose=2 \
       -continue=cleanup hg38 -fileServer=hgwdev -buildDir=`pwd` \
         -smallClusterHub=hgwdev -bigClusterHub=ku -workhorse=hgwdev) \
             > cleanup.log 2>&1

###############################################################################
