# for emacs: -*- mode: sh; -*-

#	http://www.ncbi.nlm.nih.gov/bioproject/30809
#	http://www.ncbi.nlm.nih.gov/genome/907
#	http://www.ncbi.nlm.nih.gov/Traces/wgs/?val=ABVD00

# sloth ( Choloepus hoffmanni )
#########################################################################
# DOWNLOAD SEQUENCE (DONE braney 2008-10-07)
    ssh kkstore05
    mkdir /cluster/store12/choHof1
    ln -s /cluster/store12/choHof1 /cluster/data
    mkdir /cluster/data/choHof1/broad
    cd /cluster/data/choHof1/broad

    wget --timestamping \
ftp://ftp.broad.mit.edu/pub/assemblies/mammals/sloth/ChoHof1.0/assembly.agp \
ftp://ftp.broad.mit.edu/pub/assemblies/mammals/sloth/ChoHof1.0/assembly.bases.gz \
ftp://ftp.broad.mit.edu/pub/assemblies/mammals/sloth/ChoHof1.0/assembly.quals.gz 
    md5sum ass* > assembly.md5sum

    qaToQac assembly.quals.gz stdout | qacAgpLift assembly.agp stdin choHof1.qual.qac

   cut -f 1 assembly.agp | uniq -c | wc -l 
   # Number of scaffolds: 481259

#########################################################################
# Create .ra file and run makeGenomeDb.pl (DONE braney 2008-10-07)
    ssh kolossus
    cd /cluster/data/choHof1
cat << _EOF_ >choHof1.config.ra
# Config parameters for makeGenomeDb.pl:
db choHof1
clade mammal
genomeCladePriority 35
scientificName Choloepus hoffmanni
commonName Sloth
assemblyDate Jul. 2008
assemblyLabel Broad Institute choHof1 
orderKey 236.5
#mitoAcc AJ222767
mitoAcc none
fastaFiles /cluster/data/choHof1/broad/assembly.bases.gz
agpFiles /cluster/data/choHof1/broad/assembly.agp
qualFiles /cluster/data/choHof1/broad/choHof1.qual.qac
dbDbSpeciesDir sloth
_EOF_

    makeGenomeDb.pl -workhorse kolossus choHof1.config.ra > makeGenomeDb.out 2>&1 &

    cut -f 2 chrom.sizes | ave stdin
# Q1 1235.000000
# median 2269.000000
# Q3 6207.000000
# average 5109.364438
# min 250.000000
# max 197267.000000
# count 481259
# total 2458927620.000000
# standard deviation 6671.452585

#########################################################################
## Repeat Masker (DONE - 2008-10-16 - Hiram)
    screen	# to manage this several day job
    mkdir /hive/data/genomes/choHof1/bed/repeatMasker
    cd /hive/data/genomes/choHof1/bed/repeatMasker
    time $HOME/kent/src/hg/utils/automation/doRepeatMasker.pl \
	-workhorse=hgwdev -bigClusterHub=swarm \
	-buildDir=`pwd` choHof1 > do.log 2>&1 &
    #	real    907m8.655s
    twoBitToFa choHof1.rmsk.2bit stdout | faSize stdin > faSize.rmsk.txt
# 2458927620 bases (398507935 N's 2060419685 real 1325807439 upper 734612246
# lower) in 481259 sequences in 1 files
# %29.88 masked total, %35.65 masked real

#########################################################################
# SIMPLE REPEATS TRF (DONE - 2008-10-15 - Hiram)
    screen # use a screen to manage this job
    mkdir /hive/data/genomes/choHof1/bed/simpleRepeat
    cd /hive/data/genomes/choHof1/bed/simpleRepeat
    #
    time $HOME/kent/src/hg/utils/automation/doSimpleRepeat.pl \
	-buildDir=/hive/data/genomes/choHof1/bed/simpleRepeat choHof1 \
	> do.log 2>&1 &
    #	real    276m12.206s
    cat fb.simpleRepeat
    #	32246798 bases of 2060419685 (1.565%) in intersection

    #	after RM run is done, add this mask:
    cd /hive/data/genomes/choHof1
    rm choHof1.2bit
    twoBitMask choHof1.rmsk.2bit -add bed/simpleRepeat/trfMask.bed choHof1.2bit
    #	can safely ignore warning about >=13 fields in bed file

    twoBitToFa choHof1.2bit stdout | faSize stdin > choHof1.2bit.faSize.txt
# 2458927620 bases (398507935 N's 2060419685 real 1325416974 upper 735002711
# lower) in 481259 sequences in 1 files
# %29.89 masked total, %35.67 masked real

    #	link to gbdb
    ln -s `pwd`/choHof1.2bit /gbdb/choHof1

###########################################################################
# prepare for kluster runs (DONE - 2008-10-16 - Hiram)
    # compare to size of real bases to adjust the repMatch
    #	hg18: 2881421696
    #	choHof1: 2060419685
    # thus: 1024 * 2060419685/2881421696 = 732
    #	rounding up to 800 for a bit more conservative masking
    cd /hive/data/genomes/choHof1
    time blat choHof1.2bit \
	/dev/null /dev/null -tileSize=11 -makeOoc=choHof1.11.ooc -repMatch=800
    #	Wrote 34983 overused 11-mers to choHof1.11.ooc
    #	real    2m5.136s

    #	and staging data for push to kluster nodes
    mkdir /hive/data/staging/data/choHof1
    cp -p choHof1.2bit chrom.sizes choHof1.11.ooc \
	/hive/data/staging/data/choHof1
    #	request to cluster admin to push this to the kluster nodes
    #	/scratch/data/

###########################################################################
# cpgIsland - (DONE - 2012-04-23 - Hiram)
    mkdir /hive/data/genomes/choHof1/bed/cpgIslands
    cd /hive/data/genomes/choHof1/bed/cpgIslands
    time doCpgIslands.pl choHof1 > do.log 2>&1
    # fixing broken command in script:
    time ./doLoadCpg.csh
    #   real    4m0.503s
    # continuing:
    time doCpgIslands.pl -continue=cleanup choHof1 > cleanup.log 2>&1
    #   real    135m42.244s

    cat fb.choHof1.cpgIslandExt.txt
    #   9771165 bases of 2060419685 (0.474%) in intersection

##############################################################################
# genscan - (DONE - 2011-04-25 - Hiram)
    mkdir /hive/data/genomes/choHof1/bed/genscan
    cd /hive/data/genomes/choHof1/bed/genscan
    time doGenscan.pl choHof1 > do.log 2>&1
    #   real    1004m5.449s

    cat fb.choHof1.genscan.txt
    #   41313862 bases of 2060419685 (2.005%) in intersection
    cat fb.choHof1.genscanSubopt.txt
    #   37047077 bases of 2060419685 (1.798%) in intersection

#########################################################################
# windowMasker - (DONE - 2012-05-02 - Hiram)
    mkdir /hive/data/genomes/choHof1/bed/windowMasker
    cd /hive/data/genomes/choHof1/bed/windowMasker
    # trying out new version of the script that does all the usual steps
    #   that used to be performed manually after it was done
    time /cluster/home/hiram/kent/src/hg/utils/automation/doWindowMasker.pl \
        -workhorse=hgwdev -buildDir=`pwd` -dbHost=hgwdev choHof1 > do.log 2>&1
    #   real    219m35.396s
    # fixing some broken commands:
    #   time ./doLoadClean.csh > loadClean.log 2>&1
    #   real    1460m21.553s

    # fixing some broken commands:
    time ./lastClean.sh
    #   real    13m14.689s

    cat fb.choHof1.windowmaskerSdust.beforeClean.txt
    #   1188087555 bases of 2458927620 (48.317%) in intersection
    cat fb.choHof1.windowmaskerSdust.clean.txt
    #   789579620 bases of 2458927620 (32.111%) in intersection

    cat fb.choHof1.rmsk.windowmaskerSdust.txt
    #   377356167 bases of 2458927620 (15.346%) in intersection

#########################################################################
# AUTO UPDATE GENBANK (DONE - 2012-05-03 - Hiram)
    # examine the file:
    #   /cluster/data/genbank/data/organism.lst
    # for your species to see what counts it has for:
# organism       mrnaCnt estCnt  refSeqCnt
# Choloepus hoffmanni	7	0	0
    # to decide which "native" mrna or ests you want to specify in genbank.conf

    ssh hgwdev
    cd $HOME/kent/src/hg/makeDb/genbank
    git pull
    # edit etc/genbank.conf to add choHof1 just after ce2
# choHof1 (sloth)
choHof1.serverGenome = /hive/data/genomes/choHof1/choHof1.2bit
choHof1.clusterGenome = /scratch/data/choHof1/choHof1.2bit
choHof1.ooc = /scratch/data/choHof1//choHof1.11.ooc
choHof1.lift = no
choHof1.refseq.mrna.native.pslCDnaFilter  = ${lowCover.refseq.mrna.native.pslCDnaFilter}
choHof1.refseq.mrna.xeno.pslCDnaFilter    = ${lowCover.refseq.mrna.xeno.pslCDnaFilter}
choHof1.genbank.mrna.native.pslCDnaFilter = ${lowCover.genbank.mrna.native.pslCDnaFilter}
choHof1.genbank.mrna.xeno.pslCDnaFilter   = ${lowCover.genbank.mrna.xeno.pslCDnaFilter}
choHof1.genbank.est.native.pslCDnaFilter  = ${lowCover.genbank.est.native.pslCDnaFilter}
choHof1.refseq.mrna.native.load = no
choHof1.refseq.mrna.xeno.load = yes
choHof1.genbank.mrna.xeno.load = yes
choHof1.genbank.est.native.load = no
choHof1.downloadDir = choHof1
choHof1.perChromTables = no

    # end of section added to etc/genbank.conf
    git commit -m "adding choHof1 and removing blanks at end of line" \
        etc/genbank.conf
    git push
    make etc-update

    git pull
    # Edit src/lib/gbGenome.c to add new species.
    git commit -m "adding definition for choHofNames Choloepus hoffmanni" \
        src/lib/gbGenome.c
    git push
    make install-server

    ssh hgwdev			# used to do this on "genbank" machine
    screen -S choHof1           # long running job managed in screen
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbAlignStep -initial choHof1 &
    # after setting genbank.mrna.xeno.load = no
    #   var/build/logs/2012.05.31-10:16:29.choHof1.initalign.log
    #   real    1472m49.406s
    #   2012-10-10 - see if genbank.mrna.xeno.load = yes is now viable
    #   real    1098m41.122s

    # load database when finished
    ssh hgwdev
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad choHof1 &
    #	logFile: var/dbload/hgwdev/logs/2012.06.10-16:52:16.dbload.log
    #   real    10m9.448s
    # Reloaded with genbank.mrna.xeno.load on Thu Oct 11 08:29:06 PDT 2012
    #   real    48m53.920s

    # enable daily alignment and update of hgwdev (DONE - 2012-06-11 - Hiram)
    cd ~/kent/src/hg/makeDb/genbank
    git pull
    # add choHof1 to:
    vi etc/align.dbs etc/hgwdev.dbs
    git commit -m "Added choHof1." etc/align.dbs etc/hgwdev.dbs
    git push
    make etc-update

#########################################################################
# set default position to CHM/CHML/OPN3 gene displays:
#	(DONE - 2012-07-20 - Hiram)
    hgsql -e \
'update dbDb set defaultPos="scaffold_5701:20408-28976" where name="choHof1";' \
	hgcentraltest

############################################################################
# pushQ entry (DONE - 2012-07-20 - Hiram)
    mkdir /hive/data/genomes/choHof1/pushQ
    cd /hive/data/genomes/choHof1/pushQ
    # Mark says don't let the transMap track get there
    makePushQSql.pl choHof1 2> stderr.txt | grep -v transMap > choHof1.sql

    scp -p choHof1.sql hgwbeta:/tmp
    ssh hgwbeta
    cd /tmp
    hgsql qapushq < choHof1.sql

############################################################################
##############################################################################
# TransMap V3 tracks. see makeDb/doc/transMapTracks.txt (2014-12-21 markd)
##############################################################################
