# for emacs: -*- mode: sh; -*-

#       DATE:	18-May-2012
#       ORGANISM:	Ceratotherium simum simum
#       TAXID:	73337
#       ASSEMBLY LONG NAME:	CerSimSim1.0
#       ASSEMBLY SHORT NAME:	CerSimSim1.0
#       ASSEMBLY SUBMITTER:	Broad Institute
#       ASSEMBLY TYPE:	Haploid
#       NUMBER OF ASSEMBLY-UNITS:	1
#       ASSEMBLY ACCESSION:	GCA_000283155.1

#       FTP-RELEASE DATE: 07-Aug-2012

#       http://www.ncbi.nlm.nih.gov/genome/11839
#       http://www.ncbi.nlm.nih.gov/assembly/406328/
#       http://www.ncbi.nlm.nih.gov/bioproject/74583

#       chrMt: NC_001808.1 GCF_000029945.1 bioproject 11835

#       http://www.ncbi.nlm.nih.gov/Traces/wgs/?val=AKZM01
#       Genome Coverage : 91x Illumina Hi-Seq

#       http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=73337

# rsync://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Ceratotherium_simum/CerSimSim1.0/ ./

##########################################################################
# Download sequence (DONE - 2012-08-28 - Hiram)
    mkdir /hive/data/genomes/cerSim1
    cd /hive/data/genomes/cerSim1
    mkdir genbank
    cd genbank
    time rsync -a -P \
rsync://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Ceratotherium_simum/CerSimSim1.0/ ./

    # verify the size of the sequence here:
    faSize Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz
# 2464350348 bases (97509168 N's 2366841180 real 2366841180 upper 0 lower)
#       in 3086 sequences in 1 files
# Total size: mean 798558.1 sd 4913492.1
#       min 1000 (gi|398413566|gb|AKZM01057813.1|)
#       max 79733107 (gi|398925635|gb|JH767723.1|) median 2106
# %0.00 masked total, %0.00 masked real

    # verify version numbers are only .1 and nothing else:
    zcat Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz \
| grep -v "^#" \
| sed -e "s/^\(JH7[0-9]*\)\(.[0-9]\)/\2/; s/^\(AKZM01[0-9]*\)\(.[0-9]\)/\2/;" \
        | cut -f1 | sort | uniq -c
    #    112560 .1

    # thus, it is safe to strip the names down to something reasonable,
    #   they can all be: >JH7[0-9] and >AKZM01
    time zcat Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fa.gz \
  | sed -e "s/^>.*JH7/>JH7/; s/^>.*AKZM01/>AKZM01/; s/.1. Ceratotherium.*//" \
        | gzip -c > ucsc.fa.gz
    #  real    12m16.938s
    zcat Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz \
        | sed -e "s/^\(JH7[0-9]*\).1/\1/; s/^\(AKZM01[0-9]*\).1/\1/;" \
        | gzip -c > ucsc.agp.gz
    time checkAgpAndFa  ucsc.agp.gz ucsc.fa.gz 2>&1 | tail -2
# Valid Fasta file entry
# All AGP and FASTA entries agree - both files are valid
    #   real    1m38.329s

    mkdir /hive/data/genomes/cerSim1/photograph
    cd /hive/data/genomes/cerSim1/photograph
    wget --timestamping \
http://upload.wikimedia.org/wikipedia/commons/3/3c/Ceratotherium_simum_Kruger_Park_02.JPG
    # from user: http://commons.wikimedia.org/wiki/User:Esculapio

    convert -geometry "300x225" Ceratotherium_simum_Kruger_Park_02.JPG \
        Ceratotherium_simum.jpg
    # check this .jpg file into the source tree kent/src/hg/htdocs/images/
    git add Ceratotherium_simum.jpg
    git commit -m "rhino photo from wikicommons user Esculapio" \
        Ceratotherium_simum.jpg
    # and copy to /usr/local/apache/htdocs/images
    cp -p Ceratotherium_simum.jpg /usr/local/apache/htdocs/images

##########################################################################
# Initial makeGenomeDb.pl (DONE - 2012-08-28 - Hiram)
    # obtain a template for this from the source tree:
    # kent/src/hg/utils/automation/configFiles/
    # and check it back into the source tree when completed here:
    cd /hive/data/genomes/cerSim1
    cat << '_EOF_' > cerSim1.config.ra
# Config parameters for makeGenomeDb.pl:
db cerSim1
clade mammal
genomeCladePriority 45
scientificName Ceratotherium simum
commonName White rhinoceros
assemblyDate May 2012
assemblyLabel Broad Institute
assemblyShortLabel CerSimSim1.0
orderKey 2415
mitoAcc NC_001808
fastaFiles /hive/data/genomes/cerSim1/genbank/ucsc.fa.gz
agpFiles /hive/data/genomes/cerSim1/genbank/ucsc.agp.gz
# qualFiles none
dbDbSpeciesDir rhino
photoCreditURL http://commons.wikimedia.org/wiki/User:Esculapio
photoCreditName Photo courtesy of Wikimedia Commons user Esculapio
ncbiGenomeId 11839
ncbiAssemblyId 406328
ncbiAssemblyName CerSimSim1.0
ncbiBioProject 74583
genBankAccessionID GCA_000283155.1
taxId 73337
'_EOF_'
    # << happy emacs

    time makeGenomeDb.pl -workhorse=hgwdev -fileServer=hgwdev -dbHost=hgwdev \
        -stop=agp cerSim1.config.ra > agp.log 2>&1
    #   real    4m25.252s
    # verify OK:
    tail -1 agp.log
    #   *** All done!  (through the 'agp' step)

    # finish it off
    time makeGenomeDb.pl -continue=db -workhorse=hgwdev -fileServer=hgwdev \
        -dbHost=hgwdev cerSim1.config.ra > db.log 2>&1
    #   real    20m20.688s

    #	add the trackDb entries to the source tree, and the 2bit link:
    ln -s `pwd`/cerSim1.unmasked.2bit /gbdb/cerSim1/cerSim1.2bit
    #	browser should function now

##########################################################################
# running repeat masker (DONE - 2012-08-28 - Hiram)
    mkdir /hive/data/genomes/cerSim1/bed/repeatMasker
    cd /hive/data/genomes/cerSim1/bed/repeatMasker
    time doRepeatMasker.pl -buildDir=`pwd` -noSplit \
	-bigClusterHub=swarm -dbHost=hgwdev -workhorse=hgwdev \
	-smallClusterHub=encodek cerSim1 > do.log 2>&1 &
    #   real    444m31.442s

    cat faSize.rmsk.txt
    #   2464367180 bases (97509168 N's 2366858012 real 1448944781
    #   upper 917913231 lower) in 3087 sequences in 1 files
    #   Total size: mean 798304.9 sd 4912716.1 min 1000 (AKZM01057813)
    #   max 79733107 (JH767723) median 2106
    #   %37.25 masked total, %38.78 masked real

    egrep -i "versi|relea" do.log
#    April 26 2011 (open-3-3-0) version of RepeatMasker
# CC   RELEASE 20110920;
# RepeatMasker version development-$Id: RepeatMasker,v 1.26 2011/09/26 16:19:44 angie Exp $

    featureBits -countGaps cerSim1 rmsk
    #   918443203 bases of 2464367180 (37.269%) in intersection

    # why is this different from the faSize count above?
    # because rmsk masks some N's as well as bases; faSize counts the N's
    #	separately from the bases and does not report lower case N's

##########################################################################
# running simple repeat (DONE - 2012-08-28 - Hiram)
    mkdir /hive/data/genomes/cerSim1/bed/simpleRepeat
    cd /hive/data/genomes/cerSim1/bed/simpleRepeat
    time doSimpleRepeat.pl -buildDir=`pwd` -bigClusterHub=swarm \
	-dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=encodek \
	cerSim1 > do.log 2>&1 &
    #   real    11m58.961s

    cat fb.simpleRepeat
    #   23369813 bases of 2366858012 (0.987%) in intersection

#########################################################################
# Verify all gaps are marked, add any N's not in gap as type 'other'
#	(DONE - 2012-08-28 - Hiram)
    mkdir /hive/data/genomes/cerSim1/bed/gap
    cd /hive/data/genomes/cerSim1/bed/gap
    time nice -n +19 findMotif -motif=gattaca -verbose=4 \
	-strand=+ ../../cerSim1.unmasked.2bit > findMotif.txt 2>&1
    #   real    1m7.343s
    grep "^#GAP " findMotif.txt | sed -e "s/^#GAP //" > allGaps.bed
    time featureBits cerSim1 -not gap -bed=notGap.bed
    #   2366858012 bases of 2366858012 (100.000%) in intersection
    #   real    0m12.510s


    # can see now if allGaps.bed actually is all the gaps:
    hgsql -N -e "select size from gap;" cerSim1 | ave stdin | grep total
# total 97509168.000000
    ave -col=5 allGaps.bed | grep total
# total 97509168.000000
    # same count, all gaps are marked

    # check if any non-bridged gaps here (looking for 'no'):
    hgsql -N -e "select bridge from gap;" cerSim1 | sort | uniq -c
    #  54737 yes

##########################################################################
## WINDOWMASKER (DONE - 2012-08-28 - Hiram)
    mkdir /hive/data/genomes/cerSim1/bed/windowMasker
    cd /hive/data/genomes/cerSim1/bed/windowMasker
    time nice -n +19 doWindowMasker.pl -buildDir=`pwd` -workhorse=hgwdev \
	-dbHost=hgwdev cerSim1 > do.log 2>&1 &
    #   real    188m18.153s

    # Masking statistics
     cat faSize.cerSim1.wmsk.txt
    #   2464367180 bases (97509168 N's 2366858012 real 1667189438 upper
    #   699668574 lower) in 3087 sequences in 1 files
    #   Total size: mean 798304.9 sd 4912716.1 min 1000 (AKZM01057813)
    #   max 79733107 (JH767723) median 2106
    #   %28.39 masked total, %29.56 masked real

    cat faSize.cerSim1.wmsk.sdust.txt
    #   2464367180 bases (97509168 N's 2366858012 real 1654019560 upper
    #   712838452 lower) in 3087 sequences in 1 files
    #   Total size: mean 798304.9 sd 4912716.1 min 1000 (AKZM01057813)
    #   max 79733107 (JH767723) median 2106
    #   %28.93 masked total, %30.12 masked real

    cat faSize.cerSim1.cleanWMSdust.txt
    #   2464367180 bases (97509168 N's 2366858012 real 1654019560 upper
    #   712838452 lower) in 3087 sequences in 1 files
    #   Total size: mean 798304.9 sd 4912716.1 min 1000 (AKZM01057813)
    #   max 79733107 (JH767723) median 2106
    #   %28.93 masked total, %30.12 masked real

    cat fb.cerSim1.windowmaskerSdust.clean.txt
    #   712838452 bases of 2464367180 (28.926%) in intersection

    # how much do the window masker and repeat masker results overlap:
    featureBits -countGaps cerSim1 rmsk windowmaskerSdust
    #   382474428 bases of 2464367180 (15.520%) in intersection

##########################################################################
# add simpleRepeats to either WindowMasker or RepeatMasker to make masked 2bit
#       usually use the one that has the more masking
#       (DONE - 2012-08-31 - Hiram)
    cd /hive/data/genomes/cerSim1
    # add to rmsk after RM run is done:
    twoBitMask cerSim1.rmsk.2bit \
	-add bed/simpleRepeat/trfMask.bed cerSim1.2bit
    twoBitToFa cerSim1.2bit stdout | faSize stdin > faSize.cerSim1.2bit.txt
    #	you can safely ignore the warning about fields >= 13

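    # or, as the alternative, after WM run is done:
    #   (NOTE: the faSize result recorded below, %37.27 masked, corresponds
    #   to the rmsk + simpleRepeat masking, not to the WindowMasker masking)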
    cd /hive/data/genomes/cerSim1
    twoBitMask -add bed/windowMasker/cerSim1.cleanWMSdust.2bit \
	bed/simpleRepeat/trfMask.bed cerSim1.2bit
    #	you can safely ignore the warning about fields >= 13

    twoBitToFa cerSim1.2bit stdout | faSize stdin > faSize.cerSim1.2bit.txt
    cat faSize.cerSim1.2bit.txt
    #   2464367180 bases (97509168 N's 2366858012 real 1448462731
    #   upper 918395281 lower) in 3087 sequences in 1 files
    #   Total size: mean 798304.9 sd 4912716.1 min 1000 (AKZM01057813)
    #   max 79733107 (JH767723) median 2106
    #   %37.27 masked total, %38.80 masked real

    rm /gbdb/cerSim1/cerSim1.2bit
    ln -s `pwd`/cerSim1.2bit /gbdb/cerSim1/cerSim1.2bit

##########################################################################
# cpgIslands - (DONE - 2012-08-31 - Hiram)
    mkdir /hive/data/genomes/cerSim1/bed/cpgIslands
    cd /hive/data/genomes/cerSim1/bed/cpgIslands
    time doCpgIslands.pl cerSim1 > do.log 2>&1
    #   real    6m13s

    cat fb.cerSim1.cpgIslandExt.txt
    #   22364010 bases of 2366858012 (0.945%) in intersection

#########################################################################
# genscan - (DONE - 2012-09-05 - Hiram)
    mkdir /hive/data/genomes/cerSim1/bed/genscan
    cd /hive/data/genomes/cerSim1/bed/genscan
    time doGenscan.pl cerSim1 > do.log 2>&1
    # one broken job:
# ./runGsBig.csh JH767732 000 gtf/000/JH767732.gtf pep/000/JH767732.pep subopt/000/JH767732.bed
    # reset the window size to 200000 and run:
    time ./lastOne.csh JH767732 000 gtf/000/JH767732.gtf pep/000/JH767732.pep subopt/000/JH767732.bed
# Completed: 3086 of 3087 jobs
# Crashed: 1 jobs
# CPU time in finished jobs:      49072s     817.87m    13.63h    0.57d  0.002 y
# IO & Wait Time:                 13542s     225.70m     3.76h    0.16d  0.000 y
# Average job time:                  20s       0.34m     0.01h    0.00d
# Longest finished job:            1738s      28.97m     0.48h    0.02d
# Submission to last job:          2708s      45.13m     0.75h    0.03d
    #   real    12m35.518s
    time doGenscan.pl -continue=makeBed cerSim1 > makeBed.log 2>&1
    #   real    3m19.120s

    cat fb.cerSim1.genscan.txt
    #   55376418 bases of 2366858012 (2.340%) in intersection
    cat fb.cerSim1.genscanSubopt.txt
    #   54875625 bases of 2366858012 (2.319%) in intersection

#########################################################################
# MAKE 11.OOC FILE FOR BLAT/GENBANK (DONE - 2012-08-31 - Hiram)
    # Use -repMatch=850 (derived below), based on size -- for human we use 1024
    # use the "real" number from the faSize measurement,
    # hg19 is 2897316137, calculate the ratio factor for 1024:
    calc \( 2366858012 / 2897316137 \) \* 1024
    #   ( 2366858012 / 2897316137 ) * 1024 = 836.519900

    # round up to 850 (equCab2 was 825)

    cd /hive/data/genomes/cerSim1
    time blat cerSim1.2bit /dev/null /dev/null -tileSize=11 \
      -makeOoc=jkStuff/cerSim1.11.ooc -repMatch=850
    #   Wrote 22623 overused 11-mers to jkStuff/cerSim1.11.ooc
    #   real    1m46.942s

    # there are no non-bridged gaps, no lift file needed for genbank
    hgsql -N -e "select bridge from gap;" cerSim1 | sort | uniq -c
    #   54737 yes
#    cd /hive/data/genomes/cerSim1/jkStuff
#    gapToLift cerSim1 cerSim1.nonBridged.lift -bedFile=cerSim1.nonBridged.bed
    # largest non-bridged contig:
#    awk '{print $3-$2,$0}' cerSim1.nonBridged.bed | sort -nr | head
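    #   123773608 chrX  95534   123869142       chrX.01
    #   (NOTE: the example output above is not from cerSim1 -- its largest
    #   sequence is 79733107 bases -- presumably carried over from the
    #   template for this section; the commented-out commands were not needed)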

#########################################################################
# AUTO UPDATE GENBANK (DONE - 2012-08-31 - Hiram)
    # examine the file:
    /cluster/data/genbank/data/organism.lst
    # for your species to see what counts it has for:
# organism       mrnaCnt estCnt  refSeqCnt
# Ceratotherium simum     12      0       0
    # to decide which "native" mrna or ests you want to specify in genbank.conf

    ssh hgwdev
    cd $HOME/kent/src/hg/makeDb/genbank
    git pull
    # edit etc/genbank.conf to add cerSim1 just after equCab1
# cerSim1 (Ceratotherium simum - White Rhino)
cerSim1.serverGenome = /hive/data/genomes/cerSim1/cerSim1.2bit
cerSim1.clusterGenome = /hive/data/genomes/cerSim1/cerSim1.2bit
cerSim1.ooc = /hive/data/genomes/cerSim1/jkStuff/cerSim1.11.ooc
cerSim1.lift = no
cerSim1.refseq.mrna.native.pslCDnaFilter  = ${ordered.refseq.mrna.native.pslCDnaFilter}
cerSim1.refseq.mrna.xeno.pslCDnaFilter    = ${ordered.refseq.mrna.xeno.pslCDnaFilter}
cerSim1.genbank.mrna.native.pslCDnaFilter = ${ordered.genbank.mrna.native.pslCDnaFilter}
cerSim1.genbank.mrna.xeno.pslCDnaFilter   = ${ordered.genbank.mrna.xeno.pslCDnaFilter}
cerSim1.genbank.est.native.pslCDnaFilter  = ${ordered.genbank.est.native.pslCDnaFilter}
cerSim1.refseq.mrna.native.load = no
cerSim1.refseq.mrna.xeno.load = yes
cerSim1.genbank.mrna.xeno.load = no
cerSim1.genbank.est.native.load = no
cerSim1.downloadDir = cerSim1
cerSim1.perChromTables = no

    # end of section added to etc/genbank.conf
    git commit -m "adding cerSim1 white rhino redmine 8752" etc/genbank.conf
    git push
    make etc-update

    git pull
    # Edit src/lib/gbGenome.c to add new species.
    git commit -m "adding definition for cerSimNames white rhino redmine 8752" src/lib/gbGenome.c
    git push
    make install-server

    ssh hgwdev			# used to do this on "genbank" machine
    screen -S cerSim1           # long running job managed in screen
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbAlignStep -initial cerSim1 &
    #   var/build/logs/2012.08.31-14:22:23.cerSim1.initalign.log
    #   real    162m56.210s

    # load database when finished
    ssh hgwdev
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad cerSim1 &
    #   real    28m9.256s
    #   var/dbload/hgwdev/logs/2012.09.05-15:21:50.dbload.log

    # check the end of that dbload.log to see if it was successful
    #   hgwdev 2012.09.05-15:49:59 dbload: finish

    # enable daily alignment and update of hgwdev (DONE - 2012-09-05 - Hiram)
    cd ~/kent/src/hg/makeDb/genbank
    git pull
    # add cerSim1 to:
    vi etc/align.dbs etc/hgwdev.dbs
    git commit -m "Added cerSim1." etc/align.dbs etc/hgwdev.dbs
    git push
    make etc-update

#########################################################################
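# set default position to FOXP2 gene displays  (TBD - Hiram)
#   XXX - to be done - Tue Oct 16 15:03:19 PDT 2012
#   (the heading previously read "DONE - 2012-08-02", a date before this
#   assembly was downloaded on 2012-08-28)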
    hgsql -e \
'update dbDb set defaultPos="JH739914:135428-435957" where name="cerSim1";' \
	hgcentraltest

############################################################################
# downloads and pushQ entry (DONE - 2012-10-16 - Hiram)
    # after adding cerSim1 to the all.joiner file and verifying that
    #   joinerCheck is clean, can construct the downloads:
    cd /hive/data/genomes/cerSim1
    time makeDownloads.pl -workhorse=hgwdev cerSim1
    #   real    23m8.762s

    mkdir /hive/data/genomes/cerSim1/pushQ
    cd /hive/data/genomes/cerSim1/pushQ
    # Mark says don't let the transMap track get there
    time makePushQSql.pl cerSim1 2> stderr.txt > cerSim1.sql
    #   real    3m39.347s

    # check the stderr.txt for bad stuff, these kinds of warnings are OK:
# WARNING: hgwdev does not have /gbdb/cerSim1/wib/gc5Base.wib
# WARNING: hgwdev does not have /gbdb/cerSim1/wib/quality.wib
# WARNING: hgwdev does not have /gbdb/cerSim1/bbi/quality.bw
# WARNING: cerSim1 does not have seq
# WARNING: cerSim1 does not have extFile
# WARNING: cerSim1 does not have estOrientInfo

    scp -p cerSim1.sql hgwbeta:/tmp
    ssh hgwbeta "hgsql qapushq < /tmp/cerSim1.sql"

##########################################################################
#  BLATSERVERS ENTRY (DONE - 2012-10-23 - Hiram)
#	After getting a blat server assigned by the Blat Server Gods,
    ssh hgwdev
    hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
	VALUES ("cerSim1", "blat4a", "17838", "1", "0"); \
	INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
	VALUES ("cerSim1", "blat4a", "17839", "0", "1");' \
	    hgcentraltest
    #	test it with some sequence

############################################################################
# lastz Human hg19 (DONE - 2012-10-23 - Hiram)
    # the original alignment
    cd /hive/data/genomes/hg19/bed/lastzCerSim1.2012-10-17
    cat fb.hg19.chainCerSim1Link.txt
    #   1683424317 bases of 2897316137 (58.103%) in intersection

    #	and for this swap
    mkdir /hive/data/genomes/cerSim1/bed/blastz.hg19.swap
    cd /hive/data/genomes/cerSim1/bed/blastz.hg19.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/hg19/bed/lastzCerSim1.2012-10-17/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -swap -chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #   real     100m36s
    cat  fb.cerSim1.chainHg19Link.txt
    #	1637961407 bases of 2366858012 (69.204%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/cerSim1/bed
    ln -s blastz.hg19.swap lastz.hg19

#########################################################################
# lastz Mouse mm10 (DONE - 2012-10-24 - Hiram)
    # the original alignment
    cd /hive/data/genomes/mm10/bed/lastzCerSim1.2012-10-23
    cat fb.mm10.chainCerSim1Link.txt
    #   942281365 bases of 2652783500 (35.520%) in intersection

    #	and for this swap
    mkdir /hive/data/genomes/cerSim1/bed/blastz.mm10.swap
    cd /hive/data/genomes/cerSim1/bed/blastz.mm10.swap
    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	/hive/data/genomes/mm10/bed/lastzCerSim1.2012-10-23/DEF \
        -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
        -swap -chainMinScore=3000 -chainLinearGap=medium > swap.log 2>&1 &
    #   real    62m44s
    cat  fb.cerSim1.chainMm10Link.txt
    #	926131511 bases of 2366858012 (39.129%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/cerSim1/bed
    ln -s blastz.mm10.swap lastz.mm10

#########################################################################
# lastz swap Opossum monDom5 (DONE - 2012-10-30 - Hiram)
    # the original alignment
    cd /hive/data/genomes/monDom5/bed/lastzCerSim1.2012-10-23
    cat fb.monDom5.chainCerSim1Link.txt
    #	370181399 bases of 3501660299 (10.572%) in intersection

    # and this swap
    mkdir /hive/data/genomes/cerSim1/bed/blastz.monDom5.swap
    cd /hive/data/genomes/cerSim1/bed/blastz.monDom5.swap
    time doBlastzChainNet.pl \
	/hive/data/genomes/monDom5/bed/lastzCerSim1.2012-10-23/DEF \
	-verbose=2 -bigClusterHub=swarm \
        -swap -workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	-chainMinScore=5000 -chainLinearGap=loose > swap.log 2>&1 &
    #	real    69m55.609s
    cat fb.cerSim1.chainMonDom5Link.txt
    #	362581231 bases of 2366858012 (15.319%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/cerSim1/bed
    ln -s blastz.monDom5.swap lastz.monDom5

#########################################################################
# lastz swap Cow bosTau7 (DONE - 2012-10-30 - Hiram)
    # the original alignment
    cd /hive/data/genomes/bosTau7/bed/lastzCerSim1.2012-10-24
    cat fb.bosTau7.chainCerSim1Link.txt
    #   1628320914 bases of 2804673174 (58.057%) in intersection

    # and this swap
    mkdir /hive/data/genomes/cerSim1/bed/blastz.bosTau7.swap
    cd /hive/data/genomes/cerSim1/bed/blastz.bosTau7.swap
    time nice -n +19 doBlastzChainNet.pl \
        /hive/data/genomes/bosTau7/bed/lastzCerSim1.2012-10-24/DEF \
	-workhorse=hgwdev -chainMinScore=3000 -chainLinearGap=medium \
	-swap -qRepeats=windowmaskerSdust \
	-bigClusterHub=swarm -verbose=2 > swap.log 2>&1 &
    #   real    120m35.871s

    cat fb.cerSim1.chainBosTau7Link.txt
    #   1592093865 bases of 2366858012 (67.266%) in intersection

    # set sym link to indicate this is the lastz for this genome:
    cd /hive/data/genomes/cerSim1/bed
    ln -s blastz.bosTau7.swap lastz.bosTau7

##############################################################################
# create ucscToINSDC name mapping (DONE - 2013-08-23 - Hiram)
    mkdir /hive/data/genomes/cerSim1/bed/ucscToINSDC
    cd /hive/data/genomes/cerSim1/bed/ucscToINSDC

    # copying these scripts from the previous load and improving them
    # with each instance
    ./translateNames.sh NC_001808.1
    ./verifyAll.sh
    ./join.sh

    sed -e "s/21/12/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \
      | hgLoadSqlTab cerSim1 ucscToINSDC stdin ucscToINSDC.tab
    checkTableCoords cerSim1 ucscToINSDC
    featureBits -countGaps cerSim1 ucscToINSDC
    # 2464367180 bases of 2464367180 (100.000%) in intersection

    # verify the track link to INSDC functions

##############################################################################
##############################################################################
# TransMap V3 tracks. see makeDb/doc/transMapTracks.txt (2014-12-21 markd)
##############################################################################
