# for emacs: -*- mode: sh; -*-

# This file describes browser build for the tarSyr2
# Tarsius syrichta -- WashU Tarsius_syrichta-2.0.1 Tarsius  syrichta

#############################################################################
# fetch sequence from new style download directory (DONE - 2014-11-20 - Hiram)
    # NCBI has redesigned their FTP download site, new type of address
    #      and naming scheme
    mkdir -p /hive/data/genomes/tarSyr2/genbank
    cd /hive/data/genomes/tarSyr2/genbank

    rsync -L -a -P rsync://ftp.ncbi.nlm.nih.gov/genomes/genbank/vertebrate_mammalian/Tarsius_syrichta/all_assembly_versions/GCA_000164805.2_Tarsius_syrichta-2.0.1/ ./

    # measure what we have here:
    faSize GCA_000164805.2_Tarsius_syrichta-2.0.1_genomic.fna.gz

    # 3453847770 bases (48109210 N's 3405738560 real 2042571886
    # upper 1363166674 lower) in 337188 sequences in 1 files
    # Total size: mean 10243.1 sd 72186.0 min 202 (ABRT02344103.1)
    #   max 4032379 (KE950501.1) median 773
    # %39.47 masked total, %40.03 masked real

    # note that these totals do not include chrM since the GenBank ftp directory
    # did not include a non-nuclear directory

#############################################################################
# fixup to UCSC naming scheme (DONE - 2014-11-20 - Hiram)
    mkdir /hive/data/genomes/tarSyr2/ucsc
    cd /hive/data/genomes/tarSyr2/ucsc

    # this one is unusually simple because there is just one file of
    # contigs, and one AGP file.  Even the contig names are already
    # almost in UCSC format.  Just need to substitute v where . is in
    # the accession suffix

    zcat ../genbank/GCA_000164805.2_Tarsius_syrichta-2.0.1_genomic.fna.gz \
      | sed -e 's/ Tarsius syrichta.*//;' \
         | sed -e 's/\./v/;' | gzip -c > tarSyr2.fa.gz

    zcat ../genbank/GCA_000164805.2_Tarsius_syrichta-2.0.1_assembly_structure/Primary_Assembly/unplaced_scaffolds/AGP/unplaced.scaf.agp.gz \
      | sed -e 's/\./v/;' > tarSyr2.agp

    # verify nothing lost compared to the genbank source:
    faSize tarSyr2.fa.gz
    # 3453847770 bases (48109210 N's 3405738560 real 2042571886
    #   upper 1363166674 lower) in 337188 sequences in 1 files
    # Total size: mean 10243.1 sd 72186.0 min 202 (ABRT02344103v1)
    #   max 4032379 (KE950501v1) median 773
    # %39.47 masked total, %40.03 masked real

    # same numbers as above.  We don't need the masking, but that will be
    # overwritten when the 2bit file is constructed below

#############################################################################
#  Initial database build (DONE - 2014-11-20 - Hiram)

    cd /hive/data/genomes/tarSyr2
    cat << '_EOF_' > tarSyr2.config.ra
# Config parameters for makeGenomeDb.pl:
db tarSyr2
clade mammal
genomeCladePriority 35
scientificName Tarsius syrichta
commonName Tarsier
assemblyDate Sep. 2013
assemblyLabel Washington University
assemblyShortLabel Tarsius_syrichta-2.0.1
orderKey 409
# chrM bioProject: 38399 -> NC_012774.1
mitoAcc NC_012774.1
fastaFiles /cluster/data/tarSyr2/ucsc/tarSyr2.fa.gz
agpFiles /cluster/data/tarSyr2/ucsc/tarSyr2.agp
# qualFiles /cluster/data/tarSyr2/ucsc/tarSyr2.qual.qac
dbDbSpeciesDir tarsier
photoCreditURL http://en.wikipedia.org/wiki/File:Tarsier_Hugs_Mossy_Branch.jpg
photoCreditName Waldir, Wikipedia Commons
ncbiGenomeId 766
ncbiAssemblyId 784738
ncbiAssemblyName Tarsius_syrichta-2.0.1
ncbiBioProject 20339
genBankAccessionID GCA_000164805.2
taxId 9478
'_EOF_'
    # << happy emacs

    # verify sequence and AGP are OK:
    time makeGenomeDb.pl -workhorse=hgwdev -dbHost=hgwdev -fileServer=hgwdev \
         -stop=agp tarSyr2.config.ra > agp.log 2>&1
    # real    2m22.582s

    # then finish it off:
    time nice -n +19 makeGenomeDb.pl -workhorse=hgwdev -dbHost=hgwdev \
       -fileServer=hgwdev -continue=db tarSyr2.config.ra > db.log 2>&1

    # there was a problem with the last step trackDb, fixed makeGenomeDb.pl
    # and completed:
    time nice -n +19 makeGenomeDb.pl -workhorse=hgwdev -dbHost=hgwdev \
      -fileServer=hgwdev -continue=trackDb tarSyr2.config.ra > trackDb.log 2>&1

##########################################################################
# running repeat masker (DONE - 2014-11-21 - Hiram)
    mkdir /hive/data/genomes/tarSyr2/bed/repeatMasker
    cd /hive/data/genomes/tarSyr2/bed/repeatMasker
    time  doRepeatMasker.pl -buildDir=`pwd` \
        -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
        -smallClusterHub=ku tarSyr2 > do.log 2>&1
    # real    750m50.942s

    cat faSize.rmsk.txt
    # 3453864774 bases (48109210 N's 3405755564 real 1778461643 upper
    #   1627293921 lower) in 337189 sequences in 1 files
    # Total size: mean 10243.1 sd 72185.9 min 202 (ABRT02344103v1)
    #   max 4032379 (KE950501v1) median 773
    # %47.12 masked total, %47.78 masked real

    egrep -i "versi|relea" do.log
    #    January 31 2015 (open-4-0-5) version of RepeatMasker
    #  CC   RELEASE 20140131;

    time featureBits -countGaps tarSyr2 rmsk
    #  1631014044 bases of 3453864774 (47.223%) in intersection

    #  real    3m0.934s

    # why is it different than the faSize above ?
    # because rmsk masks out some N's as well as bases, the count above
    #   separates out the N's from the bases, it doesn't show lower case N's

##########################################################################
# running simple repeat (DONE 2014-11-21 - Hiram)

    mkdir /hive/data/genomes/tarSyr2/bed/simpleRepeat
    cd /hive/data/genomes/tarSyr2/bed/simpleRepeat
    time doSimpleRepeat.pl -buildDir=`pwd` -bigClusterHub=ku \
        -dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=ku \
        tarSyr2 > do.log 2>&1
    # real    75m31.505s

    cat fb.simpleRepeat
    # 85367852 bases of 3405755564 (2.507%) in intersection

    # add to rmsk after it is done:
    cd /hive/data/genomes/tarSyr2
    twoBitMask tarSyr2.rmsk.2bit \
        -add bed/simpleRepeat/trfMask.bed tarSyr2.2bit
    #   you can safely ignore the warning about fields >= 13

    twoBitToFa tarSyr2.2bit stdout | faSize stdin > faSize.tarSyr2.2bit.txt
    cat faSize.tarSyr2.2bit.txt
    # 3453864774 bases (48109210 N's 3405755564 real 1776164022 upper
    #    1629591542 lower) in 337189 sequences in 1 files
    # Total size: mean 10243.1 sd 72185.9 min 202 (ABRT02344103v1)
    #    max 4032379 (KE950501v1) median 773
    # %47.18 masked total, %47.85 masked real
    # 2670044500 bases (20740269 N's 2649304231 real 1359475670 upper
    #    1289828561 lower) in 3179 sequences in 1 files
    # Total size: mean 839900.8 sd 9113794.6 min 224 (chrUn_GJ060330v1)
    #    max 158337067 (chr1) median 1356
    # %48.31 masked total, %48.69 masked real

    rm /gbdb/tarSyr2/tarSyr2.2bit
    ln -s `pwd`/tarSyr2.2bit /gbdb/tarSyr2/tarSyr2.2bit

##########################################################################
## WINDOWMASKER (DONE - 2014-11-21 - Hiram)

    mkdir /hive/data/genomes/tarSyr2/bed/windowMasker
    cd /hive/data/genomes/tarSyr2/bed/windowMasker
    time nice -n +19 doWindowMasker.pl -buildDir=`pwd` -workhorse=hgwdev \
        -dbHost=hgwdev tarSyr2 > do.log 2>&1
    # real    801m33.192s

    # Masking statistics
    cat faSize.tarSyr2.cleanWMSdust.txt
    # 3453864774 bases (48109210 N's 3405755564 real 2023347923 upper
    #    1382407641 lower) in 337189 sequences in 1 files
    # Total size: mean 10243.1 sd 72185.9 min 202 (ABRT02344103v1)
    #    max 4032379 (KE950501v1) median 773
    # %40.02 masked total, %40.59 masked real

    cat fb.tarSyr2.rmsk.windowmaskerSdust.txt
    # 976090281 bases of 3453864774 (28.261%) in intersection

##########################################################################
# cpgIslands - (DONE - 2014-11-25,27 - Hiram)
    mkdir /hive/data/genomes/tarSyr2/bed/cpgIslands
    cd /hive/data/genomes/tarSyr2/bed/cpgIslands
    time doCpgIslands.pl -dbHost=hgwdev -bigClusterHub=ku \
      -workhorse=hgwdev -smallClusterHub=ku tarSyr2 > do.log 2>&1
    #  real    2250m21.599s

    cat fb.tarSyr2.cpgIslandExt.txt
    # 23569257 bases of 3405755564 (0.692%) in intersection

##############################################################################
# cpgIslands on UNMASKED sequence (DONE - 2014-11-25,27 - Hiram)
    mkdir /hive/data/genomes/tarSyr2/bed/cpgIslandsUnmasked
    cd /hive/data/genomes/tarSyr2/bed/cpgIslandsUnmasked

    # run stepwise so the loading can be done in a different table
    time doCpgIslands.pl -dbHost=hgwdev -bigClusterHub=ku -buildDir=`pwd` \
       -tableName=cpgIslandExtUnmasked \
          -maskedSeq=/hive/data/genomes/tarSyr2/tarSyr2.unmasked.2bit \
             -workhorse=hgwdev -smallClusterHub=ku tarSyr2 > do.log 2>&1
    # real    2229m13.108s

    cat fb.tarSyr2.cpgIslandExtUnmasked.txt
    # 35810524 bases of 3405755564 (1.051%) in intersection

#############################################################################
# cytoBandIdeo - (DONE - 2014-11-25 - Hiram)
    mkdir /hive/data/genomes/tarSyr2/bed/cytoBand
    cd /hive/data/genomes/tarSyr2/bed/cytoBand
    makeCytoBandIdeo.csh tarSyr2

#########################################################################
# genscan - (DONE 2014-11-25,27 - Hiram)
    mkdir /hive/data/genomes/tarSyr2/bed/genscan
    cd /hive/data/genomes/tarSyr2/bed/genscan
    time doGenscan.pl -buildDir=`pwd` -workhorse=hgwdev -dbHost=hgwdev \
      -bigClusterHub=ku tarSyr2 > do.log 2>&1
    # real    2786m14.622s

    cat fb.tarSyr2.genscan.txt
    # 63545533 bases of 3405755564 (1.866%) in intersection

    cat fb.tarSyr2.genscanSubopt.txt
    # 71743479 bases of 3405755564 (2.107%) in intersection

########################################################################
# Create kluster run files (DONE - 2014-12-03 - Hiram)

    # numerator is tarSyr2 gapless bases "real" as reported by:
    featureBits -noRandom -noHap tarSyr2 gap
    # 48109210 bases of 3405755564 (1.413%) in intersection

    # denominator is hg19 gapless bases as reported by:
    #   featureBits -noRandom -noHap hg19 gap
    #     234344806 bases of 2861349177 (8.190%) in intersection
    # 1024 is threshold used for human -repMatch:
    calc \( 3405755564 / 2861349177 \) \* 1024
    # ( 3405755564 / 2861349177 ) * 1024 = 1218.828420


    # ==> use -repMatch=1200 according to size scaled down from 1024 for human.
    #   and rounded down to nearest 50
    cd /hive/data/genomes/tarSyr2
    time blat tarSyr2.2bit \
         /dev/null /dev/null -tileSize=11 -makeOoc=jkStuff/tarSyr2.11.ooc \
        -repMatch=1200
    # Wrote 32323 overused 11-mers to jkStuff/tarSyr2.11.ooc
    # real    0m57.833s

    #   there are no non-bridged gaps, don't need this
    #   check non-bridged gaps to see what the typical size is:
    hgsql -N -e 'select * from gap where bridge="no" order by size;' \
       tarSyr2 | ave -col=7 stdin
    # this does nothing
#     gapToLift -verbose=2 -minGap=100 bosTau7 jkStuff/nonBridged.lft \
#         -bedFile=jkStuff/nonBridged.bed

########################################################################
# GENBANK AUTO UPDATE (DONE - 2014-12-11 - Hiram)
    ssh hgwdev
    cd $HOME/kent/src/hg/makeDb/genbank
    git pull
    # /cluster/data/genbank/data/organism.lst shows:
    # #organism       mrnaCnt estCnt  refSeqCnt
    # Tarsius syrichta        8       0       0

    # edit etc/genbank.conf to add tarSyr2 just before tarSyr1

# tarSyr2 (Tarsier)
tarSyr2.serverGenome = /hive/data/genomes/tarSyr2/tarSyr2.2bit
tarSyr2.clusterGenome = /hive/data/genomes/tarSyr2/tarSyr2.2bit
tarSyr2.ooc = /hive/data/genomes/tarSyr2/jkStuff/tarSyr2.11.ooc
tarSyr2.lift = no
tarSyr2.refseq.mrna.native.pslCDnaFilter  = ${ordered.refseq.mrna.native.pslCDnaFilter}
tarSyr2.refseq.mrna.xeno.pslCDnaFilter    = ${ordered.refseq.mrna.xeno.pslCDnaFilter}
tarSyr2.genbank.mrna.native.pslCDnaFilter = ${ordered.genbank.mrna.native.pslCDnaFilter}
tarSyr2.genbank.mrna.xeno.pslCDnaFilter   = ${ordered.genbank.mrna.xeno.pslCDnaFilter}
tarSyr2.genbank.est.native.pslCDnaFilter  = ${ordered.genbank.est.native.pslCDnaFilter}
tarSyr2.refseq.mrna.native.load = no
tarSyr2.refseq.mrna.xeno.load = yes
tarSyr2.genbank.mrna.xeno.load = no
tarSyr2.genbank.est.native.load = no
tarSyr2.downloadDir = tarSyr2
tarSyr2.perChromTables = no


    git commit -m "Added tarSyr2; refs #14410" etc/genbank.conf
    git push
    # update /cluster/data/genbank/:
    make etc-update

# Edit src/lib/gbGenome.c to add new species.  Skipped

    screen      #  control this business with a screen since it takes a while
    cd /cluster/data/genbank

    time ./bin/gbAlignStep -initial tarSyr2
    # logFile: var/build/logs/2014.12.03-15:37:55.tarSyr2.initalign.log
    #   real    369m37.727s

    #   To re-do, rm the dir first:
    #     /cluster/data/genbank/work/initial.tarSyr2

    # load database when finished
    ssh hgwdev
    cd /cluster/data/genbank
    time ./bin/gbDbLoadStep -drop -initialLoad tarSyr2
    # logFile: var/dbload/hgwdev/logs/2014.12.11-11:06:27.tarSyr2.dbload.log
    # real    35m42.214s

    # enable daily alignment and update of hgwdev
    cd ~/kent/src/hg/makeDb/genbank
    git pull
    # add tarSyr2 to:
    #   vi etc/align.dbs etc/hgwdev.dbs
    git commit -m "Added tarSyr2 - Tarsier refs #14410" etc/align.dbs etc/hgwdev.dbs
    git push
    make etc-update

#########################################################################
# ucscToINSDC table/track (DONE - 2014-12-11 - Hiram)

    mkdir /hive/data/genomes/tarSyr2/bed/ucscToINSDC
    cd /hive/data/genomes/tarSyr2/bed/ucscToINSDC
    # check for chrM in assembly:
    grep chrM ../../tarSyr2.agp
# chrM    1       17004   12      F       NC_012774.1     1       17004   +

    # use the accession name from there in this command (blank if none)
    ~/kent/src/hg/makeDb/doc/felCat8/ucscToINSDC.sh \
        ../../genbank/GCA_*assembly_structure/Primary_Assembly NC_012774.1

    awk '{printf "%s\t0\t%d\n", $1,$2}' ../../chrom.sizes \
         | sort > name.coordinate.tab
    join name.coordinate.tab ucscToINSDC.txt | tr '[ ]' '[\t]' \
         > ucscToINSDC.bed
    # should all be the same line count:
    wc -l *
    # 337189 name.coordinate.tab
    # 337189 ucscToINSDC.bed
    # 337189 ucscToINSDC.txt

    cut -f1 ucscToINSDC.bed | awk '{print length($0)}' | sort -n | tail -1
    # 14
    # use the 14 in this sed
    sed -e "s/21/14/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \
         | hgLoadSqlTab tarSyr2 ucscToINSDC stdin ucscToINSDC.bed
    checkTableCoords tarSyr2
    # should cover %100 entirely:
    featureBits -countGaps tarSyr2 ucscToINSDC
    # 3453864774 bases of 3453864774 (100.000%) in intersection

#########################################################################
# add chromAlias table (DONE - 2016-10-20 - Hiram)

    mkdir /hive/data/genomes/tarSyr2/bed/chromAlias
    cd /hive/data/genomes/tarSyr2/bed/chromAlias

    hgsql -N -e 'select chrom,name from ucscToRefSeq;' tarSyr2 \
        > ucsc.refseq.tab
    printf "chrM\tNC_012774.1\n" >> ucsc.refseq.tab
    hgsql -N -e 'select chrom,name from ucscToINSDC;' tarSyr2 \
        | grep -v chrM > ucsc.genbank.tab
    printf "chrM\tAB371090.1\n" >> ucsc.genbank.tab

    join -t$'\t' ../idKeys/tarSyr2.idKeys.txt \
	../../ensembl/ensemblTarSyr2.idKeys.txt \
	| cut -f2,3 | sort > ucsc.ensembl.tab

    # verify chrM is correct:
    grep chrM *.tab
ucsc.ensembl.tab:chrM   MT
ucsc.genbank.tab:chrM   AB371090.1
ucsc.refseq.tab:chrM    NC_012774.1

    ~/kent/src/hg/utils/automation/chromAlias.pl
    sort -o tarSyr2.chromAlias.tab tarSyr2.chromAlias.tab

for t in refseq genbank ensembl
do
  c0=`cat ucsc.$t.tab | wc -l`
  c1=`grep $t tarSyr2.chromAlias.tab | wc -l`
  ok="OK"
  if [ "$c0" -ne "$c1" ]; then
     ok="ERROR"
  fi
  printf "# checking $t: $c0 =? $c1 $ok\n"
done
# checking refseq: 337189 =? 337189 OK
# checking genbank: 337189 =? 337189 OK
# checking ensembl: 337189 =? 337189 OK

    hgLoadSqlTab tarSyr2 chromAlias ~/kent/src/hg/lib/chromAlias.sql \
        tarSyr2.chromAlias.tab

#########################################################################
# fixup search rule for assembly track/gold table (DONE - 2014-05-01 - Hiram)
    hgsql -N -e "select frag from gold;" tarSyr2 | sort | head -1
ABRT02000001.1

    hgsql -N -e "select frag from gold;" tarSyr2 | sort | tail -2
ABRT02492902.1
NC_012774.1

    # verify this rule will find them all or eliminate them all:
    hgsql -N -e "select frag from gold;" tarSyr2 | wc -l
    # 492903

    hgsql -N -e "select frag from gold;" tarSyr2 \
       | egrep -e '[AN][BC][R_][T0][0-9]+(\.1)?' | wc -l
    # 492903

    hgsql -N -e "select frag from gold;" tarSyr2 \
       | egrep -v -e '[AN][BC][R_][T0][0-9]+(\.1)?' | wc -l
    # 0

    # hence, add to trackDb/tarsier/tarSyr2/trackDb.ra
searchTable gold
shortCircuit 1
termRegex [AN][BC][R_][T0][0-9]+(\.1)?
query select chrom,chromStart,chromEnd,frag from %s where frag like '%s%%'
searchPriority 8

#########################################################################
# LIFTOVER TO tarSyr1 (DONE - 2014-12-11 - Hiram)
    ssh hgwdev
    mkdir /hive/data/genomes/tarSyr2/bed/blat.tarSyr1.2014-12-11
    cd /hive/data/genomes/tarSyr2/bed/blat.tarSyr1.2014-12-11
    time (doSameSpeciesLiftOver.pl -verbose=2 -buildDir=`pwd` \
	-ooc=/hive/data/genomes/tarSyr2/jkStuff/tarSyr2.11.ooc \
        -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
         tarSyr2 tarSyr1) > do.log 2>&1
    # real    13948m39.233s
    # this was a tough job, that long time was not necessarily due
    # to cluster contention.  the blat run took over 9 days:
# Completed: 361089 of 361089 jobs
# CPU time in finished jobs:  229192746s 3819879.10m 63664.65h 2652.69d  7.268 y
# IO & Wait Time:              11337726s  188962.10m  3149.37h  131.22d  0.360 y
# Average job time:                 666s      11.10m     0.19h    0.01d
# Longest finished job:            1492s      24.87m     0.41h    0.02d
# Submission to last job:        811729s   13528.82m   225.48h    9.40d

    # verify the convert link on the test browser is now active from tarSyr2 to
    # tarSyr1

#########################################################################
# all.joiner update, downloads and in pushQ - (DONE 2014-12-29 - Hiram)
    cd $HOME/kent/src/hg/makeDb/schema
    # fixup all.joiner until this is a clean output
    joinerCheck -database=tarSyr2 -keys all.joiner
    # about 50 minutes
    joinerCheck -database=tarSyr2 -tableCoverage all.joiner
    joinerCheck -database=tarSyr2 -times all.joiner

    cd /hive/data/genomes/tarSyr2
    time makeDownloads.pl tarSyr2 > downloads.log 2>&1
    # real    22m17.544s

    #   now ready for pushQ entry
    mkdir /hive/data/genomes/tarSyr2/pushQ
    cd /hive/data/genomes/tarSyr2/pushQ
    time makePushQSql.pl tarSyr2 > tarSyr2.pushQ.sql 2> stderr.out
    # real    5m47.086s

    #   check for errors in stderr.out, some are OK, e.g.:
# WARNING: hgwdev does not have /gbdb/tarSyr2/wib/gc5Base.wib
# WARNING: hgwdev does not have /gbdb/tarSyr2/wib/quality.wib
# WARNING: hgwdev does not have /gbdb/tarSyr2/bbi/qualityBw/quality.bw
# WARNING: tarSyr2 does not have seq
# WARNING: tarSyr2 does not have estOrientInfo

    #   copy it to hgwbeta
    scp -p tarSyr2.pushQ.sql qateam@hgwbeta:/tmp
    ssh qateam@hgwbeta hgwbeta "./bin/x86_64/hgsql qapushq < /tmp/tarSyr2.pushQ.sql"

    #   in that pushQ entry walk through each entry and see if the
    #   sizes will set properly

#########################################################################
# UCSC to RefSeq name correspondence (DONE - 2015-04-28 - braney)

    mkdir /hive/data/genomes/tarSyr2/bed/ucscToRefSeq
    cd /hive/data/genomes/tarSyr2/bed/ucscToRefSeq

    # columns 5 and 7 are the INSDC and RefSeq names

    grep -v "^#" ../../genbank/GCA_000164805.2_Tarsius_syrichta-2.0.1_assembly_report.txt | awk -F'\t' '{printf "%s\t%s\n", $5,$7}'  | sort > insdc.refSeq.tab

    hgsql -N -e 'select name,chrom,chromStart,chromEnd from ucscToINSDC;' tarSyr2 | sort > insdc.ucsc.tab

    join insdc.ucsc.tab insdc.refSeq.tab | tr '[ ]' '[\t]' | cut -f2- > ucsc.refSeq.tab


    export chrSize=`cut -f1 ucsc.refSeq.tab | awk '{print length($0)}' | sort -n | tail -1`
    sed -e "s/21/$chrSize/" $HOME/kent/src/hg/lib/ucscToINSDC.sql | sed -e 's/INSDC/RefSeq/g;' > ucscToRefSeq.sql
    hgLoadSqlTab tarSyr2 ucscToRefSeq ./ucscToRefSeq.sql ucsc.refSeq.tab

    checkTableCoords  tarSyr2 -table=ucscToRefSeq

