# for emacs: -*- mode: sh; -*-

# Ochotona princeps OchPri3.0
# ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Ochotona_princeps/OchPri3.0/

#	DATE:   21-May-2012
#	ORGANISM:       Ochotona princeps
#	TAXID:  9978
#	ASSEMBLY LONG NAME:     OchPri3.0
#	ASSEMBLY SHORT NAME:    OchPri3.0
#	ASSEMBLY SUBMITTER:     Broad Institute
#	ASSEMBLY TYPE:  Haploid
#	NUMBER OF ASSEMBLY-UNITS:       1
#	ASSEMBLY ACCESSION:     GCA_000292845.1
#	FTP-RELEASE DATE: 28-Aug-2012

##########################################################################
# Download sequence (DONE - 2012-02-26 - Hiram)
    mkdir -p /hive/data/genomes/ochPri3/genbank
    cd /hive/data/genomes/ochPri3/genbank

    rsync -a -P \
rsync://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Ochotona_princeps/OchPri3.0/ ./ > fetch.log

###########################################################################
# fixup to UCSC names (DONE - 2013-03-25 - Hiram)
    cd /hive/data/genomes/ochPri3

    $HOME/kent/src/hg/utils/automation/unplacedScaffolds.pl
    # constructs the directory: /hive/data/genomes/ochPri3/ucsc
    # with files:
    cd /hive/data/genomes/ochPri3/ucsc
    ls -ogrt
# -rw-rw-r-- 1  14853091 Mar 25 21:40 ochPri3.ucsc.agp
# -rw-rw-r-- 1 600736012 Mar 25 21:50 ochPri3.ucsc.fa.gz
# -rw-rw-r-- 1       219 Mar 25 21:51 checkAgp.result.txt

    # this script also constructs the ochPri3.unmasked.2bit file, but
    # this is not needed with the makeGenomeDb.pl script:
    rm -f /hive/data/genomes/ochPri3/ochPri3.unmasked.2bit
    # there is one duplicate contig:
    echo ALIT01123196 > remove.duplicate.list
    zcat ochPri3.ucsc.fa.gz > /dev/shm/ochPri3.ucsc.fa
    faSomeRecords -exclude /dev/shm/ochPri3.ucsc.fa remove.duplicate.list stdout | gzip -c > ochPri3.clean.fa.gz
    grep -v ALIT01123196 ochPri3.ucsc.agp > ochPri3.clean.agp

###########################################################################
# Initial genome build (DONE - 2013-06-18 - Hiram)
    cd /hive/data/genomes/ochPri3

    cat << '_EOF_' > ochPri3.config.ra
# Config parameters for makeGenomeDb.pl:
db ochPri3
clade mammal
# genomeCladePriority 35
scientificName Ochotona princeps
commonName Pika
assemblyDate May 2012
assemblyLabel Broad Institute
assemblyShortLabel OchPri3.0
orderKey 1929
mitoAcc NC_005358
fastaFiles /hive/data/genomes/ochPri3/ucsc/ochPri3.clean.fa.gz
agpFiles /hive/data/genomes/ochPri3/ucsc/ochPri3.clean.agp
# qualFiles none
dbDbSpeciesDir pika
photoCreditURL http://en.wikipedia.org/wiki/File:Ochotona_princeps.jpg
photoCreditName Justin Johnsen, Wikipedia Commons
ncbiGenomeId 771
ncbiAssemblyId 681538
ncbiAssemblyName OchPri3.0
ncbiBioProject 193497
genBankAccessionID GCA_000292845.1
taxId 9978
'_EOF_'

    # run step wise to confirm sequence and AGP files match each other
    time makeGenomeDb.pl -fileServer=hgwdev -workhorse=hgwdev \
         -stop=agp ochPri3.config.ra > genomeDb.agp.log 2>&1
    # real    4m15.127s

    # verify it is OK:
    tail -1 genomeDb.agp.log
    # *** All done!  (through the 'agp' step)

    time nice -n +19 makeGenomeDb.pl -fileServer=hgwdev \
	-workhorse=hgwdev -continue=db ochPri3.config.ra \
        > genomeDb.db.log 2>&1
    # real    21m27.788s

    # add the trackDb business to the source tree

##########################################################################
# running repeat masker (DONE - 2013-06-18 - Hiram)
    mkdir /hive/data/genomes/ochPri3/bed/repeatMasker
    cd /hive/data/genomes/ochPri3/bed/repeatMasker
    time doRepeatMasker.pl -buildDir=`pwd` -noSplit \
	-bigClusterHub=swarm -dbHost=hgwdev -workhorse=hgwdev \
	-smallClusterHub=encodek ochPri3 > do.log 2>&1 &
    #   real    2327m8.018s
    cat faSize.rmsk.txt
# 2229835716 bases (285847846 N's 1943987870 real 1370818698 upper
#    573169172 lower) in 10420 sequences in 1 files
# Total size: mean 213995.8 sd 2558422.6 min 1000 (ALIT01132924)
#    max 83735184 (JH802061) median 2572
# %25.70 masked total, %29.48 masked real

    egrep -i "versi|releas" do.log
# RepeatMasker version open-4.0.2
#    April 29 2013 (open-4-0-2) version of RepeatMasker
# CC   RELEASE 20130422;                                            *

##########################################################################
# running simple repeat (DONE - 2013-06-18 - Hiram)
    mkdir /hive/data/genomes/ochPri3/bed/simpleRepeat
    cd /hive/data/genomes/ochPri3/bed/simpleRepeat
    time doSimpleRepeat.pl -buildDir=`pwd` -bigClusterHub=swarm \
	-dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=encodek \
	ochPri3 > do.log 2>&1 &
    #   about 24 minutes

    cat fb.simpleRepeat
    #	23780445 bases of 1943987870 (1.223%) in intersection

    # using RMSK and TRF since RMSK is about the same as WM
    cd /hive/data/genomes/ochPri3
    twoBitMask ochPri3.rmsk.2bit \
	-add bed/simpleRepeat/trfMask.bed ochPri3.2bit
    #	you can safely ignore the warning about fields >= 13

    twoBitToFa ochPri3.2bit stdout | faSize stdin > faSize.ochPri3.2bit.txt
    cat faSize.ochPri3.2bit.txt
# 2229835716 bases (285847846 N's 1943987870 real 1369994081 upper
#    573993789 lower) in 10420 sequences in 1 files
# Total size: mean 213995.8 sd 2558422.6 min 1000 (ALIT01132924)
#    max 83735184 (JH802061) median 2572
# %25.74 masked total, %29.53 masked real

    rm /gbdb/ochPri3/ochPri3.2bit
    ln -s `pwd`/ochPri3.2bit /gbdb/ochPri3/ochPri3.2bit

#########################################################################
# Verify all gaps are marked, add any N's not in gap as type 'other'
#	(DONE - 2013-06-18 - Hiram)
    mkdir /hive/data/genomes/ochPri3/bed/gap
    cd /hive/data/genomes/ochPri3/bed/gap
    time nice -n +19 findMotif -motif=gattaca -verbose=4 \
	-strand=+ ../../ochPri3.unmasked.2bit > findMotif.txt 2>&1
    #	real    0m40.949s
    grep "^#GAP " findMotif.txt | sed -e "s/^#GAP //" > allGaps.bed
    time featureBits ochPri3 -not gap -bed=notGap.bed
    # 1943987870 bases of 1943987870 (100.000%) in intersection
    # real    0m11.599s

    awk '{print $3-$2,$0}' notGap.bed | sort -rn > notGap.sizes.txt
    # largest contiguous sequence:
    head -1 notGap.sizes.txt | awk '{print $1}'
    # 579101
    # minimal coverage 1 base out of that largest sequence:
    echo 579101 | awk '{printf "%15.10f\n", 1/(2*$1)}' | sed -e 's/ //g'
    # 0.0000008634
    time bedIntersect -minCoverage=0.0000008634 allGaps.bed notGap.bed \
      test.new.gaps.bed
    # real    0m1.884s
    # number of bases in these new gaps, none:
# -rw-rw-r-- 1        0 Jun 18 15:37 test.new.gaps.bed
    # when non-zero, to count them, e.g.:
    # awk '{print $3-$2}' test.new.gaps.bed | ave stdin | grep total
    # total 8314.000000

#########################################################################
# cytoBandIdeo - (DONE - 2013-06-18 - Hiram)
    mkdir /hive/data/genomes/ochPri3/bed/cytoBand
    cd /hive/data/genomes/ochPri3/bed/cytoBand
    makeCytoBandIdeo.csh ochPri3

##########################################################################
## WINDOWMASKER (DONE - 2013-06-18 - Hiram)
    mkdir /hive/data/genomes/ochPri3/bed/windowMasker
    cd /hive/data/genomes/ochPri3/bed/windowMasker
    time nice -n +19 doWindowMasker.pl -buildDir=`pwd` -workhorse=hgwdev \
	-dbHost=hgwdev ochPri3 > do.log 2>&1 &
    #   about 195 minutes

    cat faSize.ochPri3.cleanWMSdust.txt
# 2229835716 bases (285847846 N's 1943987870 real 1348364836
#    upper 595623034 lower) in 10420 sequences in 1 files
# Total size: mean 213995.8 sd 2558422.6 min 1000 (ALIT01132924)
#    max 83735184 (JH802061) median 2572
# %26.71 masked total, %30.64 masked real

    #  This is pretty good for WM, but RMSK isn't that bad either,
    #  so, using the RMSK result to mask the genome

    featureBits -countGaps ochPri3 rmsk windowmaskerSdust  > fb.ochPri3.rmsk.windowmaskerSdust.txt 2>&1
    cat fb.ochPri3.rmsk.windowmaskerSdust.txt
#  416158057 bases of 2947024286 (14.121%) in intersection

########################################################################
# cpgIslands - (DONE - 2013-06-20 - Hiram)
    mkdir /hive/data/genomes/ochPri3/bed/cpgIslands
    cd /hive/data/genomes/ochPri3/bed/cpgIslands
    time doCpgIslands.pl ochPri3 > do.log 2>&1 &
    #  Elapsed time: 14m35s

    cat fb.ochPri3.cpgIslandExt.txt
    #   20228048 bases of 1943987870 (1.041%) in intersection

#############################################################################
# CPG Islands Unmasked track (DONE - 2014-04-16 - Hiram)

mkdir /hive/data/genomes/ochPri3/bed/cpgIslandsUnmasked
cd /hive/data/genomes/ochPri3/bed/cpgIslandsUnmasked
time doCpgIslands.pl -buildDir=`pwd` -bigClusterHub=ku \
   -tableName=cpgIslandExtUnmasked -dbHost=hgwdev -smallClusterHub=ku \
    -workhorse=hgwdev \
    -maskedSeq=/hive/data/genomes/ochPri3/ochPri3.unmasked.2bit ochPri3 > do.log 2>&1

#############################################################################
# genscan - (DONE - 2013-06-20 - Hiram)
    mkdir /hive/data/genomes/ochPri3/bed/genscan
    cd /hive/data/genomes/ochPri3/bed/genscan
    time doGenscan.pl ochPri3 > do.log 2>&1  &
    #  real    85m9.645s
    # last job finished manually:
    time ./lastGsBig.csh JH802073 000 gtf/000/JH802073.gtf \
           pep/000/JH802073.pep subopt/000/JH802073.bed
    #  real    14m23.140s

    time doGenscan.pl -continue=makeBed ochPri3 > makeBed.log 2>&1  &
    #  real    4m41.262s

    cat fb.ochPri3.genscan.txt
    #   58343898 bases of 1943987870 (3.001%) in intersection
    cat fb.ochPri3.genscanSubopt.txt
    #   52334278 bases of 1943987870 (2.692%) in intersection

########################################################################
# MAKE 11.OOC FILE FOR BLAT/GENBANK (DONE - 2013-06-20 - Hiram)
    # Use -repMatch=700, based on size -- for human we use 1024
    # use the "real" number from the faSize measurement,
    # hg19 is 2897316137, calculate the ratio factor for 1024:
    calc \( 1943987870 / 2897316137 \) \* 1024
    #   ( 1943987870 / 2897316137 ) * 1024 = 687.064678

    # round up to 700

    # ochPri2 was: -repMatch=700
    #   Wrote 19533 overused 11-mers to jkStuff/ochPri2.11.ooc

    cd /hive/data/genomes/ochPri3
    blat ochPri3.2bit /dev/null /dev/null -tileSize=11 \
      -makeOoc=jkStuff/ochPri3.11.ooc -repMatch=700
    #  Wrote 19859 overused 11-mers to jkStuff/ochPri3.11.ooc

    # there are *only* bridged gaps, no lift file needed for genbank
    hgsql -N -e "select bridge from gap;" ochPri3 | sort | uniq -c
    #   122542 yes

#########################################################################
# AUTO UPDATE GENBANK (DONE - 2014-04-16 - Hiram)
    # examine the file:
    /cluster/data/genbank/data/organism.lst
    # for your species to see what counts it has for:
# organism       mrnaCnt estCnt  refSeqCnt
# Ochotona princeps       1       0       8

    # to decide which "native" mrna or ests you want to specify in genbank.conf
    # this appears that ochPri3 has plenty of native est's

    ssh hgwdev
    cd $HOME/kent/src/hg/makeDb/genbank
    git pull
    # edit etc/genbank.conf to add ochPri3 before echTel1 and commit to GIT
# ochPri3 (tenrec)
ochPri3.serverGenome = /hive/data/genomes/ochPri3/ochPri3.2bit
ochPri3.clusterGenome = /hive/data/genomes/ochPri3/ochPri3.2bit
ochPri3.ooc = /hive/data/genomes/ochPri3/jkStuff/ochPri3.11.ooc
ochPri3.lift = no
ochPri3.refseq.mrna.native.pslCDnaFilter  = ${ordered.refseq.mrna.native.pslCDnaFilter}
ochPri3.refseq.mrna.xeno.pslCDnaFilter    = ${ordered.refseq.mrna.xeno.pslCDnaFilter}
ochPri3.genbank.mrna.native.pslCDnaFilter = ${ordered.genbank.mrna.native.pslCDnaFilter}
ochPri3.genbank.mrna.xeno.pslCDnaFilter   = ${ordered.genbank.mrna.xeno.pslCDnaFilter}
ochPri3.genbank.est.native.pslCDnaFilter  = ${ordered.genbank.est.native.pslCDnaFilter}
ochPri3.refseq.mrna.native.load = no
ochPri3.refseq.mrna.xeno.load = yes
ochPri3.genbank.mrna.xeno.load = no
ochPri3.genbank.est.native.load = no
ochPri3.genbank.mrna.native.load = no
ochPri3.genbank.mrna.native.loadDesc = no
ochPri3.downloadDir = ochPri3
ochPri3.perChromTables = no

    # end of section added to etc/genbank.conf
    git commit -m "adding ochPri3 pika refs #9347" etc/genbank.conf
    git push
    make etc-update

    # ~/kent/src/hg/makeDb/genbank/src/lib/gbGenome.c already contains
    # ochPri genome information, if this is a new species, need to add stuff
    # there

    ssh hgwdev			# used to do this on "genbank" machine
    screen			# long running job managed in screen
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbAlignStep -initial ochPri3 &
    #	var/build/logs/2014.04.16-15:35:03.ochPri3.initalign.log
    #	real    604m7.753s

    # load database when finished
    ssh hgwdev
    cd /cluster/data/genbank
    time nice -n +19 ./bin/gbDbLoadStep -drop -initialLoad ochPri3 &
    #	logFile: var/dbload/hgwdev/logs/2014.04.17-10:50:53.ochPri3.dbload.log
    #	real    11m21.383s

    # enable daily alignment and update of hgwdev (TBD - Hiram)
    cd ~/kent/src/hg/makeDb/genbank
    git pull
    # add ochPri3 to: etc/align.dbs etc/hgwdev.dbs
    vi etc/align.dbs etc/hgwdev.dbs
    git commit -m "Added ochPri3 to daily hgwdev build refs #9347" etc/align.dbs etc/hgwdev.dbs
    git push
    make etc-update

############################################################################
# construct liftOver from ochPri2 to ochPri3 (DONE - 2014-04-18 - Hiram)
    # documentation for this step is in ochPri2 - remember to do this

###########################################################################
# construct downloads files (DONE - 2014-04-23 - Hiram)
#   before starting downloads, the joinerCheck should be clean
#   after ochPri3 is added to all.joiner:
    joinerCheck -keys -database=ochPri3 all.joiner

    cd /hive/data/genomes/ochPri3
    makeDownloads.pl -dbHost=hgwdev -workhorse=hgwdev ochPri3 \
	> downloads.log 2>&1
    # about 40 minutes
    # examine the README.txt files to verify the text

###########################################################################
# ready for first pushQ entry (DONE - 2013-06-12 - Hiram)
    mkdir /hive/data/genomes/ochPri3/pushQ
    cd /hive/data/genomes/ochPri3/pushQ
    makePushQSql.pl ochPri3 > ochPri3.sql 2> stderr.out
    # some errors are legitimate and OK:
     head stderr.out
# WARNING: hgwdev does not have /gbdb/ochPri3/wib/gc5Base.wib
# WARNING: hgwdev does not have /gbdb/ochPri3/wib/quality.wib
# WARNING: hgwdev does not have /gbdb/ochPri3/bbi/gc5BaseBw/gc5Base.bw
# WARNING: hgwdev does not have /gbdb/ochPri3/bbi/qualityBw/quality.bw
# WARNING: ochPri3 does not have seq
# WARNING: ochPri3 does not have extFile
# WARNING: ochPri3 does not have estOrientInfo

    scp -p ochPri3.sql qateam@hgwbeta:/tmp
    ssh qateam@hgwbeta './bin/x86_64/hgsql qapushq < /tmp/ochPri3.sql'


############################################################################
# lastz alignment with Human/hg19 (DONE - 2014-06-30 - Hiram)
    #	the original alignment
    cd /hive/data/genomes/hg19/bed/lastzOchPri3.2013-06-17
    cat fb.hg19.chainOchPri3Link.txt
    #	1004662072 bases of 2897316137 (34.676%) in intersection

    #	running the swap
    mkdir /hive/data/genomes/ochPri3/bed/blastz.hg19.swap
    cd /hive/data/genomes/ochPri3/bed/blastz.hg19.swap

    time nice -n +19 doBlastzChainNet.pl -verbose=2 \
	-swap /hive/data/genomes/hg19/bed/lastzOchPri3.2013-06-17/DEF \
	-syntenicNet -chainMinScore=3000 -chainLinearGap=medium \
	-workhorse=hgwdev -smallClusterHub=encodek -bigClusterHub=swarm \
	> swap.log 2>&1 &
    #  real    71m5.679s

    cat fb.ochPri3.chainHg19Link.txt
    #  969182988 bases of 1943987870 (49.855%) in intersection

############################################################################
# blat servers (TBD - 2014-06-10 - Hiram)
#	After getting a blat server assigned by the Blat Server Gods,
    ssh hgwdev

    hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
	VALUES ("ochPri3", "blat4a", "17846", "1", "0"); \
	INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
	VALUES ("ochPri3", "blat4a", "17847", "0", "1");' \
	    hgcentraltest
    #	test it with some sequence

#########################################################################
## Default position set at RHO protein found with blat
##    (DONE - 2013-06-10 - Hiram)
    ssh hgwdev
    hgsql -e 'update dbDb set defaultPos="JH980304:37472293-37479623"
	where name="ochPri3";' hgcentraltest

##############################################################################
# running cpgIsland business (DONE - 2013-06-20 - Hiram)
    mkdir /hive/data/genomes/ochPri3/bed/cpgIsland
    cd /hive/data/genomes/ochPri3/bed/cpgIsland
    ln -s ../../../danRer6/bed/cpgIsland/cpglh.exe .

    mkdir -p hardMaskedFa
    cut -f1 ../../chrom.sizes | while read C
do
    echo ${C}
    twoBitToFa ../../ochPri3.2bit:$C stdout \
	| maskOutFa stdin hard hardMaskedFa/${C}.fa
done

    ssh encodek
    cd /hive/data/genomes/ochPri3/bed/cpgIsland
    mkdir results
    cut -f1 ../../chrom.sizes > chr.list
    cat << '_EOF_' > template
#LOOP
./runOne $(root1) {check out exists results/$(root1).cpg}
#ENDLOOP
'_EOF_'
    # << happy emacs

    #	the faCount business is to make sure there is enough sequence to
    #	work with in the fasta.  cpglh.exe does not like files with too many
    #	N's - it gets stuck
    cat << '_EOF_' > runOne
#!/bin/csh -fe
set C = `faCount hardMaskedFa/$1.fa | egrep "^chr|^Zv" | awk '{print  $2 - $7 }'`
if ( $C > 200 ) then
    ./cpglh.exe hardMaskedFa/$1.fa > /scratch/tmp/$1.$$
    mv /scratch/tmp/$1.$$ $2
else
    touch $2
endif
'_EOF_'
    # << happy emacs
    chmod +x runOne

    gensub2 chr.list single template jobList
    para create jobList
    para try
    para check ... etc
    para time
# Completed: 6457 of 6457 jobs
# CPU time in finished jobs:        135s       2.25m     0.04h    0.00d  0.000 y
# IO & Wait Time:                 16267s     271.11m     4.52h    0.19d  0.001 y
# Average job time:                   3s       0.04m     0.00h    0.00d
# Longest finished job:              28s       0.47m     0.01h    0.00d
# Submission to last job:           871s      14.52m     0.24h    0.01d

    # Transform cpglh output to bed +
    catDir results | awk '{
$2 = $2 - 1;
width = $3 - $2;
printf("%s\t%d\t%s\t%s %s\t%s\t%s\t%0.0f\t%0.1f\t%s\t%s\n",
       $1, $2, $3, $5,$6, width,
       $6, width*$7*0.01, 100.0*2*$6/width, $7, $9);
}' > cpgIsland.bed

    # verify longest unique chrom name:
    cut -f1 cpgIsland.bed | sed -e 's/_random//' \
	| awk '{print length($0)}' | sort -rn | head -1
    #	18
    # update the length 14 in the template to be 16:
    sed -e "s/14/18/" $HOME/kent/src/hg/lib/cpgIslandExt.sql > cpgIslandExt.sql

    cd /hive/data/genomes/ochPri3/bed/cpgIsland
    hgLoadBed ochPri3 cpgIslandExt -tab -sqlTable=cpgIslandExt.sql cpgIsland.bed
# Loaded 15374 elements of size 10


    featureBits ochPri3 cpgIslandExt
    #	6520314 bases of 1701353770 (0.383%) in intersection

    # there should be no output from checkTableCoords:
    checkTableCoords -verboseBlocks -table=cpgIslandExt ochPri3

    #	cleanup
    rm -fr hardMaskedFa

############################################################################
# create ucscToINSDC name mapping (DONE - 2014-04-11 - Hiram)
    mkdir /hive/data/genomes/ochPri3/bed/ucscToINSDC
    cd /hive/data/genomes/ochPri3/bed/ucscToINSDC

    # this script has been maturing over time, it is close to complete.
    # to find a latest copy of it:
    # ls -ogrt /hive/data/genomes/*/bed/ucscToINSDC/translateNames.sh

    cp -p /hive/data/genomes/papAnu2/bed/ucscToINSDC/translateNames.sh .
    ./translateNames.sh
    # it says:
# need to find chrM accessions
    # so add this one:
    echo -e 'chrM\tNC_005358.1' >> ucscToINSDC.txt
    # needs to be sorted to work with join
    sort ucscToINSDC.txt > ucscToINSDC.tab

    awk '{printf "%s\t0\t%d\n", $1,$2}' ../../chrom.sizes | sort \
        > name.coordinate.tab

    join name.coordinate.tab ucscToINSDC.tab | tr '[ ]' '[\t]' > ucscToINSDC.bed

    cut -f1 ucscToINSDC.bed | awk '{print length($0)}' | sort -n | tail -1
# 12

    # use the 12 in this sed:
    sed -e "s/21/12/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \
        | hgLoadSqlTab ochPri3 ucscToINSDC stdin ucscToINSDC.bed
    checkTableCoords ochPri3 ucscToINSDC
    # should cover all bases
    featureBits -countGaps ochPri3 ucscToINSDC
    # 2229835716 bases of 2229835716 (100.000%) in intersection

##############################################################################
# setup search rule for assembly track (DONE - 2014-07-08 - Hiram)
   export maxLen=`hgsql -N -e 'select frag from gold;' ochPri3 | awk '{print length($0)}' | sort -run | head -1`

   echo $maxLen
   #  14

export C=1
while [ $C -le $maxLen ];
do
  echo -n " $C: "
  hgsql -N -e 'select frag from gold;' ochPri3 | sort -u \
    | awk '{ print substr($0,'$C',1) }' | sort -u | xargs echo | sed -e 's/ //g'
  C=`echo $C | awk '{print $1+1}'`
done

#   1: AN
#   2: CL
#   3: I_
#   4: 0T
#   5: 0
#   6: 15
#   7: 013
#   8: 0123456789
#   9: 0123456789
#  10: 0123456789
#  11: 0123456789
#  12: 0123456789
#  13: .
#  14: 1

searchTable gold
searchMethod prefix
searchType bed
shortCircuit 1
termRegex [AN][CL][I_][0T]0[15][0-9]+(\.1)*
query select chrom,chromStart,chromEnd,frag from %s where frag like '%s%%'
searchPriority 8

  # test pattern:
   hgsql -N -e 'select frag from gold;' ochPri3 | wc -l
   # 132962
   hgsql -N -e 'select frag from gold;' ochPri3 \
     | egrep -e '[AN][CL][I_][0T]0[15][0-9]+(\.1)*' | wc -l
   # 132962
   hgsql -N -e 'select frag from gold;' ochPri3 \
     | egrep -v -e '[AN][CL][I_][0T]0[15][0-9]+(\.1)*' | wc -l
   # 0

##############################################################################
##############################################################################
# TransMap V3 tracks. see makeDb/doc/transMapTracks.txt (2014-12-21 markd)
##############################################################################
