#!/bin/sh

# fetchChromSizes - script to grab chrom.sizes from UCSC via either of:
#	mysql, wget or ftp

usage() {
    printf "fetchChromSizes - script to grab chrom.sizes from UCSC via either of: mysql, wget or ftp\n\nusage: fetchChromSizes <db> > <db>.chrom.sizes
    used to fetch chrom.sizes information from UCSC for the given <db>
<db> - name of UCSC database, e.g.: hg38, hg18, mm9, etc ...

This script expects to find one of the following commands:
    wget, mysql, or ftp in order to fetch information from UCSC.
Route the output to the file <db>.chrom.sizes as indicated above.
This data is available at the URL:
  http://hgdownload.soe.ucsc.edu/goldenPath/<db>/bigZips/<db>.chrom.sizes

Example:   fetchChromSizes hg38 > hg38.chrom.sizes\n" 1>&2
    exit 255
}

DB=$1
export DB
if [ -z "${DB}" ]; then
    usage
fi

DONE=0
export DONE

url="http://hgdownload.soe.ucsc.edu/goldenPath/${DB}/bigZips/${DB}.chrom.sizes"
if [ "$DB" = "hg19" -o "$DB" = "hg38" ] ; then
    printf "INFO: Using latest patch release of genome for hg19/hg38\n" 1>&2
    url="http://hgdownload.soe.ucsc.edu/goldenPath/${DB}/bigZips/latest/${DB}.chrom.sizes"
fi

printf "INFO: trying CURL ${CURL} for database ${DB}\n" 1>&2
printf "url: %s\n" "${url}" 1>&2
tmpResult=${DB}.tmp.sizes$$
curl ${url} \
-s > ${tmpResult} 2> /dev/null
if [ $? -ne 0 -o ! -s "${tmpResult}" ]; then
    printf "WARNING: curl command failed\n" 1>&2
    rm -f "${tmpResult}"
else
    cat ${tmpResult}
    rm -f "${tmpResult}"
    DONE=1
fi

if [ "${DONE}" -eq 1 ]; then
    exit 0
fi

printf "INFO: trying WGET ${WGET} for database ${DB}\n" 1>&2
printf "url: %s\n" "${url}" 1>&2
tmpResult=${DB}.tmp.sizes$$
wget ${url} \
-O ${tmpResult} 2> /dev/null
if [ $? -ne 0 -o ! -s "${tmpResult}" ]; then
    printf "WARNING: wget command failed\n" 1>&2
    rm -f "${tmpResult}"
else
    cat ${tmpResult}
    rm -f "${tmpResult}"
    DONE=1
fi

if [ "${DONE}" -eq 1 ]; then
    exit 0
fi

printf "INFO: trying MySQL ${MYSQL} for database ${DB}\n" 1>&2
mysql -N --user=genome --host=genome-mysql.soe.ucsc.edu -A -e \
"SELECT chrom,size FROM chromInfo ORDER BY size DESC;" ${DB}
if [ $? -ne 0 ]; then
    printf "WARNING: mysql command failed\n" 1>&2
else
    DONE=1
fi

if [ "${DONE}" -eq 1 ]; then
    exit 0
fi

printf "INFO: trying FTP ${FTP} for database ${DB}\n" 1>&2
url="ftp://hgdownload.soe.ucsc.edu/goldenPath/${DB}/database/chromInfo.txt.gz"
printf "url: %s\n" "${url}" 1>&2
tmpResult="chromInfo.txt.gz"
tmpFtpRsp=fetchTemp.$$
printf "user anonymous fetchChromSizes@
cd goldenPath/${DB}/database
get chromInfo.txt.gz ${tmpResult}
bye\n" > ${tmpFtpRsp}
ftp -n -i hgdownload.soe.ucsc.edu < ${tmpFtpRsp} > /dev/null 2>&1
if [ $? -ne 0 -o ! -s "${tmpResult}" ]; then
    printf "ERROR: ftp command failed\n" 1>&2
    rm -f "${tmpFtpRsp}" "${tmpResult}"
else
    cat ${tmpResult} | zcat | cut -f1,2 | sort -k2nr
    rm -f "${tmpFtpRsp}" "${tmpResult}"
    DONE=1
fi

if [ "${DONE}" -eq 1 ]; then
    exit 0
fi

if [ "${DONE}" -eq 0 ]; then
    printf "ERROR: sorry, attempt to fetch chrom.sizes has failed ?\n" 1>&2
    exit 255
fi