#!/usr/bin/env python3

import logging, sys, optparse, os, json, subprocess, shutil, string, glob, tempfile, re
import shlex, urllib, urllib.parse, time, tarfile, hashlib, gzip, csv, io

from pathlib import Path
from collections import defaultdict, OrderedDict
from os.path import join, basename, dirname, isfile, relpath, abspath, splitext, isdir, normpath
from urllib.parse import unquote
import concurrent.futures

requestsLoaded = True
try:
    import requests # if this fails, install the library with 'python -m pip install requests --user'
except ImportError:
    # most code now can work around the absence of the requests library
    requestsLoaded = False

#import pyyaml   # not loaded here, so it's not a hard requirement, is lazy loaded in parseMetaYaml()

# ==== functions =====

# debugging: when activated with -d, output more information and do not remove any temp files.
# Set to True by parseArgs(); errAbort() raises an exception instead of exiting when it is set.
debugMode = False
# full extra verbose output of all HTTP requests sent, there is no command line option for this.
# Only read by httpReq(), which then switches http.client and the root logger to debug output.
doVerbose = False

# allowed file types by hubtools up
# mirrors extensionMap in hg/js/hgMyData.js
# hub.txt comes before text so hub.txt and *.hub.txt don't fall through to text
# Every extension starts with a dot, so that a name has to really end with that
# extension (e.g. "myvcf.bgz" is not a tabix-compressed VCF).
fileTypeExtensions = {
    "hub.txt": [ "hub.txt" ],
    "bigBed": [ ".bb", ".bigbed" ],
    "bam": [ ".bam" ],
    "vcf": [ ".vcf" ],
    "vcfTabix": [ ".vcf.gz", ".vcf.bgz" ],
    "bigWig": [ ".bw", ".bigwig" ],
    "hic": [ ".hic" ],
    "cram": [ ".cram" ],
    "bigBarChart": [ ".bigbarchart" ],
    "bigGenePred": [ ".bgp", ".biggenepred" ],
    "bigMaf": [ ".bigmaf" ],
    "bigInteract": [ ".biginteract" ],
    "bigPsl": [ ".bigpsl" ],
    "bigChain": [ ".bigchain" ],
    "bamIndex": [ ".bam.bai", ".bai" ],
    "tabixIndex": [ ".vcf.gz.tbi", ".vcf.bgz.tbi" ],
    "2bit": [ ".2bit" ],
    "text": [ ".txt", ".text" ],
}

# JS regex for a single hub-name segment, used to validate the <hubName> CLI arg
# matches parentDirSegmentRegex in hg/js/hgMyData.js
# \Z instead of $: in Python, $ also matches just before a trailing newline, so "hub\n"
# would be accepted, which a JS /^...$/ regex without the m flag does not do.
hubNameSegmentRegex = re.compile(r"^[0-9a-zA-Z._]+\Z")

# Header of a "table bed" description: table name, quoted description and the opening
# parenthesis of the field list. The closing parenthesis is not part of this string.
# NOTE(review): presumably the start of an autoSql (.as) file - the code using it is not in view.
asHead = """table bed
"Browser extensible data (<=12 fields) "
    (
"""

# One field declaration per line, for the 12 fields chrom .. chromStarts.
# The string starts and ends with a newline, so after split("\n") the first and the
# last list element are empty strings and elements 1-12 hold the field declarations.
asLines = """
    string chrom;      "Chromosome (or contig, scaffold, etc.)"
    uint   chromStart; "Start position in chromosome"
    uint   chromEnd;   "End position in chromosome"
    string name;       "Name of item"
    uint   score;      "Score from 0-1000"
    char[1] strand;    "+ or -"
    uint thickStart;   "Start of where display should be thick (start codon)"
    uint thickEnd;     "End of where display should be thick (stop codon)"
    uint reserved;     "Used as itemRgb as of 2004-11-22"
    int blockCount;    "Number of blocks"
    int[blockCount] blockSizes; "Comma separated list of block sizes"
    int[blockCount] chromStarts; "Start positions relative to chromStart"
""".split("\n")

def parseArgs():
    """ setup logging, parse command line arguments and options. -h shows auto-generated help page.

    Returns (args, options): the list of positional arguments and the optparse options object.
    Prints the help page and exits with status 1 if no positional argument was given.
    Sets the global debugMode when -d/--debug was specified.
    """
    parser = optparse.OptionParser("""usage: %prog [options] <cmd> - create and edit UCSC track hubs

    hubtools make <assemblyCode e.g. hg38>: create a track hub for all bigBed/bigWig files under a directory.
    Creates single-file hub.txt, and tries to guess reasonable settings from the file names:
    - bigBed/bigWig files in the current directory will be top level tracks
    - big* files in subdirectories become composites
    - for every filename, the part before the first dot becomes the track base name
    - if a directory has more than 80% of track base names with both a bigBed
      and bigWig file, views are activated for this composite
    - track attributes can be changed using tracks.tsv or json/ra/yaml files, in
      each top or subdirectory. Both subdir and top directory are searched, subdir
      values override any values specified in a parent directory.
    - tracks.tsv (or tracks.json/ra/yaml) must have a first column named 'track'. For json and yaml,
      the key is the track name. ".hub" is a special key to provide email/shortLabel of the hub.
    - the track name in this column can be either the full one <parent>__fileBasename__type or just
      the fileBasename, this makes it easier. In practice, filenames are unique enough and don't
      usually clash between subdirs or filetype, in practice.
    - the track order is the one on disk, so relatively random. To order tracks, create a hub,
      use the "tab" command below, reorder tracks in tsv file, then re-run the "make" command.

    hubtools up <hubName>: upload files to hubSpace
    - needs ~/.hubtools.conf with a line apiKey="xxxx". Create a key by going to
      My Data > Track Hubs > Track Development on https://genome.ucsc.edu
    - uploads all files from the -i directory or the current dir if not specified.

    hubtools jbrowse <url> <db> : convert Jbrowse trackList.json files to hub.txt.
    - <url> is the URL to the Jbrowse2 installation, e.g. http://furlonglab.embl.de/FurlongBrowser/
    - <db> is assembly identifier

    hubtools tab <fname>: convert a hub.txt or trackDb.txt to tab-sep format, easier to bulk-edit with sed/cut/etc.
       The resulting file if named tracks.tsv can be used as inputs for future runs.
    - <fname> is the input filename. "hub" and "genome" stanzas are skipped.
    - output goes to stdout

    hubtools conv -i myTsvDir
    - convert .tsv files in inDir to .bigBed files in current directory

    hubtools ct <urlOrFile> [-o outDir]
    - Create a hub in current dir or outDir from any URL with a hgsid, or a
      session URL, or a local xxx.tar.gz track backup directory (See: My Data >
      My Session) Processes all genomes with custom tracks. Downloads
      bigDataUrl files into outDir.

    Examples:
    hubtools conv -i myTsvs/ # convert tsv to bigBeds in current dir
    hubtools make hg38 # make a hub in current dir
    hubtools jbrowse http://furlonglab.embl.de/FurlongBrowser/ dm3
    hubtools tab hub.txt > tracks.tsv  # convert a hub to a .tsv file

    hubtools ct SC_20230723_backup.tar.gz -o hub # convert a backup archive to a track hub
    hubtools ct 'https://genome-euro.ucsc.edu/cgi-bin/hgTracks?db=hg38&hgsid=345826202_8d3Mpumjaw9IXYI2RDaSt5xMoUhH'
    # convert all custom tracks to a track hub

    For the "hubtools make" and "ct" steps, you can specify additional options for your tracks via tracks.json/.yaml or tracks.tsv:

    tracks.json can look like this, can have more keys, one per track, or the special key ".hub":
    {
        ".hub" :      { "hub": "mouse_motor_atac", "shortLabel":"scATAC-seq Developing Cranial Motor Neurons" },
        "myTrack" :  { "shortLabel" : "My nice track" }
    }
    tracks.tsv should look like this, other columns can be added:

        #track<tab>shortLabel
        myTrack<tab>My nice track

    For a list of all possible fields/columns see https://genome.ucsc.edu/goldenpath/help/trackDb/trackDbHub.html:
    """)


    parser.add_option("-i", "--inDir", dest="inDir", action="store", help="Input directory where files are stored. Default is current directory")
    parser.add_option("-o", "--outDir", dest="outDir", action="store", help="Output directory where hub.txt file is created. Default is same as input directory.")

    #parser.add_option("", "--igv", dest="igv", action="store", help="import an igv.js trackList.json file hierarchy")
    parser.add_option("-d", "--debug", dest="debug", action="store_true", help="show debug messages, stacktrace on abort and do not delete temp files")
    #parser.add_option("-u", "--upload", dest="upload", action="store_true", help="upload all files from outDir to hubSpace")
    parser.add_option("", "--download", dest="doDownload", action="store_true", help="Download all bigDataUrl files to outDir")
    (options, args) = parser.parse_args()

    if len(args)==0:
        parser.print_help()
        # sys.exit, not the exit() builtin: the latter is only installed by the site module
        sys.exit(1)

    global debugMode
    if options.debug:
        debugMode = True
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)
    return args, options

def errAbort(msg):
    " log msg as an error and stop: raise an Exception in debug mode (shows a stack trace), otherwise exit with status 1 "
    logging.error(msg)
    if not debugMode:
        sys.exit(1)
    raise Exception(msg)

def makedirs(d):
    " create directory d including missing parent directories, do nothing if it already exists "
    # exist_ok avoids the window between an isdir() check and the creation, in which
    # another process could create the directory and make os.makedirs fail
    os.makedirs(d, exist_ok=True)

def parseConf(fname):
    """ parse a hg.conf style file, return as dict key -> value (all strings).

    - lines starting with '#' are skipped
    - 'include <file>' lines are resolved relative to the directory of fname and merged in,
      include files that do not exist are silently ignored
    - all other lines are split at the first '=', lines without '=' are ignored
    Keys and values are stored as found, only whitespace around the whole line is removed.
    """
    logging.debug("Parsing "+fname)
    conf = {}
    # 'with' makes sure the file is closed, also when an include file fails to parse
    with open(fname) as confFh:
        for line in confFh:
            line = line.strip()
            if line.startswith("#"):
                continue
            elif line.startswith("include "):
                inclFname = line.split()[1]
                absFname = normpath(join(dirname(fname), inclFname))
                if os.path.isfile(absFname):
                    inclDict = parseConf(absFname)
                    conf.update(inclDict)
            elif "=" in line: # string search for "="
                key, value = line.split("=",1)
                conf[key] = value
    return conf

# cache of hg.conf contents, None as long as parseHgConf() has not been run
hgConf = None

def parseHgConf():
    """ return hg.conf as dict key:value.

    Reads ~/.hubtools.conf or, if that file does not exist, ~/.hg.conf. The result is
    an empty dict if neither file exists. The result is cached in the global hgConf,
    so the files are parsed only once.
    """
    global hgConf
    if hgConf is not None:
        return hgConf

    conf = dict() # python dict = hash table
    for confPath in ("~/.hubtools.conf", "~/.hg.conf"):
        fname = os.path.expanduser(confPath)
        if isfile(fname):
            conf = parseConf(fname)
            break

    hgConf = conf
    # the first call has to return the dict, too, not only the calls that hit the cache
    return hgConf

def cfgOption(name, default=None):
    " look up name in the config (hgConf), parsing the config files first if nothing is loaded yet; return default if name is not set "
    nothingLoaded = not hgConf
    if nothingLoaded:
        parseHgConf()

    value = hgConf.get(name, default)
    return value

def _addRaStanza(ret, trackName, stanzaData, fname):
    " validate one completed .ra stanza and store it in ret under its track name "
    if trackName is None:
        errAbort("File %s has a stanza without a track name" % fname)
    if trackName in ret:
        errAbort("File %s contains two stanzas with the same track name '%s' " % (fname, trackName))
    ret[trackName] = stanzaData

def parseMetaRa(fname):
    """parse tracks.ra or tracks.txt and return as a dict of trackName -> dict of key->val

    Stanzas are separated by empty lines, lines starting with '#' are skipped. The value of
    the 'track' line becomes the key of the stanza and is not part of the key->val dict.
    Stanzas that consist only of a 'track' line are not added to the result.
    Aborts via errAbort() if a stanza has no track line, if a track name appears twice
    or if a line consists only of a key.
    """
    logging.debug("Reading %s as .ra" % fname)
    trackName = None
    stanzaData = {}
    ret = {}
    with open(fname) as raFh:
        for line in raFh:
            line = line.strip()
            if line.startswith("#"):
                continue
            if line=="":
                if len(stanzaData)==0:
                    # double newline
                    continue
                _addRaStanza(ret, trackName, stanzaData, fname)
                stanzaData = {}
                trackName = None
                continue

            key, sep, val = line.partition(" ")
            if sep=="":
                errAbort("File %s: line '%s' has a key but no value" % (fname, line))
            if key == "track":
                trackName = val
            else:
                stanzaData[key] = val

    # the file may end without an empty line: the last stanza gets the same checks as the others
    if len(stanzaData)!=0:
        _addRaStanza(ret, trackName, stanzaData, fname)
    logging.debug("Got %s from .ra" % str(ret))
    return ret

def parseMetaTsv(fname):
    """ parse a tracks.tsv file and return as a dict of trackName -> dict of key->val

    The first non-empty line is the header and must start with 'track<tab>' or '#track'.
    The first column is the track name, all other columns end up in the key->val dict.
    Empty lines are skipped. Aborts via errAbort() if the header is not valid or if a
    line has a different number of fields than the header.
    """
    headers = None
    meta = {}
    logging.debug("Parsing track meta data from %s in tsv format" % fname)
    with open(fname) as tsvFh:
        for lineNo, line in enumerate(tsvFh, start=1):
            if line.strip()=="":
                # e.g. an empty line at the end of the file
                continue

            row = line.rstrip("\r\n").split("\t")
            if headers is None:
                # errAbort instead of assert: asserts are removed when running with python -O
                if not (line.startswith("track\t") or line.startswith("#track")):
                    errAbort("File %s: the first line must start with 'track<tab>' or '#track'" % fname)
                row[0] = row[0].lstrip("#")
                headers = row
                continue

            if len(row)!=len(headers):
                errAbort("File %s, line %d: found %d fields, but the header line has %d fields" %
                        (fname, lineNo, len(row), len(headers)))

            meta[row[0]] = dict(zip(headers[1:], row[1:]))
    return meta

def parseMetaJson(fname):
    " parse a json file and return its content, for tracks.json a dict trackName -> dict of key->val "
    logging.debug("Reading %s as json" % fname)
    # 'with' closes the file right away, also when the json is not valid
    with open(fname) as jsonFh:
        return json.load(jsonFh)

def parseMetaYaml(fname):
    " load a yaml file and return its content. A yaml syntax error is only logged, the function then returns None "
    import yaml # if this doesn't work, run 'pip install pyyaml'
    parsed = None
    with open(fname) as stream:
        try:
            parsed = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            logging.error(exc)
    return parsed

def parseMeta(inDirs):
    """ collect the track meta data from tracks.tsv, tracks.json, tracks.ra and tracks.yaml
    (read in this order) of all directories in inDirs. Returns an ordered dict
    trackName -> dict of key->val. Files read later override the values of files read
    earlier, so the last directory overrides values in parent directories.
    """
    metaParsers = (
        ("tracks.tsv", parseMetaTsv),
        ("tracks.json", parseMetaJson),
        ("tracks.ra", parseMetaRa),
        ("tracks.yaml", parseMetaYaml),
    )

    meta = OrderedDict()
    for inDir in inDirs:
        for metaFname, parseFunc in metaParsers:
            metaPath = join(inDir, metaFname)
            if not isfile(metaPath):
                continue
            meta = allMetaOverride(meta, parseFunc(metaPath))

        logging.debug("Got meta from %s: %s" % (inDir, str(meta)))
    return meta

def writeHubGenome(ofh, db, inMeta):
    """ write the 'hub' and the 'genome' stanza of a single-file hub (useOneFile on) to ofh.
    Labels, email and descriptionUrl are taken from the '.hub' entry of inMeta, with
    template defaults for the labels and the email. Returns ofh.
    """
    hubMeta = inMeta.get(".hub", {})

    hubLines = [
        "hub autoHub",
        "shortLabel %s" % hubMeta.get("shortLabel", "Auto-generated hub"),
        "longLabel %s" % hubMeta.get("longLabel", "Auto-generated hub"),
    ]
    if "descriptionUrl" in hubMeta:
        hubLines.append("descriptionUrl %s" % hubMeta["descriptionUrl"])
    hubLines.append("email %s" % hubMeta.get("email", "yourEmail@example.com"))
    hubLines.append("useOneFile on")

    # both stanzas are followed by one empty line
    ofh.write("\n".join(hubLines) + "\n\n")
    ofh.write("genome %s\n\n" % db)
    return ofh

def readSubdirs(inDir, subDirs):
    """ scan the subdirectories subDirs of inDir and return (compDicts, superDicts).
    compDicts maps every subdirectory that has track files but no subdirectories of its
    own to its file dict from readFnames(). superDicts is always empty, as supertracks
    are not supported for now.
    """
    compDicts = {}
    superDicts = {}
    for subDir in subDirs:
        subSubDirs, subDict = readFnames(join(inDir, subDir))
        hasFiles = len(subDict)!=0
        isLeafDir = len(subSubDirs)==0
        if hasFiles and isLeafDir:
            compDicts[subDir] = subDict

    return compDicts, superDicts

def reorderDirs(compDirs, meta):
    """ return compDirs sorted like the keys of meta, so the composites get the order of the
    meta data. Directories that are not in meta keep their relative order and go to the
    end. Without any meta data, compDirs itself is returned.
    """
    if not len(meta):
        logging.debug("Not reordering these subdirectories: %s" % compDirs.keys())
        return compDirs

    # directories mentioned in the meta data come first, in meta data order
    newCompDirs = OrderedDict(
        (dirName, compDirs[dirName]) for dirName in meta.keys() if dirName in compDirs
    )
    # all others follow, setdefault() does not touch the ones that are already there
    for dirName, dirData in compDirs.items():
        newCompDirs.setdefault(dirName, dirData)

    logging.debug("Reordered input directories based on meta data. New order is: %s" % newCompDirs.keys())
    return newCompDirs


def readDirs(inDir, meta):
    """ scan inDir and its subdirectories, return containerType -> parentName -> fileBase -> type -> list of absPath.
    The container types are 'top' (files directly in inDir, the parent name is None),
    'comps' (one entry per composite subdirectory, ordered like meta) and 'supers'
    (not used yet).
    """
    subDirs, topFiles = readFnames(inDir)
    compDirs, superDirs = readSubdirs(inDir, subDirs)

    return {
        "top" : { None : topFiles }, # top-level track files have None as the parent
        "comps" : reorderDirs(compDirs, meta),
        "supers" : superDirs, # superDirs not used yet, no time
    }

def readFnames(inDir):
    """ list inDir and return (subDirs, fnameDict): the names of the subdirectories and a dict
    with basename -> fileType -> list of absolute file paths. Only bigWig (.bw, .bigwig)
    and bigBed (.bb, .bigbed) files are used, the case of the extension does not matter.
    """
    typeForExt = {
        ".bw" : "bigWig",
        ".bigwig" : "bigWig",
        ".bb" : "bigBed",
        ".bigbed" : "bigBed",
    }

    subDirs = []
    fnameDict = defaultdict(dict)
    for entry in os.listdir(inDir):
        entryPath = join(inDir, entry)
        if isdir(entryPath):
            subDirs.append(entry)
            continue

        extLower = splitext(basename(entry))[1].lower()
        fileType = typeForExt.get(extLower)
        if fileType is None:
            logging.debug("file %s is not bigBed nor bigWig, skipping" % entry)
            continue

        # the track name is the part before the first dot, not the part before the extension:
        # this way a.scaled.bigBed and a.raw.bigWig get paired correctly
        fileBase = basename(entry).split(".")[0]
        fnameDict[fileBase].setdefault(fileType, []).append(abspath(entryPath))
    return subDirs, fnameDict

def mostFilesArePaired(fnameDict):
    """ check if more than 80% of the tracks have a pair bigBed+bigWig.
    fnameDict is a dict basename -> fileType -> file names. Returns False for an empty dict.
    """
    if len(fnameDict)==0:
        # no tracks, no pairs. Also avoids the division by zero below.
        return False

    pairCount = sum(
        1 for typeDict in fnameDict.values()
        if "bigBed" in typeDict and "bigWig" in typeDict
    )
    return ( pairCount / len(fnameDict) > 0.8 )

def writeLn(ofh, spaceCount, line):
    " write line to ofh, indented by spaceCount spaces and followed by a newline "
    indentStr = " " * spaceCount
    ofh.write(indentStr)
    ofh.write(line)
    ofh.write("\n")

def writeStanza(ofh, indent, tdb):
    """ write one track stanza to ofh, given a tdb key-val dict, every line indented by indent spaces.
    track, shortLabel, longLabel, parent, type, visibility and bigDataUrl are written first,
    in this order, then all other settings in the order of the dict, then an empty line.
    tdb must contain 'track' and 'type', defaults are used for missing labels and visibility.
    """
    track = tdb["track"]
    shortLabel = tdb.get("shortLabel", track.replace("_", " "))
    longLabel  = tdb.get("longLabel", shortLabel)
    visibility = tdb.get("visibility", "pack")

    if "type" not in tdb:
        errAbort("Track info has no type attribute: %s" % tdb)
    trackType = tdb["type"]

    headLines = ["track %s" % track, "shortLabel %s" % shortLabel]
    if longLabel:
        headLines.append("longLabel %s" % longLabel)
    if "parent" in tdb:
        headLines.append("parent %s" % tdb["parent"])
    headLines.append("type %s" % trackType)
    headLines.append("visibility %s" % visibility)
    if "bigDataUrl" in tdb:
        headLines.append("bigDataUrl %s" % tdb["bigDataUrl"])

    for headLine in headLines:
        writeLn(ofh, indent, headLine)

    # everything that was not already written above
    fixedKeys = ["track", "shortLabel", "longLabel", "type", "bigDataUrl", "visibility", "parent"]
    for key, val in tdb.items():
        if key not in fixedKeys:
            writeLn(ofh, indent, "%s %s" % (key, val))

    ofh.write("\n")

def metaOverride(tdb, meta):
    """ override track info for one single track, from meta into tdb.

    tdb is the key->val dict of the track and is changed in place. meta is a dict
    trackName -> dict of key->val. The meta entry is looked up with the full track name
    first, then with the file basename that is part of the track name. Empty values in
    meta do not override anything. Returns None.
    """
    trackName = tdb["track"]

    if trackName not in meta and "__" in trackName:
        logging.debug("Using only basename of track %s" % trackName)
        # tracks in composites are named <parent>__<fileBase>__<type>, but top level tracks
        # are named <fileBase>__<type>, so the basename is not always the second part
        nameParts = trackName.split("__")
        trackName = nameParts[1] if len(nameParts) >= 3 else nameParts[0]

    if trackName not in meta:
        logging.debug("No meta info for track %s" % tdb["track"])
        return

    for key, val in meta[trackName].items():
        if val!="":
            tdb[key] = val

def allMetaOverride(tdb, meta):
    """ merge meta into tdb, both are dicts trackName -> dict of key->val. Tracks that are
    missing in tdb are added, values in meta replace existing values of the same key.
    tdb is changed in place and returned. If meta is None, tdb is returned unchanged.
    """
    if meta is not None:
        for trackName, trackMeta in meta.items():
            tdb.setdefault(trackName, {}).update(trackMeta.items())

    return tdb

def reorderTracks(fileDict, meta):
    """ given an unsorted dictionary of files and ordered metadata, try to sort the files according to the metadata.

    fileDict is a dict fileBase -> file info, meta an ordered dict trackName -> settings.
    The track names in meta can be file basenames or full track names, which are
    <parent>__<fileBase>__<type> in composites and <fileBase>__<type> at the top level.
    Returns an OrderedDict with the entries of fileDict: first the ones found in meta, in
    meta order, then all others in their original order. Without meta data, fileDict
    itself is returned.
    """
    if len(meta)==0:
        return fileDict # no meta data -> no ordering necessary

    newFiles = OrderedDict()
    # first add the tracks in the order of the meta data
    # meta is an OrderedDict, so the keys are also ordered
    for trackBase in meta.keys():
        # the tsv file can have the track names either as basenames or as full tracknames
        if trackBase not in fileDict and "__" in trackBase:
            nameParts = trackBase.split("__")
            # the basename is the second part only if the name starts with a parent name
            trackBase = nameParts[1] if len(nameParts) >= 3 else nameParts[0]

        if trackBase in fileDict:
            newFiles[trackBase] = fileDict[trackBase]

    logging.debug("Track ordering from meta data used: %s" % newFiles.keys())

    # then add all other tracks
    for trackBase, fileData in fileDict.items():
        if trackBase not in newFiles:
            newFiles[trackBase] = fileData
            logging.debug("Not specified in meta, so adding at the end: %s" % trackBase)

    logging.debug("Final track order is: %s" % newFiles.keys())
    return newFiles

def makeTrackDbEntries(inDir, dirDict, dirType, tdbDir, ofh):
    """ given a dict with basename -> type -> filenames, write track entries to ofh.

    inDir:   top input directory. The tracks.tsv/json/ra/yaml meta files are read from it and,
             for container tracks, also from inDir/<parentName>, via parseMeta()
    dirDict: dict containerType -> parentName -> fileBase -> fileType -> list of absolute paths
             (presumably the return value of readDirs() - the caller is not part of this view)
    dirType: key into dirDict. Only "comps" gets a composite parent stanza, for any other
             value only the stanzas of the files are written
    tdbDir:  the bigDataUrl of every track is the file path relative to this directory
    ofh:     writable file handle, gets the stanzas via writeStanza()
    """
    # this code is getting increasingly complex because it supports composites/views and pairing of bigBed/bigWig files
    # either this needs better comments or maybe a separate code path for this rare use case
    # NOTE(review): compCount is declared global here but neither read nor assigned in this function
    global compCount

    fnameDict = dirDict[dirType]

    for parentName, typeDict in fnameDict.items():
        if parentName is None: # top level tracks: use top tracks.tsv
            subDirs = [inDir]
        else: # container tracks -> use tracks.tsv in the parent and subdirectory
            subDirs = [inDir, join(inDir, parentName)]

        parentMeta = parseMeta(subDirs)

        indent = 0
        parentHasViews = False

        groupMeta = {}

        if dirType=="comps":
            # defaults of the composite parent stanza, the meta data can override them
            tdb = {
                    "track" : parentName,
                    "shortLabel": parentName,
                    "visibility" : "dense",
                    "compositeTrack" : "on",
                    "autoScale" : "group",
                    "type" : "bed 4"
                  }
            metaOverride(tdb, parentMeta)
            groupMeta = parentMeta

            parentHasViews = mostFilesArePaired(typeDict)

            if parentHasViews:
                tdb["subGroup1"] = "view Views PK=Peaks SIG=Signals"
                logging.info("Container track %s has >80%% of paired files, activating views" % parentName)

            writeStanza(ofh, indent, tdb)
            # all stanzas below the composite parent are indented by four spaces
            indent = 4

            if parentHasViews:
                # we have composites with paired files? -> write the track stanzas for the two views
                # NOTE(review): this reads the same meta files again that were already parsed into parentMeta above
                groupMeta = parseMeta(subDirs)
                tdbViewPeaks = {
                        "track" : parentName+"ViewPeaks",
                        "shortLabel" : parentName+" Peaks",
                        "parent" : parentName,
                        "view" : "PK",
                        "visibility" : "dense",
                        "type" : "bigBed",
                        "scoreFilter" : "off",
                        "viewUi" : "on"
                        }
                metaOverride(tdbViewPeaks, parentMeta)
                writeStanza(ofh, indent, tdbViewPeaks)

                tdbViewSig = {
                        "track" : parentName+"ViewSignal",
                        "shortLabel" : parentName+" Signal",
                        "parent" : parentName,
                        "view" : "SIG",
                        "visibility" : "dense",
                        "type" : "bigWig",
                        "viewUi" : "on"
                        }
                metaOverride(tdbViewSig, parentMeta)
                writeStanza(ofh, indent, tdbViewSig)
        else:
            # no composites
            groupMeta = parseMeta(subDirs)

        # put the tracks into the order of the meta data, tracks without meta data go to the end
        typeDict = reorderTracks(typeDict, groupMeta)

        for trackBase, typeFnames in typeDict.items():
            for fileType, absFnames in typeFnames.items():
                assert(len(absFnames)==1) # for now, not sure what to do when we get multiple basenames of the same file type
                absFname = absFnames[0]
                # NOTE(review): fileBase is not used below
                fileBase = basename(absFname)
                relFname = relpath(absFname, tdbDir)

                # with views, the file type is added to the labels, as a track base name can have both a bigWig and a bigBed
                labelSuff = ""
                if parentHasViews:
                    if fileType=="bigWig":
                        labelSuff = " Signal"
                    elif fileType=="bigBed":
                        labelSuff = " Peaks"
                    else:
                        assert(False) # views and non-bigWig/Bed are not supported yet?

                if parentName is not None:
                    parentPrefix = parentName+"__"
                else:
                    parentPrefix = ""

                # the full track name is [<parentName>__]<trackBase>__<fileType>
                trackName = parentPrefix+trackBase+"__"+fileType
                tdb = {
                        "track"      :  trackName,
                        "shortLabel" :  trackBase+labelSuff,
                        "longLabel"  :  trackBase+labelSuff,
                        "visibility" :  "dense",
                        "type"       :  fileType,
                        "bigDataUrl" :  relFname,
                      }

                if parentName:
                    tdb["parent"] = parentName

                if parentHasViews:
                    # with views, the parent is the view track, followed by "on" or "off".
                    # "off" is used if the meta data has visibility "hide" under the full track name.
                    onOff = "on"
                    if trackName in groupMeta and "visibility" in groupMeta[trackName]:
                        vis = groupMeta[trackName]["visibility"]
                        if vis=="hide":
                            onOff = "off"
                        del tdb["visibility"]

                    if fileType=="bigBed":
                        tdb["parent"] = parentName+"ViewPeaks"+" "+onOff
                        tdb["subGroups"] = "view=PK"
                    else:
                        tdb["parent"] = parentName+"ViewSignal"+" "+onOff
                        tdb["subGroups"] = "view=SIG"

                metaOverride(tdb, groupMeta)

                # NOTE(review): this removes a visibility that the meta data specifies under the full
                # track name, also for tracks without views. writeStanza() then writes its default
                # "pack" instead of the value from the meta data - confirm that this is intended.
                if trackName in groupMeta and "visibility" in groupMeta[trackName]:
                    del tdb["visibility"]

                writeStanza(ofh, indent, tdb)

def httpReqOld(url, asBytes=False, asJson=False, params=None, method="get", max_redirects=10):
    """ get data with HTTP and return it. Does not use the requests library.

    url:           URL to fetch, a query string that is part of the URL is sent to the server
    asBytes:       return the raw bytes of the response
    asJson:        return the response parsed as JSON (only if asBytes is not set)
    params:        dict or list of pairs with additional CGI parameters. Appended to the query
                   string for "get" requests, sent as form data in the body for "post" requests.
    method:        "get" or "post", case does not matter
    max_redirects: how many redirects are followed before giving up

    Without asBytes and asJson, the response is returned as a string, decoded as UTF-8.
    Raises RuntimeError if anything goes wrong.
    NOTE: unlike httpReq(), this does not treat HTTP status codes >= 400 as errors.
    """
    # Parse URL
    import http.client
    import urllib.parse
    parsed_url = urllib.parse.urlparse(url)
    if parsed_url.scheme == "https":
        connection = http.client.HTTPSConnection(parsed_url.netloc)
    else:
        connection = http.client.HTTPConnection(parsed_url.netloc)

    methodLower = method.lower()
    # always defined, also without params, as the post branch below needs it
    query_string = urllib.parse.urlencode(params) if params else ""

    # keep the query string of the URL itself, e.g. the ?db=hg38&hgsid=xxx of a session URL
    queryParts = [parsed_url.query]
    if methodLower == "get":
        queryParts.append(query_string)
    fullQuery = "&".join(part for part in queryParts if part)

    path = parsed_url.path or "/"
    if fullQuery:
        path += "?" + fullQuery

    # Set headers and body
    headers = {}
    body = None
    if methodLower == "post":
        headers["Content-Type"] = "application/x-www-form-urlencoded"
        body = query_string

    logging.debug(f"HTTP {method.upper()} {url}")

    try:
        # Send the request
        connection.request(method.upper(), path, body=body, headers=headers)
        response = connection.getresponse()

        # Handle redirects
        if 300 <= response.status < 400 and response.getheader("Location"):
            if max_redirects <= 0:
                raise RuntimeError("Too many redirects")

            redirect_url = urllib.parse.urljoin(url, response.getheader("Location"))
            # if the server put a query string into the new location, use it as it is.
            # Appending the params again would send them twice.
            redirect_params = params
            if methodLower == "get" and urllib.parse.urlparse(redirect_url).query:
                redirect_params = None
            logging.debug(f"Redirecting to {redirect_url}, with parameters {redirect_params}")
            return httpReqOld(redirect_url, asBytes, asJson, redirect_params, method, max_redirects - 1)

        # Process the response
        content = response.read()
        if asBytes:
            return content
        elif asJson:
            return json.loads(content.decode("utf-8"))
        else:
            return content.decode("utf-8")
    except Exception as e:
        logging.error(f"Error fetching the URL: {e}")
        raise RuntimeError(f"Error fetching the URL: {e}") from e
    finally:
        connection.close()
def httpReq(url, asBytes=False, asJson=False, params=None):
    """ get URL with a http get request and return the content: as raw bytes if asBytes is set,
    parsed from JSON if asJson is set, as a string otherwise. params are additional CGI
    parameters. Uses httpReqOld() if the requests library is not installed, otherwise
    request errors and HTTP error status codes lead to errAbort().
    """
    if not requestsLoaded:
        return httpReqOld(url, asBytes, asJson, params)

    if doVerbose:
        # show the full HTTP traffic on the console
        import http.client
        http.client.HTTPConnection.debuglevel=1
        logging.basicConfig()
        rootLog = logging.getLogger()
        rootLog.setLevel(logging.DEBUG)
        urllibLog = logging.getLogger('requests.packages.urllib3')
        urllibLog.setLevel(logging.DEBUG)
        urllibLog.propagate = True

    logging.debug("HTTP GET %s" % url)
    try:
        resp = requests.get(url, params=params, allow_redirects=True)
        resp.raise_for_status()
    except requests.RequestException as e:
        errAbort(f"Error fetching the URL: {e}")

    if asBytes:
        logging.debug("HTTP GET with raw bytes")
        return resp.content

    if asJson:
        logging.debug("HTTP GET with JSON")
        return resp.json()

    logging.debug("HTTP GET with text")
    return resp.text

def importJbrowse(baseUrl, db, outDir):
    """ import a Jbrowse trackList.json and write it as a hub.txt file into outDir.

    baseUrl: URL of the Jbrowse installation, <baseUrl>/data/trackList.json is downloaded
    db:      assembly identifier for the genome stanza
    outDir:  directory where hub.txt is created

    Tracks of type SequenceTrack and tracks with the NCList storeClass are skipped.
    NOTE: only tracks with the BigWig storeClass get a track type. writeStanza() aborts
    the import when it gets a track with any other storeClass.
    """
    outFn = join(outDir, "hub.txt")
    trackListUrl = baseUrl+"/data/trackList.json"

    # 'with' makes sure that hub.txt is flushed and closed, also if the import aborts
    with open(outFn, "w") as ofh:
        writeHubGenome(ofh, db, {})

        logging.info("Loading %s" % trackListUrl)
        trackList = httpReq(trackListUrl, asJson=True)
        for tl in trackList["tracks"]:
            if "type" in tl and tl["type"]=="SequenceTrack":
                logging.info("Genome is: "+tl["label"])
                continue

            # .get(): a track without a storeClass should not stop the import with a KeyError
            storeClass = tl.get("storeClass")
            if storeClass=="JBrowse/Store/SeqFeature/NCList":
                logging.info("NCList found: %s", tl)
                continue

            tdb = {}
            tdb["track"] = tl["label"]
            tdb["shortLabel"] = tl["key"]
            if "data_file" in tl:
                tdb["bigDataUrl"] = baseUrl+"/data/"+tl["data_file"]
            else:
                tdb["bigDataUrl"] = baseUrl+"/data/"+tl["urlTemplate"]
            if storeClass == "JBrowse/Store/SeqFeature/BigWig":
                tdb["type"] = "bigWig"

            # "compact" becomes dense, everything else, also no display_mode at all, becomes pack
            if tl.get("display_mode")=="compact":
                tdb["visibility"] = "dense"
            else:
                tdb["visibility"] = "pack"

            writeStanza(ofh, 0, tdb)

def installModule(package):
    " install the Python package with pip, using the interpreter that runs this script. Raises subprocess.CalledProcessError if pip fails "
    logging.info("Could not find Python module '%s', trying to install with pip" % package)
    pipCmd = [sys.executable, "-m", "pip", "install", package]
    subprocess.check_call(pipCmd)

def cacheLoad(fname):
    """ load the upload cache from the json file fname, keyed as { hubName: { relPath: {mtime, size} } }.
    Returns an empty dict if the file does not exist or if it has the old cache format.
    """
    if not isfile(fname):
        logging.debug("No upload cache present")
        return {}

    logging.debug("Loading "+fname)
    with open(fname) as fh:
        cache = json.load(fh)

    # Old cache shape was { localPath: {mtime, size} }; detect and discard.
    # Cache is purely local state, so no migration code.
    # Looking at the first value is enough to tell the two shapes apart.
    firstValues = list(cache.values())[:1]
    if firstValues:
        firstVal = firstValues[0]
        isOldShape = (not isinstance(firstVal, dict)) or ("mtime" in firstVal)
        if isOldShape:
            logging.info("upload cache %s is in the old flat shape, discarding" % fname)
            return {}
    return cache

def cacheWrite(uploadCache, fname):
    " write the upload cache as json with an indent of four spaces to the file fname, replacing an existing file "
    logging.debug("Writing "+fname)
    with open(fname, "w") as cacheFh:
        cacheFh.write(json.dumps(uploadCache, indent=4))

def validateHubName(hubName):
    """ errAbort if hubName is not usable as a hub name: it must not be empty, must be a single
    path segment without '/' and without a leading '.', and may only contain the characters
    allowed by hubNameSegmentRegex.
    """
    # Trailing dots (e.g. "hub.") are allowed for parity with the JS regex.
    if not hubName:
        errAbort("hub name is empty")

    # "." and ".." do not need a check of their own, they start with a dot
    hasSlash = "/" in hubName
    if hasSlash or hubName.startswith("."):
        errAbort("hub name '%s' must be a single path segment, no '/' and no leading '.'" % hubName)

    if hubNameSegmentRegex.match(hubName) is None:
        errAbort("hub name '%s' has invalid characters; allowed: letters, digits, '.', '_'" % hubName)

def getFileType(fbase):
    " map a base file name to its hubspace file type; returns 'NA' when no known extension matches "
    # hub.txt and <hubname>.hub.txt are both fileType=hub.txt; the server uses
    # the literal filename (not fileType) to tell them apart
    if fbase == "hub.txt" or fbase.endswith(".hub.txt"):
        logging.debug("file type for %s is hub.txt" % fbase)
        return "hub.txt"

    # first file type, in the order of fileTypeExtensions, with an extension that fbase ends with
    candidates = (
        fileType
        for fileType, fileExts in fileTypeExtensions.items()
        if fileType != "hub.txt" and fbase.endswith(tuple(fileExts))
    )
    ret = next(candidates, "NA")

    logging.debug("file type for %s is %s" % (fbase, ret))
    return ret

def uploadFiles(tdbDir, hubName):
    """upload files to hubspace. Server name and token can come from ~/.hubtools.conf

    Walks tdbDir, skipping dot-files and dot-directories, and uploads every
    file through the TUS client with hubName as the root of the remote
    parentDir. A file is skipped when its mtime equals the one recorded for
    this hub in <tdbDir>/.hubtools.files.json. That cache file is rewritten
    after every successful upload.
    """
    validateHubName(hubName)

    try:
        from tusclient import client
    except ModuleNotFoundError:
        installModule("tuspy")
        from tusclient import client

    serverUrl = cfgOption("tusUrl", "https://hubspace.gi.ucsc.edu/files")

    apiKey = cfgOption("apiKey")
    if apiKey is None:
        errAbort("To upload files, the file ~/.hubtools.conf must contain a line like apiKey='xxx').\n"
                "Go to https://genome.ucsc.edu/cgi-bin/hgHubConnect#dev to create a new  apiKey. Then run \n"
                "   echo 'apiKey=\"xxxx\"' >> ~/.hubtools.conf \n"
                "and run the 'hubtools up' command again.")

    logging.info(f"TUS server URL: {serverUrl}")
    my_client = client.TusClient(serverUrl)

    cacheFname = join(tdbDir, ".hubtools.files.json")
    uploadCache = cacheLoad(cacheFname)
    hubCache = uploadCache.setdefault(hubName, {})

    logging.debug("trackDb directory is %s" % tdbDir)
    for rootDir, dirs, files in os.walk(tdbDir):
        # don't descend into dot-dirs (.git, .cache, ...); their contents
        # would otherwise upload with a parentDir segment the server rejects
        dirs[:] = [d for d in dirs if not d.startswith(".")]
        for fbase in files:
            if fbase.startswith("."):
                continue
            localPath = normpath(join(rootDir, fbase))
            logging.debug("localPath: %s" % localPath)
            # stat once, before the upload: these are the values the uploaded content corresponds to
            localStat = os.stat(localPath)
            localMtime = localStat.st_mtime

            fileAbsPath = abspath(localPath)
            # POSIX-style relative path inside the hub, with hubName as the root
            remoteRelPath = relpath(fileAbsPath, tdbDir).replace(os.sep, "/")
            subDir = dirname(remoteRelPath)
            parentDir = hubName + "/" + subDir if subDir else hubName

            # skip files that have not changed their mtime since last upload to this hub
            if remoteRelPath in hubCache:
                cacheMtime = hubCache[remoteRelPath]["mtime"]
                if localMtime == cacheMtime:
                    logging.info("%s: file mtime unchanged, not uploading again" % localPath)
                    continue
                else:
                    logging.debug("file %s: mtime is %f, cache mtime is %f, need to re-upload" %
                            (localPath, localMtime, cacheMtime))
            else:
                logging.debug("file %s not in upload cache for hub %s" % (localPath, hubName))

            fileType = getFileType(fbase)

            meta = {
                    "apiKey" : apiKey,
                    "parentDir" : parentDir,
                    "genome" : "",
                    "fileName" : fbase,
                    "hubtools" : "true",
                    "fileType": fileType,
                    "lastModified" : str(int(localMtime)*1000),
            }

            # never write the API key to the log, it is a credential
            logMeta = dict(meta, apiKey="<hidden>")
            logging.info(f"Uploading {localPath}, meta {logMeta}")
            uploader = my_client.uploader(localPath, metadata=meta)
            uploader.upload()

            # record this file as uploaded and persist the cache after each
            # upload so an interrupted run doesn't re-upload finished files.
            # Use the pre-upload stat: if the file was modified while it was
            # being uploaded, the next run sees a different mtime and re-uploads.
            hubCache[remoteRelPath] = {
                    "mtime": localMtime,
                    "size": localStat.st_size,
            }
            cacheWrite(uploadCache, cacheFname)

def iterRaStanzas(fname):
    """ parse an ra-style (trackDb) file and yield one dict (key -> value) per stanza

    Stanzas are separated by empty lines. Leading spaces are stripped from
    every line, the first word is the key and the rest of the line the value.
    Lines starting with '#' (comments) and lines without a space are ignored.
    Empty stanzas, e.g. from several empty lines in a row, are never yielded.
    """
    data = dict()
    logging.debug("Parsing %s in trackDb format" % fname)
    with open(fname, "rt") as ifh:
        for l in ifh:
            l = l.lstrip(" ").rstrip("\r\n")
            if len(l)==0:
                # end of stanza; only yield if there was any content since the last separator
                if data:
                    yield data
                    data = dict()
                continue
            if l.startswith("#"):
                continue
            if " " not in l:
                continue
            key, val = l.split(" ", maxsplit=1)
            data[key] = val

    # last stanza, if the file does not end with an empty line
    if data:
        yield data

def raToTab(fname):
    " convert an .ra file to a tab-separated table on stdout, one row per stanza. Stanzas with a hub or genome key are left out. "
    stanzas = [s for s in iterRaStanzas(fname) if "hub" not in s and "genome" not in s]

    seenFields = set()
    for stanza in stanzas:
        seenFields.update(stanza)

    # track and shortLabel always come first, even if no stanza has them: handy for manual edits.
    # longLabel and type follow, but only if they occur in the file.
    header = ["track", "shortLabel"]
    header.extend(f for f in ("longLabel", "type") if f in seenFields)

    # some fields are always at the end
    trailing = [f for f in ("bigDataUrl",) if f in seenFields]

    # everything else goes in between, in alphabetical order
    middle = sorted(seenFields.difference(header, trailing))
    columns = header + middle + trailing

    out = sys.stdout
    out.write("#" + "\t".join(columns) + "\n")
    for stanza in stanzas:
        cells = [stanza.get(col, "") for col in columns]
        out.write("\t".join(cells) + "\n")

def guessFieldDesc(fieldNames):
    "given a list of TSV header names, guess which BED field each holds. Returns (dict BED field name -> column index, set of the matched column indexes) "
    logging.info("No field description specified, guessing fields from TSV headers: %s" % fieldNames)

    # lower-cased header -> BED field name
    exactNames = {
        "chrom": "chrom",
        "chromosome": "chrom",
        "start": "start",
        "chromstart": "start",
        "position": "start",
        # NOTE(review): headers are lower-cased before the lookup, so this mixed-case
        # key can never match. Left as is: a match would also put the column into
        # skipFields, i.e. take it out of the extra fields of the output.
        "Reference": "refAllele",
        "strand": "strand",
        "score": "score",
    }
    # headers ending with one of these are taken as the BED name
    nameSuffixes = (" id", "accession", "alternate")

    fieldDesc = {}
    skipFields = set()
    for colIdx, header in enumerate(fieldNames):
        lowered = header.lower()
        bedName = exactNames.get(lowered)
        if bedName is None and lowered.endswith(nameSuffixes):
            bedName = "name"
        if bedName is None:
            continue
        # if several headers map to the same BED field, the last one wins
        fieldDesc[bedName] = colIdx
        skipFields.add(colIdx)

    logging.info("TSV <-> BED correspondance:")
    for bedName, colIdx in fieldDesc.items():
        logging.info("TSV field %s -> BED field %s" % (fieldNames[colIdx], bedName))

    return fieldDesc, skipFields

def parseFieldDesc(fieldDescStr, fieldNames):
    " given a string like chrom=1,start=2,end=3,... return (dict {chrom:1,...}, set of the used column indexes). The guesses from the header names are the starting point, the string overrides them. "
    fieldDesc, skipFields = guessFieldDesc(fieldNames)
    if not fieldDescStr:
        # nothing specified by the user: the guesses are all we have
        return fieldDesc, skipFields

    for assignment in fieldDescStr.split(","):
        bedName, idxText = assignment.split("=")
        colIdx = int(idxText)
        fieldDesc[bedName] = colIdx
        skipFields.add(colIdx)

    return fieldDesc, skipFields

def makeBedRow(row, fieldDesc, skipFields, isOneBased):
    """ given a row of a tsv file and a fieldDesc with BedFieldName -> field-index, return a bed12+ row with extra fields

    row        -- list of strings, one per TSV column
    fieldDesc  -- dict BED field name -> index into row
    skipFields -- indexes of columns that are not appended as extra fields
    isOneBased -- if true, 1 is subtracted from the start position

    BED fields without a column get a default: end is start+1, score 0, strand ".",
    thickStart/thickEnd are start/end, itemRgb "0,0,0" and a single block that
    covers the whole feature. chrom, start and name have no default: the program
    exits with an error if one of them has no column.
    """
    bedRow = []

    # first construct the bed 12 fields
    for fieldName in ["chrom", "start", "end", "name", "score", "strand",
            "thickStart", "thickEnd", "itemRgb", "blockCount", "blockSizes", "chromStarts"]:

        fieldIdx = fieldDesc.get(fieldName)
        if fieldIdx is not None:
            val = row[fieldIdx]
            if fieldName=="start":
                chromStart = int(val)
                if isOneBased:
                    chromStart = chromStart-1
                val = str(chromStart)
            elif fieldName=="end":
                # keep the number around, the defaults of the later fields need it
                chromEnd = int(val)
        else:
            if fieldName=="end":
                chromEnd = chromStart+1
                val = str(chromEnd)
            elif fieldName=="score":
                val = "0"
            elif fieldName=="strand":
                val = "."
            elif fieldName=="thickStart":
                val = str(chromStart)
            elif fieldName=="thickEnd":
                val = str(chromEnd)
            elif fieldName=="itemRgb":
                val = "0,0,0"
            elif fieldName=="blockCount":
                val = "1"
            elif fieldName=="blockSizes":
                val = str(chromEnd-chromStart)
            elif fieldName=="chromStarts":
                # block starts are relative to the start of the feature
                val = "0"
            else:
                logging.error("Cannot find a field for %s" % fieldName)
                sys.exit(1)

        bedRow.append(val)

    # now handle all other fields
    bedRow.extend(val for fieldIdx, val in enumerate(row) if fieldIdx not in skipFields)

    return bedRow

def fetchChromSizes(db, outputFileName):
    """ download chromInfo.txt.gz for db and write its first two columns to outputFileName, a plain text <db>.sizes file

    The output is written uncompressed. It is written to a temporary name
    first and renamed at the end, so a failed run does not leave a partial
    .sizes file behind.
    """
    # Construct the URL based on the database name
    url = f"https://hgdownload.cse.ucsc.edu/goldenPath/{db}/database/chromInfo.txt.gz"
    chromSizesData = httpReq(url, asBytes=True)

    tmpFname = str(outputFileName) + ".tmp"
    # decompress in text mode: csv.reader needs str lines and raises on bytes
    with gzip.open(io.BytesIO(chromSizesData), mode="rt", newline="") as inFile, \
            open(tmpFname, "wt", newline="") as outFile:
        reader = csv.reader(inFile, delimiter='\t')
        writer = csv.writer(outFile, delimiter='\t', lineterminator='\n')
        # retain only the first two fields of every row
        for row in reader:
            writer.writerow(row[:2])
    os.replace(tmpFname, outputFileName)
    logging.info("Downloaded %s to %s" % (db, outputFileName))

def getLocalDataPath(fname):
    " return the path of fname inside ~/.local/hubtools; that directory is created if it does not exist "
    dataDir = os.path.expanduser("~/.local/hubtools")
    if isdir(dataDir):
        return join(dataDir, fname)

    logging.info("Creating directory "+dataDir)
    os.makedirs(dataDir)
    return join(dataDir, fname)

def getAsFname(fileType):
    " return the local path of <fileType>.as in the local data directory, the file is downloaded on first use "
    asPath = getLocalDataPath(fileType+".as")
    if isfile(asPath):
        return asPath

    # only these file types have a known download location; any other type is a KeyError
    knownAsUrls = {
            "bigNarrowPeak" : "https://genome.ucsc.edu/goldenpath/help/examples/bigNarrowPeak.as",
            "bigBroadPeak"  : "https://genome.ucsc.edu/goldenpath/help/examples/bigBroadPeak.as",
            }
    downloadUrl(knownAsUrls[fileType], asPath)
    return asPath

def getChromSizesFname(db):
    " return the file name of the chrom sizes of db: the copy under /hive if that exists, otherwise the one in the local data directory, which is downloaded if missing "
    hivePath = "/hive/data/genomes/%s/chrom.sizes" % db
    if isfile(hivePath):
        return hivePath

    localPath = getLocalDataPath("%s.sizes" % db)
    if isfile(localPath):
        return localPath

    fetchChromSizes(db, localPath)
    return localPath

def convTsv(db, tsvFname, outBedFname, outAsFname, outBbFname):
    """ convert one tsv file to a sorted bed12+ file, an autoSql file and a bigBed file

    db          -- assembly name, used to find the chrom.sizes file
    tsvFname    -- input file, the first line holds the column names
    outBedFname -- output: sorted bed file
    outAsFname  -- output: autoSql description of the bed file
    outBbFname  -- output: bigBed file, made with bedToBigBed

    The columns for chrom, start etc. are guessed from the column names.
    Chromosome names that are only digits or X/Y get a "chr" prefix.
    """
    bigCols = set() # col names of columns with > 255 chars
    fieldNames = None

    isOneBased = True # useful in the future maybe, name=0,start=1,...
    bedFieldsDesc = None # in the future, the user may want to input a string like name=0,start=1, but switch this off for now

    # the temp file is removed when the with-block is left, also in case of errors
    with tempfile.NamedTemporaryFile(suffix=".bed", dir=dirname(outBedFname), mode="wt") as unsortedBedFh:
        with open(tsvFname) as tsvFh:
            for line in tsvFh:
                row = line.rstrip("\r\n").split("\t")
                if fieldNames is None:
                    fieldNames = row
                    fieldDesc, notExtraFields = parseFieldDesc(bedFieldsDesc, fieldNames)
                    continue

                # note fields with data > 255 chars.
                for colName, colData in zip(fieldNames, row):
                    if len(colData)>255:
                        bigCols.add(colName)

                bedRow = makeBedRow(row, fieldDesc, notExtraFields, isOneBased)

                chrom = bedRow[0]
                if chrom.isdigit() or chrom in ["X", "Y"]:
                    bedRow[0] = "chr"+bedRow[0]

                unsortedBedFh.write("\t".join(bedRow))
                unsortedBedFh.write("\n")
        unsortedBedFh.flush()

        if fieldNames is None:
            errAbort("File %s is empty, cannot convert it" % tsvFname)

        # run sort without a shell, so file names with spaces or shell characters are safe.
        # Not inside an assert: assert statements are not executed under python -O
        with open(outBedFname, "wb") as sortedFh:
            subprocess.check_call(["sort", "-k1,1", "-k2,2n", unsortedBedFh.name], stdout=sortedFh)

    # generate autosql
    # BED fields
    bedColCount = 12
    with open(outAsFname, "w") as asFh:
        asFh.write(asHead)
        asFh.write("\n".join(asLines[:bedColCount+1]))
        asFh.write("\n")

        # extra fields
        for fieldIdx, field in enumerate(fieldNames):
            if fieldIdx in notExtraFields:
                continue
            name = field.replace(" ","").replace("%","perc_")
            name = re.sub("[^a-zA-Z0-9]", "", name)
            if name:
                name = name[0].lower()+name[1:]
            else:
                # the column name had no usable character at all
                name = "field%d" % fieldIdx
            fType = "string"
            if field in bigCols:
                fType = "lstring"
            asFh.write('      %s %s; "%s" \n' % (fType, name, field))
        asFh.write(")\n")

    chromSizesFname = getChromSizesFname(db)

    cmd = ["bedToBigBed", outBedFname, chromSizesFname, outBbFname, "-tab", "-type=bed%d+" % bedColCount, "-as=%s" % outAsFname]
    subprocess.check_call(cmd)

def convTsvDir(inDir, db, outDir):
    " find all .tsv files under inDir, at any depth, and convert each to .bed, .as and .bb files at the same relative place under outDir "
    ext = "tsv"
    pat = join(inDir, '**/*.'+ext)
    logging.debug("Finding files under %s (%s), writing output to %s" % (inDir, pat, outDir))

    for tsvPath in glob.glob(pat, recursive=True):
        logging.debug("Found %s" % tsvPath)
        # mirror the position of the input file relative to inDir
        outPath = Path(join(outDir, relpath(abspath(tsvPath), inDir)))

        outParent = outPath.parents[0]
        if not outParent.is_dir():
            logging.debug("mkdir -p %s" % outParent)
            makedirs(outParent)

        convTsv(db, tsvPath,
                outPath.with_suffix(".bed"),
                outPath.with_suffix(".as"),
                outPath.with_suffix(".bb"))

def parseTrackLine(s):
    """ split the settings of a custom track line into a dict, respecting quotes

    Accepts key=value and key="value with spaces" tokens, and also a key that
    is followed by its value as a separate token. A bare key at the very end
    of the line gets the empty string as its value.
    """
    lexer = shlex.shlex(s, posix=True)
    lexer.whitespace_split = True
    lexer.wordchars += '='  # Allow '=' as part of words
    # the shlex class treats '#' as the start of a comment by default, which would
    # silently cut off everything after a '#' in a value. shlex.split() switches it off the same way.
    lexer.commenters = ''

    tokens = iter(lexer)
    result = {}
    for token in tokens:
        if '=' in token:
            key, value = token.split('=', 1)
        else:
            # cases like: name "..." ; the default avoids a StopIteration on a trailing bare key
            key, value = token, next(tokens, "")
        result[key] = value.strip('"')  # Remove surrounding quotes if present

    return result

def readTrackLines(fnames):
    " read the first line of every file in fnames and parse it as a track line. Returns a dict file name -> dict of the track settings. "
    logging.debug("Reading track lines from %s" % fnames)
    ret = {}
    for fn in fnames:
        # only the first line is needed; close the file right away
        with open(fn) as fh:
            line1 = fh.readline().rstrip("\r\n")
        # remove only the leading "track" keyword, never a "track " further inside the line
        notTrack = re.sub(r"^track\s+", "", line1)
        ret[fn] = parseTrackLine(notTrack)
    return ret

def stripFirstLine(inputFilename, outputFilename):
    " copy all lines of inputFilename to outputFilename, except the first line. An empty input gives an empty output. "
    with open(inputFilename, 'r') as infile, open(outputFilename, 'w') as outfile:
        # skip the first line; the default keeps an empty file from raising StopIteration
        next(infile, None)
        outfile.writelines(infile)

def bedToBigBed(inFname, db, outFname, asFname=None, bedType=None):
    " run the bedToBigBed program on a tab-separated bed file. The chrom.sizes file of db is located, or downloaded, first. asFname and bedType are passed as -as= and -type= if given. "
    chromSizesFname = getChromSizesFname(db)

    optionalArgs = []
    if asFname:
        optionalArgs.append("-as="+asFname)
    if bedType:
        optionalArgs.append("-type="+bedType)
    cmd = ["bedToBigBed", inFname, chromSizesFname, outFname, "-tab"] + optionalArgs

    logging.debug("Running %s" % " ".join(cmd))
    logging.info(f'Converting {inFname} to {outFname}. (chromSizes: {chromSizesFname}')
    subprocess.check_call(cmd)

def downloadUrlOld(url, local_file_name):
    """ Downloads the content of the given URL and saves it to a local file. Does not use the requests library.

    Nothing is downloaded if local_file_name already exists. HTTP redirects are
    followed. An HTTP error status is logged and the function returns without
    creating the file; any other error is logged and re-raised. The data goes
    to local_file_name + ".tmp" first and is renamed when the download is
    complete; the .tmp file is removed if the download fails.
    """
    if os.path.isfile(local_file_name):
        logging.info("Not downloading %s, %s already exists" % (url, local_file_name))
        return

    # urllib.request follows redirects; a bare http.client connection would
    # save the body of the 3xx response as if it were the file
    import urllib.request
    import urllib.error

    tmpFname = local_file_name + ".tmp"
    try:
        with urllib.request.urlopen(url) as response, open(tmpFname, 'wb') as file:
            shutil.copyfileobj(response, file, 65536)  # copy in chunks
        os.rename(tmpFname, local_file_name)
        logging.info(f"Downloaded '{url}' to '{local_file_name}'.")
    except urllib.error.HTTPError as e:
        # HTTP status >= 400: report it, but do not raise
        logging.info(f"Unable to download {url}: HTTP error {e.code}")
        return
    except Exception as e:
        logging.error(f"An error occurred while downloading the URL: {e}")
        raise
    finally:
        # only still there if the download did not complete
        if os.path.isfile(tmpFname):
            os.remove(tmpFname)

def downloadUrl(url, local_file_name):
    """ Downloads the content of the given URL and saves it to a local file, using the requests library.

    Nothing is downloaded if local_file_name already exists. The data goes to
    local_file_name + ".tmp" first and is renamed when the download is
    complete. HTTP errors and other request errors are logged and re-raised.
    """
    if isfile(local_file_name):
        logging.info("Not downloading %s, %s already exists" % (url, local_file_name))
        return

    import requests
    try:
        # the with-block releases the connection of the streamed response
        with requests.get(url, stream=True) as response:
            response.raise_for_status()  # Raise an exception for HTTP errors
            tmpFname = local_file_name+".tmp"
            with open(tmpFname, 'wb') as file:
                for chunk in response.iter_content(chunk_size=65536):  # Download in chunks
                    file.write(chunk)
        logging.info(f"Downloaded '{url}' to '{local_file_name}'.")
        os.rename(tmpFname, local_file_name)

    except requests.exceptions.HTTPError as e:
        logging.error(f"Unable to download {url}: HTTP error {e}")
        raise

    except requests.exceptions.RequestException as e:
        logging.error(f"An error occurred while downloading the URL: {e}")
        raise

def downloadUrlsParallel(url_filename_list, max_threads=12):
    """ given a list of [url, localFname], download the files with max_threads parallel threads (default 12). Errors of single downloads are logged and do not stop the others. """
    logging.info("Downloading %s files with %d parallel threads" % (len(url_filename_list), max_threads))
    # without the requests library, use the downloader that does not need it
    worker = downloadUrl if requestsLoaded else downloadUrlOld

    with concurrent.futures.ThreadPoolExecutor(max_threads) as pool:
        pending = []
        for url, local_filename in url_filename_list:
            pending.append(pool.submit(worker, url, local_filename))

        # result() re-raises the exception of the worker thread, if there was one
        for finished in concurrent.futures.as_completed(pending):
            try:
                finished.result()
            except Exception as e:
                logging.error(f"Error in thread: {e}")

def makeLegalTrackName(s):
    " turn spaces into underscores and drop every character that is not an ASCII letter, a digit or an underscore "
    # note that different inputs can end up as the same name, e.g. "MyTrack!!" and "MyTrack!" are both "MyTrack"
    allowedChars = set(string.ascii_letters + string.digits + "_")
    underscored = s.replace(" ", "_")
    return "".join(ch for ch in underscored if ch in allowedChars)

def mustBeLegalTrackName(s):
    " errAbort unless s is unchanged by makeLegalTrackName, i.e. is already a legal track name "
    if makeLegalTrackName(s)==s:
        return
    errAbort("The name '%s' is not a legal name for a track. Only alphanumeric characters and underscore are allowed." % s)

def narrowPeakToBigNarrowPeak(textFname, ofh):
    """ convert old narrow peak text format to .bed format for bigNarrowPeak

    Reads the tab-separated ten-column file textFname and writes the rows to
    the open file handle ofh, with the score capped at 1000. Empty lines are
    skipped. ofh is flushed at the end, but not closed.
    """
    with open(textFname) as ifh:
        for line in ifh:
            line = line.rstrip("\r\n")
            if not line:
                continue
            # chr1    9356548 9356648 .       0       .       182     5.0945  -1  50
            chrom, chromStart, chromEnd, name, score, strand, signal, pVal, qVal, peak = line.split("\t")
            score = min(int(score), 1000)
            outRow = (chrom, chromStart, chromEnd, name, str(score), strand, signal, pVal, qVal, peak)
            ofh.write("\t".join(outRow))
            ofh.write("\n")
    ofh.flush()

def broadPeakToBed(textFname, ofh):
    """ convert old broad peak text format to .bed format for bigBed

    Reads the tab-separated file textFname and writes the first nine columns
    of every row to the open file handle ofh, with the score capped at 1000.
    Empty lines are skipped. ofh is flushed at the end, but not closed.
    """
    with open(textFname) as ifh:
        for line in ifh:
            line = line.rstrip("\r\n")
            if not line:
                continue
            chrom, chromStart, chromEnd, name, score, strand, signal, pVal, qVal = line.split("\t")[:9]
            score = min(int(score), 1000)
            outRow = (chrom, chromStart, chromEnd, name, str(score), strand, signal, pVal, qVal)
            ofh.write("\t".join(outRow))
            ofh.write("\n")
    ofh.flush()

def convertTextToBin(db, textFname, tdb, outDir):
    " convert a text track file to <outDir>/<track>.bb, the input format is taken from the type in the trackDb dictionary (default: bed). Updates the type and bigDataUrl of tdb and returns tdb. "
    outFname = join(outDir, tdb["track"]) + ".bb"
    trackType = tdb.get("type", "bed")

    logging.info("Converting %s of type %s to binary" % (textFname, trackType))

    # input type -> (name of the .as file, text converter, -type for bedToBigBed, new trackDb type)
    peakFormats = {
        "narrowPeak": ("bigNarrowPeak", narrowPeakToBigNarrowPeak, "bed6+4", "bigBed 6+"),
        "broadPeak": ("bigBroadPeak", broadPeakToBed, "bed6+3", "bigBed 6+3"),
    }

    if trackType.startswith("bed"):
        # NOTE(review): this prefix test also accepts types like bedGraph, confirm that this is wanted
        bedToBigBed(textFname, db, outFname)
        tdb["type"] = "bigBed"
    elif trackType in peakFormats:
        asName, toBed, bedType, newType = peakFormats[trackType]
        asFname = getAsFname(asName)
        # the handle is kept in a variable until bedToBigBed has read the temp file
        tmpFh = makeTempFile(dir=outDir, suffix=".bed")
        toBed(textFname, tmpFh)
        bedToBigBed(tmpFh.name, db, outFname, asFname=asFname, bedType=bedType)
        tdb["type"] = newType
        tdb["spectrum"] = "on"
    else:
        errAbort("No support yet for track type '%s'. Please contact us." % trackType)

    tdb["bigDataUrl"] = basename(outFname)
    return tdb

def convCtDb(hubInDir, db, inDir, outDir, doDownload):
    """ convert one db part of a track archive to an output directory

    Reads the track line of every *.ct file in inDir and writes
    <outDir>/hub.txt with one stanza per file. Custom tracks without a
    bigDataUrl are converted to a binary file in outDir. With doDownload,
    every bigDataUrl is replaced by a local file name.

    Returns a list of (url, local file name) that still have to be
    downloaded; it is empty without doDownload or if no *.ct file was found.
    """
    meta = parseMeta([hubInDir])

    findGlob = join(inDir, "*.ct")
    inFnames = glob.glob(findGlob)
    if len(inFnames)==0:
        logging.info("No *.ct files found in %s" % findGlob)
        return []

    tdbData = readTrackLines(inFnames)

    makedirs(outDir)
    hubTxtFname = join(outDir, "hub.txt")

    getUrlsFnames = []
    doneFnames = set()
    urlToFname = {} # url -> local file name, so the same URL is only downloaded once

    with open(hubTxtFname, "wt") as ofh:
        writeHubGenome(ofh, db, meta)

        for tdbIdx, (fname, tdb) in enumerate(tdbData.items(), start=1):
            tdb["shortLabel"] = tdb["name"]

            # custom track names can include spaces, spec characters, etc. Strip all those
            # append a number to make sure that the result is unique and a legal track name
            track = tdb["name"]
            track = makeLegalTrackName(track)+"_"+str(tdbIdx)
            tdb["track"] = track
            del tdb["name"]

            tdb["longLabel"] = tdb["description"]
            del tdb["description"]

            if "bigDataUrl" not in tdb:
                textFname = join(outDir, tdb["track"]+".txt")
                stripFirstLine(fname, textFname)

                tdb = convertTextToBin(db, textFname, tdb, outDir)

                os.remove(textFname)
            else:
                url = tdb["bigDataUrl"]
                if doDownload:
                    uniqueFname = urlToFname.get(url)
                    if uniqueFname is None:
                        uniqueFname = basename(url)
                        if uniqueFname in doneFnames:
                            # File name is not unique:
                            # we need to make the file name unique in a way that does not touch the suffix structure: prefix with hash
                            base_url = url.rsplit('/', 1)[0] # part before the last slash = directory
                            shortHash = hashlib.sha1(base_url.encode()).hexdigest()[:8]
                            uniqueFname = shortHash+"_"+uniqueFname

                        # can only happen if two different directories have the same eight hex digits
                        if uniqueFname in doneFnames:
                            errAbort("Cannot make a unique local file name for %s" % url)
                        doneFnames.add(uniqueFname)
                        urlToFname[url] = uniqueFname

                        outFname = join(outDir, uniqueFname)
                        getUrlsFnames.append((url, outFname))
                    tdb["bigDataUrl"] = uniqueFname
                else:
                    logging.debug("Not downloading %s, option to download was not set" % url)

            writeStanza(ofh, 0, tdb)

    logging.info("Wrote %s" % hubTxtFname)

    return getUrlsFnames

def convArchDir(hubInfoDir, inDir, outDir, doDownload):
    " convert an unpacked track archive (.tar.gz from our track archive feature) to a track hub: every subdirectory of inDir is one db and becomes a subdirectory of outDir. The files to download from all dbs are fetched together at the end. "
    logging.info("Converting track archive in %s to a new track hub in %s" % (inDir, outDir))

    # the name of each subdirectory is the db
    dbDirs = [(db, join(inDir, db)) for db in os.listdir(inDir) if isdir(join(inDir, db))]
    if not dbDirs:
        errAbort("No directories found under %s. Is this really a UCSC track backup archive .tar.gz file?" % inDir)

    allBigDataUrls = []
    for db, inSubDir in dbDirs:
        logging.debug("Processing %s, db=%s" % (inSubDir, db))
        allBigDataUrls.extend(convCtDb(hubInfoDir, db, inSubDir, join(outDir, db), doDownload))

    downloadUrlsParallel( allBigDataUrls )

def hgsidFromUrl(url):
    " return (server, hgsid) for a URL, where server is scheme://host and hgsid the value of the hgsid= parameter. errAbort if the URL has no hgsid. "
    parsed_url = urllib.parse.urlparse(url)
    server_name = f"{parsed_url.scheme}://{parsed_url.netloc}"
    # parse_qs returns a list of values per parameter and leaves out parameters with an empty value
    hgsidVals = urllib.parse.parse_qs(parsed_url.query).get('hgsid')
    if not hgsidVals:
        errAbort("Could not find hgsid in URL %s" % url)
    return server_name, hgsidVals[0]

def hgsidFromPage(url):
    " fetch url over HTTP and take the hgsid from the hidden hgsid input field of the html page. Returns (server, hgsid), where server is scheme://host of url. errAbort if the page has no such field. "
    # short session URLs first go through one redirect
    logging.info("Getting hgsid from page %s" % url)
    pageText = httpReq(url)

    # the line we are looking for:
    # <INPUT TYPE=HIDDEN NAME='hgsid' VALUE='347013469_676goC3Wia55Duav9QqA0zLNKL5Q'>
    marker = "<INPUT TYPE=HIDDEN NAME='hgsid' VALUE='"
    hgsid = None
    for htmlLine in pageText.splitlines():
        if not htmlLine.startswith(marker):
            continue
        logging.debug("HTML line with hgsid is %s" % repr(htmlLine))
        # the value runs from the end of the marker to the next single quote
        hgsid = htmlLine[len(marker):].partition("'")[0]
        logging.debug("HGSID is %s" % repr(hgsid))
        break

    if hgsid is None:
        errAbort("Could not find hgsid on page %s" % url)

    urlParts = urllib.parse.urlparse(url)
    return f"{urlParts.scheme}://{urlParts.netloc}", hgsid

def downloadCartLines(url):
    """
    Fetch the cart dump page from url and return the lines between the
    first <PRE> and the following </TT>, i.e. without header and footer.
    XX Would be nice if we had a JSON API Endpoint for this, rather than parsing broken HTML.
    """
    html = httpReq(url)
    if "<PRE>" not in html:
        errAbort("No <PRE> found in cartDump, internal error")

    afterPre = html.split("<PRE>")[1]
    # the closing tag really is </TT>, not </PRE>: HTML error in our output
    cartText = afterPre.split("</TT>")[0]
    return cartText.splitlines()

def getCart(cartDumpUrl):
    " return the cart as a dict key->val given a cart dump URL. Every line is split at its first space; a line without a space is a key with an empty value, empty lines are skipped. "
    cart = {}
    for l in downloadCartLines(cartDumpUrl):
        if not l:
            continue
        # partition never fails, unlike unpacking the result of split
        key, _, val = l.partition(" ")
        cart[key] = val
    return cart

def ctUrlToTdb(ctUrl):
    " download a ctfile from a url and parse it. Returns a list of tdb dicts, one per custom track with a bigDataUrl, with the settings renamed to hub statements. "
    logging.info("Downloading %s" % ctUrl)
    ctText = httpReq(ctUrl)

    # every line is 'track' followed by tab-separated key='value' settings, e.g.
    # track<tab>name='SC21_WIGW2_3_NA_occX'<tab>description='nucleoatac.occ - ...'<tab>visibility='2'<tab>bigDataUrl='http://...bw'<tab>origTrackLine='track type=bigWig ...'<tab>type='bigWig'
    renamedKeys = {"description": "longLabel", "name": "track"}
    droppedKeys = ("inputType", "tdbType")

    tdbs = []
    for ctLine in ctText.splitlines():
        if not ctLine.startswith("track"):
            errAbort("Unexpected custom track line in file %s" % ctUrl)

        tdb = {}
        for setting in ctLine.split("\t")[1:]:
            if setting.startswith("origTrackLine"):
                continue
            rawKey, rawVal = setting.split("=", maxsplit=1)
            key = renamedKeys.get(rawKey, rawKey)
            if key in droppedKeys:
                continue

            val = rawVal.strip("'")
            tdb[key] = val
            # the custom track name is used as both track name and short label
            if key=="track":
                tdb["shortLabel"] = val

        if "dbTableName" in tdb or "bigDataUrl" not in tdb:
            logging.warning("Cannot import custom tracks without bigDataUrls, skipping %s" % tdb["track"])
        else:
            tdbs.append(tdb)

    return tdbs

def downloadTrackArchive(serverUrl, hgsid, ofh):
    """ download the track archive for a given hgsid and write it to the open binary file handle ofh

    Steps:
     1. ask hgSession on serverUrl to create the archive, the reply contains a backgroundStatus token
     2. poll the status page every few seconds until it contains a hgS_doDownload_ token
     3. request the archive with that token and write the bytes to ofh (flushed, but not closed)

    Calls errAbort() if the first reply has no backgroundStatus token or if the download token
    does not show up after more than 10 polls.
    """

    # https://hgwdev-max.gi.ucsc.edu/cgi-bin/hgSession?hgsid=425203018_AsoR0syMagMP0brh6a2Y2F7R2RNi&hgS_makeDownload_=Submit
    logging.info("Getting track archive from server %s, hgsid %s" % (serverUrl, hgsid))
    cgiUrl = serverUrl+"/cgi-bin/hgSession"
    params = {"hgsid":hgsid, "hgS_makeDownload_":"Submit"}

    page = httpReq(cgiUrl, params=params)
    statusMatch = re.search(r"backgroundStatus=([^&]*)", page)
    if statusMatch is None:
        # without this check, a reply without the token ends in an AttributeError traceback
        errAbort("Cannot find a backgroundStatus token in the reply from %s. Cannot request the track archive." % cgiUrl)
    statusToken = unquote(statusMatch.group(1))
    logging.debug("Status token is %s" % statusToken)

    # get the download token from the status page
    waitSecs = 3
    maxTries = 10
    tryCount = 0
    downloadToken = None
    while downloadToken is None:
        logging.info("Waiting for %d seconds for archive" % waitSecs)
        time.sleep(waitSecs)

        params = {"hgsid":hgsid, "backgroundStatus":statusToken }
        page = httpReq(cgiUrl, params=params)
        matchObj = re.search(r"hgS_doDownload_([^']*)", page)
        if matchObj is not None:
            downloadToken = unquote(matchObj.group(1))
            continue

        logging.info("No download token found, waiting some more...")
        tryCount += 1
        if tryCount > maxTries:
            errAbort("Cannot find download token even after long wait. Giving up.")

    params = { "hgsid" : hgsid, "hgS_doDownload_"+downloadToken : "1", "hgS_saveLocalBackupFileName": "test" }
    logging.info("Downloading track archive and saving to %s" % ofh.name)
    binData = httpReq(cgiUrl, params=params, asBytes=True)

    ofh.write(binData)
    # flush, so the data is on disk when the caller opens the file by name while ofh is still open
    ofh.flush()

def makeTempFile(suffix=None, dir=None, mode="w", prefix=None):
    """ make a temporary file. Do not delete in debug mode. always delete in normal mode, at the latest when program exits.

    Returns an open file object, its .name attribute is the path of the file.
    suffix, dir and prefix are passed on to the tempfile module, mode is the open() mode.
    """
    if debugMode:
        fd, tmpFn = tempfile.mkstemp(suffix=suffix, dir=dir, prefix=prefix) # does not remove file
        # mkstemp returns an already open OS-level descriptor. Close it, otherwise it leaks: the file is
        # re-opened by name below, so that fh.name is the path, like for NamedTemporaryFile
        os.close(fd)
        return open(tmpFn, mode)

    # removes file when the returned file object is closed or destroyed
    return tempfile.NamedTemporaryFile(prefix=prefix, suffix=suffix, dir=dir, mode=mode)

def convCtUrlOrFile(url, inDir, outDir, doDownload):
    """ given an hgTracks URL with an hgsid or a session link or .tar.gz local track archive tarball, get all
    custom track lines and create a hub file for it. Try to convert BED custom tracks to bigBed.
    Download bigDataUrls to outDir.

    The archive is extracted into <outDir>/archive.tmp and then converted by convArchDir().
    Unless debug mode is active, the downloaded archive and archive.tmp are removed at the end,
    also when the download, the extraction or the conversion fails.
    """

    downDir = join(outDir, "archive.tmp")
    makedirs(downDir)

    tgzFh = None
    try:
        if url.startswith("http"):
            if "hgsid=" in url:
                serverUrl, hgsid = hgsidFromUrl(url)
            else:
                serverUrl, hgsid = hgsidFromPage(url)

            tgzFh = makeTempFile(dir=downDir, suffix=".tar.gz", mode="wb")
            downloadTrackArchive(serverUrl, hgsid, tgzFh)
            tgzFname = tgzFh.name
        else:
            # not a URL, so it has to be a local tarball
            tgzFname = url

        logging.info("Extracting %s to %s" % (tgzFname, downDir))
        with tarfile.open(tgzFname, 'r:gz') as tar:
            try:
                # the 'data' filter rejects dangerous archive members, e.g. paths outside of downDir
                tar.extractall(path=downDir, filter='data')
            except TypeError:
                # older Python versions do not know the filter argument
                tar.extractall(path=downDir)

        convArchDir(inDir, downDir, outDir, doDownload)
    finally:
        # clean up in a finally block, so no temp files are left behind after an error
        if not debugMode:
            if tgzFh:
                tgzFh.close() # = deletes temp file
            logging.info("Removing %s" % downDir)
            shutil.rmtree(downDir)

def stanzaKey(stanza):
    """ return the name under which a stanza is stored: the track name for stanzas with a 'track' line,
    otherwise '.hub' or '.genome' for stanzas with a 'hub' or 'genome' line. Calls errAbort() for all others. """
    if "track" in stanza:
        # stanza values are (comments, indent, value)
        return stanza["track"][2]

    specialTags = [tag for tag in ("hub", "genome") if tag in stanza]
    if specialTags:
        # 'hub' wins if both are present
        return "." + specialTags[0]

    errAbort("Got hub.txt file with a stanza that has neither a 'track', nor a 'hub', nor a 'genome' key: %s" % repr(stanza))

def stanzaNew(pairs, indent=0):
    " build a stanza from (key, val) pairs: every line gets no comments and the same indent "
    return OrderedDict((tag, ([], indent, value)) for tag, value in pairs)

def stanzaAddVal(tdb, tag, val):
    " set tag to val in a stanza, in place. The new line has no comments and the indent of the first line of the stanza (0 if the stanza is empty) "
    if tdb:
        firstLine = next(iter(tdb.values()))
        indent = firstLine[1]
    else:
        indent = 0
    tdb[tag] = ([], indent, val)

def stanzaMatchesRe(tdb, tags, pat):
    """ True if pat (a compiled regex) is found in the value of at least one of the tags, tags missing from
    the stanza are ignored. Special stanzas like .hub and .genome are not filtered out here, callers have to do that. """
    presentLines = (tdb[tag] for tag in tags if tag in tdb)
    return any(pat.search(value) is not None for _comments, _indent, value in presentLines)

def stanzaMatchesTrack(tdb, searchName):
    " True if the stanza has a track line and the track name is exactly searchName. False otherwise, so also for non-track stanzas. "
    if "track" not in tdb:
        return False
    _comments, _indent, trackName = tdb["track"]
    return trackName==searchName

def stanzaEdit(tdb, indent=None, newTags=None, delTags=None):
    """ modify a stanza. newTags is a list of [key, val]. delTags is a list of keys.

    Returns a new OrderedDict, the input stanza is not changed.
     - indent: if not None, all lines get this indent
     - delTags: these keys are left out of the result
     - newTags: added after the deletions, so a key that is in both ends up with the new value.
       New lines have no comments. Their indent is 'indent' if given, otherwise the indent of the
       last line of the input stanza, or 0 if the input stanza is empty.
    """
    newTdb = OrderedDict()

    # set a default now: for an empty stanza the loop below never runs and lineIndent would be undefined
    lineIndent = 0 if indent is None else indent

    for key, (comments, origIndent, val) in tdb.items():
        lineIndent = origIndent if indent is None else indent

        if delTags is None or key not in delTags:
            newTdb[key] = (comments, lineIndent, val)

    if newTags is not None:
        for key, val in newTags:
            newTdb[key] = ([], lineIndent, val)

    return newTdb

def stanzaGetVal(tdb, tag, default=None):
    " return the value of tag in a stanza, without comments and indent, or default if the stanza has no such tag "
    return tdb[tag][2] if tag in tdb else default

def tdbCommentsFromPairs(pairs, indent=0):
    """ build a tdbComments structure (trackName -> stanza) with one single track stanza from (key, val) pairs.
    All lines get the same indent and no comments. One of the pairs must be the 'track' line. """
    stanza = OrderedDict((tag, ([], indent, value)) for tag, value in pairs)

    # the stanza holds the value of the last 'track' pair, if there was any
    trackName = stanza["track"][2] if "track" in stanza else None
    assert(trackName is not None) # can only make track stanzas for now

    tdbs = OrderedDict()
    tdbs[trackName] = stanza
    return tdbs

def tdbCommentsParse(fname):
    """ like iterRaStanzas(), parse a hub.txt file, returns an ordered dict of trackName -> stanza BUT retains all comments
        Stanza is an OrderedDict of key -> (beforeCommentLines, indent, value)
        This system retains all comments of the input file and allows to restore the entire file identically
        Exceptions:
         - DOS/MAC newline characters become Unix NLs
         - whitespace between key and val becomes a single space
         - lines that contain only whitespace are treated like empty lines
         - the indent is stored as a number of characters and written back as spaces
        Hub and genome stanzas are saved like tracks with the special tracknames ".hub" and ".genome"

        Side effect: before parsing, a copy of the file is saved as <fname>.bak

        NOTE: comments are attached to the key line that follows them. Comments after the last key
        of a stanza or after the last stanza of the file have no such line and are not kept.

        XX This should probably become a class TdbComments with methods on it.
    """
    bakFname = fname+".bak"
    shutil.copy(fname, bakFname)

    logging.debug(f"Parsing {fname}")
    tdbs = OrderedDict()

    stanza = OrderedDict()
    comments = []

    with open(fname) as ifh:
        for line in ifh:
            line = line.rstrip("\r\n")
            nonWhite = line.lstrip()

            if len(nonWhite)==0:
                # empty line = new stanza
                if len(stanza)!=0:
                    tdbKey = stanzaKey(stanza)
                    tdbs[tdbKey] = stanza
                    stanza = OrderedDict()
                    comments = []
                # preserve empty lines at the file beginning or within comments
                else:
                    comments.append("")
                continue

            # a normal line can either be a comment or a "key<space>value" line
            if nonWhite.startswith("#"):
                comments.append(line) # keep the comment with its original indent
                continue

            indent = len(line) - len(nonWhite)
            # split on any whitespace, the rest of the line is kept as it is, including trailing whitespace
            fields = nonWhite.split(None, 1)
            key = fields[0]
            val = fields[1] if len(fields)==2 else "" # a key without a value must not stop the parser
            if key in stanza:
                logging.warning(f"TrackDb .ra format error: duplicated key {key} in stanza {stanza}")
            stanza[key] = (comments, indent, val)
            comments = []

    # handle last stanza, most files don't end with an empty line
    if len(stanza)!=0:
        tdbKey = stanzaKey(stanza)
        tdbs[tdbKey] = stanza

    tdbCount = len(tdbs)
    logging.info(f"Read {fname}, {tdbCount} stanzas")
    return tdbs

def tdbCommentsWrite(tdbs, fname):
    """ write back the structure returned by tdbCommentsParse into fname

    For every line of a stanza, the comment lines stored with it are written first, then the
    line itself as '<indent spaces><key> <val>'. Every stanza is followed by one empty line.
    An existing file is overwritten.
    """
    # 'with' makes sure that the file is closed, even if one of the writes fails
    with open(fname, "wt") as ofh:
        for tdb in tdbs.values():
            for key, (comments, indent, val) in tdb.items():
                if comments:
                    ofh.write("\n".join(comments))
                    ofh.write("\n")
                ofh.write(" " * indent)
                ofh.write("%s %s\n" % (key, val))

            ofh.write("\n")

    tdbCount = len(tdbs)
    logging.info(f"Wrote {fname}, {tdbCount} stanzas")

def tdbCommentsAppendStanza(tdbs, tdb, indent=0):
    """ store one stanza in a tdbCommentsParse data structure under its stanzaKey() and return the structure.
    A new key goes to the end, for an existing key the stanza is replaced. The indent argument is not used. """
    tdbs[stanzaKey(tdb)] = tdb
    return tdbs

def tdbCommentsAppendOneList(tdbs, pairs, indent=0):
    """ make a track stanza from a list of (key, val) pairs, store it in a tdbCommentsParse data structure
    under its track name and return the structure. One of the pairs must be the 'track' line. """
    stanza = OrderedDict()
    for tag, value in pairs:
        stanza[tag] = ([], indent, value)

    trackLine = stanza.get("track")
    trackName = None if trackLine is None else trackLine[2]
    assert(trackName is not None)

    tdbs[trackName] = stanza
    return tdbs

def tdbCommentsEdit(tdbs, indent=None, newTags=None, delTags=None):
    " run stanzaEdit() with the same indent, newTags and delTags on all stanzas of a tdbCommentsParse data structure, return the result as a new structure "
    return OrderedDict(
        (trackName, stanzaEdit(tdb, indent=indent, newTags=newTags, delTags=delTags))
        for trackName, tdb in tdbs.items()
    )

def tdbCommentsAppendAll(tdbs1, tdbs2):
    " copy all stanzas of the second tdbCommentsParse data structure into the first, in place, and return the first. Stanzas with a name that already exists are replaced. "
    tdbs1.update(tdbs2)
    return tdbs1

def tdbCommentsInsertAfter(tdbs, parentName, insertTdbs):
    """ return a copy of tdbs where the stanzas of insertTdbs follow directly after the track parentName.
    If there is no track with this name, nothing is inserted. """
    merged = OrderedDict()
    for tdb in tdbs.values():
        tdbCommentsAppendStanza(merged, tdb)
        if not stanzaMatchesTrack(tdb, parentName):
            continue
        # this was the parent, so the new stanzas go here
        tdbCommentsAppendAll(merged, insertTdbs)
    return merged

def addView(hubFname, contType, contName, contLabel):
    """ add a view under a container

    contName must have the format <parentTrack>/<viewSuffix>. The view stanza is named
    <parentTrack>_view_<viewSuffix>, is indented by 4 and is inserted directly after the parent
    stanza. contType becomes the 'type' of the view and contLabel its shortLabel. The 'view' tag is
    derived from contLabel with makeLegalTrackName(). The hub.txt file is rewritten in place.
    Calls errAbort() if contName has the wrong format or if the parent is not in the hub.txt
    """
    logging.debug("Adding to %s: view of type %s with name %s and label %s" % (hubFname, contType, contName, contLabel))

    if "/" not in contName:
        errAbort("For views, the parent has to be specified in the name with slash, e.g. myComposite/myView")
    if contName.count("/")!=1:
        errAbort("View name must contain one single slash, but not more.")

    parentName, viewTrackSuffix = contName.split("/")

    tdbs = tdbCommentsParse(hubFname)

    if parentName not in tdbs:
        errAbort("Parent container track %s does not exist in %s" % (parentName, hubFname))

    viewTrackName = parentName+"_view_"+viewTrackSuffix

    viewName  = makeLegalTrackName(contLabel)

    # no check if viewTrackName already exists: an existing stanza with this name is just overwritten

    viewPairs = [
        ( "track" , viewTrackName),
        ( "shortLabel" , contLabel),
        ( "parent" , parentName),
        ( "view" , viewName),
        ( "visibility" , "dense"),
        ( "type" , contType),
        ( "scoreFilter" , "off"),
        ( "viewUi" , "on")
    ]

    insertTdbs = tdbCommentsFromPairs(viewPairs, indent=4)

    newTdbs = tdbCommentsInsertAfter(tdbs, parentName, insertTdbs)

    parentTdb = newTdbs[parentName]
    # NOTE(review): this replaces any existing subGroup1 of the parent with a fixed list of views
    # (PK/SIG), which is not related to the 'view' tag written above - confirm that this is intended
    stanzaAddVal(parentTdb, "subGroup1", "view Views PK=Peaks SIG=Signals")

    tdbCommentsWrite(newTdbs, hubFname)
    logging.info("New view added, name is %s" % viewTrackName)

def addContainer(hubFname, cont, contType, contName, contLabel):
    """ add a new container track and save hub.txt

    cont is the kind of container, 'composite' or 'superTrack'. contType becomes the 'type' line of the
    new stanza, contName the track name and contLabel both shortLabel and longLabel. The stanza is
    appended to the end of the file.
    Calls errAbort() if a track with this name already exists or if cont is not a known container kind.
    """
    logging.debug("Adding to %s: container of type %s with name %s and label %s" % (hubFname, cont, contName, contLabel))
    tdbs = tdbCommentsParse(hubFname)

    mustBeLegalTrackName(contName)

    if contName in tdbs:
        errAbort(f"A track with the name {contName} already exists in {hubFname}.")

    if cont=="composite":
        contKey = "compositeTrack"
    elif cont=="superTrack":
        contKey = "superTrack"
    else:
        # report the rejected container kind (cont), not the track type
        errAbort("container track type must be either composite or superTrack or view, not %s" % repr(cont))

    containerDef =  (
        ('track', contName),
        ('shortLabel', contLabel),
        ('longLabel', contLabel),
        ('visibility', "dense"),
        ('type', contType),
        ('autoScale', 'group'),
        (contKey, 'on')
    )

    contTdbs = tdbCommentsFromPairs(containerDef)
    newTdbs = tdbCommentsAppendAll(tdbs, contTdbs)

    tdbCommentsWrite(newTdbs, hubFname)

def nest(hubFname, parentName, trackPat):
    """ put all tracks matching trackPat under the container parentName and save hub.txt

    trackPat is a regular expression, it is searched in the track name and the shortLabel. The .hub and
    .genome stanzas and the parent itself never match. The matching stanzas get a 'parent' line, are
    indented (by 8 if the name of the parent contains '_view_', otherwise by 4) and are moved directly
    after the parent. Calls errAbort() if the parent is not in the file or if no track matches.
    """
    tdbs = tdbCommentsParse(hubFname)

    pat = re.compile(trackPat)

    if parentName not in tdbs:
        errAbort("container track %s is not part of hub %s. Try the 'add' command to add it." % (parentName, hubFname))

    isView = "_view_" in parentName

    # split the stanzas into those that are moved and those that stay where they are
    matchTdbs = OrderedDict()
    oldTdbs = OrderedDict()
    for name, tdb in tdbs.items():
        isSpecial = name in (".hub", ".genome")
        if isSpecial or name==parentName or not stanzaMatchesRe(tdb, ["track", "shortLabel"], pat):
            oldTdbs[name] = tdb
        else:
            matchTdbs[name] = tdb

    logging.info("Found %d tracks matching %s" % (len(matchTdbs), trackPat))

    if not matchTdbs:
        errAbort("No matching tracks, aborting.")

    indent = 8 if isView else 4
    matchTdbs = tdbCommentsEdit(matchTdbs, indent=indent, newTags=[["parent", parentName]] )

    # copy over all the old stanzas to newTdbs, and inject the matching stanzas after the parent
    newTdbs = tdbCommentsInsertAfter(oldTdbs, parentName, matchTdbs)

    tdbCommentsWrite(newTdbs, hubFname)

def unnest(hubFname, trackPat):
    """ remove parent attribute from all tracks matching trackPat, unindent them and save hub.txt. Does not change track order.
    trackPat is a regular expression, it is searched in the track name and the shortLabel. The .hub and .genome stanzas never match. """
    tdbs = tdbCommentsParse(hubFname)

    pat = re.compile(trackPat)

    modCount = 0
    newTdbs = OrderedDict()
    for name, tdb in tdbs.items():
        isSpecial = name in (".hub", ".genome")
        if not isSpecial and stanzaMatchesRe(tdb, ["track", "shortLabel"], pat):
            modCount += 1
            tdb = stanzaEdit(tdb, indent=0, delTags=["parent"] )
        newTdbs[name] = tdb

    logging.info("Modified %d stanzas" % modCount)

    tdbCommentsWrite(newTdbs, hubFname)

def hubtools(args, options):
    """ run one hubtools command. args[0] is the name of the command (up, jbrowse, tab, make, conv, ct, add,
    nest or unnest), the other elements of args are the arguments of the command.

    From options, these attributes are used: inDir and outDir (both default to the current directory)
    and doDownload (only for the 'ct' command).
    Calls errAbort() if arguments are missing. An unknown command is only logged as an error.
    """
    if len(args)==0:
        errAbort("Please specify a command.")

    cmd = args[0]

    inDir = "."
    if options.inDir:
        inDir = options.inDir

    outDir = "."
    if options.outDir:
        outDir = options.outDir

    if cmd=="up":
        if len(args)<2:
            errAbort("The 'up' command requires one argument, the name of the hub. You can specify any name, "
                "ideally a short, meaningful string, e.g. atacseq, muscle-rna or yamamoto2022. Avoid special characters.")
        hubName = args[1]
        uploadFiles(inDir, hubName)
        return

    # the hub.txt goes to the output directory, if one was specified, otherwise next to the input files
    tdbDir = inDir
    if options.outDir:
        tdbDir = options.outDir

    if cmd=="jbrowse":
        if len(args)<3:
            errAbort("The 'jbrowse' command requires two arguments.")
        importJbrowse(args[1], args[2], tdbDir)
    elif cmd == "tab":
        if len(args)<2:
            errAbort("The 'tab' command requires one argument.")
        raToTab(args[1])
    elif cmd == "make":
        if len(args)<2:
            errAbort("First argument for make command must be the database, e.g. hg19 or hg38")
        db = args[1]

        meta = parseMeta([inDir])
        dirFiles = readDirs(inDir, meta)

        hubFname = join(tdbDir, "hub.txt")
        logging.info("Writing %s" % hubFname)
        # 'with' closes the file, even if one of the writers fails
        with open(hubFname, "w") as ofh:
            writeHubGenome(ofh, db, meta)

            makeTrackDbEntries(inDir, dirFiles, "top", tdbDir, ofh)
            makeTrackDbEntries(inDir, dirFiles, "comps", tdbDir, ofh)

    elif cmd=="conv":
        if len(args)==1:
            errAbort("First argument for conv command must be the database, e.g. hg19 or hg38")
        db = args[1]

        convTsvDir(inDir, db, outDir)

    elif cmd=="ct":
        if len(args)<2:
            errAbort("The 'ct' command requires one argument: an hgTracks URL with an hgsid, a session link or a local .tar.gz track archive")
        url = args[1]
        doDownload = options.doDownload
        convCtUrlOrFile(url, inDir, outDir, doDownload)

    elif cmd=="add":
        if len(args)<6:
            errAbort("Please provide in this order: hub.txt-Filename, 'composite' or 'superTrack' or 'view', containerType, nameOfContainer, labelOfContainer")
        hubFname = args[1]
        cont = args[2]
        contType = args[3]
        contName = args[4]
        contLabel = args[5]

        if cont=="view":
            addView(hubFname, contType, contName, contLabel)
        else:
            addContainer(hubFname, cont, contType, contName, contLabel)


    elif cmd=="nest":
        if len(args)<4:
            errAbort("Please provide in this order: hub.txt-Filename, containerName, trackRegex")
        hubFname = args[1]
        contName = args[2]
        trackPat = args[3]

        nest(hubFname, contName, trackPat)

    elif cmd=="unnest":
        if len(args)<3:
            errAbort("Please provide in this order: hub.txt-Filename, trackRegex")
        hubFname = args[1]
        trackPat = args[2]

        unnest(hubFname, trackPat)

    #elif cmd=="view":
        #if len(args)<3:
            #errAbort("Please provide in this order: hub.txt-Filename, containerName, trackRegex")
        #hubFname = args[1]
        ##trackPat = args[2]

        #unnest(hubFname, trackPat)

        #trackPat = args[4]
        #splitPat = None
        #if len(args)>5:
            #splitPat = args[5]
    else:
        logging.error("Unknown command: '%s'" % args[0])


# ----------- main --------------
def main():
    " parse the command line and run the requested command "
    args, options = parseArgs()

    hubtools(args, options)


# only run when started as a script, so the file can be imported (e.g. for tests) without side effects
if __name__ == "__main__":
    main()
