#!/usr/bin/env python3

import logging, sys, optparse, os, requests, sys, urllib3, atexit, urllib
from os.path import isfile
from email.mime.text import MIMEText
from subprocess import Popen, PIPE

# Path of the lock file. None until createLockFile() sets it to <statusFile>.lock.
# The lock file makes sure that only one instance of the program runs at the same time.
lockFname = None

# switch off insecure SSL warnings: the hub.txt files are requested with verify=False
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# value of the "From" header of the notification emails
fromEmail="qateam@gi.ucsc.edu"

# body of the notification email, the only %s is replaced with the URL of the hub
emailTemplate = """Dear UCSC Public Hub author,

This is an automated email sent by the UCSC Genome Browser Group's hubPublicMail system to alert you
that your public track hub at the address:

%s

Has been inaccessible for at least 48 consecutive hours. If it continues to be offline, we may have to remove it
from our public hubs list at https://genome.ucsc.edu/cgi-bin/hgHubConnect

Do not hesitate to let us know if we can help you resolve this situation, e.g. by updating the URL
where the hub is hosted or possibly hosting the files on our servers.

You can reach us at genome-www@soe.ucsc.edu.

Thank you for your interest and contributions,
The UCSC Genome Browser Group
"""
# ==== functions =====
    
def parseArgs():
    """ setup logging, parse command line arguments and options. -h shows auto-generated help page

    Returns the tuple (args, options): args is the list of the two positional arguments
    [hgcentralname, statusFile], options is the optparse values object (attribute: debug).
    Prints the help page and exits with status 1 unless exactly two positional arguments are given.
    """
    parser = optparse.OptionParser("""usage: %prog [options] hgcentralname statusFile - send email if public hub is down
    Goes through the following steps:
    1) Get the list of all public hubs
    2) Try to get their URLs and write all to statusFile.
    3) If a hub fails, increase count in statusFile.
    4) If count is > 24, send an email to hub email and set the failCount to -48.

    Example:
       hubPublicMail hgcentraltest /tmp/hubPublicStatus.tab
    """)

    parser.add_option("-d", "--debug", dest="debug", action="store_true", help="show debug messages")
    (options, args) = parser.parse_args()

    # the caller unpacks exactly two positional arguments, so reject everything else right here
    # with the help page instead of a traceback later on
    if len(args) != 2:
        parser.print_help()
        sys.exit(1)

    logLevel = logging.DEBUG if options.debug else logging.INFO
    logging.basicConfig(level=logLevel)
    # basicConfig does nothing if the root logger already has handlers, so set the level explicitly
    logging.getLogger().setLevel(logLevel)

    return args, options
# ----------- main --------------
def getHubUrls(centralName):
    " query the hubPublic table of the database centralName with hgsql, return the hubUrl values as a list "
    logging.debug("Getting hubUrls from db %s" % centralName)
    # only the database named hgcentral is queried with an explicit host (-h genome-centdb)
    needsHost = (centralName == 'hgcentral')
    if not needsHost:
        sqlCmd = "hgsql %s -se 'select hubUrl from hubPublic'" % centralName
    else:
        sqlCmd = "hgsql -h genome-centdb %s -se 'select hubUrl from hubPublic'" % centralName
    # os.popen is used on purpose, the author asked not to switch to subprocess here
    hgsqlOutput = os.popen(sqlCmd).read()
    # one list element per line of hgsql output
    return hgsqlOutput.splitlines()

def parseEmail(s):
    """ return only email address given email value of hub.txt. handles these weird cases:

    jferna10@ucsc.edu or max@soe.ucsc.edu  -> the address after the " or "
    yzz2 at psu.edu                        -> " at " is replaced with "@"
    <a HREF="mailto:dunham@ebi.ac.uk"TARGET=_BLANK>Ian Dunham</a>
                                           -> the quoted address, without the mailto: prefix
    Ian Dunham <dunham@ebi.ac.uk>          -> the part between the angle brackets

    Any other value is returned as it is, with surrounding whitespace removed.
    """
    s = s.strip()

    if "<" in s:
        quoted = s.split('"')
        if len(quoted) > 1 and "@" in quoted[1]:
            # HTML link: the address is the double-quoted href value
            addr = quoted[1]
        else:
            # "Name <address>" form: there is no quoted address, so use the bracket content
            addr = s.split("<", 1)[1].split(">", 1)[0]
        addr = addr.strip()
        # the href value of a mail link starts with the URL scheme, which is not part of the address
        if addr.lower().startswith("mailto:"):
            addr = addr[len("mailto:"):].strip()
        return addr

    # pick the alternative first, so that an obfuscated second address is cleaned up, too
    if " or " in s:
        s = s.split(" or ")[1].strip()
    if " at " in s:
        s = s.replace(" at ", "@")
    return s

def _fetchHubTxt(url):
    " return the content behind url as a string or None if it cannot be retrieved "
    # imported here as the module itself only imports the top-level urllib package
    import urllib.request
    try:
        if url.startswith("http"):
            resp = requests.get(url, verify=False, timeout=5)
            # requests does not raise on 4xx/5xx by itself: an error page is not a reachable hub
            resp.raise_for_status()
            return resp.text
        # FTP
        with urllib.request.urlopen(url, timeout=10) as fh:
            return fh.read().decode("utf8")
    except KeyboardInterrupt: # handle ctrl-c for debugging
        sys.exit(1)
    except Exception:
        # any network, protocol or decoding problem counts as "hub is down"
        return None

def downloadUrls(urls, emails):
    """ try to read all hub.txt in URLs, return list of failed URLs, and dict hub -> email

    urls is a list of hub.txt URLs. URLs starting with "http" are fetched with requests, all
    others (FTP) with urllib.
    emails is not used, it is only accepted to keep the signature unchanged: the returned dict
    is always built from scratch from the downloaded hub.txt files.

    A hub counts as failed if it cannot be downloaded, if the server answers with an HTTP error
    status or if its hub.txt contains a line that is not a "key value" pair. The returned dict
    only has entries for hubs with an email line in their hub.txt.
    """
    didFail = list()
    hubEmails = dict()

    for url in urls:
        logging.debug("Checking %s" % url)

        text = _fetchHubTxt(url)
        if text is None:
            logging.debug("URL %s failed." % url)
            didFail.append(url)
            continue

        for line in text.splitlines():
            line = line.strip()
            # skip empty lines and comment lines: a comment like "#note" is not a broken setting
            if line=="" or line.startswith("#"):
                continue
            keyVal = line.split(None, maxsplit=1)
            if len(keyVal)!=2:
                # some hubs may have broken hub.txt files. Treat these as if they were broken.
                didFail.append(url)
                break

            key, val = keyVal
            if key=="email":
                hubEmails[url] = parseEmail(val)

    return didFail, hubEmails

def sendEmail(dest, body):
    """ send email with the text body to dest and to browserqa-group@ucsc.edu

    The message is piped into /usr/sbin/sendmail, which takes the recipients from the message
    headers (option -t). A non-zero exit status of sendmail is logged as an error, it does not
    stop the program.
    """
    msg = MIMEText(body)
    msg["From"] = fromEmail
    msg["To"] = dest+","+"browserqa-group@ucsc.edu"
    msg["Subject"] = "Your UCSC public hub is down"
    p = Popen(["/usr/sbin/sendmail", "-t", "-oi"], stdin=PIPE)
    p.communicate(msg.as_bytes())
    # communicate() waits for sendmail to finish, so the exit status is available now
    if p.returncode != 0:
        logging.error("sendmail exited with status %d, the email to %s was probably not sent" % (p.returncode, dest))

def readStatus(fname):
    """ read tab sep file with columns hubUrl, email, failCount

    Returns a dict hubUrl -> (email, failCount), with failCount as an int. The dict is empty if
    fname does not exist. Lines starting with # and empty lines are skipped.
    Raises ValueError if a line does not have exactly three columns or failCount is not a number.
    """
    hubs = dict()
    if not isfile(fname):
        return hubs

    logging.debug("Reading %s" % fname)
    with open(fname) as ifh:
        for line in ifh:
            if line.startswith("#"):
                continue
            line = line.rstrip("\n")
            if line == "":
                continue
            row = line.split("\t")
            if len(row)!=3:
                logging.error("Cannot parse line in status file: %s" % repr(line))
                # not an assert: asserts are removed when Python runs with -O
                raise ValueError("Cannot parse line in status file %s: %s" % (fname, repr(line)))
            hubUrl, email, failCount = row
            hubs[hubUrl] = (email, int(failCount))
    return hubs

def mergeInfo(urls, oldUrlInfo, failedUrls, urlEmails):
    """ combine the status of the previous run with the results of the current run.

    urls: list of hubPublic URLs, oldUrlInfo: dict url -> (email, failCount) from the status file,
    failedUrls: list of URLs that failed in this run, urlEmails: dict url -> email from this run.
    Returns a dict url -> (email, failCount). The count is increased by one for a failed URL and
    reset to 0 otherwise. URLs without any known email address are reported and left out. """
    merged = {}
    for hubUrl in urls:
        currentEmail = urlEmails.get(hubUrl)
        previous = oldUrlInfo.get(hubUrl)

        if previous is None:
            # hub is not in the status file yet: without an email address nobody can be notified
            if currentEmail is None:
                print("URL %s is broken and there is no email in the status file. Skipping it." % hubUrl)
                continue
            previous = [currentEmail, 0]

        priorCount = previous[1]
        # prefer most current email address, fall back to the one of the previous run
        email = previous[0] if currentEmail is None else currentEmail
        failCount = priorCount + 1 if hubUrl in failedUrls else 0

        merged[hubUrl] = (email, failCount)

    return merged

def sendEmails(urlInfo):
    " given dict url -> (email, failCount), notify every hub with failCount > 24, set its failCount to -48, return the dict "
    # collect the hubs first, then notify them one by one
    overdue = [(hubUrl, contact) for hubUrl, (contact, fails) in urlInfo.items() if fails>24]
    for hubUrl, contact in overdue:
        # no log message here on purpose, the original one was disabled to silence email
        sendEmail(contact, emailTemplate % hubUrl)
        urlInfo[hubUrl] = (contact, -48)
    return urlInfo

def writeStatus(urlInfo, statusFname):
    " write dict url -> (email, failCount) as a tab-sep file to statusFname, via a .tmp file that is renamed at the end "
    tmpFname = statusFname+".tmp"
    logging.debug("Writing %s" % tmpFname)
    with open(tmpFname, "wt") as outFh:
        outFh.write("#url\temail\tfailCount\n")
        # one line per hub, same column order as the header
        outFh.writelines(
            "\t".join((hubUrl, hubEmail, str(fails))) + "\n"
            for hubUrl, (hubEmail, fails) in urlInfo.items()
        )
    # the status file itself is only replaced after the new content was written completely
    logging.debug("Renaming %s to %s" % (tmpFname, statusFname))
    os.rename(tmpFname, statusFname)

def createLockFile(statusFname):
    """ when downloading files, weird things can happen. even wget sometimes gets stuck. So make
    sure that this program can't run multiple times.

    Creates the empty file statusFname + ".lock", stores its name in the global lockFname and
    registers removeLock to run at program exit. If the lock file already exists, an error is
    logged and the program exits with status 1 without touching the existing lock file. """
    global lockFname
    lockFname = statusFname+".lock"

    try:
        # mode "x" fails if the file exists: check and creation are a single step, so two
        # instances started at the same time cannot both get the lock
        with open(lockFname, "x"):
            pass
    except FileExistsError:
        logging.error("lockfile %s already exists. Check if this program is already running." % lockFname)
        sys.exit(1)

    # registered only after the lock was created by this process, a foreign lock is never removed
    atexit.register(removeLock)

def removeLock():
    " delete the file named by the global lockFname, do nothing if it is not there anymore "
    if not isfile(lockFname):
        return
    os.remove(lockFname)

def hubPublicMail(centralName, statusFname):
    """ send email if a hub fails more than 24 times.

    centralName is the database with the hubPublic table, statusFname the tab-sep file with the
    fail counts, which is read at the start and rewritten at the end. The program exits with
    status 1, without writing the status file, if more than 10 hubs are broken. """
    createLockFile(statusFname)

    hubUrls = getHubUrls(centralName)
    previousInfo = readStatus(statusFname)
    brokenUrls, emailByUrl = downloadUrls(hubUrls, previousInfo)

    # so many failures at once are more likely a problem on our side than on the hubs' side
    brokenCount = len(brokenUrls)
    if brokenCount > 10:
        logging.error(
            "%d broken hubs, which is more than the 10 allowed. Something is weird. Please check the network setup." % brokenCount)
        sys.exit(1)

    currentInfo = sendEmails(mergeInfo(hubUrls, previousInfo, brokenUrls, emailByUrl))

    writeStatus(currentInfo, statusFname)
    removeLock()

def main():
    " entry point of the script: parse the command line and run the hub check "
    args, options = parseArgs()

    # exactly two values are unpacked below, fail with a clear message instead of a traceback
    if len(args) != 2:
        logging.error("Expected two arguments (hgcentralname statusFile), got %d" % len(args))
        sys.exit(1)

    centralName, statusFname = args
    hubPublicMail(centralName, statusFname)


# only run when started as a script, so that importing this file does not start the hub check
if __name__ == "__main__":
    main()
