# PromoterAI, Claude max, Mar 20 2026

# Source: promoterAI_tss500.tsv.gz from https://primateai3d.basespace.illumina.com/
# 262M rows, 118.6M unique variants, 39.5M unique positions, scores within 500bp of TSS

# Work in the build directory. Download promoterAI_tss500.tsv.gz from Illumina
# BaseSpace (requires registration) into this directory before converting.
#
# Convert to 4 bedGraph files (one per alt allele) + overlap BED.
# Picks max absolute score when transcripts overlap; overlap BED has all
# per-transcript scores.
# The && stops the converter from running in whatever directory the shell
# happens to be in when the cd fails (missing or unmounted build directory).
cd /hive/data/genomes/hg38/bed/promoterai \
    && python3 "$HOME/kent/src/hg/makeDb/scripts/promoterAiToBigWig.py"

# Sort each per-allele bedGraph (chrom, then numeric start) and convert it to bigWig.
# LC_ALL=C forces a byte-wise, case-sensitive chromosome order so the result does
# not depend on the caller's locale; bedGraphToBigWig rejects input that is not
# sorted that way.
# The steps are chained with && so the bedGraph inputs are only removed once the
# bigWig has actually been written; on failure they are kept for a retry.
for alt in A C G T; do
    LC_ALL=C sort -k1,1 -k2,2n "promoterAi_${alt}.bedGraph" > "promoterAi_${alt}.sorted.bedGraph" \
        && bedGraphToBigWig "promoterAi_${alt}.sorted.bedGraph" /hive/data/genomes/hg38/chrom.sizes "promoterAi_${alt}.bw" \
        && rm "promoterAi_${alt}.bedGraph" "promoterAi_${alt}.sorted.bedGraph"
done

# Sort the overlap BED (chrom, then numeric start) and convert it to bigBed using
# the promoterAiOverlaps.as field definitions.
# LC_ALL=C forces a byte-wise, case-sensitive chromosome order independent of the
# caller's locale, which is the order bedToBigBed requires.
# The steps are chained with && so the BED files are only removed after the bigBed
# was built successfully; on failure they are kept for a retry.
LC_ALL=C sort -k1,1 -k2,2n promoterAi_overlaps.bed > promoterAi_overlaps.sorted.bed \
    && bedToBigBed -type=bed9+ -as="$HOME/kent/src/hg/makeDb/scripts/promoterAiOverlaps.as" -tab \
        promoterAi_overlaps.sorted.bed /hive/data/genomes/hg38/chrom.sizes promoterAi_overlaps.bb \
    && rm promoterAi_overlaps.bed promoterAi_overlaps.sorted.bed

# Expose the built files under /gbdb via symlinks.
# Each entry is "<suffix of the built promoterAi_* file>:<link name in /gbdb>".
mkdir -p /gbdb/hg38/promoterAi
for linkSpec in A.bw:a.bw C.bw:c.bw G.bw:g.bw T.bw:t.bw overlaps.bb:overlaps.bb; do
    builtSuffix=${linkSpec%%:*}   # part before the colon
    linkName=${linkSpec##*:}      # part after the colon
    ln -s "/hive/data/genomes/hg38/bed/promoterai/promoterAi_${builtSuffix}" \
        "/gbdb/hg38/promoterAi/${linkName}"
done
