#!/usr/bin/tclsh

# DO NOT EDIT the /cluster/bin/scripts copy of this file -- 
# edit ~/kent/src/hg/utils/automation/blastz-make-joblist instead.

# Written by Scott Schwartz

# $Id: blastz-make-joblist,v 1.1 2006/10/09 20:44:33 angie Exp $


# The settings-file path is the first command-line argument.  Appending the
# literal word DEF first guarantees argv is never empty, so when no argument
# is given the path falls back to a file named DEF (a relative path).
lappend argv DEF
set def [lindex $argv 0]


# Write one message line to standard error.
proc say {s} {
    puts stderr $s
}
# Split the inclusive coordinate range a..z into consecutive chunks of d
# positions, extending the end of each chunk by o positions so that
# neighbouring chunks overlap.
#
#   a  first position of the range
#   z  last position of the range (inclusive)
#   d  chunk size, i.e. the step between successive chunk starts; must be > 0
#   o  number of positions added to the end of every chunk
#
# Returns a list of "n start end" triples, where n numbers the chunks from 1
# and end is clamped so that it never exceeds z.  The list is empty when
# a is greater than z.
#
# Raises an error when d is not positive: the start position would then never
# move past z and the loop would run forever.
proc overlapping_intervals {a z d o} {
    if {$d <= 0} {
        error "overlapping_intervals: chunk size must be positive, got \"$d\""
    }
    set r {}
    set n 1
    set i $a
    # i is the start of the current chunk, j its nominal (un-overlapped) end.
    for {set j [expr {$a + $d - 1}]} {$i <= $z} {set i [expr {$j + 1}]; incr j $d} {
        # Extend by the overlap, but never beyond the end of the range.
        set k [expr {$j + $o}]
        if {$k > $z} {set k $z}
        lappend r [list $n $i $k]
        incr n
    }
    return $r
}

# Fill the global array named by var from a file whose entire contents are
# read as a flat list of alternating keys and values.
#
#   file  path of the file to read
#   var   name of the global array that receives the key/value pairs
proc loadsize {file var} {
    # Alias the global array under a fixed local name.
    upvar #0 $var sizes
    set chan [open $file]
    set pairs [read $chan]
    array set sizes $pairs
    close $chan
}

# Read NAME=value assignments from a settings file and define each NAME as a
# global Tcl variable.
#
#   file  path of the settings file to read
#
# Lines starting with "#" or "export" are skipped, as is any line that does
# not scan as NAME=value.  Each value is passed through subst before being
# stored, so it may refer to variables defined by earlier lines.
#
# NOTE(review): subst also performs command and backslash substitution, so
# bracketed text in the settings file is executed as Tcl code.  The file must
# come from a trusted source.
proc loadvars {file} {
    global env
    # subst below runs in this procedure's scope; a local HOME presumably
    # exists so that values written with $HOME expand to the user's home.
    # NOTE(review): a line assigning HOME itself would make the later
    # "global" call fail, because HOME already exists here as a local.
    set HOME [set env(HOME)]
    set a {}; set b {};
    set f [open $file]
    set line {}
    while {[gets $f line] >= 0} {
        # Skip comment lines and shell "export" statements.
        if {[string match "#*" $line]} {continue}
        if {[string match "export*" $line]} {continue}
        # NAME is a run of upper-case letters, digits and underscores; the
        # value is the first whitespace-delimited word after the "=" sign.
        if {[scan $line { %[A-Z_0-9]=%s } a b]!=2} {continue}
        # puts stdout "set $a $b ; [subst $b]"
        # Link NAME to the global of the same name, so that the assignment
        # is global and later values can refer to it during subst.
        global $a
        set $a [subst $b]
    }
    close $f
}

# Tell whether a file still has to be produced.
#
# Returns 1 when the file does not exist or has zero length, 0 otherwise.
proc needfile {file} {
    if {![file exists $file]} {
        return 1
    }
    return [expr {[file size $file] == 0}]
}

# Collect the sequence files of a directory and make sure the matching
# size file exists.
#
#   seqdir    directory holding the sequence files
#   lenfile   path of the size file to create when it is missing or empty
#   seqfiles  name of a variable in the caller that receives the
#             dictionary-sorted list of sequence files
#
# The *.nib files are used when there are any, and sizes are then produced
# by nibSize; otherwise the *.fa files are used with faSize -detailed=off.
# The glob for *.fa raises an error when no file matches, so a directory
# without either kind of file is reported to the caller.  The external
# program only runs when needfile says the size file is missing or empty.
#
# The command line is assembled from proper lists, so a size-file path that
# contains whitespace or Tcl metacharacters is still passed to exec as a
# single redirection word.
proc make_lenfile {seqdir lenfile seqfiles} {
    upvar $seqfiles f
    set files [lsort -dictionary [glob -nocomplain $seqdir/*.nib]]
    if {[llength $files] != 0} {
        set sizer [list nibSize]
    } else {
        set files [lsort -dictionary [glob $seqdir/*.fa]]
        set sizer [list faSize -detailed=off]
    }
    if {[needfile $lenfile]} {
        # eval concatenates its list arguments without re-splitting them.
        eval [list exec] $sizer $files [list >$lenfile]
    }
    set f $files
}

# Give a variable in the caller its value from another caller variable,
# falling back to a default.
#
#   new   name of the caller variable to set
#   old   name of the caller variable to copy from when it exists
#   dflt  value used when the variable named by old does not exist
#
# Returns the value that was assigned.
proc set_var {new old dflt} {
    upvar 1 $new target $old source
    if {![info exists source]} {
        return [set target $dflt]
    }
    return [set target $source]
}

# -- main --

# Define every NAME=value setting of the settings file as a global variable.
loadvars $def

# The DEF value is written at the end of every job line below; presumably it
# has to be absolute because the jobs are not run from this directory.
if {![string equal [file pathtype $DEF] absolute]} {
    error "DEF must be absolute";
}

file mkdir $BASE

# Chunking parameters: use the value from the settings file when it defines
# one, otherwise the default given here.
set_var seq1chunk SEQ1_CHUNK [expr {10*1000*1000}]
set_var seq1lap   SEQ1_LAP   10000
set_var seq2chunk SEQ2_CHUNK [expr {30*1000*1000}]
set_var seq2lap   SEQ2_LAP   0
set_var seq2_in_contigs SEQ2_IN_CONTIGS 0

say "Computing genome sizes"

# List the sequence files of both genomes and create the size files if they
# are missing or empty, then load the sizes into global arrays.
set seq1files {}
set seq2files {}
make_lenfile $SEQ1_DIR $SEQ1_LEN seq1files
make_lenfile $SEQ2_DIR $SEQ2_LEN seq2files

loadsize $SEQ1_LEN seq1len
loadsize $SEQ2_LEN seq2len

# Two outputs are produced: a shell script of mkdir commands in
# $BASE/xdir.sh, and the job list itself on standard output.
say "Writing mkdir script: $BASE/xdir.sh"
set dirsh [open "$BASE/xdir.sh" w]
say "Writing job list"
foreach s1 $seq1files {
    set f1 [file tail $s1]
    set f1r [file rootname $f1]
    set D "$RAW/$f1r"

    puts $dirsh "mkdir -p $D"
    # One progress dot per first-genome file.  The option-first spelling is
    # the documented puts syntax; the trailing "nonewline" word used before
    # is a legacy form that newer Tcl releases no longer accept.
    puts -nonewline stderr "."

    set l1 $seq1len($f1r)
    foreach i1 [overlapping_intervals 1 $l1 $seq1chunk $seq1lap] {
        # Each interval is a triple: chunk number, start, end.
        set a1 [lindex $i1 1]
        set z1 [lindex $i1 2]
        puts $dirsh "mkdir $D/$a1.$z1"
        foreach s2 $seq2files {
            set f2 [file tail $s2]
            if {$seq2_in_contigs} {
                # Second genome is not chunked: one job per file, with 0 0
                # in place of the start and end coordinates.
                puts stdout "$ALIGN $f1 $a1 $z1 $f2 0 0 $DEF"
            } else {
                set l2 $seq2len([file rootname $f2])
                foreach i2 [overlapping_intervals 1 $l2 $seq2chunk $seq2lap] {
                    set a2 [lindex $i2 1]
                    set z2 [lindex $i2 2]
                    puts stdout "$ALIGN $f1 $a1 $z1 $f2 $a2 $z2 $DEF"
                }
            }
        }
    }
}
close $dirsh
# Finish the line of progress dots.
puts stderr "."
