# MgraphTSPfile.pl updated Jan. 24, 2018 - 
# copyright 2018 Dan Gusfield
#
# call on a command line in a terminal window with:
#
# perl MgraphTSPfile.pl edge-weight-file max-weight-for-a-real-edge

# Program MgraphTSPfile.pl creates the GG TSP formulation to compute a TS *path* from an edge-weight file
# for a graph G,  with no specific start node.
# It adds a 0 node with edges between 0 and all other nodes 1 through n, with cost in the real data. 
# 0. That transformation creates graph G' from G. So, the ILP computes a TS tour in G'.
#
# Also, this version of the program handles asymmetric data - as long as for every pair of nodes (i,j)
# both (i,j) and (j,i) are allowed, i.e., with cost below threshold, or both are disallowed.
#
# This implements the TSP formulation in the cervix cancer paper in Microarrays, Sept. 2015
# by Lorenzo et al.  I use Fi,j for their yi,j
#
#
open (IN, "$ARGV[0]");  # The input distance data
open (TSP, ">M$ARGV[0].lp");
$bound = $ARGV[1]; # upper bound on meaningful data. Larger values indicate that the edge does not exist.
@D = ();
%bin = ();
%generals = ();
$F0 = "";
$source_constraints = "";
$sink_constraints = "";
$fconstraints = "";
$fflow = "";
$infflow = "";
$outfflow = "";
$objective = "";

$i = 1;
while ($line = <IN>) {
    $maxindex= length($line) - 1;
    @linedata = split (/ +/, $line); # changed Jan. 24, 2018
#    print "@linedata";
    $ncols = @linedata;
#    print "ncols $ncols\n";
       foreach $j (1 .. $ncols) {
             $D[$i][$j] = $linedata[$j-1];
       }
    $i++;
}
$n = $i-1; # this is the number of rows in the input matrix for the graph.

foreach $p (1..$n) {    # DG change n-1 to n, because we need a term for F0,n in the constraints.
    $source_constraints .= "+ F0,$p ";
    $sink_constraints .= "+ F$p,0 ";
    $bin{"F0,$p"} = 1;
    $bin{"F$p,0"} = 1;

    foreach $q ($p+1 .. $n) { 
     
     if ($p != $q) {
       if ($D[$p][$q] < $bound) {   # check if the distance of the (p,q) pair is from a real edge.

            $mult100 = int ($D[$p][$q] * 100); # the effect of this is to round to two decimal places after the .
            $D[$p][$q] = $mult100/100;
#
            $objective .= "+ $D[$p][$q] F$p,$q "; 
            $constraints .= "F$p,$q + F$q,$p <= 1\n"; # comment this out for the pure GG version - to test strength
            $bin{"F$p,$q"} = 1;
            $bin{"F$q,$p"} = 1;

            $fconstraints .= "f$p,$q - $n F$p,$q <= 0\n";
            $generals{"f$p,$q"} = 1; 
        }
        else {
           $F0 .= "F$p,$q = 0\n";  # otherwise set the binary F variable to 0, to ensure that edge (p,q) is not used,
                                   # and set the associated f variable to 0.
           $F0 .= "f$p,$q = 0\n";
        }

          if ($D[$q][$p] < $bound) { # now consider the reversed pair (q,p) which could be different from (p,q) in the asymmetric case.
            $mult100 = int ($D[$q][$p] * 100); # the effect of this is to round to two decimal places after the .
            $D[$q][$p] = $mult100/100;
#
            $objective .= "+ $D[$q][$p] F$q,$p ";
            $bin{"F$q,$p"} = 1;

            $fconstraints .= "f$q,$p - $n F$q,$p <= 0\n";
            $generals{"f$q,$p"} = 1; 
        }
        else {
           $F0 .= "F$q,$p = 0\n";
           $F0 .= "f$q,$p = 0\n";
        }
     }

   } # end of foreach q
} # end of foreach p

#$source_constraints .= "+ F0,$n = 1\n"; #DG Dec. 25, 2017
#$sink_constraints .= "+ F$n,0 = 1\n"; #DG Dec. 25
$source_constraints .= "= 1\n";
$sink_constraints .= "= 1\n";

foreach $q (1 .. $n) {
    $fflow .= "f0,$q - $n F0,$q = 0\n"; 
    $generals{"f0,$q"} = 1;
    $generals{"f$q,0"} = 1;
}

print TSP "minimize\n";
print TSP "$objective\n";
print TSP "such that\n";
print TSP "$source_constraints";
print TSP "$sink_constraints\n";
print TSP "$fconstraints\n";
print TSP "$fflow";

foreach $i (1..$n){      # here we set up the equalities that say that there must be an entrance to, and 
                         # an exit from each city.
                         #
   $Fstring = "";  # used to force one exit from each node $i.
   $Istring = "";  # used to force one entrance to each node $i.

   foreach $p (1..$n){
        $index = "$i,$p";
        if ($i != $p) {
           $Fstring .= "+ F$index ";  # this ensures that we have a tour, not just a path.
           $Istring .= "+ F$p,$i ";
        }
   }
      $Fstring .= "+ F$i,0 ";
      $Istring .= "+ F0,$i ";
      print TSP "$Fstring = 1\n";
      print TSP "$Istring = 1\n";
      print TSP  "\n";
}

foreach $i (1 .. $n) {    # constraints for semi-conservation at each non-zero node. One more unit of flow
                          # entering node $i than leaving it.
    $infflow = "";
    $outfflow = "";
    foreach $j (0 .. $n) {
       if ($i != $j) {
         $infflow .= "+ f$j,$i ";
         $outfflow .= "- f$i,$j ";
         $generals{"f$j,$i"};
         $generals{"f$i,$j"};
       }
    }
    print TSP "$infflow \n";
    print TSP  "$outfflow = 1 \n";
}


print TSP "$constraints \n";
print TSP "$F0";
print TSP "binaries\n";
foreach $var (sort keys %bin) {
  print TSP "$var\n";
}

print TSP "generals\n";
foreach $var (sort keys %generals) {
  print TSP "$var\n";
}

print TSP "end";
close (TSP);
print "\n The ILP formulation is in file M$ARGV[0].lp \n";
