#! /usr/bin/perl

#
# linklint2dot v0.03 -- Creates .dot files from linklint output
# Copyright (C) 2004 Christoph Sommer 
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#

#
# linklint2dot's home is at
#   http://www.deltadevelopment.de/users/christoph/linklint2dot/
#
# Examples for generating the source file:
#   linklint -list -xref -forward /@ > mysite.ll
#   linklint -http -no_query_string -host www.mysite.example /@ -list -xref -forward > mysite.ll
#
# Example invocation:
#   ./linklint2dot.pl < mysite.ll > mysite.dot
#
# Examples for rendering the graph:
#   springgraph < mysite.dot > mysite.png
#   dot -Tpng -Grankdir=LR < mysite.dot > mysite.png
#

#
# --- start of script ---
#

# read in complete source
@lines = <STDIN>;
foreach $line(@lines) {
  $input.=$line;
  }

# locate block with forward links
if ($input !~ m/# found\s+\w+\s+files with forward links\s*\n#-*\n((.|\n)*)/) {
  print STDERR "\n\nFatal: Could not find block with forward links.\n\n";
  print STDERR "Make sure you started linklint like this:\n";
  print STDERR "linklint /@ -list -xref -forward | linklint2dot.pl\n\n";
  die;
  }
$input = $1;

# each section ends with an empty line
@sections = split(/\n\n/, $input);

$nodes = 0;
# iterate over sections
foreach $section(@sections) {
  @lines = split(/\n/, $section);

  # first line contains source URL
  $source = shift(@lines);

  # assign an id to this URL (if not already assigned)
  if (!$nodenr{$source}) {
    $nodenr{$source} = ++$nodes;
    push @nodes, {
      'id' => $nodenr{$source},
      'label' => $source
      };
    }

  # links from this URL are indented, so skip a line if it's not
  if ($lines[0] !~ m/^\s/) { shift(@lines); }

  # skip the summary line
  if ($lines[0] =~ m/contains/) { shift(@lines); }

  # each indented line should now contain one destination URL
  foreach $line(@lines) {
    if ($line =~ m/\s+(.*)$/) {
      $dest = $1;

      # skip external links and links to resources that are clearly not HTML
      next if ($dest =~ m/^http|mailto/);
      next if ($dest =~ m/\.(gif|jpg|png|zip|pdf|PDF)$/);

      # assign an id to this URL (if not already assigned) 
      if (!$nodenr{$dest}) {
        $nodenr{$dest} = ++$nodes;
        push @nodes, {
          'id' => $nodenr{$dest},
          'label' => $dest
          };
        }

      # store this edge
      push @edges, {
        'source' => $nodenr{$source}, 
        'dest'   => $nodenr{$dest}
        };

      #$destcount{$nodenr{$dest}}++;
      }
    }
  }

# simple algorithm to calculate the "depth" of an URL relative to the root
$depth{1} = 1;
foreach $dummy(@edges) {
  foreach $edge(@edges) {
    if ($depth{$edge->{'source'}}) {
      if ((!$depth{$edge->{'dest'}}) ||
          ($depth{$edge->{'dest'}} > $depth{$edge->{'source'}} + 1)) {
            $depth{$edge->{'dest'}} = $depth{$edge->{'source'}} + 1;
            }
      }
    }
  }

print "digraph links {\n";
  
# output one node for each URL
foreach $node(@nodes) {
  print $node->{'id'}." [label=\"".$node->{'label'}."\"]\n";
  }

# output one edge for each link  
foreach $edge(@edges) {
  # do not output edges for links to URLs that "almost every URL refers to"
  #if ($destcount{$edge->{"dest"}} > 5) { next; }

  # only output edges for links to URLs farer away from the root
  if ($depth{$edge->{"dest"}} <= $depth{$edge->{"source"}}) { next; }

  print $edge->{'source'}." -> ".$edge->{"dest"}."\n";
  }
  
print "}\n";

#
# --- end of script, here be dragons ---
#
