#!/usr/bin/perl

use strict;
use warnings;
use utf8;
#use bigint;
use List::MoreUtils qw(uniq);
use HTML::TreeBuilder;
use Data::Dumper;
use Getopt::Long;
use UNIVERSAL 'isa';

my $class    = 'bdm::egiw';
my @to_keep  = ('from_setting', 'to_setting');
my $output_dir = '../html/bdm_doc';
my $input_dir = '../../../../library/doc/html/';

GetOptions(
    'class=s'      => \$class,
    'to-keep=s'    => \@to_keep,
    'output-dir=s' => \$output_dir,
    'input-dir=s' => \$input_dir,
) or die('Wrong options format');

my $filename = $class;
$filename =~ s/bdm::/classbdm_1_1/;
$filename =~ s/$/.html/;
my $out_filename = $output_dir . '/' . $filename;
open (my $OUT, ">$out_filename") 
    or die "cannot open $filename: $!";
$filename = $input_dir . '/' . $filename;

print STDERR "Processing $filename\n";

my $html = HTML::TreeBuilder->new();
$html->store_comments(1);
$html->parse_file($filename);

my $body        = $html->content_array_ref()->[1];
my $i = 0;
my $div_content;
while ($div_content = $body->content_array_ref()->[$i++]) {
    last if (defined $div_content->attr('class')) 
         && ($div_content->attr('class') eq 'contents');
}
$i = 0;
my $table;

FIND_TABLE:
while ($table = $div_content->content_array_ref()->[$i++]) {
    if ( isa($table, "HTML::Element") ) {
        last if $table->tag() eq 'table';
        my $container = $table;
        my $j = 0;
        while ($table = $container->content_array_ref()->[$j++]) {
            last FIND_TABLE if isa($table, "HTML::Element") && ($table->tag() eq 'table');
        }
    }
}

#;;print STDERR $table;
#;;print_content($table, 0, 5);
#;;print_content($div_content, 0, 5);

# generate mappings between member names and references
# find hrefs contained in each description div
my %ref_of;
my %name_of;
my %div_index_of;
my %hrefs_of_div;
$i = -1;
#;;print STDERR "start with \$i = $i\n";
while (my $node = $div_content->content_array_ref()->[++$i]) {
    next if !ref $node;

    my $comment;

    if ($node->tag() eq '~comment') {
        $comment = $node;
    }
    elsif($node->tag() eq 'p') {
        next if !defined $node->content_array_ref()->[1];
        next if !ref $node->content_array_ref()->[1];
        $comment = $node->content_array_ref()->[1];
        next if $comment->tag() ne '~comment';
    }
    else {
        next;
    }

    my ($member, $ref)  = parse_comment($comment);
    next if !defined $ref || !defined $member;

    $name_of{$ref}      = $member;
    $ref_of{$member}    = $ref;
    $div_index_of{$ref} = $i+1;
    $hrefs_of_div{$i+1} = [ find_hrefs($node->right()) ];
}
#;;print STDERR Data::Dumper->Dump([\%name_of], ['name_of']);
#;;print STDERR Data::Dumper->Dump([\%div_index_of], ['div_index_of']);
#;;print STDERR Data::Dumper->Dump([\%hrefs_of_div], ['hrefs_of_div']);

# find hrefs contained in each line of summary table
# enrich mappings between member names and references
my %tr_index_of;
my %hrefs_of_tr;
$i = -1;
while (my $tr = $table->content_array_ref()->[++$i]) {
    next if !defined $tr->content_array_ref()->[1];
    my $clss      = $tr->content_array_ref()->[1]->attr('class');
    #;;print STDERR "clss: $clss\n";
    my $anchor = $tr->content_array_ref()->[1]
                    ->content_array_ref()->[0];
    next if !defined $clss  ||  $clss ne 'memItemRight';
    next if !ref $anchor    ||  $anchor->tag() ne 'a';

    my ($ref) = $anchor->attr('href') =~ m/#(.*)/;
    my $member = $class . '::' . $anchor->as_text();
    #;;print STDERR "Member: $member\n";

    $tr_index_of{$ref} = $i;

    if (exists $name_of{$ref} && $name_of{$ref} ne $member) {
        warn "inconsistency in summary table and detailed description:\n"
             . "  $ref in summary:     $member\n" 
             . "  $ref in description: $name_of{$ref}\n"
             . "using the value from summary table";
    }
    $name_of{$ref}   = $member;
    $ref_of{$member} = $ref;

#    $hrefs_of_tr{$i} = [ find_hrefs($tr), find_hrefs($tr->right()) ];
}

#;;print STDERR Data::Dumper->Dump([\%hrefs_of_tr], ['hrefs_of_tr']);
#;;print STDERR Data::Dumper->Dump([\%tr_index_of], ['tr_index_of']);
#;;print STDERR Data::Dumper->Dump([\%name_of], ['name_of']);

# convert given function names to refs
my @to_keep_refs;
for my $fn (@to_keep) {
    push @to_keep_refs, $ref_of{$class . '::' . $fn};
}
#;;print STDERR Data::Dumper->Dump([\@to_keep_refs], ['to_keep_refs']);

# enrich to_keep_refs with functions referenced in descriptions of to_keep
# functions. Just one level depth, no recursion.
my @new_to_keep_refs;
for my $ref (@to_keep_refs) {
#    if (exists $tr_index_of{$ref}) {
#        push @new_to_keep_refs, @{ $hrefs_of_tr{ $tr_index_of{ $ref}} };
#    } 
    if (exists $div_index_of{$ref}) {
        push @new_to_keep_refs, @{ $hrefs_of_div{$div_index_of{$ref}} };
    }
}

@to_keep_refs = uniq (@to_keep_refs, @new_to_keep_refs);
#;;print STDERR Data::Dumper->Dump([\@to_keep_refs], ['to_keep_refs']);

my @to_keep_divs = sort grep {defined} @div_index_of{@to_keep_refs};
my @to_keep_trs  = sort grep {defined} @tr_index_of{ @to_keep_refs};
my @to_keep_names = sort @name_of{@to_keep_refs};
#;;print STDERR Data::Dumper->Dump([\@to_keep_divs], ['to_keep_divs']);
#;;print STDERR Data::Dumper->Dump([\@to_keep_trs], ['to_keep_trs']);
#;;print STDERR Data::Dumper->Dump([\@to_keep_names], ['to_keep_names']);

# detach all divs which are not in to_keep_divs
my @to_detach_divs = set_minus([values %div_index_of], \@to_keep_divs);
#;;print STDERR Data::Dumper->Dump([\@to_detach_divs], ['to_detach_divs']);
for my $node ( @{$div_content->content_array_ref()}[@to_detach_divs] ) {
    $node->detach(); 
}

# detach all trs which are not in to_keep_trs
my @to_detach_trs;

$i = -1;
while (my $tr = $table->content_array_ref()->[++$i]) {
    my $class = $tr->content_array_ref()->[0]->attr('class');
    next if  !defined $class   ||  $class ne 'memItemLeft';
    next if grep {$i == $_} @to_keep_trs;

    push @to_detach_trs, $i;

    my $tr2 = $tr->right();
    next if !defined $tr2;
    my $tr2_class = $tr2->content_array_ref()->[0]->attr('class');
    next if  !defined $tr2_class  ||  $tr2_class ne 'mdescLeft';

    push @to_detach_trs, $i+1;
}

for my $node ( @{$table->content_array_ref()}[@to_detach_trs] ) {
    $node->detach();
}

#print the result
print $OUT $html->as_HTML();

$html->delete;

#============================================================================

sub set_minus { 
    my ($original_set, $difference) = @_;
    my @result;
    for $i (@$original_set) {
        next if !defined $i;
        next if grep { $i == $_ } @$difference;
        push @result, $i;
    }
    return @result;
}

# get member name and doxygen reference number out of given doxygen comment
sub parse_comment {
    my $comment_el = shift or return (undef, undef);

    my $comment = $comment_el->as_HTML();
    my ($member, $ref) = $comment =~ m{
        <!-- [ ] doxytag: [ ]
        member="(.*?)"[ ]        # *? is non-greedy
        ref="(.*?)"
    }xms;

    return ($member, $ref);
}


# depth-first search
sub dfs { 
    my $node     = shift or return;
    return if !ref $node; # do not process text nodes

    my $function = shift or return; 
    my $data     = shift || {};
    my $depth    = shift || 0;
    my $limit    = shift || 10000; #inf;
    return if $depth > $limit;

    $function->($node, $data, $depth, $limit);

    for ($node->content_list()) {
        dfs($_, $function, $data, $depth+1, $limit);
    }
}

# find hrefs in given element which are pointing somewhere 
# into current document
sub find_hrefs {
    my $node  = shift;
    my @hrefs = ();

    my $fetch_href  = sub {
        my ($node, $hrefs) = @_;
        return if $node->tag() ne 'a' || !defined $node->attr('href');
        my ($hashpart) = $node->attr('href') =~ m/ .*? \# (.+) /xms;
        return if !$hashpart || !$name_of{$hashpart};
        push @$hrefs, $hashpart;
    };

    dfs($node, $fetch_href, \@hrefs);
    return @hrefs;
}

# print html subtree -- just for debugging
sub print_content {
    my ($node, $depth, $limit) = @_;
    my $print_tag = sub {
        my ($node, undef, $depth) = @_;
        print STDERR " " x $depth, $node->tag(), "\n";
        #;;print $node->as_HTML() if $node->tag() eq '~comment';
    };
    dfs($node, $print_tag, {}, $depth, $limit);
}

