#!/usr/bin/perl

use strict;
use warnings;
use utf8;
#use bigint;
use List::MoreUtils qw(uniq);
use HTML::TreeBuilder;
use Data::Dumper;
use Getopt::Long;
use UNIVERSAL 'isa';

my $output_dir = '../html/bdm_doc';
my $input_dir  = '../../../../library/doc/html/';
my $filename   = 'annotated.html';
my @classes = @ARGV;
;;;print STDERR Dumper \@classes;

GetOptions(
    'filename=s'   => \$filename,
    'output-dir=s' => \$output_dir,
    'input-dir=s'  => \$input_dir,
) or die('Wrong options format');

my $out_filename = $output_dir . '/' . $filename;
open (my $OUT, ">$out_filename") 
    or die "cannot open $filename: $!";
$filename = $input_dir . '/' . $filename;

print STDERR "Processing $filename\n";

my $html = HTML::TreeBuilder->new();
$html->store_comments(1);
$html->parse_file($filename);
my $body = $html->content_array_ref()->[1];

my $i = 0;
my $div_content;
while ($div_content = $body->content_array_ref()->[$i++]) {
    last if (defined $div_content->attr('class')) 
         && ($div_content->attr('class') eq 'contents');
}

$i = 0;
my $table;
while ($table = $div_content->content_array_ref()->[$i++]) {
    last if isa($table, "HTML::Element") && ( $table->tag() eq 'table' );
}

$i = 0;
my @to_delete;
while (my $node = $table->content_array_ref()->[$i++]) {
    #;;print_content($node);
    my $class = $node->content_array_ref()->[0] #td
                     ->content_array_ref()->[0] #a
                     ->content_array_ref()->[0]; #bdm::ARX

    #;;map {print STDERR " $_\n"} grep {$_ eq $class} @classes;
    #;;print STDERR "$class\n";

    if (grep {$_ eq $class} @classes) {
        ;;;print "$class is in @classes\n";
        next;
    };

    #;;print STDERR "$class will be detached\n";
    #;;print STDERR $node->tag() . ' ' . $node->{_parent}->tag() . "\n";

    push @to_delete, $node;
}

$_->delete() for @to_delete;

#;;print_content($table);

#print the result
print $OUT $html->as_HTML();

$html->delete;

#=============================================================================


# depth-first search
sub dfs { 
    my $node     = shift or return;
    return if !ref $node; # do not process text nodes

    my $function = shift or return; 
    my $data     = shift || {};
    my $depth    = shift || 0;
    my $limit    = shift || 10000; #inf;
    return if $depth > $limit;

    $function->($node, $data, $depth, $limit);

    for ($node->content_list()) {
        dfs($_, $function, $data, $depth+1, $limit);
    }
}

# print html subtree -- just for debugging
sub print_content {
    my ($node, $depth, $limit) = @_;
    my $print_tag = sub {
        my ($node, undef, $depth) = @_;
        print STDERR " " x $depth, $node->tag(), "\n";
        #;;print $node->as_HTML() if $node->tag() eq '~comment';
    };
    dfs($node, $print_tag, {}, $depth, $limit);
}

