#!/usr/bin/perl use strict; use warnings; use utf8; #use bigint; use List::MoreUtils qw(uniq); use HTML::TreeBuilder; use Data::Dumper; use Getopt::Long; use UNIVERSAL 'isa'; my $class = 'bdm::egiw'; my @to_keep = ('from_setting', 'to_setting'); my $output_dir = 'out'; my $input_dir = 'html'; GetOptions( 'class=s' => \$class, 'to-keep=s' => \@to_keep, 'output-dir=s' => \$output_dir, 'input-dir=s' => \$input_dir, ) or die('Wrong options format'); my $filename = $class; $filename =~ s/bdm::/classbdm_1_1/; $filename =~ s/$/.html/; my $out_filename = $output_dir . '/' . $filename; open (my $OUT, ">$out_filename") or die "cannot open $filename: $!"; $filename = $input_dir . '/' . $filename; print STDERR "Processing $filename\n"; my $html = HTML::TreeBuilder->new(); $html->store_comments(1); $html->parse_file($filename); my $body = $html->content_array_ref()->[1]; my $i = 0; my $div_content; while ($div_content = $body->content_array_ref()->[$i++]) { last if (defined $div_content->attr('class')) && ($div_content->attr('class') eq 'contents'); } $i = 0; my $table; FIND_TABLE: while ($table = $div_content->content_array_ref()->[$i++]) { if ( isa($table, "HTML::Element") ) { last if $table->tag() eq 'table'; my $container = $table; my $j = 0; while ($table = $container->content_array_ref()->[$j++]) { last FIND_TABLE if isa($table, "HTML::Element") && ($table->tag() eq 'table'); } } } #;;print STDERR $table; #;;print_content($table, 0, 5); #;;print_content($div_content, 0, 5); # generate mappings between member names and references # find hrefs contained in each description div my %ref_of; my %name_of; my %div_index_of; my %hrefs_of_div; $i = -1; #;;print STDERR "start with \$i = $i\n"; while (my $node = $div_content->content_array_ref()->[++$i]) { next if !ref $node; my $comment; if ($node->tag() eq '~comment') { $comment = $node; } elsif($node->tag() eq 'p') { next if !defined $node->content_array_ref()->[1]; next if !ref $node->content_array_ref()->[1]; $comment = $node->content_array_ref()->[1]; next if $comment->tag() ne '~comment'; } else { next; } my ($member, $ref) = parse_comment($comment); next if !defined $ref || !defined $member; $name_of{$ref} = $member; $ref_of{$member} = $ref; $div_index_of{$ref} = $i+1; $hrefs_of_div{$i+1} = [ find_hrefs($node->right()) ]; } #;;print STDERR Data::Dumper->Dump([\%name_of], ['name_of']); #;;print STDERR Data::Dumper->Dump([\%div_index_of], ['div_index_of']); #;;print STDERR Data::Dumper->Dump([\%hrefs_of_div], ['hrefs_of_div']); # find hrefs contained in each line of summary table # enrich mappings between member names and references my %tr_index_of; my %hrefs_of_tr; $i = -1; while (my $tr = $table->content_array_ref()->[++$i]) { next if !defined $tr->content_array_ref()->[1]; my $clss = $tr->content_array_ref()->[1]->attr('class'); #;;print STDERR "clss: $clss\n"; my $anchor = $tr->content_array_ref()->[1] ->content_array_ref()->[0]; next if !defined $clss || $clss ne 'memItemRight'; next if !ref $anchor || $anchor->tag() ne 'a'; my ($ref) = $anchor->attr('href') =~ m/#(.*)/; my $member = $class . '::' . $anchor->as_text(); #;;print STDERR "Member: $member\n"; $tr_index_of{$ref} = $i; if (exists $name_of{$ref} && $name_of{$ref} ne $member) { warn "inconsistency in summary table and detailed description:\n" . " $ref in summary: $member\n" . " $ref in description: $name_of{$ref}\n" . "using the value from summary table"; } $name_of{$ref} = $member; $ref_of{$member} = $ref; # $hrefs_of_tr{$i} = [ find_hrefs($tr), find_hrefs($tr->right()) ]; } #;;print STDERR Data::Dumper->Dump([\%hrefs_of_tr], ['hrefs_of_tr']); #;;print STDERR Data::Dumper->Dump([\%tr_index_of], ['tr_index_of']); #;;print STDERR Data::Dumper->Dump([\%name_of], ['name_of']); # convert given function names to refs my @to_keep_refs; for my $fn (@to_keep) { push @to_keep_refs, $ref_of{$class . '::' . $fn}; } #;;print STDERR Data::Dumper->Dump([\@to_keep_refs], ['to_keep_refs']); # enrich to_keep_refs with functions referenced in descriptions of to_keep # functions. Just one level depth, no recursion. my @new_to_keep_refs; for my $ref (@to_keep_refs) { # if (exists $tr_index_of{$ref}) { # push @new_to_keep_refs, @{ $hrefs_of_tr{ $tr_index_of{ $ref}} }; # } if (exists $div_index_of{$ref}) { push @new_to_keep_refs, @{ $hrefs_of_div{$div_index_of{$ref}} }; } } @to_keep_refs = uniq (@to_keep_refs, @new_to_keep_refs); #;;print STDERR Data::Dumper->Dump([\@to_keep_refs], ['to_keep_refs']); my @to_keep_divs = sort grep {defined} @div_index_of{@to_keep_refs}; my @to_keep_trs = sort grep {defined} @tr_index_of{ @to_keep_refs}; my @to_keep_names = sort @name_of{@to_keep_refs}; #;;print STDERR Data::Dumper->Dump([\@to_keep_divs], ['to_keep_divs']); #;;print STDERR Data::Dumper->Dump([\@to_keep_trs], ['to_keep_trs']); #;;print STDERR Data::Dumper->Dump([\@to_keep_names], ['to_keep_names']); # detach all divs which are not in to_keep_divs my @to_detach_divs = set_minus([values %div_index_of], \@to_keep_divs); #;;print STDERR Data::Dumper->Dump([\@to_detach_divs], ['to_detach_divs']); for my $node ( @{$div_content->content_array_ref()}[@to_detach_divs] ) { $node->detach(); } # detach all trs which are not in to_keep_trs my @to_detach_trs; $i = -1; while (my $tr = $table->content_array_ref()->[++$i]) { my $class = $tr->content_array_ref()->[0]->attr('class'); next if !defined $class || $class ne 'memItemLeft'; next if grep {$i == $_} @to_keep_trs; push @to_detach_trs, $i; my $tr2 = $tr->right(); next if !defined $tr2; my $tr2_class = $tr2->content_array_ref()->[0]->attr('class'); next if !defined $tr2_class || $tr2_class ne 'mdescLeft'; push @to_detach_trs, $i+1; } for my $node ( @{$table->content_array_ref()}[@to_detach_trs] ) { $node->detach(); } #print the result print $OUT $html->as_HTML(); $html->delete; #============================================================================ sub set_minus { my ($original_set, $difference) = @_; my @result; for $i (@$original_set) { next if !defined $i; next if grep { $i == $_ } @$difference; push @result, $i; } return @result; } # get member name and doxygen reference number out of given doxygen comment sub parse_comment { my $comment_el = shift or return (undef, undef); my $comment = $comment_el->as_HTML(); my ($member, $ref) = $comment =~ m{