[987] | 1 | #!/usr/bin/perl |
---|
| 2 | |
---|
| 3 | use strict; |
---|
| 4 | use warnings; |
---|
| 5 | use utf8; |
---|
| 6 | #use bigint; |
---|
| 7 | use List::MoreUtils qw(uniq); |
---|
| 8 | use HTML::TreeBuilder; |
---|
| 9 | use Data::Dumper; |
---|
| 10 | use Getopt::Long; |
---|
| 11 | use UNIVERSAL 'isa'; |
---|
| 12 | |
---|
# ---------------------------------------------------------------------------
# Configuration and command-line handling.
#
#   --class       fully qualified class name whose doxygen page is filtered
#   --to-keep     name of a member to keep (option may be repeated)
#   --output-dir  directory the filtered page is written to
#   --input-dir   directory containing the doxygen-generated HTML page
# ---------------------------------------------------------------------------
my $class      = 'bdm::egiw';
my @to_keep    = ('from_setting', 'to_setting');
my $output_dir = 'out';
my $input_dir  = 'html';

# NOTE(review): with an array destination Getopt::Long appends each
# --to-keep value, so the two defaults above always stay in the list --
# confirm that this is the intended behaviour.
GetOptions(
    'class=s'      => \$class,
    'to-keep=s'    => \@to_keep,
    'output-dir=s' => \$output_dir,
    'input-dir=s'  => \$input_dir,
) or die('Wrong options format');

# Derive the page's file name from the class name:
# "bdm::egiw" becomes "classbdm_1_1egiw.html".
my $filename = $class;
$filename =~ s/bdm::/classbdm_1_1/;
$filename =~ s/$/.html/;

# Open the output first so that an unwritable output directory is reported
# before any parsing work is done.  Three-argument open keeps characters in
# the path from being interpreted as an open mode, and the error message
# names the file that actually failed to open.
my $out_filename = $output_dir . '/' . $filename;
open(my $OUT, '>', $out_filename)
    or die "cannot open $out_filename: $!";

# From here on $filename is the path of the input page.
$filename = $input_dir . '/' . $filename;

print STDERR "Processing $filename\n";
---|
| 34 | |
---|
# Parse the input page.  Comments are stored in the tree because the
# "<!-- doxytag: ... -->" markers are what identifies each member.
my $html = HTML::TreeBuilder->new();
$html->store_comments(1);
defined $html->parse_file($filename)
    or die "cannot parse $filename: $!";

# Locate <body> by tag name instead of assuming it is the second child of
# <html>; text children (plain strings) are skipped with ref().
my ($body) = grep { ref $_ && $_->tag() eq 'body' } $html->content_list();
defined $body
    or die "no <body> element found in $filename";

# File-scoped scratch index; declared here so that it stays in scope for
# code further down in the file that refers to it.
my $i = 0;

# The first child of <body> carrying class="contents" holds both the
# summary table and the detailed member descriptions.
my ($div_content) = grep {
    ref $_
        && defined $_->attr('class')
        && $_->attr('class') eq 'contents'
} $body->content_list();
defined $div_content
    or die "no element with class \"contents\" found in $filename";

# The summary table is the first <table> that is either a direct child of
# the contents element or a child of one of its child elements.
# ref() is enough to tell elements from text nodes (the same test is used
# elsewhere in this file), so this search does not need the isa() function
# exported by UNIVERSAL -- an export Perl 5.22 and later no longer provide.
my $table;

FIND_TABLE:
for my $child ($div_content->content_list()) {
    next FIND_TABLE if !ref $child;

    if ($child->tag() eq 'table') {
        $table = $child;
        last FIND_TABLE;
    }

    for my $grandchild ($child->content_list()) {
        next if !ref $grandchild;
        next if $grandchild->tag() ne 'table';
        $table = $grandchild;
        last FIND_TABLE;
    }
}
defined $table
    or die "no summary table found in $filename";
---|
| 60 | |
---|
| 61 | #;;print STDERR $table; |
---|
| 62 | #;;print_content($table, 0, 5); |
---|
| 63 | #;;print_content($div_content, 0, 5); |
---|
| 64 | |
---|
# generate mappings between member names and references
# find hrefs contained in each description div
my %ref_of;          # member name => doxygen reference id
my %name_of;         # doxygen reference id => member name
my %div_index_of;    # doxygen reference id => index of its description
                     #   node within the contents element
my %hrefs_of_div;    # description index => [ reference ids linked from it ]

# Pass 1: find every doxytag marker and record the name/ref mappings.
# The description of a member is taken to be the sibling immediately
# following the marker (or following the <p> that wraps the marker).
# Every child is visited, including ones that evaluate to false.
my @pending_descriptions;    # [ description index, description node ]
my $content_nodes = $div_content->content_array_ref();

MARKER:
for my $node_idx (0 .. $#{$content_nodes}) {
    my $node = $content_nodes->[$node_idx];
    next MARKER if !ref $node;    # text node

    my $comment;
    if ($node->tag() eq '~comment') {
        $comment = $node;
    }
    elsif ($node->tag() eq 'p') {
        # the marker may be wrapped in a paragraph, as its second child
        $comment = $node->content_array_ref()->[1];
        next MARKER if !defined $comment || !ref $comment;
        next MARKER if $comment->tag() ne '~comment';
    }
    else {
        next MARKER;
    }

    my ($member, $ref) = parse_comment($comment);
    next MARKER if !defined $ref || !defined $member;

    $name_of{$ref}      = $member;
    $ref_of{$member}    = $ref;
    $div_index_of{$ref} = $node_idx + 1;

    # right() returns *all* right siblings in list context; force scalar
    # context to get only the immediate one.
    push @pending_descriptions, [ $node_idx + 1, scalar $node->right() ];
}

# Pass 2: collect the in-document links of each description.  find_hrefs()
# only accepts links whose target is present in %name_of, so this must run
# after all markers have been recorded; otherwise links to members that
# are described further down the page would be dropped.
for my $pending (@pending_descriptions) {
    my ($description_idx, $description) = @{$pending};
    $hrefs_of_div{$description_idx} = [ find_hrefs($description) ];
}
---|
| 99 | #;;print STDERR Data::Dumper->Dump([\%name_of], ['name_of']); |
---|
| 100 | #;;print STDERR Data::Dumper->Dump([\%div_index_of], ['div_index_of']); |
---|
| 101 | #;;print STDERR Data::Dumper->Dump([\%hrefs_of_div], ['hrefs_of_div']); |
---|
| 102 | |
---|
# find hrefs contained in each line of summary table
# enrich mappings between member names and references
my %tr_index_of;    # doxygen reference id => index of its row in $table
my %hrefs_of_tr;    # currently unused (see the commented-out line below)

my $summary_rows = $table->content_array_ref();

SUMMARY_ROW:
for my $row_idx (0 .. $#{$summary_rows}) {
    my $tr = $summary_rows->[$row_idx];
    next SUMMARY_ROW if !ref $tr;    # text between rows

    # Only rows whose second cell has class "memItemRight" and starts with
    # a link describe a member.
    my $name_cell = $tr->content_array_ref()->[1];
    next SUMMARY_ROW if !defined $name_cell || !ref $name_cell;

    my $clss = $name_cell->attr('class');
    #;;print STDERR "clss: $clss\n";
    next SUMMARY_ROW if !defined $clss || $clss ne 'memItemRight';

    my $anchor = $name_cell->content_array_ref()->[0];
    next SUMMARY_ROW if !ref $anchor || $anchor->tag() ne 'a';

    # The fragment part of the link is the doxygen reference id; rows
    # whose link has no href or no fragment cannot be mapped.
    my $href = $anchor->attr('href');
    next SUMMARY_ROW if !defined $href;
    my ($ref) = $href =~ m/#(.*)/;
    next SUMMARY_ROW if !defined $ref;

    my $member = $class . '::' . $anchor->as_text();
    #;;print STDERR "Member: $member\n";

    $tr_index_of{$ref} = $row_idx;

    if (exists $name_of{$ref} && $name_of{$ref} ne $member) {
        warn "inconsistency in summary table and detailed description:\n"
            . " $ref in summary: $member\n"
            . " $ref in description: $name_of{$ref}\n"
            . "using the value from summary table";
    }
    $name_of{$ref}   = $member;
    $ref_of{$member} = $ref;

    # $hrefs_of_tr{$row_idx} = [ find_hrefs($tr), find_hrefs($tr->right()) ];
}
---|
| 134 | |
---|
| 135 | #;;print STDERR Data::Dumper->Dump([\%hrefs_of_tr], ['hrefs_of_tr']); |
---|
| 136 | #;;print STDERR Data::Dumper->Dump([\%tr_index_of], ['tr_index_of']); |
---|
| 137 | #;;print STDERR Data::Dumper->Dump([\%name_of], ['name_of']); |
---|
| 138 | |
---|
# convert given function names to refs
# Names that do not occur on the page are reported and skipped, so that no
# undefined reference id ends up in the lists built below.
my @to_keep_refs;
for my $fn (@to_keep) {
    my $member = $class . '::' . $fn;
    my $ref    = $ref_of{$member};
    if (!defined $ref) {
        warn "member $member not found in $filename, ignoring it\n";
        next;
    }
    push @to_keep_refs, $ref;
}
#;;print STDERR Data::Dumper->Dump([\@to_keep_refs], ['to_keep_refs']);

# enrich to_keep_refs with functions referenced in descriptions of to_keep
# functions. Just one level depth, no recursion.
my @new_to_keep_refs;
for my $ref (@to_keep_refs) {
    # if (exists $tr_index_of{$ref}) {
    #     push @new_to_keep_refs, @{ $hrefs_of_tr{ $tr_index_of{ $ref}} };
    # }
    next if !exists $div_index_of{$ref};
    push @new_to_keep_refs, @{ $hrefs_of_div{ $div_index_of{$ref} } || [] };
}

@to_keep_refs = uniq(@to_keep_refs, @new_to_keep_refs);
#;;print STDERR Data::Dumper->Dump([\@to_keep_refs], ['to_keep_refs']);

# Indexes are numbers, so sort them numerically; a reference id may lack a
# description, a table row or a name, hence the defined() filters.
my @to_keep_divs  = sort { $a <=> $b } grep { defined } @div_index_of{@to_keep_refs};
my @to_keep_trs   = sort { $a <=> $b } grep { defined } @tr_index_of{@to_keep_refs};
my @to_keep_names = sort grep { defined } @name_of{@to_keep_refs};
---|
| 164 | #;;print STDERR Data::Dumper->Dump([\@to_keep_divs], ['to_keep_divs']); |
---|
| 165 | #;;print STDERR Data::Dumper->Dump([\@to_keep_trs], ['to_keep_trs']); |
---|
| 166 | #;;print STDERR Data::Dumper->Dump([\@to_keep_names], ['to_keep_names']); |
---|
| 167 | |
---|
# detach all divs which are not in to_keep_divs
my @to_detach_divs = set_minus([values %div_index_of], \@to_keep_divs);
#;;print STDERR Data::Dumper->Dump([\@to_detach_divs], ['to_detach_divs']);

# Copy the nodes out of the content list before detaching anything:
# detach() removes the node from its parent's content list, so the list
# must not be walked while nodes are being removed.  Entries that are not
# elements (an index past the end of the list, or a text node) are skipped.
my @divs_to_detach
    = grep { ref $_ } @{ $div_content->content_array_ref() }[@to_detach_divs];
for my $div (@divs_to_detach) {
    $div->detach();
}

# detach all trs which are not in to_keep_trs
my @to_detach_trs;
my %is_kept_tr = map { $_ => 1 } @to_keep_trs;
my $table_rows = $table->content_array_ref();

TABLE_ROW:
for my $row_idx (0 .. $#{$table_rows}) {
    my $tr = $table_rows->[$row_idx];
    next TABLE_ROW if !ref $tr;

    # a member row is recognised by its first cell's class
    my $first_cell = $tr->content_array_ref()->[0];
    next TABLE_ROW if !ref $first_cell;
    my $cell_class = $first_cell->attr('class');
    next TABLE_ROW if !defined $cell_class || $cell_class ne 'memItemLeft';
    next TABLE_ROW if $is_kept_tr{$row_idx};

    push @to_detach_trs, $row_idx;

    # the row right after a member row may hold its brief description;
    # if so it is removed together with the member row
    my $tr2 = $tr->right();
    next TABLE_ROW if !ref $tr2;
    my $desc_cell = $tr2->content_array_ref()->[0];
    next TABLE_ROW if !ref $desc_cell;
    my $tr2_class = $desc_cell->attr('class');
    next TABLE_ROW if !defined $tr2_class || $tr2_class ne 'mdescLeft';

    push @to_detach_trs, $row_idx + 1;
}

# same precaution as for the divs: snapshot first, then detach
my @trs_to_detach = grep { ref $_ } @{$table_rows}[@to_detach_trs];
for my $row (@trs_to_detach) {
    $row->detach();
}
---|
| 197 | |
---|
# Print the result.  Both the write and the close are checked: with
# buffered output a failed write may only be reported when the handle is
# closed.
print {$OUT} $html->as_HTML()
    or die "cannot write to $out_filename: $!";
close $OUT
    or die "cannot close $out_filename: $!";

# release the parse tree explicitly
$html->delete;
---|
| 202 | |
---|
| 203 | #============================================================================ |
---|
| 204 | |
---|
# Return the elements of @$original_set that are not numerically equal to
# any element of @$difference.
#
#   $original_set  array ref of numbers; undefined entries are dropped
#   $difference    array ref of numbers to exclude
#
# The order of the surviving elements is the order in @$original_set.
sub set_minus {
    my ($original_set, $difference) = @_;
    my @result;

    # The loop variable is lexical to this sub, so the function does not
    # depend on any variable declared at file scope.
    ELEMENT:
    for my $candidate (@$original_set) {
        next ELEMENT if !defined $candidate;
        next ELEMENT if grep { $candidate == $_ } @$difference;
        push @result, $candidate;
    }

    return @result;
}
---|
| 215 | |
---|
# Extract the member name and the doxygen reference id from a doxygen
# marker comment, i.e. markup containing
#     <!-- doxytag: member="NAME" ref="ID"
#
#   argument  an object providing as_HTML() (a comment element)
#   returns   the list (NAME, ID); both are undef when no argument is given
#             or when the markup does not contain the marker
sub parse_comment {
    my ($comment_el) = @_;
    return (undef, undef) if !$comment_el;

    my $markup = $comment_el->as_HTML();

    my $member;
    my $ref;
    if (
        $markup =~ m{
            <!-- [ ] doxytag: [ ]
            member="(.*?)"[ ] # *? is non-greedy
            ref="(.*?)"
        }xms
        )
    {
        ($member, $ref) = ($1, $2);
    }

    return ($member, $ref);
}
---|
| 229 | |
---|
| 230 | |
---|
# Depth-first traversal: call a function on a node, then on each of its
# children in content order, recursively.
#
#   $node      root of the subtree; nothing happens when it is false or
#              not a reference (text nodes are plain strings)
#   $function  code ref, called as $function->($node, $data, $depth, $limit)
#   $data      passed through to every call; defaults to a fresh hash ref
#   $depth     depth assigned to $node; defaults to 0
#   $limit     nodes deeper than this are not visited; a false value means
#              10000
sub dfs {
    my ($node, $function, $data, $depth, $limit) = @_;

    return if !$node;
    return if !ref $node;    # do not process text nodes
    return if !$function;

    $data  ||= {};
    $depth ||= 0;
    $limit ||= 10000;        # inf
    return if $depth > $limit;

    $function->($node, $data, $depth, $limit);

    dfs($_, $function, $data, $depth + 1, $limit) for $node->content_list();

    return;
}
---|
| 248 | |
---|
# Collect the fragment ids of all links below the given element that point
# into the current document, i.e. whose fragment is a key of %name_of with
# a true value.
#
#   argument  root element of the subtree to search
#   returns   list of fragment ids, in traversal order (may contain
#             duplicates)
sub find_hrefs {
    my ($root) = @_;
    my @found;

    dfs(
        $root,
        sub {
            my ($element, $collected) = @_;

            # only <a href="..."> elements are of interest
            return if $element->tag() ne 'a';
            my $href = $element->attr('href');
            return if !defined $href;

            # keep what follows the '#', provided it names a known member
            my ($hashpart) = $href =~ m/ .*? \# (.+) /xms;
            return if !$hashpart;
            return if !$name_of{$hashpart};

            push @$collected, $hashpart;
        },
        \@found,
    );

    return @found;
}
---|
| 266 | |
---|
# Debugging aid: write the tag names of a subtree to STDERR, one per line,
# each indented by one space per level of depth.
#
#   $node   root of the subtree to print
#   $depth  depth (and therefore indentation) given to the root
#   $limit  deepest level that is still printed
sub print_content {
    my ($node, $depth, $limit) = @_;

    dfs(
        $node,
        sub {
            my ($element, undef, $level) = @_;
            my $indent = " " x $level;
            print STDERR $indent, $element->tag(), "\n";
            #;;print $element->as_HTML() if $element->tag() eq '~comment';
        },
        {},
        $depth,
        $limit,
    );
}
---|