X-Git-Url: https://git.openssl.org/?p=openssl.git;a=blobdiff_plain;f=util%2Ffind-doc-nits;h=0feb01e27c668ef0ea964f25185cd99e8a3b35b4;hp=ff2409110a100a070b4a7b1323041f01f6652476;hb=b0d5c1cb0773588dca69875e122fac75252ccdf6;hpb=5a3371e22164592d7ff6d69245768ed4dde89a1a diff --git a/util/find-doc-nits b/util/find-doc-nits index ff2409110a..0feb01e27c 100755 --- a/util/find-doc-nits +++ b/util/find-doc-nits @@ -1,7 +1,7 @@ #! /usr/bin/env perl -# Copyright 2002-2016 The OpenSSL Project Authors. All Rights Reserved. +# Copyright 2002-2020 The OpenSSL Project Authors. All Rights Reserved. # -# Licensed under the OpenSSL license (the "License"). You may not use +# Licensed under the Apache License 2.0 (the "License"). You may not use # this file except in compliance with the License. You can obtain a copy # in the file LICENSE in the source distribution or at # https://www.openssl.org/source/license.html @@ -10,47 +10,263 @@ require 5.10.0; use warnings; use strict; + +use Carp qw(:DEFAULT cluck); use Pod::Checker; use File::Find; use File::Basename; use File::Spec::Functions; use Getopt::Std; -use lib catdir(dirname($0), "perl"); +use FindBin; +use lib "$FindBin::Bin/perl"; + use OpenSSL::Util::Pod; +use lib '.'; +use configdata; + +# Set to 1 for debug output +my $debug = 0; + +# Where to find openssl command +my $openssl = "./util/opensslwrap.sh"; + # Options. +our($opt_d); +our($opt_e); our($opt_s); -our($opt_u); +our($opt_o); our($opt_h); -our($opt_n); our($opt_l); +our($opt_n); +our($opt_p); +our($opt_u); +our($opt_v); +our($opt_c); -sub help() -{ +# Print usage message and exit. +sub help { print < [ 'NAME', 'DESCRIPTION', 'COPYRIGHT' ], + 1 => [ 'SYNOPSIS', 'OPTIONS' ], + 3 => [ 'SYNOPSIS', 'RETURN VALUES' ], + 5 => [ ], + 7 => [ ] + ); + +# Symbols that we ignored. +# They are internal macros that we currently don't document +my $ignored = qr/(?| ^i2d_ + | ^d2i_ + | ^DEPRECATEDIN + | \Q_fnsig(3)\E$ + | ^IMPLEMENT_ + | ^_?DECLARE_ + )/x; + +# Collect all POD files, both internal and public, and regardless of location +# We collect them in a hash table with each file being a key, so we can attach +# tags to them. For example, internal docs will have the word "internal" +# attached to them. +my %files = (); +# We collect files names on the fly, on known tag basis +my %collected_tags = (); +# We cache results based on tags +my %collected_results = (); + +# files OPTIONS +# +# Example: +# +# files(TAGS => 'manual'); +# files(TAGS => [ 'manual', 'man1' ]); +# +# This function returns an array of files corresponding to a set of tags +# given with the options "TAGS". The value of this option can be a single +# word, or an array of several words, which work as inclusive or exclusive +# selectors. Inclusive selectors are used to add one more set of files to +# the returned array, while exclusive selectors limit the set of files added +# to the array. The recognised tag values are: +# +# 'public_manual' - inclusive selector, adds public manuals to the +# returned array of files. +# 'internal_manual' - inclusive selector, adds internal manuals to the +# returned array of files. +# 'manual' - inclusive selector, adds any manual to the returned +# array of files. This is really a shorthand for +# 'public_manual' and 'internal_manual' combined. +# 'public_header' - inclusive selector, adds public headers to the +# returned array of files. +# 'header' - inclusive selector, adds any header file to the +# returned array of files. Since we currently only +# care about public headers, this is exactly +# equivalent to 'public_header', but is present for +# consistency. +# +# 'man1', 'man3', 'man5', 'man7' +# - exclusive selectors, only applicable together with +# any of the manual selectors. If any of these are +# present, only the manuals from the given sections +# will be include. If none of these are present, +# the manuals from all sections will be returned. +# +# All returned manual files come from configdata.pm. +# All returned header files come from looking inside +# "$config{sourcedir}/include/openssl" +# +sub files { + my %opts = ( @_ ); # Make a copy of the arguments + + $opts{TAGS} = [ $opts{TAGS} ] if ref($opts{TAGS}) eq ''; + + croak "No tags given, or not an array" + unless exists $opts{TAGS} && ref($opts{TAGS}) eq 'ARRAY'; + + my %tags = map { $_ => 1 } @{$opts{TAGS}}; + $tags{public_manual} = 1 + if $tags{manual} && ($tags{public} // !$tags{internal}); + $tags{internal_manual} = 1 + if $tags{manual} && ($tags{internal} // !$tags{public}); + $tags{public_header} = 1 + if $tags{header} && ($tags{public} // !$tags{internal}); + delete $tags{manual}; + delete $tags{header}; + delete $tags{public}; + delete $tags{internal}; + + my $tags_as_key = join(':', sort keys %tags); + + cluck "DEBUG[files]: This is how we got here!" if $debug; + print STDERR "DEBUG[files]: tags: $tags_as_key\n" if $debug; + + my %tags_to_collect = ( map { $_ => 1 } + grep { !exists $collected_tags{$_} } + keys %tags ); + + if ($tags_to_collect{public_manual}) { + print STDERR "DEBUG[files]: collecting public manuals\n" + if $debug; + + # The structure in configdata.pm is that $unified_info{mandocs} + # contains lists of man files, and in turn, $unified_info{depends} + # contains hash tables showing which POD file each of those man + # files depend on. We use that information to find the POD files, + # and to attach the man section they belong to as tags + foreach my $mansect ( @sections ) { + foreach ( map { @{$unified_info{depends}->{$_}} } + @{$unified_info{mandocs}->{$mansect}} ) { + $files{$_} = { $mansect => 1, public_manual => 1 }; + } + } + $collected_tags{public_manual} = 1; + } + + if ($tags_to_collect{internal_manual}) { + print STDERR "DEBUG[files]: collecting internal manuals\n" + if $debug; + + # We don't have the internal docs in configdata.pm. However, they + # are all in the source tree, so they're easy to find. + foreach my $mansect ( @sections ) { + foreach ( glob(catfile($config{sourcedir}, + 'doc', 'internal', $mansect, '*.pod')) ) { + $files{$_} = { $mansect => 1, internal_manual => 1 }; + } + } + $collected_tags{internal_manual} = 1; + } + + if ($tags_to_collect{public_header}) { + print STDERR "DEBUG[files]: collecting public headers\n" + if $debug; + + foreach ( glob(catfile($config{sourcedir}, + 'include', 'openssl', '*.h')) ) { + $files{$_} = { public_header => 1 }; + } + } + + my @result = @{$collected_results{$tags_as_key} // []}; + + if (!@result) { + # Produce a result based on caller tags + foreach my $type ( ( 'public_manual', 'internal_manual' ) ) { + next unless $tags{$type}; + + # If caller asked for specific sections, we care about sections. + # Otherwise, we give back all of them. + my @selected_sections = + grep { $tags{$_} } @sections; + @selected_sections = @sections unless @selected_sections; + + foreach my $section ( ( @selected_sections ) ) { + push @result, + ( sort { basename($a) cmp basename($b) } + grep { $files{$_}->{$type} && $files{$_}->{$section} } + keys %files ); + } + } + if ($tags{public_header}) { + push @result, + ( sort { basename($a) cmp basename($b) } + grep { $files{$_}->{public_header} } + keys %files ); + } + + if ($debug) { + print STDERR "DEBUG[files]: result:\n"; + print STDERR "DEBUG[files]: $_\n" foreach @result; + } + $collected_results{$tags_as_key} = [ @result ]; + } -my %mandatory_sections = - ( '*' => [ 'NAME', 'DESCRIPTION', 'COPYRIGHT' ], - 1 => [ 'SYNOPSIS', 'OPTIONS' ], - 3 => [ 'SYNOPSIS', 'RETURN VALUES' ], - 5 => [ ], - 7 => [ ] ); + return @result; +} + +# Print error message, set $status. +sub err { + print join(" ", @_), "\n"; + $status = 1 +} # Cross-check functions in the NAME and SYNOPSIS section. -sub name_synopsis() -{ +sub name_synopsis { my $id = shift; my $filename = shift; my $contents = shift; @@ -59,48 +275,72 @@ sub name_synopsis() return unless $contents =~ /=head1 NAME(.*)=head1 SYNOPSIS/ms; my $tmp = $1; $tmp =~ tr/\n/ /; - print "$id trailing comma before - in NAME\n" if $tmp =~ /, *-/; - $tmp =~ s/-.*//g; - $tmp =~ s/,//g; + err($id, "Trailing comma before - in NAME") + if $tmp =~ /, *-/; + $tmp =~ s/ -.*//g; + err($id, "POD markup among the names in NAME") + if $tmp =~ /[<>]/; + $tmp =~ s/ */ /g; + err($id, "Missing comma in NAME") + if $tmp =~ /[^,] /; my $dirname = dirname($filename); - my $simplename = basename($filename); - $simplename =~ s/.pod$//; + my $section = basename($dirname); + my $simplename = basename($filename, ".pod"); my $foundfilename = 0; my %foundfilenames = (); my %names; - foreach my $n ( split ' ', $tmp ) { + foreach my $n ( split ',', $tmp ) { + $n =~ s/^\s+//; + $n =~ s/\s+$//; + err($id, "The name '$n' contains white-space") + if $n =~ /\s/; $names{$n} = 1; $foundfilename++ if $n eq $simplename; $foundfilenames{$n} = 1 - if -f "$dirname/$n.pod" && $n ne $simplename; + if ( ( grep { basename($_) eq "$n.pod" } + files(TAGS => [ 'manual', $section ]) ) + && $n ne $simplename ); } - print "$id the following exist as other .pod files:\n", - join(" ", sort keys %foundfilenames), "\n" + err($id, "The following exist as other .pod files:", + sort keys %foundfilenames) if %foundfilenames; - print "$id $simplename (filename) missing from NAME section\n", + err($id, "$simplename (filename) missing from NAME section") unless $foundfilename; # Find all functions in SYNOPSIS return unless $contents =~ /=head1 SYNOPSIS(.*)=head1 DESCRIPTION/ms; my $syn = $1; foreach my $line ( split /\n+/, $syn ) { + next unless $line =~ /^\s/; my $sym; + my $is_prototype = 1; $line =~ s/STACK_OF\([^)]+\)/int/g; + $line =~ s/SPARSE_ARRAY_OF\([^)]+\)/int/g; $line =~ s/__declspec\([^)]+\)//; + if ( $line =~ /typedef.*\(\*\S+\)\s+\(/ ) { + # a callback function with whitespace before the argument list: + # typedef ... (*NAME) (... + err($id, "Function typedef has space before arg list: $line"); + } if ( $line =~ /env (\S*)=/ ) { # environment variable env NAME=... $sym = $1; - } elsif ( $line =~ /typedef.*\(\*(\S+)\)\(.*/ ) { - # a callback function: typedef ... (*NAME)(... + } elsif ( $line =~ /typedef.*\(\*(\S+)\)\s*\(/ ) { + # a callback function pointer: typedef ... (*NAME)(... + $sym = $1; + } elsif ( $line =~ /typedef.* (\S+)\(/ ) { + # a callback function signature: typedef ... NAME(... $sym = $1; } elsif ( $line =~ /typedef.* (\S+);/ ) { # a simple typedef: typedef ... NAME; + $is_prototype = 0; $sym = $1; } elsif ( $line =~ /enum (\S*) \{/ ) { # an enumeration: enum ... { $sym = $1; - } elsif ( $line =~ /#define ([A-Za-z0-9_]+)/ ) { + } elsif ( $line =~ /#(?:define|undef) ([A-Za-z0-9_]+)/ ) { + $is_prototype = 0; $sym = $1; } elsif ( $line =~ /([A-Za-z0-9_]+)\(/ ) { $sym = $1; @@ -108,72 +348,365 @@ sub name_synopsis() else { next; } - print "$id $sym missing from NAME section\n" + err($id, "$sym missing from NAME section") unless defined $names{$sym}; $names{$sym} = 2; # Do some sanity checks on the prototype. - print "$id prototype missing spaces around commas: $line\n" - if ( $line =~ /[a-z0-9],[^ ]/ ); + err($id, "Prototype missing spaces around commas: $line") + if $is_prototype && $line =~ /[a-z0-9],[^ ]/; } foreach my $n ( keys %names ) { next if $names{$n} == 2; - print "$id $n missing from SYNOPSIS\n"; + err($id, "$n missing from SYNOPSIS") } } -sub check() -{ +# Check if SECTION ($3) is located before BEFORE ($4) +sub check_section_location { + my $id = shift; + my $contents = shift; + my $section = shift; + my $before = shift; + + return unless $contents =~ /=head1 $section/ + and $contents =~ /=head1 $before/; + err($id, "$section should appear before $before section") + if $contents =~ /=head1 $before.*=head1 $section/ms; +} + +# Check if a =head1 is duplicated, or a =headX is duplicated within a +# =head1. Treats =head2 =head3 as equivalent -- it doesn't reset the head3 +# sets if it finds a =head2 -- but that is good enough for now. Also check +# for proper capitalization, trailing periods, etc. +sub check_head_style { + my $id = shift; + my $contents = shift; + my %head1; + my %subheads; + + foreach my $line ( split /\n+/, $contents ) { + next unless $line =~ /^=head/; + if ( $line =~ /head1/ ) { + err($id, "Duplicate section $line") + if defined $head1{$line}; + $head1{$line} = 1; + %subheads = (); + } else { + err($id, "Duplicate subsection $line") + if defined $subheads{$line}; + $subheads{$line} = 1; + } + err($id, "Period in =head") + if $line =~ /\.[^\w]/ or $line =~ /\.$/; + err($id, "not all uppercase in =head1") + if $line =~ /head1.*[a-z]/; + err($id, "All uppercase in subhead") + if $line =~ /head[234][ A-Z0-9]+$/; + } +} + +# Because we have options and symbols with extra markup, we need +# to take that into account, so we need a regexp that extracts +# markup chunks, including recursive markup. +# please read up on /(?R)/ in perlre(1) +# (note: order is important, (?R) needs to come before .) +# (note: non-greedy is important, or something like 'B and B' +# will be captured as one item) +my $markup_re = + qr/( # Capture group + [BIL]< # The start of what we recurse on + (?:(?-1)|.)*? # recurse the whole regexp (referring to + # the last opened capture group, i.e. the + # start of this regexp), or pick next + # character. Do NOT be greedy! + > # The end of what we recurse on + )/x; # (the x allows this sort of split up regexp) + +# Options must start with a dash, followed by a letter, possibly +# followed by letters, digits, dashes and underscores, and the last +# character must be a letter or a digit. +# We do also accept the single -? or -n, where n is a digit +my $option_re = + qr/(?: + \? # Single question mark + | + \d # Single digit + | + - # Single dash (--) + | + [[:alpha:]](?:[-_[:alnum:]]*?[[:alnum:]])? + )/x; + +# Helper function to check if a given $thing is properly marked up +# option. It returns one of these values: +# undef if it's not an option +# "" if it's a malformed option +# $unwrapped the option with the outermost B<> wrapping removed. +sub normalise_option { + my $id = shift; my $filename = shift; - my $dirname = basename(dirname($filename)); + my $thing = shift; + + my $unwrapped = $thing; + my $unmarked = $thing; + + # $unwrapped is the option with the outer B<> markup removed + $unwrapped =~ s/^B$//; + # $unmarked is the option with *all* markup removed + $unmarked =~ s/[BIL]<|>//msg; + + + # If we found an option, check it, collect it + if ( $unwrapped =~ /^\s*-/ ) { + return $unwrapped # return option with outer B<> removed + if $unmarked =~ /^-${option_re}$/; + return ""; # Malformed option + } + return undef; # Something else +} + +# Checks of command option (man1) formatting. The man1 checks are +# restricted to the SYNOPSIS and OPTIONS sections, the rest is too +# free form, we simply cannot be too strict there. + +sub option_check { + my $id = shift; + my $filename = shift; + my $contents = shift; + + my $synopsis = ($contents =~ /=head1\s+SYNOPSIS(.*?)=head1/s, $1); - my $contents = ''; - { - local $/ = undef; - open POD, $filename or die "Couldn't open $filename, $!"; - $contents = ; - close POD; + # Some pages have more than one OPTIONS section, let's make sure + # to get them all + my $options = ''; + while ( $contents =~ /=head1\s+[A-Z ]*?OPTIONS$(.*?)(?==head1)/msg ) { + $options .= $1; } + # Look for options with no or incorrect markup + while ( $synopsis =~ + /(?[:alnum:]])/msg ) { + err($id, "Malformed option [1] in SYNOPSIS: $&"); + } + + while ( $synopsis =~ /$markup_re/msg ) { + my $found = $&; + print STDERR "$id:DEBUG[option_check] SYNOPSIS: found $found\n" + if $debug; + my $option_uw = normalise_option($id, $filename, $found); + err($id, "Malformed option [2] in SYNOPSIS: $found") + if defined $option_uw && $option_uw eq ''; + } + + # In OPTIONS, we look for =item paragraphs. + # (?=^\s*$) detects an empty line. + while ( $options =~ /=item\s+(.*?)(?=^\s*$)/msg ) { + my $item = $&; + + while ( $item =~ /(\[\s*)?($markup_re)/msg ) { + my $found = $2; + print STDERR "$id:DEBUG[option_check] OPTIONS: found $&\n" + if $debug; + err($id, "Unexpected bracket in OPTIONS =item: $item") + if ($1 // '') ne '' && $found =~ /^B<\s*-/; + + my $option_uw = normalise_option($id, $filename, $found); + err($id, "Malformed option in OPTIONS: $found") + if defined $option_uw && $option_uw eq ''; + } + } +} + +# Normal symbol form +my $symbol_re = qr/[[:alpha:]_][_[:alnum:]]*?/; + +# Checks of function name (man3) formatting. The man3 checks are +# easier than the man1 checks, we only check the names followed by (), +# and only the names that have POD markup. +sub functionname_check { + my $id = shift; + my $filename = shift; + my $contents = shift; + + while ( $contents =~ /($markup_re)\(\)/msg ) { + print STDERR "$id:DEBUG[functionname_check] SYNOPSIS: found $&\n" + if $debug; + + my $symbol = $1; + my $unmarked = $symbol; + $unmarked =~ s/[BIL]<|>//msg; + + err($id, "Malformed symbol: $symbol") + unless $symbol =~ /^B<.*?>$/ && $unmarked =~ /^${symbol_re}$/ + } + + # We can't do the kind of collecting coolness that option_check() + # does, because there are too many things that can't be found in + # name repositories like the NAME sections, such as symbol names + # with a variable part (typically marked up as B_bar> +} + +# This is from http://man7.org/linux/man-pages/man7/man-pages.7.html +my %preferred_words = ( + 'bitmask' => 'bit mask', + 'builtin' => 'built-in', + #'epoch' => 'Epoch', # handled specially, below + 'file name' => 'filename', + 'file system' => 'filesystem', + 'host name' => 'hostname', + 'i-node' => 'inode', + 'lower case' => 'lowercase', + 'lower-case' => 'lowercase', + 'non-zero' => 'nonzero', + 'path name' => 'pathname', + 'pseudo-terminal' => 'pseudoterminal', + 'reserved port' => 'privileged port', + 'system port' => 'privileged port', + 'realtime' => 'real-time', + 'real time' => 'real-time', + 'runtime' => 'run time', + 'saved group ID'=> 'saved set-group-ID', + 'saved set-GID' => 'saved set-group-ID', + 'saved user ID' => 'saved set-user-ID', + 'saved set-UID' => 'saved set-user-ID', + 'set-GID' => 'set-group-ID', + 'setgid' => 'set-group-ID', + 'set-UID' => 'set-user-ID', + 'setuid' => 'set-user-ID', + 'super user' => 'superuser', + 'super-user' => 'superuser', + 'super block' => 'superblock', + 'super-block' => 'superblock', + 'time stamp' => 'timestamp', + 'time zone' => 'timezone', + 'upper case' => 'uppercase', + 'upper-case' => 'uppercase', + 'useable' => 'usable', + 'userspace' => 'user space', + 'user name' => 'username', + 'zeroes' => 'zeros' +); + +# Search manpage for words that have a different preferred use. +sub wording { + my $id = shift; + my $contents = shift; + + foreach my $k ( keys %preferred_words ) { + # Sigh, trademark + next if $k eq 'file system' + and $contents =~ /Microsoft Encrypted File System/; + err($id, "Found '$k' should use '$preferred_words{$k}'") + if $contents =~ /\b\Q$k\E\b/i; + } + err($id, "Found 'epoch' should use 'Epoch'") + if $contents =~ /\bepoch\b/; + if ( $id =~ m@man1/@ ) { + err($id, "found 'tool' in NAME, should use 'command'") + if $contents =~ /=head1 NAME.*\btool\b.*=head1 SYNOPSIS/s; + err($id, "found 'utility' in NAME, should use 'command'") + if $contents =~ /NAME.*\butility\b.*=head1 SYNOPSIS/s; + + } +} + +# Perform all sorts of nit/error checks on a manpage +sub check { + my %podinfo = @_; + my $filename = $podinfo{filename}; + my $dirname = basename(dirname($filename)); + my $contents = $podinfo{contents}; + my $id = "${filename}:1:"; + check_head_style($id, $contents); + + # Check ordering of some sections in man3 + if ( $filename =~ m|man3/| ) { + check_section_location($id, $contents, "RETURN VALUES", "EXAMPLES"); + check_section_location($id, $contents, "SEE ALSO", "HISTORY"); + check_section_location($id, $contents, "EXAMPLES", "SEE ALSO"); + } - &name_synopsis($id, $filename, $contents) - unless $contents =~ /=for comment generic/ - or $filename =~ m@man[157]/@; + # Make sure every link has a section. + while ( $contents =~ /$markup_re/msg ) { + my $target = $1; + next unless $target =~ /^L<(.*)>$/; # Skip if not L<...> + $target = $1; # Peal away L< and > + $target =~ s/\/[^\/]*$//; # Peal away possible anchor + $target =~ s/.*\|//g; # Peal away possible link text + next if $target eq ''; # Skip if links within page, or + next if $target =~ /::/; # links to a Perl module, or + next if $target =~ /^https?:/; # is a URL link, or + next if $target =~ /\([1357]\)$/; # it has a section + err($id, "Section missing in $target") + } + # Check for proper links to commands. + while ( $contents =~ /L<([^>]*)\(1\)(?:\/.*)?>/g ) { + my $target = $1; + next if $target =~ /openssl-?/; + next if ( grep { basename($_) eq "$target.pod" } + files(TAGS => [ 'manual', 'man1' ]) ); + # TODO: Filter out "foreign manual" links. + next if $target =~ /ps|apropos|sha1sum|procmail|perl/; + err($id, "Bad command link L<$target(1)>"); + } + # Check for proper in-man-3 API links. + while ( $contents =~ /L<([^>]*)\(3\)(?:\/.*)?>/g ) { + my $target = $1; + err($id, "Bad L<$target>") + unless $target =~ /^[_[:alpha:]][_[:alnum:]]*$/ + } + + unless ( $contents =~ /^=for openssl generic/ms ) { + if ( $filename =~ m|man3/| ) { + name_synopsis($id, $filename, $contents); + functionname_check($id, $filename, $contents); + } elsif ( $filename =~ m|man1/| ) { + option_check($id, $filename, $contents) + } + } - print "$id doesn't start with =pod\n" + wording($id, $contents); + + err($id, "Doesn't start with =pod") if $contents !~ /^=pod/; - print "$id doesn't end with =cut\n" + err($id, "Doesn't end with =cut") if $contents !~ /=cut\n$/; - print "$id more than one cut line.\n" + err($id, "More than one cut line.") if $contents =~ /=cut.*=cut/ms; - print "$id missing copyright\n" + err($id, "EXAMPLE not EXAMPLES section.") + if $contents =~ /=head1 EXAMPLE[^S]/; + err($id, "WARNING not WARNINGS section.") + if $contents =~ /=head1 WARNING[^S]/; + err($id, "Missing copyright") if $contents !~ /Copyright .* The OpenSSL Project Authors/; - print "$id copyright not last\n" + err($id, "Copyright not last") if $contents =~ /head1 COPYRIGHT.*=head/ms; - print "$id head2 in All uppercase\n" + err($id, "head2 in All uppercase") if $contents =~ /head2\s+[A-Z ]+\n/; - print "$id extra space after head\n" + err($id, "Extra space after head") if $contents =~ /=head\d\s\s+/; - print "$id period in NAME section\n" + err($id, "Period in NAME section") if $contents =~ /=head1 NAME.*\.\n.*=head1 SYNOPSIS/ms; - print "$id POD markup in NAME section\n" - if $contents =~ /=head1 NAME.*[<>].*=head1 SYNOPSIS/ms; - print "$id Duplicate $1 in L<>\n" + err($id, "Duplicate $1 in L<>") if $contents =~ /L<([^>]*)\|([^>]*)>/ && $1 eq $2; + err($id, "Bad =over $1") + if $contents =~ /=over([^ ][^24])/; + err($id, "Possible version style issue") + if $contents =~ /OpenSSL version [019]/; - # Look for multiple consecutive openssl #include lines. - # Consecutive because of files like md5.pod. Sometimes it's okay - # or necessary, as in ssl/SSL_set1_host.pod - if ( $contents !~ /=for comment multiple includes/ ) { + if ( $contents !~ /=for openssl multiple includes/ ) { + # Look for multiple consecutive openssl #include lines + # (non-consecutive lines are okay; see man3/MD5.pod). if ( $contents =~ /=head1 SYNOPSIS(.*)=head1 DESCRIPTION/ms ) { my $count = 0; foreach my $line ( split /\n+/, $1 ) { if ( $line =~ m@include ', $temp or die "Can't open $temp, $!"; podchecker($filename, $OUT); @@ -205,170 +726,409 @@ sub check() } close $OUT; unlink $temp || warn "Can't remove $temp, $!"; + + # Find what section this page is in; assume 3. + my $section = 3; + $section = $1 if $dirname =~ /man([1-9])/; + + foreach ( (@{$mandatory_sections{'*'}}, @{$mandatory_sections{$section}}) ) { + err($id, "Missing $_ head1 section") + if $contents !~ /^=head1\s+${_}\s*$/m; + } } -my %dups; +# Information database ############################################### -sub parsenum() -{ +# Map of links in each POD file; filename => [ "foo(1)", "bar(3)", ... ] +my %link_map = (); +# Map of names in each POD file or from "missing" files; possible values are: +# If found in a POD files, "name(s)" => filename +# If found in a "missing" file or external, "name(s)" => '' +my %name_map = (); + +# State of man-page names. +# %state is affected by loading util/*.num and util/*.syms +# Values may be one of: +# 'crypto' : belongs in libcrypto (loaded from libcrypto.num) +# 'ssl' : belongs in libssl (loaded from libssl.num) +# 'other' : belongs in libcrypto or libssl (loaded from other.syms) +# 'internal' : Internal +# 'public' : Public (generic name or external documentation) +# Any of these values except 'public' may be prefixed with 'missing_' +# to indicate that they are known to be missing. +my %state; +# %missing is affected by loading util/missing*.txt. Values may be one of: +# 'crypto' : belongs in libcrypto (loaded from libcrypto.num) +# 'ssl' : belongs in libssl (loaded from libssl.num) +# 'other' : belongs in libcrypto or libssl (loaded from other.syms) +# 'internal' : Internal +my %missing; + +# Parse libcrypto.num, etc., and return sorted list of what's there. +sub loadnum ($;$) { my $file = shift; - my @apis; + my $type = shift; + my @symbols; - open my $IN, '<', $file + open my $IN, '<', catfile($config{sourcedir}, $file) or die "Can't open $file, $!, stopped"; while ( <$IN> ) { + next if /^#/; next if /\bNOEXIST\b/; - next if /\bEXPORT_VAR_AS_FUNC\b/; - push @apis, $1 if /([^\s]+).\s/; + my @fields = split(); + die "Malformed line $. in $file: $_" + if scalar @fields != 2 && scalar @fields != 4; + $state{$fields[0].'(3)'} = $type // 'internal'; } - close $IN; - - print "# Found ", scalar(@apis), " in $file\n"; - return sort @apis; } -sub getdocced() +# Load file of symbol names that we know aren't documented. +sub loadmissing($;$) { - my $dir = shift; - my %return; - - foreach my $pod ( glob("$dir/*.pod") ) { - my %podinfo = extract_pod_info($pod); - foreach my $n ( @{$podinfo{names}} ) { - $return{$n} = $pod; - print "# Duplicate $n in $pod and $dups{$n}\n" - if defined $dups{$n} && $dups{$n} ne $pod; - $dups{$n} = $pod; - } + my $missingfile = shift; + my $type = shift; + + open FH, catfile($config{sourcedir}, $missingfile) + or die "Can't open $missingfile"; + while ( ) { + chomp; + next if /^#/; + $missing{$_} = $type // 'internal'; } + close FH; +} + +# Check that we have consistent public / internal documentation and declaration +sub checkstate () { + # Collect all known names, no matter where they come from + my %names = map { $_ => 1 } (keys %name_map, keys %state, keys %missing); - return %return; + # Check section 3, i.e. functions and macros + foreach ( grep { $_ =~ /\(3\)$/ } sort keys %names ) { + next if ( $name_map{$_} // '') eq '' || $_ =~ /$ignored/; + + # If a man-page isn't recorded public or if it's recorded missing + # and internal, it's declared to be internal. + my $declared_internal = + ($state{$_} // 'internal') eq 'internal' + || ($missing{$_} // '') eq 'internal'; + # If a man-page isn't recorded internal or if it's recorded missing + # and not internal, it's declared to be public + my $declared_public = + ($state{$_} // 'internal') ne 'internal' + || ($missing{$_} // 'internal') ne 'internal'; + + err("$_ is supposedly public but is documented as internal") + if ( $declared_public && $name_map{$_} =~ /\/internal\// ); + err("$_ is supposedly internal but is documented as public") + if ( $declared_internal && $name_map{$_} !~ /\/internal\// ); + } } -my %docced; +# Check for undocumented macros; ignore those in the "missing" file +# and do simple check for #define in our header files. +sub checkmacros { + my $count = 0; + my %seen; -sub printem() -{ - my $libname = shift; - my $numfile = shift; + foreach my $f ( files(TAGS => 'public_header') ) { + # Skip some internals we don't want to document yet. + my $b = basename($f); + next if $b eq 'asn1.h'; + next if $b eq 'asn1t.h'; + next if $b eq 'err.h'; + open(IN, $f) + or die "Can't open $f, $!"; + while ( ) { + next unless /^#\s*define\s*(\S+)\(/; + my $macro = "$1(3)"; # We know they're all in section 3 + next if defined $name_map{$macro} + || defined $missing{$macro} + || defined $seen{$macro} + || $macro =~ /$ignored/; + + err("$f:", "macro $macro undocumented") + if $opt_d || $opt_e; + $count++; + $seen{$macro} = 1; + } + close(IN); + } + err("# $count macros undocumented (count is approximate)") + if $count > 0; +} + +# Find out what is undocumented (filtering out the known missing ones) +# and display them. +sub printem ($) { + my $type = shift; my $count = 0; + my %seen; - foreach my $func ( &parsenum($numfile) ) { - next if $docced{$func}; + foreach my $func ( grep { $_ eq $type } sort keys %state ) { + $func .= '(3)'; # We know they're all in section 3 - # Skip ASN1 utilities - next if $func =~ /^ASN1_/; + # Skip functions known to be missing + next if $opt_v && defined $name_map{$func} && $name_map{$func} eq ''; - print "$libname:$func\n"; + # Skip known names + next if defined $name_map{$func} || defined $seen{$func}; + + err("$type:", "function $func undocumented") + if $opt_d || $opt_e; $count++; + $seen{$func} = 1; } - print "# Found $count missing from $numfile\n\n"; + err("# $count lib$type names are not documented") + if $count > 0; } - -# Collection of links in each POD file. -# filename => [ "foo(1)", "bar(3)", ... ] -my %link_collection = (); -# Collection of names in each POD file. -# "name(s)" => filename -my %name_collection = (); - +# Collect all the names in a manpage. sub collectnames { - my $filename = shift; + my %podinfo = @_; + my $filename = $podinfo{filename}; $filename =~ m|man(\d)/|; my $section = $1; my $simplename = basename($filename, ".pod"); my $id = "${filename}:1:"; + my $is_generic = $podinfo{contents} =~ /^=for openssl generic/ms; - my $contents = ''; - { - local $/ = undef; - open POD, $filename or die "Couldn't open $filename, $!"; - $contents = ; - close POD; - } - - $contents =~ /=head1 NAME([^=]*)=head1 /ms; - my $tmp = $1; - unless (defined $tmp) { - print "$id weird name section\n"; - return; - } - $tmp =~ tr/\n/ /; - $tmp =~ s/-.*//g; - - my @names = map { s/\s+//g; $_ } split(/,/, $tmp); - unless (grep { $simplename eq $_ } @names) { - print "$id missing $simplename\n"; - push @names, $simplename; + unless ( grep { $simplename eq $_ } @{$podinfo{names}} ) { + err($id, "$simplename not in NAME section"); + push @{$podinfo{names}}, $simplename; } - foreach my $name (@names) { + foreach my $name ( @{$podinfo{names}} ) { next if $name eq ""; + err($id, "'$name' contains whitespace") + if $name =~ /\s/; my $name_sec = "$name($section)"; - if (! exists $name_collection{$name_sec}) { - $name_collection{$name_sec} = $filename; - } else { #elsif ($filename ne $name_collection{$name_sec}) { - print "$id $name_sec also in $name_collection{$name_sec}\n"; + if ( !defined $name_map{$name_sec} ) { + $name_map{$name_sec} = $filename; + $state{$name_sec} = + ( $filename =~ /\/internal\// ? 'internal' : 'public' ) + if $is_generic; + } elsif ( $filename eq $name_map{$name_sec} ) { + err($id, "$name_sec duplicated in NAME section of", + $name_map{$name_sec}); + } elsif ( $name_map{$name_sec} ne '' ) { + err($id, "$name_sec also in NAME section of", + $name_map{$name_sec}); } } - my @foreign_names = - map { map { s/\s+//g; $_ } split(/,/, $_) } - $contents =~ /=for\s+comment\s+foreign\s+manuals:\s*(.*)\n\n/; - foreach (@foreign_names) { - $name_collection{$_} = undef; # It still exists! + if ( $podinfo{contents} =~ /=for openssl foreign manual (.*)\n/ ) { + foreach my $f ( split / /, $1 ) { + $name_map{$f} = ''; # It still exists! + $state{$f} = 'public'; # We assume! + } } - my @links = $contents =~ /L< + my @links = + $podinfo{contents} =~ /L< # if the link is of the form L, # then remove 'something'. Note that 'something' # may contain POD codes as well... (?:(?:[^\|]|<[^>]*>)*\|)? - # we're only interested in referenses that have + # we're only interested in references that have # a one digit section number ([^\/>\(]+\(\d\)) /gx; - $link_collection{$filename} = [ @links ]; + $link_map{$filename} = [ @links ]; } +# Look for L<> ("link") references that point to files that do not exist. sub checklinks { - foreach my $filename (sort keys %link_collection) { - foreach my $link (@{$link_collection{$filename}}) { - print "${filename}:1: reference to non-existing $link\n" - unless exists $name_collection{$link}; + foreach my $filename ( sort keys %link_map ) { + foreach my $link ( @{$link_map{$filename}} ) { + err("${filename}:1:", "reference to non-existing $link") + unless defined $name_map{$link} || defined $missing{$link}; + err("${filename}:1:", "reference of internal $link in public documentation $filename") + if ( ( ($state{$link} // '') eq 'internal' + || ($missing{$link} // '') eq 'internal' ) + && $filename !~ /\/internal\// ); + } + } +} + +# Cipher/digests to skip if they show up as "not implemented" +# because they are, via the "-*" construct. +my %skips = ( + 'aes128' => 1, + 'aes192' => 1, + 'aes256' => 1, + 'aria128' => 1, + 'aria192' => 1, + 'aria256' => 1, + 'camellia128' => 1, + 'camellia192' => 1, + 'camellia256' => 1, + 'des' => 1, + 'des3' => 1, + 'idea' => 1, + 'cipher' => 1, + 'digest' => 1, +); + +# Check the flags of a command and see if everything is in the manpage +sub checkflags { + my $cmd = shift; + my $doc = shift; + my %cmdopts; + my %docopts; + my %localskips; + + # Get the list of options in the command. + open CFH, "$openssl list --options $cmd|" + or die "Can list options for $cmd, $!"; + while ( ) { + chop; + s/ .$//; + $cmdopts{$_} = 1; + } + close CFH; + + # Get the list of flags from the synopsis + open CFH, "<$doc" + or die "Can't open $doc, $!"; + while ( ) { + chop; + last if /DESCRIPTION/; + if ( /=for openssl ifdef (.*)/ ) { + foreach my $f ( split / /, $1 ) { + $localskips{$f} = 1; + } + next; + } + my $opt; + if ( /\[B<-([^ >]+)/ ) { + $opt = $1; + } elsif ( /^B<-([^ >]+)/ ) { + $opt = $1; + } else { + next; + } + $opt = $1 if $opt =~ /I<(.*)/; + $docopts{$1} = 1; + } + close CFH; + + # See what's in the command not the manpage. + my @undocced = sort grep { !defined $docopts{$_} } keys %cmdopts; + foreach ( @undocced ) { + next if /-/; # Skip the -- end-of-flags marker + err("$doc: undocumented option -$_"); + } + + # See what's in the command not the manpage. + my @unimpl = sort grep { !defined $cmdopts{$_} } keys %docopts; + foreach ( @unimpl ) { + next if defined $skips{$_} || defined $localskips{$_}; + err("$doc: $cmd does not implement -$_"); + } +} + +## +## MAIN() +## Do the work requested by the various getopt flags. +## The flags are parsed in alphabetical order, just because we have +## to have *some way* of listing them. +## + +if ( $opt_c ) { + my @commands = (); + + # Get list of commands. + open FH, "$openssl list -1 -commands|" + or die "Can't list commands, $!"; + while ( ) { + chop; + push @commands, $_; + } + close FH; + + # See if each has a manpage. + foreach my $cmd ( @commands ) { + next if $cmd eq 'help' || $cmd eq 'exit'; + my @doc = ( grep { basename($_) eq "openssl-$cmd.pod" + # For "tsget" and "CA.pl" pod pages + || basename($_) eq "$cmd.pod" } + files(TAGS => [ 'manual', 'man1' ]) ); + my $num = scalar @doc; + if ($num > 1) { + err("$num manuals for 'openssl $cmd': ".join(", ", @doc)); + } elsif ($num < 1) { + err("no manual for 'openssl $cmd'"); + } else { + checkflags($cmd, @doc); } } + + # See what help is missing. + open FH, "$openssl list --missing-help |" + or die "Can't list missing help, $!"; + while ( ) { + chop; + my ($cmd, $flag) = split; + err("$cmd has no help for -$flag"); + } + close FH; + + exit $status; } -getopts('lnshu'); +# Populate %state +loadnum('util/libcrypto.num', 'crypto'); +loadnum('util/libssl.num', 'ssl'); +loadnum('util/other.syms', 'other'); +loadnum('util/other-internal.syms'); +if ( $opt_o ) { + loadmissing('util/missingmacro111.txt', 'crypto'); + loadmissing('util/missingcrypto111.txt', 'crypto'); + loadmissing('util/missingssl111.txt', 'ssl'); +} else { + loadmissing('util/missingmacro.txt', 'crypto'); + loadmissing('util/missingcrypto.txt', 'crypto'); + loadmissing('util/missingssl.txt', 'ssl'); + loadmissing('util/missingcrypto-internal.txt'); + loadmissing('util/missingssl-internal.txt'); +} -&help() if ( $opt_h ); +if ( $opt_n || $opt_l || $opt_u || $opt_v ) { + my @files_to_read = ( $opt_n && @ARGV ) ? @ARGV : files(TAGS => 'manual'); -die "Need one of -l -n -s or -u flags.\n" - unless $opt_l or $opt_n or $opt_s or $opt_u; + foreach (@files_to_read) { + my %podinfo = extract_pod_info($_, { debug => $debug }); -if ( $opt_n or $opt_s ) { - foreach (@ARGV ? @ARGV : glob('doc/*/*.pod')) { - &check($_); + collectnames(%podinfo) + if ( $opt_l || $opt_u || $opt_v ); + + check(%podinfo) + if ( $opt_n ); } } if ( $opt_l ) { - foreach (@ARGV ? @ARGV : glob('doc/*/*.pod')) { - collectnames($_); - } checklinks(); } -if ( $opt_u ) { - my %temp = &getdocced('doc/man3'); - foreach ( keys %temp ) { - $docced{$_} = $temp{$_}; +if ( $opt_n ) { + # If not given args, check that all man1 commands are named properly. + if ( scalar @ARGV == 0 ) { + foreach ( files(TAGS => [ 'public_manual', 'man1' ]) ) { + next if /CA.pl/ || /openssl\.pod/ || /tsget\.pod/; + err("$_ doesn't start with openssl-") unless /openssl-/; + } } - &printem('crypto', 'util/libcrypto.num'); - &printem('ssl', 'util/libssl.num'); } -exit; +checkstate(); + +if ( $opt_u || $opt_v) { + printem('crypto'); + printem('ssl'); + checkmacros(); +} + +exit $status;