+# Complain when SECTION ($3) is found after BEFORE ($4) instead of ahead
+# of it.  Nothing is reported unless both =head1 sections are present.
+sub check_section_location {
+    my ($id, $contents, $section, $before) = @_;
+
+    # Without both sections there is no ordering to verify
+    if ( $contents !~ /=head1 $section/
+         or $contents !~ /=head1 $before/ ) {
+        return;
+    }
+
+    # BEFORE followed (anywhere later) by SECTION means the order is wrong
+    if ( $contents =~ /=head1 $before.*=head1 $section/ms ) {
+        err($id, "$section should appear before $before section");
+    }
+}
+
+# Detect repeated headings and enforce heading style.  A =head1 may occur
+# only once per page; any lower-level heading may occur only once inside
+# the current =head1.  All lower levels share a single set which is only
+# cleared by a new =head1 (so =head2 and =head3 are treated alike) -- good
+# enough for now.  Capitalization and periods in headings are checked too.
+sub check_head_style {
+    my ($id, $contents) = @_;
+    my %seen_head1;
+    my %seen_subhead;
+
+    # Only the =head lines are of interest
+    for my $heading ( grep { /^=head/ } split /\n+/, $contents ) {
+        if ( $heading =~ /head1/ ) {
+            if ( defined $seen_head1{$heading} ) {
+                err($id, "duplicate section $heading");
+            }
+            $seen_head1{$heading} = 1;
+            # A new top-level section starts with a clean subheading set
+            %seen_subhead = ();
+        }
+        else {
+            if ( defined $seen_subhead{$heading} ) {
+                err($id, "duplicate subsection $heading");
+            }
+            $seen_subhead{$heading} = 1;
+        }
+
+        # A period followed by a non-word character, or ending the line
+        if ( $heading =~ /\.[^\w]/ or $heading =~ /\.$/ ) {
+            err($id, "period in =head");
+        }
+        # =head1 titles are expected to be entirely upper case ...
+        if ( $heading =~ /head1.*[a-z]/ ) {
+            err($id, "not all uppercase in =head1");
+        }
+        # ... while lower-level titles must not be
+        if ( $heading =~ /head[234][ A-Z0-9]+$/ ) {
+            err($id, "all uppercase in subhead");
+        }
+    }
+}
+
+# Because options and symbols may carry extra (possibly nested) markup, we
+# need a regexp that extracts one complete B<>, I<> or L<> markup chunk,
+# including any markup nested inside it.
+# Please read up on recursive subpatterns, /(?PARNO)/ and /(?R)/, in perlre(1).
+# (note: order is important, the recursion (?-1) needs to come before .)
+# (note: non-greedy is important, or something like 'B<foo> and B<bar>'
+# will be captured as one item)
+my $markup_re =
+ qr/( # Capture group
+ [BIL]< # The start of what we recurse on
+ (?:(?-1)|.)*? # recurse the whole regexp (refering to
+ # the last opened capture group, i.e. the
+ # start of this regexp), or pick next
+ # character. Do NOT be greedy!
+ > # The end of what we recurse on
+ )/x; # (the x allows this sort of split up regexp)
+
+# Matches what may follow the leading dash of an option (the dash itself is
+# matched where this is used, see normalise_option): a letter, possibly
+# followed by letters, digits, dashes and underscores, where the last
+# character must be a letter or a digit.  The single -?, -n (n being a
+# digit) and -- forms are accepted as well.
+my $option_re =
+ qr/(?:
+ \? # Single question mark
+ |
+ \d # Single digit
+ |
+ - # Single dash (--)
+ |
+ [[:alpha:]](?:[-_[:alnum:]]*?[[:alnum:]])?
+ )/x;
+
+# Decide whether $thing is a properly marked up option.
+# $id and $filename are accepted but not used here.
+# The result is one of:
+#
+# undef       $thing is not an option at all
+# ""          $thing looks like an option but is malformed
+# $unwrapped  the option, with only the outermost B<> wrapping removed
+sub normalise_option {
+    my ($id, $filename, $thing) = @_;
+
+    # Peel off the outermost B<...> only; any nested markup is kept
+    (my $unwrapped = $thing) =~ s/^B<//;
+    $unwrapped =~ s/>$//;
+
+    # Drop every piece of markup to get at the bare option text
+    (my $unmarked = $thing) =~ s/[BIL]<|>//msg;
+
+    # Anything that does not begin with a dash is something else, not an
+    # option.  (Explicit undef, as documented above.)
+    return undef unless $unwrapped =~ /^\s*-/;
+
+    # It is an option: hand it back if well formed, "" if malformed
+    return $unmarked =~ /^-${option_re}$/ ? $unwrapped : "";
+}
+
+# Checks of command option (man1) formatting.  The man1 checks are
+# restricted to the SYNOPSIS and OPTIONS sections; the rest is too
+# free form, we simply cannot be too strict there.
+#
+# Arguments:
+#   $id        identifier passed to err() and used to prefix debug output
+#   $filename  passed on to normalise_option()
+#   $contents  the POD text to check
+# Every problem found is reported through err().
+
+sub option_check {
+    my $id = shift;
+    my $filename = shift;
+    my $contents = shift;
+
+    # Use a list assignment so that a failed match leaves $synopsis
+    # undefined rather than picking up a stale $1 from some earlier
+    # successful match, then fall back to an empty string so the scans
+    # below simply find nothing when there is no SYNOPSIS to look at.
+    my ($synopsis) = $contents =~ /=head1\s+SYNOPSIS(.*?)=head1/s;
+    $synopsis //= '';
+
+    # Some pages have more than one OPTIONS section, let's make sure
+    # to get them all
+    my $options = '';
+    while ( $contents =~ /=head1\s+[A-Z ]*?OPTIONS$(.*?)(?==head1)/msg ) {
+        $options .= $1;
+    }
+
+    # Look for options with no or incorrect markup
+    while ( $synopsis =~
+            /(?<![-<[:alnum:]])-(?:$markup_re|.)*(?![->[:alnum:]])/msg ) {
+        err($id, "Malformed option [1] in SYNOPSIS: $&");
+    }
+
+    # Every markup chunk in SYNOPSIS that looks like an option must be
+    # a well formed one
+    while ( $synopsis =~ /$markup_re/msg ) {
+        my $found = $&;
+        print STDERR "$id:DEBUG[option_check] SYNOPSIS: found $found\n"
+            if $debug;
+        my $option_uw = normalise_option($id, $filename, $found);
+        err($id, "Malformed option [2] in SYNOPSIS: $found")
+            if defined $option_uw && $option_uw eq '';
+    }
+
+    # In OPTIONS, we look for =item paragraphs.
+    # (?=^\s*$) detects an empty line.
+    while ( $options =~ /=item\s+(.*?)(?=^\s*$)/msg ) {
+        my $item = $&;
+
+        # $1 is an optional opening bracket right before the markup chunk,
+        # $2 is the markup chunk itself
+        while ( $item =~ /(\[\s*)?($markup_re)/msg ) {
+            my $found = $2;
+            print STDERR "$id:DEBUG[option_check] OPTIONS: found $&\n"
+                if $debug;
+            # $1 must be looked at before the match on $found, since a
+            # successful match there resets the capture variables
+            err($id, "Unexpected bracket in OPTIONS =item: $item")
+                if ($1 // '') ne '' && $found =~ /^B<\s*-/;
+
+            my $option_uw = normalise_option($id, $filename, $found);
+            err($id, "Malformed option in OPTIONS: $found")
+                if defined $option_uw && $option_uw eq '';
+        }
+    }
+}
+
+# Normal symbol form: a letter or underscore, then letters, digits, underscores
+my $symbol_re = qr/[[:alpha:]_][_[:alnum:]]*?/;
+
+# Checks of function name (man3) formatting.  These are simpler than the
+# man1 checks: only names immediately followed by () are looked at, and
+# only those that carry POD markup.
+
+sub functionname_check {
+    my ($id, $filename, $contents) = @_;
+
+    while ( $contents =~ /($markup_re)\(\)/msg ) {
+        my $symbol = $1;
+        print STDERR "$id:DEBUG[functionname_check] SYNOPSIS: found $&\n"
+            if $debug;
+
+        # The symbol with all markup stripped
+        (my $unmarked = $symbol) =~ s/[BIL]<|>//msg;
+
+        # The outermost markup must be B<>, and what remains once all
+        # markup is gone must look like a normal symbol
+        my $well_formed =
+            $symbol =~ /^B<.*>$/ && $unmarked =~ /^${symbol_re}$/;
+        err($id, "Malformed symbol: $symbol")
+            unless $well_formed;
+    }
+
+    # We can't do the kind of collecting coolness that option_check()
+    # does, because there are too many things that can't be found in
+    # name repositories like the NAME sections, such as symbol names
+    # with a variable part (typically marked up as B<foo_I<TYPE>_bar>
+}
+
+# Discouraged spellings (keys) mapped to the preferred wording (values).
+# Taken from http://man7.org/linux/man-pages/man7/man-pages.7.html
+my %preferred_words = (
+    'bitmask'         => 'bit mask',
+    'builtin'         => 'built-in',
+    # 'epoch'         => 'Epoch',   # handled specially, in wording()
+    'file name'       => 'filename',
+    'file system'     => 'filesystem',
+    'host name'       => 'hostname',
+    'i-node'          => 'inode',
+    'lower case'      => 'lowercase',
+    'lower-case'      => 'lowercase',
+    'non-zero'        => 'nonzero',
+    'path name'       => 'pathname',
+    'pseudo-terminal' => 'pseudoterminal',
+    'reserved port'   => 'privileged port',
+    'system port'     => 'privileged port',
+    'realtime'        => 'real-time',
+    'real time'       => 'real-time',
+    'runtime'         => 'run time',
+    'saved group ID'  => 'saved set-group-ID',
+    'saved set-GID'   => 'saved set-group-ID',
+    'saved user ID'   => 'saved set-user-ID',
+    'saved set-UID'   => 'saved set-user-ID',
+    'set-GID'         => 'set-group-ID',
+    'setgid'          => 'set-group-ID',
+    'set-UID'         => 'set-user-ID',
+    'setuid'          => 'set-user-ID',
+    'super user'      => 'superuser',
+    'super-user'      => 'superuser',
+    'super block'     => 'superblock',
+    'super-block'     => 'superblock',
+    'time stamp'      => 'timestamp',
+    'time zone'       => 'timezone',
+    'upper case'      => 'uppercase',
+    'upper-case'      => 'uppercase',
+    'useable'         => 'usable',
+    'userspace'       => 'user space',
+    'user name'       => 'username',
+    'zeroes'          => 'zeros',
+);
+
+# Report discouraged wording found in $contents.
+#
+# Arguments:
+#   $id        identifier passed to err() with every report
+#   $contents  the text to check
+#
+# Each key of %preferred_words is searched for as a whole word, ignoring
+# case; 'epoch' is handled separately because there only the lower case
+# spelling is wrong.
+sub wording {
+    my $id = shift;
+    my $contents = shift;
+
+    # Walk the keys in sorted order so that the reports come out in the
+    # same order on every run, whatever order the hash happens to yield
+    foreach my $k ( sort keys %preferred_words ) {
+        # Sigh, trademark
+        next if $k eq 'file system'
+            and $contents =~ /Microsoft Encrypted File System/;
+        err($id, "found '$k' should use '$preferred_words{$k}'")
+            if $contents =~ /\b\Q$k\E\b/i;
+    }
+    # Case sensitive on purpose: 'Epoch' is the preferred spelling
+    err($id, "found 'epoch' should use 'Epoch'")
+        if $contents =~ /\bepoch\b/;
+}
+
+sub check {