#!/usr/bin/env perl
# ts=4
# Warren Block
# special thanks to Glen Barber for limitless
# patience and the use of his svn repository

# igor: check man pages and DocBook
# needs Perl 5.8 or higher

use strict;
use warnings;
use locale;

#  Copyright (c) 2012, 2013, 2014, 2015, 2016 Warren Block
#  All rights reserved.
#
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions
#  are met:
#  1. Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#  2. Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#
#  THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
#  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
#  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
#  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
#  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
#  OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
#  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
#  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
#  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
#  SUCH DAMAGE.

use Getopt::Std;
use File::Basename;
use POSIX qw/strftime/;

# external helper programs, by absolute path
my ($file, $gzcat, $bzcat, $man)
	= ("/usr/bin/file", "/usr/bin/gzcat", "/usr/bin/bzcat", "/usr/bin/man");

# directory in which temporary files are created
my $tmpdir = "/tmp";

# revision keyword; usage() derives the version number from it
my $rev = '$Revision$';

# shared file handle and the names of temporary files
# (removetempfiles() deletes $stdinfile and $tmpfile)
my $fh;
my $tmpfile;
my $stdinfile;
my $docdate;

# per-line state; $origline starts out undefined
my $prevline     = '';
my $prevnonblank = '';
my $origline;
my $ignoreblock;
my $titleblock = 0;

# set by initialize()
my $today;
my $linelensgml;
my $startline;
my $stopline;
my $vid;

# regex source strings built by init_ignoreblocks()
my $ignoreblockstart;
my $ignoreblockend;

# lookup tables and regexes filled in by the init_* routines
my %misspelled_words;
my @badphrases;
my @contractions;
my @freebsdobs;
my $lc_regex;
my $uc_regex;
my $fixedcase_regex;
my $ignoreregex;
my $indent_regex;
my $inline_regex;
my $redundantword_regex;
my $redundanttagword_regex;
my @straggler_tags;
my $literalblock_regex;
my $eos_regex;
my @openclose_tags;
my $openclose_regex;
my %opentag;
my $list_regex;
my $parawrap_regex;

my $bname;
my $type;

# program name as invoked, without the directory part
my $prog = basename($0);

# Print the program version and a summary of all command line options
# to standard output, then exit with status 0.
#
# The version is "1.N" where N is the number in the expanded $Revision$
# keyword held in $rev.  When the keyword has not been expanded there is
# no number to show, so "unknown" is printed instead of reading a stale
# or undefined $1.
sub usage {
	my $version = ( $rev =~ /Revision: (\d+)/ ) ? "1.$1" : 'unknown';
	print <<USAGE;
$prog $version
usage: $prog -h
       $prog [-abcdefgilmnoprstuvwxyzDERSWXZ] [-C range] [-L n] [-V vid] file [file ...]

    -h  show summary of command line options and exit

    Output options
        -R        ANSI highlights (use with 'less -R')
        -C range  Restrict output to a range of lines from the source file
        -v        Verbose output
        -V vid    Restrict output to a VID (or 'latest') in a VuXML file
        -X        XML output (overrides -R)

    Tests
        If individual test options are given, only those tests are done.

    Shortcuts
        -z  all standard non-whitespace tests
        -Z  all standard whitespace tests

    Tests for all files
        -a  abbreviations like "e.g.," and "i.e.,"
        -b  bad phrases
        -f  FreeBSD obsolete features
        -r  repeated words
        -s  spelling
        -u  contractions
        -w  whitespace
        -y  style suggestions (off by default)

    mdoc(7) tests
        -d  document date (.Dd)
        -e  sentences should begin on a new line
        -g  See Also xrefs are not duplicated
        -m  mdoc structure requirements
        -p  mdoc whitespace requirements
        -x  additional xref (.Xr) tests (off by default, implies -m)
        -D  all but document date (same as -abefgmprsuw)

    DocBook tests
        -c  title capitalization
        -i  indentation
        -l  long lines (see -L below)
        -n  sentences start with two spaces
        -o  open/close tags match
        -t  tag usage style
        -E  writing style
        -S  straggler tags with undesired content whitespace
        -W  whitespace on SGML indentation

    DocBook test options
        -L n  set line length used in long line test (default 70)

    EXAMPLES

        $prog -R gpart.8.gz | less -R -S
        $prog -R -D -y /usr/share/man/man7/tuning.7.gz | less -R -S
        cat /usr/share/man/man1/csh.1.gz | $prog -D
        $prog -Rz chapter.sgml | less -RS
        $prog -R `find /usr/doc/en_US.ISO8859-1/ -name "*.xml"` | less -RS
        $prog -RD /usr/share/man/man8/* | less -RS

    gzip and bzip2 files are automatically decompressed.
USAGE
	exit 0;
}

# One package variable per option letter; getopts() below fills them in.
our ($opt_a, $opt_b, $opt_c, $opt_d, $opt_e, $opt_f, $opt_g, $opt_h,
	 $opt_i, $opt_l, $opt_m, $opt_n, $opt_o, $opt_p, $opt_r, $opt_s,
	 $opt_t, $opt_u, $opt_v, $opt_w, $opt_x, $opt_y, $opt_z, $opt_C,
	 $opt_E, $opt_D, $opt_L, $opt_R, $opt_S, $opt_V, $opt_W, $opt_X,
	 $opt_Z);

# -C, -L and -V take an argument; every other letter is a plain flag
getopts('abcdefghilmnoprstuvwxyzC:DEL:RSV:WXZ');

usage() if $opt_h;

# -v enables diagnostic messages.  The variable is always given a value:
# a "my" declaration combined with a statement modifier ("my $x = 1 if
# COND") has undefined behavior according to perlsyn.
my $verbose = $opt_v ? 1 : 0;

# ANSI color names; initialize() turns them into the escape sequences
# stored in %ansi
my @colors = ('red', 'green', 'yellow', 'blue', 'magenta', 'cyan');
my %ansi;
my ($inverse, $reset) = ("\033[7m", "\033[0;24;27m");

# left/right delimiters placed around output fields; initialize()
# replaces them with ANSI sequences when -R is given
my ($lf, $rf) = ('', '');		# filename
my ($ll, $lr) = ('', '');		# line number
my ($lh, $rh) = ('[', ']');		# highlight
my ($li, $ri) = ('[', ']');		# whitespace

# mdoc SEE ALSO section flag and xrefs
my %seealsoxrefs;
my $seealso = 0;

# mdoc macros
my %macroval;
my @macros = ('Dd', 'Dt', 'Os', 'Sh_NAME', 'Nm', 'Nd', 'Sh_SYNOPSIS',
			  'Sh_DESCRIPTION');

# Signal handler installed for INT and PIPE by initialize(): close the
# shared file handle if it is open, delete temporary files, and exit
# with status 0.
sub INT_handler {
	# $fh stays undefined until a file has been opened, and calling
	# fileno() on an undefined handle is itself an error, so check
	# that first.  fileno() returns undef for a closed handle.
	if ( defined $fh && defined fileno($fh) ) {
		close $fh or die "could not close filehandle:$!\n";
	}
	removetempfiles();
	exit 0;
}

# Set up all global state before any file is read: today's date, the
# ANSI color table and output delimiters, option post-processing, the
# lookup tables of the individual tests, signal handlers, and the
# default input ("stdin") when no file is named.
sub initialize {
	# collapse any doubled space in the formatted date
	($today = strftime("%B %e, %Y", localtime)) =~ s/  / /g;

	# ANSI color codes: "darkNAME" is the plain color, "NAME" the bold one
	for my $idx (0 .. $#colors) {
		my ($name, $code) = ($colors[$idx], $idx + 31);
		$ansi{"dark$name"} = "\033[${code}m";
		$ansi{$name}       = "\033[1;${code}m";
	}
	# minor hackery: darkblue is so dark it needs a white background
	$ansi{"darkblue"} .= "\033[47m";

	# use ANSI highlights
	if ( $opt_R ) {
		($lf, $rf) = ($ansi{darkyellow}, $reset);	# filename
		($ll, $lr) = ($ansi{darkcyan},   $reset);	# line number
		($lh, $rh) = ($ansi{darkgreen},  $reset);	# highlight
		($li, $ri) = ($inverse,          $reset);	# whitespace
	}

	# SGML line length: default 70, overridden by a positive -L value
	$linelensgml = 70;
	if ( defined $opt_L and $opt_L =~ /(\d+)/ ) {
		my $len = $1;
		$linelensgml = $len if $len > 0;
	}

	# -C start-end limits output to a range of lines
	if ( $opt_C ) {
		($startline, $stopline) = split(':|-', $opt_C);
		if ( !$startline && !$stopline ) {
			die "-C option requires a line number range (start- | start-end | -end)\n";
		}
	}

	# -V vid limits output to a range of lines
	if ( $opt_V ) {
		$vid = $opt_V;
		if ( $vid ne 'latest' && $vid !~ /.*-.*-/ ) {
			die "-V requires vulnerability ID like 348bfa69-25a2-11e5-ade1-0011d823eebd\n";
		}
		$startline = $stopline = 999999;
	}

	# -D equals -abefgmprsuw
	if ( $opt_D ) {
		$_ = 1 for ($opt_a, $opt_b, $opt_e, $opt_f, $opt_g, $opt_m,
					$opt_p, $opt_r, $opt_s, $opt_u, $opt_w);
	}

	# -z: all non-whitespace tests
	if ( $opt_z ) {
		$_ = 1 for ($opt_a, $opt_b, $opt_c, $opt_d, $opt_e, $opt_f,
					$opt_g, $opt_m, $opt_o, $opt_p, $opt_r, $opt_s,
					$opt_u, $opt_E, $opt_S);
	}

	# -Z: all whitespace tests
	if ( $opt_Z ) {
		$_ = 1 for ($opt_i, $opt_l, $opt_n, $opt_t, $opt_w, $opt_W);
	}

	# -x implies -m
	$opt_m = 1 if $opt_x;

	# if no tests are chosen, do them all (except the off-by-default
	# -x and -y)
	my $chosen = grep { $_ } ($opt_a, $opt_b, $opt_c, $opt_d, $opt_e,
							  $opt_f, $opt_g, $opt_i, $opt_l, $opt_m,
							  $opt_n, $opt_o, $opt_p, $opt_r, $opt_s,
							  $opt_t, $opt_u, $opt_w, $opt_x, $opt_y,
							  $opt_E, $opt_S, $opt_W);
	if ( !$chosen ) {
		$_ = 1 for ($opt_a, $opt_b, $opt_c, $opt_d, $opt_e,
					$opt_f, $opt_g, $opt_i, $opt_l, $opt_m,
					$opt_n, $opt_o, $opt_p, $opt_r, $opt_s,
					$opt_t, $opt_u, $opt_w, $opt_E, $opt_S,
					$opt_W);
		$_ = 0 for ($opt_x, $opt_y);
	}

	# build the lookup tables and regexes used by the tests
	init_ignoreblocks();
	init_spellingerrors();
	init_badphrases();
	init_contractions();
	init_freebsdobs();
	init_doc_titles();
	init_doc_indentation();
	init_doc_sentence();
	init_doc_openclose();
	init_literalblock_regex();
	init_doc_writestyle();
	init_doc_stragglers();

	# ctrl-c handler; do the same thing if the pipe closes
	$SIG{$_} = 'INT_handler' for qw/ INT PIPE /;

	# autoflush
	$| = 1;

	# allow stdin
	push @ARGV, "stdin" unless @ARGV;
}

# Return the first extension of a file name: the text between the first
# dot in the base name and the next dot or the end of the name.
# Returns '' when the base name contains no dot.
sub firstext {
	my ($ext) = basename($_[0]) =~ /\.(.*?)(?:\.|$)/;
	return defined $ext ? $ext : '';
}

# Return the last extension of a file name: the text after the final
# dot in the base name.  Returns '' when the base name contains no dot.
sub lastext {
	my ($ext) = basename($_[0]) =~ /\.([^.]*?)$/;
	return defined $ext ? $ext : '';
}

# Return the base name of a path with everything from the first dot
# onward removed.
sub baseonly {
	(my $stem = basename($_[0])) =~ s/\..*$//;
	return $stem;
}

# Build the name of the temporary file used for an uncompressed copy of
# $fname: it lives in $tmpdir and combines the program name, the
# process ID, and the base name and first extension of the original.
# NOTE(review): the name is predictable; File::Temp would be safer in a
# shared temporary directory.
sub tmpfilename {
	my $fname = shift;
	my ($name, $ext) = (baseonly($fname), firstext($fname));
	return "$tmpdir/$prog-tmp-$$-$name.$ext";
}

# Classify a file as "troff", "bzip", "gzip", "sgml", "xml" or
# "unknown".
#
# The last extension of the name decides when there is one; otherwise
# the first two words printed by file(1) are used.
sub filetype {
	my $fname = shift;
	# detect type from extension if possible
	my $ext = lastext($fname);
	if ( $ext ) {
		print "detecting file type by extension: '$ext'\n" if $verbose;
		# bz2 must be tested before the digit test below: "bz2" itself
		# contains a digit and would otherwise be taken for a man page
		# section
		return "bzip"  if $ext =~ /bz2/i;
		return "troff" if $ext =~ /\d/;
		return "gzip"  if $ext =~ /gz/i;
		return "sgml"  if $ext =~ /sgml/i;
		return "xml"   if $ext =~ /xml/i;
		return "unknown";
	}
	# fall back to file(1)
	print "detecting file type with file(1)\n" if $verbose;
	my $out = '';
	# list-form pipe open: no shell is involved, so spaces and shell
	# metacharacters in the file name are harmless; "--" keeps a name
	# starting with "-" from being read as an option
	if ( open my $ph, '-|', $file, '-b', '--', $fname ) {
		local $/;
		my $slurp = <$ph>;
		$out = $slurp if defined $slurp;
		# best effort: a failing file(1) simply yields "unknown"
		close $ph;
	}
	# first two words; only trust $1 when the match succeeded
	if ( $out =~ /^(\S+\s+\S+)/ ) {
		my $id = $1;
		return "troff" if $id =~ /^troff/;
		return "sgml"  if $id =~ /^exported SGML/;
		# some DocBook documents are detected as "Lisp/Scheme"
		return "sgml"  if $id =~ /^Lisp\/Scheme/;
		return "gzip"  if $id =~ /^gzip/;
		return "bzip"  if $id =~ /^bzip/;
	}
	return "unknown";
}

# Uncompress $fname into a temporary file and return that file's name.
#
# $type selects the decompressor: a value matching "gzip" uses $gzcat,
# one matching "bzip" uses $bzcat; anything else is fatal.  Dies when
# the temporary file cannot be written or the decompressor fails.
sub uncompress {
	my ($fname, $type) = @_;
	my $tmpfile = tmpfilename($fname);
	print "uncompressing '$fname' to '$tmpfile'\n" if $verbose;

	my $cat;
	if ( $type =~ /gzip/ ) {
		$cat = $gzcat;
	}
	elsif ( $type =~ /bzip/ ) {
		$cat = $bzcat;
	}
	else {
		die "unknown compression type '$type'\n";
	}

	open my $out, '>', $tmpfile
		or die "could not create '$tmpfile':$!\n";
	binmode $out;

	# list-form pipe open: the file name is never seen by a shell, so
	# spaces and metacharacters in it are harmless
	my $in;
	unless ( open $in, '-|', $cat, '--', $fname ) {
		my $err = $!;
		close $out;
		unlink $tmpfile;
		die "could not run '$cat':$err\n";
	}
	binmode $in;

	my $buf;
	while ( read($in, $buf, 65536) ) {
		print {$out} $buf or die "could not write '$tmpfile':$!\n";
	}
	close $out or die "could not close '$tmpfile':$!\n";

	# closing a pipe waits for the child; a false result means the
	# decompressor exited with a non-zero status (reported in $?)
	unless ( close $in ) {
		my $status = $? >> 8;
		unlink $tmpfile;
		die "could not create '$tmpfile': '$cat' exited with status $status\n";
	}
	return $tmpfile;
}

# Copy everything available on standard input into a temporary file
# named after the program and process ID, and return that file's name.
# The name is also stored in the global $stdinfile so that
# removetempfiles() can delete it later.
sub writestdinfile {
	$stdinfile = "$tmpdir/$prog-stdin.$$";
	unless ( open $fh, ">", $stdinfile ) {
		die "could not create '$stdinfile':$!\n";
	}
	my @input = <STDIN>;
	print {$fh} @input;
	unless ( close $fh ) {
		die "could not close '$stdinfile':$!\n";
	}
	return $stdinfile;
}

# Delete the temporary stdin copy and the temporary uncompressed file,
# if they were created and still exist.  Dies when a file cannot be
# removed.
sub removetempfiles {
	for my $entry ( [ stdinfile => $stdinfile ], [ tmpfile => $tmpfile ] ) {
		my ($label, $path) = @$entry;
		next unless $path && -f $path;
		print "deleting $label '$path'\n" if $verbose;
		unlink $path or die "could not remove '$path':$!\n";
	}
}

# Escape the five characters that are special in XML so that $txt can
# be placed inside an attribute value.  Returns the escaped copy.
sub xmlize {
	my $txt = shift;
	# the ampersand must be handled first; otherwise the entities
	# produced below would be escaped a second time
	$txt =~ s/&/&amp;/g;
	$txt =~ s/'/&apos;/g;
	$txt =~ s/"/&quot;/g;
	$txt =~ s/</&lt;/g;
	$txt =~ s/>/&gt;/g;
	return $txt;
}

# Print one finding.
#
#   $bname      name of the file being checked
#   $linenum    line number to report
#   $color      ANSI sequence for the description (used only with -R)
#   $errordesc  short description of the problem
#   $txt        the offending text, usually with highlight markers
#
# Nothing is printed while the current input line number ($.) is below
# $startline.  With -X an XML <error> element is written instead of the
# colon-separated text line; it carries only the line number and the
# description.
sub showline {
	my ($bname, $linenum, $color, $errordesc, $txt) = @_;
	# limit output to line number range
	return if $startline && ($. < $startline);
	if ( $opt_X ) {
		# column and severity are not presently implemented in igor
		print join('',
			"    <error ",
			"line=\"$linenum\" ",
			"column=\"1\" ",
			"severity=\"warning\" ",
			"message=\"", xmlize($errordesc), "\" ",
			"source=\"$prog\"",
			"/>\n");
	} else {
		my ($on, $off) = $opt_R ? ($color, $reset) : ('', '');
		print "${lf}${bname}${rf}:${ll}${linenum}${lr}:${on}${errordesc}${off}:${txt}\n";
	}
}

# True when $word begins with a lowercase letter a-z.
sub is_lowercase {
	return $_[0] =~ /^[a-z]/;
}

# True when $word begins with an uppercase letter A-Z.
sub is_uppercase {
	return $_[0] =~ /^[A-Z]/;
}

# Return a copy of $txt in which every literal occurrence of $word is
# wrapped in the highlight delimiters $lh and $rh.
sub highlight_word {
	my ($txt, $word) = @_;
	my $marked = $lh . $word . $rh;
	$txt =~ s/\Q$word\E/$marked/g;
	return $txt;
}

# Return $txt wrapped in the highlight delimiters $lh and $rh.
sub highlight_string {
	my ($txt) = @_;
	return join '', $lh, $txt, $rh;
}

# Return a copy of $txt with every tab replaced by eight spaces.
sub expand_tabs {
	(my $expanded = $_[0]) =~ s/\t/        /g;
	return $expanded;
}

# Return the leading whitespace of $txt with tabs expanded to spaces,
# or '' when $txt does not start with whitespace.
sub leading_space {
	my $txt = shift;
	# take the capture from the match itself: after a failed match $1
	# still holds whatever an earlier successful match left there
	my ($leading) = $txt =~ /^(\s+)/;
	return '' unless defined $leading;
	return expand_tabs($leading);
}

# Split $txt at every ignore-block start or end marker, keeping the
# markers themselves as separate elements, and return the pieces that
# are not empty or whitespace-only.  Text without any marker is
# returned unchanged as a single element.
sub splitter {
	my $txt = shift;
	my $marker = qr/$ignoreblockstart|$ignoreblockend/;
	return ($txt) if $txt !~ $marker;
	my @pieces = split /($marker)/, $txt;
	return grep { ! /^\s*$/ } @pieces;
}

# Build the regex source strings $ignoreblockstart and $ignoreblockend.
# The start pattern matches an SGML comment opener, a "<![" marked
# section opener, or the opening tag of literallayout, screen or
# programlisting; the end pattern matches the corresponding closers.
# NOTE(review): "<$tag.*?>" also matches longer tag names that merely
# begin with one of these names (e.g. <screenshot>) - confirm whether
# that is intended.
sub init_ignoreblocks {
	print "initializing ignoreblocks\n" if $verbose;
	# create regex for sgml block start and end
	my @tags = qw/ literallayout screen programlisting /;
	my @starts = ( '<!--', '<!\[', map { "<$_.*?>" } @tags );
	my @ends   = ( '-->', '\]\]>', map { "<\/$_>" } @tags );
	$ignoreblockstart = '(?:' . join('|', @starts) . ')';
	$ignoreblockend   = '(?:' . join('|', @ends)   . ')';
}

# Return a copy of $txt with every tab made visible as "{tab}".
sub showwhitespace {
	(my $visible = $_[0]) =~ s/\t/{tab}/g;
	return $visible;
}

# global tests

# Report discouraged Latin-style abbreviations in $txt: "c.f.", "e.g."
# and "i.e." without a following comma or colon, "a.k.a.", and "vs".
# Blank lines and lines inside an ignore block are skipped.
#
# Each check is a single substitution, so the text that gets
# highlighted is exactly the text that triggered the report.  Group 1
# is the boundary before the abbreviation, group 2 the abbreviation.
sub abbrevs {
	my ($bname, $line, $txt) = @_;
	return if $txt =~ /^\s*$/;
	return if $ignoreblock;

	my @checks = (
		[ qr/(\W|^)(c\.f\.)/i,               'use "cf."' ],
		[ qr/(\W|^)(e\.?g\.)(?=[^,:]|$)/,    'add comma after "e.g."' ],
		[ qr/(\W|^)(i\.?e\.)(?=[^,:]|$)/,    'add comma after "i.e."' ],
		[ qr/(\W|^)(a\.k\.a\.)/i,            'use "aka" (AP style)' ],
		# "vs" counts with a trailing period, or when followed by
		# whitespace or the end of the line
		[ qr/(\W|^)(v\.?s(?:\.|(?=\s|$)))/i, '"versus" abbreviated' ],
	);

	for my $check (@checks) {
		my ($regex, $message) = @$check;
		my $marked = $txt;
		next unless $marked =~ s/$regex/$1$lh$2$rh/;
		showline($bname, $line, $ansi{darkmagenta}, $message, $marked);
	}
}

# Fill @badphrases with the phrases reported by badphrases().
sub init_badphrases {
	print "initializing badphrases\n" if $verbose;
	@badphrases = (
		'2nd', '3rd', '3way', '4th', '5th',
		'allow to', 'allows to',
		'become gain', 'be also', 'been also', 'being build',
		'can not', "chroot'd", "compress'd", 'could might', 'could of',
		'equally as',
		'for to', "ftp'd",
		'get take', "gzip'd",
		'in on', 'it self',
		'may will', "mfc'ed", 'might could',
		'often are', "or'ing",
		'that without', 'the a', 'the each', 'the that', 'the to',
		'this mean that', 'to can', 'to for', 'to of', 'to performs',
		'will has', 'with to', 'would of',
	);
}

# Report every phrase from @badphrases found in $txt, either entirely
# on this line or wrapped from the end of the previous line ($prevline)
# onto the start of this one.  Matching is case-insensitive.
sub badphrases {
	my ($bname, $line, $txt) = @_;
	my $txtbak = $txt;
	return if $txt =~ /^\s*$/;

	for my $bad (@badphrases) {
		$txt = $txtbak;
		# check for a loose but fast match first
		if ( $txt =~ /\Q$bad\E/i ) {
			if ( $txt =~ s/\b(\Q$bad\E)\b/$lh$1$rh/i ) {
				showline($bname, $line, $ansi{yellow}, 'bad phrase', $txt);
			}
		}

		# detect bad phrases wrapping over two lines
		# skip this test if the phrase was all on the previous line
		next if ( $prevline =~ /\Q$bad\E\b/i );

		# cheap pre-check on the two lines joined together
		next unless "$prevline $txtbak" =~ /\Q$bad\E\b/i;

		# find the shortest leading part of the phrase that ends the
		# previous line; what remains in @right must start this line
		my @right = split /\s/, $bad;
		my @left  = ();
		my $leftstr = '';
		while ( @right ) {
			push @left, shift @right;
			$leftstr = join ' ', @left;
			last if ( $prevline =~ /\b\Q$leftstr\E\s*$/i );
		}
		# nothing left over: the phrase does not wrap
		next unless @right;

		# Only a real wrap when the rest of the phrase begins the
		# current line.  Without this test a previous line that merely
		# ends in the first word is reported whenever the complete
		# phrase occurs anywhere in the current line.
		my $rightstr = join ' ', @right;
		$txt = $txtbak;
		next unless $txt =~ s/^(\s*)(\Q$rightstr\E)\b/$1$lh$2$rh/i;

		showline($bname, $line - 1, $ansi{yellow}, 'bad phrase',
			"... $lh$leftstr$rh");
		showline($bname, $line, $ansi{yellow}, 'bad phrase', $txt);
	}
}

# Fill @contractions with the contractions reported by contractions().
sub init_contractions {
	print "initializing contractions\n" if $verbose;
	@contractions = (
		"aren't", "can't",
		"doesn't", "don't",
		"hasn't",
		"i'll", "i'm", "isn't", "it's", "i've",
		"let's",
		"shouldn't",
		"that's", "they'll",
		"you're", "you've",
		"we'd", "we'll", "we're", "we've", "won't", "would've",
	);
}

# Report each contraction from @contractions that occurs in $txt as a
# whole word (case-insensitive), highlighting its first occurrence.
# Blank lines are skipped.
sub contractions {
	my ($bname, $line, $txt) = @_;
	return if $txt =~ /^\s*$/;

	for my $con (@contractions) {
		# cheap substring test before the word-boundary substitution
		next unless $txt =~ /\Q$con\E/i;
		my $marked = $txt;
		next unless $marked =~ s/\b(\Q$con\E)\b/$lh$1$rh/i;
		showline($bname, $line, $ansi{yellow}, 'contraction', $marked);
	}
}

# Fill @freebsdobs with the names of obsolete FreeBSD features that
# freebsdobsolete() reports.
sub init_freebsdobs {
	print "initializing FreeBSDobs\n" if $verbose;
	@freebsdobs = ('cvsup');
}

# Report mentions of obsolete FreeBSD features (the words listed in
# @freebsdobs).  A word is reported when it is preceded by whitespace
# and followed by at least one character other than a period.  Blank
# lines are skipped.
sub freebsdobsolete {
	my ($bname, $line, $txt) = @_;
	return if $txt =~ /^\s*$/;

	for my $word (@freebsdobs) {
		# close the highlight with $rh, the highlight delimiter; $lr is
		# the line-number delimiter and is empty without -R, which left
		# the opening bracket unmatched in plain output
		if ( $txt =~ s/(\s+)($word)([^.]+.*)$/$1$lh$2$rh$3/ ) {
			showline($bname, $line, $ansi{darkgreen}, 'freebsd-obsolete', $txt);
		}
	}
}

# Report words repeated back to back, either within $txt or from the
# last word of the previous line ($prevline) to the first word of
# $txt.  Blank lines are skipped.
sub repeatedwords {
	my ($bname, $line, $txt) = @_;
	return if $txt =~ /^\s*$/;

	my $txtbak = $txt;
	my %count = ();
	my @words = grep(! /^\s*$/, split /\b/, $txt);
	$count{$_}++ for @words;
	# sorted so that several findings on one line come out in a stable
	# order instead of hash order
	my @multiples = sort grep { $count{$_} > 1 } keys %count;
	for my $word (@multiples) {
		# skip special cases
		# repeated numbers
		next if $word =~ /\d/;
		# repeated slashes
		next if $word eq '/';
		# repeated rows of dashes
		next if $word =~ /-+/;
		# repeated rows of underscores
		next if $word =~ /_+/;
		# skip some mdoc commands
		next if $word =~ /Fl|Ns|Oc|Oo/;
		$txt = $txtbak;
		if ( $txt =~ s/\b(\Q$word\E\s+\Q$word\E)\b/$lh$1$rh/i ) {
			print "repeatedwords: repeat found:'$word'\n" if $verbose;
			showline($bname, $line, $ansi{darkred}, 'repeated', $txt);
		}
	}

	# check for repeated word from the end of the previous line
	# to the beginning of the current line

	# start again from the unmarked text: the loop above may have left
	# highlight markers in $txt, which would hide a repeat at the start
	# of the line
	$txt = $txtbak;

	# alternative pattern kept for reference:
	# $prevline =~ m%(\w+\s+)*([^ *.#|+-]+\s*)$%;
	# Read the captures only when the match succeeded; after a failed
	# match $1 and $2 still hold values from an earlier match.
	my ($cmd, $prevlastword) = ('', '');
	if ( $prevline =~ m%(\w+\s+)*(\S+\s*)$% ) {
		$cmd          = $1 if $1;
		$prevlastword = $2 if $2;
	}
	# short-circuit when the previous line...
	# had no last word
	return unless $prevlastword;
	# didn't repeat any of the words on the current line
	$count{$prevlastword}++;
	return unless $count{$prevlastword} > 1;
	# was a groff(7) comment
	return if $prevlastword eq '.c';
	# was a groff(7) zero-space character for tables (\&.)
	return if $prevlastword eq '\&.';
	# was a single non-word character
	return if $prevlastword =~ /^\W{1}$/;
	# was an mdoc(7) or nroff(7) comment
	return if $prevlastword =~ /^\W{1}\\\"/;
	# was an mdoc command
	return if $prevlastword =~ /\.(?:Ar|Oo|Nm|Tp)/i;
	# when the next-to-last word was an mdoc command
	return if $cmd =~ /Ar |Cm |Fa |Em |Ic |Ip |It |Li |Pa |Ss /i;
	if ( $txt =~ s/^\s*(\Q$prevlastword\E)(\s+.*)$/$lh$1$rh$2/ ) {
		showline($bname, $line - 1, $ansi{darkred}, 'repeated',
			"... $cmd$lh$prevlastword$rh");
		showline($bname, $line, $ansi{darkred}, 'repeated', $txt);
	}
}

# Read an external file of spelling errors into %misspelled_words.
#
# Empty lines and lines whose first non-blank character is '#' are
# ignored.  On every other line the misspelled word is the first
# sequence of \w or ' characters, ending at a non-word character or at
# the end of the line.  Dies when the file cannot be opened or closed.
sub readspelling {
	my $spname = shift;
	my $added = 0;
	print "adding spelling file '$spname'\n" if $verbose;
	open my $sf, '<', $spname or die "cannot open '$spname':$!\n";
	while ( <$sf> ) {
		next if /^$/;
		next if /^\s*#/;
		# \z is accepted as a terminator so that a word on a final line
		# without a trailing newline is not silently dropped
		if ( /^\s*((?:\w|\')+)(?:\W|\z)/ ) {
			$misspelled_words{$1} = 1;
			$added++;
		}
	}
	close $sf or die "could not close '$spname':$!\n";
	print "added misspellings: $added\n" if $verbose;
}

# list of common misspellings
sub init_spellingerrors {
	print "initializing spellingerrors\n" if $verbose;
	for my $word (qw/ &nbps; aan abandonned abble abel abismal abjectely ablve abondan abotu abour abouy
		abscence absense absolue absolut absolutelly absolutly absoulte abuttes acccess accelleration
		acceptible acces accesable accesed accesing accessable accidentaly accidently acclerate
		acclerating accomadate accomodate accoring accound accpeted accroding accross accuarate acculate
		acess achitecture achive acknowledgent acordingly acquisions acse actal actaully activly actuall
		actualy actyually acutally acutual acutually adapated adavnce adddress adde addesses addiotional
		additioanl additioanlly additionnal additonal additonally addres addreses addressess addresss
		addtional addtions adecuate adhear adhearance adherance adiministration adjustement administator
		adminstrator adminstrators admited adress adressed adresses advence adventerous advertisment
		advetise advetised adviasory advices aer afterall afternoont agai agains ageing aggree
		aggregatable aggresive aggresively aggrigate agian agregate agregation agregious agressive
		agressively agrivating agument ahold ahte ahve akses aksually alaram albel albels alergic
		algoritm alignement allign alligned allmost alloacted allos allready allright allthough allways
		alot alreay alreday alredy alright altenrative althought althougn altough alwways amasing amke
		ammend ammount amn amybe analasys analyizing ancestory ande anderstand andthe ane anf annonymous
		annotatation annotatations annoucement annoucing announcment annoyting anny anonnyed anonymus
		anormalous ansamble answeres antiq anual anyay anyhitng anyhoo anymore anyonw anyore anythign
		anyways anywere aobut apac apllay apllication apparant apparantly apparentely apparentry
		apparnetly appart appartment appearence appearred appendencies apperantly appercaite appers
		appicable appleances appleis appliabce applicatin applieds applogize appraently appriciate
		appriciated appropiate approprate apreacciate aprecciate apreciate apreciated apropriate aproval
		aptch aqueue aquire arbitary arbitrafy arbitraty arbritrary archiecture architectual arent
		arguements arguemnt aritmetic aritmetics arledy arond aroudn aroung arrisen arround arrray
		artikels aslo asoc asparin assigenments assocation assoicated assotiations assumtion aswers
		asychronous asynchonously asynchroneous atack athalon athe athentication athough atleast atrget
		atribute attachements attatude attemps attemts attmpted attrackt attrackted atuomatic
		atuomatically augus autentication autheinticating authenticatation authenticatication authention
		authetnication authoratative authorative authorty automaticali automaticall automaticaly
		automaticly autonimous avaiable avaialble avaible availabe availabel availablity availbility
		availible availiblity availlable avaliable avalibale avalible avarage avialable avilable aweful
		awer awhile awlso awsome axatly axcuse backplain bakcup bannana bartition basec basicly basse
		bateria baybe beachmark beacuse beahvior beated becasue becease beceause becouse becuase becuse
		beeing beffer befor beggining begining beginnig behaiver behauvier behaviuor behvaiour beign
		beleive belive belived benefitial benfit benifit beoken beowser ber berak bercause berkley
		beseuse besure beter bettr betwen betwenn beurocratic beween beyonf bgack bgiger bheve biger
		bikesheding bince bineary birght birt blatently bloatwed bloging bnechmark boostrap boostrapping
		bootabe bootleneck bootlenecks bootsrap boradband bordism borken borre borred borring boting
		bottem bottonm boundries boundry boxd bradband branche briner bringign brocessor broked
		brokeness broser brower browesable browseable browseble browswer btit buch bugzills buidl
		buildling buildt buile buillt buld bulding bulds bultin burried bycicle bysect bysected byt
		cacheing calatog calcualted cale calender calles callibrate caluclate caluclated camllia
		campatibility cange cannnot cannonical cant capabilites capabilties capabiltiy capabily
		capitzliation captial captialism caracteristics casse casues catagory catched cathegory ceck
		ceep ceratin ceratinly cerificate certaintly certian certifcate certifcates certificat
		certifictate certiin certiinly chace chacing chage chaged chages challange challanging chane
		chang changable changess changs chaning chanse charakteristic charakteristics cheapter chech
		checkng checksuming chek chekc chekcing cheked cheking chhosing chian chipest choise choosed
		choosen choses chronologocal chunck chuncking cince cirruption claimst clal clarifynig
		classifcation cleand cleandepened clearification clearl clen cliens cloked cluter cmmit cmopile
		cmopiles cmplain cmplaining cna cnanot cnditions coampre cobsidered cofiguration colision
		colisions colom comands comapred combersome comemnts comiling comit comiters comitted comitter
		comlplex commandline commen commenly commer commerical commericial commersial comming commited
		commiter commiters commiting committment committs commnad commnads commnand commnications
		communciation communciations comooil comooiled compability comparision comparisions
		compatability compatabilty compatablity compatiable compatibilty compatiblity compentens compiel
		compilcated compilling compiltaion complaing complainig comples complet completly completness
		complie componet componetn compontens comprimise compromiseable comptemporary compule comsume
		comsumed comsumption comunication comunity concatanated concensus concerne conctacted conect
		conected conection conections conernced conerter conerters configrable configration
		configruation configuation configuer configuered configuraiton configurate configurateion
		confimation confiuration confiused confugure confussion congraturation congraturations conitinue
		conjob conjuction connecion connecs connecter connecters connectin connenctions connet conneting
		connnects conntact conntect conpact conputer conreoller consensous consensu consept consequtive
		conserns conservatie considerd consistant consistentency consitute conslusion construcgtor
		consuption contai containg contect conteins contens continously continu continus contiune contol
		contrained contribuition contributer contributers controled controler controll conujunction
		conut conuter conuters conveinently convelient convenent conveniece convertion convesation
		convienient convinience coordinatory coorect coorected coorparative copiedd copmiler copmilers
		coppied corectly correced correctely correcture correleate corresponsding corrsponding cosnole
		costantly couldnt cound cource courious courve coyping crach craching crahs crahsed crasch
		crasching crassing crasy crazyness creapage creapt creat creatopm credentail credentails creeate
		crertainly crnuch crnuching crystalize csvup cuase culpit curcuit curiosly currenly currentlu
		currnetly currrently curser customaril custommer custommers cuttoff cuty cvould cvs2vn damange
		damanged datas dayt dbout deactive deaemon deaemons deafult deaktivate dealocates deamon deamons
		deatched deault debuf debuging decendant decentant decicission decidely decission declerations
		decliens decompresssion decribed decriptor ded defalt defaut defautl deffirent definate
		definately definiately definitiely definitly definitons defintion degradate degugging dehaviour
		deicde deine deines deivce dekstop delcared delending deleteing deley deliever delievers
		dellicious delste demnstrate demonstarte demonstarted depcreation depdendency depedancy
		depedencies depedency depeding depednent depencdny dependacies dependancies dependancy
		dependancys dependant dependding dependeancy dependeant dependecies dependecy dependend
		dependendencies dependiences dependiency dependig depenesis deploies deprechated deprectated
		depricated derivats derrivates desapointed desaster desasters descendand descendents desciptors
		descirption descrete describd descrpition descrption desease deseases desing desireable
		desperatly despert desprate destinatino destine destory detatched detec detecing detemine
		deterined detirmines devdeloper deveation deveices develoeprs developement developeminet
		developped developper developpers developre developvers devestate devestating devide devided
		devies devinces devisions devives devleop devot dfault diable diabled diablog dich dictaded
		dictonary did'n didicated didnt didsk didunt dieing diferent diffcult diffence differenciate
		differencies differenlty differents differnce differnces differnece differnetiates differnt
		differrent diffrent diffrently diffsof dificult dificulty diging dilema diliver dilligence
		dind't dindt diphthongs dircet dirctory direclty directorys diretly diretories diretory dirft
		dirver diry disabe disappered disasterous disclamier discourraged discoverd discuessed dismouted
		dispair dispalay dispaly dissable dissabled dissapeared dissapointment dissillusioned distain
		distiguish distord distorded distribition distribitions distribtue distributted distribvution
		distrubute distrubuted dnow docuentary documantation documenation documentaiton documentatino
		documentiation documention documetation documtns doen't doenst doesen doesent doesnot doesnt
		doest domainmame domani donatiosn donde donn't donot dont donw dor dotally doues droped droping
		drustrating dubts ducplications duplictiy duratoin duratoins durign durning durring dwsktop
		dynaic ean eanble earler easely eather ebeen ecah eceived ecourage ecouraged ect ecurrent
		effecive effetive effetively efficancy efficency efficent efficently effor efford eficciently
		efter ehere ehternet eitehr elememt eletrical eletrically elipsis elliminates emaling embaress
		embaressing eme emial emporer enabe enbale enchanced enclousure enconding enconter encouaging
		encrypion encyrpt encyrpted ende endianess endoresed endtdate enior enitre enitrely enivorement
		enoountering enought enourmous enow enscrambled ensute enteries enterprse enthousiast
		enthusiatic entierly entirly entites enviorement enviornment enviornmental envirionment
		enviroment enviroments environement environnement equipted equivalen equivilent erebuild erlier
		erliere errore errorneusly erros escolated esier esiest esle esome essense estracting ethenret
		etherenet ething ethings etnry evenning eventaul eventaully eventhough everthing everythign
		everytime everyting evet eveyr evne evreyone ewhich exagerate examble exapnd excactly excat
		exceedes excelent excellant excercize excersise excert excesive exclusivly execept execption
		execptions exectable exectables exectuable execuation exellent exemple exemtion exeption
		exercice exernal exibits exisiting exisitng existance existsing exmaple expalin expecially
		experied experince expession expiremental expirience expirt explaination explainations explaned
		explans explantation explatnation explcit explcitly explicitely exponentionally exquse exsist
		exsits exstra extemely exteneded extenstions extentensible extention extentions externel
		extranious extreemly extremly facilites facter faield failded faile failes failur faimiliar
		faliure falsh familar farely farwarding fase faught feasable februrary febuary fecth feebsd
		feelt fgights fianlly fids fiel fiels fien fienw figureing fileame filewall filks filname finaly
		finnaly firmwares firmwrae fisrt fitler fixe fixen fixztion flages flasg flexable fo focuss
		fodler fodlers folkz folllowed follwing follwo follwoing follwong folow folowed folowing fomr
		forbiden forcable forece foreignphrse forgoten formate formated formost fornated forsee
		forthermore forume forusers fot foto fotos foudn foward fowarding fractoinal fraemwork
		fragemented fragmentated fragmentatio frameowkr fransisco franticly frebsd freedback freeed
		freezed freind frequence freze frezze frim frimware frome fthernet fucntion fuction fulfil
		funcational funcition functionmames functionnality functoin functuion funtion furhter furstrate
		furstrating furthur fush futher futur fysical gaint garanties gatherd gauging gaurd geeting
		generaet generall generaly generat genertaes geniue geograhically gernal gernerates geting
		gettign ghostscrip giove givent glas gnerated gnoime godo gohostscrip goiung gonna gonne goot
		gotta grafic grammer grap grapics gratefull grately graub greaet greate greatful greatfully
		greif grpahs gruop gthe guage guarateed guarentee guarenteed guarentees guarranteed guarrentee
		guidence gurantees hackyness hade haed hai haing halp hanbook handeling hapen hapilly happend
		happended happends happing happpens hardisk hardwares hardwrae harmpless harrass harrassment
		harsch hashs hasve hatered hav havea havent havfe havn't headup healt healty heathly heavly
		heirarchy hellon helpfuk helpfull hep hereon hessitate hessitation hexadecimals hexidecimal
		hibarnate hibarnating hiearchy hierachy hierarchial hierarhy higest hight hightlight higlight
		higlighted higly hinderences hiuge hobbiest hodling homours honets honnest honnestly honnor
		honnorr honnorred honnors honst hookled hopful hopfully horiztonal horiztonally hounderd
		hounderds howeber howevrr hsotname hsotnames htat hte hter htere hthe htink htis hudge hunderts
		hypens hypervisior hypocracy ibn idee identially identifer identifers identifiy identiy idff
		idosyncracies iea ifhghting ilde im imagen imagening imatating imbeded imeplementation immanent
		immediatly immenent immidiate immidiatly immitating impariment impedence impelment impled
		implemenation implementaitons implementating implementng implemetation implemetn implentor
		implicitely implicits impliment implimentation implmentation imporant importent imporvement
		imposable imposible improbe improove improoved improvments imprted inacativity inaccesible
		inadvertant inadvertantly incase incedent incldue incluseion incomming incompeents incomptaible
		inconsistancy inconsitent inconvienent incopatible incrase incrimental incrment incrmental
		indefinately indefinitly indend indended indendently indending indentical indentifier
		indentifiers indention indentions indepedently independant independantly independendly
		independet indepth indestrcteble indiate indiciations indicies individial indivual indivudual
		indstalled inetersting infact infavour infomation informations informatoin infrastcture
		infrasture infromation inherity inital initalise initalization initalize initalized initiatior
		initiliased initilize inititialization inport inpossible inpunt inputed inquiery insall insatll
		insatlled insensivite instace instal instalation instaled instaler installad installaed
		installaing installatio installtion installtions instanciation insted insterest insterested
		insteresting instractions instructuions instuctions insturction insturctions intall intallation
		integerate integreated integrituy intendend intepretation interal interations interchangable
		interchangably interconverts interes interesing interesitng interesst interessting intereting
		interfacce interfactive interfer interferring intergrated interimttant interimttantly interist
		interisting intermal intermittant intermittantly internaly internat interneal interogate
		interpretedt interpretted interpretter interpretting interressing interrest interresting
		interrestingly interrim interrups intersted intersting interupt interwined intial intialization
		intialize intolerate intregate intrest intrested intresting introduceing introduciton intruction
		intruptions invarients invicible invole involes involvemnt invoplved invovle irt isnt isntall
		isntance isoltation isonly issueing ist istead isuus isystem ita iteinerant itelf ith itnel
		itseld ive iwll jailes joing jornal jounal jsut jugde juged juste kenrel kerel kerenel kerenels
		kerenl kerle kernal kernell kernl keybaord killled kno knowlegde knowlege knowlodgeable knwo
		konw kust kwyrod labes lable lables laeyer lagacy lanaguage langage languge laods larged lastest
		laterly latley latre laught laughted layed layput lazyness leasure leat leav legitimite lemme
		lenght lese leson leter lettesrs lexicographal lgertimately libararies libary librairies
		libraray libraris libraru licencing licene liek lien liesure lifing lightnig ligned liinux
		likeing likly liks limitatons limtations lineair linerly ling liniarly lised lisens listet
		listning lite literrally litte littel litteral litterally liviness llow lniux loadeded loally
		locak localy loccked locically locla loder loged loggoued loggs loging loink lok loke lokking
		loks looh lookig lookking looksy loopack loosing loosly losseless lpatop lpdng lter ltieral mabe
		maby mabye macademia machien machiens machin machince machinew maching machne macrow macrows
		madee maek mahually mailling maintainace maintainance maintaince maintanance maintaned
		maintanence maintenable maintence maintened maintener mames manageement managemnet managent
		managment mananged manangement manaul manditory mangagement manged mangment manpage manpages
		manuallying manualy manuell manufactring manyally marcro marcros markkup maschine mashine
		mashines maske mater mathced maun maxaximum maximium maximun mdorn meaninful meantine measusre
		mechanim mechanims mechiansm mechnism mechnisms memeber memery memroy ment mentined mentionned
		menue meny mergeing mericracy meriticracy merrits mes mesage mesages messege messgae messgaes
		metada methode metod mfcd micrcontroller microbnechmark minimze mininum minmum minum minumum
		minut minuts miror mis miscelleneous miscellenious mising misprediced missign missinc missking
		misspeling missplelling misterious mistery mistypted misunterstood mkaes mke moble modifing
		modifiy modifiyng modifyed modiying momment monalithic moniter monolitic mont montherboard
		montor montoring monut monuted mooved morethbord mornig morron mortherboard mostely moter
		motercycle motercycles motiviation moudels mountign mpre mssing muliple multile multipled
		multipy multople mutiple mutualy mvoed mysefl myst myt namming natioal natsy ncessary ncie
		nderstand necassary neccasary neccesary neccesery neccessary necesary necessairely necessarely
		needto neet neetwork neglegt negociate negociated neightbor nemisis nescessarily nescessary
		nessesery newcommers nimber nintees nobady noet noice noipe nomally nonexistant noone normaly
		normanlly notaions notavailable notefection nothern nothin noticable notied notofocations
		notquite nouvou nto numberic numer numner nusance nutrual obejct obfascated objejcts obselete
		obsolote obsticles obvoius ocassionaly occassion occassionally occassions occation occations
		occurance occurances occured occurence occurences occuried occuring ocure oether ofcourse
		offenseive offical ofr oftem ohne okey om ommisions ommit ommited ommitt ommitted omre omrning
		onfigured ongoin onl onle onlne onlt onsult onthe ontop onts onw ony oparation operationg
		opertunity opion opionion opionions opperation oppertunity oppinion oppions oppisite oprations
		oprion oprions optial optiion optionnal optionsal optoin ordenary orginal orginally originaes
		origine origional orignal ot otehr otsuts ouf ouput ouputing outher outout outstaning outtage
		overhall overidden overlaping overlayed overrided overriden overritten overwritting ovre owkr
		pacakge pacakges pachae packge packges padd padds paert paied painfull pakcet panices pannel
		parallell paramenter parametr parametrs paramtere paramters paranthesis parctice paremeter
		parenticies parhaps parition paritioning paritions parntheses parrallel parrellel partameters
		partialy particualar particulary partion partions partionting partipate partiton partitoning
		partitons passprhase passtrough passwrd pasto patche patchex pathalogical pathces pathes peaople
		peform peformance peformed peice peices pengiun peopel pepetual pepetually peploe perfecly
		perfom perfomance perfoms perfor perfored performace performancing performence performend perhas
		periperal peripherial peripherials permanant permantly peroid persisent persistant personnal
		personnally personnaly persoon pertubation peticular pevious pfew pgk phabriator pheraps
		phisical phoneix phorase phyiscall physcal physial physicaly piblic pitty placte plaing
		plateform platfrom platorms playign pleae plin plisss poatch poblem poblematic poeple
		pofessional poinitng poirts poitn poitner poitners politley poluting polution pople popularuity
		pordriere porevious porject porrtability porst portes portupgrde posible positiv positve
		possability possbile posseses possibe possibillity possilbe possition postion postitions postive
		postress poting potr potupgrade poud poudirere poudrier poudrierre pourdiere pourdriere
		pouridere poweful powerfull poyrts prameter pratcice preatty preblem preceed preceeded
		preceeding preceeds precice precidence precisly predictibly preemtive prefere prefered prefering
		preferrable preferrably preffer preffered prefferred preform preformance premission premissions
		preoblematic prepair prepairing preperation preperations preprend preprietary preprocesor
		presense presidence presonally presumeably pretection prety pretyt preume prevelent previos
		previouse previousely previuos previus prevoius pricipal primative primatives princial principes
		priorisation priotity prirority pritn pritnf pritnfs privelege priveleged priviledge priviledges
		privilige privledged privleges probabilly probabyl probaly probbaly probblem probem problaly
		proble problen problme problmes probobly proccess proccesses proceedure proces proceses procols
		proctect proepr proeprly profesional profesionals proffesional profie profilier profissional
		progam progams progess programable programatic programlistning programm programms progrtam
		projcet projecte prolematic prolonges promiscous promiscuos promisive promissed promissing
		prompot promt proove propably propaged proped propegation propigate propogate propogation
		propolsal proporion proporty propper propreitary propreitery propsing prorammer prorgram
		prosessor prosponed prot protcol protcols protec prots provde provent provice providre pseuuedo
		pshycial pssword psuedo ptach ptiner pudate puncing puroses pursache pursached puting qeustion
		quandries quard quater quaterly queestion querys quesston questionr questoin questsions queueing
		qui quickier quiety quirck quire quitted quoteas rabase rabased rabmling raccomand rae ralative
		rans rapidely rase rasing raspberri rater reactoin readd readning realated realease realibly
		realloacted realy realyl realyt reaosn reard rearding reasoably reasonnable reassambled reate
		reboote rebove rebuilded rebuitling rebult reccomended receieve recevied recient reciently
		recieve recieved recinded recive recoide recomend recomendation recomended recommand recommanded
		recommanding recommened recommented recongnize recongnized reconigize recrecreate recrusively
		redable redering rediculous redunacy redundance redundantcy reduntant reeated reelvent reember
		referece refered referes refering refernce refernces refernece refferance refreind refridgerator
		refulat regardes regened regularely regularlly regulat reguls regural reivew reized relaly
		relase relases relavent releated relese relesed relevent reloation reloations relply rember
		rembers remdial remebered remebers rememeber rememver remmeber remobal remvoe remvoed rendtion
		repare reparing repative repetion repitition repititions replaceing replacemnet replacment
		replases replce repleaced reponding reponse reponses reponsible reposotory repostory repport
		reprecussion reprecussions reproducable reproducibily reproductible reprository repy requiment
		requireing requiretd requirments requistes requred rerurn resampeling resaonnable resemblence
		resently resetart resetted resiilver resiliant resilliancy resillience resilliency resillient
		resise resistnace resitor resitors resivoir resliver reslove resloving resolf reson resonable
		resonably resons resouce respecitively responce respository respresentation resseler ressource
		ressources restaring restartet restaurnat restaurnats restert resuce resuerrect resuerrecting
		resurections resusccitate rethnik retnia retreive retrive returs reuild revalent reveiw reversse
		revison revisons rewcursion rewite rewriten rezervation riddens rigth riht rmeoval rmore rmove
		roken roling rott roughy rreally rreplace rrquest rudamentary runing runinig runned runnig
		runnign runnnig runnning ruote ruter sacn saerch safed sahred saif saior sais salavge satified
		satsify saturage sayd scenartio sched scheduld schedulling scritp scrubing scrupt seached
		secction secend secion secions secondes secttion secturity secund securiy seeem seemless
		seemlessly seens seether senarios sence sendt sentance sepaking separatly separe separtely
		sepcial sepcific sepcifies sepcify seperate seperated seperately seperates seperating seperation
		seperator seprate sequencially serach seraching serch serching sercurity serie seriosuly serius
		seriusly serivce serveral servicability servise sesion setable setiing seting setings settt
		settup sevice sexond sey shae shaer shaers sheding shepard shepards shephard shepharding
		shooping shoping shoud shoudl shoudn't shoulld showen shrinked shuld shure shuting shyed
		siginificant significnat signle siilar sile sime similat simillar simle simpel simpl simpley
		simplfied simplier simpliifed simplyfies simular simultanious simultaniously singel singeling
		singels singnificant sinificant sinse sintax sirene sistems sitll skiped sligh slighly slove
		sloved slpw slue smaler smebody smeone snapshoted snashot snoflake snopped soe soehow soemone
		soemones soemthing soething softaware softner softwae sofware sohuld soif soild soley solition
		solulotion solusion someoene somes somethign somethin somethng sometime someting sometjhing
		someway somoene somthing somwhere sonud sonuds soo soruce sparce spearator specfic specfied
		specfy specifes specifi specifiaction specificially specificly specifig specifing specifiy
		specifiying specifyed spectacte speficy sperate spesific spindels spititng splic spliting
		splitted spose spreadth srews srtuff srync ssorted sspares ssytem stabalization stabel stadnard
		stairing standart standerd stantdard startet starup staticlly statred statuc steller steping
		stilla stiring stkicks stoll stollen stoped stoping stoppe stoppped storge storige straigh
		strang strangly strat strategie strenght strenghts striaght stricktly strippped stroage
		structurees strucutre strucutred struggel struggeling stucked stucture stuf stylle stystem
		stystems subet submited submiter submitt substaintally substition substract substraction
		subsytem subsytems subverion succed succeded succeds succesful succesfully successfull
		successfuly suceeding sucesfully sucess sucessful sucessfull sucessors suckser sucksers
		suddently sudirectories suffecient sufficent sufficieintly suficient sugesstion sugest sugested
		suggesiton suggestsions suggetion suggetions sugroup suject sumbit sume summery superceed
		superiour supoose suposed suposedly suppoert suppor suppotr suppotred supprts supress supressed
		supresses suprise suprised suprising surpise surpised surpressed surprice surpriced surprize
		surprized surronded surroudn surroudning susbtitute suspec suspection sutiable sutuation swape
		swepped swich switche swith swithc switich switiching swop syas sychronized symetrical symptome
		symtom symtoms synchronisaton syncrhonous syncrhonously syncronize syncronized synonomous sysem
		sysetm syslodg systeam systme systmes sytem sytems sytsem taged taging taht tahts talkes targer
		targerts tat te teamm techer techical techincal techincally techncially tecnological teh tehre
		tehse tehy tempaltes tempature temperatire templaitize temporarely tenticles tere terirrlbe
		termal termianl terminilogy termonology termperature termporary tey th tha thaat thak thaknk
		thakns thaks thank's thankje thansk thanx thatis thats thay theese thefirst themeing thems
		themself theoraticly theorethically ther therads therefor theres therory thets theyre theyve
		thie thier thign thigns thingking thinke thinkg thinkw thinling thirs thnak thnig thnk tho thos
		thouch thoug thougt thouogh threated thremal throgh throtteling throug throughly throught
		throuhg throwed thru thrugh tht thta thudner thwo thye ti tiems tihngs tihs timestatmp tinket
		tinketing tipycal tirck tird titeled tlak tlaking tnan todays todl togehter togethe tohers tols
		tomake tommorow toolcain toolchian topick topoligy totaly tottaly tought tougue tounge
		touschreen tpage tpye traafic tradeing traditoin traditoinal tradtional trafic trailling
		tranalation tranalations tranfer tranfered tranferred tranfers transfered transfering
		translateion translater translaters transltion transmision traslate traslation treatement trid
		triede triewd trigonmetric tring tripple trival trnaslate trnaslated trofy troublehsooting
		troubleshoute troughout trow trows trpi trrue trry trubolsome truely trully tryed tryied tryign
		tsable tsart tsill tsrarted tthe tthis tu tunning tunr turend turnt tutoriales tye tyhrow
		typicall typicaly udnerstand udnerstandable udpate udpates uesd uisng umounted uncapable
		unchaged unchange uncoment unconsistent undefinied undefinitely undeflowed undersatnd understadn
		understadning understandlable understandood understaning undertsand undescores undesireable
		undestand undustrialized unecessary unecrypted uner unexpectad unexpectadly unfortauntely
		unfortenately unfortuante unfortunant unfortunantly unfortunatelly unfortunatly unfortuntelly
		unfrastructure unfreezed uniion uniquily unitentinally univeristy unknwn unkown unlinke
		unmouting unnceccessary unneccessary unnecssary unprivilegded unrelevant unresolveable
		unreversable unsubstanciated unsuccesful unsucessfully unsutiable untill untis unuseable upate
		updaing updateing updatting updte updtes upgade upgaded upgarde upo upp uppon uprade upsteam
		upstrewams upto ur usally useable useage usedul usefull useing usesd usign ussage usse ussually
		usully ut utilites utilties uttrerly vagrand varaible varanty varialbe varialbes variaty
		varliable varois varoius vender vengeace veresion verion verions verison verry versionn
		versionned versionning versoin verty veryify vey virilization virtial virutal visable voa
		volenteer volenteers voltave vontinues votlage vulnability waas waht wahtever wakupe wantd
		warant waranted warrent wass webupage wecam wehre wek wel wer wether whanever whats whcih whe
		whene whereever wheres whewn whhich whie whihc whild whilte whinning whish whit whith whne
		whould wht wich wierd wiht wihtout wilde wirh wirtten wistle wistles witdh withe withhin withing
		withme withough withouth witk witout witt wizzard wlll wnat wnats wnet wo woked woking
		wonderfull woner wont wor worflows workint workoad workoads workstion worng worrty woth woud
		woudl would'nt wouldbe wouldnt wouls wranty wraper wriatble writen writtend writting wroking
		wroute wsouse wuch xontains ycould yea yeild yeilds yesm yhe youd youi youll youre yu yuo yut /) {
			$misspelled_words{$word} = 1;
	}
	print "spellingerrors: ", scalar (keys %misspelled_words), " misspellings known\n" if $verbose;
	my @spellfiles;
	# IGORSPELLFILES environment variable is a whitespace-separated list of files
	push (@spellfiles, split /\s/, $ENV{'IGORSPELLFILES'}) if defined($ENV{'IGORSPELLFILES'});
	# all files found in /usr/local/etc/igor/spelling
	push (@spellfiles, split /\s/, `ls /usr/local/etc/igor/spelling/*`) if -d '/usr/local/etc/igor/spelling';
	for my $spellfile (@spellfiles) {
		readspelling($spellfile);
	}
}

# Report known misspellings on one line of text.
#   $bname - document name passed through to showline
#   $line  - line number passed through to showline
#   $txt   - the text of the line
# Each word of the line is looked up (lowercased) in %misspelled_words;
# hits are marked with highlight_word and the line is shown once if
# anything was marked.
sub spellingerrors {
	my ($bname, $line, $txt) = @_;
	return if $txt =~ /^\s*$/;

	# the word list always comes from the unmarked text
	my $marked = $txt;
	for my $candidate (split /\W+/, $txt) {
		next unless $misspelled_words{lc($candidate)};
		$marked = highlight_word($marked, $candidate);
	}

	# nothing was highlighted, nothing to report
	return if $marked eq $txt;
	showline($bname, $line, $ansi{darkmagenta}, 'spelling', $marked);
}

# Report whitespace problems on one line of text.
#   $bname - document name passed through to showline
#   $line  - line number passed through to showline
#   $txt   - the text of the line
# Three independent checks are made, each on its own copy of the line so
# that the highlighting added by one check cannot affect the next.
sub whitespace {
	my ($bname, $line, $txt) = @_;
	return if $txt =~ /^$/;

	# line consisting of nothing but whitespace
	my $blank = $txt;
	if ( $blank =~ s/^(\s+)$/$li$1$ri/ ) {
		showline($bname, $line, $ansi{darkblue}, 'blank line with whitespace', $blank);
	}

	# whitespace after the last non-blank text
	my $trailing = $txt;
	if ( $trailing =~ s/(\S+)(\s+)$/$1$li$2$ri/ ) {
		showline($bname, $line, $ansi{darkblue}, 'trailing whitespace', $trailing);
	}

	# spaces followed by tabs; only the spaces are kept in the shown text
	my $mixed = $txt;
	if ( $mixed =~ s/( +)\t+/$li$1$ri/ ) {
		showline($bname, $line, $ansi{darkmagenta}, 'tab after space', $mixed);
	}
}


# global batch tests
# Whole-document style check.
#   $bname - document name, used only in the heading line
#   $txt   - the text to examine; several patterns use \s+ and so can
#            match across line breaks
# Counts discouraged words and phrases in $txt and prints a short
# explanation to STDOUT for each one that is present.  When the global
# $type is "troff", also reports a missing EXAMPLES section.
sub style {
	my ($bname, $txt) = @_;
	print "$lf$bname style check:$rf\n";

	# Count non-overlapping matches of a compiled pattern WITHOUT changing
	# $txt.  Counting with s///g rewrote the text as a side effect: the
	# sentence-initial "the" test removed the period in front of each
	# match, which made the sentence count too low (inflating the
	# percentage) and hid "e.g. the", "i.e. the", and "cf. the" from the
	# tests that follow it.  Patterns passed in must not contain capture
	# groups, or each group would be counted.
	my $count = sub {
		my ($re) = @_;
		my $n = () = $txt =~ /$re/g;
		return $n;
	};

	# print the standard '"word" used N time(s)' heading
	my $used = sub {
		my ($what, $n) = @_;
		print "  $lh\"$what\" used $n time", ($n == 1 ? '' : 's'), "$rh\n";
	};

	my $you  = $count->(qr/you\b/i);
	my $your = $count->(qr/your/i);
	if ( $you || $your ) {
		$used->('you', $you)   if $you;
		$used->('your', $your) if $your;
		print "    \"You\" and \"your\" are informal and subjective.\n";
		print "    Attempt to be formal and objective: \"the file\" rather than \"your file\".\n";
	}

	my $should = $count->(qr/should/i);
	if ( $should ) {
		$used->('should', $should);
		print "    Use \"should\" sparingly, it is feeble and suggests unsureness.\n";
		print "    Attempt to be imperative: \"do this\" rather than \"you should do this\".\n";
	}

	my $obviously = $count->(qr/obviously/i);
	if ( $obviously ) {
		$used->('obviously', $obviously);
		print "    If it is really obvious, it does not need to be pointed out.\n";
	}

	my $needless = $count->(qr/needless to say/i);
	if ( $needless ) {
		$used->('needless to say', $needless);
		print "    If it doesn't need to be said, why say it?\n";
	}

	my $thefollowing = $count->(qr/the following/i);
	my $asfollows    = $count->(qr/as follows/i);
	if ( $thefollowing || $asfollows ) {
		$used->('the following', $thefollowing) if $thefollowing;
		$used->('as follows', $asfollows)       if $asfollows;
		print "    If something is following, the reader can see it without being told.\n";
	}

	my $followingexample = $count->(qr/following example/i);
	if ( $followingexample ) {
		$used->('following example', $followingexample);
		print "    If an example is following, the reader can see it without being told.\n";
	}

	# "simply" and "basically" each get their own advice; the advice for
	# one word is no longer printed when only the other word was found
	my $simply = $count->(qr/simply/i);
	if ( $simply ) {
		$used->('simply', $simply);
		print "    Use \"simply\" to mean \"in a simple manner\", \"just\", or \"merely\", not the\n";
		print "    patronizing \"details omitted because they are not simple enough for you\".\n";
	}
	my $basically = $count->(qr/basically/i);
	if ( $basically ) {
		$used->('basically', $basically);
		print "    Use \"basically\" to mean \"essentially\" or \"fundamentally\", not \"only the\n";
		print "    basics are shown because anything more will be too complicated for you\".\n";
	}

	# "the" at the very start of the text (no /m, so ^ is not per-line)
	# or following a period and whitespace
	my $the  = $count->(qr/(?:^the|\.\s+the)\b/i);
	# a "sentence" is a run of non-period characters, a period, and whitespace
	my $sent = $count->(qr/[^.]+\.\s+/);
	my $percent = ($sent > 0 ? int($the/$sent*100) : 0);
	if ( $the && ($percent > 19) ) {
		print "  $lh\"The\" used to start a sentence $the time", ($the==1 ? '':'s'), " in $sent sentence", ($sent==1 ? '':'s'), " ($percent%)$rh\n";
		print "    Starting too many sentences with \"the\" can be repetitive\n";
		print "    and dull to read.\n";
	}

	my $cf = $count->(qr/\Wcf\./i);
	my $eg = $count->(qr/e\.g\./i);
	my $ie = $count->(qr/i\.e\./i);
	my $nb = $count->(qr/n\.b\./i);
	if ( $cf ) {
		$used->('cf.', $cf);
		print "    \"Cf.\" (Latin \"confer\") means \"${lf}compare$rf\" and is mostly used in academic\n";
		print "    and scientific writing.  Consider replacing with the more common English\n";
		print "    words.\n";
	}
	if ( $eg ) {
		$used->('e.g.', $eg);
		print "    \"E.g.\" (Latin \"exempli gratia\") means \"${lf}for example$rf\" and is mostly\n";
		print "    used in academic and scientific writing.  Consider replacing with the\n";
		print "    more common English words.  Both forms are usually followed by a\n";
		print "    comma for a verbal pause:  \"e.g., a b c\" or \"for example, a b c\"\n";
	}
	if ( $ie ) {
		$used->('i.e.', $ie);
		print "    \"I.e.\" (Latin \"id est\") means \"${lf}that is$rf\" and is mostly used in academic\n";
		print "    and scientific writing.  Consider replacing with the more common\n";
		print "    English words.  Both forms are usually followed by a comma for\n";
		print "    a verbal pause:  \"i.e., a b c\" or \"that is, a b c\"\n";
	}
	if ( $nb ) {
		$used->('n.b.', $nb);
		print "    \"N.b.\" (Latin \"nota bene\") means \"${lf}note$rf\" or \"${lf}take notice${rf}\" and is mostly\n";
		print "    used in academic and scientific writing.  Consider replacing with\n";
		print "    the more common English words.\n";
	}

	my $inorderto = $count->(qr/in order to/i);
	if ( $inorderto ) {
		$used->('in order to', $inorderto);
		print "    Unless \"in order to\" has some special meaning here, \"to\" is simpler.\n";
	}

	my $invoke = $count->(qr/invoke/i);
	if ( $invoke ) {
		$used->('invoke', $invoke);
		print "    Unless \"invoke\" has some special meaning in context, \"run\" is simpler.\n";
	}

	my $parenplural = $count->(qr/\(s\)/i);
	if ( $parenplural ) {
		$used->('(s)', $parenplural);
		print "    Please do not form plurals this way.  It is a holdover from lazy\n";
		print "    programming practices, is difficult to read, and almost always\n";
		print "    unnecessary.  A plural formed with a plain \"s\" is usually correct\n";
		print "    when speaking about numbers of one or more.\n";
	}

	my $asofnow = $count->(qr/as of now/i);
	if ( $asofnow ) {
		$used->('as of now', $asofnow);
		print "    \"at present\" is clearer.\n";
	}

	# type-specific tests
	if ( $type eq "troff" ) {
		my $examples = ($txt =~ /\n\.\s*Sh\s+EXAMPLES/i);
		unless ( $examples ) {
			print "  ${lh}no \"EXAMPLES\" section found$rh\n";
			print "    Even trivial examples can improve clarity.\n";
			print "    Common-use examples are better yet.\n";
		}
	}
}

# mdoc line-by-line tests
# stack of open .Bd displays: 1 for -literal/-unfilled displays, 0 otherwise
my @md_displays;

# Report empty lines in mdoc source.
#   $bname - document name passed through to showline
#   $line  - line number passed through to showline
#   $txt   - the text of the line
# A .Bd line pushes an entry on @md_displays and a .Ed line pops one.
# An empty line is reported unless some open display is -literal or
# -unfilled.
sub mdoc_whitespace {
	my ($bname, $line, $txt) = @_;

	# start of a display block
	if ( $txt =~ /^\.\s*Bd\s/ ) {
		my $verbatim = ($txt =~ /-(?:literal|unfilled)/) ? 1 : 0;
		push @md_displays, $verbatim;
		return;
	}

	# end of a display block
	if ( $txt =~ /^\.\s*Ed\b/ ) {
		pop @md_displays;
		return;
	}

	# only completely empty lines outside verbatim displays are reported
	return if length $txt;
	return if grep { $_ } @md_displays;
	showline($bname, $line, $ansi{darkblue}, "blank line", $txt);
}

# Check the date given to the .Dd macro.
#   $bname - document name passed through to showline
#   $line  - line number passed through to showline
#   $txt   - the text of the line
# The argument of .Dd is stored in the global $docdate and the line is
# reported, with the date highlighted, when it differs from $today.
sub mdoc_date {
	my ($bname, $line, $txt) = @_;
	return if $txt =~ /^\s*$/;

	# highlight everything after ".Dd "; any other line is not a date line
	$txt =~ s/^(\.\s*Dd\s+)(.*)$/$1$lh$2$rh/ or return;

	$docdate = $2;
	return if $docdate eq $today;
	showline($bname, $line, $ansi{darkyellow}, "date not today, $today", $txt);
}

# Report a second sentence that starts on the same line as the previous one.
#   $bname - document name passed through to showline
#   $line  - line number passed through to showline
#   $txt   - the text of the line
# The line must begin with at least two word characters (so lines that
# begin with a "." macro are not matched); text after a period and
# whitespace is highlighted and the line is shown.
sub mdoc_sentence {
	my ($bname, $line, $txt) = @_;
	if ( $txt =~ /^\s*$/ ) {
		return;
	}

	my $second_sentence = ($txt =~ s/^(\w{2,}.*?[^ .]{2,}\.\s+)(A |I |\w{2,})(.*)$/$1$lh$2$3$rh/);
	showline($bname, $line, $ansi{darkcyan}, 'sentence not on new line', $txt) if $second_sentence;
}

# Reset the table of cross references seen by mdoc_uniqxrefs.
sub init_mdoc_uniqxrefs {
	if ( $verbose ) {
		print "initializing mdoc_uniqxrefs\n";
	}
	# forget every xref recorded so far
	%seealsoxrefs = ();
}

# Report duplicate .Xr entries inside a SEE ALSO section.
#   $bname - document name passed through to showline
#   $line  - line number passed through to showline
#   $txt   - the text of the line
# Uses the globals $seealso (true while inside a SEE ALSO section) and
# %seealsoxrefs (name-section pairs already seen).
sub mdoc_uniqxrefs {
	my ($bname, $line, $txt) = @_;
	return if $txt =~ /^\s*$/;

	# every .Sh line sets the flag: true only for a SEE ALSO section
	if ( $txt =~ /^\.Sh\s+(.*)/i ) {
		$seealso = ( $1 =~ /SEE ALSO/i );
		# only announce the section when it really is SEE ALSO
		print "mdoc_uniqxrefs: SEE ALSO section found\n" if $verbose && $seealso;
		return;
	}

	# only check xrefs for repeats inside a SEE ALSO section
	return unless $seealso;

	# if inside a SEE ALSO section, stop looking for duplicates
	# after non-.Xr macros.  These would probably be text sections
	# talking about the external references, not included in the list.
	if ( ($txt =~ /^\./) && ($txt !~ /^\.Xr/i) ) {
		$seealso = 0;
		return;
	}

	# allow both valid mdoc formats (.Xr umount 8 ,)
	# and bad ones (.Xr xorg.conf(5),)
	if ( $txt =~ /\.Xr\s+(.*)(?:\s|\()(\d{1}\w?)/i ) {
		my ($xrefname, $xrefsect) = ($1, $2);
		my $key = "$xrefname-$xrefsect";
		if ( $seealsoxrefs{$key} ) {
			# the name comes from the document and may contain regex
			# metacharacters ("xorg.conf", "c++filt", "["), so it must
			# be matched literally
			$txt =~ s/(\Q$xrefname\E.*\Q$xrefsect\E)/$lh$1$rh/g;
			showline($bname, $line, $ansi{yellow}, "duplicate SEE ALSO reference", $txt);
		} else {
			$seealsoxrefs{$key} = 1;
		}
	}
}

# Report macros that should have appeared before the one just seen.
#   $lastmacro - the macro found on the current line
#   $bname     - document name passed through to showline
#   $line      - line number passed through to showline
# Walks @macros in order up to (not including) $lastmacro and reports
# each earlier macro that has no value in %macroval.
sub showmacvals {
	my ($lastmacro, $bname, $line) = @_;
	EARLIER: for my $earlier (@macros) {
		# everything from $lastmacro onward is not expected yet
		last EARLIER if $earlier eq $lastmacro;
		next EARLIER if $macroval{$earlier};
		showline($bname, $line, $ansi{red}, ".$lastmacro used here", "but .$earlier has not been defined");
	}
}

# Reset the recorded values of the macros listed in @macros.
sub init_mdoc_structure {
	if ( $verbose ) {
		print "initializing mdoc_structure\n";
	}
	# underscores in the macro names become spaces; this changes the
	# entries of @macros themselves, not copies
	tr/_/ / for @macros;
	# every macro starts out with an empty value
	@macroval{@macros} = ('') x @macros;
}

# Check the structure of an mdoc page, one line at a time.
#   $bname - document name passed through to showline
#   $line  - line number passed through to showline
#   $txt   - the text of the line
# Two things are checked on lines that begin with a macro:
#   1. the first value of each macro in @macros is recorded in %macroval,
#      and showmacvals reports earlier macros that are still undefined
#   2. .Xr cross references: missing name, section number inside the
#      name, section too high, page not found (only with $opt_x), and
#      references to the page itself
sub mdoc_structure {
	my ($bname, $line, $txt) = @_;
	return if $txt =~ /^\s*$/;

	# only lines that start with an mdoc macro are checked
	# technically, whitespace is allowed before macros
	return unless $txt =~ /^\s*\./;

	# check for required minimum macros
	for my $macro (@macros) {
		$macro =~ tr/_/ /;
		# only the first occurrence of each macro is recorded
		next if $macroval{$macro};
		if ( $txt =~ /^\.\s*\Q$macro\E\s*(.*)/i ) {
			my $parm = $1;
			# provide a blank parameter for macros with optional parameters
			$parm = ' ' if ($macro =~ /^Os|Sh NAME|Sh SYNOPSIS|Sh DESCRIPTION/) && (!$parm);
			$macroval{$macro} = $parm;
			showmacvals($macro, $bname, $line);
			last;
		}
	}

	# check external refs (.Xr)
	# suggested by Glen Barber
	return unless $txt =~ /^\.Xr/;

	# characters to treat as whitespace in an Xr macro
	my $wspace = '[ (),.:]';
	# character class for section numbers
	# an initial number possibly followed by a letter
	my $sect = '\d{1}[A-Za-z]?';

	my $xname = '';
	$xname = $1 if $txt =~ /^\.Xr$wspace+(\S+)/;
	my $xsection = '';
	$xsection = $1 if $txt =~ /^\.Xr$wspace+\S+$wspace+($sect)/;

	if ( ! $xname ) {
		showline($bname, $line, $ansi{yellow}, 'xref name missing', $txt);
		return;
	}

	if ( $xname =~ /\($sect\)/ ) {
		# the name contains parentheses here, so it must be matched
		# literally or the highlight can never be applied
		$txt =~ s/(\Q$xname\E)/$lh$1$rh/;
		showline($bname, $line, $ansi{yellow}, 'section number in name', $txt);
		return;
	}

	# NOTE(review): $sect captures a single digit, so this string
	# comparison appears to be true only for a 9 followed by a letter
	# (such as "9p") -- confirm that this is the intent
	if ( $xsection && ($xsection gt "9") ) {
		$txt =~ s/^(\.Xr$wspace+\S+$wspace+)($sect)/$1$lh$2$rh/;
		showline($bname, $line, $ansi{yellow}, 'section higher than 9', $txt);
		# no point in checking for sections higher than 9
		return;
	}

	if ( $opt_x ) {
		# $xname is taken straight from the document, so it is
		# single-quoted for the shell to keep it from being run as a
		# command; $xsection can only be a digit and an optional letter
		(my $shellname = $xname) =~ s/'/'\\''/g;
		system("$man -w $xsection '$shellname' >/dev/null 2>&1");
		if ( $? ) {
			if ( $xsection ) {
				$txt =~ s/^(\.Xr$wspace+)(\S+$wspace+$sect)/$1$lh$2$rh/;
			} else {
				$txt =~ s/^(\.Xr$wspace+)(\S+)/$1$lh$2$rh/;
			}
			showline($bname, $line, $ansi{darkmagenta}, 'external man page not found', $txt);
			# not found, no point in checking if it's this one
			return;
		}
	}

	# is this external reference referring to itself?
	# skip if the .Nm macro has no value
	return if $macroval{'Nm'} ne $xname;
	my $currsection = '';
	if ( $macroval{'Dt'} =~ /^\S+\s+($sect)/ ) {
		$currsection = $1;
	}
	return if $xsection ne $currsection;
	if ( $xsection && $currsection ) {
		$txt =~ s/^(\.Xr$wspace+)(\S+$wspace+$sect)/$1$lh$2$rh/;
	} else {
		$txt =~ s/^(\.Xr$wspace+)(\S+)/$1$lh$2$rh/;
	}
	showline($bname, $line, $ansi{darkmagenta}, 'xref refers to *this* page (use .Nm)', $txt);
}


# DocBook line-by-line tests

# Build the regular expressions used by the title capitalization check.
# Sets the globals $lc_regex, $uc_regex, $fixedcase_regex, and
# $ignoreregex.
sub init_doc_titles {
	print "initializing doc_titles\n" if $verbose;

	# turn a list of words into one non-capturing alternation
	my $alternation = sub {
		return '(?:' . join('|', @_) . ')';
	};

	# words that should be lowercase in titles
	$lc_regex = $alternation->(
		qw/ a an and at by down for from in into like near
			nor of off on onto or over past the to upon with /
	);
	$uc_regex = $alternation->(
		qw/ about are how log new not set tag use
			one two three four five six seven eight nine /
	);
	$fixedcase_regex = $alternation->(qw/ amd64 i386 iSCSI x86 /);

	# DocBook tagged words to ignore in titles,
	# like <command>ls</command>
	my @tagpatterns = map {
		my $tagname = $_;
		"<$tagname.*?>.*?<\/$tagname>";
	} qw/ acronym application command filename function
		  link literal varname replaceable systemitem tag /;
	$ignoreregex = join('|', '<anchor.*?>', @tagpatterns);
}

# Check the capitalization of words in DocBook <title> text.
#   $bname - file basename, passed through to showline()
#   $line  - line number, passed through to showline()
#   $txt   - the line (or line segment) to check
# Reads $ignoreblock and maintains the file-level $titleblock flag, which
# stays set from a line containing "<title" until a line containing
# "</title>" has been processed, so titles spanning several lines are
# checked on every line.  Suspect words are replaced by the result of
# highlight_string() and the rebuilt line is passed to showline().
sub doc_titles {
	my ($bname, $line, $txt) = @_;
	return if $txt =~ /^\s*$/;

	# keep the unmodified text for rebuilding and comparison below
	my $txtbak = $txt;

	return if $ignoreblock;
	$titleblock = 1 if $txt =~ /<title/;
	return unless $titleblock;

	print "doc_titles: '$txt'\n" if $verbose;

	my @words;

	# take the text from between title tags, or the
	# whole line if a title tag is not present
	# split the result into an array of words, keeping
	# ignorable DocBook tags wrapped around text
	# (the capture group in the split pattern keeps the separators,
	# so joining @words later reproduces the original spacing)
	if ( ($txt =~ /<title.*?>(.*?)(?:<\/title>|$)/)
		 || ($txt =~ /(.*)(?:<\/title>)/) ) {
		@words = split /($ignoreregex|\s+)/, $1;
	} else {
		@words = split /($ignoreregex|\s+)/, $txt;
	}

	# use AP style: capitalize words longer than three letters
	WORD: for my $i (0..$#words) {
		my $word = $words[$i];

		print "doc_titles: analyzing '$word'\n" if $verbose;

		# whitespace separators and ignorable tagged text are not words
		next WORD if $word =~ /\s+/;
		next WORD if $word =~ /$ignoreregex/;

		# special case: skip the contents of some unfinished tags
		# <title>Configuring <acronym role="Domain Name
		#   System">DNS</acronym></title>
		next WORD if $word =~ /(?:role)=/;

		# special case: allow single lowercase "s" for plurals
		next WORD if $word eq 's';

		# special case words that should not be capitalized
		next WORD if $word =~ /^$fixedcase_regex$/;

		# first word should be capitalized
		# (only applies when this line contains the opening title tag)
		if ( ($txt =~ /<title/) && ($i == 0) ) {
			if ( is_lowercase($word) ) {
				$words[$i] = highlight_string($word);
			}
			# first word is special, skip other tests
			next WORD;
		}

		# last word should be capitalized
		# (only applies when this line contains the closing title tag)
		if ( ($txt =~ /<\/title/) && ($i == $#words) ) {
			if ( is_lowercase($word) ) {
				$words[$i] = highlight_string($word);
			}
			# last word is special, skip other tests
			last WORD;
		}

		# words that should be lower case
		if ( is_uppercase($word) ) {
			if ( $word =~ /^$lc_regex$/i ) {
				$words[$i] = highlight_string($word);
				next WORD;
			}
		}

		# words that should be upper case
		if ( is_lowercase($word) ) {
			# anything longer than three letters that is not in the
			# lowercase list
			if ( $word !~ /^$lc_regex$/i ) {
				if ( (length($word) > 3) ) {
					$words[$i] = highlight_string($word);
					next WORD;
				}
			}
			# short words that are capitalized anyway
			if ( $word =~ /^$uc_regex$/i ) {
				$words[$i] = highlight_string($word);
				next WORD;
			}
		}
	}

	# reconstruct the now-capitalized title
	# (text up to and including the open tag, the possibly highlighted
	# words, then the close tag if there was one)
	$txt = '';
	$txt = $1 if $txtbak =~ /^(.*<title.*?>)/;
	$txt .= join('', @words);
	$txt .= $1 if $txtbak =~ /(<\/title.*?>)/;

	# any difference from the original means something was highlighted
	if ( $txt ne $txtbak ) {
		print "doc_titles:\n     original='$txtbak'\n  highlighted='$txt'\n" if $verbose;
		showline($bname, $line, $ansi{blue}, 'capitalization', $txt);
	}

	# the title ends on this line, stop checking following lines
	$titleblock = 0 if $txt =~ /<\/title>/;
}

sub init_doc_indentation {
	if ( $verbose ) {
		print "initializing doc_indentation\n";
	}

	# DocBook tags that begin or end an indented section
	my @block_tags = qw/
		abstract answer appendix article articleinfo
		author authorgroup biblioentry bibliography
		biblioset blockquote book bookinfo callout
		calloutlist category chapter chapterinfo colophon
		caution contrib date day entry event example
		figure formalpara funcdef funcsynopsis
		funcprototype glossary glossdef glossdiv
		glossentry glossterm important imageobject
		imageobjectco info informaltable
		informalexample itemizedlist legalnotice
		listitem mediaobject mediaobjectco month name
		note orderedlist para paramdef partintro
		personname preface procedure qandadiv
		qandaentry qandaset question row screenco
		sect1 sect2 sect3 sect4 sect5 section
		seglistitem segmentedlist sidebar step
		stepalternatives surname table tbody tgroup
		thead tip title variablelist varlistentry
		warning year
	/;
	# VuXML tags that do the same
	push @block_tags, qw/
		affects body cvename dates
		description discovery head html li name p range
		references topic ul vuln vuxml
	/;
	# longest names first
	@block_tags = sort { length($b) <=> length($a) } @block_tags;
	$indent_regex = '(?:' . join('|', @block_tags) . ')';
	if ( $verbose ) {
		print "indentation tags: @block_tags\n";
		print "indentation regex: $indent_regex\n";
	}

	# inline tags, like <filename>blah</filename>
	my @phrase_tags = qw/
		a acronym application citetitle command
		computeroutput devicename emphasis envar
		errorname filename firstterm footnote function
		guimenu guimenuitem hostid imagedata indexterm
		keycap keycombo link literal makevar option
		optional package parameter primary quote
		remark replaceable secondary see seg sgmltag
		simpara strong structname systemitem term tt
		ulink uri varname
	/;
	# VuXML inline tags
	push @phrase_tags, qw/ ge gt le lt url /;
	# longest names first
	@phrase_tags = sort { length($b) <=> length($a) } @phrase_tags;
	$inline_regex = '(?:' . join('|', @phrase_tags) . ')';
	if ( $verbose ) {
		print "inline tags: @phrase_tags\n";
		print "inline regex: $inline_regex\n";
	}
}

# Check that a DocBook line is indented correctly relative to the
# previous nonblank line.
#   $bname    - file basename, passed through to showline()
#   $line     - line number, passed through to showline()
#   $currline - the line (or line segment) to check
# The expected indent starts as the indent of $prevnonblank and is
# adjusted by two columns for each construct recognized below; a
# mismatch with the current line's indent is reported through showline()
# using the file-level $origline as the displayed text.
sub doc_indentation {
	my ($bname, $line, $currline) = @_;
	my ($init_prev_indent, $init_curr_indent);
	return if $currline =~ /^\s*$/;

	# indents are not significant inside ignorable SGML blocks.
	return if $ignoreblock;

	# a comment that opens and closes on this line is not checked
	return if $currline =~ /^\s*<!--.*-->\s*$/;

	# only check lines that follow an open or close indenting tag
	# \b is needed here to prevent <parameter> being detected as <para>
	return unless $prevnonblank =~ /<\/*$indent_regex\b.*?>/;

	my $prev_indent = length(leading_space($prevnonblank));
	my $curr_indent = length(leading_space($currline));
	if ( $verbose ) {
		# save initial values for later verbose reporting
		$init_prev_indent = $prev_indent;
		$init_curr_indent = $curr_indent;
	}

	# indent once for open tag on previous line
	$prev_indent += 2 if $prevnonblank =~ /<$indent_regex\b/;

	# allow for inline tag indenting, like
	# <link
	#   url=
	# or
	# <makevar>xyz
	#   abc</makevar>
	# (each s///g puts back exactly what it matched; it is used only
	# for its return value, the number of matches, so $count ends up
	# as inline open tags minus inline close tags on the previous line)
	my $count = 0;
	$count += ($prevnonblank =~ s/(<$inline_regex)\b/$1/g);
	$count -= ($prevnonblank =~ s/(<\/$inline_regex)\b/$1/g);
	$prev_indent += (2 * $count);

	# if previous line ends in an open xref, indent
	$prev_indent += 2 if ($prevnonblank =~ /<xref\s*$/);

	# <xref> has no close tag, but uses "linkend=" the same as <link>
	# which *does* have a close tag... so if there's a linkend= on
	# previous line but no </ulink> or </link> on either previous
	# or current lines, assume it's an xref and outdent
	my $broken_regex = '(?:(?:linkend|url)=)';
	if ( $prevnonblank =~ /^\s*$broken_regex/ ) {
		if ($prevnonblank !~ /<\/(?:link|ulink)/) {
			if ($currline !~ /<\/(?:link|ulink)/) {
				$prev_indent -= 2;
			}
		}
	}

	# outdent for close tag at end of previous line
	# (the close tag must be preceded by other non-space text)
	$prev_indent -= 2 if ($prevnonblank =~ /\S+.*<\/$indent_regex>\s*$/);

	# outdent for close tag at the start of this line
	$prev_indent -= 2 if ($currline =~ /^\s*<\/$indent_regex/);

	# outdent after footnote
	$prev_indent -=2 if $prevnonblank =~ /<\/para><\/footnote>/;

	# singleton tags like <entry/> are really just an empty
	# open/close tag, <entry></entry>, allow for them
	$prev_indent -=2 if $prevnonblank =~ /\/>$/;

	# close tags after long sections of nonindented blocks,
	# like the end of a programlisting, cannot be correctly
	# checked for indentation in this hacky way, so ignore them
	# (forcing the two values equal suppresses the report below)
	if ( ($prevnonblank =~ /$ignoreblockstart|$ignoreblockend/)
		|| ($currline =~ /$ignoreblockend/) ) {
		$curr_indent = $prev_indent;
	}

	if ( $curr_indent != $prev_indent ) {
		if ( $verbose ) {
			print "doc_indentation:\n";
			my $vprev = showwhitespace($prevnonblank);
			my $vcurr = showwhitespace($currline);
			print "previous nonblank line: '$vprev\'\n";
			print "          current line: '$vcurr\'\n";
			print "\t\t\t\tinitial\tfinal\n";
			print "previous nonblank indent:\t$init_prev_indent\t$prev_indent\n";
			print "          current indent:\t$init_curr_indent\t$curr_indent\n";
		}
		# report the original full line with its leading whitespace
		# highlighted, not the segment that was passed in
		my $out = $origline;
		$out =~ s/(^\s+)/$li$1$ri/;
		showline($bname, $line, $ansi{darkred}, 'bad tag indent', $out);
	}
}

# Split a line into its leading whitespace and the content after it.
#   $txt - the line to split
# Returns a two-element list: (leading whitespace, content).  Either
# element may be the empty string, and concatenating the two always
# gives back the first line of $txt.
sub splitleading {
	my $txt = shift;
	# return the captures directly rather than testing them for truth:
	# content of "0" is false in Perl but is still valid content, and
	# a whitespace-only line has empty content, not the whole line
	if ( $txt =~ /^(\s*)(.*)/ ) {
		return ($1, $2);
	}
	# not expected to be reached, the pattern can match the empty string
	return ('', $txt);
}

# Report DocBook lines that are longer than $linelensgml columns and
# contain a space where they could be wrapped.
#   $bname - file basename, passed through to showline()
#   $line  - line number, passed through to showline()
#   $txt   - the line (or line segment) to check
sub doc_longlines {
	my ($bname, $line, $txt) = @_;

	# nothing to check on blank lines or inside ignorable blocks
	return if $txt =~ /^\s*$/ || $ignoreblock;

	# lines with these tags are allowed to be long
	return if $txt =~ /<(?:!DOCTYPE|!ENTITY|pubdate|releaseinfo)/;

	# measure the line with tabs expanded
	$txt = expand_tabs($txt);
	return unless length($txt) > $linelensgml;

	# this should be smarter, like seeing if the part before the space
	# will benefit from wrapping

	# cut the content at the column limit: $head fits on the line,
	# $tail is whatever runs past the limit
	my ($indent, $body) = splitleading($txt);
	my $head = substr($body, 0, $linelensgml - length($indent));
	my $tail = substr($body, length($head));

	my $wrappable;
	if ( $head =~ / / ) {
		# highlight the last space that is still inside the limit
		$head =~ s/^(.*)? (.*)$/$1$li $ri$2/;
		$wrappable = 1;
	} else {
		# otherwise highlight the first space past the limit, if any
		$wrappable = ( $tail =~ s/ /$li $ri/ );
	}

	if ( $wrappable ) {
		showline($bname, $line, $ansi{green}, 'wrap long line', "$indent$head$tail");
	}
}

# Set up the file-level $eos_regex used to recognize the end of a
# sentence.  The value is an alternation without surrounding group, so
# users wrap it as (?:$eos_regex).
sub init_doc_sentence {
	print "initializing doc_sentence\n" if $verbose;
	# end of sentence characters: literal dot, question mark, exclamation point
	# each is its own alternative; the earlier '\?\!' only matched
	# the two-character sequence "?!"
	$eos_regex = '\.|\?|\!';
}

# Report sentences that are followed by a single space instead of two.
#   $bname - file basename, passed through to showline()
#   $line  - line number, passed through to showline()
#   $txt   - the line (or line segment) to check
# A chunk ending in a dot and exactly one space is treated as a
# sentence end unless it matches one of the exceptions below.  The
# offending space is highlighted and the rebuilt line is passed to
# showline() once per line.
sub doc_sentence {
	my ($bname, $line, $txt) = @_;

	return if $txt =~ /^\s*$/;
	return if $ignoreblock;

	# skip if there is no end-of-sentence character
	return unless $txt =~ /(?:$eos_regex)/;

	my $errcount = 0;
	my ($inspace, $content) = splitleading($txt);
	# split into chunks that end with end-of-sentence characters plus
	# whitespace, or that are a single tag; the capture group keeps the
	# chunks themselves, so joining them rebuilds the content
	my @sentences = grep (! /^$/, split /((?:.*?(?:$eos_regex)+\s+)|(?:<.*?>))/, $content);

	# $s aliases the array element, so the highlight added at the
	# bottom of the loop ends up in @sentences
	for my $s (@sentences) {
		# skip unless it has a one-space possible sentence start
		next unless $s =~ /\. $/;

		# SGML markup, like "<emphasis>bold</emphasis>."
		#next if $s =~ />\. $/;

		# single dots, like from "find . -name '*.sgml'"
		next if $s =~ / \. $/;

		# initials
		next if $s =~ /[A-Z]{1}\. $/;

		# common abbreviations
		next if $s =~ /(?:Ave|Dr|Ed|etc|Inc|Jr|Mass|Pub|Sp|St|Str|str|o\.o)\. $/;

		# ignore misuse of cf., e.g., i.e., and v.s., they are not
		# end of sentence errors
		next if $s =~ /(?:cf|e(?:\.)*g|i\.e|v\.s)\. $/i;

		# months ("Jun" was missing from this list, so "Jun. " was
		# reported as a sentence end)
		next if $s =~ /(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\. $/;

		# numbers, like "... and 1997."
		next if $s =~ /\d+\. $/;

		# ellipsis
		next if $s =~ /\.\.\. $/;

		# it must be a single-space sentence start
		$s =~ s/ $/$li $ri/;
		$errcount++;
	}

	if ( $errcount ) {
		# reassemble the now-highlighted string
		$txt = $inspace . join('', @sentences);
		showline($bname, $line, $ansi{darkblue}, 'use two spaces at sentence start', $txt);
	}
}

sub init_doc_openclose {
	if ( $verbose ) {
		print "initializing doc_openclose\n";
	}

	# tags tracked by doc_openclose for open-without-close and
	# close-without-open
	@openclose_tags = qw/ callout entry filename footnote li listitem literal p para row step /;

	# none of them are open to begin with
	@opentag{@openclose_tags} = (0) x scalar(@openclose_tags);

	# alternations used by doc_openclose: every tracked tag, the list
	# container tags, and the tags that wrap a <para>
	$openclose_regex = join('|', @openclose_tags);
	$list_regex      = join('|', qw/ itemizedlist orderedlist variablelist /);
	$parawrap_regex  = join('|', qw/ footnote listitem /);
}

# Track whether selected DocBook tags are open or closed and report
# an open tag that is already open or a close tag that is not open.
#   $bname - file basename, passed through to showline()
#   $line  - line number, passed through to showline()
#   $txt   - the line (or line segment) to check
# State is kept in the file-level %opentag hash (1 = open, 0 = closed),
# so it carries over from one line to the next.
sub doc_openclose {
	my ($bname, $line, $txt) = @_;
	return if $txt =~ /^\s*$/;
	return if $ignoreblock;
	return unless $txt =~ /</;

	# $errcount is declared but not used in this sub
	my $errcount = 0;
	my ($inspace, $content) = splitleading($txt);
	# split into tags and the text between them; a tag may be cut
	# off by the end of the line, hence (?:>|$)
	my @chunks = split(/(<.*?(?:>|$))/, $content);
	@chunks = grep (! /^\s*$/, @chunks);

	# $chunk aliases the array element, so highlights added below
	# show up when @chunks is joined for showline()
	for my $chunk (@chunks) {
		# only tags are of interest
		next unless $chunk =~ /</;

		for my $tag (@openclose_tags) {
			# cheap pre-checks before the anchored tests below
			next unless $chunk =~ /(?:$openclose_regex)/;
			if ( $chunk =~ /$tag/ ) {
				# check for open without close
				if ( $opentag{$tag} && $chunk =~ /<$tag\b/ ) {
					$chunk =~ s/(<$tag\b)/$lh$1$rh/;
					showline($bname, $line, $ansi{red}, "open <$tag> without closing", $inspace . join('', @chunks));
				}

				# check for close without open
				if ( ! $opentag{$tag} && $chunk =~ /<\/$tag>/ ) {
					$chunk =~ s/(<\/$tag\W)/$lh$1$rh/;
					showline($bname, $line, $ansi{red}, "close </$tag> without opening", $inspace . join('', @chunks));
				}

				# evaluate closes
				$opentag{$tag} = 0 if $chunk =~ /<\/$tag>/;
				# evaluate opens
				$opentag{$tag} = 1 if $chunk =~ /<$tag\b/;
			}
		}

		# special-case closes
		# <para> can be inside footnotes or lists
		# (entering one of these, or leaving a list, means the next
		# <para> is not reported as already open)
		$opentag{'para'} = 0 if $chunk =~ /<(?:$parawrap_regex)\b/;
		$opentag{'para'} = 0 if $chunk =~ /<\/(?:$list_regex)>/;

		# list tags like <itemizedlist> start a new list
		# so 'listitem' is no longer open
		$opentag{'listitem'} = 0 if $chunk =~ /<(?:$list_regex)\b/;

		# procedures can be nested, so <procedure> closes <step>
		$opentag{'step'} = 0 if $chunk =~ /<procedure\b/;


		# special-case opens
		# (leaving a footnote or listitem, or entering a list, puts
		# the surrounding <para> back in the open state)
		$opentag{'para'} = 1 if $chunk =~ /<\/(?:$parawrap_regex)>/;
		$opentag{'para'} = 1 if $chunk =~ /<(?:$list_regex)\b/;

		# list tags like </itemizedlist> end a list
		# so 'listitem' is open again
		$opentag{'listitem'} = 1 if $chunk =~ /<\/(?:$list_regex)>/;

		# procedures can be nested, so </procedure> opens <step>
		$opentag{'step'} = 1 if $chunk =~ /<\/procedure\b/;
	}
}

sub init_literalblock_regex {
	if ( $verbose ) {
		print "initializing literalblock_regex\n";
	}
	# names of the literal block elements, shared by more than one
	# of the DocBook tests
	$literalblock_regex = 'literallayout|programlisting|screen';
}

# Check the placement of selected DocBook tags relative to line breaks
# and blank lines.
#   $bname    - file basename, passed through to showline()
#   $line     - line number, passed through to showline()
#   $currline - the full current line
# Compares against the file-level $prevline and $prevnonblank.  Each
# check that highlights $currline restores it from $currlinebak
# afterwards so the following checks see the unmodified line.
sub doc_tagstyle_whitespace {
	my ($bname, $line, $currline) = @_;
	return if $ignoreblock;

	my $currlinebak = $currline;

	# <title>
	# (other non-space text before <title> on the same line)
	if ( $currline =~ s/^(\s*\S+.*?)(<title)/$1$lh$2$rh/ ) {
		showline($bname, $line, $ansi{darkcyan}, 'put <title> on new line', $currline);
		$currline = $currlinebak;
	}

	# <para>
	# (<para> at the end of a line with nothing following it)
	# NOTE(review): unlike the other checks, this substitution adds no
	# highlight, only strips trailing whitespace, and $currline is not
	# restored afterwards -- confirm whether that is intended
	if ( $currline =~ s/(<para>)\s*$/$1/ ) {
		showline($bname, $line, $ansi{red}, 'start <para> content on same line', $currline);
	}
	# (text after </para> at the end of a line)
	if ( $currline =~ s/(<\/para>)([^< ]+)$/$1$lh$2$rh/ ) {
		showline($bname, $line, $ansi{red}, 'character data is not allowed here', $currline);
		$currline = $currlinebak;
	}

	# <programlisting>
	if ( $currline =~ /<programlisting/ ) {
		# <programlisting> should not be used as an inline tag
		if ( $currline =~ s/(\S+\s*<programlisting.*?>)/$lh$1$rh/ ) {
			showline($bname, $line, $ansi{red}, 'do not use <programlisting> inline in other elements', $currline);
			$currline = $currlinebak;
		} elsif ( ($currline =~ /\s*<programlisting/)
			&& ($prevnonblank !~ /<\/(?:entry|formalpara|indexterm|note|para|programlisting|screen|title)>\s*$/) ) {
			# <programlisting> allowed inside these elements
			# (this return also skips the blank-line checks below)
			return if $prevnonblank =~ /<(?:example|informalexample)>/;
			$currline =~ s/(<programlisting.*?>)/$lh$1$rh/;
			showline($bname, $line, $ansi{red}, 'do not use <programlisting> inside other elements', $currline);
			$currline = $currlinebak;
		}
	}

	# elements that should be preceded by a blank line
	if ( $prevline =~ /\S+/ ) {
		# an open tag like <informalexample> is okay, otherwise
		# there should be a blank line before these tags
		# ($1 in the message is the tag captured by the substitution)
		if ( ($prevline !~ /<.*?>\s*$/) && ($currline =~ s/(<(?:$literalblock_regex).*?(?:>|$))/$lh$1$rh/) ) {
			showline($bname, $line, $ansi{darkcyan}, "precede $1 with a blank line", $currline);
			$currline = $currlinebak;
		}
	}

	# elements that should be followed by a blank line
	if ( $currline =~ /\S+/ ) {
		# a close tag like </note> is okay, otherwise there
		# should be a blank line after these tags
		# unless they are followed by another close tag on the same line
		# example: </literallayout></entry>
		# if ( ($currline !~ /^\s*<\//) && ($prevline =~ /(<\/(?:$literalblock_regex|row|step|title)>)/) ) {
		# ($1 in the message is the close tag captured from $prevline)
		if ( ($currline !~ /^\s*<\//) && ($prevline =~ /(<\/(?:$literalblock_regex|row|step|title)>)/) && ($prevline !~ /<\/entry>$/) ) {
			showline($bname, $line, $ansi{darkcyan}, "add blank line after $1 on previous line", "$lh$currline$rh");
		}
	}
}

sub init_doc_writestyle {
	if ( $verbose ) {
		print "initializing doc_writestyle\n";
	}

	# tags whose name is often repeated as a plain word after them
	$redundantword_regex = 'command|filename|keycap|option';

	# a close tag directly followed by the word that repeats it,
	# like "</command> command"
	$redundanttagword_regex = '(<\/(?:command> command|filename> file|keycap> key|option> option))\b';
}

# Report markup followed by a word that repeats it, like
# "<command>ls</command> command".
#   $bname    - file basename, passed through to showline()
#   $line     - line number, passed through to showline()
#   $currline - the line (or line segment) to check
# Two cases are handled: the close tag ends the file-level $prevline
# and the word starts the current line, or both are on the current line.
sub doc_writestyle {
	my ($bname, $line, $currline) = @_;
	return if $ignoreblock;

	my $currlinebak = $currline;

	# test for redundant markup and words starting on the previous line
	if ( $prevline =~ /(<\/(?:$redundantword_regex)>*\s*$)/ ) {
		my $prevend = $1;
		# split on a literal "|": a plain '|' string is taken as a
		# pattern that matches the empty string, which splits the
		# list into single characters instead of words
		for my $word (split(/\|/, $redundantword_regex)) {
			# find the tag name that ended the previous line
			next unless $prevend =~ /$word/;
			# first word of this line, allowing for a ">" left over
			# from a close tag that was broken across the lines
			next unless $currline =~ /^\s*>*\s*(\w+)\s*(?:\W+|$)/;
			my $firstword = $1;
			if ( "$prevend $firstword" =~ /$redundanttagword_regex/ ) {
				$currline =~ s/^(\s*)($firstword)\b/$1$lh$2$rh/;
				# report both halves: the tag on the previous line
				# and the word on this one
				showline($bname, $line-1, $ansi{darkmagenta}, 'redundant markup and word', "... $lh$prevend$rh");
				showline($bname, $line,   $ansi{darkmagenta}, 'redundant markup and word', $currline);
				$currline = $currlinebak;
				last;
			}
		}
	}

	# test for redundant markup and words on the current line
	if ( $currline =~ /$redundantword_regex/ ) {
		if ( $currline =~ s/$redundanttagword_regex/$lh$1$rh/ ) {
			showline($bname, $line, $ansi{darkmagenta}, 'redundant markup and word', $currline);
			$currline = $currlinebak;
		}
	}
}

sub init_doc_stragglers {
	if ( $verbose ) {
		print "initializing doc_stragglers\n";
	}
	# tags checked by doc_stragglers for stray whitespace around
	# their content and for close tags alone on a line
	@straggler_tags = (
		qw/ application command entry filename /,
		qw/ guibutton guimenu keycap link literal para /,
		qw/ title ulink uri varname /,
	);
}

# Check for whitespace between tags and their content, and for close
# tags left alone ("straggling") on a line of their own.
#   $bname - file basename, passed through to showline()
#   $line  - line number, passed through to showline()
#   $txt   - the full current line
# The whitespace checks run on every nonblank line; the straggler
# checks at the end only run for lines that start with a close tag and
# are outside ignorable blocks.  Compares against the file-level
# $prevline.
sub doc_stragglers {
	my ($bname, $line, $txt) = @_;
	return if $txt =~ /^\s*$/;

	# check for spaces after open tags or before close tags
	# like <title> Something</title>
	# or <filename>/etc/rc.conf </filename>

	# these tags should not have spaces or tabs around content
	# opening tags (this will not catch link tags with attributes)
	# (<entry> is skipped in both loops below; it is only in the
	# list for the straggling close tag test at the end)
	for my $tag (@straggler_tags) {
		next if $tag eq 'entry';
		if ( $txt =~ /(<$tag>\s+)/ ) {
			print "doc_stragglers opening tags: tag='$tag', found='$1'\n" if $verbose;
			$txt = highlight_word($txt, $1);
			showline($bname, $line, $ansi{yellow}, "space before content", $txt);
		}
	}
	# closing tags
	for my $tag (@straggler_tags) {
		next if $tag eq 'entry';
		if ( $txt =~ /(\s+<\/$tag>)/ ) {
			print "doc_stragglers closing tags: tag='$tag', found='$1'\n" if $verbose;
			$txt = highlight_word($txt, $1);
			showline($bname, $line, $ansi{yellow}, "space after content", $txt);
		}
	}
	# special case: link tags
	# like <link xlink:href="&url.articles.gjournal-desktop;">
	# ignore the opening < and just key off of xlink:href
	if ( $txt =~ /(xlink:href\S+?>)(.)/ ) {
		# the character right after the end of the tag
		my $lastchar = $2;
		if ( $lastchar eq ' ' || $lastchar eq "\t" ) {
			print "doc_stragglers xlink:href, found='$1$lastchar'\n" if $verbose;
			$txt = highlight_word($txt, $1);
			showline($bname, $line, $ansi{yellow}, "space before content", $txt);
		}
	}

	# check for literal start tags without listing on the same line
	my $tag;
	if ( $txt =~ />\s*$/ ) {
		if ( $txt =~ /<($literalblock_regex)[^<]?>$/ ) {
			# open tag at the very end of the line
			$tag = $1;
			$txt =~ s/(<$tag[^<]?>)$/$lh$1$rh/;
			showline($bname, $line, $ansi{yellow}, "put <$tag> listing on same line", $txt);
			return;
		} elsif ( $txt =~ /^\s*<\/($literalblock_regex)[^<]?>/ ) {
			# close tag at the start of the line
			$tag = $1;
			$txt =~ s/(<\/$tag[^<]?>)$/$lh$1$rh/;
			showline($bname, $line, $ansi{yellow}, "straggling </$tag>", $txt);
			return;
		}
	}

	# the following tests are only for close tags at the start of a line
	return unless $txt =~ /^\s*<\//;

	return if $ignoreblock;

	# stragglers can't be detected when coming out of an ignore block
	return if ( $prevline =~ /$ignoreblockstart|$ignoreblockend/ );

	# more special-case hackery to handle
	#   </table>
	# </para>
	if ( ($prevline =~ /<\/table>\s*$/)
		&& ($txt =~ /^\s*<\/para>\s*$/) ) {
		return;
	}

	# even more special-case hackery to handle
	#   <para>...</para>
	#   <note>...</note>
	# </entry>
	# NOTE(review): as written the alternation matches </para> anywhere
	# in the previous line, or </note> at the end of it; if both were
	# meant to be anchored at the end, the alternatives need a (?:...)
	# group -- confirm the intent before changing
	if ( ($prevline =~ /<\/para>|<\/note>\s*$/)
		&& ($txt =~ /^\s*<\/entry>\s*$/) ) {
		return;
	}

	# a close tag from the list that is alone on its line
	for my $tag (@straggler_tags) {
		if ( $txt =~ /^\s*(<\/$tag>)\s*$/ ) {
			$txt = highlight_word($txt, $1);
			showline($bname, $line, $ansi{yellow}, "straggling </$tag>", $txt);
		}
	}
}

# Check the leading whitespace and embedded tabs of a DocBook line.
#   $bname - file basename, passed through to showline()
#   $line  - line number, passed through to showline()
#   $txt   - the line (or line segment) to check
# Three independent checks are made, each on a fresh copy of the line:
# eight-space runs that should be tabs, tabs after the start of the
# content, and an odd number of columns of indent.
sub doc_whitespace {
	my ($bname, $line, $txt) = @_;
	my $txtbak = $txt;

	# indents and tabs/spaces are not significant inside
	# ignorable SGML blocks
	return if $ignoreblock;

	# multiples of eight spaces at the start a line
	# (after zero or more tabs) should be a tab
	# the repetition is inside the capture so the whole run is put
	# back; capturing only the repeated group would keep just its last
	# repetition and drop the rest of the indent from the shown line
	if ( $txt =~ s/^((?:\t* {8})+)/$li$1$ri/ ) {
		showline($bname, $line, $ansi{darkmagenta}, 'use tabs instead of spaces', $txt);
	}

	# tabs hidden in paragraphs is also bad
	# (the greedy .* means the last tab after the first word is the
	# one that gets highlighted)
	$txt = $txtbak;
	if ( $txt =~ s/^(\s*\S+)(.*)(\t)/$1$2$li$3$ri/ ) {
		showline($bname, $line, $ansi{darkmagenta}, 'tab in content', $txt);
	}

	# if coming out of an ignoreblock, odd spaces are
	# an artifact of splitting the line and can't be checked
	return if ( $prevline =~ /$ignoreblockstart|$ignoreblockend/ );

	# one or more occurrences of single tabs or double spaces,
	# followed by a single space, is a bad indent
	# if ( $txt =~ s/^((?:(?:  )+|(?:\t+))* )\b/$li$1$ri/ ) {

	# but simpler just to expand tabs to 8 spaces
	# and check for an odd number of spaces
	$txt = $txtbak;
	$txt = expand_tabs($txt);
	if ( $txt =~ s/^((?:  )* )\b/$li$1$ri/ ) {
		showline($bname, $line, $ansi{darkred}, 'bad indent', $txt);
	}
}


# DocBook batch tests


# remember previous line for comparison
sub saveprevline {
	my ($latest) = @_;

	# every line becomes the previous line, blank or not
	$prevline = $latest;

	if ( $latest =~ /\S+/ ) {
		# treat comments as blank lines: a line that contains a
		# comment start or ends with a comment end does not replace
		# the previous nonblank line
		my $is_comment = ($latest =~ /\s*<!--/) || ($latest =~ /-->\s*$/);
		return if $is_comment;
		$prevnonblank = $latest;
	}
}


# Main program: run the selected tests on every file named on the
# command line.  The name 'stdin' stands for standard input, which is
# first written to a file by writestdinfile().  With $opt_X the output
# is wrapped in checkstyle XML, with one <file> element per input file.

initialize();

if ( $opt_X ) {
	print "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
	print "<checkstyle version=\"7.1\">\n";
}

# main loop
foreach my $fname (@ARGV) {
	# directories are skipped silently, anything else that cannot be
	# read is reported and skipped
	if ( $fname ne 'stdin' ) {
		next if -d $fname;
		unless ( -f $fname ) {
			print "$fname: not found\n";
			next;
		}
		unless ( -r $fname ) {
			print "$fname: not readable\n";
			next;
		}
	}

	# the filename heading is only shown when there are several files
	unless ( $opt_X ) {
		print "$fname:\n" if $#ARGV > 0;
	} else {
		print "  <file name=\"", xmlize($fname), "\">\n";
	}
	$fname = writestdinfile() if $fname eq "stdin";

	$bname = basename($fname);
	$tmpfile = '';
	$type = filetype($fname);

	# compressed files are tested through an uncompressed copy
	if ( $type =~ /gzip|bzip/ ) {
		$tmpfile = uncompress($fname, $type);
		$type = filetype($tmpfile);
	}

	print "detected file type:$type\n" if $verbose;

	# the file that is actually read; naming it here keeps the error
	# messages correct when there is no temporary file
	my $readfile = $tmpfile ? $tmpfile : $fname;

	open $fh, '<', $readfile or die "cannot open '$readfile':$!\n";

	# reset for each new document
	init_mdoc_uniqxrefs() if $opt_g;	# mdoc see also xrefs
	init_mdoc_structure() if $opt_m;	# mdoc tag presence
	$ignoreblock = 0;		# ignore SGML block
	my $saveindent = '';	# SGML indent level

	# line-by-line tests
	while (<$fh>) {
		# limit output to one vulnerability ID
		if ( $vid ) {
			if ( $_ =~ /<vuln/ ) {
				print "checking for VID in '$_'\n" if $verbose;
				if ( $vid eq 'latest' ) {
					$startline = $. if $_ =~ /<vuln vid=/;
				} else {
					$startline = $. if $_ =~ /<vuln vid=\"$vid\"/;
				}
				$stopline  = $. if $_ =~ /<\/vuln/;
				print "VID: startline=$startline, stopline=$stopline\n" if $verbose;
			}
		}

		# end if past specified ending line number
		last if $stopline && ($. > $stopline);

		chomp;

		# global tests
		abbrevs($bname, $., $_)         if $opt_a;
		badphrases($bname, $., $_)      if $opt_b;
		contractions($bname, $., $_)    if $opt_u;
		freebsdobsolete($bname, $., $_) if $opt_f;
		repeatedwords($bname, $., $_)   if $opt_r;
		spellingerrors($bname, $., $_)  if $opt_s;
		whitespace($bname, $., $_)      if $opt_w;

		# mdoc line tests
		if ( $type eq "troff" ) {
			next if /^\.\\\"/;	# ignore comments for these tests

			mdoc_whitespace($bname, $., $_) if $opt_p;
			mdoc_date($bname, $., $_)       if $opt_d;
			mdoc_sentence($bname, $., $_)   if $opt_e;
			mdoc_uniqxrefs($bname, $., $_)  if $opt_g;
			mdoc_structure($bname, $., $_)  if $opt_m;
		}

		# DocBook line tests
		if ( $type =~ /sgml|xml/ ) {
			$origline = $_;
			# these two tests work on the whole line
			doc_stragglers($bname, $., $_)          if $opt_S;
			doc_tagstyle_whitespace($bname, $., $_) if $opt_t;
			# the rest work on the segments returned by splitter()
			for my $segment (splitter($_)) {
				if ( $segment =~ /($ignoreblockstart)/ ) {
					# when entering an ignore block, test the full
					# line for indentation unless it is a comment
					unless ( $origline =~ /^\s*<!--/ ) {
						doc_indentation($bname, $., $origline) if $opt_i;
						# test just the indent for whitespace
						my ($origindent, undef) = splitleading($origline);
						doc_whitespace($bname, $., $origindent) if $opt_W;
						$saveindent = leading_space($origline);
						# save the same state information as the main loop would
						saveprevline($saveindent . $1);
					}
					$ignoreblock++;
					next;
				} elsif ( $segment =~ /($ignoreblockend)/ ) {
					# restore the indent level at the end of an ignore block
					# (the saved indent less two columns, followed by
					# the text that ended the block)
					$ignoreblock--;
					$prevline = substr($saveindent,0,length($saveindent)-2) . $1;
					next;
				}
				doc_titles($bname, $., $segment)      if $opt_c;
				doc_indentation($bname, $., $segment) if $opt_i;
				doc_longlines($bname, $., $segment)   if $opt_l;
				doc_sentence($bname, $., $segment)    if $opt_n;
				doc_openclose($bname, $., $segment)   if $opt_o;
				doc_writestyle($bname, $., $segment)  if $opt_E;
				doc_whitespace($bname, $., $segment)  if $opt_W;
			}
		}
		saveprevline($_);
	}

	close $fh or die "could not close file:$!\n";

	# batch tests work on the whole file at once
	# skip batch tests if a line range is set; only the batch tests are
	# skipped, so the <file> element is still closed, temporary files
	# are still removed, and any remaining files are still processed
	if ( ($opt_d || $opt_y) && !$opt_C ) {
		# slurp the whole file
		open $fh, '<', $readfile or die "cannot open '$readfile':$!\n";
		my $fulltext = do { local($/); <$fh> };
		close $fh or die "could not close file:$!\n";

		# global batch tests
		style($bname, $fulltext) if $opt_y;

		# mdoc batch tests
		# NOTE(review): the other showline() calls in this file pass
		# (name, line, color, message, text); here the message is in
		# the color position -- confirm against showline()
		if ( ($type eq "troff") && ($opt_d) && (!$docdate) ) {
			showline($bname, '-', '.Dd date not set', '', '');
		}
	}

	if ( $opt_X ) {
		print "  </file>\n";
	}

	removetempfiles();
}

if ( $opt_X ) {
	print "</checkstyle>\n";
}