#!perl use 5.008; $^W = 1; # use strict; $SIG{CHLD} = 'IGNORE'; $SIG{INT} = sub { exit(2) }; ${^WIN32_SLOPPY_STAT} = 1; my @Warnings; $SIG{__WARN__} = sub { push @Warnings, @_ }; our $VERSION = '3.01'; our (@Argv_end, $Bin_dir, %Col, $Col_Reset, $Context_line, $Context_line2, $Context_lineno, $Context_lineno2, @Exclude_dirs, @Exclude_exts, %Env, @FileFind_opts, $File, $Filepath, $HOME_dir, @Ini_files, $Msg_rs, $Newline, @P, %Peg_longopt, %Peg_p, %Peg_Q, %Peg_z, %Peg_zz, @Perlexpr_mung, $Verbose, $Z); my $Usage = <<"EOT"; Usage: peg [OPTION]... PERLEXPR [FILE]... Try `peg --help' for more information. EOT my (@Before, @Cmdline_dirs, @Cmdline_files, %Globbed, @Is_ascii_text, @Match_exts, @Matched_files, %Opt, @Peg_options_ARGV, @Perlexpr, @Perlexpr_k); my ($Beep, $Binary_file, $Buffer_contents, $Buffer_fh, $Buffer_output, $Bytes_read, $C, $Code_after_open, $Code_at_end, $Code_before_close, $Code_before_open, $Code_per_line, $Console_width, $Context_matcher, $Context_matcher2, $Count, $CRLF_to_newline, $Do_globbing, $Err, $First, $Found, $Guess_encoding, $Input_encoding, $Input_record_separator, $Inside_archive, $JJ_gap, $Last_matches_file, $Line_matched, $Match_failed, $Matched_before, $Matches, $Max_matches, $MTime, $MTime_new, $MTime_old, $Needs_crlf_layer, $Newline_literal, $Offset, $Opt_d, $Opt_m, $Opt_oo, $Opt_p_expr, $Opt_pp_code, $Opt_pp_expr, $Opt_r_cmd, $Opt_r_cmd_silent, $Opt_r_fork, $Opt_s, $Opt_ss, $Opt_y, $Opt_yy, $Output_BOM, $Output_encoding, $P, $Perlexpr, $Perlexpr_q, $Plus_one, $Print_context_matcher, $Printed_Context_line, $Printed_Context_line2, $Q_F, $Q_FILE, $Q_handler_re, $Q_nonarchive_re, $Search, $Search_STDIN, $Simple_Perlexpr, $Size, $Size_max, $Size_min, $Slurp, $Slurp_maxsize, $Start_time, $STDIN_is_terminal, $STDOUT_is_terminal, $Wide_chars, $Use_matchvars); my ($DQ, $SQ, $BT, $EM, $GT, $LT, $PC) = qw( " ' ` ! > < % ); my $R_file = sprintf "peg_%010d%04d.txt", time(), abs($$); my ($Worker_count, $Worker_work) = (2, 16); my $Is_Win32 = $^O eq 'MSWin32'; my ($After, $Before) = (2, 2); my $Called = caller(); END { close STDOUT or die_("can't close STDOUT: $!") }; load_ini_files(); process_ARGV(); process_options1(); build_Perlexpr(); process_options2(); build_search(); show_debug() if $Opt{D}; run(); $Called ? return : exit(@Matched_files ? 0 : 1); sub eval_ { eval $_[0] } sub chomp_ { $_[0] =~ s/\015?\012\z// } sub autoflush { select((select(shift), $| = 1)[0]) } sub warn_ { my $msg = join '', @_; chomp_ $msg; print STDERR "peg: $msg\n"; } # warn_ sub die_ { warn_ @_; exit(2); } # die_ sub cwd { require Cwd; local $_ = Cwd::cwd(); tr|\\|/| if $Is_Win32; $_ .= '/' unless m|/\z|; $_ = ucfirst if m|^\w:/|; return $_; } # cwd sub ee { $@ =~ s/^(.*) at .* line \d+.*\z/$1/s; chomp_ $@; $@ .= "\n"; return $@; } # ee sub load_ini_files { $Data::Dumper::Indent = 1; $Data::Dumper::Sortkeys = 1; $Bin_dir = ($_ = $ENV{PEG_BINDIR}) ? $_ : ($0 =~ /^(.*[\\\/])/) ? $1 : (( require FindBin), $FindBin::RealBin); $HOME_dir = $ENV{HOME} || $ENV{USERPROFILE} || '/'; for ($Bin_dir, $HOME_dir) { tr|\\|/| if $Is_Win32; $_ .= '/' unless m|/\z|; $_ = ucfirst if m|^\w:/|; } @FileFind_opts = ( 'preprocess' => sub { sort { lc($a) cmp lc($b) } @_ }, 'no_chdir' => $Is_Win32, ); $Last_matches_file = "${HOME_dir}.peg_matches"; $Msg_rs = "\003\003\003\000"; my %orig_ENV = %ENV; unless (@ARGV and $ARGV[0] =~ /^-YY/) { my @f = ($_ = $ENV{PEG_INI}) ? ($_) : ("${Bin_dir}peg_ini.pl", "${HOME_dir}.peg_ini.pl"); foreach my $f (@f) { next unless -f $f; eval { require $f }; $@ and die_ "bad ini file $f:\n", @Warnings, ⅇ push @Ini_files, $INC{$f}; } } foreach my $k (keys %ENV) { next unless $k =~ /^PEG_/; (exists $Env{$k} and (!exists $orig_ENV{$k} or $ENV{$k} ne $orig_ENV{$k})) and warn_ "ini files set $k in both %ENV and %Env"; $Env{$k} = $ENV{$k}; } $Beep = exists $Env{PEG_BEEP} ? "\a" : ""; $Console_width = $Env{PEG_CONSOLE_WIDTH} || 70; $Slurp_maxsize = $Env{PEG_SLURP_MAXSIZE} || 67_108_864; # 2**26 $Do_globbing = (exists $Env{PEG_GLOB} ? $Env{PEG_GLOB} : $Is_Win32); $Peg_longopt{'help'} = sub { help($_[0]->[0]) }; $Peg_longopt{'buffer-output'} = sub { $Buffer_output = 1 }; } # load_ini_files sub process_ARGV { my (@argv, $init_peg_options, %peg_options); my $options = 1; my $context = ''; my $pe_type = ''; if (@ARGV == 1 and $ARGV[0] eq '-V') { die_ sprintf "v%s Perl %vd %s", $VERSION, $^V, $^X; } $Opt{$_} = 0 for ('a'..'z', 'A'..'Z', '#', qw(% = + _ / \ { })); if ($_ = $Env{PEG_OPTIONS}) { if (/[\s\"]/) { # handle quoted arguments while (s/^\s+//, length) { if (/^\"/) { # eg. "a double ""quoted"" string" s/^"([^"]*(?:""[^"]*)*)"(?:\s|$)// or die_ "bad double quoted string in PEG_OPTIONS: $_"; (my $arg = $1) =~ s/""/"/g; push @Peg_options_ARGV, $arg; } else { s/^(\S+)//; push @Peg_options_ARGV, $1; } } } else { $_ = "-$_" unless /^-/; @Peg_options_ARGV = ($_); } @argv = @Peg_options_ARGV; } push @argv, @ARGV; while (defined ($_ = shift @argv)) { # Keep a copy of %Opt at the end of PEG_OPTIONS. %peg_options = %Opt if (@argv == $#ARGV and !$init_peg_options++); # Firstly, some OPTIONs take an argument. if ($Opt{e}) { if ($Opt{e} == 1) { push @Perlexpr, $_ } else { push @Cmdline_files, $_ } $Opt{e} = 0; } elsif ($Opt{f}) { open(my $fin, "<", $_) or die_ "can't open -f file $_: $!"; my %seen; while (<$fin>) { chomp_ $_; next if $_ eq '' or $seen{$_}++; if ($Opt{f} == 1) { push @Perlexpr, $_ } else { push @Cmdline_files, $_ } } $Opt{f} = 0; } elsif ($Opt{'m'}) { /^\d+$/ or die_ "-m expected integer argument: $_"; $Max_matches = $_; $Opt_m = $Opt{'m'}; $Opt{'m'} = 0; } elsif ($Opt{M}) { my $time = $_; my ($num, $fix, $interval); # in secs if ($time =~ s/\#(\d+(?:\.\d*)?)([smhdw])?$//) { # INTERVAL ($interval, my $units) = ($1, $2 || 'd'); if ($units eq 'm') { $interval *= 60 } elsif ($units eq 'h') { $interval *= 60*60 } elsif ($units eq 'd') { $interval *= 60*60*24 } elsif ($units eq 'w') { $interval *= 60*60*24*7 } } if ($time =~ /^(\d+):(\d*)(:(\d*))?(?:-(\d+))?$/) { # EXACT my $sec_specified = defined $3; my ($hrs, $min, $sec, $days) = ($1, $2 || 0, $4 || 0, $5 || 0); die_ "bad -M time: $time" if ($hrs >= 24 or $min >= 60 or $sec >= 60); my @lt = localtime $^T; my $now = $lt[0] + 60*$lt[1] + 60*60*$lt[2]; my $given = $sec + 60*$min + 60*60*$hrs; $num = $days*24*60*60 + $now - $given; $num < 0 and warn_ "future -M time: $time$Beep"; $fix = $sec_specified ? 0 : 59; } elsif ($time =~ m|^(\d+)/(\d+)(?:/(\d+))?$| or $time =~ m|^()(\d+)-(\d+)-(\d+)$|) { # DATE my ($day, $mon, $yr) = length($1) ? ($1, $2, $3) : ($4, $3, $2); die_ "bad -M date: $time" if ($day > 31 or $mon > 12); $yr = (localtime $^T)[5] unless defined $yr; require Time::Local; my $t = Time::Local::timelocal_nocheck(0,0,0,$day,$mon-1,$yr); $num = $^T - $t; $num < 0 and warn_ "future -M date: $time$Beep"; $fix = 24*60*60 - 1; } elsif ($time =~ /^(\d+(?:\.\d*)?)([smhdtw])?$/) { # OFFSET ($num, my $units) = ($1, $2 || 'd'); $fix = 0; if ($units eq 'm') { $num *= 60 } elsif ($units eq 'h') { $num *= 60*60 } elsif ($units eq 'd') { $num *= 60*60*24 } elsif ($units eq 't') { $num = (($num - 1)*24 + (localtime $^T)[2])*60*60 } elsif ($units eq 'w') { $num *= 60*60*24*7 } } elsif ($time =~ /^(.+)\@$/) { # FILE@ my $file = $1; die_ "-M no such file: $file" unless -f $file; $num = $^T - (stat _)[9]; $fix = 0; } elsif (-f $time) { # FILE $num = $^T - (stat _)[9]; $fix = 0; } else { die_ "bad -M argument: $time"; } if (defined $interval) { $MTime_old = [$num + $interval, $fix]; $MTime_new = [$num - $interval, $fix]; } elsif ($Opt{M} > 1) { $MTime_old = [$num, $fix]; } else { $MTime_new = [$num, $fix]; } $Opt{M} = 0; } elsif ($Opt{p}) { my $negated = s/^\#(?=[\w\.\,\-\:]+$)//; my $expr = ''; if (/^\w+$/) { # ALIAS my $alias = $Env{"PEG_P_" . uc($_)} || $Peg_p{$_}; $_ = $alias if $alias; } if (/^[\w\.\,\-\:]+$/) { # EXTENSION my ($dotty, @exts); foreach my $ext (split ':', $_) { $ext =~ s|^\.||; # -p .txt == -p txt next if $ext eq ''; # cf. -p :a:b: $dotty = 1 if $ext =~ /\./; # cf. -p tar.gz push @exts, $ext; } if ($dotty or $Opt{p} > 1) { $expr .= "!" if $negated; $expr .= "/\\.(?:" . join('|', map quotemeta, @exts) . ")\\z/i"; } elsif ($negated) { push @Exclude_exts, @exts; } elsif (@Match_exts) { # NB. "-p a:b:c -p b:c:d" := "-p b:c" my %ext = map { lc($_) => 1 } @exts; @Match_exts = grep $ext{lc($_)}, @Match_exts or die_ "null -p match"; } else { @Match_exts = @exts; } } else { # EXPR $expr = $_; $expr =~ s/^-s\s* "\xEF\xBB\xBF", 'utf16be' => "\xFE\xFF", 'utf16le' => "\xFF\xFE", 'utf32be' => "\x00\x00\xFE\xFF", 'utf32le' => "\xFF\xFE\x00\x00", }->{$Output_encoding} || die_ "BOM unknown for -} encoding: $Output_encoding"; $Opt{'}'} = 0; } # Named long options. elsif ($options and /^-(-?)([\w-]{3,})$/ and (exists $Peg_longopt{$2} or $1 && die_ "unknown longopt: $2")) { my $opt = $2; eval { $Peg_longopt{$opt}->(\@argv, \@Cmdline_files) }; $@ and die_ "--$opt: ", ⅇ } # Now check for an OPTION argument. elsif ($options && s/^-(?=.)//) { while (s/^(.)//) { my $opt = $1; if ($opt =~ /^[abcdefhiklmnopqrstvwxyzABCDEFGHIJKLMNOPQRSTWXZ_=\+\#\/\{\}\\]$/) { # Available: gjuUW # Options set in PEG_OPTIONS do not count towards overloading. if ($peg_options{$opt}) { delete $peg_options{$opt}; $Opt{$opt} = 1; } else { ++$Opt{$opt}; } $context = $opt if ($opt =~ /^[ABC]$/); $pe_type = $opt if ($opt =~ /^[koO]$/); } elsif ($opt =~ /^\d$/) { while (s/^(\d)//) { $opt = (10 * $opt) + $1 } if ($Opt{'m'}) { $Max_matches = $opt; $Opt_m = $Opt{'m'}; $Opt{'m'} = 0; } else { $After = $opt if ($context ne 'B'); $Before = $opt if ($context ne 'A'); $Opt{C} = 1 unless $context; } } elsif ($opt eq '-') { $options = undef } elsif ($opt eq 'V') { ++$Verbose } elsif ($opt eq 'Y') { my @neg = s/^,(.*)$// ? split //, $1 : keys %Opt; my %neg; $Opt{$_} = 0, $neg{$_} = 1 for @neg; $neg{'m'} and $Opt_m = undef; $neg{'M'} and $MTime_new = $MTime_old = undef; $neg{'p'} and $Opt_p_expr = $Opt_pp_expr = undef, @Exclude_dirs = @Exclude_exts = @Match_exts = (); $neg{'P'} and $Code_before_close = $Code_before_open = $Code_after_open = $Code_at_end = $Code_per_line = undef; $neg{'S'} and $Size_max = $Size_min = undef; $neg{'z'} and $Context_matcher = $Context_matcher2 = undef; $neg{'/'} and $Input_record_separator = undef; $neg{'{'} and $Input_encoding = undef; $neg{'}'} and $Output_encoding = undef; # Leave @Perlexpr, @Cmdline_files } elsif ($opt eq '%') { require Time::HiRes; $Start_time ||= Time::HiRes::time(); ++$Opt{'%'}; } else { die_ "unknown option -- $opt\n$Usage"; } } } # Typically, first non OPTION argument is the PERLEXPR. elsif (!(@Perlexpr or @Perlexpr_k) or ($options and $pe_type ne '')) { if ($pe_type eq 'k') { push @Perlexpr_k, $_ } else { push @Perlexpr, $_ } } # Arguments which are neither OPTION nor PERLEXPR are FILEs. else { push @Cmdline_files, $_; } # Handle arguments that need to be processed last. if (@Argv_end and !@argv) { $options = 1; @argv = @Argv_end; @Argv_end = (); } } if ($Opt{X} > 1) { while () { chomp_ $_; next if $_ eq ''; if ($pe_type eq 'k') { push @Perlexpr_k, $_ } else { push @Perlexpr, $_ } } $Opt{X} %= 2; } foreach my $opt (qw(e f m M p P z / { })) { die_ "option requires an argument -- $opt" if $Opt{$opt}; } die $Usage unless (@Perlexpr or @Perlexpr_k or $Opt{'='}); } # process_ARGV sub last_matches { my $return_fullpaths = shift; open(my $fin, "<", $Last_matches_file) or die_ "can't open $Last_matches_file: $!"; my $cwd = cwd(); my $drive = ($cwd =~ m|^(\w:)/| ? uc($1) : ''); my (@matches, %seen); while (<$fin>) { chomp_ $_; s/^\Q$cwd//o or ($drive and s/^\Q$drive//o) unless $return_fullpaths; push @matches, $_ unless $seen{$_}++; } return @matches; } # last_matches sub save_matches { return if ($Opt_yy or !@Matched_files or $Search_STDIN); open(my $fout, ">", $Last_matches_file) or (warn_ "can't write to $Last_matches_file: $!"), return; my $cwd = cwd(); my $drive = ($cwd =~ m|^(\w:)/| ? uc($1) : ''); foreach my $f (@Matched_files) { $f =~ tr|\\|/| if $Is_Win32; if ($Is_Win32 and $f =~ m|^\w:/|) { $f = ucfirst($f) } elsif ($f =~ m|^//|) {} # UNC elsif ($f =~ m|^/|) { $f = "$drive$f" } else { $f = "$cwd$f" } print $fout $f, "\n"; } close $fout or warn_ "can't close $Last_matches_file: $!"; } # save_matches sub process_options1 { $Opt_s = ($Opt{'s'} == 1); $Opt_ss = $Opt{'s'}; $Opt_y = ($Opt{'y'} % 2); $Opt_yy = ($Opt{'y'} > 1); $Opt{"\\"} = 0 unless $Is_Win32; $STDIN_is_terminal = -t STDIN; $STDOUT_is_terminal = -t STDOUT; if (!$STDOUT_is_terminal or $Opt{R}) { $Opt{'#'} = 0 unless $Opt{'#'} > 1; } if ($Output_encoding and $Opt{'#'}) { warn_ "-} prevents colored output" unless $Opt_s; $Opt{'#'} = 0; } if (defined $Size_max and defined $Size_min and $Size_max < $Size_min) { ($Size_max, $Size_min) = ($Size_min, $Size_max); } if ($MTime_old) { if ($MTime_new and $MTime_new->[0] < $MTime_old->[0]) { ($MTime_new, $MTime_old) = ($MTime_old, $MTime_new); } $MTime_old = $MTime_old->[0] - $MTime_old->[1]; } $MTime_new = $MTime_new->[0] if $MTime_new; if ($Is_Win32 and ($STDOUT_is_terminal or $Opt{'#'}) and !$Output_encoding) { # This is needed to properly handle >127 chars in the correct codepage. eval { require Win32::Console::ANSI; }; if ($@) { $Opt{'#'} and die_ "can't color output:\n", ⅇ unless (exists $Env{PEG_NO_WIN32_CONSOLE_ANSI}) { warn_ "failed to load Win32::Console::ANSI" unless $Opt_s; } } } my %types = qw(f filename c colon l lineno b offset n nonmatch m match z z_context y z_context2); if ($Opt{'#'}) { require Term::ANSIColor; # Default coloring mimics GNU grep 2.5.3's --color. my $peg_color = $Env{PEG_COLOR} || 'b=g,c=c,f=m,l=g,m=dr,z=c'; $Col_Reset = Term::ANSIColor::color('reset'); $Col{$_} = $Col_Reset for values %types; $peg_color =~ s/\s+//g; foreach my $specifier (split /,/, lc $peg_color) { eval { $specifier =~ /^(\w)=(.+)$/ or die; my ($t, $col_def) = ($1, $2); my $type = $types{$t} or die; $Col{$type} = get_col($col_def); }; $@ and die_ "bad specifier '$specifier' in PEG_COLOR: $peg_color"; } } else { $Col{$_} = '' for values %types; $Col_Reset = ''; } if ($Opt{'='}) { my @files = last_matches($Opt{H}); warn_ scalar(@files), " files matched" unless $Opt_s; my $sort = $Opt{t} + ($Opt{l} ? $Opt{l} - 1 : 0); # -=ll := -=lt my $long = $Opt{l} && !$Opt{h}; # -=llh := -=t my $do_stat = ($long or $sort or defined $Size_max or defined $Size_min or defined $MTime_new or defined $MTime_old); add_excludes_to_Opt_p_expr(); my ($index, $link, @matches, $mtime, $size); foreach my $file (@files) { ++$index; if ($Opt_p_expr) { $_ = $File = $file; unless (eval_ $Opt_p_expr) { $@ and warn_ "-p error: $file: ", ⅇ next; } } $file =~ tr|/|\\| if $Opt{"\\"} == 1; if ($do_stat) { my @s = stat($file) or ($Opt_s || warn_ "can't stat $file: $!"), next; $size = $s[7]; $mtime = $Is_Win32 ? ($s[9] > $s[10] ? $s[9] : $s[10]) : $s[9]; next if (defined $MTime_new and $mtime < $^T - $MTime_new); next if (defined $MTime_old and $mtime > $^T - $MTime_old); next if (defined $Size_min and $size < $Size_min); next if (defined $Size_max and $size > $Size_max); } $link = undef; if ($long and !$Is_Win32 and -l $file) { $link = readlink $file; defined $link or warn_ "can't readlink $file: $!" unless $Opt_s; } push @matches, [$mtime, $size, $file, $index, $link]; } my $filtered = @files - @matches; $filtered and warn_ "$filtered files filtered" unless $Opt_s; $sort and @matches = sort { $sort == 1 ? $a->[0] <=> $b->[0] : $sort == 2 ? $b->[0] <=> $a->[0] : $sort == 3 ? $a->[1] <=> $b->[1] : $sort == 4 ? $b->[1] <=> $a->[1] : 0 or $a->[2] cmp $b->[2]; } @matches; foreach my $m (@matches) { if ($long) { my @t = localtime $m->[0]; my $day = qw(Sun Mon Tue Wed Thu Fri Sat)[$t[6]]; my $mon = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)[$t[4]]; printf "%04d %s %2d %s %2d:%02d:%02d %9d ", 1900+$t[5], $mon, $t[3], $day, $t[2], $t[1], $t[0], $m->[1]; } printf "%s%-4d%s ", $Col{lineno}, $m->[3], $Col_Reset if $Opt{n}; print $Col{filename}, $m->[2], $Col_Reset; print " -> ", $Col{filename}, $m->[4], $Col_Reset if defined $m->[4]; print "\n"; last if (defined $Max_matches and --$Max_matches <= 0); } if ($Opt_y) { @Matched_files = map $_->[2], @matches; save_matches(); } exit; } # -E overrides -F & -G, and -F overrides -G. if ($Opt{E}) { $Opt{F} = $Opt{G} = 0; } elsif ($Opt{F}) { # Use -FF in PEG_OPTIONS to make it be overrideable by -G on the command line. if ($Opt{F} == 1) { $Opt{G} = 0; } elsif ($Opt{G}) { $Opt{F} = 0; } } if ($Context_matcher2 and !$Context_matcher) { $Context_matcher = $Context_matcher2; $Context_matcher2 = undef; } $Opt{k} = 0 if !@Perlexpr_k; $Opt{O} = 1 if ($Opt{l} and $Opt{o} > 1); $Print_context_matcher = $Context_matcher; # In order of precedence. if ($Opt{o} > 1 and !$Opt{O}) { die_ "expected a non -k PERLEXPR" unless @Perlexpr; # cf. "peg -ook A B" $Opt_oo = 1; $Opt{$_} = 0 for qw(c k l L Z); } if ($Opt{k}) { $Print_context_matcher = $Opt_m = undef; $Opt{$_} = 0 for qw(b c h l A B C J L N O Z); } if ($Opt{O}) { $Print_context_matcher = $Opt_m = undef; $Opt{$_} = 0 for qw(b c h l A B C J L N Z); } if ($Opt{Z}) { $Print_context_matcher = $Opt_m = undef; $Opt{$_} = 0 for qw(b c h l A B C L); } if ($Opt{L}) { $Print_context_matcher = $Opt_m = undef; $Opt{$_} = 0 for qw(b c h l A B C J); } # GNU grep has -l override -c; peg works the other way around. if ($Opt{c}) { $Print_context_matcher = undef; $Opt{$_} = 0 for qw(b l h A B C J); } if ($Opt{l}) { $Print_context_matcher = $Opt_m = undef; $Opt{$_} = 0 for qw(b h A B C J); } $Opt{w} = 0 if $Opt{x}; $Opt_pp_expr = undef unless $Opt{Q}; push @FileFind_opts, 'silent' => 1 if $Opt_ss; } # process_options1 sub add_excludes_to_Opt_p_expr { return unless (@Exclude_dirs or @Exclude_exts or @Match_exts); $Opt_p_expr ||= '1'; if (@Exclude_dirs) { my $dirs = join '|', map quotemeta, @Exclude_dirs; my $slash = $Is_Win32 ? "[/\\\\]" : "/"; $Opt_p_expr = "(\$File !~ m{(?:^|$slash)(?:$dirs)$slash})\n\tand ($Opt_p_expr)"; } if (@Match_exts) { my $exts = join '|', map quotemeta, @Match_exts; $Opt_p_expr = "(/\\.(?:$exts)\\z/i)\n\tand ($Opt_p_expr)"; } elsif (@Exclude_exts) { my $exts = join '|', map quotemeta, @Exclude_exts; $Opt_p_expr = "(!/\\.(?:$exts)\\z/i)\n\tand ($Opt_p_expr)"; } } # add_excludes_to_Opt_p_expr sub get_col { my $col_def = shift; my %col = qw(r red g green y yellow b blue m magenta c cyan w white k black); $col_def =~ /^(d?)(\w)(o(d?)(\w))?$/ or die; my ($d, $c, $od, $oc) = ($1, $2, $4, $5); die unless (exists $col{$c} and (!$oc or exists $col{$oc})); return '' unless $Opt{'#'}; my $col = $col{$c}; $col = "bold $col" if $d; $col = "$col on_$col{$oc}" if $oc; $col = "underline $col" if $od; return Term::ANSIColor::color("reset $col"); } # get_col sub build_Perlexpr { my $iwx = ($Opt{i} or $Opt{w} or $Opt{x}); # if $simple then -F else -G # If the PERLEXPR is simple enough, then it is faster to read # the file in one go and perform the match on a single line. $Slurp = !($Opt{v} or $Opt{x} or $Opt{E} or $Opt{Q} or $Opt_oo or $Code_per_line); # Can we guarantee that after a true PERLEXPR, that ("$`$&$'" eq $_)? $Simple_Perlexpr = !($Opt{v} or $Opt{E} or @Perlexpr_mung); # When coloring, if possible use ${^MATCH} and not $&. $Use_matchvars = ($Opt{'#'} and $] >= 5.010 and $Simple_Perlexpr and !($Opt{c} or $Opt{k} or $Opt{l} or $Opt{L} or $Opt{O} or $Opt{Z})); if ($Opt{E}) { $iwx and warn_ "-E ignores -i/-w/-x$Beep" unless $Opt_s; } else { for (@Perlexpr, @Perlexpr_k) { if ($Opt{F}) { $_ = quotemeta($_); } else { if (!$Opt{G} and m{^[\+\$]|/}) { # Beware accidental pattern option eg. peg -i /foo/ bar $iwx and warn_ "interpreting as expression: $_$Beep" unless $Opt_s; $Slurp = $Simple_Perlexpr = $Use_matchvars = undef; next; } else { s|/|\\/|g; # cf. "peg -G '^/' f" vs "peg -F '^/' f" # Do not slurp if PERLEXPR matches line ends or crosses newlines. $Slurp = undef if ($Slurp && /(?:^|[^\\\[])\^/ # eg. "^#include" or "end$|^begin" or /[^\\]\$(?:\W|$)/ # eg. "end$" or /\\[azZ]/ # string start/end or /\[\^/ # cf. peg -lG "foo[^x]+bar" ); } } $_ = '\b(?:' . $_ . ')\b' if $Opt{w}; # cf. peg -w "a|b" $_ = '^(?:' . $_ . ')$' if $Opt{x}; $_ = '/' . $_ . '/'; $_ .= 'i' if ($Opt{i} and ($Opt{i} == 1 or $_ eq lc $_)); $_ .= "\000" if $Use_matchvars; } } if ($Opt_oo) { $Perlexpr = join ",\n\t", map({"((" . $Perlexpr_k[$_] . ")\t and \$Match_failed = 1, last)"} (0..$#Perlexpr_k)), map({"((" . $Perlexpr[$_] . ")\t and \$Line_matched = \$Match$_ = 1)"} (0..$#Perlexpr)), "\$Line_matched"; } elsif ($Opt{k}) { $Perlexpr = join ",\n\t", map({"((" . $Perlexpr_k[$_] . ")\t && (\$Match_failed = 1, last))"} (0..$#Perlexpr_k)), map({"(\$Match$_ ||= (" . $Perlexpr[$_] . "))"} (0..$#Perlexpr)); } elsif ($Opt{O}) { $Perlexpr = join ",\n\t", map({"(\$Match$_ ||= (" . $Perlexpr[$_] . "))"} (0..$#Perlexpr)), ('(' . join(' && ', map "\$Match$_", (0 .. $#Perlexpr)) . ')'); } elsif (@Perlexpr > 1) { $Perlexpr = join "\n\t|| ", map "($_)", @Perlexpr; } else { $Perlexpr = $Perlexpr[0]; } $Perlexpr = 'not (' . $Perlexpr . ')' if $Opt{v}; # Keep a copy of Perlexpr without the /p modifiers. ($Perlexpr_q = $Perlexpr) =~ tr/\000//d; if ($Use_matchvars) { $Perlexpr =~ tr/\000/p/ } else { $Perlexpr = $Perlexpr_q } # Apply any user defined PERLEXPR transformations. if (@Perlexpr_mung) { my $orig = $Perlexpr; $_->(\$Perlexpr) for @Perlexpr_mung; $Slurp = undef if $Perlexpr ne $orig; } # Check the PERLEXPR is valid Perl code. eval_ "if (0 and ($Perlexpr)) {}"; if ($@) { my $ee = join '', @Warnings, ⅇ if ((@Perlexpr + @Perlexpr_k) > 1) { # Determine first bad expression. foreach my $pe (@Perlexpr, @Perlexpr_k) { @Warnings = (); $pe =~ s/\000\z//; eval_ "if (0 and ($pe)) {}"; $@ and die_ "error in the Perl expression: $pe\n", @Warnings, ⅇ } } die_ "error in Perl expression: $Perlexpr\n$ee"; } } # build_Perlexpr sub process_options2 { $Opt_m ||= 0; # undef -> 0 # "peg -l +1 ..." is a special case. It skips reading the file. $Plus_one = ($Opt{l} and $Perlexpr eq '+1'); if ($Opt{c} and $Env{PEG_CCC}) { if ($Opt{c} == 1) { $Opt{c} = 3 } elsif ($Opt{c} == 3) { $Opt{c} = 1 } } if ($Is_Win32) { # If the PERLEXPR refers to newlines then we need to convert CRLFs to "\n"s. foreach my $code ($Perlexpr, $Code_per_line, $Context_matcher, $Context_matcher2) { next unless defined $code; if ($code =~ /\$\/|\\n|\bchomp\b/ and $code !~ /\# PEG_NEWLINE_NEUTRAL/) { $CRLF_to_newline = 1; last; } } # Do we need a ":crlf" layer on the output? $Needs_crlf_layer = 1 if (($CRLF_to_newline or $Opt{N} or $Opt{Z}) and !($STDOUT_is_terminal or $Opt{k} or $Opt{l} or $Opt{L} or $Opt{O})); } if ($Is_Win32 and !$Needs_crlf_layer) { $Newline = "\015\012"; $Newline_literal = "\\015\\012"; } else { $Newline = "\n"; $Newline_literal = "\\n"; } foreach my $m ($Context_matcher, $Context_matcher2) { next unless defined $m; eval_ "if (0 and ($m)) {}"; $@ and die_ "bad -z context matcher: $m\n", ⅇ } # Provide mechanism to swap context colors cf. peg -zz c -z "/^\s*case\b/" ... if ($Opt{'#'} and $_ = $Context_matcher2 and /\# PEG_Z_PRIMARY_COLOR/) { my $tmp = $Col{z_context}; $Col{z_context} = $Col{z_context2}; $Col{z_context2} = $tmp; } foreach my $code ($Code_after_open, $Code_at_end, $Code_before_close, $Code_before_open, $Code_per_line) { next unless defined $code; $code =~ s/\bRETURN\b/ " do { print \"$Col{filename}\$File$Col_Reset$Newline_literal\"; push \@Matched_files, \$File; return }" /eg; eval_ "if (0) {\n$code\n}"; $@ and die_ "bad -P code: $code\n", ⅇ } $Opt{K} = 0 if $Input_encoding; if ($Opt{K} and $Guess_encoding = $Env{PEG_GUESS_ENCODING}) { require Encode::Guess; eval { Encode::Guess->set_suspects(split /\s+/, $Guess_encoding) }; $@ and die_ "bad PEG_GUESS_ENCODING: $Guess_encoding:\n", ⅇ $Encode::Guess::NoUTFAutoGuess = 1; } $Opt{A} = $Opt{B} = 1 if $Opt{C}; # If '-+' specified, then ignore the peg result files unless they are # explicitly named on the command line eg. "*/*/peg*" if ($Opt{'+'}) { foreach my $file (@Cmdline_files) { if ($file =~ /\bpeg/) { $Opt{'+'} = 0; last if $Do_globbing; } elsif (!$Do_globbing) { $Opt{'+'} = 1; last; } } $Opt{'+'} = 0 if ($_ = $Opt_p_expr and /peg/); } unless ($Opt{r} or $Opt{X} or $Opt_y) { # implicit file list if (@Cmdline_files) { # The single filename "-" indicates to read STDIN. if (@Cmdline_files == 1 and $Cmdline_files[0] eq '-') { @Cmdline_files = (); } } elsif ($STDIN_is_terminal) { $Opt{r} = 1; } } my ($glob_failed, $found_globbed_file); if ($Do_globbing) { my $dosglob = $Is_Win32 ? !$Env{PEG_USE_BSDGLOB} : $Env{PEG_USE_DOSGLOB}; my ($glob, @f, @glob_results); foreach my $f (@Cmdline_files) { if ($f =~ /\*/ or ($f =~ /\?/ and !($Is_Win32 and $f =~ /^[\\\/]{2}\?[^\?]+\z/)) or (!$dosglob and $f =~ /^~|\[.*\]|\{.*\}/ and not -e $f)) { my $f_orig = $f; $f =~ s|(\*\*+)| join '/', split //, $1 |eg; # **c -> */*c $glob ||= do { if ($dosglob) { require File::DosGlob; sub { my $pat = $_[0]; if ($pat =~ /\s/) { $pat =~ s|\\|\\\\|g; $pat =~ s|([\s'])|\\$1|g; } return File::DosGlob::glob($pat); }; } else { require File::Glob; sub { return File::Glob::bsd_glob($_[0]); }; } }; if (@glob_results = $glob->($f)) { if ($f =~ /^(?:.*[\\\/])?\*\z/) { # A non specific glob eg. "src/*". foreach my $gf (@glob_results) { $Globbed{$gf} = 1 } } push @f, @glob_results; $found_globbed_file = 1; } else { warn_ "glob failed to match any files for: $f_orig" unless $Opt_s; $glob_failed = 1; } } else { push @f, $f; } } @Cmdline_files = @f; } if ($Opt{d}) { my @files; foreach my $f (@Cmdline_files) { if (-d $f) { push @Cmdline_dirs, $f; } else { push @files, $f; } } if ($Opt{d} > 1 and @files) { @Cmdline_dirs = (); } elsif (@Cmdline_dirs) { @Cmdline_files = @files; $Opt_d = 1; } } if ($Opt_y) { $Opt{'+'} = 0; push @Cmdline_files, last_matches(); } unless (@Cmdline_files or $Opt{r} or $Opt_d or $Opt{X} or $Opt_y) { die_ "no files found" if $glob_failed; $Search_STDIN = 1; $Opt{a} ||= 1; $Opt{I} = 0; $Opt{J} = 0; $Opt{_} = 0; $Opt_p_expr = undef; $Slurp = undef; if ($Opt{K}) { warn_ "-K does not work on STDIN" unless $Opt_s; $Opt{K} = 0; } } $Slurp = undef if $Input_record_separator; $Opt{_} = 0 unless ($Opt{R} and $STDOUT_is_terminal and !$Opt_s); $Opt{_} = 0 if (@Cmdline_files == 1 and !($Opt{r} or $Opt_d or $Opt_y or $Opt{Q} or $Opt{X})); $Opt{_} = 0 if ($Opt{X} and $STDIN_is_terminal); # cf. "find . | peg -XR_ foo" vs "peg -XR_ foo" if ($Opt{J} >= 2) { my $JJ_mode = 'sss'; # s=separated (gap); c=compact (no gap); h=header; d=disabled. if ($Opt{J} > 2) { $JJ_mode = 'sds'; if ($_ = $Env{PEG_JJ_MODE}) { /^[cdhs]{3}$/ or die_ "bad PEG_JJ_MODE: $_"; $JJ_mode = $_; } } $JJ_mode =~ /^(.)(.)(.)$/; # 1=terminal, 2=!terminal, 3=-R. $JJ_mode = $Opt{R} ? $3 : $STDOUT_is_terminal ? $1 : $2; if ($JJ_mode eq 'd') { $Opt{J} = 0; } elsif ($JJ_mode eq 's') { $JJ_gap = 1; } elsif ($JJ_mode eq 'h') { $Opt{J} = 1; } } if ($Opt{H}) { $Opt{h} = 0; $Opt{J} = 0 if $Opt{J} >= 2; } elsif ($Opt{h}) { $Opt{J} = 0; } elsif (@Cmdline_files <= 1 and !($Opt_d or $Opt{r} or $Opt{Q} or $Opt_y or $Opt{X} or $found_globbed_file)) { $Opt{h} = 1; $Opt{J} = 0 if $Opt{J} >= 2; } elsif ($Opt{J} >= 2 and $Opt{Z}) { $Opt{J} = 0; } elsif ($Opt{J}) { $Opt{h} = 1; } if ($Opt{I} == 3) { # -III := -I, but -a overrides it. $Opt{I} = $Opt{a} ? 0 : 1; } elsif ($Opt{a} and $Opt{I} == 1) { warn_ "possible conflict between -a and -I$Beep" unless $Opt_s; } if ($Opt{Q}) { die_ "-Q needs a %Peg_Q" unless %Peg_Q; my (@archive_exts, @non_archive_exts); while (my ($ext, $code) = each %Peg_Q) { die_ "uppercase extension: $ext" if ($ext ne lc $ext); die_ "\$Peg_Q{'$ext'} is not a valid CODE ref" unless ref $Peg_Q{$ext} eq 'CODE' and defined &{$Peg_Q{$ext}}; next if $ext eq '*'; if ($ext =~ s/^\*//) { push @archive_exts, $ext; $Peg_Q{$ext} = $code; delete $Peg_Q{"*$ext"}; } else { push @non_archive_exts, $ext; } } my $gen_re = sub { return unless @_; return "\\.(?i)(" . (join '|', map quotemeta, sort { length($b) <=> length($a) or $a cmp $b } @_) . ")\\z"; }; if ($Opt_pp_expr) { $Opt_pp_code = "sub pp {\n return 1 unless \@_;\n local \$_ = shift;\n"; $Opt_pp_code .= ' warn_ "V: in pp($_)";' . "\n" if $Verbose; $Opt_pp_code .= " return (($Opt_pp_expr)"; if (my $archive_re = $gen_re->(@archive_exts)) { $Opt_pp_code .= " or /$archive_re/" if $Opt{Q} == 1; # -QQ := no recurse } $Opt_pp_code .= ");\n}"; } else { $Opt_pp_code = 'sub pp { @_ ? 1 : 0 }'; } eval_ $Opt_pp_code; $@ and die_ "bad -pp code: $Opt_pp_code\n", ⅇ $Q_handler_re = $gen_re->(@archive_exts, @non_archive_exts); $Q_nonarchive_re = $gen_re->(@non_archive_exts); warn_ "-Q cannot guess input encoding" if $Opt{K}; } if ($Opt{t}) { my $new_first = ($Opt{t} == 1); my @tmp; foreach my $f (@Cmdline_files) { # Always do non files last. push @tmp, [$f, -f $f ? -M _ : $new_first ? 9e9 : -9e9]; }; if ($Opt{r} or $Opt_d) { require File::Find; my @dirs = (($Opt{r} ? '.' : ()), @Cmdline_dirs); eval { File::Find::find({ @FileFind_opts, 'wanted' => sub { my $Mtime = $File::Find::Mtime || (-f $_ ? -M _ : return); push @tmp, [$File::Find::name, $Mtime]; }}, @dirs); }; $@ and die_ "File::Find::find failed: ", ⅇ $Opt{r} = $Opt_d = 0; } @Cmdline_files = map $_->[0], sort { $new_first ? $a->[1] <=> $b->[1] : $b->[1] <=> $a->[1] or $a->[0] cmp $b->[0] } @tmp; } # Hard code support for qfind. my $qfind = $Bin_dir . "qfind" . ($Is_Win32 ? ".exe" : ""); my $r_cmd = (exists $Env{PEG_R_CMD} ? $Env{PEG_R_CMD} : -x $qfind ? $qfind : undef); my $using_qfind = ($_ = $r_cmd and /qfind/); $r_cmd .= " $_" if ($using_qfind and $_ = $Env{PEG_QFIND_ARGS}); my $qfind_dir = ''; # XXX IPC::Open3 does not like a redirected STDOUT. if ($r_cmd and ($STDOUT_is_terminal or ($using_qfind and $Opt_ss))) { if ($Opt{r}) { $Opt_r_cmd = $r_cmd; } elsif ($Opt_d and $using_qfind and @Cmdline_dirs == 1 and $Cmdline_dirs[0] ne '.') { # Use qfind if there is a single directory to be searched. # NB. "peg foo -d ." is assumed to be a test of F:F:f. my $dir = $Cmdline_dirs[0]; $dir =~ tr|\\|/| if $Is_Win32; # beware trailing backslash acting on double quote $qfind_dir = " -- " . quote_arg($dir); $Opt_r_cmd = $r_cmd; $Opt_d = undef; } } if ($Opt_r_cmd) { if ($using_qfind) { if ($Opt_ss) { $Opt_r_cmd .= " -s"; # No stderr messages. $Opt_r_cmd_silent = 1; } unless ($Opt{L} or $Opt{k} or $Plus_one) { $Opt_r_cmd .= " -z"; # Filter zero size files. } $Opt_r_cmd .= " -r" if ($Opt_s and !$Plus_one); # Filter non readable files $Opt_r_cmd .= " -N=" . ($^T - $MTime_new) if defined $MTime_new; $Opt_r_cmd .= " -O=" . ($^T - $MTime_old) if defined $MTime_old; $Opt_r_cmd .= " -J=$Size_min" if defined $Size_min; $Opt_r_cmd .= " -K=$Size_max" if defined $Size_max; if (@Exclude_dirs) { $Opt_r_cmd .= " " . quote_arg("-d=" . join ':', @Exclude_dirs); } if (@Match_exts) { $Opt_r_cmd .= " " . quote_arg("-p=" . join ':', @Match_exts); } elsif (@Exclude_exts) { $Opt_r_cmd .= " " . quote_arg("-e=" . join ':', @Exclude_exts); } $Opt_r_cmd .= $qfind_dir; } my $r_fork = (exists $Env{PEG_R_FORK} ? $Env{PEG_R_FORK} : 1); if ($r_fork and $Opt{r} <= 1 and not ( # -rr := do not fork. ($Is_Win32 and ( # XXX avoid various Win32 bugs. # Using fork() from a do'd script crashes on exit. $Called # Avoid ITHREADS+//i performance bug. or $Opt{i} or $Perlexpr =~ m|/.*/i| # Using ITHREADS+encoding crashes. or $Opt{K} or $Input_encoding or $Output_encoding)) or $Plus_one or !$STDOUT_is_terminal or $Opt{R} # Avoid saving interleaved output. or $Opt{c} == 2 or (($Opt{l} or $Opt{O}) and $Opt{n}) or $Opt_m > 1 or $Opt{q} or $Opt{Q} or $Opt{Z} or (grep { defined and /\# PEG_NO_FORK/ } $Code_after_open, $Code_at_end, $Code_before_close, $Code_before_open, $Code_per_line, $Output_encoding))) { if ($r_fork =~ /^(\d),(\d{1,2})$/) { ($Worker_count, $Worker_work) = ($1, $2); } $Slurp_maxsize = int($Slurp_maxsize / $Worker_count); $Opt_r_fork = 1; } $Opt{r} = 0; } if ($Opt{K}) { # An ASCII text lookup table: 9=tab, 10=LF, 13=CR, 32-126=isprint $Is_ascii_text[255] = undef; $Is_ascii_text[$_] = 1 for (9, 10, 13, 32..126); } if ($Opt_oo) { open($Buffer_fh, "+>", \$Buffer_contents) or die_ "can't open: $!"; binmode($Buffer_fh, $Input_encoding ? ":utf8" : ":raw") or die_ "binmode failed: $!"; } } # process_options2 sub quote_arg { my $arg = shift; return $arg if $arg =~ m|^[\w\-\.=:,/]+$|; return $Is_Win32 ? "\"$arg\"" : "'$arg'"; } # quote_arg sub help { my $opt = shift; if (defined $opt) { $opt =~ s/^-{0,2}(.+)/$1/; $opt = 'A' if $opt =~ /^[BC\d]$/; my @out; while () { if (/^=item\s+B<--?\Q$opt/) { push @out, help_line($_, 0); last; } } die_ "no such option '$opt'" unless @out; my $over = 0; while () { if (/^=over/) { ++$over } elsif (/^=back/) { last unless $over-- > 0 } else { last if (/^=/ and !$over); push @out, help_line($_, $over); } } # Strip consecutive blank lines. print "\n"; my ($is_empty, $last_empty); while (defined ($_ = shift @out)) { $is_empty = /^\s*$/; print unless ($is_empty and ($last_empty or !grep /\S/, @out)); $last_empty = $is_empty; } print "\n"; } elsif (-t STDOUT) { system qq(perldoc "$0"); } else { print qx(perldoc "$0"); } exit; } # help sub help_line { my ($line, $over) = @_; return '' if ($line =~ /^=(over|back)/); my $title = ($line =~ s/^=item\s+//) ? 1 : 0; if ($line =~ /^\S/) { # NB. indented POD is verbatim $line =~ s/\bB<(.+?)>/"$1"/g; # bold $line =~ s/\bI<(.+?)>/*$1*/g; # italic $line =~ s/\bC<([\$\%\@]\S*?)>/$1/g; # code1 $line =~ s/\bC<(\S+?)>/"$1"/g; # code2 $line =~ s/\bC<< (.+?) >>/``$1''/g; # code3a $line =~ s/\bC<(.+?)>/``$1''/g; # code3b $line =~ s/\bL<(.+?)\/(.+)>/$2 in the $1 manpage/g; # link $line =~ s/\b\w<(.+?)>/$1/g; # other } my $indent = ' ' x (2 - $title + 2*$over); my @lines = ("$indent$line"); if ($title and $line !~ /^\*$/) { push @lines, ($indent . ("=" x (length($line) - 1)) . "\n"); } return @lines; } # help_line sub show_debug { my $verbose = $Opt{D} > 1; my $i; if ($verbose) { print "# peg v$VERSION $0\n\n"; print "# Perl version $] $^X\n\n"; print "# cwd => ", cwd(), "\n\n"; } if (@Ini_files) { print "# Ini files =>\n"; foreach my $ini_file (@Ini_files) { print "\t$ini_file\n"; next unless $verbose; open(my $fin, "<", $ini_file) or (print "open failed: $!\n"), next; print " $.:\t$_" while (<$fin>); print "\n"; } print "\n"; } else { print "# No ini files\n\n"; } if ($verbose) { my @env; if (@env = grep !exists $ENV{$_}, keys %Env) { print "# Env =>\n"; printf "\t%-12s = %s\n", $_, $Env{$_} for sort @env; print "\n"; } if (@env = grep /^PEG_/, keys %ENV) { print "# ENV =>\n"; printf "\t%-12s = %s\n", $_, $ENV{$_} for sort @env; print "\n"; } print "# HOME directory => $HOME_dir\n\n"; print "# Bin directory => $Bin_dir\n\n"; print "# STDIN is not a terminal\n\n" unless $STDIN_is_terminal; print "# STDOUT is not a terminal\n\n" unless $STDOUT_is_terminal; if ($Opt{'#'}) { print "# Colors =>\n"; printf "\t%-12s %s<#>$Col_Reset\n", $_, $Col{$_} for sort keys %Col; print "\n"; } print "# \%INC =>\n"; printf "\t%-24s = %s\n", $_, $INC{$_} for sort keys %INC; print "\n"; print "# %Peg_p =>\n", map("\t$_\t= $Peg_p{$_}\n", sort keys %Peg_p), "\n" if keys %Peg_p; print "# %Peg_z =>\n", map("\t$_\t= $Peg_z{$_}\n", sort keys %Peg_z), "\n" if keys %Peg_z; print "# %Peg_zz =>\n", map("\t$_\t= $Peg_zz{$_}\n", sort keys %Peg_zz), "\n" if keys %Peg_zz; print "# keys %Peg_Q =>\n", map("\t$_\n", sort keys %Peg_Q), "\n" if keys %Peg_Q; my @longopts = sort grep !/^help$/, keys %Peg_longopt; print "# keys %Peg_longopt =>\n", map("\t$_\n", @longopts), "\n" if @longopts; } if (@Peg_options_ARGV) { $i = 0; print "# PEG_OPTIONS =>\n"; print "\t", ++$i, ": ", $_, "\n" for @Peg_options_ARGV; print "\n"; } $i = 0; print "# ARGV =>\n", map({("\t", ++$i, ": ", $_, "\n")} @ARGV), "\n"; print "# Enabled options => "; print join '', map {$_ x $Opt{$_}} sort grep $Opt{$_}, keys %Opt; print "\n\n"; print "# Reading from STDIN\n\n" if $Search_STDIN; print "# PEG_R_CMD => $Opt_r_cmd\n\n" if $Opt_r_cmd; print "# PEG_R_FORK => $Worker_count x $Worker_work\n\n" if $Opt_r_fork; print "# -p expr =>\n\n$Opt_p_expr\n\n" if ($Opt_p_expr and $Opt_r_fork); print "# -pp code =>\n\n$Opt_pp_code\n\n" if $Opt_pp_code; if (@Cmdline_files) { print "# Command line files (", scalar @Cmdline_files, ") =>\n"; print map "\t$_\n", @Cmdline_files if (@Cmdline_files < 10 or $verbose); print "\n"; } if ($Opt_d) { print "# Command line directories (", scalar @Cmdline_dirs, ") =>\n"; print map "\t$_\n", @Cmdline_dirs if (@Cmdline_dirs < 10 or $verbose); print "\n"; } print "# -M => ", scalar localtime($^T - $MTime_new), "\n\n" if defined $MTime_new; print "# -MM => ", scalar localtime($^T - $MTime_old), "\n\n" if defined $MTime_old; print "# -S min => $Size_min\n\n" if defined $Size_min; print "# -S max => $Size_max\n\n" if defined $Size_max; print "# Internal Perl code =>\n$Search\n"; print "# Warnings =>\n", @Warnings, $Beep, "\n" if @Warnings; exit; } # show_debug sub near { @_ == 1 or @_ == 2 or die "usage: near(PATTERN|SUB ?,RANGE?)\n"; my $arg = shift; my $arg_is_sub = (ref $arg eq 'CODE'); my $N = 10; my $start = 0; if (@_) { my $range = shift; $range =~ /^(-?)(\d*)$/ or die "bad RANGE argument to near(): $range\n"; $start = 1 if ($1 || $range eq ''); $N = $2 || @P; } $N = @P if $N > @P; my ($line, $matched); eval { for (my $i = $start; $i <= $N; ++$i) { $line = ($i == 0) ? $_ : $P[-$i]; # NB. $_ is the current line if ($arg_is_sub) { local $_ = $line; $matched = $arg->(); } else { $matched = ($line =~ /$arg/); } last if $matched; } }; $@ and die "error in near():\n", ⅇ return $matched ? 1 : 0; } # near sub nearq { my $str = shift; die "nearq: expected string: $str\n" if ref $str or !length $str; return near(quotemeta($str), @_); } # nearq { my %regexps; sub nearby { my $N = 10; if (@_ and ref($_[0]) eq 'SCALAR') { $N = ${ +shift } } @_ >= 2 or die "usage: nearby(?\\N,? PAT1, PAT2 ...)\n"; my ($i, $j, $regexp, @regexps); foreach my $pat (@_) { eval { push @regexps, ($regexps{$pat} ||= qr/$pat/) }; $@ and die "error in nearby pattern: $pat\n", ⅇ } my $match_idx = -1; for ($i = 0; $i < @regexps; ++$i) { $regexp = $regexps[$i]; if ($_ =~ /$regexp/) { # NB. $_ is the current line return 1 if ($match_idx != -1); $match_idx = $i; } } return 0 if $match_idx == -1; $N = @P if $N > @P; for ($i = 0; $i < @regexps; ++$i) { next if $i == $match_idx; $regexp = $regexps[$i]; for ($j = 1; $j <= $N; ++$j) { return 1 if ($P[-$j] =~ /$regexp/); } } return 0; } # nearby } sub colorall { my $pattern = shift; die "usage: colorall(PATTERN ?,COLOR_DEFINITION?)" unless length $pattern; my $match_col = $Col{match}; if (@_) { my $col_def = shift; unless (exists $Col{$col_def}) { eval { $Col{$col_def} = get_col($col_def) }; $@ and die "colorall: bad color '$col_def'\n"; } $match_col = $Col{$col_def}; } my $matches = 0 + eval { s/($pattern)/ $match_col . $1 . $Col{nonmatch} /eg; }; $@ and die "error in colorall:\n", ⅇ return $matches; } # colorall sub Z_display { my $file = shift; unless (defined $Z) { warn_ +(defined $file ? "$file: " : ()), "\$Z is not defined" unless $Opt_s; return; } my $file_colon = ''; if (defined $file) { if ($Opt{J}) { print header($file); } else { $file_colon = $Col{filename} . $file . $Col{colon} . ':' . $Col_Reset; } } if ($Opt{Z} >= 3 and $Opt{Z} <= 4) { require Data::Dumper; print $file_colon, "\n" if $file_colon; print Data::Dumper->Dump([$Z], ['Z']), "\n"; } elsif (ref($Z) eq 'HASH') { my $numeric_cmp = 1; foreach my $v (values %$Z) { unless (defined $v and $v =~ /^[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?$/) { $numeric_cmp = 0; last; } } # -ZZZZZ := -Z but do not numerically sort the keys. my @keys = ($numeric_cmp and $Opt{Z} <= 2) ? (sort { $Z->{$b} <=> $Z->{$a} or $a cmp $b } keys %$Z) : (sort keys %$Z); my $sep = ($Opt{T} ? "\t=> " : " => "); print $file_colon, "\n" if $file_colon; foreach my $key (@keys) { my $v = $Z->{$key}; $key =~ s/[\015\012]+\z//; if (defined $v) { $v =~ s/[\015\012]+\z//; print $key, $sep, $v, "\n"; } else { print $key, "\n"; } } } elsif (ref($Z) eq 'ARRAY') { print $file_colon, "\n" if $file_colon; foreach my $v (@$Z) { $v =~ s/[\015\012]+\z//; print $v, "\n"; } } else { chomp_ $Z; print $file_colon, ($file_colon && $Opt{T} ? "\t" : ()), $Z, "\n"; } } # Z_display sub build_search { my ($gap, $nonmatch_print, $output, $print); my $context = ($Opt{A} || $Opt{B}); if ($Opt{c} or $Opt{k} or $Opt{L} or $Opt{Z}) { $output = undef; } elsif ($Opt{l} or $Opt{O}) { $output = '"'; $output .= "$Col{lineno}\@{[scalar \@Matched_files]}$Col_Reset\\t" if $Opt{n}; $output .= '$File' . ($Opt{l} > 1 ? '\0' : $Newline_literal) . '"'; } else { $output = ''; $output = "\$Offset:" if $Opt{b}; $output = "\$.:$output" if $Opt{n}; $output = "\$File:$output" if !$Opt{h}; $output = "\"$output\$_\"" if $output; } if (defined $output) { $print = 'print' . ($output ? " $output" : '') . ';'; $print .= ' last;' if ($Opt{l} or $Opt{O}); } if ($context) { $output ||= '$_'; $gap = ($Opt{A} ? $After : 0) + ($Opt{B} ? $Before : 0); ($nonmatch_print = $print) =~ tr/:/-/; $output =~ tr/:/-/; $Perlexpr = "(\$Matches < $Max_matches) && ($Perlexpr)" if $Opt_m; } if ($Opt{T}) { my $sep = $Col_Reset . "\\t"; for ($output, $print, $nonmatch_print) { defined and s/^(.*[:\-])/$1$sep/; } } # Are any of the AUTOVARS used? my ($needs_reset, $uses_C, $uses_Filepath, $uses_P); foreach my $code ($Perlexpr, $Code_per_line) { next unless defined $code; $uses_C ||= ($code =~ /\$C\b/); $uses_P ||= ($code =~ /\$P\[|\@P\b|\bnear(?:by|q)?\(/); } foreach my $code ($Perlexpr, $Code_after_open, $Code_before_close, $Code_before_open, $Code_per_line, $Opt_p_expr) { next unless defined $code; $uses_Filepath ||= ($code =~ /\$Filepath\b/); $needs_reset ||= ($code =~ /[\$\@\%][a-z]/ and $code !~ /\# PEG_NO_RESET/); } if ($uses_C and !$Context_matcher) { warn_ "\$C requires -z option$Beep" unless $Opt_s; $uses_C = undef; } elsif (!($uses_C or $Print_context_matcher)) { $Context_matcher = undef; } if ($Opt{'#'}) { for ($output, $print, $nonmatch_print) { next unless defined; s|\$File\\([n0])|$Col{filename}\$File$Col_Reset\\$1| or s|\$File|$Col{filename}\$File|; s|\$\.|$Col{lineno}\$.|; s|\$Offset|$Col{offset}\$Offset|; s|([:\-])|$Col{colon}$1|g; } for ($output) { last unless defined; if (!length $_) { $_ = '$_' } s|^\$_$|"$Col{nonmatch}\$_$Col_Reset"| or s|\$_|$Col{nonmatch}\$_$Col_Reset|; } for ($print) { last unless defined; last if ($Opt{k} or $Opt{l} or $Opt{L} or $Opt{O}); s|;$||g; my $orig_print = $_; my $ensure_newline = ($Simple_Perlexpr or !$Opt{N}) ? '' : ($Perlexpr =~ /\bcolorall\b/) ? q[, (/\n(?:\Q] . $Col_Reset . q[\E)?\z/ ? () : "\n")] : q[, (/\n\z/ ? () : "\n")]; my $cl = "$Col{nonmatch}\$`$Col{match}\$&$Col{nonmatch}\$'$Col_Reset"; s|\$_|$cl| or s|^print$|print "$cl"|; if ($Simple_Perlexpr) { $_ .= $ensure_newline . ';'; } else { # Fix the case where $`$&$' is not the same as $_ eg. peg -# "/(...)/ and $_=$1" $orig_print =~ s|^print$|print "$Col{nonmatch}\$_$Col_Reset"| or $orig_print =~ s|\$_|$Col{nonmatch}\$_$Col_Reset|; $_ = q{($_ eq "$`$&$'")} . "\n\t? ($_$ensure_newline)\n\t: ($orig_print$ensure_newline);"; } if ($Use_matchvars) { s|\$\`|\$\{\^PREMATCH\}|g; s|\$\&|\$\{\^MATCH\}|g; s|\$\'|\$\{\^POSTMATCH\}|g; } } for ($nonmatch_print) { last unless defined; my $ncl = "$Col{nonmatch}\$_$Col_Reset"; s|\$_|$ncl| or s|print(;?)$|print "$ncl"$1|; } # Remove redundant color resets: for ($output, $print, $nonmatch_print) { next unless defined; while (s|(\Q$Col_Reset\E[\$\.\:\-\w\s]*)\Q$Col_Reset\E|$1|) {} s/\"\Q$Col_Reset\E/\"/; } } $Count = $Matches = 0; my $Opt_B = ($Opt{B} and $Before > 0); my $print_header = ($Opt{J} and !$Opt{Z}); my $use_First = ($print_header or $context or !($Opt{L} or $Opt{k})); my $Opt_b_bytes = ($Opt{b} == 1); my $Opt_b_column = ($Opt{b} >= 2); my $assign_Offset = '$Offset = ' . (exists $Env{PEG_B_HEX} ? 'sprintf "%#x", ' : '') . 'tell(F);'; my $fix_newline = "s/\\015?\\012\\z//; \$_ .= \"$Newline_literal\";"; my $ensure_trailing_newline = ($Opt{N} or $Input_record_separator); my $Col_File = $Col{filename} . '$File' . $Col_Reset; # The context matching code's circular buffer method does not work when # multiple lines are treated as one eg. if $Code_per_line merges backslashed # lines into one, then the sequence of $.'s muddles $Before[$. % $Before] # So provide a mechanism to force the use of v1's safe (but slow) push/shift stack method. my $safe_before_context = ($Opt_B and $_ = $Code_per_line and /\# PEG_SAFE_BEFORE_CONTEXT/); my $needs_Binary_file = (($Opt{I} or !($Opt{a} or $Opt{c} or $Opt{l} or $Opt{L} or $Opt{Z})) and !$Plus_one); my $binary_file_matches = !($Opt{a} or $Opt{c} or $Opt{I} == 1 or $Opt{l} or $Opt{L} or $Opt{Z}); my $skip_open = ($_ = $Code_before_open and /goto process_file/); my ($sysread_slurp, $irs_slurp, $quick_no_match_test); if ($Slurp and !($Plus_one or $skip_open)) { if (@Perlexpr_k or $Opt{l} or $Opt{L} or $Opt{O}) { if ($Opt{K} or $Input_encoding) { $irs_slurp = 1; } else { $sysread_slurp = 1; } } elsif (!($Opt{K} or $Input_encoding or $Opt{a} > 1)) { $quick_no_match_test = 1; } } my $qfind_only = ($_ = $Opt_r_cmd and /qfind/ and !(@Cmdline_files or $Opt_d or $Opt{X})); my $save_context = ($_ = $Code_per_line and /\$Printed_Context_line/); add_excludes_to_Opt_p_expr() unless $qfind_only; $Search = "sub search {\n"; $Search .= " warn_ \"V: in search() _=\$_ F:F:n=\$File::Find::name\";\n" if $Verbose; $Search .= " local \$/ = $Input_record_separator;\n" if $Input_record_separator; if ($Opt{Q}) { $Search .= " if (defined \$Q_FILE) {\n"; $Search .= " \$File = \$Q_FILE;\n \$Q_FILE = undef;\n"; $Search .= " warn_ \"V: called via Q() File=\$File\";\n" if $Verbose; $Search .= " \$Filepath = \$File;\n" if $uses_Filepath; $Search .= " \$_ = \$File;\n return unless ($Opt_pp_expr);\n" if $Opt_pp_expr; $Search .= " *F = \$Q_F;\n"; if ($Input_encoding) { my $layer = ":encoding($Input_encoding)"; $layer = ":raw:perlio$layer" if $Is_Win32; $Search .= " binmode(F, '$layer')\n or " . ($Opt_s ? '' : "(warn_ \"binmode '$layer' failed: \$!\"), ") . "return;\n"; } $Search .= " show_progress(\$File);\n" if $Opt{_}; $Search .= " goto process_file;\n"; $Search .= " }\n"; } ####$Search .= q{ print "DBG: _=$_\tF:F:n=$File::Find::name", (0 ? "\tF:F:d=$File::Find::dir\n" : ()) ,"\n";} . "\n"; ####$Search .= q{ print "DBG: cwd=", cwd(), "\n";} . "\n"; if ($Opt{'+'}) { $Search .= " return if /\\bpeg_\\d+\\.txt\\z/;\n"; } elsif ($Opt{R}) { $Search .= " return if /\\b" . quotemeta($R_file) . "\\z/;\n"; } $Search .= " \$File = \$File::Find::name;\n"; unless ($qfind_only) { $Search .= ' $File =~ s|^\.' . ($Is_Win32 ? "[/\\\\]" : '/') . "||;\n"; $Search .= ' $_ =~ s|^\.' . ($Is_Win32 ? "[/\\\\]" : '/') . "||;\n"; # Needed on Win32 for almost "too long" filenames. } $Search .= " \$File =~ tr|/|\\\\|;\n" if $Opt{"\\"} == 1; $Search .= " \$File =~ tr|\\\\|/|;\n" if $Opt{"\\"} == 2; $Search .= " \$Filepath = \$_;\n" if $uses_Filepath; $Search .= " show_progress(\$File);\n" if $Opt{_}; # NB. -p only applies to files on the filesystem and not to files within archives cf. peg -Qp .zip foo my $pV = $Verbose ? '(warn_ "V: -p skipping"), ' : ''; if ($Opt_p_expr) { # Ensure -p test is not repeated by get_files cf. peg -p "-s > 1024" -r file. if ($Opt{Q}) { if ($Opt_pp_expr) { $Search .= " ${pV}return unless (\$Inside_archive ? ($Opt_pp_expr) : ($Opt_p_expr));\n"; } else { $Search .= " ${pV}return unless (\$Inside_archive or ($Opt_p_expr));\n"; } $Opt_p_expr = undef; } elsif (!$Opt_r_fork or @Cmdline_files or $Opt_d or $Opt{X}) { # -p test done in get_files code. $Search .= " ${pV}return unless ($Opt_p_expr);\n"; $Opt_p_expr = undef; } } elsif ($Opt_pp_expr) { $Search .= " ${pV}return unless (!\$Inside_archive or ($Opt_pp_expr));\n"; } if ((defined $MTime_new or defined $MTime_old) and !$qfind_only) { $Search .= " \$MTime = "; $Search .= "\$File::Find::Mtime || " if (($Opt{r} or $Opt_d) and defined $File::Find::Mtime); $Search .= "-M \$_;\n "; $Search .= "(warn_ \"V: -M skipping\"), " if $Verbose; $Search .= "return unless ("; $Search .= "\$Inside_archive or " if $Opt{Q}; # Allow non existant files to trigger "can't open" error. $Search .= $Plus_one ? 'defined $MTime and ' : '!defined $MTime or '; # NB. 1e-6 days << 1 sec. my $fmt = sub { sprintf "%.7f", ($_[0]/(24*60*60) + ($_[1] ? -1e-6 : 1e-6)) }; if (!defined $MTime_old) { my $new = $fmt->($MTime_new); $Search .= "\$MTime < $new);\n"; } elsif (!defined $MTime_new) { my $old = $fmt->($MTime_old, 1); $Search .= "\$MTime > $old);\n"; } else { my $new = $fmt->($MTime_new); my $old = $fmt->($MTime_old, 1); $Search .= "(\$MTime > $old and \$MTime < $new));\n"; } } my $have_size; if ((defined $Size_max or defined $Size_min) and !$qfind_only) { $have_size = 1; $Search .= " \$Size = "; $Search .= "\$File::Find::Size >= 0 ? \$File::Find::Size : " if (($Opt{r} or $Opt_d) and defined $File::Find::Size); $Search .= "-s \$_;\n"; $Search .= ($Verbose ? ' (warn_ "V: -S skipping too small"),' : '') . " return if \$Size < $Size_min;\n" if defined $Size_min; $Search .= ($Verbose ? ' (warn_ "V: -S skipping too small"),' : '') . " return if \$Size > $Size_max;\n" if defined $Size_max; } if ($Opt{Q}) { my $star_handler = exists $Peg_Q{'*'}; $Search .= " if (\$File =~ /$Q_handler_re/ and -f \$_) {\n" unless $star_handler; $Search .= ' my $ext = ' . ($star_handler ? "'*'" : 'lc $1') . ";\n"; $Search .= " warn_ \"V: calling '\$ext' -Q handler\";\n" if $Verbose; $Search .= ' my $ok = eval { $Peg_Q{$ext}->($_, $File) };' . "\n"; $Search .= ' $@ and die_ "-Q handler error: $File\n$@";' . "\n"; $Search .= " return if \$ok;\n"; $Search .= " warn_ \"V: -Q handler returned false - continuing search\";\n" if $Verbose; $Search .= " }\n" unless $star_handler; } if ($Plus_one) { $Search .= " -f \$_ or ((-e _) || warn_ \"no such file: \$File\"), return;\n" unless ($qfind_only or $Opt{Q}); # "peg -l +1 *" should not show directory names $Search .= " $Code_before_open;\n" if $Code_before_open; $Search .= " push \@Matched_files, \$File;\n"; $Search .= " print \"" . ($Opt{n} ? "$Col{lineno}\@{[scalar \@Matched_files]}$Col_Reset\\t" : '') . $Col_File . ($Opt{l} > 1 ? '\0' : $Newline_literal) . "\";\n"; $Search .= " return;\n\n"; } $Search .= " $Code_before_open;\n" if ($Code_before_open and !$Plus_one); $Search .= " warn_ \"V: open'ing file\";\n" if $Verbose; if ($Search_STDIN) { $Search .= ' open(F, "<-") or die_ "cannot open STDIN: $!";' . "\n"; if ($Input_encoding) { # NB. doesn't handle BOMs $Search .= " binmode(F, ':encoding($Input_encoding)')\n"; $Search .= " or die_ \"binmode failed on STDIN with $Input_encoding: \$!\";\n"; } elsif ($Is_Win32) { $Search .= " binmode F or die_ \"binmode failed on STDIN: \$!\";\n"; } } else { # Do not check for a directory and then skip the open as we must still # allow for "peg x a_directory" to at least try to read it (sometimes can). # On systems that perform filename globbing there is the dilemma of whether # to warn about not being able to open a directory. There are two distinct # use cases: "peg x *" (no warning is preferable) and "peg x afile adir" # (a warning is preferable). The compromise solution is to warn unless -ss. my $warn_on_failed_open_code = ''; if (%Globbed) { $warn_on_failed_open_code = '(exists $Globbed{$_} and -d $_) or '; # cf. "peg main *" vs "peg main *c" } elsif ($qfind_only) { # qfind does not output directories. } elsif ($Opt_r_cmd or $Opt{r} or $Opt_d or $Opt{X} or (!$Do_globbing and $Opt_ss)) { $warn_on_failed_open_code = '-d $_ or '; # cf. "find . | peg -X foo" } if ($Opt{K}) { $Search .= " \$Wide_chars = 0;\n" if $Opt_oo; $Search .= ' *F = magic_open($_, $File)'; } else { my $layer = ''; if ($Input_encoding) { $layer = ":encoding($Input_encoding)"; $layer = ":raw:perlio$layer" if $Is_Win32; } $Search .= ' open(F, "<' . $layer . '", $_)'; } $Search .= $Opt_s ? " || return;\n" : "\n || ((${warn_on_failed_open_code}warn_ \"can't open \$File: \$" . ($Opt{K} ? 'Err' : '!') . "\"), return);\n"; if ($Is_Win32 and !($Opt{K} or $Input_encoding or ($_ = $Input_record_separator and /\\n/))) { $Search .= " binmode F or die_ \"binmode failed for \$File: \$!\";\n"; } ########$Search .= q{ warn_ "DBG: IO layers: ", (join ', ', PerlIO::get_layers(\*F, details => 1));} . "\n"; } # Stop if the output channel goes eg. if running thro' a pager which quits: $Search .= " print '' or goto done;\n" unless ($STDOUT_is_terminal or $Opt{R}); # NB. need to local-ise $/ if there are any outer readline()s ie. or -X's . $Search .= ' ' . (($Opt_r_cmd or $Opt{X}) ? 'local ' : '') . '$/ = (-s F < ' . $Slurp_maxsize . ') ? undef : "\n";' . "\n" if $irs_slurp; $Search .= " \$Size = -s F;\n" if ($sysread_slurp and !$have_size); $Search .= " \$Slurp = (\$Size < $Slurp_maxsize);\n" if $sysread_slurp; $Search .= "process_file:\n" if ($Opt{Q} or $skip_open); if ($Opt{Q}) { $Search .= " \$File =~ tr|/|\\\\|;\n" if $Opt{"\\"} == 1; $Search .= " \$File =~ tr|\\\\|/|;\n" if $Opt{"\\"} == 2; } if ($needs_Binary_file and ($Opt{I} or !$quick_no_match_test)) { $Search .= " eval { \$Binary_file = -B F };\n"; $Search .= ' $@ and ' . ($Opt_s ? '' : '(warn_ "error reading $File: ", &ee), ') . "close(F), return;\n"; $Search .= ' warn_ "V: file is ", ($Binary_file ? "" : "not "), "binary";' . "\n" if $Verbose; $Search .= ' $Binary_file ' . ($Opt{I} == 1 ? '&&' : '||') . " (close(F), return);\n" if $Opt{I}; } $Search .= " reset 'a-z';\n" if $needs_reset; $Search .= " \$After = $After;\n" if $Opt{A}; $Search .= " \@Before = ();\n" if $Opt_B; $Search .= " \$C = undef;\n" if $uses_C; $Search .= " \$Context_line = undef;\n" if $Context_matcher; $Search .= " \$Context_line2 = undef;\n" if $Context_matcher2; $Search .= " \$Count = 0;\n" if ($Opt{c} == 1 or $Opt{c} >= 3); $Search .= " \$Matches = 0;\n" if $Opt_m == 1; $Search .= " \$First = 1;\n" if $use_First; $Search .= " \$Found = 0;\n" if $Opt{L}; $Search .= ' ' . join("\n\t= ", map "\$Match$_", 0..$#Perlexpr) . " = 0;\n" if (($Opt{k} and @Perlexpr) or $Opt{O} or $Opt_oo); $Search .= " \$Match_failed = 0;\n" if @Perlexpr_k; $Search .= " \@P = ();\n" if $uses_P; $Search .= " \$Printed_Context_line = '';\n \$Printed_Context_line2 = '';\n" if $save_context; $Search .= " undef \$Z;\n" if ($Opt{Z} % 2); if ($Opt_oo) { $Search .= ' seek($Buffer_fh, 0, 0) or die_ "seek failed: $!";' . "\n"; $Search .= " \$Buffer_contents = '';\n"; $Search .= ' binmode($Buffer_fh, $Wide_chars ? ":utf8" : ":raw") or die_ "binmode failed: $!";' . "\n" if $Opt{K}; $Search .= " my \$Orig_fh = select;\n select \$Buffer_fh;\n"; } $Search .= " $Code_after_open;\n" if $Code_after_open; ####$Search .= ' print "DBG: $File: ", (join ", ", PerlIO::get_layers(\*F, details => 1)), " pos=", tell(F), "\n";' . "\n"; $Search .= " eval {\n"; if ($quick_no_match_test) { # This is an optimisation based on the assumption that most files do not match. $Search .= " \$Size = -s F;\n" unless $have_size; $Search .= " if (\$Size < $Slurp_maxsize) {\n"; # NB. return's below jump to end of enclosing eval block. $Search .= ' sysseek(F, 0, 0) or ' . ($Opt_s ? '' : q{(warn_ "sysseek failed $File: $!"), }) . "return;\n" if ($needs_Binary_file and $Opt{I}); $Search .= ' $Bytes_read = sysread(F, $_, $Size);' . "\n"; $Search .= ' defined $Bytes_read or ' . ($Opt_s ? '' : q{(warn_ "sysread failed $File: $!"), }) . "return;\n"; $Search .= ' $Bytes_read == $Size or ' . ($Opt_s ? '' : q{(warn_ "slurp failed $File: read $Bytes_read not $Size"), }) . "return;\n"; $Search .= " (warn_ \"V: no match\"),\n" if $Verbose; $Search .= " return unless ($Perlexpr_q);\n"; $Search .= ' seek(F, 0, 0) or ' . ($Opt_s ? '' : q{(warn_ "seek failed $File: $!"), }) . "return;\n"; $Search .= " }\n"; $Search .= " \$Binary_file = -B F;\n" if ($needs_Binary_file and !$Opt{I}); $Search .= " if (\$Binary_file) {\n push \@Matched_files, \$File;\n print \"Binary file $Col_File matches$Newline_literal\";\n return;\n }\n" if $binary_file_matches; } $Search .= " $assign_Offset\n" if $Opt_b_bytes; $Search .= " warn_ \"V: reading file\";\n" if $Verbose; # Reading a file using a single sysread is quicker than using "$/=undef". if ($sysread_slurp) { $Search .= " while (1) {\n"; $Search .= " if (\$Slurp) {\n"; $Search .= " last if \$Slurp == -1; \$Slurp = -1;\n"; $Search .= ' sysseek(F, 0, 0) or ' . ($Opt_s ? '' : q{(warn_ "sysseek failed $File: $!"), }) . "last;\n" if $needs_Binary_file; $Search .= " \$Bytes_read = sysread(F, \$_, \$Size);\n"; $Search .= ' defined $Bytes_read or ' . ($Opt_s ? '' : q{(warn_ "sysread failed $File: $!"), }) . "last;\n"; $Search .= ' $Bytes_read == $Size or ' . ($Opt_s ? '' : q{(warn_ "slurp failed $File: read $Bytes_read not $Size"), }) . "last;\n"; $Search .= " } else {\n"; $Search .= " \$_ = readline(*F);\n"; $Search .= " last unless defined;\n"; $Search .= " }\n"; } else { $Search .= " while () {\n"; } $Search .= " \$Line_matched = 0;\n" if $Opt_oo; if ($Opt{a} > 1) { if ($Opt{a} == 2) { $Search .= " tr|\\x00||d;\n"; $Search .= " tr|\\x01-\\x08\\x0b-\\x1f\\x7f-\\xff| |s;\n"; } else { $Search .= " tr|\\x00-\\x08\\x0b-\\x1f\\x7f-\\xff| |s;\n"; } } $Search .= " s/\\015\\012/\\n/g;\n" if ($Input_record_separator and ($CRLF_to_newline or $Opt{N})); # fix internal newlines if ($ensure_trailing_newline and $Opt{'#'}) { $Search .= " s/\\015?\\012\\z//; \$_ .= \"\\n\";\n"; } elsif ($CRLF_to_newline and !$Input_record_separator) { $Search .= " s/\\015\\012\\z/\\n/;\n"; } ####$Search .= q< print "DBG: ", join ' ', unpack("C*", $_), "\n"; next;> . "\n"; $Search .= " \$P = \$_;\n" if $uses_P; if ($Context_matcher) { # The context code can modify the input line $_ in order to set a different context line. # If this happens we wrap the context code with "local $_ = $_" to ensure the line # matching code uses the correct value. Since this is expensive, a buyout is provided. my $needs_local; foreach my $code ($Context_matcher, $Context_matcher2) { next unless defined $code; next if $code =~ /\# PEG_FAST_Z_CONTEXT/; if ($code =~ /\$_\s*\.?=[^~]/) { $needs_local = 1; last; } # Allow "$var =~ s/foo/bar/", but not a $_ modifiying "s/foo/bar/". while ($code =~ /(.*?)\bs\//g) { unless ($1 =~ /\$(?:_\w+|[a-zA-Z]\w*)\s*=~\s*\z/) { $needs_local = 1; last; } } } $Search .= " {local \$_ = \$_;\n" if $needs_local; $Search .= " if ($Context_matcher) {\n"; $Search .= " \$C = \$_;\n" if $uses_C; $Search .= " \$Context_line = \$_;\n"; $Search .= " \$Context_lineno = \$.;\n"; ########$Search .= ' warn_ "V: -z ($.) $_";' . "\n" if $Verbose; $Search .= " }\n"; if ($Context_matcher2) { $Search .= " if ($Context_matcher2) {\n"; $Search .= " \$Context_line = undef;\n" unless $Env{PEG_Z_INDEPENDENT}; $Search .= " \$Context_line2 = \$_;\n"; $Search .= " \$Context_lineno2 = \$.;\n"; ############$Search .= ' warn_ "V: -zz ($.) $_";' . "\n" if $Verbose; $Search .= " }\n"; } $Search .= " }\n" if $needs_local; } $Search .= " $Code_per_line;\n" if $Code_per_line; $Search .= " shift \@Before if (\@Before > $Before);\n" if $safe_before_context; $Search .= " \$Offset = 1;\n" if ($Opt_b_column and $context); # Need to clear $& to avoid possible false coloring of a matched line where $& is due to the context match and not PERLEXPR. $Search .= " 'X' =~ /X/;\n" if (($Opt{'#'} and ($Context_matcher or $Code_per_line) and !$Simple_Perlexpr) or $Opt_b_column); $Search .= " study;\n" if ((@Perlexpr + @Perlexpr_k) >= 20); $Search .= " if ($Perlexpr) {\n"; $Search .= " $::Code_on_match\n" if $::Code_on_match; # undocumented hook NB. code should localise $& etc. $Search .= " next;\n" if $Opt{k}; unless ($Opt{L}) { $Search .= " exit;\n" if $Opt{q}; $Search .= ' $First && push @Matched_files, $File;' . "\n" unless $Opt_oo; } $Search .= " \$Binary_file and (print \"Binary file $Col_File matches$Newline_literal\"), last;\n" if ($binary_file_matches and !$quick_no_match_test); $Search .= " \$Offset = (\$-[0] || 0) + 1;\n" if $Opt_b_column; # NB. avoid "Use of uninitialized value" warning. $Search .= " $fix_newline\n" if ($ensure_trailing_newline and !$Opt{'#'}); $Search .= " ++\$Count;\n" if $Opt{c}; $Search .= " ++\$Matches;\n" if $Opt_m; $Search .= " \$Found = 1;\n last;\n" if $Opt{L}; if ($print_header) { $Search .= " print "; if ($Opt{J} == 1) { $Search .= "header(\$File)"; } else { # NB. can't always rely on @Matched_files. $Search .= ($Opt_r_fork or $Opt_oo) ? "\"$Newline_literal\", " : "+(\@Matched_files > 1 ? \"$Newline_literal\" : ''), " if $JJ_gap; $Search .= "\"$Col_File$Newline_literal\""; } $Search .= " if \$First;\n"; } if ($context) { # Insert "--" separator when appropriate.. # NB. can't rely on $Matched_before if fork'ing. $Search .= " print \"--$Newline_literal\" if ("; $Search .= $Opt{J} ? '(!$First && ' : (($Opt_r_fork ? '' : '$Matched_before++ && ') . '($First || '); $Search .= "(\$After > $gap)));\n"; } if ($Print_context_matcher) { my $fmt1 = $Env{PEG_CONTEXT_FORMAT} || '**** ($.) $_'; my $fmt2 = $Env{PEG_CONTEXT_FORMAT2} || '++++ ($.) $_'; $fmt1 =~ s|\$_\b|\$Context_line|; $fmt2 =~ s|\$_\b|\$Context_line2|; $fmt1 =~ s|\$\.|\$Context_lineno|; $fmt2 =~ s|\$\.|\$Context_lineno2|; if ($Context_matcher2) { $Search .= " if (defined \$Context_line2) {\n"; if ($Env{PEG_Z_INDEPENDENT}) { # ensure context ordered correctly. $Search .= " if (defined \$Context_line and \$Context_lineno < \$Context_lineno2) {\n"; $Search .= " \$Printed_Context_line = \$Context_line;\n" if $save_context; $Search .= " \$Context_line =~ s/\\015?\\012\\z//;\n"; # inline chomp_ $Search .= " print \"$Col{z_context}$fmt1$Col_Reset$Newline_literal\";\n"; $Search .= " \$Context_line = undef;\n"; $Search .= " }\n"; } $Search .= " \$Printed_Context_line2 = \$Context_line2;\n" if $save_context; $Search .= " \$Context_line2 =~ s/\\015?\\012\\z//;\n"; # inline chomp_ $Search .= " print \"$Col{z_context2}$fmt2$Col_Reset$Newline_literal\";\n"; $Search .= " \$Context_line2 = undef;\n"; $Search .= " }\n"; } $Search .= " if (defined \$Context_line) {\n"; $Search .= " \$Printed_Context_line = \$Context_line;\n" if $save_context; $Search .= " \$Context_line =~ s/\\015?\\012\\z//;\n"; # inline chomp_ $Search .= " print \"$Col{z_context}$fmt1$Col_Reset$Newline_literal\";\n"; $Search .= " \$Context_line = undef;\n"; $Search .= " }\n"; } $Search .= $safe_before_context ? " print \@Before;\n" : " print grep defined, \@Before[(\$. % $Before)..@{[$Before-1]}, 0..((\$. % $Before)-1)];\n" if $Opt_B; $Search .= " $::Code_on_match2\n" if $::Code_on_match2; # undocumented hook $Search .= " $print\n" if $print; $Search .= " " . ($Opt_m == 1 ? 'last' : 'goto done') . " if \$Matches >= $Max_matches;\n" if ($Opt_m and !$context); $Search .= " \$After = 0;\n" if $context; $Search .= " \@Before = ();\n" if $Opt_B; $Search .= " \$First = undef;\n" if $use_First; $Search .= " }\n"; $Search .= " elsif (++\$After <= $After) {\n" if $Opt{A}; $Search .= " $fix_newline\n" if ($Opt{A} and $ensure_trailing_newline); $Search .= " $nonmatch_print\n }\n" if $Opt{A}; $Search .= " else {\n" if ($Opt{B} or ($context and $Opt_m)); $Search .= " " . ($Opt_m > 1 ? 'goto done' : 'last') . " if (\$Matches >= $Max_matches);\n" if ($Opt_m and $context); $Search .= " ++\$After;\n" if (!$Opt{A} and $Opt{B}); $Search .= " $fix_newline\n" if ($Opt_B and $ensure_trailing_newline); $Search .= $safe_before_context ? " push \@Before, $output;\n" : " \$Before[\$. % $Before] = $output;\n" if $Opt_B; $Search .= " }\n" if ($Opt{B} or ($context and $Opt_m)); $Search .= " $assign_Offset\n" if $Opt_b_bytes; $Search .= " push \@P, \$P;\n" if $uses_P; $Search .= " }\n"; # NB. in the event of an exception, we can't print $_ as it's contents may trigger another exception in the output IO! $Search .= ' }' . ($Opt_s ? '' : '; $@ and (print STDERR "\npeg: error at line $. of $File:\n", &ee)' . ($Opt_r_fork ? '' : ', exit(2)')) . ";\n"; $Search .= " $Code_before_close;\n" if $Code_before_close; $Search .= " close(F);\n" unless $Search_STDIN; if ($Opt_oo) { $Search .= " select \$Orig_fh;\n if ("; $Search .= "!\$Match_failed\n\t&& " if @Perlexpr_k; $Search .= join "\n\t&& ", map "\$Match$_", (0 .. $#Perlexpr); $Search .= ") {\n"; $Search .= " if (\$Wide_chars) {\n" if $Opt{K}; if ($Opt{K} or $Input_encoding) { # NB. $Buffer_contents is a *byte* string. $Search .= ' seek($Buffer_fh, 0, 0) or die_ "seek failed: $!";' . "\n"; $Search .= " my \$buf;\n"; $Search .= " print \$buf while read(\$Buffer_fh, \$buf, 2048) > 0;\n"; } $Search .= " } else {\n" if $Opt{K}; $Search .= " print \$Buffer_contents;\n" unless $Input_encoding; $Search .= " }\n" if $Opt{K}; $Search .= " push \@Matched_files, \$File;\n }\n"; } $Search .= " goto done if (\$Matches >= $Max_matches);\n" if ($context and $Opt_m > 1); if ($Opt{k}) { $Search .= " if (!\$Match_failed" . join("", map({"\n\t&& \$Match$_"} (0..$#Perlexpr))) . ") {\n"; $Search .= " exit;\n" if $Opt{q}; $Search .= " print \"$Col_File$Newline_literal\";\n"; $Search .= " push \@Matched_files, \$File;\n"; $Search .= " }\n"; } if ($Opt{c} == 1 or $Opt{c} >= 3) { $Search .= ' print "' . ($Opt{h} ? '' : "$Col_File$Col{colon}:$Col_Reset") . "\$Count$Newline_literal\""; $Search .= " if \$Count" if ($Opt{c} >= 3); $Search .= ";\n"; } $Search .= " Z_display(" . ((!$Opt{h} or $Opt{J}) ? '$File' : '') . ");\n" if ($Opt{Z} % 2); if ($Opt{L}) { $Search .= " unless (\$Found) {\n"; $Search .= " exit;\n" if $Opt{q}; $Search .= " print \"$Col_File$Newline_literal\";\n"; $Search .= " push \@Matched_files, \$File;\n"; $Search .= " }\n"; } $Search .= ' warn_ "V: done search()\n\n";' . "\n" if $Verbose; $Search .= "}\n"; $Search =~ s/(\bwarn_ \"V:)/$1 \$\$/g if ($Verbose and $Opt_r_fork); eval_ $Search; $@ and die_ "error while eval'ing:\n\n$Search\n", @Warnings, ⅇ } # build_search sub header { my $file = shift; my $border = $Newline . $Col{colon} . (":" x (6 + length($file))) . $Col_Reset . $Newline; my $cc = $Col{colon} . "::" . $Col_Reset; return $border . $cc . " " . $Col{filename} . $file . $Col_Reset . " " . $cc . $border . $Newline; } # header # magic_open() - attempt to open a file using the 'correct' encoding. # # Ensure there is no :crlf layer on the filehandle. # 1. We want the CRs. # 2. The :crlf layer interferes badly with encodings. For example, # tell()'s result on "<:encoding(utf16le):crlf" filehandles # are not aligned to the original file's bytes. # sub magic_open { my ($file, $fullpath) = @_; # Open the file for reading *binary*, but ensure no :crlf layer. open(my $fh, "<:raw:perlio", $file) or $Err = $!, return; my $len = read($fh, my $data, 8); defined $len or $Err = "read failed: $!", return; return $fh if $len < 2; my @res; # (encoding, start_offset) my @b = unpack("C*", $data); # Look for a BOM. if ($b[0]==0xEF and $len >= 3 and $b[1]==0xBB and $b[2]==0xBF) { @res = ('utf8', 3); } elsif ($b[0]==0xFF and $b[1]==0xFE) { if (!($len % 4) and !$b[2] and !$b[3]) { @res = ('utf32le', 4) } elsif (!($len % 2)) { @res = ('utf16le', 2) } } elsif ($b[0]==0xFE and $b[1]==0xFF and !($len % 2)) { @res = ('utf16be', 2); } elsif (!$b[0] and !$b[1] and $b[2]==0xFE and $b[3]==0xFF and !($len % 4)) { @res = ('utf32be', 4); } # OK - cannot find a BOM, perhaps it's ASCII text encoded in UTF(16|32). elsif ((!$b[0] or !$b[1]) and !($len % 2)) { if ( (!$b[1] and $Is_ascii_text[$b[0]] and ($len==2 or (!$b[3] and $Is_ascii_text[$b[2]] and ($len==4 or (!$b[5] and $Is_ascii_text[$b[4]] and ($len==6 or (!$b[7] and $Is_ascii_text[$b[6]])))))))) { @res = ('utf16le', 0); } elsif (!$b[0] and $Is_ascii_text[$b[1]] and ($len==2 or (!$b[2] and $Is_ascii_text[$b[3]] and ($len==4 or (!$b[4] and $Is_ascii_text[$b[5]] and ($len==6 or (!$b[6] and $Is_ascii_text[$b[7]]))))))) { @res = ('utf16be', 0); } elsif ($len==4 or $len==8) { if ( (!$b[1] and !$b[2] and !$b[3] and $Is_ascii_text[$b[0]]) and ($len==4 or (!$b[5] and !$b[6] and !$b[7] and $Is_ascii_text[$b[4]]))) { @res = ('utf32le', 0) } elsif ( (!$b[0] and !$b[1] and !$b[2] and $Is_ascii_text[$b[3]]) and ($len==4 or (!$b[4] and !$b[5] and !$b[6] and $Is_ascii_text[$b[7]]))) { @res = ('utf32be', 0) } } } my ($encoding, $start_offset); if (@res) { ($encoding, $start_offset) = @res; warn_ "assuming $encoding: $fullpath" unless $Opt_ss; } else { $start_offset = 0; if ($Guess_encoding) { seek($fh, 0, 0) or $Err = "seek failed: $!", return; $len = read($fh, $data, 8192); defined $len or $Err = "read failed: $!", return; my $enc_obj = Encode::Guess::guess_encoding($data); if (ref $enc_obj and $enc_obj->name ne 'ascii') { $encoding = $enc_obj->name; warn_ "encoding guessed as $encoding: $fullpath" unless $Opt_ss; } } } seek($fh, $start_offset, 0) or $Err = "seek failed: $!", return; if ($encoding) { eval { binmode($fh, ":encoding($encoding)") or die "binmode failed: $!"; $Wide_chars = 1; }; $@ and ($Err = "$encoding encoding error $fullpath:\n" . &ee), return; } ####print "DBG: $fullpath: ", (join ', ', PerlIO::get_layers($fh, details => 1)), "\n"; return $fh; } # magic_open { my $last_file; sub show_progress { my $file = shift; my $N = $Console_width - 10; # Ensure $file fits the terminal width. # Try: a/b/c/def -> a/b/~/def -> a/~/def -> ~/def -> ~/d~f $file =~ tr|\\|/| if $Is_Win32; if (length($file) > $N) { my $fits; if ($file =~ m|^(.+)/([^/]+)\z|) { my ($root, $tail) = ($1, $2); while ($root =~ s|/+[^/]+\z||) { if (length($root) + length($tail) < $N) { $file = "$root/~/$tail"; $fits = 1; last; } } if (!$fits and length($tail) < $N) { $file = "~/$tail"; $fits = 1; } } unless ($fits) { my $N_2 = int($N / 2); $file =~ s|^.+/|~/|; $file = substr($file, 0, $N_2) . "~" . substr($file, length($file) - $N_2); } } if (defined $last_file) { # When consecutive filenames truncate the same, show progress has been made. $file .= '*' if $file eq $last_file; # Don't reprint their common prefix to prevent flickering. my $lower = 0; # lower <= common < upper my $upper = 1 + ((length($file) < length($last_file)) ? length($file) : length($last_file)); while ($upper - $lower > 1) { my $try = int(($lower + $upper) / 2); if (substr($file, 0, $try) eq substr($last_file, 0, $try)) { $lower = $try } else { $upper = $try } } my $common = $lower; my $out = ''; my $overhang = length($last_file) - length($file); if ($overhang > 0) { # erase overhanging characters of last_file $out .= "\b" x $overhang; $out .= " " x $overhang; } $out .= "\b" x (length($last_file) - $common); $out .= substr $file, $common; print STDOUT $out; } else { print STDOUT "peg: $file"; } $last_file = $file; } # show_progress } sub Q { my ($fh, $filename, $within_archive) = @_; # File within archives which themselves need to call an S_handler are # written to a temporary file, and then the handler is called on that. if ($within_archive and ($Opt{Q} == 1 ? $filename =~ /$Q_handler_re/o : ($Q_nonarchive_re and $filename =~ /$Q_nonarchive_re/o))) { my $ext = lc $1; require File::Temp; my ($fout, $tempfile) = File::Temp::tempfile("peg-Q-XXXXX", SUFFIX => ".$ext", UNLINK => 1); binmode $fout or die "binmode failed: $!\n"; my ($len, $buf); while ($len = sysread($fh, $buf, 65_536)) { syswrite($fout, $buf, $len) or die "error writing to tempfile: $!\n"; } close $fout or die "can't close tempfile: $!\n"; close $fh; warn_ "V: Q() calling $ext handler with ('$tempfile', '$filename')" if $Verbose; ++$Inside_archive; $Peg_Q{$ext}->($tempfile, $filename); --$Inside_archive; unlink $tempfile; } else { ($Q_F, $Q_FILE) = ($fh, $filename); warn_ "V: Q() calling search('$filename')" if $Verbose; search(); } } # Q sub search_files { foreach my $file (@{$_[0]}) { $_ = $File::Find::name = $file; search(); } } # search_files sub run { $/ = "\n"; my $cwd = cwd(); my $flush_err; my $Profile = $Env{PEG_PROFILE}; # A simple profiling mechanism warn_ "PROFILE = $Profile$Beep" if $Profile; # NB. open3() must be run before STDERR is redirected by "-R_" code my $r_cmd_finished; if ($Opt_r_cmd) { die_ "profiling R_CMD not supported" if $Profile; my ($interrupt, $r_cmd_pid); if ($Opt_r_cmd_silent) { $r_cmd_pid = open(R_CMD_OUT, "$Opt_r_cmd|") or die_ "failed to run $Opt_r_cmd: $!"; } else { require IPC::Open3; require IO::File; *R_CMD_ERR = IO::File->new_tmpfile; # Ensure that the R_CMD process does not become a zombie if an # interrupt occurs before it has finished and triggers peg to exit. # In particular, it is possible for an interrupt to occur after # open3() has created the R_CMD process but before it has returned. $r_cmd_pid = eval { local $SIG{INT} = sub { $interrupt = 1 }; IPC::Open3::open3(undef, \*R_CMD_OUT, ">&R_CMD_ERR", $Opt_r_cmd); }; $@ || !$r_cmd_pid and die_ "failed to run $Opt_r_cmd\n", ⅇ } eval "END { kill('KILL', \$r_cmd_pid) unless \$r_cmd_finished }"; $SIG{INT}->() if $interrupt; warn_ "created process $r_cmd_pid for $Opt_r_cmd" unless $Opt_ss; binmode R_CMD_OUT; } if ($Opt{R}) { require Fcntl; my $mode = (&Fcntl::O_WRONLY | &Fcntl::O_EXCL | &Fcntl::O_CREAT); my $R_dir = $cwd; unless (sysopen(OUT, $R_file, $mode, 0600)) { warn_ "can't create -R file in current directory: $!" unless $Opt_s; $R_dir = $HOME_dir; sysopen(OUT, "$R_dir$R_file", $mode, 0600) or die_ "can't create -R file: $!"; } warn_ "-R: $R_dir$R_file" unless $Opt_s; select OUT; if ($Opt{_}) { autoflush(\*STDOUT); # Save STDERR output till the end to avoid clobbering the progress output. open(OLDERR, ">&", \*STDERR) or die_ "can't save STDERR: $!"; close STDERR; my $saved_err = ''; open(STDERR, ">", \$saved_err) or (print STDOUT "peg: can't redirect STDERR: $!\n"), exit(2); $flush_err = sub { return unless defined $saved_err; close STDERR; open(STDERR, ">&OLDERR") or (print STDOUT "\npeg: can't restore STDERR: $!\n"), exit(2); print STDERR $saved_err; $saved_err = undef; }; eval "END { \$flush_err->() }"; $SIG{__DIE__} = sub { my $err = shift; return if $err =~ m|Encode/ConfigLocal|; show_progress("!error!"); print STDOUT "\n"; $flush_err->(); $@ = $err; warn_ "exception caught; maybe at line $. of $File:\n", ⅇ exit(2); }; show_progress("*start*"); } } # Flush output unless we know it's going to a file. $| = ($Opt{R} or $Buffer_output) ? 0 : 1; my $layer; if ($Output_encoding) { if ($Output_BOM) { binmode select() or die_ "binmode failed on output: $!"; print $Output_BOM; } $layer = ":encoding($Output_encoding)"; if ($Is_Win32) { if ($Needs_crlf_layer) { # Leave implicit :crlf layer on utf8 output. unless ($Output_encoding eq 'utf8') { $layer = ':pop' . $layer . ':crlf'; # Reposition :crlf layer. } } else { $layer = ':pop' . $layer; # Remove redundant :crlf layer. } } } elsif ($Needs_crlf_layer) { $layer = ':crlf'; } else { $layer = ':raw'; # NB. needed on Win32 to prevent CRLF -> CRCRLF! } eval { binmode(select(), $layer) or die $!; }; $@ and die_ "failed to binmode output using '$layer':\n", ⅇ ####print "DBG: output '$layer' => ", (join ', ', PerlIO::get_layers(select(), details => 1)), "\n"; if ($Opt{R} > 1) { my $header = <<"EOT"; ## # TIME : @{[ scalar localtime ]} # CWD : @{[ $cwd ]} # ARGV : @{[ join "\n# ", @ARGV ]} ## EOT $header =~ s/\n/$Newline/g; print $header; } # Ensure we don't leave console incorrectly colored if interrupted. # SIGQUIT (Ctrl-Pause on Win32) saves the files matched so far. $SIG{INT} = $SIG{QUIT} = $SIG{TERM} = sub { # XXX Win32 fix for unread STDIN contents leaking to shell when run via bat file. if ($Is_Win32 and !$STDIN_is_terminal) { while () {} } print $Col_Reset; chdir($cwd) && save_matches() if $_[0] eq 'QUIT'; exit(2); }; # By default, ignore warnings from here on in. $SIG{__WARN__} = sub { warn_ "! ", @_ if $Verbose }; start:; search_files(\@Cmdline_files); if ($Opt_r_cmd) { if ($Opt_r_fork) { fork_workers(); } else { while () { s/\012\z//; $File::Find::name = $_; search(); } } unless ($Opt_r_cmd_silent) { seek(R_CMD_ERR, 0, 0) or die_ "seek failed: $!"; while () { warn_($_) unless $Opt_ss; } close R_CMD_ERR; } close R_CMD_OUT; $r_cmd_finished = 1; } if ($Opt{r} or $Opt_d) { require File::Find; local $SIG{__WARN__} = sub { my $err = $_[0]; $err =~ s/^(.*) at .* line \d+.*\z/$1/s; $err =~ s/^Can't opendir\((?:\.\/)?(.+)\): /can't opendir $1: /; $err =~ s/^C(\w+\'t )/c$1/; # If search() is in the call stack then $File is valid. for (my $i = 0; my @cs = caller($i); ++$i) { if ($cs[3] eq 'main::search') { return if $i == 2; # Ignore warnings from PERLEXPR $err = "$File: $err"; last; } } warn_ $err; } unless $Opt_ss; my @dirs = (($Opt{r} ? '.' : ()), @Cmdline_dirs); eval { File::Find::find({ @FileFind_opts, 'wanted' => \&search }, @dirs); }; $@ and warn_ "File::Find::find failed: ", ⅇ chdir($cwd) or die_ "can't chdir back to $cwd: $!"; } if ($Opt{X}) { # Avoid interleaving the file list and output on the terminal. if ($STDOUT_is_terminal and $STDIN_is_terminal and !$Opt{R}) { warn_ "buffering up -X file list" unless $Opt_s; my @files; while () { chomp_ $_; next if $_ eq ''; push @files, $_; } search_files(\@files); } else { while () { chomp_ $_; next if $_ eq ''; $File::Find::name = $_; search(); } } } if ($Search_STDIN) { search_files(['-']); } # Finished searching specified files. if (--$Profile > 0) { goto start; } done:; if ($Code_at_end) { eval_ $Code_at_end; $@ and warn_ "-PPPP code gave an error:\n", ⅇ } if ($Opt{c} == 2) { print $Count, $Newline; } elsif ($Opt{Z} and !($Opt{Z} % 2)) { Z_display(); } print $Col_Reset; if ($Opt{R}) { if ($Opt{_}) { show_progress("*done*"); print STDOUT "\n"; $flush_err->(); } select STDOUT; close OUT or warn_ "failed to close -R file: $!"; } chdir($cwd) or die_ "can't chdir back to $cwd: $!"; save_matches(); if ($Opt{'%'}) { my $took = sprintf "%.2f", (0.1 + Time::HiRes::time() - $Start_time); warn_ "took $took seconds"; } } # run # The fork/pipe code below is derived from Jeff Rodriguez's Parallel::Fork::BossWorker. # sub fork_workers { warn_ "forking $Worker_count by $Worker_work" unless $Opt_ss; require PerlIO::scalar; pipe(my $from_workers_pid, my $to_boss_pid) or die_ "pipe failed: $!"; autoflush($to_boss_pid); my %workers; for (1 .. $Worker_count) { pipe(my $from_boss_files, my $to_worker_files) and pipe(my $from_worker_msg, my $to_boss_msg) or die_ "pipe failed: $!"; autoflush($to_worker_files); autoflush($to_boss_msg); my $pid = fork; die_ "fork failed: $!" unless defined $pid; if ($pid) { $workers{$pid} = [$to_worker_files, $from_worker_msg]; close $from_boss_files; close $to_boss_msg; } else { $SIG{PIPE} = sub { exit }; close $from_workers_pid; close $from_worker_msg; close $to_worker_files; send_msg($to_boss_pid, $$); worker($from_boss_files, $to_boss_pid, $to_boss_msg); exit; } } close $to_boss_pid; my $INT_handler = $SIG{INT}; local $SIG{INT} = local $SIG{PIPE} = sub { foreach my $pid (keys %workers) { my ($to_worker_files, $from_worker_msg) = @{$workers{$pid}}; close $to_worker_files; close $from_worker_msg; } close $from_workers_pid; $INT_handler->(); }; my (@files, $r_cmd_done); my $code = <<'EOT'; sub { return if $r_cmd_done; my $n = $Worker_work; while () { s/\012\z//; EOT if ($Opt_p_expr) { $code .= "\t\t\$File = \$_;\n\t\t"; $code .= '(warn_ "V: skipping $_"), ' if $Verbose; $code .= "next unless ($Opt_p_expr);\n"; } $code .= <<'EOT'; push @files, $_; return if --$n <= 0; } $r_cmd_done = 1; }; EOT my $get_files = eval $code; $@ and die_ "bad code:\n$code\n", ⅇ $get_files->(); while (my $pid = receive_msg($from_workers_pid)) { my $msg_waiting = ($pid =~ s/^!//); my ($to_worker_files, $from_worker_msg) = @{$workers{$pid}}; my $msg = $msg_waiting ? receive_msg($from_worker_msg) : ''; if (@files) { send_msg($to_worker_files, join "\000", @files); @files = (); } else { close $to_worker_files; close $from_worker_msg; delete $workers{$pid}; } if (length $msg) { $msg =~ /(.*?)\001/sg; print STDERR $1 if length $1; $msg =~ /(.*?)\001/sg; push @Matched_files, split /\000/, $1 if length $1; $msg =~ /(.*)/sg; print $1 if length $1; } $get_files->(); } } # fork_workers sub worker { my ($from_boss_files, $to_boss_pid, $to_boss_msg) = @_; open(my $outfh, ">", \my $out) or die_ "can't open: $!"; select $outfh; close STDERR; open(STDERR, ">", \my $err) or die_ "can't redirect STDERR: $!"; while (my $files = receive_msg($from_boss_files)) { seek(STDERR, 0, 0) and seek($outfh, 0, 0) or (print STDOUT "peg: can't seek: $!\n"), exit; $err = $out = ''; @Matched_files = (); foreach my $file (split /\000/, $files) { $File::Find::name = $_ = $file; search(); } if (length $out or length $err or @Matched_files) { my $msg = $err . "\001" . join("\000", @Matched_files) . "\001" . $out; send_msg($to_boss_pid, "!$$"); send_msg($to_boss_msg, $msg); } else { send_msg($to_boss_pid, $$); } } close $to_boss_pid; } # worker sub receive_msg { my $fh = shift; local $/ = $Msg_rs; my $msg = <$fh>; chomp $msg if defined $msg; return $msg; } # receive_msg sub send_msg { my ($fh, $msg) = @_; print $fh $msg, $Msg_rs; } # send_msg # Avoid "used only once" warnings. 1 or ($Data::Dumper::Indent, $Data::Dumper::Sortkeys, $Encode::Guess::NoUTFAutoGuess, $File::Find::Size, $FindBin::RealBin, $::OLDERR); __END__ =head1 NAME peg - Perl expression grep =head1 SYNOPSIS peg [OPTION]... PERLEXPR [FILE]... =head1 DESCRIPTION B is a file search tool similar to the Unix program B. It uses PERLEXPR to match lines from a list of input files. PERLEXPR is either a Perl expression, literal text, or a Perl regular expression pattern. The following rules determine which of these it is. =over 4 =item 1. If any of B<-E>, B<-F> or B<-G> are specified, they take precedence. =item 2. If PERLEXPR starts with a C<+> or a C<$>, or if it contains a C then it is assumed to be an expression ie. B<-E> is assumed. =item 3. Otherwise, the PERLEXPR is assumed to be a regular expression pattern ie. B<-G> is assumed. =back For example, the following are all equivalent: % peg -E m,needle,i haystack # rule 1 % peg /needle/i haystack # rule 2 % peg (?i)needle haystack # rule 3 % peg -i needle haystack # rule 3 To assist in quoting battles against less enlightened shells, B provides the following variables: ($DQ, $SQ, $BT, $EM, $GT, $LT, $PC) = qw( " ' ` ! > < % ) B will C any Perl variables beginning with a lowercase letter prior to searching each file. If no files are specified then if STDIN is attached to the terminal then B<-r> is assumed else B reads from standard input. =head1 OPTIONS The options include equivalents to all of standard B, and most of the GNU extensions. Note that some are subtly different. They can be grouped I in the argument list (except after B<-->, or after options that take an argument). For example, C, C and C are all equivalent. Some options are overloaded to have different behaviour if they are specified more than once. Options can also be set via the environment variable B. If less than two files are specified, then B<-h> is assumed. =head2 Selection and interpretation of PERLEXPR =over 4 =item B<-E> Assume PERLEXPR is a Perl expression. =item B<-F> Assume PERLEXPR is a fixed literal string. Thus, C is equivalent to C. =item B<-G> Assume PERLEXPR is a Perl regular expression I. Thus, C is equivalent to C. =item B<-e> I =over 4 =item B<-e PERLEXPR> Specify a PERLEXPR to match. If used more than once, then it is equivalent to using B<-o>. For example, C, C, and C are all equivalent. =item B<-ee FILE> Specify a file to search. For example, C, will search for the string B<-text> in the file B<-filename>. =back =item B<-f> I =over 4 =item B<-f FILE> FILE is a file containing further PERLEXPRs. Lines will be adjudged to match if they match any of the PERLEXPRs. =item B<-ff FILE> FILE is a file containing files to search. =back =item B<-i> I =over 4 =item B<-i> Ignore case distinctions. Enables B<-G>. =item B<-ii> I matching. Ignore case distinctions if PERLEXPR is entirely lowercase. =back =item B<-o> I =over 4 =item B<-o> Non option arguments following the B<-o> option up until B<--> are interpreted as further PERLEXPRs. Lines will be adjudged to match if they match any of the PERLEXPRs. For example, C is equivalent to C. =item B<-oo> Similar to B<-o>, but only prints the results from files that contain a match to all the PERLEXPRs. =back =item B<-O> Equivalent to B<-ool>. For example, C will print the names of files containing all of the given strings. =item B<-k> Similar to B<-O>, except each PERLEXPR must not match anywhere within the file. It can be thought of as being to B<-O> what B<-L> is to B<-l>. For example, C will print the names of the files that contain all of aa, bb and cc, but none of AA, BB or CC. =item B<-v> Negates the sense of PERLEXPR. =item B<-w> Force PERLEXPR to match only whole I. Enables B<-G>. =item B<-x> Force PERLEXPR to match only whole I. Enables B<-G>. =back =head2 File selection =over 4 =item B<-d> I =over 4 =item B<-d> Any directories in the file list will be searched recursively for files to process. =item B<-dd> The same as B<-d> except it only applies if all the files specified are directories. =back =item B<-I> I =over 4 =item B<-I> Do not process binary files. =item B<-II> Process I binary files. =back =item B<-K> Attempt to I detect each file's encoding. It does this by looking at the first few bytes of the file for: =over 4 =item * A byte order mark (BOM) at the start of the file. Supports utf16le, utf16be, utf32le, utf32be and utf8. =item * No byte order mark, but first few bytes look like the file is ASCII encoded in UTF-16 or UTF-32. =back If this does not succeed, then an encoding can still be determined if the environment variable B is set. This should be a whitespace delimited list of encodings, for example C. These are then tried by B to see if the encoding can be determined. =item B<-M> I This takes a B