Also look in the next-highest directory when detecting VCS; add SVN
[interchange.git] / lib / Vend / Glimpse.pm
1 # Vend::Glimpse - Search indexes with Glimpse
2 #
3 # Adapted for use with Interchange from Search::Glimpse
4 #
5 # Copyright (C) 2002-2017 Interchange Development Group
6 # Copyright (C) 1996-2002 Red Hat, Inc.
7 #
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the GNU General Public
19 # License along with this program; if not, write to the Free
20 # Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
21 # MA  02110-1301  USA.
22
23 package Vend::Glimpse;
24 require Vend::Search;
25 @ISA = qw(Vend::Search);
26
27 $VERSION = '2.17';
28 use strict;
29 use Vend::File;
30 use Vend::Util;
31
32
33 sub array {
34         my ($s, $opt) = @_;
35         $s->{mv_list_only} = 1; # makes perform_search only return results array
36         return Vend::Scan::perform_search($opt, undef, $s);
37 }
38
39 sub hash {
40         my ($s, $opt) = @_;
41         $s->{mv_return_reference} = 'HASH';
42         $s->{mv_list_only} = 1; # makes perform_search only return results array
43         return Vend::Scan::perform_search($opt, undef, $s);
44 }
45
46 sub list {
47         my ($s, $opt) = @_;
48         $s->{mv_return_reference} = 'LIST';
49         $s->{mv_list_only} = 1; # makes perform_search only return results array
50         return Vend::Scan::perform_search($opt, undef, $s);
51 }
52
53 my %Default = (
54                 matches                 => 0,
55                 mv_head_skip            => 0,
56                 mv_index_delim          => "\t",
57                 mv_record_delim         => "\n",
58                 mv_matchlimit           => 50,
59                 mv_max_matches          => 2000,
60                 mv_min_string           => 4,
61 );
62
63
64 sub init {
65         my ($s, $options) = @_;
66
67         @{$s}{keys %Default} = (values %Default);
68         $s->{mv_base_directory}     = $Vend::Cfg->{ProductDir} || 'products',
69         $s->{mv_begin_string}       = [];
70         $s->{mv_all_chars}              = [1];
71         $s->{mv_case}               = [];
72         $s->{mv_column_op}          = [];
73         $s->{mv_negate}             = [];
74         $s->{mv_numeric}            = [];
75         $s->{mv_orsearch}           = [];
76         $s->{mv_searchspec}             = [];
77         $s->{mv_search_group}       = [];
78         $s->{mv_search_field}       = [];
79         $s->{mv_search_file}        = [];
80         $s->{mv_searchspec}         = [];
81         $s->{mv_sort_option}        = [];
82         $s->{mv_substring_match}    = [];
83         $s->{mv_field_file}         = $::Variable->{MV_DEFAULT_SEARCH_FILE}[0];
84         $s->{glimpse_cmd} = $Vend::Cfg->{Glimpse} || 'glimpse';
85
86         for(keys %$options) {
87                 $s->{$_} = $options->{$_};
88         }
89
90         return;
91 }
92
93 sub new {
94         my ($class, %options) = @_;
95         my $s = new Vend::Search;
96         bless $s, $class;
97         $s->init(\%options);
98         return $s;
99 }
100
101 sub search {
102
103         my($s,%options) = @_;
104
105         my(@out);
106         my($limit_sub,$return_sub,$delayed_return);
107         my($dict_limit,$f,$key,$val);
108         my($searchfile, @searchfiles);
109         my(@specs);
110         my(@pats);
111
112         while (($key,$val) = each %options) {
113                 $s->{$key} = $val;
114         }
115
116         @searchfiles = @{$s->{mv_search_file}};
117
118         for(@searchfiles) {
119                 $_ = Vend::Util::catfile($s->{mv_base_directory}, $_)
120                         unless Vend::Util::file_name_is_absolute($_);
121         }
122
123         unless ($s->{mv_no_hide} or $s->{mv_hide_field}) {
124                 my $dbref = $s->{table} || undef;
125                 if (! $dbref) {
126                         my $table = $s->{mv_field_file};
127                         $table =~ s:.*/::;
128                         $table =~ s/\..*//;
129                         $dbref = Vend::Data::database_exists_ref($table);
130                 }
131                 if ($dbref) {
132                         my $hf = $dbref->config('HIDE_FIELD');
133                         $s->{mv_hide_field} = $hf if defined $hf;
134 #::logDebug("mv_hide_field=$hf");
135                 }
136         }
137
138 #::logDebug("gsearch: self=" . ::Vend::Util::uneval_it({%$s}));
139         $s->{mv_return_delim} = $s->{mv_index_delim}
140                 unless defined $s->{mv_return_delim};
141
142         return $s->search_error("Search with glimpse, no glimpse configured.")
143                 if ! $s->{glimpse_cmd};
144
145         @specs = @{$s->{mv_searchspec}};
146
147         @pats = $s->spec_check(@specs);
148
149         return undef if $s->{matches} == -1;
150
151         # Build glimpse line
152         my @cmd;
153         push @cmd, $s->{glimpse_cmd};
154         push @cmd, "-H $s->{mv_base_directory}"
155                         unless $s->{glimpse_cmd} =~ /\s+-H/;
156
157         if ($s->{mv_spelling_errors}) {
158                 $s->{mv_spelling_errors} = int  $s->{mv_spelling_errors};
159                 push @cmd, '-' . $s->{mv_spelling_errors};
160         }
161
162         push @cmd, "-i" unless $s->{mv_case} and $s->{mv_case}[0];
163         push @cmd, "-h" unless $s->{mv_return_file_name};
164         push @cmd, "-y -L $s->{mv_max_matches}:0:$s->{mv_max_matches}";
165         push(@cmd, "-F '$s->{mv_search_file}[0]'")
166                 if defined $s->{mv_search_file}[0];
167
168         push @cmd, '-w' unless @{$s->{mv_substring_match}};
169         push(@cmd, '-O -X') if $s->{mv_return_file_name};
170         
171         if($s->{mv_return_file_name}) { 
172                 push @cmd, "-d 'NeVAiRbE'";
173         }
174         elsif (! $s->{mv_record_delim} or $s->{mv_record_delim} eq "\n") { 
175                  #intentionally empty 
176         }
177         elsif ($s->{mv_record_delim} =~ /^\n+(.*)/) {
178                 #This doesn't handle two newlines, unfortunately
179                 push @cmd, "-d '^$1'";
180         }
181         else {
182                 $s->{mv_record_delim} =~ s/'/\\'/g; 
183                 push @cmd, "-d '$s->{mv_record_delim}'";
184         }
185
186         if($s->{regex_specs}) {
187                 @pats = @{$s->{regex_specs}};
188         }
189
190         my $joiner = $s->{mv_orsearch}[0] ? ',' : ';';
191
192         # clear errors for non-eval code paths below
193         undef $@;
194         if ($s->{mv_coordinate}) {
195                 undef $f;
196         }
197         elsif ($s->{mv_return_all}) {
198                 return $s->search_error("mv_return_all not valid for Glimpse.");
199         }
200         elsif ($s->{mv_orsearch}[0]) {
201                 # Put mv_min_string in instead of mv_substring_match to avoid
202                 # \b in search function
203                 eval {$f = $s->create_search_or(
204                                                                         $s->get_scalar(
205                                                                                         qw/mv_case mv_min_string mv_negate/
206                                                                                         ),
207                                                                                 @pats                                   )};
208         }
209         else  { 
210                 # Put mv_min_string in instead of mv_substring_match to avoid
211                 # \b in search function
212                 eval {$f = $s->create_search_and(
213                                                                         $s->get_scalar(
214                                                                                         qw/mv_case mv_min_string mv_negate/
215                                                                                         ),
216                                                                                 @pats                                   )};
217         }
218         $@  and  return $s->search_error("Function creation: $@");
219
220         local($/) = $s->{mv_record_delim} || "\n";
221
222         $s->save_specs();
223         
224         my $spec = join $joiner, @pats;
225         $spec =~ s/'/./g;
226
227         if(length($spec) < $s->{mv_min_string}) {
228                 my $msg = errmsg(
229                                         "Glimpse search string less than minimum %s characters: %s",
230                                         $s->{mv_min_string},
231                                         $spec,
232                                 );
233                 return $s->search_error($msg);
234         }
235
236         push @cmd, "'$spec'";
237
238         $joiner = $spec;
239         $joiner =~ s/['";,]//g;
240         if(length($joiner) < $s->{mv_min_string}) {
241                 my $msg = ::errmsg (<<EOF, $s->{mv_min_string}, $joiner);
242 Search strings must be at least %s characters.
243 You had '%s' as the operative characters  of your search strings.
244 EOF
245                 return $s->search_error($msg);
246         }
247
248         my $cmd = join ' ', @cmd;
249
250 #::logDebug("Glimpse command '$cmd'");
251
252         GLIMPSE: {
253
254                 open(Vend::Glimpse::SEARCH, "$cmd |")
255                         or ::logError( "Couldn't fork glimpse search '$cmd': $!"), next;
256                 #$s->adjust_delimiter(\*SEARCH) if $s->{mv_delimiter_auto};
257                 my $line;
258                 my $field_names;
259
260                 # Get field names only if no sort (will throw it off) or
261                 # not already defined
262                 if($s->{mv_field_file}) {
263                         allowed_file($s->{mv_field_file})
264                                 or return $s->search_error("can't open fields file");
265                         $s->{mv_field_file} =
266                                         ::catfile($Vend::Cfg->{ProductDir}, $s->{mv_field_file})
267                                 unless ::file_name_is_absolute($s->{mv_field_file});
268                         open(FF, "< $s->{mv_field_file}")
269                                 or return $s->search_error("can't open fields file");
270                         chomp($field_names = <FF>);
271                 }
272                 if($field_names) {
273                         $field_names =~ s/^\s+//;
274                         my @laundry = (qw/mv_search_field mv_range_look mv_return_fields/);
275                         $s->hash_fields(
276                                                 [ split /\Q$s->{mv_index_delim}/, $field_names ],
277                                                 @laundry,
278                         );
279                         undef $field_names;
280                 }
281
282 #::logDebug("search after getting fields: self=" . ::uneval({%$s}));
283                 my $prospect;
284
285                 eval {
286                         ($limit_sub, $prospect) = $s->get_limit($f, 1);
287                 };
288
289                 $@  and  return $s->search_error("Limit subroutine creation: $@");
290
291                 $f = $prospect if $prospect;
292
293                 eval {($return_sub, $delayed_return) = $s->get_return(undef, 1)};
294
295                 $@  and  return $s->search_error("Return subroutine creation: $@");
296
297                 if(! defined $f and defined $limit_sub) {
298 #::logDebug("no f, limit");
299                         while(<Vend::Glimpse::SEARCH>) {
300                                 next unless &$limit_sub($_);
301                                 (push @out, $_ and last)
302                                         if $s->{mv_return_file_name};
303                                 push @out, &$return_sub($_);
304                         }
305                 }
306                 elsif(defined $limit_sub) {
307 #::logDebug("f, limit");
308 #::logDebug("record_delim: |$s->{mv_record_delim}|, delim=|$/|");
309                         while(<Vend::Glimpse::SEARCH>) {
310 #::logDebug("in line: $_");
311                                 next unless &$f();
312 #::logDebug("match line f: $_");
313                                 next unless &$limit_sub($_);
314 #::logDebug("match line limit: $_");
315                                 (push @out, $_ and last)
316                                         if $s->{mv_return_file_name};
317                                 push @out, &$return_sub($_);
318                         }
319                 }
320                 elsif (!defined $f) {
321 #::logDebug("no f, no limit");
322                         return $s->search_error('No search definition');
323                 }
324                 else {
325 #::logDebug("f, no limit");
326                         while(<Vend::Glimpse::SEARCH>) {
327                                 next unless &$f();
328                                 (push @out, $_ and last)
329                                         if $s->{mv_return_file_name};
330                                 push @out, &$return_sub($_);
331                         }
332                 }
333 #::logDebug("gsearch before closing search: self=" . ::Vend::Util::uneval_it({%$s}));
334                 close Vend::Glimpse::SEARCH;
335                 $s->restore_specs();
336         }
337
338         $s->{matches} = scalar(@out);
339
340 #::logDebug("gsearch before delayed return: self=" . ::Vend::Util::uneval_it({%$s}));
341         if($delayed_return and $s->{matches} > 0) {
342                 $s->hash_fields($s->{mv_field_names}, qw/mv_sort_field/);
343 #::logDebug("gsearch after hash fields: self=" . ::Vend::Util::uneval_it({%$s}));
344                 $s->sort_search_return(\@out);
345                 $delayed_return = $s->get_return(1);
346                 @out = map { $delayed_return->($_) } @out;
347         }
348 #::logDebug("after delayed return: self=" . ::Vend::Util::uneval_it({%$s}));
349
350         if($s->{mv_unique}) {
351                 my %seen;
352                 @out = grep ! $seen{$_->[0]}++, @out;
353                 $s->{matches} = scalar(@out);
354         }
355
356         if ($s->{matches} > $s->{mv_matchlimit} and $s->{mv_matchlimit} > 0) {
357                 $s->save_more(\@out)
358                         or ::logError("Error saving matches: $!");
359                 if ($s->{mv_first_match}) {
360                         splice(@out,0,$s->{mv_first_match});
361                         $s->{mv_next_pointer} = $s->{mv_first_match} + $s->{mv_matchlimit};
362                         $s->{mv_next_pointer} = 0
363                                 if $s->{mv_next_pointer} > $s->{matches};
364                 }
365                 $#out = $s->{mv_matchlimit} - 1;
366         }
367
368         if(! $s->{mv_return_reference}) {
369                 $s->{mv_results} = \@out;
370                 return $s;
371         }
372         elsif($s->{mv_return_reference} eq 'LIST') {
373                 my $col = scalar @{$s->{mv_return_fields}};
374                 @out = map { join $s->{mv_return_delim}, @$_ } @out;
375                 $s->{mv_results} = join $s->{mv_record_delim}, @out;
376         }
377         elsif($s->{mv_return_reference} eq 'HASH') {
378                 my $col = scalar @{$s->{mv_return_fields}};
379                 my @col;
380                 my @names;
381                 @names = @{$s->{mv_field_names}};
382                 $names[0] eq '0' and $names[0] = 'code';
383                 my %hash;
384                 my $key;
385                 for (@out) {
386                         @col = split /$s->{mv_return_delim}/, $_, $col;
387                         $hash{$col[0]} = {};
388                         @{ $hash{$col[0]} } {@names} = @col;
389                 }
390                 $s->{mv_results} = \%hash;
391         }
392         return $s;
393 }
394
395 # Unfortunate hack need for Safe searches
396 *create_search_and  = \&Vend::Search::create_search_and;
397 *create_search_or   = \&Vend::Search::create_search_or;
398 *dump_options       = \&Vend::Search::dump_options;
399 *escape             = \&Vend::Search::escape;
400 *get_limit          = \&Vend::Search::get_limit;
401 *get_return         = \&Vend::Search::get_return;
402 *get_scalar         = \&Vend::Search::get_scalar;
403 *hash_fields        = \&Vend::Search::hash_fields;
404 *map_ops            = \&Vend::Search::map_ops;
405 *more_matches       = \&Vend::Search::more_matches;
406 *range_check        = \&Vend::Search::range_check;
407 *restore_specs      = \&Vend::Search::restore_specs;
408 *save_context       = \&Vend::Search::save_context;
409 *save_more          = \&Vend::Search::save_more;
410 *save_specs         = \&Vend::Search::save_specs;
411 *saved_params       = \&Vend::Search::saved_params;
412 *search_error       = \&Vend::Search::search_error;
413 *sort_search_return = \&Vend::Search::sort_search_return;
414 *spec_check         = \&Vend::Search::spec_check;
415 *splice_specs       = \&Vend::Search::splice_specs;
416
417 1;
418 __END__