Also look in the next-highest directory when detecting VCS; add SVN
[interchange.git] / lib / Vend / TextSearch.pm
1 # Vend::TextSearch - Search indexes with Perl
2 #
3 # Adapted for use with Interchange from Search::TextSearch
4 #
5 # Copyright (C) 2002-2017 Interchange Development Group
6 # Copyright (C) 1996-2002 Red Hat, Inc.
7 #
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the GNU General Public
19 # License along with this program; if not, write to the Free
20 # Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
21 # MA  02110-1301  USA.
22
23 package Vend::TextSearch;
24 require Vend::Search;
25 require Exporter;
26
27 use vars qw(@ISA);
28 @ISA = qw(Vend::Search);
29
30 $VERSION = '2.19';
31
32 use Search::Dict;
33 use strict;
34 no warnings qw(uninitialized numeric);
35
36 sub array {
37         my ($s, $opt) = @_;
38         $s->{mv_list_only} = 1; # makes perform_search only return results array
39         return Vend::Scan::perform_search($opt, undef, $s);
40 }
41
42 sub hash {
43         my ($s, $opt) = @_;
44         $s->{mv_return_reference} = 'HASH';
45         $s->{mv_list_only} = 1; # makes perform_search only return results array
46         return Vend::Scan::perform_search($opt, undef, $s);
47 }
48
49 sub list {
50         my ($s, $opt) = @_;
51         $s->{mv_return_reference} = 'LIST';
52         $s->{mv_list_only} = 1; # makes perform_search only return results array
53         return Vend::Scan::perform_search($opt, undef, $s);
54 }
55
56 my %Default = (
57                 matches                 => 0,
58                 mv_head_skip            => 1,
59                 mv_index_delim          => "\t",
60                 mv_matchlimit           => 50,
61                 mv_min_string           => 1,
62         );
63
64
65 sub init {
66         my ($s, $options) = @_;
67
68         @{$s}{keys %Default} = (values %Default);
69         $s->{mv_base_directory}     = $Vend::Cfg->{ProductDir} || 'products',
70         $s->{mv_begin_string}       = [];
71         $s->{mv_all_chars}              = [1];
72         $s->{mv_case}               = [];
73         $s->{mv_column_op}          = [];
74         $s->{mv_negate}             = [];
75         $s->{mv_numeric}            = [];
76         $s->{mv_orsearch}           = [];
77         $s->{mv_searchspec}             = [];
78         $s->{mv_search_group}       = [];
79         $s->{mv_search_field}       = [];
80         $s->{mv_search_file}        = $::Variable->{MV_DEFAULT_SEARCH_FILE}
81                                                                         || ['products.asc'];
82         $s->{mv_searchspec}         = [];
83         $s->{mv_sort_option}        = [];
84         $s->{mv_substring_match}    = [];
85
86         for(keys %$options) {
87                 $s->{$_} = $options->{$_};
88         }
89
90         return;
91 }
92
93 sub new {
94         my ($class, %options) = @_;
95         my $s = new Vend::Search;
96         bless $s, $class;
97         $s->init(\%options);
98         return $s;
99 }
100
101 sub search {
102
103         my($s,%options) = @_;
104
105         my(@out);
106         my($limit_sub,$return_sub,$delayed_return);
107         my($dict_limit,$f,$key,$val);
108         my($searchfile, @searchfiles);
109         my(@specs);
110         my(@pats);
111
112         while (($key,$val) = each %options) {
113                 $s->{$key} = $val;
114         }
115
116         unless (@searchfiles = @{$s->{mv_search_file}}) {
117                 @searchfiles = @{$::Variable->{MV_DEFAULT_SEARCH_FILE}};
118         }
119 #::logDebug("searchfiles=@searchfiles");
120         for(@searchfiles) {
121                 $_ = Vend::Util::catfile($s->{mv_base_directory}, $_)
122                         unless Vend::Util::file_name_is_absolute($_);
123         }
124
125
126         # Auto-index search
127         if(     $s->{mv_dict_look}
128                 and defined $s->{mv_dict_limit}
129                 and $s->{mv_dict_limit} =~ /[^-0-9]/    )
130         {
131                 my $f = $s->{mv_dict_limit};
132                 $s->{mv_dict_limit} = -1;
133                 for (@searchfiles) {
134                         next unless -f "$_.$f"; 
135                         $_ .= ".$f";
136                         $s->{mv_return_fields} = [1];
137                 }
138         }
139 #::logDebug("search: self=" . ::Vend::Util::uneval_it({%$s}));
140         $s->{mv_return_delim} = $s->{mv_index_delim}
141                 unless defined $s->{mv_return_delim};
142
143         @specs = @{$s->{mv_searchspec}};
144
145         @pats = $s->spec_check(@specs);
146
147         # clear errors for non-eval code paths
148         undef $@;
149         if ($s->{mv_coordinate}) {
150                 undef $f;
151         }
152         elsif ($s->{mv_return_all}) {
153                 $f = sub {1};
154         }
155         elsif ($s->{mv_orsearch}[0]) {
156                 eval {$f = $s->create_search_or(
157                                                                         $s->get_scalar(
158                                                                                         qw/mv_case mv_substring_match mv_negate/
159                                                                                         ),
160                                                                                 @pats                                   )};
161         }
162         else  { 
163                 eval {$f = $s->create_search_and(
164                                                                         $s->get_scalar(
165                                                                                         qw/mv_case mv_substring_match mv_negate/
166                                                                                         ),
167                                                                                 @pats                                   )};
168         }
169         $@  and  return $s->search_error("Function creation: $@");
170         
171         local($/) = $s->{mv_record_delim} || "\n";
172
173         $s->save_specs();
174
175 #::logDebug("search before open @searchfiles: self=" . ::Vend::Util::uneval_it({%$s}));
176
177 #::logDebug("searchfiles=@searchfiles");
178         while ( $searchfile = shift @searchfiles ) {
179
180                 my $field_names;
181                 -f $searchfile && open(SEARCH, "< $searchfile")
182                         or ::logError( "Couldn't open search file '$searchfile': $!"), next;
183                 $s->adjust_delimiter(\*SEARCH) if $s->{mv_delimiter_auto};
184                 my $line;
185
186                 # Get field names only if no sort (will throw it off) or
187                 # not already defined
188                 if($s->{mv_head_skip} == 1) {
189                         chomp($field_names = <SEARCH>);
190                 }
191                 elsif($s->{mv_head_skip} > 1) {
192                         while(<SEARCH>) {
193                                 chomp($field_names = $_);
194                                 last if $. >= $s->{mv_head_skip};
195                         }
196                 }
197                 if($field_names) {
198                         $field_names =~ s/^\s+//;
199                         $field_names =~ s/\s+$//;
200                         my @laundry = (qw/mv_search_field mv_range_look mv_return_fields/);
201                         $s->hash_fields(
202                                                 [ split /\Q$s->{mv_index_delim}/, $field_names ],
203                                                 @laundry,
204                         );
205                         undef $field_names;
206                 }
207
208                 my $prospect;
209
210                 eval {
211                         # 1 refers to fact you have to make ref from line
212                         ($limit_sub, $prospect) = $s->get_limit($f, 1);
213                 };
214
215                 $@  and  return $s->search_error("Limit subroutine creation: $@");
216
217                 $f = $prospect if $prospect;
218
219                 eval {($return_sub, $delayed_return) = $s->get_return( undef, 1 )};
220
221                 $@  and  return $s->search_error("Return subroutine creation: $@");
222
223                 if($s->{mv_dict_end}) {
224                         if(!$s->{mv_dict_order} && !$s->{mv_dict_fold}) {
225                                 $dict_limit = sub {
226                                                 $_[0] gt $s->{mv_dict_end};
227                                 };
228                         }
229                         elsif(!$s->{mv_dict_order}) {
230                                 $dict_limit = sub {
231                                                 "\L$_[0]" gt "\L$s->{mv_dict_end}";
232                                 };
233                         }
234                         elsif(!$s->{mv_dict_fold}) {
235                                 $dict_limit = sub {
236                                                 my($line) = @_;
237                                                 my($end) = $s->{mv_dict_end};
238                                                 $line =~ tr/A-Za-z0-9_ //cd;
239                                                 $end =~ tr/A-Za-z0-9_ //cd;
240                                                 $line gt $end;
241                                 };
242                         }
243                         else {
244                                 $dict_limit = sub {
245                                                 my($line) = lc @_;
246                                                 my($end) = lc $s->{mv_dict_end};
247                                                 $line =~ tr/a-z0-9_ //cd;
248                                                 $end =~ tr/a-z0-9_ //cd;
249                                                 $line gt $end;
250                                 };
251                         }
252                 }
253
254                 if($s->{mv_dict_look}) {
255                         look(\*SEARCH,
256                                 $s->{mv_dict_look},
257                                 $s->{mv_dict_order},
258                                 $s->{mv_dict_fold});
259                 }
260
261                 if($s->{mv_dict_end} && defined $limit_sub) {
262                         while(<SEARCH>) {
263                                 last if $dict_limit->($_);
264                                 next unless $limit_sub->($_);
265                                 chomp;
266                                 (push @out, $searchfile and last)
267                                         if $s->{mv_return_file_name};
268                                 push @out, $return_sub->($_);
269                         }
270                 }
271                 elsif($s->{mv_dict_end}) {
272                         while(<SEARCH>) {
273                                 last if $dict_limit->($_);
274                                 next unless &$f();
275                                 chomp;
276                                 (push @out, $searchfile and last)
277                                         if $s->{mv_return_file_name};
278                                 push @out, $return_sub->($_);
279                         }
280                 }
281                 elsif($limit_sub) {
282 #::logDebug("limit_sub");
283                         while(<SEARCH>) {
284                                 next unless $limit_sub->($_);
285                                 chomp;
286                                 (push @out, $searchfile and last)
287                                         if $s->{mv_return_file_name};
288                                 push @out, $return_sub->($_);
289                         }
290                 }
291                 elsif (! $f) {
292                         return $s->search_error('No search definition');
293                 }
294                 else {
295 #::logDebug("no limit_sub");
296                         while(<SEARCH>) {
297                                 next unless &$f();
298                                 chomp;
299                                 (push @out, $searchfile and last)
300                                         if $s->{mv_return_file_name};
301                                 push @out, $return_sub->($_);
302                         }
303                 }
304                 close SEARCH;
305                 $s->restore_specs();
306         }
307
308 #::logDebug("before delayed return: self=" . ::Vend::Util::uneval_it({%$s}));
309 #::logDebug("before delayed return: out=" . ::Vend::Util::uneval_it(\@out));
310
311         # Search the results and return
312         if($s->{mv_next_search}) {
313                 @out = $s->search_reference(\@out);
314 #::logDebug("did next_search: " . ::uneval(\@out));
315         }
316
317         $s->{matches} = scalar(@out);
318
319         if($delayed_return and $s->{matches} > 0) {
320                 $s->hash_fields($s->{mv_field_names}, qw/mv_sort_field/);
321 #::logDebug("after hash fields: self=" . ::Vend::Util::uneval_it({%$s}));
322                 $s->sort_search_return(\@out);
323                 $delayed_return = $s->get_return(1);
324                 @out = map { $delayed_return->($_) } @out;
325         }
326 #::logDebug("after delayed return: self=" . ::Vend::Util::uneval_it({%$s}));
327 #::logDebug("after delayed return: out=" . ::Vend::Util::uneval_it(\@out));
328
329         if($s->{mv_unique}) {
330                 my %seen;
331                 @out = grep ! $seen{$_->[0]}++, @out;
332         }
333
334         if($s->{mv_max_matches} > 0) {
335                 splice @out, $s->{mv_max_matches};
336         }
337
338         $s->{matches} = scalar(@out);
339
340         if ($s->{matches} > $s->{mv_matchlimit} and $s->{mv_matchlimit} > 0) {
341                 $s->save_more(\@out)
342                         or ::logError("Error saving matches: $!");
343                 if ($s->{mv_first_match}) {
344                         splice(@out,0,$s->{mv_first_match});
345                         $s->{mv_next_pointer} = $s->{mv_first_match} + $s->{mv_matchlimit};
346                         $s->{mv_next_pointer} = 0
347                                 if $s->{mv_next_pointer} > $s->{matches};
348                 }
349                 elsif ($s->{mv_start_match}) {
350                         my $comp = $s->{mv_start_match};
351                         my $i = -1;
352                         my $found;
353                         for(@out) {
354                                 $i++;
355                                 next unless $_->[0] eq $comp;
356                                 $found = $i;
357                                 last;
358                         }
359                         if(! $found and $s->{mv_numeric}[0]) {
360                                 for(@out) {
361                                         $i++;
362                                         next unless $_->[0] >= $comp;
363                                         $found = $i;
364                                         last;
365                                 }
366                         }
367                         elsif (! $found) {
368                                 for(@out) {
369                                         $i++;
370                                         next unless $_->[0] ge $comp;
371                                         $found = $i;
372                                         last;
373                                 }
374                         }
375                         if($found) {
376                                 splice(@out,0,$found);
377                                 $s->{mv_first_match} = $found;
378                                 $s->{mv_next_pointer} = $found + $s->{mv_matchlimit};
379                                 $s->{mv_next_pointer} = 0
380                                         if $s->{mv_next_pointer} > $s->{matches};
381                         }
382                 }
383                 $#out = $s->{mv_matchlimit} - 1;
384         }
385
386         if(! $s->{mv_return_reference}) {
387                 $s->{mv_results} = \@out;
388                 return $s;
389         }
390         elsif($s->{mv_return_reference} eq 'LIST') {
391                 my $col = scalar @{$s->{mv_return_fields}};
392                 @out = map { join $s->{mv_return_delim}, @$_ } @out;
393                 $s->{mv_results} = join $s->{mv_record_delim}, @out;
394         }
395         elsif($s->{mv_return_reference} eq 'HASH') {
396                 my @names;
397                 @names = @{ $s->{mv_field_names} }[ @{$s->{mv_return_fields}} ];
398                 $names[0] eq '0' and $names[0] = 'code';
399                 my @ary;
400                 for (@out) {
401                         my $h = {};
402                         @{ $h } {@names} = @$_;
403                         push @ary, $h;
404                 }
405                 $s->{mv_results} = \@ary;
406         }
407         return $s;
408 }
409
410 # Unfortunate hack need for Safe searches
411 *create_search_and  = \&Vend::Search::create_search_and;
412 *create_search_or   = \&Vend::Search::create_search_or;
413 *dump_options       = \&Vend::Search::dump_options;
414 *escape             = \&Vend::Search::escape;
415 *get_limit          = \&Vend::Search::get_limit;
416 *get_return         = \&Vend::Search::get_return;
417 *get_scalar         = \&Vend::Search::get_scalar;
418 *hash_fields        = \&Vend::Search::hash_fields;
419 *map_ops            = \&Vend::Search::map_ops;
420 *more_matches       = \&Vend::Search::more_matches;
421 *range_check        = \&Vend::Search::range_check;
422 *restore_specs      = \&Vend::Search::restore_specs;
423 *save_context       = \&Vend::Search::save_context;
424 *save_more          = \&Vend::Search::save_more;
425 *save_specs         = \&Vend::Search::save_specs;
426 *saved_params       = \&Vend::Search::saved_params;
427 *search_error       = \&Vend::Search::search_error;
428 *sort_search_return = \&Vend::Search::sort_search_return;
429 *spec_check         = \&Vend::Search::spec_check;
430 *splice_specs       = \&Vend::Search::splice_specs;
431
432 1;
433 __END__