UserDB: log timestamps to second granularity
[interchange.git] / lib / Vend / TextSearch.pm
1 # Vend::TextSearch - Search indexes with Perl
2 #
3 # $Id: TextSearch.pm,v 2.18 2008-07-11 12:07:55 racke Exp $
4 #
5 # Adapted for use with Interchange from Search::TextSearch
6 #
7 # Copyright (C) 2002-2008 Interchange Development Group
8 # Copyright (C) 1996-2002 Red Hat, Inc.
9 #
10 # This program is free software; you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation; either version 2 of the License, or
13 # (at your option) any later version.
14 #
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 # GNU General Public License for more details.
19 #
20 # You should have received a copy of the GNU General Public
21 # License along with this program; if not, write to the Free
22 # Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
23 # MA  02110-1301  USA.
24
25 package Vend::TextSearch;
26 require Vend::Search;
27 require Exporter;
28
29 use vars qw(@ISA);
30 @ISA = qw(Vend::Search);
31
32 $VERSION = substr(q$Revision: 2.18 $, 10);
33
34 use Search::Dict;
35 use strict;
36 no warnings qw(uninitialized numeric);
37
38 sub array {
39         my ($s, $opt) = @_;
40         $s->{mv_list_only} = 1; # makes perform_search only return results array
41         return Vend::Scan::perform_search($opt, undef, $s);
42 }
43
44 sub hash {
45         my ($s, $opt) = @_;
46         $s->{mv_return_reference} = 'HASH';
47         $s->{mv_list_only} = 1; # makes perform_search only return results array
48         return Vend::Scan::perform_search($opt, undef, $s);
49 }
50
51 sub list {
52         my ($s, $opt) = @_;
53         $s->{mv_return_reference} = 'LIST';
54         $s->{mv_list_only} = 1; # makes perform_search only return results array
55         return Vend::Scan::perform_search($opt, undef, $s);
56 }
57
58 my %Default = (
59                 matches                 => 0,
60                 mv_head_skip            => 1,
61                 mv_index_delim          => "\t",
62                 mv_matchlimit           => 50,
63                 mv_min_string           => 1,
64         );
65
66
67 sub init {
68         my ($s, $options) = @_;
69
70         @{$s}{keys %Default} = (values %Default);
71         $s->{mv_base_directory}     = $Vend::Cfg->{ProductDir} || 'products',
72         $s->{mv_begin_string}       = [];
73         $s->{mv_all_chars}              = [1];
74         $s->{mv_case}               = [];
75         $s->{mv_column_op}          = [];
76         $s->{mv_negate}             = [];
77         $s->{mv_numeric}            = [];
78         $s->{mv_orsearch}           = [];
79         $s->{mv_searchspec}             = [];
80         $s->{mv_search_group}       = [];
81         $s->{mv_search_field}       = [];
82         $s->{mv_search_file}        = $::Variable->{MV_DEFAULT_SEARCH_FILE}
83                                                                         || ['products.asc'];
84         $s->{mv_searchspec}         = [];
85         $s->{mv_sort_option}        = [];
86         $s->{mv_substring_match}    = [];
87
88         for(keys %$options) {
89                 $s->{$_} = $options->{$_};
90         }
91
92         return;
93 }
94
95 sub new {
96         my ($class, %options) = @_;
97         my $s = new Vend::Search;
98         bless $s, $class;
99         $s->init(\%options);
100         return $s;
101 }
102
103 sub search {
104
105         my($s,%options) = @_;
106
107         my(@out);
108         my($limit_sub,$return_sub,$delayed_return);
109         my($dict_limit,$f,$key,$val);
110         my($searchfile, @searchfiles);
111         my(@specs);
112         my(@pats);
113
114         while (($key,$val) = each %options) {
115                 $s->{$key} = $val;
116         }
117
118         unless (@searchfiles = @{$s->{mv_search_file}}) {
119                 @searchfiles = @{$::Variable->{MV_DEFAULT_SEARCH_FILE}};
120         }
121 #::logDebug("searchfiles=@searchfiles");
122         for(@searchfiles) {
123                 $_ = Vend::Util::catfile($s->{mv_base_directory}, $_)
124                         unless Vend::Util::file_name_is_absolute($_);
125         }
126
127
128         # Auto-index search
129         if(     $s->{mv_dict_look}
130                 and defined $s->{mv_dict_limit}
131                 and $s->{mv_dict_limit} =~ /[^-0-9]/    )
132         {
133                 my $f = $s->{mv_dict_limit};
134                 $s->{mv_dict_limit} = -1;
135                 for (@searchfiles) {
136                         next unless -f "$_.$f"; 
137                         $_ .= ".$f";
138                         $s->{mv_return_fields} = [1];
139                 }
140         }
141 #::logDebug("search: self=" . ::Vend::Util::uneval_it({%$s}));
142         $s->{mv_return_delim} = $s->{mv_index_delim}
143                 unless defined $s->{mv_return_delim};
144
145         @specs = @{$s->{mv_searchspec}};
146
147         @pats = $s->spec_check(@specs);
148
149         if ($s->{mv_coordinate}) {
150                 undef $f;
151         }
152         elsif ($s->{mv_return_all}) {
153                 $f = sub {1};
154         }
155         elsif ($s->{mv_orsearch}[0]) {
156                 eval {$f = $s->create_search_or(
157                                                                         $s->get_scalar(
158                                                                                         qw/mv_case mv_substring_match mv_negate/
159                                                                                         ),
160                                                                                 @pats                                   )};
161         }
162         else  { 
163                 eval {$f = $s->create_search_and(
164                                                                         $s->get_scalar(
165                                                                                         qw/mv_case mv_substring_match mv_negate/
166                                                                                         ),
167                                                                                 @pats                                   )};
168         }
169
170         $@  and  return $s->search_error("Function creation: $@");
171         
172         local($/) = $s->{mv_record_delim} || "\n";
173
174         $s->save_specs();
175
176 #::logDebug("search before open @searchfiles: self=" . ::Vend::Util::uneval_it({%$s}));
177
178 #::logDebug("searchfiles=@searchfiles");
179         while ( $searchfile = shift @searchfiles ) {
180
181                 my $field_names;
182                 -f $searchfile && open(SEARCH, "< $searchfile")
183                         or ::logError( "Couldn't open search file '$searchfile': $!"), next;
184                 $s->adjust_delimiter(\*SEARCH) if $s->{mv_delimiter_auto};
185                 my $line;
186
187                 # Get field names only if no sort (will throw it off) or
188                 # not already defined
189                 if($s->{mv_head_skip} == 1) {
190                         chomp($field_names = <SEARCH>);
191                 }
192                 elsif($s->{mv_head_skip} > 1) {
193                         while(<SEARCH>) {
194                                 chomp($field_names = $_);
195                                 last if $. >= $s->{mv_head_skip};
196                         }
197                 }
198                 if($field_names) {
199                         $field_names =~ s/^\s+//;
200                         $field_names =~ s/\s+$//;
201                         my @laundry = (qw/mv_search_field mv_range_look mv_return_fields/);
202                         $s->hash_fields(
203                                                 [ split /\Q$s->{mv_index_delim}/, $field_names ],
204                                                 @laundry,
205                         );
206                         undef $field_names;
207                 }
208
209                 my $prospect;
210
211                 eval {
212                         # 1 refers to fact you have to make ref from line
213                         ($limit_sub, $prospect) = $s->get_limit($f, 1);
214                 };
215
216                 $@  and  return $s->search_error("Limit subroutine creation: $@");
217
218                 $f = $prospect if $prospect;
219
220                 eval {($return_sub, $delayed_return) = $s->get_return( undef, 1 )};
221
222                 $@  and  return $s->search_error("Return subroutine creation: $@");
223
224                 if($s->{mv_dict_end}) {
225                         if(!$s->{mv_dict_order} && !$s->{mv_dict_fold}) {
226                                 $dict_limit = sub {
227                                                 $_[0] gt $s->{mv_dict_end};
228                                 };
229                         }
230                         elsif(!$s->{mv_dict_order}) {
231                                 $dict_limit = sub {
232                                                 "\L$_[0]" gt "\L$s->{mv_dict_end}";
233                                 };
234                         }
235                         elsif(!$s->{mv_dict_fold}) {
236                                 $dict_limit = sub {
237                                                 my($line) = @_;
238                                                 my($end) = $s->{mv_dict_end};
239                                                 $line =~ tr/A-Za-z0-9_ //cd;
240                                                 $end =~ tr/A-Za-z0-9_ //cd;
241                                                 $line gt $end;
242                                 };
243                         }
244                         else {
245                                 $dict_limit = sub {
246                                                 my($line) = lc @_;
247                                                 my($end) = lc $s->{mv_dict_end};
248                                                 $line =~ tr/a-z0-9_ //cd;
249                                                 $end =~ tr/a-z0-9_ //cd;
250                                                 $line gt $end;
251                                 };
252                         }
253                 }
254
255                 if($s->{mv_dict_look}) {
256                         look(\*SEARCH,
257                                 $s->{mv_dict_look},
258                                 $s->{mv_dict_order},
259                                 $s->{mv_dict_fold});
260                 }
261
262                 if($s->{mv_dict_end} && defined $limit_sub) {
263                         while(<SEARCH>) {
264                                 last if $dict_limit->($_);
265                                 next unless $limit_sub->($_);
266                                 chomp;
267                                 (push @out, $searchfile and last)
268                                         if $s->{mv_return_file_name};
269                                 push @out, $return_sub->($_);
270                         }
271                 }
272                 elsif($s->{mv_dict_end}) {
273                         while(<SEARCH>) {
274                                 last if $dict_limit->($_);
275                                 next unless &$f();
276                                 chomp;
277                                 (push @out, $searchfile and last)
278                                         if $s->{mv_return_file_name};
279                                 push @out, $return_sub->($_);
280                         }
281                 }
282                 elsif($limit_sub) {
283 #::logDebug("limit_sub");
284                         while(<SEARCH>) {
285                                 next unless $limit_sub->($_);
286                                 chomp;
287                                 (push @out, $searchfile and last)
288                                         if $s->{mv_return_file_name};
289                                 push @out, $return_sub->($_);
290                         }
291                 }
292                 elsif (! $f) {
293                         return $s->search_error('No search definition');
294                 }
295                 else {
296 #::logDebug("no limit_sub");
297                         while(<SEARCH>) {
298                                 next unless &$f();
299                                 chomp;
300                                 (push @out, $searchfile and last)
301                                         if $s->{mv_return_file_name};
302                                 push @out, $return_sub->($_);
303                         }
304                 }
305                 close SEARCH;
306                 $s->restore_specs();
307         }
308
309 #::logDebug("before delayed return: self=" . ::Vend::Util::uneval_it({%$s}));
310 #::logDebug("before delayed return: out=" . ::Vend::Util::uneval_it(\@out));
311
312         # Search the results and return
313         if($s->{mv_next_search}) {
314                 @out = $s->search_reference(\@out);
315 #::logDebug("did next_search: " . ::uneval(\@out));
316         }
317
318         $s->{matches} = scalar(@out);
319
320         if($delayed_return and $s->{matches} > 0) {
321                 $s->hash_fields($s->{mv_field_names}, qw/mv_sort_field/);
322 #::logDebug("after hash fields: self=" . ::Vend::Util::uneval_it({%$s}));
323                 $s->sort_search_return(\@out);
324                 $delayed_return = $s->get_return(1);
325                 @out = map { $delayed_return->($_) } @out;
326         }
327 #::logDebug("after delayed return: self=" . ::Vend::Util::uneval_it({%$s}));
328 #::logDebug("after delayed return: out=" . ::Vend::Util::uneval_it(\@out));
329
330         if($s->{mv_unique}) {
331                 my %seen;
332                 @out = grep ! $seen{$_->[0]}++, @out;
333         }
334
335         if($s->{mv_max_matches} > 0) {
336                 splice @out, $s->{mv_max_matches};
337         }
338
339         $s->{matches} = scalar(@out);
340
341         if ($s->{matches} > $s->{mv_matchlimit} and $s->{mv_matchlimit} > 0) {
342                 $s->save_more(\@out)
343                         or ::logError("Error saving matches: $!");
344                 if ($s->{mv_first_match}) {
345                         splice(@out,0,$s->{mv_first_match});
346                         $s->{mv_next_pointer} = $s->{mv_first_match} + $s->{mv_matchlimit};
347                         $s->{mv_next_pointer} = 0
348                                 if $s->{mv_next_pointer} > $s->{matches};
349                 }
350                 elsif ($s->{mv_start_match}) {
351                         my $comp = $s->{mv_start_match};
352                         my $i = -1;
353                         my $found;
354                         for(@out) {
355                                 $i++;
356                                 next unless $_->[0] eq $comp;
357                                 $found = $i;
358                                 last;
359                         }
360                         if(! $found and $s->{mv_numeric}[0]) {
361                                 for(@out) {
362                                         $i++;
363                                         next unless $_->[0] >= $comp;
364                                         $found = $i;
365                                         last;
366                                 }
367                         }
368                         elsif (! $found) {
369                                 for(@out) {
370                                         $i++;
371                                         next unless $_->[0] ge $comp;
372                                         $found = $i;
373                                         last;
374                                 }
375                         }
376                         if($found) {
377                                 splice(@out,0,$found);
378                                 $s->{mv_first_match} = $found;
379                                 $s->{mv_next_pointer} = $found + $s->{mv_matchlimit};
380                                 $s->{mv_next_pointer} = 0
381                                         if $s->{mv_next_pointer} > $s->{matches};
382                         }
383                 }
384                 $#out = $s->{mv_matchlimit} - 1;
385         }
386
387         if(! $s->{mv_return_reference}) {
388                 $s->{mv_results} = \@out;
389                 return $s;
390         }
391         elsif($s->{mv_return_reference} eq 'LIST') {
392                 my $col = scalar @{$s->{mv_return_fields}};
393                 @out = map { join $s->{mv_return_delim}, @$_ } @out;
394                 $s->{mv_results} = join $s->{mv_record_delim}, @out;
395         }
396         elsif($s->{mv_return_reference} eq 'HASH') {
397                 my @names;
398                 @names = @{ $s->{mv_field_names} }[ @{$s->{mv_return_fields}} ];
399                 $names[0] eq '0' and $names[0] = 'code';
400                 my @ary;
401                 for (@out) {
402                         my $h = {};
403                         @{ $h } {@names} = @$_;
404                         push @ary, $h;
405                 }
406                 $s->{mv_results} = \@ary;
407         }
408         return $s;
409 }
410
411 # Unfortunate hack need for Safe searches
412 *create_search_and  = \&Vend::Search::create_search_and;
413 *create_search_or   = \&Vend::Search::create_search_or;
414 *dump_options       = \&Vend::Search::dump_options;
415 *escape             = \&Vend::Search::escape;
416 *get_limit          = \&Vend::Search::get_limit;
417 *get_return         = \&Vend::Search::get_return;
418 *get_scalar         = \&Vend::Search::get_scalar;
419 *hash_fields        = \&Vend::Search::hash_fields;
420 *map_ops            = \&Vend::Search::map_ops;
421 *more_matches       = \&Vend::Search::more_matches;
422 *range_check        = \&Vend::Search::range_check;
423 *restore_specs      = \&Vend::Search::restore_specs;
424 *save_context       = \&Vend::Search::save_context;
425 *save_more          = \&Vend::Search::save_more;
426 *save_specs         = \&Vend::Search::save_specs;
427 *saved_params       = \&Vend::Search::saved_params;
428 *search_error       = \&Vend::Search::search_error;
429 *sort_search_return = \&Vend::Search::sort_search_return;
430 *spec_check         = \&Vend::Search::spec_check;
431 *splice_specs       = \&Vend::Search::splice_specs;
432
433 1;
434 __END__