Revert "Embed Safe 2.07 into Vend::Safe to avoid various problems with recent version...
[interchange.git] / lib / Vend / Glimpse.pm
1 # Vend::Glimpse - Search indexes with Glimpse
2 #
3 # $Id: Glimpse.pm,v 2.16 2007-08-09 13:40:53 pajamian Exp $
4 #
5 # Adapted for use with Interchange from Search::Glimpse
6 #
7 # Copyright (C) 2002-2007 Interchange Development Group
8 # Copyright (C) 1996-2002 Red Hat, Inc.
9 #
10 # This program is free software; you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation; either version 2 of the License, or
13 # (at your option) any later version.
14 #
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 # GNU General Public License for more details.
19 #
20 # You should have received a copy of the GNU General Public
21 # License along with this program; if not, write to the Free
22 # Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
23 # MA  02110-1301  USA.
24
25 package Vend::Glimpse;
26 require Vend::Search;
27 @ISA = qw(Vend::Search);
28
29 $VERSION = substr(q$Revision: 2.16 $, 10);
30 use strict;
31 use Vend::File;
32 use Vend::Util;
33
34
35 sub array {
36         my ($s, $opt) = @_;
37         $s->{mv_list_only} = 1; # makes perform_search only return results array
38         return Vend::Scan::perform_search($opt, undef, $s);
39 }
40
41 sub hash {
42         my ($s, $opt) = @_;
43         $s->{mv_return_reference} = 'HASH';
44         $s->{mv_list_only} = 1; # makes perform_search only return results array
45         return Vend::Scan::perform_search($opt, undef, $s);
46 }
47
48 sub list {
49         my ($s, $opt) = @_;
50         $s->{mv_return_reference} = 'LIST';
51         $s->{mv_list_only} = 1; # makes perform_search only return results array
52         return Vend::Scan::perform_search($opt, undef, $s);
53 }
54
55 my %Default = (
56                 matches                 => 0,
57                 mv_head_skip            => 0,
58                 mv_index_delim          => "\t",
59                 mv_record_delim         => "\n",
60                 mv_matchlimit           => 50,
61                 mv_max_matches          => 2000,
62                 mv_min_string           => 4,
63 );
64
65
66 sub init {
67         my ($s, $options) = @_;
68
69         @{$s}{keys %Default} = (values %Default);
70         $s->{mv_base_directory}     = $Vend::Cfg->{ProductDir} || 'products',
71         $s->{mv_begin_string}       = [];
72         $s->{mv_all_chars}              = [1];
73         $s->{mv_case}               = [];
74         $s->{mv_column_op}          = [];
75         $s->{mv_negate}             = [];
76         $s->{mv_numeric}            = [];
77         $s->{mv_orsearch}           = [];
78         $s->{mv_searchspec}             = [];
79         $s->{mv_search_group}       = [];
80         $s->{mv_search_field}       = [];
81         $s->{mv_search_file}        = [];
82         $s->{mv_searchspec}         = [];
83         $s->{mv_sort_option}        = [];
84         $s->{mv_substring_match}    = [];
85         $s->{mv_field_file}         = $::Variable->{MV_DEFAULT_SEARCH_FILE}[0];
86         $s->{glimpse_cmd} = $Vend::Cfg->{Glimpse} || 'glimpse';
87
88         for(keys %$options) {
89                 $s->{$_} = $options->{$_};
90         }
91
92         return;
93 }
94
95 sub new {
96         my ($class, %options) = @_;
97         my $s = new Vend::Search;
98         bless $s, $class;
99         $s->init(\%options);
100         return $s;
101 }
102
103 sub search {
104
105         my($s,%options) = @_;
106
107         my(@out);
108         my($limit_sub,$return_sub,$delayed_return);
109         my($dict_limit,$f,$key,$val);
110         my($searchfile, @searchfiles);
111         my(@specs);
112         my(@pats);
113
114         while (($key,$val) = each %options) {
115                 $s->{$key} = $val;
116         }
117
118         @searchfiles = @{$s->{mv_search_file}};
119
120         for(@searchfiles) {
121                 $_ = Vend::Util::catfile($s->{mv_base_directory}, $_)
122                         unless Vend::Util::file_name_is_absolute($_);
123         }
124
125         unless ($s->{mv_no_hide} or $s->{mv_hide_field}) {
126                 my $dbref = $s->{table} || undef;
127                 if (! $dbref) {
128                         my $table = $s->{mv_field_file};
129                         $table =~ s:.*/::;
130                         $table =~ s/\..*//;
131                         $dbref = Vend::Data::database_exists_ref($table);
132                 }
133                 if ($dbref) {
134                         my $hf = $dbref->config('HIDE_FIELD');
135                         $s->{mv_hide_field} = $hf if defined $hf;
136 #::logDebug("mv_hide_field=$hf");
137                 }
138         }
139
140 #::logDebug("gsearch: self=" . ::Vend::Util::uneval_it({%$s}));
141         $s->{mv_return_delim} = $s->{mv_index_delim}
142                 unless defined $s->{mv_return_delim};
143
144         return $s->search_error("Search with glimpse, no glimpse configured.")
145                 if ! $s->{glimpse_cmd};
146
147         @specs = @{$s->{mv_searchspec}};
148
149         @pats = $s->spec_check(@specs);
150
151         return undef if $s->{matches} == -1;
152
153         # Build glimpse line
154         my @cmd;
155         push @cmd, $s->{glimpse_cmd};
156         push @cmd, "-H $s->{mv_base_directory}"
157                         unless $s->{glimpse_cmd} =~ /\s+-H/;
158
159         if ($s->{mv_spelling_errors}) {
160                 $s->{mv_spelling_errors} = int  $s->{mv_spelling_errors};
161                 push @cmd, '-' . $s->{mv_spelling_errors};
162         }
163
164         push @cmd, "-i" unless $s->{mv_case} and $s->{mv_case}[0];
165         push @cmd, "-h" unless $s->{mv_return_file_name};
166         push @cmd, "-y -L $s->{mv_max_matches}:0:$s->{mv_max_matches}";
167         push(@cmd, "-F '$s->{mv_search_file}[0]'")
168                 if defined $s->{mv_search_file}[0];
169
170         push @cmd, '-w' unless @{$s->{mv_substring_match}};
171         push(@cmd, '-O -X') if $s->{mv_return_file_name};
172         
173         if($s->{mv_return_file_name}) { 
174                 push @cmd, "-d 'NeVAiRbE'";
175         }
176         elsif (! $s->{mv_record_delim} or $s->{mv_record_delim} eq "\n") { 
177                  #intentionally empty 
178         }
179         elsif ($s->{mv_record_delim} =~ /^\n+(.*)/) {
180                 #This doesn't handle two newlines, unfortunately
181                 push @cmd, "-d '^$1'";
182         }
183         else {
184                 $s->{mv_record_delim} =~ s/'/\\'/g; 
185                 push @cmd, "-d '$s->{mv_record_delim}'";
186         }
187
188         if($s->{regex_specs}) {
189                 @pats = @{$s->{regex_specs}};
190         }
191
192         my $joiner = $s->{mv_orsearch}[0] ? ',' : ';';
193
194         if ($s->{mv_coordinate}) {
195                 undef $f;
196         }
197         elsif ($s->{mv_return_all}) {
198                 return $s->search_error("mv_return_all not valid for Glimpse.");
199         }
200         elsif ($s->{mv_orsearch}[0]) {
201                 # Put mv_min_string in instead of mv_substring_match to avoid
202                 # \b in search function
203                 eval {$f = $s->create_search_or(
204                                                                         $s->get_scalar(
205                                                                                         qw/mv_case mv_min_string mv_negate/
206                                                                                         ),
207                                                                                 @pats                                   )};
208         }
209         else  { 
210                 # Put mv_min_string in instead of mv_substring_match to avoid
211                 # \b in search function
212                 eval {$f = $s->create_search_and(
213                                                                         $s->get_scalar(
214                                                                                         qw/mv_case mv_min_string mv_negate/
215                                                                                         ),
216                                                                                 @pats                                   )};
217         }
218
219         $@  and  return $s->search_error("Function creation: $@");
220         local($/) = $s->{mv_record_delim} || "\n";
221
222         $s->save_specs();
223         
224         my $spec = join $joiner, @pats;
225         $spec =~ s/'/./g;
226
227         if(length($spec) < $s->{mv_min_string}) {
228                 my $msg = errmsg(
229                                         "Glimpse search string less than minimum %s characters: %s",
230                                         $s->{mv_min_string},
231                                         $spec,
232                                 );
233                 return $s->search_error($msg);
234         }
235
236         push @cmd, "'$spec'";
237
238         $joiner = $spec;
239         $joiner =~ s/['";,]//g;
240         if(length($joiner) < $s->{mv_min_string}) {
241                 my $msg = ::errmsg (<<EOF, $s->{mv_min_string}, $joiner);
242 Search strings must be at least %s characters.
243 You had '%s' as the operative characters  of your search strings.
244 EOF
245                 return $s->search_error($msg);
246         }
247
248         my $cmd = join ' ', @cmd;
249
250 #::logDebug("Glimpse command '$cmd'");
251
252         GLIMPSE: {
253
254                 open(Vend::Glimpse::SEARCH, "$cmd |")
255                         or ::logError( "Couldn't fork glimpse search '$cmd': $!"), next;
256                 #$s->adjust_delimiter(\*SEARCH) if $s->{mv_delimiter_auto};
257                 my $line;
258                 my $field_names;
259
260                 # Get field names only if no sort (will throw it off) or
261                 # not already defined
262                 if($s->{mv_field_file}) {
263                         allowed_file($s->{mv_field_file})
264                                 or return $s->search_error("can't open fields file");
265                         $s->{mv_field_file} =
266                                         ::catfile($Vend::Cfg->{ProductDir}, $s->{mv_field_file})
267                                 unless ::file_name_is_absolute($s->{mv_field_file});
268                         open(FF, "< $s->{mv_field_file}")
269                                 or return $s->search_error("can't open fields file");
270                         chomp($field_names = <FF>);
271                 }
272                 if($field_names) {
273                         $field_names =~ s/^\s+//;
274                         my @laundry = (qw/mv_search_field mv_range_look mv_return_fields/);
275                         $s->hash_fields(
276                                                 [ split /\Q$s->{mv_index_delim}/, $field_names ],
277                                                 @laundry,
278                         );
279                         undef $field_names;
280                 }
281
282 #::logDebug("search after getting fields: self=" . ::uneval({%$s}));
283                 my $prospect;
284
285                 eval {
286                         ($limit_sub, $prospect) = $s->get_limit($f, 1);
287                 };
288
289                 $@  and  return $s->search_error("Limit subroutine creation: $@");
290
291                 $f = $prospect if $prospect;
292
293                 eval {($return_sub, $delayed_return) = $s->get_return(undef, 1)};
294
295                 $@  and  return $s->search_error("Return subroutine creation: $@");
296
297                 if(! defined $f and defined $limit_sub) {
298 #::logDebug("no f, limit");
299                         while(<Vend::Glimpse::SEARCH>) {
300                                 next unless &$limit_sub($_);
301                                 (push @out, $_ and last)
302                                         if $s->{mv_return_file_name};
303                                 push @out, &$return_sub($_);
304                         }
305                 }
306                 elsif(defined $limit_sub) {
307 #::logDebug("f, limit");
308 #::logDebug("record_delim: |$s->{mv_record_delim}|, delim=|$/|");
309                         while(<Vend::Glimpse::SEARCH>) {
310 #::logDebug("in line: $_");
311                                 next unless &$f();
312 #::logDebug("match line f: $_");
313                                 next unless &$limit_sub($_);
314 #::logDebug("match line limit: $_");
315                                 (push @out, $_ and last)
316                                         if $s->{mv_return_file_name};
317                                 push @out, &$return_sub($_);
318                         }
319                 }
320                 elsif (!defined $f) {
321 #::logDebug("no f, no limit");
322                         return $s->search_error('No search definition');
323                 }
324                 else {
325 #::logDebug("f, no limit");
326                         while(<Vend::Glimpse::SEARCH>) {
327                                 next unless &$f();
328                                 (push @out, $_ and last)
329                                         if $s->{mv_return_file_name};
330                                 push @out, &$return_sub($_);
331                         }
332                 }
333 #::logDebug("gsearch before closing search: self=" . ::Vend::Util::uneval_it({%$s}));
334                 close Vend::Glimpse::SEARCH;
335                 $s->restore_specs();
336         }
337
338         $s->{matches} = scalar(@out);
339
340 #::logDebug("gsearch before delayed return: self=" . ::Vend::Util::uneval_it({%$s}));
341         if($delayed_return and $s->{matches} > 0) {
342                 $s->hash_fields($s->{mv_field_names}, qw/mv_sort_field/);
343 #::logDebug("gsearch after hash fields: self=" . ::Vend::Util::uneval_it({%$s}));
344                 $s->sort_search_return(\@out);
345                 $delayed_return = $s->get_return(1);
346                 @out = map { $delayed_return->($_) } @out;
347         }
348 #::logDebug("after delayed return: self=" . ::Vend::Util::uneval_it({%$s}));
349
350         if($s->{mv_unique}) {
351                 my %seen;
352                 @out = grep ! $seen{$_->[0]}++, @out;
353                 $s->{matches} = scalar(@out);
354         }
355
356         if ($s->{matches} > $s->{mv_matchlimit} and $s->{mv_matchlimit} > 0) {
357                 $s->save_more(\@out)
358                         or ::logError("Error saving matches: $!");
359                 if ($s->{mv_first_match}) {
360                         splice(@out,0,$s->{mv_first_match});
361                         $s->{mv_next_pointer} = $s->{mv_first_match} + $s->{mv_matchlimit};
362                         $s->{mv_next_pointer} = 0
363                                 if $s->{mv_next_pointer} > $s->{matches};
364                 }
365                 $#out = $s->{mv_matchlimit} - 1;
366         }
367
368         if(! $s->{mv_return_reference}) {
369                 $s->{mv_results} = \@out;
370                 return $s;
371         }
372         elsif($s->{mv_return_reference} eq 'LIST') {
373                 my $col = scalar @{$s->{mv_return_fields}};
374                 @out = map { join $s->{mv_return_delim}, @$_ } @out;
375                 $s->{mv_results} = join $s->{mv_record_delim}, @out;
376         }
377         elsif($s->{mv_return_reference} eq 'HASH') {
378                 my $col = scalar @{$s->{mv_return_fields}};
379                 my @col;
380                 my @names;
381                 @names = @{$s->{mv_field_names}};
382                 $names[0] eq '0' and $names[0] = 'code';
383                 my %hash;
384                 my $key;
385                 for (@out) {
386                         @col = split /$s->{mv_return_delim}/, $_, $col;
387                         $hash{$col[0]} = {};
388                         @{ $hash{$col[0]} } {@names} = @col;
389                 }
390                 $s->{mv_results} = \%hash;
391         }
392         return $s;
393 }
394
395 # Unfortunate hack need for Safe searches
396 *create_search_and  = \&Vend::Search::create_search_and;
397 *create_search_or   = \&Vend::Search::create_search_or;
398 *dump_options       = \&Vend::Search::dump_options;
399 *escape             = \&Vend::Search::escape;
400 *get_limit          = \&Vend::Search::get_limit;
401 *get_return         = \&Vend::Search::get_return;
402 *get_scalar         = \&Vend::Search::get_scalar;
403 *hash_fields        = \&Vend::Search::hash_fields;
404 *map_ops            = \&Vend::Search::map_ops;
405 *more_matches       = \&Vend::Search::more_matches;
406 *range_check        = \&Vend::Search::range_check;
407 *restore_specs      = \&Vend::Search::restore_specs;
408 *save_context       = \&Vend::Search::save_context;
409 *save_more          = \&Vend::Search::save_more;
410 *save_specs         = \&Vend::Search::save_specs;
411 *saved_params       = \&Vend::Search::saved_params;
412 *search_error       = \&Vend::Search::search_error;
413 *sort_search_return = \&Vend::Search::sort_search_return;
414 *spec_check         = \&Vend::Search::spec_check;
415 *splice_specs       = \&Vend::Search::splice_specs;
416
417 1;
418 __END__