1
0
Fork 0
mirror of git://git.code.sf.net/p/cdesktopenv/code synced 2025-03-09 15:50:02 +00:00
cde/cde/lib/DtSearch/boolsrch.c
Pascal Stumpf a1cbcd24db Low-hanging fruit: Fix most warnings in lib/DtSearch.
Most of these are related to missing includes and prototypes as well as
parens/braces.  A few are also potential 64bit issues.
2012-08-12 14:20:58 -06:00

1596 lines
47 KiB
C

/*
* CDE - Common Desktop Environment
*
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
*
* These libraries and programs are free software; you can
* redistribute them and/or modify them under the terms of the GNU
* Lesser General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* These libraries and programs are distributed in the hope that
* they will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with these libraries and programs; if not, write
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301 USA
*/
/* $XConsortium: boolsrch.c /main/4 1996/09/23 21:00:18 cde-ibm $
*
* (c) Copyright 1996 Digital Equipment Corporation.
* (c) Copyright 1996 Hewlett-Packard Company.
* (c) Copyright 1996 International Business Machines Corp.
* (c) Copyright 1996 Sun Microsystems, Inc.
* (c) Copyright 1996 Novell, Inc.
* (c) Copyright 1996 FUJITSU LIMITED.
* (c) Copyright 1996 Hitachi.
*/
/*
* COMPONENT_NAME: austext
*
* FUNCTIONS: boolean_search
* calc_result_bitvec_WK
* calculate_idfs
* dbread_filter_WK
* get_proximity
* got_USR_STOPSRCH
* load_DtSrResults_WK
* load_or_wordrecs
* read_d99
* read_recno
* read_stem_bitvec_WK
* stuff_DtSrResult
* weights_filter_WK
*
* ORIGINS: 27
*
*
* (C) COPYRIGHT International Business Machines Corp. 1996
* All Rights Reserved
* Licensed Materials - Property of IBM
* US Government Users Restricted Rights - Use, duplication or
* disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
*/
/********************* BOOLSRCH.C **********************
* $Id: boolsrch.c /main/4 1996/09/23 21:00:18 cde-ibm $
* February 1996.
* The vista code from the original vewords.c.
* Given a final truth table and stems array from the user's boolean
* query (output of boolean_search()), find all database records
* containing the truth table's set operations and return
* their database addresses in a resultlist.
* See boolpars.h for format and limitations of TRUTHTAB.
*
*-------------- D99DBA TO DBA CONVERSION ----------------
* 'd99dbas' are not real vista dbas! They were modified
* as follows to permit shorter bit vectors,
* and to minimize bit shifts at search time.
* vista_dba <- (OR_D00 << 24) | vista_slot
* vista_slot <- ((d99recno - 1) * or_recslots) + 2
* d99dba <- (d99recno << 8) | weight_byte
* d99recno <- ((vista_slot - 2) / or_recslots) + 1
* The d99 and bitvec recno of the first rec is 1.
* The slotno (vista dba) of the first rec is 2
* (dbrec occupies first slot and vista slots begin at 1).
*
* $Log$
* Revision 1.5 1996/03/20 19:21:49 miker
* Completed collocations code. Restored get_colloc_bitvec() from colloc.c.
*
* Revision 1.4 1996/03/18 22:06:24 miker
* Bug fix. Zero permute NOT queries always returned no hits.
*
* Revision 1.3 1996/03/13 23:05:24 miker
* Change long double constant to regular float for better portability.
*
* Revision 1.2 1996/03/13 22:36:37 miker
* Changed char to UCHAR several places; similar typecasts.
* Moved collocations processing to colloc.c.
*
* Revision 1.1 1996/03/05 15:52:06 miker
* Initial revision
*/
/***#define _ALL_SOURCE****/ /* to pickup typedefs for shm vnodes */
#include "SearchE.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "vista.h"
#include "boolpars.h"
#define PROGNAME "BOOLSRCH"
#define INIT_ITERATIONS 50
#define MS_boolsrch 16
/*
* DBAS_PER_BLOCK is the max number of dbas to be read
* from d99 file. Note DBAS_PER_BLOCK * sizeof(DB_ADDR) = 512 bytes,
* the standard blksize of one hard disk block.
*/
#define DBAS_PER_BLOCK 128
/* Clear the bit(s) selected by mask 'bm' in byte 'by' of bit vector 'bv'.
 * Every argument and the whole expansion are parenthesized so that a
 * compound argument (eg a mask built with '|') expands as intended.
 */
#define RESET_BIT(bv, by, bm)	((bv)[(by)] &= (UCHAR) ~(bm))
#if (DtSrMAX_STEMCOUNT != 8)
#error DtSrMAX_STEMCOUNT does not equal 8.
#endif
/****************************************/
/* */
/* PROXWT */
/* */
/****************************************/
/* One entry of the proxwts[] table built by weights_filter_WK().
 * It identifies a record by the position of its bit in result_bitvec
 * and carries the record's statistical weight plus the proximity
 * derived from that weight.
 */
typedef struct {
float wt;		/* statistical weight copied from wtvec[recno] */
long byteno;		/* recno >> 3: byte holding the record's bit */
int bitmask;		/* 1 << (recno % 8): the record's bit in that byte */
int proximity;		/* scaled from wt by weights_filter_WK(), max 9999 */
} PROXWT;
/****************************************/
/* */
/* GLOBALS */
/* */
/****************************************/
int debugging_boolsrch = FALSE; /* nonzero: trace output goes to aa_stderr */
static int all_key_types = TRUE; /* FALSE: dbread_filter_WK() discards recs
				 * whose key's first char is not in
				 * saveusr.ktchars */
/* NOTE(review): bitvec_allocp/bitvec_allocsz are not referenced in this part
 * of the file; presumably the backing allocation for the bit vectors -- confirm.
 */
static UCHAR *bitvec_allocp = NULL;
static size_t bitvec_allocsz = 0;
static long bitveclen; /* 1/8 of tot_addr_count */
static UCHAR *bitvecs [DtSrMAX_STEMCOUNT]; /* one bit vector per query stem;
				 * record recno is bit (recno % 8) of
				 * byte (recno >> 3) */
static int check_dates = FALSE; /* TRUE: dbread_filter_WK() discards recs
				 * outside usrblk.objdate1..objdate2 */
static int do_stat_sort = FALSE; /* TRUE: rank hits by statistical weight */
static double idf [DtSrMAX_STEMCOUNT]; /* loaded by calculate_idfs() */
static char *msgbuf = NULL; /* sprintf target for user messages; not
				 * allocated in this part of the file */
static int need_zero_permute = FALSE;
static struct or_objrec objrec; /* last record read by read_recno() */
static DB_ADDR objrecdba; /* dba of objrec, set by read_recno() */
static int or_abstrsz = 0; /* bytes reserved for abstract after each
				 * DtSrResult node */
static int or_fzkeysz = 0; /* bytes of fuzzy key skipped before the
				 * abstract in FZKABS misc recs */
static short or_language = DtSrLaENG; /* copied into every DtSrResult */
static long or_maxdba; /* largest dba in database */
static long or_reccount; /* tot num db obj (real_num_rec) */
static long or_recslots; /* D00 slots per obj (slot_d00) */
static struct or_hwordrec
*or_wordrecs = NULL; /* one per stem, loaded by load_or_wordrecs() */
static PROXWT *proxwts = NULL; /* table built by weights_filter_WK() */
static int proxwtct; /* number of entries in proxwts[] */
static UCHAR *result_bitvec; /* one bit per record satisfying the query */
static long result_count = 0; /* running count of bits set in result_bitvec */
static DtSrResult *resultlist = NULL; /* list under construction in
				 * load_DtSrResults_WK() */
static int save_stemno = 0; /* stem index read by get_colloc_bitvec() */
static long tot_addr_count; /* may be > reccount bcs deletes */
static int vistano; /* vista database number passed to d_crset() etc */
static float *wtvec = NULL; /* statistical weights, indexed by recno */
/* Defined in other DtSearch source files. */
extern void find_keyword (char *cur_word, int vista_num);
extern void read_wordstr (struct or_hwordrec * glob_word, int vista_num);
/************************************************/
/* */
/* got_USR_STOPSRCH */
/* */
/************************************************/
/* Polled at the top of every workproc.
 * If the USR_STOPSRCH flag is set in usrblk.flags, writes an audit
 * record (only when OE_AUDIT is on), sets usrblk.retncode to
 * OE_USER_STOP and returns TRUE.  Otherwise returns FALSE and
 * changes nothing.
 */
static int got_USR_STOPSRCH (void)
{
    if (usrblk.flags & USR_STOPSRCH) {
        if (OE_flags & OE_AUDIT)
            oe_write_audit_rec (-1L);
        usrblk.retncode = OE_USER_STOP;
        return TRUE;
    }
    return FALSE;
}
/****************************************/
/* */
/* read_recno */
/* */
/****************************************/
/* Utility function.
 * Reads a database object record given a d99 record number.
 *
 * recno:   d99/bitvec record number (first record is 1).
 * Returns: TRUE with globals objrec (swabbed to host order) and
 *          objrecdba loaded; FALSE if the computed slot is not below
 *          or_maxdba or if either vista call fails.  Failures are
 *          reported on aa_stderr only when debugging_boolsrch is set.
 */
static int read_recno (long recno)
{
    /* Convert recno to a real dba: slot number first, file number after
     * the range check.
     */
    objrecdba = (recno - 1) * or_recslots + 2;
    if (objrecdba >= or_maxdba)
        return FALSE;
    objrecdba |= (OR_D00 << 24);

    /* Use d_crset instead of CRSET and d_recread instead of RECREAD
     * to trap vista errors without aborting.  The record read is
     * attempted only if setting the current record succeeded.
     */
    d_crset (&objrecdba, vistano);
    if (db_status == S_OKAY)
        d_recread (&objrec, vistano);

    if (db_status != S_OKAY) {
        if (debugging_boolsrch) {
            /* Casts keep every argument in step with its conversion
             * regardless of how DB_ADDR is typedef'd; the byte index
             * is a long and is printed as one.
             */
            fprintf (aa_stderr,
                PROGNAME"434 Invalid dba %ld. "
                "recno=%ld bitvec[%ld]=%02x db_status=%d.\n",
                (long) objrecdba, recno, recno >> 3,
                1U << (recno % 8), db_status);
            fflush (aa_stderr);
        }
        return FALSE;
    }

    swab_objrec (&objrec, NTOH);
    return TRUE;
} /* read_recno() */
/************************************************/
/* */
/* calculate_idfs */
/* */
/************************************************/
/* Subroutine of boolean_search() initialization.
 * Loads idf[] (inverse doc frequency) for each of the
 * saveusr.stemcount stems from or_wordrecs[]:
 *     idf = log2 (or_reccount / or_hwaddrs) + 1
 * A stem with zero addresses, or whose key begins with '@'
 * (a collocation term), gets idf 0.
 * IDF = 1.0 for a word that occurs in every record.
 * For a word that occurs only once in entire database:
 *	NUM OF DB RECS		IDF OF SINGULAR WORD
 *	        10		 4.32
 *	       100		 7.64
 *	     1,000		10.97
 *	    10,000		14.29
 *	   100,000		17.61
 *	 1,000,000		20.93
 *	10,000,000		24.25
 */
static void calculate_idfs (void)
{
    int     i;
    double  ratio;

    for (i = 0; i < saveusr.stemcount; i++) {
        if (or_wordrecs[i].or_hwaddrs == 0 ||
            or_wordrecs[i].or_hwordkey[0] == '@') {
            idf[i] = 0.0;
            continue;
        }
        /* ln(2) = 0.693147181, so the division converts ln to log2 */
        ratio = (double) or_reccount / (double) or_wordrecs[i].or_hwaddrs;
        idf[i] = log (ratio) / 0.693147181 + 1.0;
        if (debugging_boolsrch)
            /* cast: or_hwaddrs' type is declared elsewhere */
            fprintf (aa_stderr,
                PROGNAME"733 IDF[%d] numdocs=%5ld idf=%lf\n",
                i, (long) or_wordrecs[i].or_hwaddrs, idf[i]);
    }
    return;
} /* calculate_idfs() */
/************************************************/
/* */
/* load_or_wordrecs */
/* */
/************************************************/
/* Subroutine of boolean_search() initialization.
 * (Re)allocates or_wordrecs[] and loads one entry for each
 * term in saveusr.stems:
 *   - collocation terms (first char '@') store their two stem
 *     indexes and range value in or_hwoffset/or_hwfree/or_hwaddrs;
 *   - ordinary terms are looked up with find_keyword() and
 *     read with read_wordstr();
 *   - terms not found get an all-zero entry, unless
 *     qry_is_all_ANDs, in which case the search cannot succeed.
 * Returns TRUE on success.  Else returns FALSE with
 * usrblk.retncode set (OE_SYSTEM_STOP, OE_BAD_QUERY or
 * OE_NOTAVAIL); the database-error, hit-limit and bad-collocation
 * failures also put a user msg on the msglist.
 */
static int load_or_wordrecs (void)
{
    int                 stemno;
    struct or_hwordrec  *wordrec;
    int                 colloc_count = 0;
    int                 not_found_count = 0;

    free (or_wordrecs);		/* free(NULL) is a no-op */
    or_wordrecs = austext_malloc (
        saveusr.stemcount * sizeof (struct or_hwordrec) + 16,
        PROGNAME "782", NULL);

    for (stemno = 0; stemno < saveusr.stemcount; stemno++) {
        wordrec = &or_wordrecs [stemno];

        /* If this is a collocation term,
         * save the two indexes and the collocation
         * value in the wordrec buffer instead of usual
         * offsets and dba counts.
         */
        if (saveusr.stems[stemno][0] == '@') {
            int idx_a = 0, idx_b = 0, range = 0;

            strcpy (wordrec->or_hwordkey, saveusr.stems[stemno]);
            /* All three fields must convert; otherwise the values
             * stored below would be meaningless.
             */
            if (sscanf (saveusr.stems[stemno], COLLOC_STEM_FORMAT,
                    &idx_a, &idx_b, &range) != 3) {
                sprintf (msgbuf, "%s Invalid collocation term '%s'.",
                    PROGNAME"812", saveusr.stems[stemno]);
                DtSearchAddMessage (msgbuf);
                usrblk.retncode = OE_BAD_QUERY;
                return FALSE;
            }
            wordrec->or_hwoffset = idx_a;
            wordrec->or_hwfree = idx_b;
            wordrec->or_hwaddrs = range;
            colloc_count++;
            continue;
        }

        if (debugging_boolsrch)
            fprintf (aa_stderr, PROGNAME"823 KEYFIND[%d] ", stemno);
        find_keyword (saveusr.stems[stemno], vistano);

        /* Only other possible nonfatal vista return is S_NOTFOUND.
         * If qry_is_all_ANDs we can quit right now.
         * Otherwise zero the wordrec so the term has no addrs.
         */
        if (db_status != S_OKAY) {
            if (qry_is_all_ANDs) {
                if (debugging_boolsrch)
                    fputs ("not found, qry_all_ANDs, quit.\n", aa_stderr);
                usrblk.retncode = OE_NOTAVAIL;
                return FALSE;
            }
            memset (wordrec, 0, sizeof(struct or_hwordrec));
            if (debugging_boolsrch)
                fputs ("not found, addrs-->0.\n", aa_stderr);
            not_found_count++;
            continue;
        }

        /*
         * Term was found: add it to the or_wordrecs[] array.
         * But it is an error to include a word in more records
         * than the max specified in site config file.  This is
         * meaningful for databases where certain common high
         * frequency words slip by which should be on the stoplist.
         * It's possible in huge databases to run out of memory
         * assembling very long resultlists.
         */
        strncpy (wordrec->or_hwordkey, saveusr.stems[stemno],
            DtSrMAXWIDTH_HWORD);
        wordrec->or_hwordkey [DtSrMAXWIDTH_HWORD - 1] = 0;
        read_wordstr (wordrec, vistano);
        if (db_status != S_OKAY) {
            /* Probable corrupted database.  The btree
             * read succeeded but the record read failed.
             */
            sprintf (msgbuf, catgets(dtsearch_catd, MS_boolsrch, 6,
                "%s Database Error.  Word '%s' is\n"
                "listed in database '%s' but has no index record.") ,
                PROGNAME"295", usrblk.stems[stemno], usrblk.dblk->label);
            DtSearchAddMessage (msgbuf);
            usrblk.retncode = OE_SYSTEM_STOP;
            if (debugging_boolsrch)
                fprintf (aa_stderr,
                    "db error, db_status = %d.\n", db_status);
            return FALSE;
        }

        /* The casts below keep the arguments matched to %ld whatever
         * integer types the record fields and hit limit are declared as.
         */
        if (debugging_boolsrch)
            fprintf (aa_stderr, "ofs=%ld addrs=%ld free=%ld\n",
                (long) wordrec->or_hwoffset,
                (long) wordrec->or_hwaddrs,
                (long) wordrec->or_hwfree);

        if (wordrec->or_hwaddrs > OE_words_hitlimit) {
            sprintf (msgbuf, catgets (dtsearch_catd, MS_boolsrch, 14,
                "%s '%s' has more than %ld hits.\n"
                "Please remove it from the query or raise the WHITLIM\n"
                "value in the search engine configuration file."),
                PROGNAME"1444", wordrec->or_hwordkey,
                (long) OE_words_hitlimit);
            DtSearchAddMessage (msgbuf);
            /* Also log WHITLIM msg for administrator... */
            fprintf (aa_stderr, "%s\n", msgbuf);
            usrblk.retncode = OE_BAD_QUERY;
            return FALSE;
        }
    } /* end loop for each term in saveusr.stems[] */

    /* It's a failure if all the user's words
     * don't exist in database.
     */
    if (not_found_count + colloc_count >= saveusr.stemcount) {
        usrblk.retncode = OE_NOTAVAIL;
        return FALSE;
    }
    return TRUE;
} /* load_or_wordrecs() */
/****************************************/
/* */
/* get_proximity */
/* */
/****************************************/
/* Subroutine of stuff_DtSrResult().
* Given d99recno, finds proxwt[] for record,
* calculates and returns integer proximity.
*/
static int get_proximity (long recno)
{
long byteno = recno >> 3;
int bitmask = 1 << (recno % 8);
int i;
for (i = 0; i < proxwtct; i++)
if (proxwts[i].byteno == byteno && proxwts[i].bitmask == bitmask)
break;
if (i >= proxwtct)
return -1;
return proxwts[i].proximity;
} /* get_proximity() */
/****************************************/
/* */
/* stuff_DtSrResult */
/* */
/****************************************/
/* Subroutine of load_DtSrResults_WK().
 * Loads passed DtSrResult structure with data from global objrec
 * (which the caller has just read for 'recno').
 * Performs additional vista reads as necessary to get misc recs.
 *
 * new:   node to fill.  The abstract is stored in the memory
 *        immediately following the structure, so the caller must
 *        have allocated room for it there.
 * recno: d99 record number; used only to look up the proximity
 *        when do_stat_sort is set.
 */
static void stuff_DtSrResult (
DtSrResult *new,
long recno)
{
int m;
int fzkey_remaining;
char *src, *targ, *targend;
static struct or_miscrec
miscrecbuf;
/* Straight copies from the object record and globals */
new->objflags = objrec.or_objflags;
new->objuflags = objrec.or_objuflags;
new->objsize = objrec.or_objsize;
new->objdate = objrec.or_objdate;
new->objtype = objrec.or_objtype;
new->objcost = objrec.or_objcost;
new->dbn = OE_dbn;
new->dba = objrecdba;
new->language = or_language;
strncpy (new->reckey, objrec.or_objkey, DtSrMAX_DB_KEYSIZE);
/* proximity is left as the caller initialized it when not sorting */
if (do_stat_sort)
new->proximity = get_proximity (recno);
/* The abstract immediately follows the fuzzy key
* in the FZKABS misc recs. It may span several recs.
*/
new->abstractp = (char *) (new + 1);
if (or_abstrsz > 0) {
targ = new->abstractp;
/* targend is the last abstract byte (index or_abstrsz - 1) */
targend = targ + or_abstrsz - 1;
fzkey_remaining = or_fzkeysz;
/* Walk the object's OR_OBJ_MISCS set from its first member */
CRSET (PROGNAME"226", &objrecdba, vistano);
SETOR (PROGNAME"227", OR_OBJ_MISCS, saveusr.vistano);
FINDFM (PROGNAME"228", OR_OBJ_MISCS, saveusr.vistano);
while (db_status == S_OKAY) {
RECREAD (PROGNAME"2209", &miscrecbuf, saveusr.vistano);
NTOHS (miscrecbuf.or_misctype);
if (miscrecbuf.or_misctype == ORM_FZKABS) {
src = (char *) miscrecbuf.or_misc;
for (m = 0; m < sizeof(miscrecbuf.or_misc); m++) {
/* skip over the fzkey */
if (fzkey_remaining > 0) {
src++;
fzkey_remaining--;
continue;
}
/* copy the abstract */
*targ = *src;
/* Stop at the source's NUL or once the byte at targend has
* been written.  In the second case targ has already been
* advanced past targend, so the terminator below lands at
* index or_abstrsz.
*/
if (*src++ == 0 || targ++ >= targend) {
*targ = 0;
targ = targend; /* force outer loop end */
break;
}
} /* end for-loop m */
} /* end (misctype == FZKABS) */
if (targ >= targend)
break;
FINDNM (PROGNAME"545", OR_OBJ_MISCS, saveusr.vistano);
} /* end while-loop */
} /* endif: (or_abstrsz > 0) */
return;
} /* stuff_DtSrResult() */
/****************************************/
/* */
/* load_DtSrResults_WK */
/* */
/****************************************/
/* Builds DtSrResult list for every record
* in result_bitvec, but not more than aa_maxhits.
*/
static void load_DtSrResults_WK (void)
{
long recno;
int bitno;
long byteno;
int i;
long dittocount;
DtSrResult *resultp;
size_t resultsz = sizeof(DtSrResult) + or_abstrsz + 4;
if (got_USR_STOPSRCH())
return;
if (resultlist) {
DtSearchFreeResults (&resultlist);
resultlist = NULL;
}
/* Make a single pass through the final result_bitvec.
* For each nonzero bit, ie each database record
* that satisfies the query requirements,
* retrieve the record and push it onto the
* DtSrResult list. If not sorting records,
* stop when we reach the user's specified aa_maxhits count.
*/
dittocount = 0;
for (recno = 1; recno < tot_addr_count; recno++) {
byteno = recno >> 3; /* divide by 8 */
bitno = recno % 8;
/* Skip zero bits */
if ((result_bitvec[byteno] & (1 << bitno)) == 0)
continue;
if (!read_recno (recno))
continue;
/* Create new DtSrResult node, push it onto resultlist. */
resultp = austext_malloc (resultsz + 4, PROGNAME"466", NULL);
memset (resultp, 0, resultsz);
resultp->link = resultlist;
resultlist = resultp;
/* Load the new DtSrResult node from the object record */
stuff_DtSrResult (resultp, recno);
/* Check if any more reads are necessary.
* If not sorting, stop after aa_maxhits.
* If sorting, there won't be more than
* aa_maxhits recs in the bitvec anyway.
*/
dittocount++;
if (dittocount >= aa_maxhits)
break;
} /* end bitvec loop */
/*--------- All Done. Clean up and return to caller. ---------*/
/*@@@@@@ make separate workproc call if aa_maxhits > 100.
@@@@@ sort may take a long time */
if (wtvec) {
free (wtvec);
wtvec = NULL;
}
if (proxwts) {
free (proxwts);
proxwts = NULL;
}
if (dittocount <= 0) {
usrblk.workproc = dummy_workproc;
usrblk.retncode = OE_NOTAVAIL;
return;
}
usrblk.retncode = OE_OK;
usrblk.workproc = dummy_workproc;
usrblk.stemcount = saveusr.stemcount;
if (usrblk.search_type == 'W')
memcpy (usrblk.stems, saveusr.stems,
saveusr.stemcount * DtSrMAXWIDTH_HWORD);
else
/* Don't copy first char (ctrl-o) stem */
for (i = 0; i < saveusr.stemcount; i++)
strcpy (usrblk.stems[i], &saveusr.stems[i][1]);
if (do_stat_sort)
DtSearchSortResults (&resultlist, DtSrSORT_PROX);
usrblk.dittocount = dittocount;
if (usrblk.dittolist)
DtSearchFreeResults (&usrblk.dittolist);
usrblk.dittolist = resultlist;
resultlist = NULL;
return;
} /* load_DtSrResults_WK() */
/****************************************/
/* */
/* weights_filter_WK */
/* */
/****************************************/
/* This workproc is called only if we're doing statistical sorting.
* (1) It reduces the result_bitvec to it's final size,
* containing only the highest aa_maxhits statistical weights
* in wtvec.
* (2) It replaces (possibly large) wtvec with (probably much smaller)
* array of PROXWT structures containing the selected records'
* weights and calculated proximities, for final ranking sort.
*
*/
static void weights_filter_WK (void)
{
int i;
double scalefac;
long recno;
int smallest, biggest;
float biggestwt;
long byteno, smallest_byteno;
int bitmask, smallest_bitmask;
if (got_USR_STOPSRCH())
return;
/* Init weight filtering */
if (proxwts)
free (proxwts);
proxwtct = (result_count < aa_maxhits)? result_count : aa_maxhits;
proxwts = austext_malloc (proxwtct * sizeof(PROXWT) + 4,
PROGNAME"429", NULL);
memset (proxwts, 0, proxwtct * sizeof(PROXWT));
smallest = 0;
scalefac = 0.0;
biggestwt = 0.0; /* biggest single wt of all docs */
/* One pass thru entire result_bitvec */
for (recno = 1; recno < tot_addr_count; recno++) {
byteno = recno >> 3;
bitmask = 1 << (recno % 8);
/* Skip zero bits */
if ((result_bitvec[byteno] & bitmask) == 0)
continue;
/* Make scalefac = sum of squares of all wts in bitvec.
* It's possible that all or some of the weights are
* zero (eg queries like "~aaa" or "~aaa | bbb").
* In this case give them a very small positive number
* so we don't divide by zero later on.
*/
if (wtvec[recno] == 0.0)
wtvec[recno] = 0.1;
scalefac += (double) wtvec[recno] * (double) wtvec[recno];
/*
* The following logic first fills up the proxwts table.
* After that if a bitvec's weight is larger than the smallest
* proxwt, replace the smallest proxwt with the new weight
* and switch off the previous smallest in the original bitvec.
*/
/*
* Just discard rec on bitvec if it's weight
* is smaller than the current smallest.
*/
if (wtvec [recno] <= proxwts[smallest].wt) {
RESET_BIT (result_bitvec, byteno, bitmask);
result_count--;
continue;
}
/*
* Else discard current smallest if
* table full, ie it really points to something.
*/
if (proxwts[smallest].wt > 0.0) {
smallest_byteno = proxwts[smallest].byteno;
smallest_bitmask = proxwts[smallest].bitmask;
RESET_BIT (result_bitvec, smallest_byteno, smallest_bitmask);
result_count--;
}
/* Add this weight to the proxwts table. */
proxwts [smallest] .wt = wtvec [recno];
proxwts [smallest] .byteno = byteno;
proxwts [smallest] .bitmask = bitmask;
/* Keep track of the table entry that has
* the highest weight. This will eventually
* be the first sorted hit on the hitlist.
* It's weight/proximity will be used
* to scale the proximities of the
* other hits.
*/
if (biggestwt < wtvec[recno]) {
biggestwt = wtvec[recno];
biggest = smallest;
}
/* Find the next smallest */
smallest = 0;
for (i = 1; i < proxwtct; i++) {
if (proxwts[i].wt < proxwts[smallest].wt)
smallest = i;
}
} /* end loop on every recno */
free (wtvec);
wtvec = NULL;
/* PROXIMITY CALCULATIONS.
* In order to translate statistical weight into an AusText
* proximity, basically you have to invert it, then scale it.
* The statistical weight is a similarity measure: the
* larger it is the more similar the document to the query.
* But AusText 'proximity' is like a 'distance' measure,
* the smaller the number the closer the document is to the query.
*
* First 'normalize' each document's statistical
* weight to be a fraction between 0 and 1. Done
* by calculating a normalization factor,
* the sqrt of the sum of squares of weights of all
* docs that would have qualified for the hitlist
* if we weren't truncating. Note cosine-based normalization
* factor (Pythagorean) always >= largest wt so we can
* guarantee all normalized weights are > 0.0 and <= 1.0.
*
* The proximity itself is calculated as the 'percent value'
* that the doc is 'distant' from perfection (1.0 or 100%).
* For example, if the normalized weight of the first record
* is .931 then it's proximity will be 7 (100% - 93% = 7).
*
* The proximity of every other hit is scaled away
* from the first because the normalization algorithm
* tends to clump proximities when there are a lot of hits.
* Specifically the proximity of every hit is a constant
* scale factor (derived from the first proximity),
* divided by it's weight.
*
* A "bulls eye" (normalized weight = 1.0, proximity == 0)
* for the first hit is not allowed so scale factor will
* not also be zero. Otherwise *all* hits in that particular
* results list would be bulls eyes.
*/
scalefac = (double) biggestwt / sqrt (scalefac);
/* normalized weight of first hit */
scalefac = (1.0 - scalefac) * 100.0;
/* proximity of first hit */
if (scalefac < 1.0)
scalefac = 1.0;
/* No bulls eyes */
scalefac *= (double) biggestwt * 1.2;
/* scale factor for other hits */
for (i = 0; i < proxwtct; i++) {
proxwts[i].proximity = (int) (scalefac / (double) proxwts[i].wt);
if (proxwts[i].proximity > 9999)
proxwts[i].proximity = 9999;
}
if (debugging_boolsrch) {
fprintf (aa_stderr,
PROGNAME"489 FINAL PROXWTS proxwtct=%d bigwt=%.2f scalefac=%.2lf\n",
proxwtct, biggestwt, scalefac);
for (i=0; i<10; i++) {
if (i >= proxwtct)
break;
fprintf (aa_stderr,
" byteno=%3ld bitmask=%02x wt=%.2f prox=%d\n",
proxwts[i].byteno, proxwts[i].bitmask,
proxwts[i].wt, proxwts[i].proximity);
}
fprintf (aa_stderr, PROGNAME"499 WEIGHT RESULTS resultct=%ld bv=\n",
result_count);
for (i=0; i<22; i++) {
if (i >= bitveclen)
break;
fprintf (aa_stderr, " %02x", (int) result_bitvec[i]);
}
fputc ('\n', aa_stderr);
fflush (aa_stderr);
}
usrblk.retncode = OE_SEARCHING;
usrblk.workproc = load_DtSrResults_WK;
return;
} /* weights_filter_WK() */
/****************************************/
/* */
/* dbread_filter_WK */
/* */
/****************************************/
/* Workproc used when documents must be removed from result_bitvec
 * because of keytype or date.
 * Reads every record whose bit is set.  A record is switched off
 * (and result_count decremented) when
 *   - all_key_types is FALSE and the first char of its key is not
 *     in saveusr.ktchars, or
 *   - check_dates is TRUE and its date is outside
 *     usrblk.objdate1..usrblk.objdate2.
 * Records that read_recno() cannot read are left untouched.
 * Afterwards selects the next workproc.
 */
static void dbread_filter_WK (void)
{
    long    recno;
    long    byteno;
    int     bitmask;
    int     why;		/* 'k', 'd', or 0 = keep the record */
    long    discards = 0;	/* counted only while debugging */

    if (got_USR_STOPSRCH())
        return;

    if (debugging_boolsrch) {
        fputs (PROGNAME"865 DBREAD discards (k=keytype d=date):\n", aa_stderr);
        fflush (aa_stderr);
    }

    /* One pass thru entire result_bitvec */
    for (recno = 1; recno < tot_addr_count; recno++) {
        byteno = recno >> 3;
        bitmask = 1 << (recno % 8);
        if (!(result_bitvec[byteno] & bitmask))
            continue;
        if (!read_recno (recno))
            continue;

        /* Keytype is tested first; the date is looked at
         * only for records that pass the keytype test.
         */
        why = 0;
        if (!all_key_types &&
            strchr (saveusr.ktchars, objrec.or_objkey[0]) == NULL)
            why = 'k';
        else if (check_dates &&
            !objdate_in_range (objrec.or_objdate,
                usrblk.objdate1, usrblk.objdate2))
            why = 'd';
        if (why == 0)
            continue;

        RESET_BIT (result_bitvec, byteno, bitmask);
        result_count--;
        if (debugging_boolsrch) {
            discards++;
            fputc (why, aa_stderr);
            fflush (aa_stderr);
        }
    } /* end loop on every recno */

    if (debugging_boolsrch) {
        int n;

        if (discards)
            fputc ('\n', aa_stderr);
        fprintf (aa_stderr,
            PROGNAME"857 DBREAD RESULTS discards=%ld resultct=%ld bv=\n",
            discards, result_count);
        for (n = 0; n < 22 && n < bitveclen; n++)
            fprintf (aa_stderr, " %02x", (int) result_bitvec[n]);
        fputc ('\n', aa_stderr);
        fflush (aa_stderr);
    }

    /* Determine next workproc.
     * (1) If no records survived the read db filter,
     *     we're done, return 'no hits'.
     * (2) If we're sorting, the next workproc reduces the
     *     bitvec to the aa_maxhits recs with the highest
     *     statistical weights.
     * (3) Otherwise the next workproc just loads the hitlist.
     */
    if (result_count <= 0) {
        usrblk.retncode = OE_NOTAVAIL;
        usrblk.workproc = dummy_workproc;
        return;
    }
    usrblk.retncode = OE_SEARCHING;
    if (do_stat_sort) {
        usrblk.workproc = weights_filter_WK;
        return;
    }
    if (debugging_boolsrch)
        fprintf (aa_stderr,
            PROGNAME"931 No sorting by statistical weights.\n");
    usrblk.workproc = load_DtSrResults_WK;
    return;
} /* dbread_filter_WK() */
/****************************************/
/* */
/* calc_result_bitvec_WK */
/* */
/****************************************/
/* Second workproc after read_stem_bitvec_WK().
 * If possible, minimizes size of truth table permutes,
 * then applies them to stem bitvecs to create result_bitvec.
 *
 * Reads bitvecs[], bitveclen, saveusr.stemcount and final_truthtab.
 * Sets bits in result_bitvec, recomputes result_count, and selects
 * the next workproc.  When the truth table is minimized,
 * final_truthtab.permutes/pmsz are redirected to the minimized
 * table, which is held in static storage so the pointer remains
 * valid after this function returns.
 */
static void calc_result_bitvec_WK (void)
{
    int     mask;
    int     cpm;
    long    byteno;
    int     bitno, stemno;
    UCHAR   permute;
    /* static: its address is stored in the global final_truthtab */
    static UCHAR my_permutes [256];
    int     my_pmsz;
    int     i;

    if (got_USR_STOPSRCH())
        return;

    /* If there are fewer than a full complement of stems,
     * minimize size of truth table by discarding
     * permutes that refer to unused stems.
     */
    if (saveusr.stemcount < DtSrMAX_STEMCOUNT) {
        /* Set high order bits of mask to mark unused stem positions */
        mask = 0;
        for (i = 0; i < saveusr.stemcount; i++)
            mask |= 1 << i;
        mask = ~mask;

        /* 'cpm' is a candidate permute */
        my_pmsz = 0;
        for (cpm = 0; cpm < 256; cpm++) {
            /*
             * Discard candidate if it refers to an unused stem.
             */
            if (cpm & mask)
                continue;
            /*
             * Otherwise if candidate is in final_truthtab, keep it.
             * Each candidate is kept at most once, so the table
             * cannot outgrow its 256 entries even if
             * final_truthtab lists a permute more than once.
             */
            for (i = 0; i < final_truthtab.pmsz; i++) {
                if (final_truthtab.permutes[i] == (UCHAR) cpm) {
                    my_permutes [my_pmsz] = (UCHAR) cpm;
                    my_pmsz++;
                    break;
                }
            }
        }
        if (debugging_boolsrch) {
            fprintf (aa_stderr,
                PROGNAME"565 Minimize truth table, pmsz=%d-->%d\n permutes=",
                final_truthtab.pmsz, my_pmsz);
            for (i=0; i<16; i++) {
                if (i >= my_pmsz)
                    break;
                fprintf (aa_stderr, " %02x", (int) my_permutes [i]);
            }
            fputc ('\n', aa_stderr);
            fflush (aa_stderr);
        }
        final_truthtab.permutes = my_permutes;
        final_truthtab.pmsz = my_pmsz;
    } /* end minimize of permutes */

    /* Calculate result bit vector.
     * Loop 1 is a single pass through the bit vectors
     * (a bit loop inside a byte loop).
     * For each bit position, ie each database record,
     * build a 'permute' equivalent to the boolean
     * representation of the terms in that record (Loop 2).
     * Then search the truth table permutes for a match (Loop 3).
     * If found, set the record's bit in the result_bitvec.
     */

    /* LOOP 1.  For each database addr... */
    result_count = 0;
    for (byteno = 0; byteno < bitveclen; byteno++) {
        for (bitno = 0; bitno < 8; bitno++) {
            mask = 1 << bitno;

            /* LOOP 2.  Build permute for each query term. */
            permute = 0;
            for (stemno = 0; stemno < saveusr.stemcount; stemno++) {
                if (bitvecs [stemno] [byteno] & (UCHAR) mask)
                    permute |= 1 << stemno;
            }

            /* LOOP 3.  Search truth table for matching permute.
             * Stop at the first match so a record is counted once.
             */
            for (i = 0; i < final_truthtab.pmsz; i++) {
                if (final_truthtab.permutes[i] == permute) {
                    result_bitvec [byteno] |= (UCHAR) mask;
                    result_count++;
                    break;
                }
            }
        }
    }

    if (debugging_boolsrch) {
        fprintf (aa_stderr, PROGNAME"621 PRELIM RESULTS resultct=%ld bv=\n",
            result_count);
        for (i=0; i<22; i++) {
            if (i >= bitveclen)
                break;
            fprintf (aa_stderr, " %02x", (int) result_bitvec[i]);
        }
        fputc ('\n', aa_stderr);
        fflush (aa_stderr);
    }

    /* The next workprocs are 'filters', reducing the size
     * of result_bitvec by removing various unwanted records.
     * They're done in the following order:
     * (1) If no records survived the truth table manipulations,
     *     we're done, return 'no hits'.
     * (2) If we must remove documents because of keytype or date,
     *     the next workproc is the filter that reads the database.
     * (3) If we're sorting, the next workproc reduces the
     *     bitvec to the aa_maxhits recs with the highest
     *     statistical weights.
     * (4) Otherwise the next workproc just loads the hitlist.
     */
    if (result_count <= 0) {
        usrblk.retncode = OE_NOTAVAIL;
        usrblk.workproc = dummy_workproc;
    }
    else if (!all_key_types || check_dates) {
        usrblk.retncode = OE_SEARCHING;
        usrblk.workproc = dbread_filter_WK;
    }
    else if (do_stat_sort) {
        if (debugging_boolsrch)
            fprintf (aa_stderr,
                PROGNAME"948 No db reads necessary for date or keytype.\n");
        usrblk.retncode = OE_SEARCHING;
        usrblk.workproc = weights_filter_WK;
    }
    else {
        if (debugging_boolsrch)
            fprintf (aa_stderr,
                PROGNAME"625 No filtering: no sort and no db reads.\n");
        usrblk.retncode = OE_SEARCHING;
        usrblk.workproc = load_DtSrResults_WK;
    }
    return;
} /* calc_result_bitvec_WK() */
/****************************************/
/* */
/* read_d99 */
/* */
/****************************************/
/* Subroutine of read_stem_bitvec_WK().
 * Repeatedly called to get each d99dba in the inverted index
 * file (d99) for a specific index term.  The first call passes
 * the term's wordrec with d99 offset and size information.
 * Subsequent calls pass NULL.
 * Returns valid d99dba (converted with ntohl), or 0 at end of
 * term's index, or -1 on error with a user msg on the msglist.
 * Actual reads are performed a disk block at a time,
 * with dbas stored in a static buffer for the next call.
 * After an error the read state is emptied, so further calls
 * with NULL return 0 rather than touching a stale buffer.
 */
static DB_ADDR read_d99 (struct or_hwordrec *wordrec)
{
    static DB_ADDR  readbuf [DBAS_PER_BLOCK];
    static DB_ADDR  *bufptr = readbuf, *endbuf = readbuf;
    static FILE     *fptr;
    static long     bal_read, request_read;

    /* First call for new term */
    if (wordrec) {
        fptr = usrblk.dblk->iifile;
        bal_read = wordrec->or_hwaddrs;
        bufptr = endbuf = readbuf;	/* empty buffer triggers block read */
        if (fseek (fptr, wordrec->or_hwoffset, SEEK_SET) != 0)
            goto READ_ERROR;
    }

    /* Time to read another block */
    if (bufptr >= endbuf) {
        if (bal_read <= 0)
            return 0;
        /* last block is usually short */
        request_read = (bal_read > DBAS_PER_BLOCK)?
            DBAS_PER_BLOCK : bal_read;
        bal_read -= request_read;
        if (fread (readbuf, sizeof(DB_ADDR), (size_t) request_read, fptr)
            != (size_t) request_read)
            goto READ_ERROR;
        bufptr = readbuf;
        endbuf = readbuf + request_read;
    }
    return ntohl (*bufptr++);

READ_ERROR:
    /* Leave nothing buffered and nothing pending */
    bal_read = 0;
    bufptr = endbuf = readbuf;
    sprintf (msgbuf, catgets(dtsearch_catd, MS_boolsrch, 28,
        "%s Database Read Error in %s.d99.") ,
        PROGNAME"428", usrblk.dblk->name);
    DtSearchAddMessage (msgbuf);
    return -1;
} /* read_d99() */
/****************************************/
/* */
/* get_colloc_bitvec */
/* */
/****************************************/
/* Subroutine of read_stem_bitvec_WK().
* Constructs a 'collocation bitvector' for current save_stemno.
* A collocation expression requests the return of all records
* containing both of two terms (a kind of boolean AND) such that
* the occurrences are within n characters of each other.
* For example "ICE @5 CREAM" requests the return of all records
* containing both "ICE" and "CREAM" but only if they are separated
* by no more than 5 characters.
*
* Since offset information is not stored in the inverted index
* this module initially returns the intersection of the two words'
* bit vectors (boolean AND). Then it retrieves each record,
* builds an offset (hilites) table for each of the two words,
* then compares the offset differences in the tables.
* If no occurrence pairs are within the specified separation
* range, the record is deleted from the bitvector.
* Returns 0 if successful, otherwise returns -1 and msgs.
@@@@ rewrite as its own workproc--reading/hiliting can take a long time...
*/
static int get_colloc_bitvec (void)
{
int stemno_A = or_wordrecs[save_stemno].or_hwoffset;
int stemno_B = or_wordrecs[save_stemno].or_hwfree;
long range = or_wordrecs[save_stemno].or_hwaddrs;
UCHAR *bitvec_A = bitvecs [stemno_A];
UCHAR *bitvec_B = bitvecs [stemno_B];
UCHAR *bitvec_C = bitvecs [save_stemno];
long byteno, recno;
UCHAR bitmask;
int parse_type;
int got_a_colloc;
char *stemp;
DtSrHitword *hitwords_A, *hitwords_B;
long hitwcount_A, hitwcount_B;
long threshold_range;
DB_ADDR dba;
LLIST *bloblist;
long a, b, offset_A, offset_B;
/* First construct the set intersection (AND) of
* each of the collocated terms in the colloc bitvec.
*/
for (byteno = 0; byteno < bitveclen; byteno++)
bitvec_C [byteno] = bitvec_A [byteno] & bitvec_B [byteno];
if (debugging_boolsrch) {
int i;
fprintf (aa_stderr,
PROGNAME"312 INTERSECT[%d] (colloc %d & %d):\n",
save_stemno, stemno_A, stemno_B);
for (i=0; i<bitveclen; i++) {
fprintf (aa_stderr, " %02x", bitvec_C[i]);
if (i > 22)
break;
}
fputc ('\n', aa_stderr);
fflush (aa_stderr);
}
/* Read cleartext for each rec in intersection/colloc bitvec.
* Get hitwords (hilite table) for each collocation term.
* Switch off recs in bitvec where no term pairs are in
* collocation range.
*/
for (recno = 1; recno < tot_addr_count; recno++) {
byteno = recno >> 3; /* divide by 8 */
bitmask = 1 << (recno % 8);
/* Skip zero bits */
if ((bitvec_C[byteno] & bitmask) == 0)
continue;
/* Convert recno to vista database address.
* Silently skip rec if dba doesn't exist.
*/
dba = (recno - 1) * or_recslots + 2;
if (dba >= or_maxdba) {
RESET_BIT (bitvec_C, byteno, bitmask);
continue;
}
dba |= (OR_D00 << 24);
/* Silently skip records that have no document text */
if ((bloblist = ve_getblobs (dba, vistano)) == NULL) {
if (debugging_boolsrch) {
fprintf (aa_stderr,
PROGNAME"126 No blobs for recno=%ld byteno=%ld mask%02x\n",
recno, byteno, bitmask);
fflush (aa_stderr);
}
RESET_BIT (bitvec_C, byteno, bitmask);
continue;
}
/* Uncompress record text into usrblk.cleartext */
if (oe_unblob (bloblist) != OE_OK)
return -1;
/* Build 'hilite' table for stem A. If stem
* can't be found in the record, silently skip it.
* Otherwise save the table.
*/
stemp = saveusr.stems [stemno_A];
if (stemp[0] == STEM_CH) {
parse_type = 'S';
stemp++;
}
else
parse_type = 'W';
if (!hilite_cleartext (parse_type, stemp, 1)) {
RESET_BIT (bitvec_C, byteno, bitmask);
continue;
}
hitwords_A = usrblk.hitwords;
hitwcount_A = usrblk.hitwcount;
usrblk.hitwords = NULL;
usrblk.hitwcount = 0;
/* In the same way build 'hilite' table for stem B */
stemp = saveusr.stems [stemno_B];
if (stemp[0] == STEM_CH) {
parse_type = 'S';
stemp++;
}
else
parse_type = 'W';
if (!hilite_cleartext (parse_type, stemp, 1)) {
RESET_BIT (bitvec_C, byteno, bitmask);
free (hitwords_A);
continue;
}
hitwords_B = usrblk.hitwords;
hitwcount_B = usrblk.hitwcount;
usrblk.hitwords = NULL;
usrblk.hitwcount = 0;
/* Compare the two hilite tables for range matches */
got_a_colloc = FALSE;
b = 0;
for (a = 0; a < hitwcount_A; a++) {
offset_A = hitwords_A[a].offset;
threshold_range = offset_A + hitwords_A[a].length + range;
for (; b < hitwcount_B; b++) {
offset_B = hitwords_B[b].offset;
/* Advance B to first entry past A's offset */
if (offset_B <= offset_A )
continue; /* ...the B loop */
if (offset_B <= threshold_range)
got_a_colloc = TRUE;
break; /* ...the B loop */
} /* end B loop */
if (got_a_colloc || b >= hitwcount_B)
break; /* ...the A loop */
} /* end A loop */
free (hitwords_A);
free (hitwords_B);
/* If no collocations found within range,
* switch off rec in colloc bitvec.
*/
if (!got_a_colloc)
RESET_BIT (bitvec_C, byteno, bitmask);
} /* end loop on each recno in intersection/colloc bitvec */
return 0;
} /* get_colloc_bitvec() */
/****************************************/
/* */
/* read_stem_bitvec_WK */
/* */
/****************************************/
/* First workproc after boolean_search().
* Each iterative call loads one (save_stemno) real stem's bitvec.
* After last stem bitvec loaded, sets up
* call to next workproc in sequence.
*/
static void read_stem_bitvec_WK (void)
{
long byteno;
DB_ADDR d99recno;
float weight;
if (got_USR_STOPSRCH())
return;
/* Process collocation 'stems' */
if (saveusr.stems [save_stemno] [0] == '@') {
d99recno = get_colloc_bitvec();
goto DONE_READING;
}
for ( d99recno = read_d99 (&or_wordrecs [save_stemno]);
d99recno;
d99recno = read_d99 (NULL)) {
if (d99recno == -1) /* read error */
break;
/* Save low byte 'statistical weight' value.
* It can only be 0 - 255.
*/
if (do_stat_sort)
weight = (float) (d99recno & 0x000000ff) + 1.0;
d99recno = (d99recno >> 8) & 0x00ffffff;
/* Set correct bit in bitvec.
* The byte number is the recno divided by 8.
* The bit number is the remainder after division by 8.
*/
if ((byteno = d99recno >> 3) >= bitveclen) {
sprintf (msgbuf, catgets(dtsearch_catd, MS_boolsrch, 32,
"%s Database Error: %s '%s'\n"
"in database '%s' has invalid d99 record number %ld.") ,
PROGNAME"394",
(usrblk.search_type == 'W') ?
catgets(dtsearch_catd, MS_boolsrch, 33, "Word") :
catgets(dtsearch_catd, MS_boolsrch, 34, "Stem of"),
usrblk.stems [save_stemno],
usrblk.dblk->label,
d99recno);
DtSearchAddMessage (msgbuf);
d99recno = -1; /* force error return */
goto DONE_READING;
}
bitvecs [save_stemno] [byteno] |= 1 << (d99recno % 8);
/* Add to correct weight in weight vector.
* IDF ranges between 1.0 and 20.0, and weight
* is 1 - 256, so we're adding 1 - ~5000 to wtvec.
*/
if (do_stat_sort)
wtvec [d99recno] += weight * (float) idf [save_stemno];
} /* end loop that retrieves every d99recno for curr stem */
DONE_READING:
if (debugging_boolsrch) {
int i;
if (debugging_boolsrch)
fprintf (aa_stderr, PROGNAME"313 BITVEC[%ld]:\n", save_stemno);
for (i=0; i<bitveclen; i++) {
fprintf (aa_stderr, " %02x", bitvecs[save_stemno][i]);
if (i > 22)
break;
}
fputc ('\n', aa_stderr);
fflush (aa_stderr);
}
if (d99recno == 0) {
/* Normal conclusion. Increment to next stem.
* If not all stems have been read,
* this is still the next workproc.
* Otherwise the next workproc is the one
* merging all bitvectors into the final
* result bitvec using the truth table.
*/
usrblk.retncode = OE_SEARCHING;
if (++save_stemno < saveusr.stemcount)
usrblk.workproc = read_stem_bitvec_WK;
else
usrblk.workproc = calc_result_bitvec_WK;
}
else
/* d99recno must be -1 */
usrblk.retncode = OE_SYSTEM_STOP;
return;
} /* read_stem_bitvec_WK() */
/****************************************/
/* */
/* boolean_search */
/* */
/****************************************/
/* Called from Opera_Engine after successful boolean_parse().
* Expects valid globals: saveusr.stems, saveusr.stemcount,
* usrblk.stems (contains original unstemmed query terms for msgs),
* usrblk.search_type, final_truthtab, qry_has_no_NOTs,
* and qry_is_all_ANDs.
* Based on parts of the function ve_word_search().
* Upon return, usrblk.retncode, msglist, etc is appropriately loaded.
* Upon successful return usrblk.stems, usrblk.stemcount,
* and dittolist are also loaded.
*/
void boolean_search (void)
{
int i, j;
size_t allocsz_needed;
/* Sanity checks */
if ( saveusr.stemcount <= 0 ||
final_truthtab.pmsz <= 0 ||
final_truthtab.pmsz >= 256 ) {
fprintf (aa_stderr, catgets(dtsearch_catd, MS_boolsrch, 35,
"%s Program Error: stemct=%d pmsz=%d\n") ,
PROGNAME"1404", saveusr.stemcount, final_truthtab.pmsz);
DtSearchExit (14);
}
/*---------- Init globals ----------*/
if (!msgbuf)
msgbuf = austext_malloc (500, PROGNAME"393", NULL);
debugging_boolsrch = (usrblk.debug & USRDBG_SRCHCMPL);
need_zero_permute = (final_truthtab.permutes[0] == 0);
do_stat_sort = ((usrblk.flags & USR_SORT_WHITL) != 0);
check_dates = (usrblk.objdate1 || usrblk.objdate2);
or_abstrsz = usrblk.dblk->dbrec.or_abstrsz;
or_fzkeysz = usrblk.dblk->dbrec.or_fzkeysz;
or_language = usrblk.dblk->dbrec.or_language;
or_maxdba = usrblk.dblk->dbrec.or_maxdba;
usrblk.flags &= ~USR_STOPSRCH; /* turn off stop button */
saveusr.vistano = vistano = usrblk.dblk->vistano;
saveusr.dittolist = NULL;
saveusr.dittocount = 0L;
saveusr.iterations = INIT_ITERATIONS;
/*
* saveusr.ktchars is a string holding
* first char of desired record ids.
*/
all_key_types = TRUE;
for (i = 0, j = 0; i < usrblk.dblk->ktcount; i++) {
if (usrblk.dblk->keytypes[i].is_selected)
saveusr.ktchars[j++] = usrblk.dblk->keytypes[i].ktchar;
else
all_key_types = FALSE;
}
saveusr.ktchars[j] = '\0';
or_recslots = (long) (usrblk.dblk->dbrec.or_recslots);
or_reccount = usrblk.dblk->dbrec.or_reccount;
/* RECFRST is just to get the slot# (dba) of the
* first real object record after the dbrec.
* Currently the dbrec occupies only one slot,
* the first (#1), so dba will usually be #2.
*/
/********
RECFRST(PROGNAME"2545", OR_OBJREC, saveusr.vistano);
CRGET(PROGNAME"2546", &dba, saveusr.vistano);
dba &= 0x00FFFFFF;
********/
tot_addr_count = ((usrblk.dblk->dbrec.or_maxdba + 1) / or_recslots) + 1;
bitveclen = (tot_addr_count >> 3) + 1;
if (debugging_boolsrch) {
fprintf (aa_stderr, PROGNAME"360 "
"boolean_search: typ='%c' needzpm?=%d sort?=%d maxhits=%d\n"
" maxdba=%ld recct=%ld recslts=%ld\n"
" totnmadr=%ld bvln=%ld allkts?=%d ktchars='%s'\n"
,usrblk.search_type
,need_zero_permute
,do_stat_sort
,aa_maxhits
,usrblk.dblk->dbrec.or_maxdba
,or_reccount
,or_recslots
,tot_addr_count
,bitveclen
,all_key_types
,saveusr.ktchars
);
fflush (aa_stderr);
}
/*---------- Read vista btree ----------
* Load or_wordrecs[] array for each term in saveusr.stems.
*/
if (!load_or_wordrecs())
return;
/* If statistically sorting final resultlist, calculate
* idf (inverse document frequency) for each term using
* the frequency data in or_wordrecs[].
*/
if (do_stat_sort)
calculate_idfs();
/* Bitvector allocation. Number needed is one for each stem,
* plus one extra to accumulate the result bitvector.
*/
allocsz_needed = bitveclen * (saveusr.stemcount + 1);
if (debugging_boolsrch)
fprintf (aa_stderr, PROGNAME"430 "
"bitvecs[] alloc needed=%ld (bvln=%ld stems=%d+1), have=%ld.\n",
allocsz_needed, bitveclen, saveusr.stemcount, bitvec_allocsz);
if (bitvec_allocsz < allocsz_needed) {
if (bitvec_allocp)
free (bitvec_allocp);
bitvec_allocp = austext_malloc (allocsz_needed + 16,
PROGNAME"508", NULL);
if (debugging_boolsrch)
fprintf (aa_stderr, PROGNAME"432 bitvecs[] realloc %ld-->%ld.\n",
bitvec_allocsz, allocsz_needed);
bitvec_allocsz = allocsz_needed;
}
/* Clear all bitvecs to zero and assign them */
memset (bitvec_allocp, 0, allocsz_needed);
for (i = 0; i < saveusr.stemcount; i++)
bitvecs[i] = bitvec_allocp + (i * bitveclen);
result_bitvec = bitvec_allocp + (i * bitveclen);
/* If sorting statistically, allocate weight vector.
* One float for each db record.
*/
if (wtvec) {
free (wtvec);
wtvec = NULL;
}
if (do_stat_sort) {
wtvec = austext_malloc ((tot_addr_count + 4) * sizeof(float) + 4,
PROGNAME"040", NULL);
memset (wtvec, 0, (tot_addr_count + 4) * sizeof(float));
}
/* The 'zero permute' is every record that has
* NONE of the query terms in it. It can only be
* generated if a NOT operator was included in the query.
*/
if (need_zero_permute) {
sprintf (msgbuf, catgets (dtsearch_catd, MS_boolsrch, 15,
"%s Your query requires retrieving every\n"
"document in the database that does not have any of\n"
"your query words. This type of search may take an\n"
"unusually long time."),
PROGNAME"1536");
DtSearchAddMessage (msgbuf);
}
if (debugging_boolsrch)
fflush (aa_stderr);
/* Searches may take a long time. To allow gui to put a
* a 'working' dialog box and a 'cancel' button,
* we pass execution to workprocs.
* If user cannot cancel search no matter how
* long it may take, we call each of the subsequent
* workproc functions directly from here.
* Otherwise they will themselves setup each
* subsequent call to usrblk.workproc(), as long as
* the previous call returns OE_SEARCHING and the user
* hasn't pushed USR_STOPSRCH.
*/
usrblk.workproc = read_stem_bitvec_WK;
save_stemno = 0; /* global arg for first workproc */
usrblk.workproc(); /* direct call to first workproc */
if ((usrblk.flags & USR_NO_ITERATE) != 0 &&
(usrblk.debug & USRDBG_ITERATE) == 0) {
while (usrblk.retncode == OE_SEARCHING)
usrblk.workproc();
}
return;
} /* boolean_search() */
/************************** BOOLSRCH.C **********************/