mirror of
https://gitee.com/clygintang/Dockfile-Coreseek.git
synced 2025-07-21 00:00:15 +08:00
1176 lines
32 KiB
C++
Executable File
1176 lines
32 KiB
C++
Executable File
//
|
|
// $Id$
|
|
//
|
|
|
|
//
|
|
// Copyright (c) 2001-2011, Andrew Aksyonoff
|
|
// Copyright (c) 2008-2011, Sphinx Technologies Inc
|
|
// All rights reserved
|
|
//
|
|
// This program is free software; you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License. You should have
|
|
// received a copy of the GPL license along with this program; if you
|
|
// did not, you can find it at http://www.gnu.org/
|
|
//
|
|
|
|
#ifndef _sphinxint_
|
|
#define _sphinxint_
|
|
|
|
#include "sphinx.h"
|
|
#include "sphinxfilter.h"
|
|
|
|
#include <sys/stat.h>
|
|
#include <fcntl.h>
|
|
#include <float.h>
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
// special in-stream codes
const char	MAGIC_SYNONYM_WHITESPACE	= 1;	// used internally in tokenizer only
const char	MAGIC_CODE_SENTENCE			= 2;	// emitted from tokenizer on sentence boundary
const char	MAGIC_CODE_PARAGRAPH		= 3;	// emitted from stripper (and passed via tokenizer) on paragraph boundary
const char	MAGIC_CODE_ZONE				= 4;	// emitted from stripper (and passed via tokenizer) on zone boundary; followed by zero-terminated zone name

// special keyword marker bytes (note that head and tail markers share the same value)
const char	MAGIC_WORD_HEAD				= 1;	// prepended to keyword by source, stored in (crc) dictionary
const char	MAGIC_WORD_TAIL				= 1;	// appended to keyword by source, stored in (crc) dictionary
const char	MAGIC_WORD_HEAD_NONSTEMMED	= 2;	// prepended to keyword by source, stored in dictionary

// special keywords; defined elsewhere
extern const char *	MAGIC_WORD_SENTENCE;
extern const char *	MAGIC_WORD_PARAGRAPH;
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
// O_BINARY is not available everywhere; fall back to a no-op flag when it is missing
#ifndef O_BINARY
#define SPH_O_BINARY 0
#else
#define SPH_O_BINARY O_BINARY
#endif

// open() flag sets: read-only access, and create-or-truncate with read/write access
#define SPH_O_READ	( O_RDONLY | SPH_O_BINARY )
#define SPH_O_NEW	( O_CREAT | O_RDWR | O_TRUNC | SPH_O_BINARY )

#define MVA_DOWNSIZE		DWORD			// MVA32 offset type
#define MVA_OFFSET_MASK		0x7fffffffUL	// MVA offset mask (low 31 bits)
#define MVA_ARENA_FLAG		0x80000000UL	// MVA global-arena flag (top bit)
|
|
/// assemble one 64-bit MVA value from two consecutive DWORDs
/// pMva[0] supplies the low 32 bits, pMva[1] supplies the high 32 bits
inline uint64_t MVA_UPSIZE ( const DWORD * pMva )
{
	const uint64_t uLow = (uint64_t)pMva[0];
	const uint64_t uHigh = (uint64_t)pMva[1];
	return uLow | ( uHigh<<32 );
}
|
|
|
|
|
|
/// file writer with write buffering and int encoder
|
|
class CSphWriter : ISphNoncopyable
|
|
{
|
|
public:
|
|
CSphWriter ();
|
|
virtual ~CSphWriter ();
|
|
|
|
void SetBufferSize ( int iBufferSize ); ///< tune write cache size; must be called before OpenFile() or SetFile()
|
|
|
|
bool OpenFile ( const CSphString & sName, CSphString & sErrorBuffer );
|
|
void SetFile ( int iFD, SphOffset_t * pSharedOffset );
|
|
void CloseFile ( bool bTruncate = false ); ///< note: calls Flush(), ie. IsError() might get true after this call
|
|
|
|
void PutByte ( int uValue );
|
|
void PutBytes ( const void * pData, int iSize );
|
|
void PutDword ( DWORD uValue ) { PutBytes ( &uValue, sizeof(DWORD) ); }
|
|
void PutOffset ( SphOffset_t uValue ) { PutBytes ( &uValue, sizeof(SphOffset_t) ); }
|
|
void PutString ( const char * szString );
|
|
void PutString ( const CSphString & sString );
|
|
|
|
void SeekTo ( SphOffset_t pos ); ///< seeking inside the buffer will truncate it
|
|
|
|
#if USE_64BIT
|
|
void PutDocid ( SphDocID_t uValue ) { PutOffset ( uValue ); }
|
|
#else
|
|
void PutDocid ( SphDocID_t uValue ) { PutDword ( uValue ); }
|
|
#endif
|
|
|
|
void ZipInt ( DWORD uValue );
|
|
void ZipOffset ( SphOffset_t uValue );
|
|
void ZipOffsets ( CSphVector<SphOffset_t> * pData );
|
|
|
|
bool IsError () const { return m_bError; }
|
|
SphOffset_t GetPos () const { return m_iPos; }
|
|
|
|
protected:
|
|
CSphString m_sName;
|
|
SphOffset_t m_iPos;
|
|
SphOffset_t m_iWritten;
|
|
|
|
int m_iFD;
|
|
int m_iPoolUsed;
|
|
BYTE * m_pBuffer;
|
|
BYTE * m_pPool;
|
|
bool m_bOwnFile;
|
|
SphOffset_t * m_pSharedOffset;
|
|
int m_iBufferSize;
|
|
|
|
bool m_bError;
|
|
CSphString * m_pError;
|
|
|
|
virtual void Flush ();
|
|
};
|
|
|
|
|
|
|
|
/// file which closes automatically when going out of scope
|
|
class CSphAutofile : ISphNoncopyable
|
|
{
|
|
protected:
|
|
int m_iFD; ///< my file descriptior
|
|
CSphString m_sFilename; ///< my file name
|
|
bool m_bTemporary; ///< whether to unlink this file on Close()
|
|
|
|
CSphIndex::ProgressCallback_t * m_pProgress; ///< for displaying progress
|
|
CSphIndexProgress * m_pStat;
|
|
|
|
public:
|
|
CSphAutofile ();
|
|
CSphAutofile ( const CSphString & sName, int iMode, CSphString & sError, bool bTemp=false );
|
|
~CSphAutofile ();
|
|
|
|
int Open ( const CSphString & sName, int iMode, CSphString & sError, bool bTemp=false );
|
|
void Close ();
|
|
|
|
public:
|
|
int GetFD () const { return m_iFD; }
|
|
const char * GetFilename () const;
|
|
SphOffset_t GetSize ( SphOffset_t iMinSize, bool bCheckSizeT, CSphString & sError );
|
|
SphOffset_t GetSize ();
|
|
|
|
bool Read ( void * pBuf, size_t uCount, CSphString & sError );
|
|
void SetProgressCallback ( CSphIndex::ProgressCallback_t * pfnProgress, CSphIndexProgress * pStat );
|
|
};
|
|
|
|
|
|
/// file reader with read buffering and int decoder
|
|
class CSphReader
|
|
{
|
|
public:
|
|
CSphReader ( BYTE * pBuf=NULL, int iSize=0 );
|
|
virtual ~CSphReader ();
|
|
|
|
void SetBuffers ( int iReadBuffer, int iReadUnhinted );
|
|
void SetFile ( int iFD, const char * sFilename );
|
|
void SetFile ( const CSphAutofile & tFile );
|
|
void Reset ();
|
|
void SeekTo ( SphOffset_t iPos, int iSizeHint );
|
|
|
|
void SkipBytes ( int iCount );
|
|
SphOffset_t GetPos () const { return m_iPos+m_iBuffPos; }
|
|
|
|
void GetBytes ( void * pData, int iSize );
|
|
int GetBytesZerocopy ( const BYTE ** ppData, int iMax ); ///< zerocopy method; returns actual length present in buffer (upto iMax)
|
|
|
|
int GetByte ();
|
|
DWORD GetDword ();
|
|
SphOffset_t GetOffset ();
|
|
CSphString GetString ();
|
|
int GetLine ( char * sBuffer, int iMaxLen );
|
|
|
|
DWORD UnzipInt ();
|
|
SphOffset_t UnzipOffset ();
|
|
|
|
SphOffset_t Tell () const { return m_iPos + m_iBuffPos; }
|
|
bool GetErrorFlag () const { return m_bError; }
|
|
const CSphString & GetErrorMessage () const { return m_sError; }
|
|
const CSphString & GetFilename() const { return m_sFilename; }
|
|
|
|
#if USE_64BIT
|
|
SphDocID_t GetDocid () { return GetOffset(); }
|
|
SphDocID_t UnzipDocid () { return UnzipOffset(); }
|
|
SphWordID_t UnzipWordid () { return UnzipOffset(); }
|
|
#else
|
|
SphDocID_t GetDocid () { return GetDword(); }
|
|
SphDocID_t UnzipDocid () { return UnzipInt(); }
|
|
SphWordID_t UnzipWordid () { return UnzipInt(); }
|
|
#endif
|
|
|
|
const CSphReader & operator = ( const CSphReader & rhs );
|
|
|
|
protected:
|
|
|
|
int m_iFD;
|
|
SphOffset_t m_iPos;
|
|
|
|
int m_iBuffPos;
|
|
int m_iBuffUsed;
|
|
BYTE * m_pBuff;
|
|
int m_iSizeHint; ///< how much do we expect to read
|
|
|
|
int m_iBufSize;
|
|
bool m_bBufOwned;
|
|
int m_iReadUnhinted;
|
|
|
|
bool m_bError;
|
|
CSphString m_sError;
|
|
CSphString m_sFilename;
|
|
|
|
private:
|
|
void UpdateCache ();
|
|
};
|
|
|
|
/// scoped reader
|
|
class CSphAutoreader : public CSphReader
|
|
{
|
|
public:
|
|
CSphAutoreader ( BYTE * pBuf=NULL, int iSize=0 ) : CSphReader ( pBuf, iSize ) {}
|
|
~CSphAutoreader ();
|
|
|
|
bool Open ( const CSphString & sFilename, CSphString & sError );
|
|
void Close ();
|
|
SphOffset_t GetFilesize ();
|
|
|
|
public:
|
|
// added for DebugCheck()
|
|
int GetFD () { return m_iFD; }
|
|
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
/// per-query search context
|
|
/// everything that index needs to compute/create to process the query
|
|
class CSphQueryContext
|
|
{
|
|
public:
|
|
// searching-only, per-query
|
|
int m_iWeights; ///< search query field weights count
|
|
int m_dWeights [ SPH_MAX_FIELDS ]; ///< search query field weights
|
|
|
|
bool m_bLookupFilter; ///< row data lookup required at filtering stage
|
|
bool m_bLookupSort; ///< row data lookup required at sorting stage
|
|
|
|
ISphFilter * m_pFilter;
|
|
ISphFilter * m_pWeightFilter;
|
|
|
|
struct CalcItem_t
|
|
{
|
|
CSphAttrLocator m_tLoc; ///< result locator
|
|
ESphAttr m_eType; ///< result type
|
|
ISphExpr * m_pExpr; ///< evaluator (non-owned)
|
|
};
|
|
CSphVector<CalcItem_t> m_dCalcFilter; ///< items to compute for filtering
|
|
CSphVector<CalcItem_t> m_dCalcSort; ///< items to compute for sorting/grouping
|
|
CSphVector<CalcItem_t> m_dCalcFinal; ///< items to compute when finalizing result set
|
|
|
|
const CSphVector<CSphAttrOverride> * m_pOverrides; ///< overridden attribute values
|
|
CSphVector<CSphAttrLocator> m_dOverrideIn;
|
|
CSphVector<CSphAttrLocator> m_dOverrideOut;
|
|
|
|
void * m_pIndexData; ///< backend specific data
|
|
|
|
public:
|
|
CSphQueryContext ();
|
|
~CSphQueryContext ();
|
|
|
|
void BindWeights ( const CSphQuery * pQuery, const CSphSchema & tSchema, int iIndexWeight );
|
|
bool SetupCalc ( CSphQueryResult * pResult, const CSphSchema & tInSchema, const CSphSchema & tSchema, const DWORD * pMvaPool );
|
|
bool CreateFilters ( bool bFullscan, const CSphVector<CSphFilterSettings> * pdFilters, const CSphSchema & tSchema, const DWORD * pMvaPool, CSphString & sError );
|
|
bool SetupOverrides ( const CSphQuery * pQuery, CSphQueryResult * pResult, const CSphSchema & tIndexSchema );
|
|
|
|
void CalcFilter ( CSphMatch & tMatch ) const;
|
|
void CalcSort ( CSphMatch & tMatch ) const;
|
|
void CalcFinal ( CSphMatch & tMatch ) const;
|
|
|
|
// rt index bind pools at segment searching, not at time it setups context
|
|
void SetStringPool ( const BYTE * pStrings );
|
|
void SetMVAPool ( const DWORD * pMva );
|
|
};
|
|
|
|
struct SphStringSorterRemap_t
|
|
{
|
|
CSphAttrLocator m_tSrc;
|
|
CSphAttrLocator m_tDst;
|
|
};
|
|
|
|
ISphExpr * sphSortSetupExpr ( const CSphString & sName, const CSphSchema & tIndexSchema );
|
|
bool sphSortGetStringRemap ( const CSphSchema & tSorterSchema, const CSphSchema & tIndexSchema, CSphVector<SphStringSorterRemap_t> & dAttrs );
|
|
void sphSortRemoveInternalAttrs ( CSphSchema & tSchema );
|
|
bool sphIsSortStringInternal ( const char * sColumnName );
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
bool sphWriteThrottled ( int iFD, const void * pBuf, int64_t iCount, const char * sName, CSphString & sError );
|
|
|
|
void SafeClose ( int & iFD );
|
|
|
|
void sphMergeStats ( CSphQueryResultMeta & tDstResult, const SmallStringHash_T<CSphQueryResultMeta::WordStat_t> & hSrc );
|
|
|
|
bool sphCheckQueryHeight ( const struct XQNode_t * pRoot, CSphString & sError );
|
|
|
|
void sphTransformExtendedQuery ( XQNode_t ** ppNode );
|
|
|
|
const BYTE * SkipQuoted ( const BYTE * p );
|
|
|
|
class ISphBinlog : ISphNoncopyable
|
|
{
|
|
public:
|
|
virtual ~ISphBinlog () {}
|
|
|
|
virtual void BinlogUpdateAttributes ( const char * sIndexName, int64_t iTID, const CSphAttrUpdate & tUpd ) = 0;
|
|
virtual void NotifyIndexFlush ( const char * sIndexName, int64_t iTID, bool bShutdown ) = 0;
|
|
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
/// memory tracker categories
namespace Memory
{
	/// enumerators are numbered sequentially from 0; SPH_MEM_TOTAL comes last and so equals the category count
	enum Category_e
	{
		SPH_MEM_CORE,

		SPH_MEM_IDX_DISK,
		SPH_MEM_IDX_RT,
		SPH_MEM_IDX_RT_ACCUM,

		SPH_MEM_MMAPED,

		SPH_MEM_BINLOG,

		SPH_MEM_HANDLE_NONSQL,
		SPH_MEM_HANDLE_SQL,

		SPH_MEM_SEARCH_NONSQL,
		SPH_MEM_QUERY_NONSQL,
		SPH_MEM_INSERT_SQL,
		SPH_MEM_SELECT_SQL,
		SPH_MEM_DELETE_SQL,
		SPH_MEM_COMMIT_SET_SQL,
		SPH_MEM_COMMIT_BEGIN_SQL,
		SPH_MEM_COMMIT_SQL,

		SPH_MEM_IDX_DISK_MULTY_QUERY,
		SPH_MEM_IDX_DISK_MULTY_QUERY_EX,
		SPH_MEM_IDX_RT_MULTY_QUERY,

		SPH_MEM_IDX_RT_RES_MATCHES,
		SPH_MEM_IDX_RT_RES_STRINGS,

		SPH_MEM_TOTAL
	};
}
|
|
|
|
#if SPH_ALLOCS_PROFILER

void sphMemStatPush ( Memory::Category_e eCategory );
void sphMemStatPop ( Memory::Category_e eCategory );

/// scoped memory tracker
/// calls sphMemStatPush() for the category on construction and sphMemStatPop() on destruction
struct MemTracker_c : ISphNoncopyable
{
	const Memory::Category_e m_eCategory; ///< category

	/// ctor
	explicit MemTracker_c ( Memory::Category_e eCategory )
		: m_eCategory ( eCategory )
	{
		sphMemStatPush ( m_eCategory );
	}

	/// dtor
	~MemTracker_c ()
	{
		sphMemStatPop ( m_eCategory );
	}
};

// token pasting must go through an extra macro level: an operand of ## is never
// macro-expanded, so a direct tracker_##__LINE__##name yields the literal identifier
// "tracker___LINE__name" and two trackers of one category in a scope would collide
#define SPH_MEMORY_PASTE3_IMPL(a,b,c) a##b##c
#define SPH_MEMORY_PASTE3(a,b,c) SPH_MEMORY_PASTE3_IMPL(a,b,c)

/// declare a scoped tracker for Memory::name; the variable name embeds the current line number
#define MEMORY(name) MemTracker_c SPH_MEMORY_PASTE3(tracker_,__LINE__,name)(Memory::name);

#else // SPH_ALLOCS_PROFILER 0

/// profiler is disabled; expands to nothing
#define MEMORY(name)

#endif // if SPH_ALLOCS_PROFILER
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
#define DOCINFO_INDEX_FREQ 128 // FIXME? make this configurable
|
|
|
|
struct CSphDocMVA
|
|
{
|
|
SphDocID_t m_iDocID;
|
|
CSphVector < CSphVector<DWORD> > m_dMVA;
|
|
CSphVector < DWORD > m_dOffsets;
|
|
|
|
explicit CSphDocMVA ( int iSize )
|
|
: m_iDocID ( 0 )
|
|
{
|
|
m_dMVA.Resize ( iSize );
|
|
m_dOffsets.Resize ( iSize );
|
|
}
|
|
|
|
void Read ( CSphReader & tReader );
|
|
void Write ( CSphWriter & tWriter );
|
|
};
|
|
|
|
/// attr min-max builder
|
|
template < typename DOCID = SphDocID_t >
|
|
class AttrIndexBuilder_t : ISphNoncopyable
|
|
{
|
|
private:
|
|
CSphVector<CSphAttrLocator> m_dIntAttrs;
|
|
CSphVector<CSphAttrLocator> m_dFloatAttrs;
|
|
CSphVector<CSphAttrLocator> m_dMvaAttrs;
|
|
CSphVector<SphAttr_t> m_dIntMin;
|
|
CSphVector<SphAttr_t> m_dIntMax;
|
|
CSphVector<SphAttr_t> m_dIntIndexMin;
|
|
CSphVector<SphAttr_t> m_dIntIndexMax;
|
|
CSphVector<float> m_dFloatMin;
|
|
CSphVector<float> m_dFloatMax;
|
|
CSphVector<float> m_dFloatIndexMin;
|
|
CSphVector<float> m_dFloatIndexMax;
|
|
CSphVector<uint64_t> m_dMvaMin;
|
|
CSphVector<uint64_t> m_dMvaMax;
|
|
CSphVector<uint64_t> m_dMvaIndexMin;
|
|
CSphVector<uint64_t> m_dMvaIndexMax;
|
|
DWORD m_uStride; // size of attribute's chunk (in DWORDs)
|
|
DWORD m_uElements; // counts total number of collected min/max pairs
|
|
int m_iLoop; // loop inside one set
|
|
DWORD * m_pOutBuffer; // storage for collected min/max
|
|
DWORD * m_pOutMax; // storage max for bound checking
|
|
DOCID m_uStart; // first and last docids of current chunk
|
|
DOCID m_uLast;
|
|
DOCID m_uIndexStart; // first and last docids of whole index
|
|
DOCID m_uIndexLast;
|
|
int m_iMva64;
|
|
|
|
private:
|
|
void ResetLocal();
|
|
void FlushComputed ( bool bUseAttrs, bool bUseMvas );
|
|
void UpdateMinMaxDocids ( DOCID uDocID );
|
|
void CollectRowMVA ( int iAttr, DWORD uCount, const DWORD * pMva );
|
|
|
|
public:
|
|
explicit AttrIndexBuilder_t ( const CSphSchema & tSchema );
|
|
|
|
void Prepare ( DWORD * pOutBuffer, DWORD * pOutMax );
|
|
|
|
void CollectWithoutMvas ( const DWORD * pCur, bool bUseMvas );
|
|
bool Collect ( const DWORD * pCur, const DWORD * pMvas, int64_t iMvasCount, CSphString & sError );
|
|
void Collect ( const DWORD * pCur, const struct CSphDocMVA & dMvas );
|
|
void CollectMVA ( DOCID uDocID, const CSphVector< CSphVector<DWORD> > & dCurInfo );
|
|
|
|
void FinishCollect ( bool bMvaOnly = false );
|
|
|
|
/// actually used part of output buffer, only used with index merge
|
|
/// (we reserve space for rows from both indexes, but might kill some rows)
|
|
inline DWORD GetActualSize() const
|
|
{
|
|
return 2 * m_uElements * m_uStride;
|
|
}
|
|
|
|
/// how many DWORDs will we need for block index
|
|
inline DWORD GetExpectedSize ( DWORD uMaxDocs ) const
|
|
{
|
|
DWORD uDocinfoIndex = ( uMaxDocs + DOCINFO_INDEX_FREQ - 1 ) / DOCINFO_INDEX_FREQ;
|
|
return 2 * ( 1 + uDocinfoIndex ) * m_uStride;
|
|
}
|
|
};
|
|
|
|
typedef AttrIndexBuilder_t<> AttrIndexBuilder_c;
|
|
|
|
// fallback for build systems that do not provide LLONG_MAX
// the standard constant is a signed long long, so the all-ones-shifted value gets cast back
// to long long; otherwise the fallback would silently be unsigned in comparisons and arithmetic
#ifndef LLONG_MAX
#define LLONG_MAX ((long long)(((unsigned long long)(-1))>>1))
#endif
|
|
|
|
template < typename DOCID >
|
|
void AttrIndexBuilder_t<DOCID>::ResetLocal()
|
|
{
|
|
ARRAY_FOREACH ( i, m_dIntMin )
|
|
{
|
|
m_dIntMin[i] = LLONG_MAX;
|
|
m_dIntMax[i] = 0;
|
|
}
|
|
ARRAY_FOREACH ( i, m_dFloatMin )
|
|
{
|
|
m_dFloatMin[i] = FLT_MAX;
|
|
m_dFloatMax[i] = -FLT_MAX;
|
|
}
|
|
ARRAY_FOREACH ( i, m_dMvaMin )
|
|
{
|
|
m_dMvaMin[i] = LLONG_MAX;
|
|
m_dMvaMax[i] = 0;
|
|
}
|
|
m_uStart = m_uLast = 0;
|
|
m_iLoop = 0;
|
|
}
|
|
|
|
template < typename DOCID >
|
|
void AttrIndexBuilder_t<DOCID>::FlushComputed ( bool bUseAttrs, bool bUseMvas )
|
|
{
|
|
assert ( m_pOutBuffer );
|
|
DWORD * pMinEntry = m_pOutBuffer + 2 * m_uElements * m_uStride;
|
|
DWORD * pMinAttrs = DOCINFO2ATTRS ( pMinEntry );
|
|
DWORD * pMaxEntry = pMinEntry + m_uStride;
|
|
DWORD * pMaxAttrs = pMinAttrs + m_uStride;
|
|
|
|
assert ( pMaxEntry+m_uStride<=m_pOutMax );
|
|
assert ( pMaxAttrs+m_uStride-DOCINFO_IDSIZE<=m_pOutMax );
|
|
|
|
m_uIndexLast = m_uLast;
|
|
|
|
DOCINFOSETID ( pMinEntry, m_uStart );
|
|
DOCINFOSETID ( pMaxEntry, m_uLast );
|
|
|
|
if ( bUseAttrs )
|
|
{
|
|
ARRAY_FOREACH ( i, m_dIntAttrs )
|
|
{
|
|
m_dIntIndexMin[i] = Min ( m_dIntIndexMin[i], m_dIntMin[i] );
|
|
m_dIntIndexMax[i] = Max ( m_dIntIndexMax[i], m_dIntMax[i] );
|
|
sphSetRowAttr ( pMinAttrs, m_dIntAttrs[i], m_dIntMin[i] );
|
|
sphSetRowAttr ( pMaxAttrs, m_dIntAttrs[i], m_dIntMax[i] );
|
|
}
|
|
ARRAY_FOREACH ( i, m_dFloatAttrs )
|
|
{
|
|
m_dFloatIndexMin[i] = Min ( m_dFloatIndexMin[i], m_dFloatMin[i] );
|
|
m_dFloatIndexMax[i] = Max ( m_dFloatIndexMax[i], m_dFloatMax[i] );
|
|
sphSetRowAttr ( pMinAttrs, m_dFloatAttrs[i], sphF2DW ( m_dFloatMin[i] ) );
|
|
sphSetRowAttr ( pMaxAttrs, m_dFloatAttrs[i], sphF2DW ( m_dFloatMax[i] ) );
|
|
}
|
|
}
|
|
|
|
if ( bUseMvas )
|
|
ARRAY_FOREACH ( i, m_dMvaAttrs )
|
|
{
|
|
m_dMvaIndexMin[i] = Min ( m_dMvaIndexMin[i], m_dMvaMin[i] );
|
|
m_dMvaIndexMax[i] = Max ( m_dMvaIndexMax[i], m_dMvaMax[i] );
|
|
sphSetRowAttr ( pMinAttrs, m_dMvaAttrs[i], m_dMvaMin[i] );
|
|
sphSetRowAttr ( pMaxAttrs, m_dMvaAttrs[i], m_dMvaMax[i] );
|
|
}
|
|
|
|
m_uElements++;
|
|
ResetLocal();
|
|
}
|
|
|
|
template < typename DOCID >
|
|
void AttrIndexBuilder_t<DOCID>::UpdateMinMaxDocids ( DOCID uDocID )
|
|
{
|
|
if ( !m_uStart )
|
|
m_uStart = uDocID;
|
|
if ( !m_uIndexStart )
|
|
m_uIndexStart = uDocID;
|
|
m_uLast = uDocID;
|
|
}
|
|
|
|
template < typename DOCID >
|
|
AttrIndexBuilder_t<DOCID>::AttrIndexBuilder_t ( const CSphSchema & tSchema )
|
|
: m_uStride ( DWSIZEOF(DOCID) + tSchema.GetRowSize() )
|
|
, m_uElements ( 0 )
|
|
, m_iLoop ( 0 )
|
|
, m_pOutBuffer ( NULL )
|
|
, m_pOutMax ( NULL )
|
|
, m_uStart ( 0 )
|
|
, m_uLast ( 0 )
|
|
, m_uIndexStart ( 0 )
|
|
, m_uIndexLast ( 0 )
|
|
{
|
|
for ( int i=0; i<tSchema.GetAttrsCount(); i++ )
|
|
{
|
|
const CSphColumnInfo & tCol = tSchema.GetAttr(i);
|
|
switch ( tCol.m_eAttrType )
|
|
{
|
|
case SPH_ATTR_INTEGER:
|
|
case SPH_ATTR_TIMESTAMP:
|
|
case SPH_ATTR_BOOL:
|
|
case SPH_ATTR_BIGINT:
|
|
m_dIntAttrs.Add ( tCol.m_tLocator );
|
|
break;
|
|
|
|
case SPH_ATTR_FLOAT:
|
|
m_dFloatAttrs.Add ( tCol.m_tLocator );
|
|
break;
|
|
|
|
case SPH_ATTR_UINT32SET:
|
|
m_dMvaAttrs.Add ( tCol.m_tLocator );
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
m_iMva64 = m_dMvaAttrs.GetLength();
|
|
for ( int i=0; i<tSchema.GetAttrsCount(); i++ )
|
|
{
|
|
const CSphColumnInfo & tCol = tSchema.GetAttr(i);
|
|
if ( tCol.m_eAttrType==SPH_ATTR_UINT64SET )
|
|
m_dMvaAttrs.Add ( tCol.m_tLocator );
|
|
}
|
|
|
|
|
|
m_dIntMin.Resize ( m_dIntAttrs.GetLength() );
|
|
m_dIntMax.Resize ( m_dIntAttrs.GetLength() );
|
|
m_dIntIndexMin.Resize ( m_dIntAttrs.GetLength() );
|
|
m_dIntIndexMax.Resize ( m_dIntAttrs.GetLength() );
|
|
m_dFloatMin.Resize ( m_dFloatAttrs.GetLength() );
|
|
m_dFloatMax.Resize ( m_dFloatAttrs.GetLength() );
|
|
m_dFloatIndexMin.Resize ( m_dFloatAttrs.GetLength() );
|
|
m_dFloatIndexMax.Resize ( m_dFloatAttrs.GetLength() );
|
|
m_dMvaMin.Resize ( m_dMvaAttrs.GetLength() );
|
|
m_dMvaMax.Resize ( m_dMvaAttrs.GetLength() );
|
|
m_dMvaIndexMin.Resize ( m_dMvaAttrs.GetLength() );
|
|
m_dMvaIndexMax.Resize ( m_dMvaAttrs.GetLength() );
|
|
}
|
|
|
|
template < typename DOCID >
|
|
void AttrIndexBuilder_t<DOCID>::Prepare ( DWORD * pOutBuffer, DWORD * pOutMax )
|
|
{
|
|
m_pOutBuffer = pOutBuffer;
|
|
m_pOutMax = pOutMax;
|
|
m_uElements = 0;
|
|
m_uIndexStart = m_uIndexLast = 0;
|
|
ARRAY_FOREACH ( i, m_dIntIndexMin )
|
|
{
|
|
m_dIntIndexMin[i] = LLONG_MAX;
|
|
m_dIntIndexMax[i] = 0;
|
|
}
|
|
ARRAY_FOREACH ( i, m_dFloatIndexMin )
|
|
{
|
|
m_dFloatIndexMin[i] = FLT_MAX;
|
|
m_dFloatIndexMax[i] = -FLT_MAX;
|
|
}
|
|
ARRAY_FOREACH ( i, m_dMvaIndexMin )
|
|
{
|
|
m_dMvaIndexMin[i] = LLONG_MAX;
|
|
m_dMvaIndexMax[i] = 0;
|
|
}
|
|
ResetLocal();
|
|
}
|
|
|
|
template < typename DOCID >
|
|
void AttrIndexBuilder_t<DOCID>::CollectWithoutMvas ( const DWORD * pCur, bool bUseMvas )
|
|
{
|
|
// check if it is time to flush already collected values
|
|
if ( m_iLoop>=DOCINFO_INDEX_FREQ )
|
|
FlushComputed ( true, bUseMvas );
|
|
|
|
const DWORD * pRow = DOCINFO2ATTRS_T<DOCID>(pCur);
|
|
UpdateMinMaxDocids ( DOCINFO2ID_T<DOCID>(pCur) );
|
|
m_iLoop++;
|
|
|
|
// ints
|
|
ARRAY_FOREACH ( i, m_dIntAttrs )
|
|
{
|
|
SphAttr_t uVal = sphGetRowAttr ( pRow, m_dIntAttrs[i] );
|
|
m_dIntMin[i] = Min ( m_dIntMin[i], uVal );
|
|
m_dIntMax[i] = Max ( m_dIntMax[i], uVal );
|
|
}
|
|
|
|
// floats
|
|
ARRAY_FOREACH ( i, m_dFloatAttrs )
|
|
{
|
|
float fVal = sphDW2F ( (DWORD)sphGetRowAttr ( pRow, m_dFloatAttrs[i] ) );
|
|
m_dFloatMin[i] = Min ( m_dFloatMin[i], fVal );
|
|
m_dFloatMax[i] = Max ( m_dFloatMax[i], fVal );
|
|
}
|
|
}
|
|
|
|
template < typename DOCID >
|
|
void AttrIndexBuilder_t<DOCID>::CollectRowMVA ( int iAttr, DWORD uCount, const DWORD * pMva )
|
|
{
|
|
if ( iAttr>=m_iMva64 )
|
|
{
|
|
assert ( ( uCount%2 )==0 );
|
|
for ( ; uCount>0; uCount-=2, pMva+=2 )
|
|
{
|
|
uint64_t uVal = MVA_UPSIZE ( pMva );
|
|
m_dMvaMin[iAttr] = Min ( m_dMvaMin[iAttr], uVal );
|
|
m_dMvaMax[iAttr] = Max ( m_dMvaMax[iAttr], uVal );
|
|
}
|
|
} else
|
|
{
|
|
for ( ; uCount>0; uCount--, pMva++ )
|
|
{
|
|
DWORD uVal = *pMva;
|
|
m_dMvaMin[iAttr] = Min ( m_dMvaMin[iAttr], uVal );
|
|
m_dMvaMax[iAttr] = Max ( m_dMvaMax[iAttr], uVal );
|
|
}
|
|
}
|
|
}
|
|
|
|
template < typename DOCID >
|
|
bool AttrIndexBuilder_t<DOCID>::Collect ( const DWORD * pCur, const DWORD * pMvas, int64_t iMvasCount, CSphString & sError )
|
|
{
|
|
CollectWithoutMvas ( pCur, true );
|
|
|
|
const DWORD * pRow = DOCINFO2ATTRS_T<DOCID>(pCur);
|
|
SphDocID_t uDocID = DOCINFO2ID_T<DOCID>(pCur);
|
|
|
|
// MVAs
|
|
ARRAY_FOREACH ( i, m_dMvaAttrs )
|
|
{
|
|
SphAttr_t uOff = sphGetRowAttr ( pRow, m_dMvaAttrs[i] );
|
|
if ( !uOff )
|
|
continue;
|
|
|
|
// sanity checks
|
|
if ( uOff>=iMvasCount )
|
|
{
|
|
sError.SetSprintf ( "broken index: mva offset out of bounds, id=" DOCID_FMT, (SphDocID_t)uDocID );
|
|
return false;
|
|
}
|
|
|
|
const DWORD * pMva = pMvas + uOff; // don't care about updates at this point
|
|
|
|
if ( i==0 && DOCINFO2ID_T<DOCID> ( pMva-DWSIZEOF(DOCID) )!=uDocID )
|
|
{
|
|
sError.SetSprintf ( "broken index: mva docid verification failed, id=" DOCID_FMT, (SphDocID_t)uDocID );
|
|
return false;
|
|
}
|
|
|
|
DWORD uCount = *pMva;
|
|
if ( ( uOff+uCount>=iMvasCount ) || ( i>=m_iMva64 && ( uCount%2 )!=0 ) )
|
|
{
|
|
sError.SetSprintf ( "broken index: mva list out of bounds, id=" DOCID_FMT, (SphDocID_t)uDocID );
|
|
return false;
|
|
}
|
|
|
|
// walk and calc
|
|
CollectRowMVA ( i, uCount, pMva );
|
|
}
|
|
return true;
|
|
}
|
|
|
|
template < typename DOCID >
|
|
void AttrIndexBuilder_t<DOCID>::Collect ( const DWORD * pCur, const CSphDocMVA & dMvas )
|
|
{
|
|
CollectWithoutMvas ( pCur, true );
|
|
ARRAY_FOREACH ( i, m_dMvaAttrs )
|
|
{
|
|
CollectRowMVA ( i, dMvas.m_dMVA[i].GetLength(), dMvas.m_dMVA[i].Begin() );
|
|
}
|
|
}
|
|
|
|
template < typename DOCID >
|
|
void AttrIndexBuilder_t<DOCID>::CollectMVA ( DOCID uDocID, const CSphVector< CSphVector<DWORD> > & dCurInfo )
|
|
{
|
|
// check if it is time to flush already collected values
|
|
if ( m_iLoop>=DOCINFO_INDEX_FREQ )
|
|
FlushComputed ( false, true );
|
|
|
|
UpdateMinMaxDocids ( uDocID );
|
|
m_iLoop++;
|
|
|
|
ARRAY_FOREACH ( i, dCurInfo )
|
|
{
|
|
CollectRowMVA ( i, dCurInfo[i].GetLength(), dCurInfo[i].Begin() );
|
|
}
|
|
}
|
|
|
|
template < typename DOCID >
|
|
void AttrIndexBuilder_t<DOCID>::FinishCollect ( bool bMvaOnly )
|
|
{
|
|
assert ( m_pOutBuffer );
|
|
if ( m_iLoop )
|
|
FlushComputed ( !bMvaOnly, true );
|
|
|
|
DWORD * pMinEntry = m_pOutBuffer + 2 * m_uElements * m_uStride;
|
|
DWORD * pMaxEntry = pMinEntry + m_uStride;
|
|
CSphRowitem * pMinAttrs = DOCINFO2ATTRS_T<DOCID> ( pMinEntry );
|
|
CSphRowitem * pMaxAttrs = DOCINFO2ATTRS_T<DOCID> ( pMaxEntry );
|
|
|
|
assert ( pMaxEntry+m_uStride<=m_pOutMax );
|
|
assert ( pMaxAttrs+m_uStride-DWSIZEOF(DOCID)<=m_pOutMax );
|
|
|
|
DOCINFOSETID ( pMinEntry, m_uIndexStart );
|
|
DOCINFOSETID ( pMaxEntry, m_uIndexLast );
|
|
|
|
ARRAY_FOREACH ( i, m_dMvaAttrs )
|
|
{
|
|
sphSetRowAttr ( pMinAttrs, m_dMvaAttrs[i], m_dMvaIndexMin[i] );
|
|
sphSetRowAttr ( pMaxAttrs, m_dMvaAttrs[i], m_dMvaIndexMax[i] );
|
|
}
|
|
|
|
if ( !bMvaOnly )
|
|
{
|
|
ARRAY_FOREACH ( i, m_dIntAttrs )
|
|
{
|
|
sphSetRowAttr ( pMinAttrs, m_dIntAttrs[i], m_dIntIndexMin[i] );
|
|
sphSetRowAttr ( pMaxAttrs, m_dIntAttrs[i], m_dIntIndexMax[i] );
|
|
}
|
|
ARRAY_FOREACH ( i, m_dFloatAttrs )
|
|
{
|
|
sphSetRowAttr ( pMinAttrs, m_dFloatAttrs[i], sphF2DW ( m_dFloatIndexMin[i] ) );
|
|
sphSetRowAttr ( pMaxAttrs, m_dFloatAttrs[i], sphF2DW ( m_dFloatIndexMax[i] ) );
|
|
}
|
|
m_uElements++;
|
|
|
|
} else
|
|
{
|
|
m_uElements = 0; // rewind back for collecting the rest of attributes.
|
|
}
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
/// find a value-enclosing span in a sorted vector (aka an index at which vec[i] <= val < vec[i+1])
|
|
template < typename T >
|
|
static int FindSpan ( const CSphVector<T> & dVec, T tRef, int iSmallTreshold=8 )
|
|
{
|
|
// empty vector
|
|
if ( !dVec.GetLength() )
|
|
return -1;
|
|
|
|
// check last semi-span
|
|
if ( dVec.Last()<tRef || dVec.Last()==tRef )
|
|
return dVec.GetLength()-1;
|
|
|
|
// linear search for small vectors
|
|
if ( dVec.GetLength()<=iSmallTreshold )
|
|
{
|
|
for ( int i=0; i<dVec.GetLength()-1; i++ )
|
|
if ( ( dVec[i]<tRef || dVec[i]==tRef ) && tRef<dVec[i+1] )
|
|
return i;
|
|
return -1;
|
|
}
|
|
|
|
// binary search for longer vectors
|
|
const T * pStart = dVec.Begin();
|
|
const T * pEnd = &dVec.Last();
|
|
|
|
if ( ( pStart[0]<tRef || pStart[0]==tRef ) && tRef<pStart[1] )
|
|
return 0;
|
|
|
|
if ( ( pEnd[-1]<tRef || pEnd[-1]==tRef ) && tRef<pEnd[0] )
|
|
return pEnd-dVec.Begin()-1;
|
|
|
|
while ( pEnd-pStart>1 )
|
|
{
|
|
if ( tRef<*pStart || *pEnd<tRef )
|
|
break;
|
|
assert ( *pStart<tRef );
|
|
assert ( tRef<*pEnd );
|
|
|
|
const T * pMid = pStart + (pEnd-pStart)/2;
|
|
assert ( pMid+1 < &dVec.Last() );
|
|
|
|
if ( ( pMid[0]<tRef || pMid[0]==tRef ) && tRef<pMid[1] )
|
|
return pMid - dVec.Begin();
|
|
|
|
if ( tRef<pMid[0] )
|
|
pEnd = pMid;
|
|
else
|
|
pStart = pMid;
|
|
}
|
|
|
|
return -1;
|
|
}
|
|
|
|
|
|
/// find the index of the lowest zero bit in a 32-bit value, by halving the search window
/// returns 31 when all the bits are set
inline int FindBit ( DWORD uValue )
{
	// we negate bits to compare with 0
	// this makes MSVC emit 'test' instead of 'cmp'
	uValue ^= 0xffffffff;

	int iIdx = 0;
	DWORD uMask = 0xffff;
	int iBits = 16;

	// windows are 16, 8, 4, 2, and 1 bits wide, in that order
	int iSteps = 5;
	while ( iSteps-- )
	{
		const bool bLowPartEmpty = ( ( uValue & uMask )==0 );
		if ( bLowPartEmpty )
		{
			// nothing in the low window; skip over it
			uValue >>= iBits;
			iIdx += iBits;
		}
		iBits >>= 1;
		uMask >>= iBits;
	}
	return iIdx;
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
/// decode UTF-8 codepoint
|
|
/// advances buffer ptr in all cases but end of buffer
|
|
///
|
|
/// returns -1 on failure
|
|
/// returns 0 on end of buffer
|
|
/// returns codepoint on success
|
|
inline int sphUTF8Decode ( BYTE * & pBuf )
|
|
{
|
|
BYTE v = *pBuf;
|
|
if ( !v )
|
|
return 0;
|
|
pBuf++;
|
|
|
|
// check for 7-bit case
|
|
if ( v<128 )
|
|
return v;
|
|
|
|
// get number of bytes
|
|
int iBytes = 0;
|
|
while ( v & 0x80 )
|
|
{
|
|
iBytes++;
|
|
v <<= 1;
|
|
}
|
|
|
|
// check for valid number of bytes
|
|
if ( iBytes<2 || iBytes>4 )
|
|
return -1;
|
|
|
|
int iCode = ( v >> iBytes );
|
|
iBytes--;
|
|
do
|
|
{
|
|
if ( !(*pBuf) )
|
|
return 0; // unexpected eof
|
|
|
|
if ( ((*pBuf) & 0xC0)!=0x80 )
|
|
return -1; // invalid code
|
|
|
|
iCode = ( iCode<<6 ) + ( (*pBuf) & 0x3F );
|
|
iBytes--;
|
|
pBuf++;
|
|
} while ( iBytes );
|
|
|
|
// all good
|
|
return iCode;
|
|
}
|
|
|
|
|
|
/// encode UTF-8 codepoint to buffer
|
|
/// returns number of bytes used
|
|
inline int sphUTF8Encode ( BYTE * pBuf, int iCode )
|
|
{
|
|
if ( iCode<0x80 )
|
|
{
|
|
pBuf[0] = (BYTE)( iCode & 0x7F );
|
|
return 1;
|
|
|
|
} else if ( iCode<0x800 )
|
|
{
|
|
pBuf[0] = (BYTE)( ( (iCode>>6) & 0x1F ) | 0xC0 );
|
|
pBuf[1] = (BYTE)( ( iCode & 0x3F ) | 0x80 );
|
|
return 2;
|
|
|
|
} else
|
|
{
|
|
pBuf[0] = (BYTE)( ( (iCode>>12) & 0x0F ) | 0xE0 );
|
|
pBuf[1] = (BYTE)( ( (iCode>>6) & 0x3F ) | 0x80 );
|
|
pBuf[2] = (BYTE)( ( iCode & 0x3F ) | 0x80 );
|
|
return 3;
|
|
}
|
|
}
|
|
|
|
|
|
/// compute UTF-8 string length in codepoints
|
|
inline int sphUTF8Len ( const char * pStr )
|
|
{
|
|
BYTE * pBuf = (BYTE*) pStr;
|
|
int iRes = 0, iCode;
|
|
|
|
while ( ( iCode = sphUTF8Decode(pBuf) )!=0 )
|
|
if ( iCode>0 )
|
|
iRes++;
|
|
|
|
return iRes;
|
|
}
|
|
|
|
|
|
/// compute UTF-8 string length in codepoints
|
|
inline int sphUTF8Len ( const char * pStr, int iMax )
|
|
{
|
|
BYTE * pBuf = (BYTE*) pStr;
|
|
BYTE * pMax = pBuf + iMax;
|
|
int iRes = 0;
|
|
while ( pBuf<pMax )
|
|
{
|
|
sphUTF8Decode ( pBuf );
|
|
iRes++;
|
|
}
|
|
return iRes;
|
|
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
/// hit in the stream
|
|
struct ExtHit_t
|
|
{
|
|
SphDocID_t m_uDocid;
|
|
Hitpos_t m_uHitpos;
|
|
WORD m_uQuerypos;
|
|
WORD m_uNodepos;
|
|
WORD m_uSpanlen;
|
|
WORD m_uMatchlen;
|
|
DWORD m_uWeight;
|
|
};
|
|
|
|
/// result codes of ISphZoneCheck::IsInZone()
enum SphZoneHit_e
{
	SPH_ZONE_FOUND,
	SPH_ZONE_NO_SPAN,
	SPH_ZONE_NO_DOCUMENT
};
|
|
|
|
class ISphZoneCheck
|
|
{
|
|
public:
|
|
virtual ~ISphZoneCheck () {}
|
|
virtual SphZoneHit_e IsInZone ( int iZone, const ExtHit_t * pHit ) = 0;
|
|
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
/// get a short human readable name of an attribute type
/// returns "unknown" for types that are not listed
inline const char * sphTypeName ( ESphAttr eType )
{
	const char * sName = "unknown";
	switch ( eType )
	{
		case SPH_ATTR_NONE:			sName = "none"; break;
		case SPH_ATTR_INTEGER:		sName = "uint"; break;
		case SPH_ATTR_TIMESTAMP:	sName = "timestamp"; break;
		case SPH_ATTR_ORDINAL:		sName = "ordinal"; break;
		case SPH_ATTR_BOOL:			sName = "bool"; break;
		case SPH_ATTR_FLOAT:		sName = "float"; break;
		case SPH_ATTR_BIGINT:		sName = "bigint"; break;
		case SPH_ATTR_STRING:		sName = "string"; break;
		case SPH_ATTR_WORDCOUNT:	sName = "wordcount"; break;
		case SPH_ATTR_UINT32SET:	sName = "mva"; break;
		case SPH_ATTR_UINT64SET:	sName = "mva64"; break;
		default:					break;
	}
	return sName;
}
|
|
|
|
/// get the sql_attr_xxx directive name that matches an attribute type
/// returns "???" for SPH_ATTR_NONE, and for types that are not listed
inline const char * sphTypeDirective ( ESphAttr eType )
{
	const char * sDirective = "???";
	switch ( eType )
	{
		case SPH_ATTR_INTEGER:		sDirective = "sql_attr_uint"; break;
		case SPH_ATTR_TIMESTAMP:	sDirective = "sql_attr_timestamp"; break;
		case SPH_ATTR_ORDINAL:		sDirective = "sql_attr_str2ordinal"; break;
		case SPH_ATTR_BOOL:			sDirective = "sql_attr_bool"; break;
		case SPH_ATTR_FLOAT:		sDirective = "sql_attr_float"; break;
		case SPH_ATTR_BIGINT:		sDirective = "sql_attr_bigint"; break;
		case SPH_ATTR_STRING:		sDirective = "sql_attr_string"; break;
		case SPH_ATTR_WORDCOUNT:	sDirective = "sql_attr_wordcount"; break;
		case SPH_ATTR_UINT32SET:	sDirective = "sql_attr_multi"; break;
		case SPH_ATTR_UINT64SET:	sDirective = "sql_attr_multi bigint"; break;
		case SPH_ATTR_NONE:
		default:
			break;
	}
	return sDirective;
}
|
|
|
|
/// unescape a single-quoted SQL string literal into sRes
/// sEscaped must be at least 2 bytes long, and must both start and end with a single quote (asserted)
/// \b, \n, \r, and \t get translated to the respective control characters;
/// any other escaped character is copied as is, without the backslash
inline void SqlUnescape ( CSphString & sRes, const char * sEscaped, int iLen )
{
	assert ( iLen>=2 );
	assert ( sEscaped[0]=='\'' );
	assert ( sEscaped[iLen-1]=='\'' );

	// skip heading and trailing quotes
	const char * pSrc = sEscaped+1;
	const char * pSrcEnd = pSrc+iLen-2;

	// the output is written directly into the storage reserved in sRes
	sRes.Reserve ( iLen );
	char * pDst = (char*) sRes.cstr();

	while ( pSrc<pSrcEnd )
	{
		// regular character, copy over
		if ( pSrc[0]!='\\' )
		{
			*pDst++ = *pSrc++;
			continue;
		}

		// escape sequence, consumes two source characters
		char cOut = pSrc[1];
		switch ( cOut )
		{
			case 'b': cOut = '\b'; break;
			case 'n': cOut = '\n'; break;
			case 'r': cOut = '\r'; break;
			case 't': cOut = '\t'; break;
			default: break;
		}
		*pDst++ = cOut;
		pSrc += 2;
	}

	*pDst = '\0';
}
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
/// locator pair, for RT string dynamization
|
|
struct LocatorPair_t
|
|
{
|
|
CSphAttrLocator m_tFrom; ///< source (static) locator
|
|
CSphAttrLocator m_tTo; ///< destination (dynamized) locator
|
|
};
|
|
|
|
/// internal disk index interface (that exposes some guts)
|
|
struct ISphIndex_VLN : public CSphIndex
|
|
{
|
|
explicit ISphIndex_VLN ( const char * sIndexName, const char * sFilename )
|
|
: CSphIndex ( sIndexName, sFilename )
|
|
{}
|
|
|
|
virtual void SetDynamize ( const CSphVector<LocatorPair_t> & dDynamize ) = 0;
|
|
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////
|
|
|
|
/// dict traits
|
|
class CSphDictTraits : public CSphDict
|
|
{
|
|
public:
|
|
explicit CSphDictTraits ( CSphDict * pDict ) : m_pDict ( pDict ) { assert ( m_pDict ); }
|
|
|
|
virtual void LoadStopwords ( const char * sFiles, ISphTokenizer * pTokenizer ) { m_pDict->LoadStopwords ( sFiles, pTokenizer ); }
|
|
virtual bool LoadWordforms ( const char * sFile, ISphTokenizer * pTokenizer, const char * sIndex ) { return m_pDict->LoadWordforms ( sFile, pTokenizer, sIndex ); }
|
|
virtual bool SetMorphology ( const char * szMorph, bool bUseUTF8, CSphString & sError ) { return m_pDict->SetMorphology ( szMorph, bUseUTF8, sError ); }
|
|
|
|
virtual SphWordID_t GetWordID ( const BYTE * pWord, int iLen, bool bFilterStops ) { return m_pDict->GetWordID ( pWord, iLen, bFilterStops ); }
|
|
|
|
virtual void Setup ( const CSphDictSettings & ) {}
|
|
virtual const CSphDictSettings & GetSettings () const { return m_pDict->GetSettings (); }
|
|
virtual const CSphVector <CSphSavedFile> & GetStopwordsFileInfos () { return m_pDict->GetStopwordsFileInfos (); }
|
|
virtual const CSphSavedFile & GetWordformsFileInfo () { return m_pDict->GetWordformsFileInfo (); }
|
|
virtual const CSphMultiformContainer * GetMultiWordforms () const { return m_pDict->GetMultiWordforms (); }
|
|
|
|
virtual bool IsStopWord ( const BYTE * pWord ) const { return m_pDict->IsStopWord ( pWord ); }
|
|
|
|
protected:
|
|
CSphDict * m_pDict;
|
|
};
|
|
|
|
|
|
/// dict wrapper for star-syntax support in prefix-indexes
|
|
class CSphDictStar : public CSphDictTraits
|
|
{
|
|
public:
|
|
explicit CSphDictStar ( CSphDict * pDict ) : CSphDictTraits ( pDict ) {}
|
|
|
|
virtual SphWordID_t GetWordID ( BYTE * pWord );
|
|
virtual SphWordID_t GetWordIDNonStemmed ( BYTE * pWord );
|
|
};
|
|
|
|
|
|
/// star dict for index v.8+
|
|
class CSphDictStarV8 : public CSphDictStar
|
|
{
|
|
public:
|
|
CSphDictStarV8 ( CSphDict * pDict, bool bPrefixes, bool bInfixes );
|
|
|
|
virtual SphWordID_t GetWordID ( BYTE * pWord );
|
|
|
|
private:
|
|
bool m_bPrefixes;
|
|
bool m_bInfixes;
|
|
};
|
|
|
|
|
|
/// dict wrapper for exact-word syntax
|
|
class CSphDictExact : public CSphDictTraits
|
|
{
|
|
public:
|
|
explicit CSphDictExact ( CSphDict * pDict ) : CSphDictTraits ( pDict ) {}
|
|
virtual SphWordID_t GetWordID ( BYTE * pWord );
|
|
};
|
|
|
|
#endif // _sphinxint_
|
|
|
|
//
|
|
// $Id$
|
|
//
|