2016-12-17 19:39:01 +08:00

5747 lines
158 KiB
C++
Executable File

//
// $Id$
//
//
// Copyright (c) 2001-2011, Andrew Aksyonoff
// Copyright (c) 2008-2011, Sphinx Technologies Inc
// All rights reserved
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License. You should have
// received a copy of the GPL license along with this program; if you
// did not, you can find it at http://www.gnu.org/
//
#include "sphinx.h"
#include "sphinxint.h"
#include "sphinxrt.h"
#include "sphinxsearch.h"
#include "sphinxutils.h"
#include <sys/stat.h>
#include <fcntl.h>
#if USE_WINDOWS
#include <io.h> // for open(), close()
#include <errno.h>
#else
#include <unistd.h>
#include <sys/time.h>
#endif
//////////////////////////////////////////////////////////////////////////
#define BINLOG_WRITE_BUFFER 256*1024
#define BINLOG_AUTO_FLUSH 1000000
#define BINLOG_RESTART_SIZE 128*1024*1024
#if USE_64BIT
#define WORDID_MAX U64C(0xffffffffffffffff)
#else
#define WORDID_MAX 0xffffffffUL
#endif
// RT hitman
typedef Hitman_c<8> HITMAN;
//////////////////////////////////////////////////////////////////////////
#ifndef NDEBUG
#define Verify(_expr) assert(_expr)
#else
#define Verify(_expr) _expr
#endif
//////////////////////////////////////////////////////////////////////////
// !COMMIT cleanup extern ref to sphinx.cpp
extern void sphSortDocinfos ( DWORD * pBuf, int iCount, int iStride );
// !COMMIT yes i am when debugging
#ifndef NDEBUG
#define PARANOID 1
#endif
//////////////////////////////////////////////////////////////////////////
template < typename T, typename P >
static inline void ZipT ( CSphVector < BYTE, P > & dOut, T uValue )
{
do
{
BYTE bOut = (BYTE)( uValue & 0x7f );
uValue >>= 7;
if ( uValue )
bOut |= 0x80;
dOut.Add ( bOut );
} while ( uValue );
}
template < typename T >
static inline const BYTE * UnzipT ( T * pValue, const BYTE * pIn )
{
T uValue = 0;
BYTE bIn;
int iOff = 0;
do
{
bIn = *pIn++;
uValue += ( T ( bIn & 0x7f ) ) << iOff;
iOff += 7;
} while ( bIn & 0x80 );
*pValue = uValue;
return pIn;
}
#define ZipDword ZipT<DWORD>
#define ZipQword ZipT<uint64_t>
#define UnzipDword UnzipT<DWORD>
#define UnzipQword UnzipT<uint64_t>
#if USE_64BIT
#define ZipDocid ZipQword
#define ZipWordid ZipQword
#define UnzipDocid UnzipQword
#define UnzipWordid UnzipQword
#else
#define ZipDocid ZipDword
#define ZipWordid ZipDword
#define UnzipDocid UnzipDword
#define UnzipWordid UnzipDword
#endif
//////////////////////////////////////////////////////////////////////////
struct CmpHit_fn
{
inline bool IsLess ( const CSphWordHit & a, const CSphWordHit & b )
{
return ( a.m_iWordID<b.m_iWordID ) ||
( a.m_iWordID==b.m_iWordID && a.m_iDocID<b.m_iDocID ) ||
( a.m_iWordID==b.m_iWordID && a.m_iDocID==b.m_iDocID && a.m_iWordPos<b.m_iWordPos );
}
};
template < typename DOCID = SphDocID_t >
struct RtDoc_tmpl
{
DOCID m_uDocID; ///< my document id
CSphSmallBitvec m_dFields; ///< fields mask
DWORD m_uHits; ///< hit count
DWORD m_uHit; ///< either index into segment hits, or the only hit itself (if hit count is 1)
};
template < typename WORDID = SphWordID_t >
struct RtWord_tmpl
{
WORDID m_uWordID; ///< my keyword id
DWORD m_uDocs; ///< document count (for stats and/or BM25)
DWORD m_uHits; ///< hit count (for stats and/or BM25)
DWORD m_uDoc; ///< index into segment docs
};
typedef RtDoc_tmpl<> RtDoc_t;
typedef RtWord_tmpl<> RtWord_t;
struct RtWordCheckpoint_t
{
SphWordID_t m_uWordID;
int m_iOffset;
};
class RtDiskKlist_t : public ISphNoncopyable
{
private:
static const int MAX_SMALL_SIZE = 512;
CSphVector < SphAttr_t > m_dLargeKlist;
CSphOrderedHash < bool, SphDocID_t, IdentityHash_fn, MAX_SMALL_SIZE, 11 > m_hSmallKlist;
mutable CSphRwlock m_tRwLargelock;
mutable CSphRwlock m_tRwSmalllock;
void NakedFlush(); // flush without lockers
public:
RtDiskKlist_t() { m_tRwLargelock.Init(); m_tRwSmalllock.Init(); }
virtual ~RtDiskKlist_t() { m_tRwLargelock.Done(); m_tRwSmalllock.Done(); }
void Reset ();
void Flush()
{
if ( m_hSmallKlist.GetLength()==0 )
return;
m_tRwSmalllock.WriteLock();
m_tRwLargelock.WriteLock();
NakedFlush();
m_tRwLargelock.Unlock();
m_tRwSmalllock.Unlock();
}
void LoadFromFile ( const char * sFilename );
void SaveToFile ( const char * sFilename );
inline void Delete ( SphDocID_t uDoc )
{
m_tRwSmalllock.WriteLock();
if ( !m_hSmallKlist.Exists ( uDoc ) )
m_hSmallKlist.Add ( true, uDoc );
if ( m_hSmallKlist.GetLength()>=MAX_SMALL_SIZE )
NakedFlush();
m_tRwSmalllock.Unlock();
}
inline const SphAttr_t * GetKillList () const { return m_dLargeKlist.Begin(); }
inline int GetKillListSize () const { return m_dLargeKlist.GetLength(); }
inline bool KillListLock() const { return m_tRwLargelock.ReadLock(); }
inline bool KillListUnlock() const { return m_tRwLargelock.Unlock(); }
// NOT THREAD SAFE
bool Exists ( SphDocID_t uDoc )
{
return ( m_hSmallKlist.Exists ( uDoc ) || m_dLargeKlist.BinarySearch ( SphAttr_t(uDoc))!=NULL );
}
};
void RtDiskKlist_t::Reset()
{
m_dLargeKlist.Reset();
m_hSmallKlist.Reset();
}
void RtDiskKlist_t::NakedFlush()
{
if ( m_hSmallKlist.GetLength()==0 )
return;
m_hSmallKlist.IterateStart();
while ( m_hSmallKlist.IterateNext() )
m_dLargeKlist.Add ( m_hSmallKlist.IterateGetKey() );
m_dLargeKlist.Uniq();
m_hSmallKlist.Reset();
}
// is already id32<>id64 safe
void RtDiskKlist_t::LoadFromFile ( const char * sFilename )
{
m_tRwLargelock.WriteLock();
m_tRwSmalllock.WriteLock();
m_hSmallKlist.Reset();
m_tRwSmalllock.Unlock();
m_dLargeKlist.Reset();
CSphString sName, sError;
sName.SetSprintf ( "%s.kill", sFilename );
if ( !sphIsReadable ( sName.cstr(), &sError ) )
{
m_tRwLargelock.Unlock();
return;
}
CSphAutoreader rdKlist;
if ( !rdKlist.Open ( sName, sError ) )
{
m_tRwLargelock.Unlock();
return;
}
m_dLargeKlist.Resize ( rdKlist.GetDword() );
SphDocID_t uLastDocID = 0;
ARRAY_FOREACH ( i, m_dLargeKlist )
{
uLastDocID += ( SphDocID_t ) rdKlist.UnzipOffset();
m_dLargeKlist[i] = uLastDocID;
};
m_tRwLargelock.Unlock();
}
void RtDiskKlist_t::SaveToFile ( const char * sFilename )
{
m_tRwLargelock.WriteLock();
m_tRwSmalllock.WriteLock();
NakedFlush();
m_tRwSmalllock.Unlock();
CSphWriter wrKlist;
CSphString sName, sError;
sName.SetSprintf ( "%s.kill", sFilename );
wrKlist.OpenFile ( sName.cstr(), sError );
wrKlist.PutDword ( m_dLargeKlist.GetLength() );
SphDocID_t uLastDocID = 0;
ARRAY_FOREACH ( i, m_dLargeKlist )
{
wrKlist.ZipOffset ( m_dLargeKlist[i] - uLastDocID );
uLastDocID = ( SphDocID_t ) m_dLargeKlist[i];
};
m_tRwLargelock.Unlock();
wrKlist.CloseFile ();
}
struct RtSegment_t
{
protected:
static const int KLIST_ACCUM_THRESH = 32;
public:
static CSphStaticMutex m_tSegmentSeq;
static int m_iSegments; ///< age tag sequence generator
int m_iTag; ///< segment age tag
CSphTightVector<BYTE> m_dWords;
CSphVector<RtWordCheckpoint_t> m_dWordCheckpoints;
CSphTightVector<BYTE> m_dDocs;
CSphTightVector<BYTE> m_dHits;
int m_iRows; ///< number of actually allocated rows
int m_iAliveRows; ///< number of alive (non-killed) rows
CSphVector<CSphRowitem> m_dRows; ///< row data storage
CSphVector<SphDocID_t> m_dKlist; ///< sorted K-list
bool m_bTlsKlist; ///< whether to apply TLS K-list during merge (must only be used by writer during Commit())
CSphTightVector<BYTE> m_dStrings; ///< strings storage
CSphTightVector<DWORD> m_dMvas; ///< MVAs storage
RtSegment_t ()
{
m_tSegmentSeq.Lock ();
m_iTag = m_iSegments++;
m_tSegmentSeq.Unlock ();
m_iRows = 0;
m_iAliveRows = 0;
m_bTlsKlist = false;
m_dStrings.Add ( 0 ); // dummy zero offset
m_dMvas.Add ( 0 ); // dummy zero offset
}
int64_t GetUsedRam () const
{
// FIXME! gonna break on vectors over 2GB
return
m_dWords.GetLimit()*sizeof(m_dWords[0]) +
m_dDocs.GetLimit()*sizeof(m_dDocs[0]) +
m_dHits.GetLimit()*sizeof(m_dHits[0]) +
m_dStrings.GetLimit()*sizeof(m_dStrings[0])+
m_dMvas.GetLimit()*sizeof(m_dMvas[0]);
}
int GetMergeFactor () const
{
return m_iRows;
}
const CSphRowitem * FindRow ( SphDocID_t uDocid ) const;
const CSphRowitem * FindAliveRow ( SphDocID_t uDocid ) const;
};
int RtSegment_t::m_iSegments = 0;
CSphStaticMutex RtSegment_t::m_tSegmentSeq;
const CSphRowitem * RtSegment_t::FindRow ( SphDocID_t uDocid ) const
{
// binary search through the rows
int iStride = m_dRows.GetLength() / m_iRows;
SphDocID_t uL = DOCINFO2ID ( m_dRows.Begin() );
SphDocID_t uR = DOCINFO2ID ( &m_dRows[m_dRows.GetLength()-iStride] );
if ( uDocid==uL )
return m_dRows.Begin();
if ( uDocid==uR )
return &m_dRows[m_dRows.GetLength()-iStride];
if ( uDocid<uL || uDocid>uR )
return NULL;
int iL = 0;
int iR = m_iRows-1;
while ( iR-iL>1 )
{
int iM = iL + (iR-iL)/2;
SphDocID_t uM = DOCINFO2ID ( &m_dRows[iM*iStride] );
if ( uDocid==uM )
return &m_dRows[iM*iStride];
else if ( uDocid>uM )
iL = iM;
else
iR = iM;
}
return NULL;
}
const CSphRowitem * RtSegment_t::FindAliveRow ( SphDocID_t uDocid ) const
{
if ( m_dKlist.BinarySearch ( uDocid ) )
return NULL;
else
return FindRow ( uDocid );
}
//////////////////////////////////////////////////////////////////////////
struct RtDocWriter_t
{
CSphTightVector<BYTE> * m_pDocs;
SphDocID_t m_uLastDocID;
explicit RtDocWriter_t ( RtSegment_t * pSeg )
: m_pDocs ( &pSeg->m_dDocs )
, m_uLastDocID ( 0 )
{}
void ZipDoc ( const RtDoc_t & tDoc )
{
CSphTightVector<BYTE> & dDocs = *m_pDocs;
ZipDocid ( dDocs, tDoc.m_uDocID - m_uLastDocID );
m_uLastDocID = tDoc.m_uDocID;
ZipDword ( dDocs, tDoc.m_dFields.GetMask32() );
ZipDword ( dDocs, tDoc.m_uHits );
if ( tDoc.m_uHits==1 )
{
ZipDword ( dDocs, tDoc.m_uHit & 0xffffffUL );
ZipDword ( dDocs, tDoc.m_uHit>>24 );
} else
ZipDword ( dDocs, tDoc.m_uHit );
}
DWORD ZipDocPtr () const
{
return m_pDocs->GetLength();
}
void ZipRestart ()
{
m_uLastDocID = 0;
}
};
template < typename DOCID = SphDocID_t >
struct RtDocReader_tmpl
{
typedef RtDoc_tmpl<DOCID> RTDOC;
const BYTE * m_pDocs;
int m_iLeft;
RTDOC m_tDoc;
template < typename RTWORD >
explicit RtDocReader_tmpl ( const RtSegment_t * pSeg, const RTWORD & tWord )
{
m_pDocs = ( pSeg->m_dDocs.Begin() ? pSeg->m_dDocs.Begin() + tWord.m_uDoc : NULL );
m_iLeft = tWord.m_uDocs;
m_tDoc.m_uDocID = 0;
}
const RTDOC * UnzipDoc ()
{
if ( !m_iLeft || !m_pDocs )
return NULL;
const BYTE * pIn = m_pDocs;
SphDocID_t uDeltaID;
pIn = UnzipDocid ( &uDeltaID, pIn );
RTDOC & mtDoc = *(RTDOC*)&m_tDoc;
mtDoc.m_uDocID += (DOCID) uDeltaID;
DWORD uField;
pIn = UnzipDword ( &uField, pIn );
m_tDoc.m_dFields.Assign32 ( uField );
pIn = UnzipDword ( &mtDoc.m_uHits, pIn );
if ( mtDoc.m_uHits==1 )
{
DWORD a, b;
pIn = UnzipDword ( &a, pIn );
pIn = UnzipDword ( &b, pIn );
mtDoc.m_uHit = a + ( b<<24 );
} else
pIn = UnzipDword ( &mtDoc.m_uHit, pIn );
m_pDocs = pIn;
m_iLeft--;
return &mtDoc;
}
};
typedef RtDocReader_tmpl<> RtDocReader_t;
static const int RAM_WORDLIST_CHECKPOINT = 1024;
struct RtWordWriter_t
{
CSphTightVector<BYTE> * m_pWords;
CSphVector<RtWordCheckpoint_t> * m_pCheckpoints;
SphWordID_t m_uLastWordID;
DWORD m_uLastDoc;
int m_iWords;
explicit RtWordWriter_t ( RtSegment_t * pSeg )
: m_pWords ( &pSeg->m_dWords )
, m_pCheckpoints ( &pSeg->m_dWordCheckpoints )
, m_uLastWordID ( 0 )
, m_uLastDoc ( 0 )
, m_iWords ( 0 )
{
assert ( !m_pWords->GetLength() );
assert ( !m_pCheckpoints->GetLength() );
}
void ZipWord ( const RtWord_t & tWord )
{
CSphTightVector<BYTE> & tWords = *m_pWords;
if ( ++m_iWords==RAM_WORDLIST_CHECKPOINT )
{
RtWordCheckpoint_t & tCheckpoint = m_pCheckpoints->Add();
tCheckpoint.m_uWordID = tWord.m_uWordID;
tCheckpoint.m_iOffset = tWords.GetLength();
m_uLastWordID = 0;
m_uLastDoc = 0;
m_iWords = 1;
}
ZipWordid ( tWords, tWord.m_uWordID - m_uLastWordID );
ZipDword ( tWords, tWord.m_uDocs );
ZipDword ( tWords, tWord.m_uHits );
ZipDword ( tWords, tWord.m_uDoc - m_uLastDoc );
m_uLastWordID = tWord.m_uWordID;
m_uLastDoc = tWord.m_uDoc;
}
};
template < typename WORDID = SphWordID_t >
struct RtWordReader_tmpl
{
typedef RtWord_tmpl<WORDID> RTWORD;
const BYTE * m_pCur;
const BYTE * m_pMax;
RTWORD m_tWord;
int m_iWords;
explicit RtWordReader_tmpl ( const RtSegment_t * pSeg )
: m_iWords ( 0 )
{
m_pCur = pSeg->m_dWords.Begin();
m_pMax = m_pCur + pSeg->m_dWords.GetLength();
m_tWord.m_uWordID = 0;
m_tWord.m_uDoc = 0;
}
const RTWORD * UnzipWord ()
{
RTWORD & mtWord = *(RTWORD*)&m_tWord;
if ( ++m_iWords==RAM_WORDLIST_CHECKPOINT )
{
mtWord.m_uWordID = 0;
mtWord.m_uDoc = 0;
m_iWords = 1;
}
if ( m_pCur>=m_pMax )
return NULL;
const BYTE * pIn = m_pCur;
SphWordID_t uDeltaID;
DWORD uDeltaDoc;
pIn = UnzipWordid ( &uDeltaID, pIn );
pIn = UnzipDword ( &mtWord.m_uDocs, pIn );
pIn = UnzipDword ( &mtWord.m_uHits, pIn );
pIn = UnzipDword ( &uDeltaDoc, pIn );
m_pCur = pIn;
mtWord.m_uWordID += (WORDID) uDeltaID;
mtWord.m_uDoc += uDeltaDoc;
return &mtWord;
}
};
typedef RtWordReader_tmpl<SphWordID_t> RtWordReader_t;
struct RtHitWriter_t
{
CSphTightVector<BYTE> * m_pHits;
DWORD m_uLastHit;
explicit RtHitWriter_t ( RtSegment_t * pSeg )
: m_pHits ( &pSeg->m_dHits )
, m_uLastHit ( 0 )
{}
void ZipHit ( DWORD uValue )
{
ZipDword ( *m_pHits, uValue - m_uLastHit );
m_uLastHit = uValue;
}
void ZipRestart ()
{
m_uLastHit = 0;
}
DWORD ZipHitPtr () const
{
return m_pHits->GetLength();
}
};
struct RtHitReader_t
{
const BYTE * m_pCur;
DWORD m_iLeft;
DWORD m_uLast;
RtHitReader_t ()
: m_pCur ( NULL )
, m_iLeft ( 0 )
, m_uLast ( 0 )
{}
template < typename RTDOC >
explicit RtHitReader_t ( const RtSegment_t * pSeg, const RTDOC * pDoc )
{
m_pCur = &pSeg->m_dHits [ pDoc->m_uHit ];
m_iLeft = pDoc->m_uHits;
m_uLast = 0;
}
DWORD UnzipHit ()
{
if ( !m_iLeft )
return 0;
DWORD uValue;
m_pCur = UnzipDword ( &uValue, m_pCur );
m_uLast += uValue;
m_iLeft--;
return m_uLast;
}
};
struct RtHitReader2_t : public RtHitReader_t
{
const BYTE * m_pBase;
RtHitReader2_t ()
: m_pBase ( NULL )
{}
void Seek ( SphOffset_t uOff, int iHits )
{
m_pCur = m_pBase + uOff;
m_iLeft = iHits;
m_uLast = 0;
}
};
//////////////////////////////////////////////////////////////////////////
/// forward ref
struct RtIndex_t;
struct AccDocDup_t
{
SphDocID_t m_uDocid;
int m_iDupCount;
};
/// indexing accumulator
struct RtAccum_t
{
RtIndex_t * m_pIndex; ///< my current owner in this thread
int m_iAccumDocs;
CSphVector<CSphWordHit> m_dAccum;
CSphVector<CSphRowitem> m_dAccumRows;
CSphVector<SphDocID_t> m_dAccumKlist;
CSphTightVector<BYTE> m_dStrings;
CSphTightVector<DWORD> m_dMvas;
CSphVector<DWORD> m_dPerDocHitsCount;
RtAccum_t();
void AddDocument ( ISphHits * pHits, const CSphMatch & tDoc, int iRowSize, const char ** ppStr, const CSphVector<DWORD> & dMvas );
RtSegment_t * CreateSegment ( int iRowSize );
void CleanupDuplacates ( int iRowSize );
};
/// TLS indexing accumulator (we disallow two uncommitted adds within one thread; and so need at most one)
SphThreadKey_t g_tTlsAccumKey;
/// binlog file view of the index
/// everything that a given log file needs to know about an index
struct BinlogIndexInfo_t
{
CSphString m_sName; ///< index name
int64_t m_iMinTID; ///< min TID logged by this file
int64_t m_iMaxTID; ///< max TID logged by this file
int64_t m_iFlushedTID; ///< last flushed TID
int64_t m_tmMin; ///< min TID timestamp
int64_t m_tmMax; ///< max TID timestamp
CSphIndex * m_pIndex; ///< replay only; associated index (might be NULL if we don't serve it anymore!)
RtIndex_t * m_pRT; ///< replay only; RT index handle (might be NULL if N/A or non-RT)
int64_t m_iPreReplayTID; ///< replay only; index TID at the beginning of this file replay
BinlogIndexInfo_t ()
: m_iMinTID ( INT64_MAX )
, m_iMaxTID ( 0 )
, m_iFlushedTID ( 0 )
, m_tmMin ( INT64_MAX )
, m_tmMax ( 0 )
, m_pIndex ( NULL )
, m_pRT ( NULL )
, m_iPreReplayTID ( 0 )
{}
};
/// binlog file descriptor
/// file id (aka extension), plus a list of associated index infos
struct BinlogFileDesc_t
{
int m_iExt;
CSphVector<BinlogIndexInfo_t> m_dIndexInfos;
BinlogFileDesc_t ()
: m_iExt ( 0 )
{}
};
/// Bin Log Operation
enum Blop_e
{
BLOP_COMMIT = 1,
BLOP_UPDATE_ATTRS = 2,
BLOP_ADD_INDEX = 3,
BLOP_ADD_CACHE = 4,
BLOP_TOTAL
};
// forward declaration
class BufferReader_t;
class RtBinlog_c;
struct RtIndex_t;
class BinlogWriter_c : protected CSphWriter
{
public:
BinlogWriter_c ();
virtual ~BinlogWriter_c () {}
virtual void Flush ();
void Write ();
void Fsync ();
bool HasUnwrittenData () const { return m_iPoolUsed>0; }
bool HasUnsyncedData () const { return m_iLastFsyncPos!=m_iLastWritePos; }
void ResetCrc (); ///< restart checksumming
void WriteCrc (); ///< finalize and write current checksum to output stream
void SetBufferSize ( int iBufferSize ) { CSphWriter::SetBufferSize ( iBufferSize ); }
bool OpenFile ( const CSphString & sName, CSphString & sErrorBuffer ) { return CSphWriter::OpenFile ( sName, sErrorBuffer ); }
void SetFile ( int iFD, SphOffset_t * pSharedOffset ) { CSphWriter::SetFile ( iFD, pSharedOffset ); }
void CloseFile ( bool bTruncate=false ) { CSphWriter::CloseFile ( bTruncate ); }
SphOffset_t GetPos () const { return m_iPos; }
void PutBytes ( const void * pData, int iSize );
void PutString ( const char * szString );
void PutDword ( DWORD uValue ) { PutBytes ( &uValue, sizeof(DWORD) ); }
void ZipValue ( uint64_t uValue );
private:
int64_t m_iLastWritePos;
int64_t m_iLastFsyncPos;
DWORD m_uCRC;
};
class BinlogReader_c : protected CSphAutoreader
{
public:
bool Open ( const CSphString & sFilename, CSphString & sError ) { return CSphAutoreader::Open ( sFilename, sError ); }
void Close () { CSphAutoreader::Close(); }
SphOffset_t GetFilesize () { return CSphAutoreader::GetFilesize(); }
void GetBytes ( void * pData, int iSize );
CSphString GetString ();
DWORD GetDword ();
uint64_t UnzipValue ();
bool GetErrorFlag () { return CSphAutoreader::GetErrorFlag(); }
SphOffset_t GetPos () { return CSphAutoreader::GetPos(); }
void ResetCrc ();
bool CheckCrc ( const char * sOp, const char * sIndexName, int64_t iTid, int64_t iTxnPos );
private:
DWORD m_uCRC;
};
class RtBinlog_c : public ISphBinlog
{
public:
RtBinlog_c ();
~RtBinlog_c ();
void BinlogCommit ( const char * sIndexName, int64_t iTID, const RtSegment_t * pSeg, const CSphVector<SphDocID_t> & dKlist );
void BinlogUpdateAttributes ( const char * sIndexName, int64_t iTID, const CSphAttrUpdate & tUpd );
void NotifyIndexFlush ( const char * sIndexName, int64_t iTID, bool bShutdown );
void Configure ( const CSphConfigSection & hSearchd, bool bTestMode );
void Replay ( const SmallStringHash_T<CSphIndex*> & hIndexes, ProgressCallbackSimple_t * pfnProgressCallback );
void CreateTimerThread ();
private:
static const DWORD BINLOG_VERSION = 3;
static const DWORD BINLOG_HEADER_MAGIC = 0x4c425053; /// magic 'SPBL' header that marks binlog file
static const DWORD BLOP_MAGIC = 0x214e5854; /// magic 'TXN!' header that marks binlog entry
static const DWORD BINLOG_META_MAGIC = 0x494c5053; /// magic 'SPLI' header that marks binlog meta
int64_t m_iFlushTimeLeft;
volatile int m_iFlushPeriod;
enum OnCommitAction_e
{
ACTION_NONE,
ACTION_FSYNC,
ACTION_WRITE
};
OnCommitAction_e m_eOnCommit;
CSphMutex m_tWriteLock; // lock on operation
int m_iLockFD;
CSphString m_sWriterError;
BinlogWriter_c m_tWriter;
mutable CSphVector<BinlogFileDesc_t> m_dLogFiles; // active log files
CSphString m_sLogPath;
SphThread_t m_tUpdateTread;
bool m_bReplayMode; // replay mode indicator
bool m_bDisabled;
int m_iRestartSize; // binlog size restart threshold
// replay stats
mutable int m_iReplayedRows;
private:
static void DoAutoFlush ( void * pBinlog );
int GetWriteIndexID ( const char * sName, int64_t iTID, int64_t tmNow );
void LoadMeta ();
void SaveMeta ();
void LockFile ( bool bLock );
void DoCacheWrite ();
void CheckDoRestart ();
void CheckDoFlush ();
void OpenNewLog ( int iLastState=0 );
int ReplayBinlog ( const SmallStringHash_T<CSphIndex*> & hIndexes, int iBinlog );
bool ReplayCommit ( int iBinlog, BinlogReader_c & tReader ) const;
bool ReplayUpdateAttributes ( int iBinlog, BinlogReader_c & tReader ) const;
bool ReplayIndexAdd ( int iBinlog, const SmallStringHash_T<CSphIndex*> & hIndexes, BinlogReader_c & tReader ) const;
bool ReplayCacheAdd ( int iBinlog, BinlogReader_c & tReader ) const;
};
static RtBinlog_c * g_pBinlog = NULL;
static bool g_bRTChangesAllowed = false;
/// RAM based index
struct RtQword_t;
struct RtIndex_t : public ISphRtIndex, public ISphNoncopyable
{
private:
static const DWORD META_HEADER_MAGIC = 0x54525053; ///< my magic 'SPRT' header
static const DWORD META_VERSION = 3; ///< current version
private:
const int m_iStride;
CSphVector<RtSegment_t*> m_pSegments;
CSphMutex m_tWriterMutex;
mutable CSphRwlock m_tRwlock;
int64_t m_iRamSize;
CSphString m_sPath;
bool m_bPathStripped;
CSphVector<CSphIndex*> m_pDiskChunks;
int m_iLockFD;
mutable RtDiskKlist_t m_tKlist;
CSphSchema m_tOutboundSchema;
CSphVector<LocatorPair_t> m_dDynamize;
int64_t m_iSavedTID;
int64_t m_iSavedRam;
int64_t m_tmSaved;
public:
explicit RtIndex_t ( const CSphSchema & tSchema, const char * sIndexName, int64_t iRamSize, const char * sPath );
virtual ~RtIndex_t ();
bool AddDocument ( int iFields, const char ** ppFields, const CSphMatch & tDoc, bool bReplace, const char ** ppStr, const CSphVector<DWORD> & dMvas, CSphString & sError );
bool AddDocument ( ISphHits * pHits, const CSphMatch & tDoc, const char ** ppStr, const CSphVector<DWORD> & dMvas, CSphString & sError );
bool DeleteDocument ( const SphDocID_t * pDocs, int iDocs, CSphString & sError );
void Commit ();
void RollBack ();
void CommitReplayable ( RtSegment_t * pNewSeg, CSphVector<SphDocID_t> & dAccKlist );
void DumpToDisk ( const char * sFilename );
virtual void CheckRamFlush ();
private:
/// acquire thread-local indexing accumulator
/// returns NULL if another index already uses it in an open txn
RtAccum_t * AcquireAccum ( CSphString * sError=NULL );
RtSegment_t * MergeSegments ( const RtSegment_t * pSeg1, const RtSegment_t * pSeg2, const CSphVector<SphDocID_t> * pAccKlist );
const RtWord_t * CopyWord ( RtSegment_t * pDst, RtWordWriter_t & tOutWord, const RtSegment_t * pSrc, const RtWord_t * pWord, RtWordReader_t & tInWord, const CSphVector<SphDocID_t> * pAccKlist );
void MergeWord ( RtSegment_t * pDst, const RtSegment_t * pSrc1, const RtWord_t * pWord1, const RtSegment_t * pSrc2, const RtWord_t * pWord2, RtWordWriter_t & tOut, const CSphVector<SphDocID_t> * pAccKlist );
void CopyDoc ( RtSegment_t * pSeg, RtDocWriter_t & tOutDoc, RtWord_t * pWord, const RtSegment_t * pSrc, const RtDoc_t * pDoc );
void SaveMeta ( int iDiskChunks );
void SaveDiskHeader ( const char * sFilename, int iCheckpoints, SphOffset_t iCheckpointsPosition, DWORD uKillListSize, DWORD uMinMaxSize, bool bForceID32=false ) const;
void SaveDiskData ( const char * sFilename ) const;
template < typename DOCID, typename WORDID >
void SaveDiskDataImpl ( const char * sFilename ) const;
void SaveDiskChunk ();
CSphIndex * LoadDiskChunk ( int iChunk );
bool LoadRamChunk ( DWORD uVersion );
bool SaveRamChunk ();
public:
#if USE_WINDOWS
#pragma warning(push,1)
#pragma warning(disable:4100)
#endif
virtual SphAttr_t * GetKillList () const { return NULL; }
virtual int GetKillListSize () const { return 0; }
virtual bool HasDocid ( SphDocID_t ) const { assert ( 0 ); return false; }
virtual int Build ( const CSphVector<CSphSource*> & dSources, int iMemoryLimit, int iWriteBuffer ) { return 0; }
virtual bool Merge ( CSphIndex * pSource, CSphVector<CSphFilterSettings> & dFilters, bool bMergeKillLists ) { return false; }
virtual bool Prealloc ( bool bMlock, bool bStripPath, CSphString & sWarning );
virtual void Dealloc () {}
virtual bool Preread ();
virtual void SetBase ( const char * sNewBase ) {}
virtual bool Rename ( const char * sNewBase ) { return true; }
virtual bool Lock () { return true; }
virtual void Unlock () {}
virtual bool Mlock () { return true; }
virtual void PostSetup();
virtual bool IsRT() const { return true; }
virtual int UpdateAttributes ( const CSphAttrUpdate & tUpd, int iIndex, CSphString & sError );
virtual bool SaveAttributes () { return true; }
virtual DWORD GetAttributeStatus () const { return 0; }
virtual void DebugDumpHeader ( FILE * fp, const char * sHeaderName, bool bConfig ) {}
virtual void DebugDumpDocids ( FILE * fp ) {}
virtual void DebugDumpHitlist ( FILE * fp, const char * sKeyword, bool bID ) {}
virtual int DebugCheck ( FILE * fp );
#if USE_WINDOWS
#pragma warning(pop)
#endif
public:
virtual bool EarlyReject ( CSphQueryContext * pCtx, CSphMatch & ) const;
virtual const CSphSourceStats & GetStats () const { return m_tStats; }
virtual bool MultiQuery ( const CSphQuery * pQuery, CSphQueryResult * pResult, int iSorters, ISphMatchSorter ** ppSorters, const CSphVector<CSphFilterSettings> * pExtraFilters, int iTag ) const;
virtual bool MultiQueryEx ( int iQueries, const CSphQuery * ppQueries, CSphQueryResult ** ppResults, ISphMatchSorter ** ppSorters, const CSphVector<CSphFilterSettings> * pExtraFilters, int iTag ) const;
virtual bool GetKeywords ( CSphVector <CSphKeywordInfo> & dKeywords, const char * szQuery, bool bGetStats, CSphString & sError ) const;
void CopyDocinfo ( CSphMatch & tMatch, const DWORD * pFound ) const;
const CSphRowitem * FindDocinfo ( const RtSegment_t * pSeg, SphDocID_t uDocID ) const;
bool RtQwordSetup ( RtQword_t * pQword, RtSegment_t * pSeg ) const;
static bool RtQwordSetupSegment ( RtQword_t * pQword, RtSegment_t * pSeg, bool bSetup );
CSphDict * SetupExactDict ( CSphScopedPtr<CSphDict> & tContainer, CSphDict * pPrevDict, ISphTokenizer * pTokenizer ) const;
virtual const CSphSchema & GetMatchSchema () const { return m_tOutboundSchema; }
virtual const CSphSchema & GetInternalSchema () const { return m_tSchema; }
int64_t GetUsedRam () const;
protected:
CSphSourceStats m_tStats;
};
RtIndex_t::RtIndex_t ( const CSphSchema & tSchema, const char * sIndexName, int64_t iRamSize, const char * sPath )
: ISphRtIndex ( sIndexName, "rtindex" )
, m_iStride ( DOCINFO_IDSIZE + tSchema.GetRowSize() )
, m_iRamSize ( iRamSize )
, m_sPath ( sPath )
, m_bPathStripped ( false )
, m_iLockFD ( -1 )
, m_iSavedTID ( m_iTID )
, m_iSavedRam ( 0 )
, m_tmSaved ( sphMicroTimer() )
{
MEMORY ( SPH_MEM_IDX_RT );
m_tSchema = tSchema;
// schemes strings attributes fix up
bool bReplaceSchema = false;
for ( int i=0; i<tSchema.GetAttrsCount() && !bReplaceSchema; i++ )
{
const CSphColumnInfo & tCol = tSchema.GetAttr(i);
bReplaceSchema = ( ( tCol.m_eAttrType==SPH_ATTR_STRING || tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_UINT64SET )
&& !tCol.m_tLocator.m_bDynamic );
}
m_tOutboundSchema = m_tSchema;
if ( bReplaceSchema )
{
m_tOutboundSchema.ResetAttrs();
for ( int i=0; i<m_tSchema.GetAttrsCount(); i++ )
{
CSphColumnInfo tCol = m_tSchema.GetAttr(i);
bool bDynamic = tCol.m_tLocator.m_bDynamic;
if ( ( tCol.m_eAttrType==SPH_ATTR_STRING || tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_UINT64SET )
&& !tCol.m_tLocator.m_bDynamic )
{
tCol.m_eStage = SPH_EVAL_OVERRIDE;
bDynamic = true;
}
m_tOutboundSchema.AddAttr ( tCol, bDynamic );
if ( bDynamic )
{
LocatorPair_t & tPair = m_dDynamize.Add();
tPair.m_tFrom = tCol.m_tLocator;
tPair.m_tTo = m_tOutboundSchema.GetAttr(i).m_tLocator;
}
}
}
#ifndef NDEBUG
// check that index cols are static
for ( int i=0; i<m_tSchema.GetAttrsCount(); i++ )
assert ( !m_tSchema.GetAttr(i).m_tLocator.m_bDynamic );
#endif
Verify ( m_tWriterMutex.Init() );
Verify ( m_tRwlock.Init() );
}
RtIndex_t::~RtIndex_t ()
{
int64_t tmSave = sphMicroTimer();
SaveRamChunk ();
SaveMeta ( m_pDiskChunks.GetLength() );
Verify ( m_tWriterMutex.Done() );
Verify ( m_tRwlock.Done() );
ARRAY_FOREACH ( i, m_pSegments )
SafeDelete ( m_pSegments[i] );
ARRAY_FOREACH ( i, m_pDiskChunks )
SafeDelete ( m_pDiskChunks[i] );
if ( m_iLockFD>=0 )
::close ( m_iLockFD );
g_pBinlog->NotifyIndexFlush ( m_sIndexName.cstr(), m_iTID, true );
tmSave = sphMicroTimer() - tmSave;
if ( tmSave>=1000 )
{
sphInfo ( "rt: index %s: ramchunk saved in %d.%03d sec",
m_sIndexName.cstr(), (int)(tmSave/1000000), (int)((tmSave/1000)%1000) );
}
}
#define SPH_THRESHOLD_SAVE_RAM ( 64*1024*1024 )
static int64_t g_iRtFlushPeriod = 10*60*60; // default period is 10 hours
void RtIndex_t::CheckRamFlush ()
{
int64_t tmSave = sphMicroTimer();
if ( m_iTID<=m_iSavedTID || ( tmSave-m_tmSaved )/1000000<g_iRtFlushPeriod )
return;
m_tRwlock.ReadLock();
int64_t iUsedRam = GetUsedRam();
int64_t iDeltaRam = iUsedRam-m_iSavedRam;
m_tRwlock.Unlock();
// save if delta-ram runs over maximum value of ram-threshold or 1/3 of index size
if ( iDeltaRam < Max ( SPH_THRESHOLD_SAVE_RAM, m_iRamSize/3 ) )
return;
m_tWriterMutex.Lock();
if ( !SaveRamChunk () )
{
sphWarning ( "rt auto-save FAILED!!! (index='%s', error '%s')", m_sIndexName.cstr(), m_sLastError.cstr() );
m_tWriterMutex.Unlock();
return;
}
SaveMeta ( m_pDiskChunks.GetLength() );
g_pBinlog->NotifyIndexFlush ( m_sIndexName.cstr(), m_iTID, false );
int64_t iWasTID = m_iSavedTID;
int64_t iWasRam = m_iSavedRam;
int64_t tmDelta = sphMicroTimer() - m_tmSaved;
m_iSavedTID = m_iTID;
m_iSavedRam = iUsedRam;
m_tmSaved = sphMicroTimer();
m_tWriterMutex.Unlock();
tmSave = sphMicroTimer() - tmSave;
sphInfo ( "rt auto-saved (index='%s', last TID="INT64_FMT", current TID="INT64_FMT", last ram=%d.%03d Mb, current ram=%d.%03d Mb, time delta=%d sec, took=%d.%03d sec)"
, m_sIndexName.cstr(), iWasTID, m_iTID, (int)(iWasRam/1024/1024), (int)((iWasRam/1024)%1000)
, (int)(m_iSavedRam/1024/1024), (int)((m_iSavedRam/1024)%1000)
, (int) (tmDelta/1000000), (int)(tmSave/1000000), (int)((tmSave/1000)%1000) );
}
int64_t RtIndex_t::GetUsedRam () const
{
int64_t iTotal = 0;
ARRAY_FOREACH ( i, m_pSegments )
iTotal += m_pSegments[i]->GetUsedRam();
return iTotal;
}
//////////////////////////////////////////////////////////////////////////
// INDEXING
//////////////////////////////////////////////////////////////////////////
class CSphSource_StringVector : public CSphSource_Document
{
public:
explicit CSphSource_StringVector ( int iFields, const char ** ppFields, const CSphSchema & tSchema );
virtual ~CSphSource_StringVector () {}
virtual bool Connect ( CSphString & );
virtual void Disconnect ();
virtual bool HasAttrsConfigured () { return false; }
virtual bool IterateStart ( CSphString & ) { return true; }
virtual bool IterateMultivaluedStart ( int, CSphString & ) { return false; }
virtual bool IterateMultivaluedNext () { return false; }
virtual bool IterateFieldMVAStart ( int, CSphString & ) { return false; }
virtual bool IterateFieldMVANext () { return false; }
virtual bool IterateKillListStart ( CSphString & ) { return false; }
virtual bool IterateKillListNext ( SphDocID_t & ) { return false; }
virtual BYTE ** NextDocument ( CSphString & ) { return m_dFields.Begin(); }
protected:
CSphVector<BYTE *> m_dFields;
CSphVector<CSphWordHit> m_dHits;
};
CSphSource_StringVector::CSphSource_StringVector ( int iFields, const char ** ppFields, const CSphSchema & tSchema )
: CSphSource_Document ( "$stringvector" )
{
m_tSchema = tSchema;
m_dFields.Resize ( 1+iFields );
for ( int i=0; i<iFields; i++ )
{
m_dFields[i] = (BYTE*) ppFields[i];
assert ( m_dFields[i] );
}
m_dFields [ iFields ] = NULL;
m_iMaxHits = 0; // force all hits build
}
bool CSphSource_StringVector::Connect ( CSphString & )
{
m_tHits.m_dData.Reserve ( 1024 );
return true;
}
void CSphSource_StringVector::Disconnect ()
{
m_tHits.m_dData.Reset();
}
bool RtIndex_t::AddDocument ( int iFields, const char ** ppFields, const CSphMatch & tDoc, bool bReplace, const char ** ppStr, const CSphVector<DWORD> & dMvas, CSphString & sError )
{
assert ( g_bRTChangesAllowed );
if ( !tDoc.m_iDocID )
return true;
MEMORY ( SPH_MEM_IDX_RT );
if ( !bReplace )
{
m_tRwlock.ReadLock ();
ARRAY_FOREACH ( i, m_pSegments )
if ( FindDocinfo ( m_pSegments[i], tDoc.m_iDocID )
&& !m_pSegments[i]->m_dKlist.BinarySearch ( tDoc.m_iDocID ) )
{
m_tRwlock.Unlock ();
sError.SetSprintf ( "duplicate id '"UINT64_FMT"'", (uint64_t)tDoc.m_iDocID );
return false; // already exists and not deleted; INSERT fails
}
m_tRwlock.Unlock ();
}
CSphScopedPtr<ISphTokenizer> pTokenizer ( m_pTokenizer->Clone ( false ) ); // avoid race
CSphScopedPtr<CSphDict> tDictCloned ( NULL );
CSphDict * pDictBase = m_pDict;
if ( pDictBase->HasState() )
{
tDictCloned = pDictBase = pDictBase->Clone();
}
CSphSource_StringVector tSrc ( iFields, ppFields, m_tOutboundSchema );
tSrc.Setup ( m_tSettings );
tSrc.SetTokenizer ( pTokenizer.Ptr() );
tSrc.SetDict ( pDictBase );
if ( !tSrc.Connect ( m_sLastError ) )
return false;
tSrc.m_tDocInfo.Clone ( tDoc, m_tOutboundSchema.GetRowSize() );
if ( !tSrc.IterateStart ( sError ) || !tSrc.IterateDocument ( sError ) )
return false;
ISphHits * pHits = tSrc.IterateHits ( sError );
return AddDocument ( pHits, tDoc, ppStr, dMvas, sError );
}
void AccumCleanup ( void * pArg )
{
RtAccum_t * pAcc = (RtAccum_t *) pArg;
SafeDelete ( pAcc );
}
RtAccum_t * RtIndex_t::AcquireAccum ( CSphString * sError )
{
RtAccum_t * pAcc = NULL;
// check that no other index is holding the acc
pAcc = (RtAccum_t*) sphThreadGet ( g_tTlsAccumKey );
if ( pAcc && pAcc->m_pIndex!=NULL && pAcc->m_pIndex!=this )
{
if ( sError )
sError->SetSprintf ( "current txn is working with another index ('%s')", pAcc->m_pIndex->m_tSchema.m_sName.cstr() );
return NULL;
}
if ( !pAcc )
{
pAcc = new RtAccum_t ();
sphThreadSet ( g_tTlsAccumKey, pAcc );
sphThreadOnExit ( AccumCleanup, pAcc );
}
assert ( pAcc->m_pIndex==NULL || pAcc->m_pIndex==this );
pAcc->m_pIndex = this;
return pAcc;
}
bool RtIndex_t::AddDocument ( ISphHits * pHits, const CSphMatch & tDoc, const char ** ppStr, const CSphVector<DWORD> & dMvas, CSphString & sError )
{
assert ( g_bRTChangesAllowed );
RtAccum_t * pAcc = AcquireAccum ( &sError );
if ( pAcc )
pAcc->AddDocument ( pHits, tDoc, m_tOutboundSchema.GetRowSize(), ppStr, dMvas );
return ( pAcc!=NULL );
}
RtAccum_t::RtAccum_t ()
: m_pIndex ( NULL )
, m_iAccumDocs ( 0 )
{
m_dStrings.Add ( 0 );
m_dMvas.Add ( 0 );
}
void RtAccum_t::AddDocument ( ISphHits * pHits, const CSphMatch & tDoc, int iRowSize, const char ** ppStr, const CSphVector<DWORD> & dMvas )
{
MEMORY ( SPH_MEM_IDX_RT_ACCUM );
// schedule existing copies for deletion
m_dAccumKlist.Add ( tDoc.m_iDocID );
// reserve some hit space on first use
if ( pHits && pHits->Length() && !m_dAccum.GetLength() )
m_dAccum.Reserve ( 128*1024 );
// accumulate row data; expect fully dynamic rows
assert ( !tDoc.m_pStatic );
assert (!( !tDoc.m_pDynamic && iRowSize!=0 ));
assert (!( tDoc.m_pDynamic && (int)tDoc.m_pDynamic[-1]!=iRowSize ));
m_dAccumRows.Resize ( m_dAccumRows.GetLength() + DOCINFO_IDSIZE + iRowSize );
CSphRowitem * pRow = &m_dAccumRows [ m_dAccumRows.GetLength() - DOCINFO_IDSIZE - iRowSize ];
DOCINFOSETID ( pRow, tDoc.m_iDocID );
CSphRowitem * pAttrs = DOCINFO2ATTRS(pRow);
for ( int i=0; i<iRowSize; i++ )
pAttrs[i] = tDoc.m_pDynamic[i];
int iMva = 0;
const CSphSchema & pSchema = m_pIndex->GetInternalSchema();
int iAttr = 0;
for ( int i=0; i<pSchema.GetAttrsCount(); i++ )
{
const CSphColumnInfo & tColumn = pSchema.GetAttr(i);
if ( tColumn.m_eAttrType==SPH_ATTR_STRING )
{
const char * pStr = ppStr ? ppStr[iAttr++] : NULL;
const int iLen = pStr ? strlen ( pStr ) : 0;
if ( iLen )
{
BYTE dLen[3];
const int iLenPacked = sphPackStrlen ( dLen, iLen );
const int iOff = m_dStrings.GetLength();
assert ( iOff>=1 );
m_dStrings.Resize ( iOff + iLenPacked + iLen );
memcpy ( &m_dStrings[iOff], dLen, iLenPacked );
memcpy ( &m_dStrings[iOff+iLenPacked], pStr, iLen );
sphSetRowAttr ( pAttrs, tColumn.m_tLocator, iOff );
} else
{
sphSetRowAttr ( pAttrs, tColumn.m_tLocator, 0 );
}
} else if ( tColumn.m_eAttrType==SPH_ATTR_UINT32SET || tColumn.m_eAttrType==SPH_ATTR_UINT64SET )
{
assert ( m_dMvas.GetLength() );
int iCount = dMvas[iMva];
if ( iCount )
{
int iDst = m_dMvas.GetLength();
m_dMvas.Resize ( iDst+iCount+1 );
memcpy ( m_dMvas.Begin()+iDst, dMvas.Begin()+iMva, (iCount+1)*sizeof(dMvas[0]) );
sphSetRowAttr ( pAttrs, tColumn.m_tLocator, iDst );
} else
{
sphSetRowAttr ( pAttrs, tColumn.m_tLocator, 0 );
}
iMva += iCount+1;
}
}
// accumulate hits
int iHits = 0;
if ( pHits && pHits->Length() )
{
iHits = pHits->Length();
for ( const CSphWordHit * pHit = pHits->First(); pHit<=pHits->Last(); pHit++ )
m_dAccum.Add ( *pHit );
}
m_dPerDocHitsCount.Add ( iHits );
m_iAccumDocs++;
}
RtSegment_t * RtAccum_t::CreateSegment ( int iRowSize )
{
if ( !m_iAccumDocs )
return NULL;
MEMORY ( SPH_MEM_IDX_RT_ACCUM );
RtSegment_t * pSeg = new RtSegment_t ();
CSphWordHit tClosingHit;
tClosingHit.m_iWordID = WORDID_MAX;
tClosingHit.m_iDocID = DOCID_MAX;
tClosingHit.m_iWordPos = EMPTY_HIT;
m_dAccum.Add ( tClosingHit );
RtDoc_t tDoc;
tDoc.m_uDocID = 0;
tDoc.m_dFields.Unset();
tDoc.m_uHits = 0;
tDoc.m_uHit = 0;
RtWord_t tWord;
tWord.m_uWordID = 0;
tWord.m_uDocs = 0;
tWord.m_uHits = 0;
tWord.m_uDoc = 0;
RtDocWriter_t tOutDoc ( pSeg );
RtWordWriter_t tOutWord ( pSeg );
RtHitWriter_t tOutHit ( pSeg );
Hitpos_t uEmbeddedHit = EMPTY_HIT;
ARRAY_FOREACH ( i, m_dAccum )
{
const CSphWordHit & tHit = m_dAccum[i];
// new keyword or doc; flush current doc
if ( tHit.m_iWordID!=tWord.m_uWordID || tHit.m_iDocID!=tDoc.m_uDocID )
{
if ( tDoc.m_uDocID )
{
tWord.m_uDocs++;
tWord.m_uHits += tDoc.m_uHits;
if ( uEmbeddedHit )
{
assert ( tDoc.m_uHits==1 );
tDoc.m_uHit = uEmbeddedHit;
}
tOutDoc.ZipDoc ( tDoc );
tDoc.m_dFields.Unset();
tDoc.m_uHits = 0;
tDoc.m_uHit = tOutHit.ZipHitPtr();
}
tDoc.m_uDocID = tHit.m_iDocID;
tOutHit.ZipRestart ();
uEmbeddedHit = 0;
}
// new keyword; flush current keyword
if ( tHit.m_iWordID!=tWord.m_uWordID )
{
tOutDoc.ZipRestart ();
if ( tWord.m_uWordID )
tOutWord.ZipWord ( tWord );
tWord.m_uWordID = tHit.m_iWordID;
tWord.m_uDocs = 0;
tWord.m_uHits = 0;
tWord.m_uDoc = tOutDoc.ZipDocPtr();
}
// just a new hit
if ( !tDoc.m_uHits )
{
uEmbeddedHit = tHit.m_iWordPos;
} else
{
if ( uEmbeddedHit )
{
tOutHit.ZipHit ( uEmbeddedHit );
uEmbeddedHit = 0;
}
tOutHit.ZipHit ( tHit.m_iWordPos );
}
tDoc.m_dFields.Set ( HITMAN::GetField ( tHit.m_iWordPos ) );
tDoc.m_uHits++;
}
pSeg->m_iRows = m_iAccumDocs;
pSeg->m_iAliveRows = m_iAccumDocs;
// copy and sort attributes
int iStride = DOCINFO_IDSIZE + iRowSize;
pSeg->m_dRows.SwapData ( m_dAccumRows );
pSeg->m_dStrings.SwapData ( m_dStrings );
pSeg->m_dMvas.SwapData ( m_dMvas );
sphSortDocinfos ( pSeg->m_dRows.Begin(), pSeg->m_dRows.GetLength()/iStride, iStride );
// done
return pSeg;
}
struct AccumDocHits_t
{
SphDocID_t m_uDocid;
int m_iDocIndex;
int m_iHitIndex;
int m_iHitCount;
};
struct CmpDocHitIndex_t
{
inline bool IsLess ( const AccumDocHits_t & a, const AccumDocHits_t & b ) const
{
return ( a.m_uDocid<b.m_uDocid || ( a.m_uDocid==b.m_uDocid && a.m_iDocIndex<b.m_iDocIndex ) );
}
};
void RtAccum_t::CleanupDuplacates ( int iRowSize )
{
if ( m_iAccumDocs<=1 )
return;
assert ( m_iAccumDocs==m_dPerDocHitsCount.GetLength() );
CSphVector<AccumDocHits_t> dDocHits ( m_dPerDocHitsCount.GetLength() );
int iStride = DOCINFO_IDSIZE + iRowSize;
int iHitIndex = 0;
CSphRowitem * pRow = m_dAccumRows.Begin();
for ( int i=0; i<m_iAccumDocs; i++, pRow+=iStride )
{
AccumDocHits_t & tElem = dDocHits[i];
tElem.m_uDocid = DOCINFO2ID ( pRow );
tElem.m_iDocIndex = i;
tElem.m_iHitIndex = iHitIndex;
tElem.m_iHitCount = m_dPerDocHitsCount[i];
iHitIndex += m_dPerDocHitsCount[i];
}
dDocHits.Sort ( CmpDocHitIndex_t() );
bool bHasDups = false;;
for ( int i=0; i<dDocHits.GetLength()-1 && !bHasDups; i++ )
bHasDups = ( dDocHits[i].m_uDocid==dDocHits[i+1].m_uDocid );
if ( !bHasDups )
return;
// filter out unique - keep duplicates, but not last one
int iDst = 0;
int iSrc = 1;
while ( iSrc<dDocHits.GetLength() )
{
bool bDup = ( dDocHits[iDst].m_uDocid==dDocHits[iSrc].m_uDocid );
iDst += bDup;
dDocHits[iDst] = dDocHits[iSrc++];
}
dDocHits.Resize ( iDst );
assert ( dDocHits.GetLength() );
// sort by hit index
dDocHits.Sort ( bind ( &AccumDocHits_t::m_iHitIndex ) );
// clean up hits of duplicates
for ( int iHit = dDocHits.GetLength()-1; iHit>=0; iHit-- )
{
if ( !dDocHits[iHit].m_iHitCount )
continue;
int iFrom = dDocHits[iHit].m_iHitIndex;
int iCount = dDocHits[iHit].m_iHitCount;
if ( iFrom+iCount<m_dAccum.GetLength() )
{
for ( int iDst=iFrom, iSrc=iFrom+iCount; iSrc<m_dAccum.GetLength(); iSrc++, iDst++ )
m_dAccum[iDst] = m_dAccum[iSrc];
}
m_dAccum.Resize ( m_dAccum.GetLength()-iCount );
}
// sort by docid index
dDocHits.Sort ( bind ( &AccumDocHits_t::m_iDocIndex ) );
// clean up docinfos of duplicates
for ( int iDoc = dDocHits.GetLength()-1; iDoc>=0; iDoc-- )
{
int iDst = dDocHits[iDoc].m_iDocIndex*iStride;
int iSrc = iDst+iStride;
while ( iSrc<m_dAccumRows.GetLength() )
{
m_dAccumRows[iDst++] = m_dAccumRows[iSrc++];
}
m_iAccumDocs--;
m_dAccumRows.Resize ( m_iAccumDocs*iStride );
}
}
const RtWord_t * RtIndex_t::CopyWord ( RtSegment_t * pDst, RtWordWriter_t & tOutWord, const RtSegment_t * pSrc, const RtWord_t * pWord, RtWordReader_t & tInWord, const CSphVector<SphDocID_t> * pAccKlist )
{
RtDocReader_t tInDoc ( pSrc, *pWord );
RtDocWriter_t tOutDoc ( pDst );
RtWord_t tNewWord = *pWord;
tNewWord.m_uDoc = tOutDoc.ZipDocPtr();
// if flag is there, acc must be there
// however, NOT vice versa (newly created segments are unaffected by TLS klist)
assert (!( pSrc->m_bTlsKlist && !pAccKlist ));
#if 0
// index *must* be holding acc during merge
assert ( !pAcc || pAcc->m_pIndex==this );
#endif
// copy docs
for ( ;; )
{
const RtDoc_t * pDoc = tInDoc.UnzipDoc();
if ( !pDoc )
break;
// apply klist
bool bKill = ( pSrc->m_dKlist.BinarySearch ( pDoc->m_uDocID )!=NULL );
if ( !bKill && pSrc->m_bTlsKlist )
bKill = ( pAccKlist->BinarySearch ( pDoc->m_uDocID )!=NULL );
if ( bKill )
{
tNewWord.m_uDocs--;
tNewWord.m_uHits -= pDoc->m_uHits;
continue;
}
// short route, single embedded hit
if ( pDoc->m_uHits==1 )
{
tOutDoc.ZipDoc ( *pDoc );
continue;
}
// long route, copy hits
RtHitWriter_t tOutHit ( pDst );
RtHitReader_t tInHit ( pSrc, pDoc );
RtDoc_t tDoc = *pDoc;
tDoc.m_uHit = tOutHit.ZipHitPtr();
// OPTIMIZE? decode+memcpy?
for ( DWORD uValue=tInHit.UnzipHit(); uValue; uValue=tInHit.UnzipHit() )
tOutHit.ZipHit ( uValue );
// copy doc
tOutDoc.ZipDoc ( tDoc );
}
// append word to the dictionary
if ( tNewWord.m_uDocs )
tOutWord.ZipWord ( tNewWord );
// move forward
return tInWord.UnzipWord ();
}
void RtIndex_t::CopyDoc ( RtSegment_t * pSeg, RtDocWriter_t & tOutDoc, RtWord_t * pWord, const RtSegment_t * pSrc, const RtDoc_t * pDoc )
{
pWord->m_uDocs++;
pWord->m_uHits += pDoc->m_uHits;
if ( pDoc->m_uHits==1 )
{
tOutDoc.ZipDoc ( *pDoc );
return;
}
RtHitWriter_t tOutHit ( pSeg );
RtHitReader_t tInHit ( pSrc, pDoc );
RtDoc_t tDoc = *pDoc;
tDoc.m_uHit = tOutHit.ZipHitPtr();
tOutDoc.ZipDoc ( tDoc );
// OPTIMIZE? decode+memcpy?
for ( DWORD uValue=tInHit.UnzipHit(); uValue; uValue=tInHit.UnzipHit() )
tOutHit.ZipHit ( uValue );
}
void RtIndex_t::MergeWord ( RtSegment_t * pSeg, const RtSegment_t * pSrc1, const RtWord_t * pWord1, const RtSegment_t * pSrc2, const RtWord_t * pWord2, RtWordWriter_t & tOut, const CSphVector<SphDocID_t> * pAccKlist )
{
assert ( pWord1->m_uWordID==pWord2->m_uWordID );
RtDocWriter_t tOutDoc ( pSeg );
RtWord_t tWord;
tWord.m_uWordID = pWord1->m_uWordID;
tWord.m_uDocs = 0;
tWord.m_uHits = 0;
tWord.m_uDoc = tOutDoc.ZipDocPtr();
RtDocReader_t tIn1 ( pSrc1, *pWord1 );
RtDocReader_t tIn2 ( pSrc2, *pWord2 );
const RtDoc_t * pDoc1 = tIn1.UnzipDoc();
const RtDoc_t * pDoc2 = tIn2.UnzipDoc();
while ( pDoc1 || pDoc2 )
{
if ( pDoc1 && pDoc2 && pDoc1->m_uDocID==pDoc2->m_uDocID )
{
// dupe, must (!) be killed in the first segment, might be in both
#if 0
assert ( pSrc1->m_dKlist.BinarySearch ( pDoc1->m_uDocID )
|| ( pSrc1->m_bTlsKlist && pAcc && pAcc->m_dAccumKlist.BinarySearch ( pDoc1->m_uDocID ) ) );
#endif
if ( !pSrc2->m_dKlist.BinarySearch ( pDoc2->m_uDocID )
&& ( !pSrc1->m_bTlsKlist || !pSrc2->m_bTlsKlist || !pAccKlist->BinarySearch ( pDoc2->m_uDocID ) ) )
CopyDoc ( pSeg, tOutDoc, &tWord, pSrc2, pDoc2 );
pDoc1 = tIn1.UnzipDoc();
pDoc2 = tIn2.UnzipDoc();
} else if ( pDoc1 && ( !pDoc2 || pDoc1->m_uDocID < pDoc2->m_uDocID ) )
{
// winner from the first segment
if ( !pSrc1->m_dKlist.BinarySearch ( pDoc1->m_uDocID )
&& ( !pSrc1->m_bTlsKlist || !pAccKlist->BinarySearch ( pDoc1->m_uDocID ) ) )
CopyDoc ( pSeg, tOutDoc, &tWord, pSrc1, pDoc1 );
pDoc1 = tIn1.UnzipDoc();
} else
{
// winner from the second segment
assert ( pDoc2 && ( !pDoc1 || pDoc2->m_uDocID < pDoc1->m_uDocID ) );
if ( !pSrc2->m_dKlist.BinarySearch ( pDoc2->m_uDocID )
&& ( !pSrc2->m_bTlsKlist || !pAccKlist->BinarySearch ( pDoc2->m_uDocID ) ) )
CopyDoc ( pSeg, tOutDoc, &tWord, pSrc2, pDoc2 );
pDoc2 = tIn2.UnzipDoc();
}
}
if ( tWord.m_uDocs )
tOut.ZipWord ( tWord );
}
#if PARANOID
static void CheckSegmentRows ( const RtSegment_t * pSeg, int iStride )
{
const CSphVector<CSphRowitem> & dRows = pSeg->m_dRows; // shortcut
for ( int i=iStride; i<dRows.GetLength(); i+=iStride )
assert ( DOCINFO2ID ( &dRows[i] ) > DOCINFO2ID ( &dRows[i-iStride] ) );
}
#endif
template < typename DOCID = SphDocID_t >
struct RtRowIterator_tmpl : public ISphNoncopyable
{
protected:
const CSphRowitem * m_pRow;
const CSphRowitem * m_pRowMax;
const DOCID * m_pKlist;
const DOCID * m_pKlistMax;
const DOCID * m_pTlsKlist;
const DOCID * m_pTlsKlistMax;
const int m_iStride;
public:
explicit RtRowIterator_tmpl ( const RtSegment_t * pSeg, int iStride, bool bWriter, const CSphVector<DOCID> * pAccKlist )
: m_pRow ( pSeg->m_dRows.Begin() )
, m_pRowMax ( pSeg->m_dRows.Begin() + pSeg->m_dRows.GetLength() )
, m_pKlist ( NULL )
, m_pKlistMax ( NULL )
, m_pTlsKlist ( NULL )
, m_pTlsKlistMax ( NULL )
, m_iStride ( iStride )
{
if ( pSeg->m_dKlist.GetLength() )
{
m_pKlist = ( const DOCID* ) pSeg->m_dKlist.Begin();
m_pKlistMax = m_pKlist + pSeg->m_dKlist.GetLength();
}
// FIXME? OPTIMIZE? must not scan tls (open txn) in readers; can implement lighter iterator
// FIXME? OPTIMIZE? maybe we should just rely on the segment order and don't scan tls klist here
if ( bWriter && pSeg->m_bTlsKlist && pAccKlist && pAccKlist->GetLength() )
{
m_pTlsKlist = pAccKlist->Begin();
m_pTlsKlistMax = m_pTlsKlist + pAccKlist->GetLength();
}
}
const CSphRowitem * GetNextAliveRow ()
{
// while there are rows and k-list entries
while ( m_pRow<m_pRowMax && ( m_pKlist<m_pKlistMax || m_pTlsKlist<m_pTlsKlistMax ) )
{
// get next candidate id
DOCID uID = DOCINFO2ID_T<DOCID>(m_pRow);
// check if segment k-list kills it
while ( m_pKlist<m_pKlistMax && *m_pKlist<uID )
m_pKlist++;
if ( m_pKlist<m_pKlistMax && *m_pKlist==uID )
{
m_pKlist++;
m_pRow += m_iStride;
continue;
}
// check if txn k-list kills it
while ( m_pTlsKlist<m_pTlsKlistMax && *m_pTlsKlist<uID )
m_pTlsKlist++;
if ( m_pTlsKlist<m_pTlsKlistMax && *m_pTlsKlist==uID )
{
m_pTlsKlist++;
m_pRow += m_iStride;
continue;
}
// oh, so nobody kills it
break;
}
// oops, out of rows
if ( m_pRow>=m_pRowMax )
return NULL;
// got it, and it's alive!
m_pRow += m_iStride;
return m_pRow-m_iStride;
}
};
typedef RtRowIterator_tmpl<> RtRowIterator_t;
#ifdef PARANOID // sanity check in PARANOID mode
template <typename DOCID>
void VerifyEmptyStrings ( const CSphTightVector<BYTE> & dStorage, const CSphSchema & tSchema, const CSphRowitem * pRow )
{
if ( dStorage.GetLength()>1 )
return;
const DWORD * pAttr = DOCINFO2ATTRS_T<DOCID>(pRow);
for ( int i=0; i<tSchema.GetAttrsCount(); i++ )
{
const CSphColumnInfo & tCol = tSchema.GetAttr(i);
assert ( tCol.m_eAttrType!=SPH_ATTR_STRING
|| ( tCol.m_eAttrType==SPH_ATTR_STRING && sphGetRowAttr ( pAttr, tCol.m_tLocator )==0 ) );
}
}
#endif
static DWORD CopyPackedString ( const BYTE * pSrc, CSphTightVector<BYTE> & dDst )
{
assert ( pSrc );
assert ( dDst.GetLength()>=1 );
const BYTE * pStr = NULL;
const int iLen = sphUnpackStr ( pSrc, &pStr );
assert ( iLen>0 );
assert ( pStr );
const DWORD uOff = dDst.GetLength();
const DWORD uWriteLen = iLen + ( pStr - pSrc ); // actual length = strings content length + packed length of string
dDst.Resize ( uOff + uWriteLen );
memcpy ( dDst.Begin() + uOff, pSrc, uWriteLen );
return uOff;
}
static DWORD CopyMva ( const DWORD * pSrc, CSphTightVector<DWORD> & dDst )
{
assert ( pSrc );
assert ( dDst.GetLength()>=1 );
DWORD uCount = *pSrc;
assert ( uCount );
DWORD iLen = dDst.GetLength();
dDst.Resize ( iLen+uCount+1 );
memcpy ( dDst.Begin()+iLen, pSrc, ( uCount+1 )*sizeof(DWORD) );
return iLen;
}
void CopyDocid ( SphDocID_t uDocid, CSphTightVector<DWORD> & dDst )
{
int iLen = dDst.GetLength();
dDst.Resize ( iLen + sizeof(uDocid) );
DOCINFOSETID ( dDst.Begin()+iLen, uDocid );
}
static void ExtractLocators ( const CSphSchema & tSchema, ESphAttr eAttrType, CSphVector<CSphAttrLocator> & dLocators )
{
for ( int i=0; i<tSchema.GetAttrsCount(); i++ )
{
const CSphColumnInfo & tColumn = tSchema.GetAttr(i);
if ( tColumn.m_eAttrType==eAttrType )
dLocators.Add ( tColumn.m_tLocator );
}
}
class StorageStringWriter_t : ISphNoncopyable
{
private:
CSphWriter & m_tDst;
CSphVector<CSphAttrLocator> m_dLocators;
public:
explicit StorageStringWriter_t ( const CSphSchema & tSchema, CSphWriter & tDst )
: m_tDst ( tDst )
{
ExtractLocators ( tSchema, SPH_ATTR_STRING, m_dLocators );
}
const CSphVector<CSphAttrLocator> & GetLocators () const { return m_dLocators; }
void SetDocid ( SphDocID_t ) {}
DWORD CopyAttr ( const BYTE * pSrc )
{
assert ( m_tDst.GetPos()>0 && m_tDst.GetPos()<( I64C(1)<<32 ) ); // should be 32 bit offset
const BYTE * pStr = NULL;
const int iLen = sphUnpackStr ( pSrc, &pStr );
assert ( iLen && pStr );
DWORD uAttr = (DWORD)m_tDst.GetPos();
const int iWriteLen = iLen + ( pStr - pSrc );
m_tDst.PutBytes ( pSrc, iWriteLen );
return uAttr;
}
};
class StorageStringVector_t : ISphNoncopyable
{
private:
CSphTightVector<BYTE> & m_dDst;
CSphVector<CSphAttrLocator> m_dLocators;
public:
explicit StorageStringVector_t ( const CSphSchema & tSchema, CSphTightVector<BYTE> & dDst )
: m_dDst ( dDst )
{
ExtractLocators ( tSchema, SPH_ATTR_STRING, m_dLocators );
}
const CSphVector<CSphAttrLocator> & GetLocators () const { return m_dLocators; }
void SetDocid ( SphDocID_t ) {}
DWORD CopyAttr ( const BYTE * pSrc )
{
assert ( m_dDst.GetLength()>0 && m_dDst.GetLength()<( I64C(1)<<32 ) ); // should be 32 bit offset
return CopyPackedString ( pSrc, m_dDst );
}
};
class StorageMvaWriter_t : ISphNoncopyable
{
private:
CSphWriter & m_tDst;
CSphVector<CSphAttrLocator> m_dLocators;
public:
explicit StorageMvaWriter_t ( const CSphSchema & tSchema, CSphWriter & tDst )
: m_tDst ( tDst )
{
ExtractLocators ( tSchema, SPH_ATTR_UINT32SET, m_dLocators );
ExtractLocators ( tSchema, SPH_ATTR_UINT64SET, m_dLocators );
}
const CSphVector<CSphAttrLocator> & GetLocators () const { return m_dLocators; }
void SetDocid ( SphDocID_t uDocid )
{
m_tDst.PutDocid ( uDocid );
}
DWORD CopyAttr ( const DWORD * pSrc )
{
assert ( m_tDst.GetPos()>0 && m_tDst.GetPos()<( I64C(1)<<32 ) ); // should be 32 bit offset
DWORD uCount = *pSrc;
assert ( uCount );
SphOffset_t uOff = m_tDst.GetPos();
assert ( ( uOff%sizeof(DWORD) )==0 );
m_tDst.PutBytes ( pSrc, ( uCount+1 )*sizeof(DWORD) );
return MVA_DOWNSIZE ( uOff/sizeof(DWORD) );
}
};
class StorageMvaVector_t : ISphNoncopyable
{
private:
CSphTightVector<DWORD> & m_dDst;
CSphVector<CSphAttrLocator> m_dLocators;
public:
explicit StorageMvaVector_t ( const CSphSchema & tSchema, CSphTightVector<DWORD> & dDst )
: m_dDst ( dDst )
{
ExtractLocators ( tSchema, SPH_ATTR_UINT32SET, m_dLocators );
ExtractLocators ( tSchema, SPH_ATTR_UINT64SET, m_dLocators );
}
const CSphVector<CSphAttrLocator> & GetLocators () const { return m_dLocators; }
void SetDocid ( SphDocID_t uDocid )
{
CopyDocid ( uDocid, m_dDst );
}
DWORD CopyAttr ( const DWORD * pSrc )
{
assert ( m_dDst.GetLength()>0 && m_dDst.GetLength()<( I64C(1)<<32 ) ); // should be 32 bit offset
return CopyMva ( pSrc, m_dDst );
}
};
template <typename DOCID, typename STORAGE, typename SRC>
void CopyFixupStorageAttrs ( const CSphTightVector<SRC> & dSrc, STORAGE & tStorage, CSphRowitem * pRow )
{
const CSphVector<CSphAttrLocator> & dLocators = tStorage.GetLocators();
if ( !dLocators.GetLength() )
return;
// store string\mva attr for this row
SphDocID_t uDocid = DOCINFO2ID ( pRow );
DWORD * pAttr = DOCINFO2ATTRS_T<DOCID>( pRow );
bool bIdSet = false;
ARRAY_FOREACH ( i, dLocators )
{
const SphAttr_t uOff = sphGetRowAttr ( pAttr, dLocators[i] );
if ( !uOff )
continue;
assert ( uOff && uOff<dSrc.GetLength() );
if ( !bIdSet )
{
tStorage.SetDocid ( uDocid );
bIdSet = true;
}
DWORD uAttr = tStorage.CopyAttr ( dSrc.Begin() + uOff );
sphSetRowAttr ( pAttr, dLocators[i], uAttr );
}
}
RtSegment_t * RtIndex_t::MergeSegments ( const RtSegment_t * pSeg1, const RtSegment_t * pSeg2, const CSphVector<SphDocID_t> * pAccKlist )
{
if ( pSeg1->m_iTag > pSeg2->m_iTag )
Swap ( pSeg1, pSeg2 );
RtSegment_t * pSeg = new RtSegment_t ();
////////////////////
// merge attributes
////////////////////
// check that all the IDs are in proper asc order
#if PARANOID
CheckSegmentRows ( pSeg1, m_iStride );
CheckSegmentRows ( pSeg2, m_iStride );
#endif
// just a shortcut
CSphVector<CSphRowitem> & dRows = pSeg->m_dRows;
CSphTightVector<BYTE> & dStrings = pSeg->m_dStrings;
CSphTightVector<DWORD> & dMvas = pSeg->m_dMvas;
// we might need less because of dupes, but we can not know yet
dRows.Reserve ( pSeg1->m_dRows.GetLength() + pSeg2->m_dRows.GetLength() );
// as each segment has dummy zero we reserve less
assert ( pSeg1->m_dStrings.GetLength() + pSeg2->m_dStrings.GetLength()>=2 );
dStrings.Reserve ( pSeg1->m_dStrings.GetLength() + pSeg2->m_dStrings.GetLength() - 2 );
assert ( pSeg1->m_dMvas.GetLength() + pSeg2->m_dMvas.GetLength()>=2 );
dMvas.Reserve ( pSeg1->m_dMvas.GetLength() + pSeg2->m_dMvas.GetLength() - 2 );
StorageStringVector_t tStorageString ( m_tSchema, dStrings );
StorageMvaVector_t tStorageMva ( m_tSchema, dMvas );
RtRowIterator_t tIt1 ( pSeg1, m_iStride, true, pAccKlist );
RtRowIterator_t tIt2 ( pSeg2, m_iStride, true, pAccKlist );
const CSphRowitem * pRow1 = tIt1.GetNextAliveRow();
const CSphRowitem * pRow2 = tIt2.GetNextAliveRow();
while ( pRow1 || pRow2 )
{
if ( !pRow2 || ( pRow1 && pRow2 && DOCINFO2ID(pRow1)<DOCINFO2ID(pRow2) ) )
{
assert ( pRow1 );
for ( int i=0; i<m_iStride; i++ )
dRows.Add ( *pRow1++ );
CSphRowitem * pDstRow = dRows.Begin() + dRows.GetLength() - m_iStride;
CopyFixupStorageAttrs<SphDocID_t> ( pSeg1->m_dStrings, tStorageString, pDstRow );
CopyFixupStorageAttrs<SphDocID_t> ( pSeg1->m_dMvas, tStorageMva, pDstRow );
pRow1 = tIt1.GetNextAliveRow();
} else
{
assert ( pRow2 );
assert ( !pRow1 || ( DOCINFO2ID(pRow1)!=DOCINFO2ID(pRow2) ) ); // all dupes must be killed and skipped by the iterator
for ( int i=0; i<m_iStride; i++ )
dRows.Add ( *pRow2++ );
CSphRowitem * pDstRow = dRows.Begin() + dRows.GetLength() - m_iStride;
CopyFixupStorageAttrs<SphDocID_t> ( pSeg2->m_dStrings, tStorageString, pDstRow );
CopyFixupStorageAttrs<SphDocID_t> ( pSeg2->m_dMvas, tStorageMva, pDstRow );
pRow2 = tIt2.GetNextAliveRow();
}
pSeg->m_iRows++;
pSeg->m_iAliveRows++;
}
assert ( pSeg->m_iRows*m_iStride==pSeg->m_dRows.GetLength() );
#if PARANOID
CheckSegmentRows ( pSeg, m_iStride );
#endif
//////////////////
// merge keywords
//////////////////
pSeg->m_dWords.Reserve ( pSeg1->m_dWords.GetLength() + pSeg2->m_dWords.GetLength() );
pSeg->m_dDocs.Reserve ( pSeg1->m_dDocs.GetLength() + pSeg2->m_dDocs.GetLength() );
pSeg->m_dHits.Reserve ( pSeg1->m_dHits.GetLength() + pSeg2->m_dHits.GetLength() );
RtWordWriter_t tOut ( pSeg );
RtWordReader_t tIn1 ( pSeg1 );
RtWordReader_t tIn2 ( pSeg2 );
const RtWord_t * pWords1 = tIn1.UnzipWord ();
const RtWord_t * pWords2 = tIn2.UnzipWord ();
// merge while there are common words
for ( ;; )
{
while ( pWords1 && pWords2 && pWords1->m_uWordID!=pWords2->m_uWordID )
if ( pWords1->m_uWordID < pWords2->m_uWordID )
pWords1 = CopyWord ( pSeg, tOut, pSeg1, pWords1, tIn1, pAccKlist );
else
pWords2 = CopyWord ( pSeg, tOut, pSeg2, pWords2, tIn2, pAccKlist );
if ( !pWords1 || !pWords2 )
break;
assert ( pWords1 && pWords2 && pWords1->m_uWordID==pWords2->m_uWordID );
MergeWord ( pSeg, pSeg1, pWords1, pSeg2, pWords2, tOut, pAccKlist );
pWords1 = tIn1.UnzipWord();
pWords2 = tIn2.UnzipWord();
}
// copy tails
while ( pWords1 ) pWords1 = CopyWord ( pSeg, tOut, pSeg1, pWords1, tIn1, pAccKlist );
while ( pWords2 ) pWords2 = CopyWord ( pSeg, tOut, pSeg2, pWords2, tIn2, pAccKlist );
assert ( pSeg->m_dRows.GetLength() );
assert ( pSeg->m_iRows );
assert ( pSeg->m_iAliveRows==pSeg->m_iRows );
return pSeg;
}
struct CmpSegments_fn
{
inline bool IsLess ( const RtSegment_t * a, const RtSegment_t * b )
{
return a->GetMergeFactor() > b->GetMergeFactor();
}
};
void RtIndex_t::Commit ()
{
assert ( g_bRTChangesAllowed );
MEMORY ( SPH_MEM_IDX_RT );
RtAccum_t * pAcc = AcquireAccum();
if ( !pAcc )
return;
// empty txn, just ignore
if ( !pAcc->m_iAccumDocs && !pAcc->m_dAccumKlist.GetLength() )
{
pAcc->m_pIndex = NULL;
pAcc->m_iAccumDocs = 0;
pAcc->m_dAccumRows.Resize ( 0 );
pAcc->m_dStrings.Resize ( 1 );
pAcc->m_dPerDocHitsCount.Resize ( 0 );
return;
}
// phase 0, build a new segment
// accum and segment are thread local; so no locking needed yet
// segment might be NULL if we're only killing rows this txn
pAcc->CleanupDuplacates ( m_tOutboundSchema.GetRowSize() );
pAcc->m_dAccum.Sort ( CmpHit_fn() );
RtSegment_t * pNewSeg = pAcc->CreateSegment ( m_tOutboundSchema.GetRowSize() );
assert ( !pNewSeg || pNewSeg->m_iRows>0 );
assert ( !pNewSeg || pNewSeg->m_iAliveRows>0 );
assert ( !pNewSeg || pNewSeg->m_bTlsKlist==false );
#if PARANOID
if ( pNewSeg )
CheckSegmentRows ( pNewSeg, m_iStride );
#endif
// clean up parts we no longer need
pAcc->m_dAccum.Resize ( 0 );
pAcc->m_dAccumRows.Resize ( 0 );
pAcc->m_dStrings.Resize ( 1 ); // handle dummy zero offset
pAcc->m_dPerDocHitsCount.Resize ( 0 );
// sort accum klist, too
pAcc->m_dAccumKlist.Uniq ();
// now on to the stuff that needs locking and recovery
CommitReplayable ( pNewSeg, pAcc->m_dAccumKlist );
// done; cleanup accum
pAcc->m_pIndex = NULL;
pAcc->m_iAccumDocs = 0;
pAcc->m_dAccumKlist.Reset();
}
void RtIndex_t::CommitReplayable ( RtSegment_t * pNewSeg, CSphVector<SphDocID_t> & dAccKlist )
{
int iNewDocs = pNewSeg ? pNewSeg->m_iRows : 0;
// phase 1, lock out other writers (but not readers yet)
// concurrent readers are ok during merges, as existing segments won't be modified yet
// however, concurrent writers are not
Verify ( m_tWriterMutex.Lock() );
// first of all, binlog txn data for recovery
g_pBinlog->BinlogCommit ( m_sIndexName.cstr(), ++m_iTID, pNewSeg, dAccKlist );
// let merger know that existing segments are subject to additional, TLS K-list filter
// safe despite the readers, flag must only be used by writer
if ( dAccKlist.GetLength() )
ARRAY_FOREACH ( i, m_pSegments )
{
// OPTIMIZE? only need to set the flag if TLS K-list *actually* affects segment
assert ( m_pSegments[i]->m_bTlsKlist==false );
m_pSegments[i]->m_bTlsKlist = true;
}
// prepare new segments vector
// create more new segments by merging as needed
// do not (!) kill processed old segments just yet, as readers might still need them
CSphVector<RtSegment_t*> dSegments;
CSphVector<RtSegment_t*> dToKill;
dSegments = m_pSegments;
if ( pNewSeg )
dSegments.Add ( pNewSeg );
int64_t iRamFreed = 0;
// enforce RAM usage limit
int64_t iRamLeft = m_iRamSize;
ARRAY_FOREACH ( i, dSegments )
iRamLeft = Max ( 0, iRamLeft - dSegments[i]->GetUsedRam() );
// skip merging if no rows were added or no memory left
bool bDump = ( iRamLeft==0 );
const int MAX_SEGMENTS = 32;
const int MAX_PROGRESSION_SEGMENT = 8;
while ( pNewSeg && iRamLeft>0 )
{
dSegments.Sort ( CmpSegments_fn() );
// unconditionally merge if there's too much segments now
// conditionally merge if smallest segment has grown too large
// otherwise, we're done
const int iLen = dSegments.GetLength();
if ( iLen < ( MAX_SEGMENTS - MAX_PROGRESSION_SEGMENT ) )
break;
assert ( iLen>=2 );
// exit if progression is kept AND lesser MAX_SEGMENTS limit
if ( dSegments[iLen-2]->GetMergeFactor() > dSegments[iLen-1]->GetMergeFactor()*2 && iLen < MAX_SEGMENTS )
break;
// check whether we have enough RAM
#define LOC_ESTIMATE1(_seg,_vec) \
(int)( ( (int64_t)_seg->_vec.GetLength() ) * _seg->m_iAliveRows / _seg->m_iRows )
#define LOC_ESTIMATE(_vec) \
( LOC_ESTIMATE1 ( dSegments[iLen-1], _vec ) + LOC_ESTIMATE1 ( dSegments[iLen-2], _vec ) )
int64_t iEstimate =
CSphTightVectorPolicy<BYTE>::Relimit ( 0, LOC_ESTIMATE ( m_dWords ) ) +
CSphTightVectorPolicy<BYTE>::Relimit ( 0, LOC_ESTIMATE ( m_dDocs ) ) +
CSphTightVectorPolicy<BYTE>::Relimit ( 0, LOC_ESTIMATE ( m_dHits ) ) +
CSphTightVectorPolicy<BYTE>::Relimit ( 0, LOC_ESTIMATE ( m_dStrings ) );
#undef LOC_ESTIMATE
#undef LOC_ESTIMATE1
if ( iEstimate>iRamLeft )
{
// dump case: can't merge any more AND segments count limit's reached
bDump = ( ( iRamLeft + iRamFreed )<=iEstimate ) && ( iLen>=MAX_SEGMENTS );
break;
}
// do it
RtSegment_t * pA = dSegments.Pop();
RtSegment_t * pB = dSegments.Pop();
dSegments.Add ( MergeSegments ( pA, pB, &dAccKlist ) );
dToKill.Add ( pA );
dToKill.Add ( pB );
iRamFreed += pA->GetUsedRam() + pB->GetUsedRam();
int64_t iMerged = dSegments.Last()->GetUsedRam();
iRamLeft -= Min ( iRamLeft, iMerged );
}
// phase 2, obtain exclusive writer lock
// we now have to update K-lists in (some of) the survived segments
// and also swap in new segment list
m_tRwlock.WriteLock ();
// adjust for an incoming accumulator K-list
int iTotalKilled = 0;
if ( dAccKlist.GetLength() )
{
#ifndef NDEBUG
#if PARANOID
// check that klist is sorted and unique
for ( int i=1; i<dAccKlist.GetLength(); i++ )
assert ( dAccKlist[i-1] < dAccKlist[i] );
#endif
#endif
// update totals
// work the original (!) segments, and before (!) updating their K-lists
int iDiskLiveKLen = dAccKlist.GetLength();
for ( int i=0; i<iDiskLiveKLen; i++ )
{
const SphDocID_t uDocid = dAccKlist[i];
// check RAM chunk
bool bRamKilled = false;
for ( int j=0; j<m_pSegments.GetLength() && !bRamKilled; j++ )
bRamKilled = ( m_pSegments[j]->FindAliveRow ( uDocid )!=NULL );
bool bDiskKilled = m_tKlist.Exists ( uDocid );
// check disk chunks
bool bKeep = false;
if ( !bRamKilled || !bDiskKilled )
{
for ( int j=m_pDiskChunks.GetLength()-1; j>=0 && !bKeep; j-- )
{
if ( m_pDiskChunks[j]->HasDocid ( uDocid ) )
{
// we just found the most recent chunk with our suspect docid
// let's check whether it's already killed by subsequent chunks, or gets killed now
SphAttr_t uRef = uDocid;
bKeep = true;
for ( int k=j+1; k<m_pDiskChunks.GetLength() && bKeep; k++ )
{
const CSphIndex * pIndex = m_pDiskChunks[k];
bKeep &= ( sphBinarySearch ( pIndex->GetKillList(), pIndex->GetKillList() + pIndex->GetKillListSize() - 1, uRef )==NULL );
}
}
}
}
if ( bRamKilled || bKeep )
iTotalKilled++;
if ( bDiskKilled || !bKeep )
{
Swap ( dAccKlist[i], dAccKlist[iDiskLiveKLen-1] );
iDiskLiveKLen--;
i--;
}
}
// update K-lists on survivors
ARRAY_FOREACH ( iSeg, dSegments )
{
RtSegment_t * pSeg = dSegments[iSeg];
if ( !pSeg->m_bTlsKlist )
continue; // should be fresh enough
bool bKlistChanged = false;
// this segment was not created by this txn
// so we need to merge additional K-list from current txn into it
ARRAY_FOREACH ( j, dAccKlist )
{
SphDocID_t uDocid = dAccKlist[j];
if ( pSeg->FindAliveRow ( uDocid ) )
{
pSeg->m_dKlist.Add ( uDocid );
pSeg->m_iAliveRows--;
assert ( pSeg->m_iAliveRows>=0 );
bKlistChanged = true;
}
}
// we did not check for existence in K-list, only in segment
// so need to use Uniq(), not just Sort()
if ( bKlistChanged )
pSeg->m_dKlist.Uniq ();
// mark as good
pSeg->m_bTlsKlist = false;
}
// update disk K-list
// after iDiskLiveKLen are ids already stored on disk - just skip them
for ( int i=0; i<iDiskLiveKLen; i++ )
m_tKlist.Delete ( dAccKlist[i] );
}
ARRAY_FOREACH ( i, dSegments )
{
RtSegment_t * pSeg = dSegments[i];
if ( pSeg->m_iAliveRows==0 )
{
dToKill.Add ( pSeg );
dSegments.RemoveFast ( i );
i--;
}
}
// go live!
Swap ( m_pSegments, dSegments );
// we can kill retired segments now
ARRAY_FOREACH ( i, dToKill )
SafeDelete ( dToKill[i] );
// update stats
m_tStats.m_iTotalDocuments += iNewDocs - iTotalKilled;
// phase 3, enable readers again
// we might need to dump data to disk now
// but during the dump, readers can still use RAM chunk data
Verify ( m_tRwlock.Unlock() );
if ( bDump )
{
SaveDiskChunk();
g_pBinlog->NotifyIndexFlush ( m_sIndexName.cstr(), m_iTID, false );
}
// all done, enable other writers
Verify ( m_tWriterMutex.Unlock() );
}
void RtIndex_t::RollBack ()
{
assert ( g_bRTChangesAllowed );
RtAccum_t * pAcc = AcquireAccum();
if ( !pAcc )
return;
// clean up parts we no longer need
pAcc->m_dAccum.Resize ( 0 );
pAcc->m_dAccumRows.Resize ( 0 );
// finish cleaning up and release accumulator
pAcc->m_pIndex = NULL;
pAcc->m_iAccumDocs = 0;
pAcc->m_dAccumKlist.Reset();
}
bool RtIndex_t::DeleteDocument ( const SphDocID_t * pDocs, int iDocs, CSphString & sError )
{
assert ( g_bRTChangesAllowed );
MEMORY ( SPH_MEM_IDX_RT_ACCUM );
RtAccum_t * pAcc = AcquireAccum ( &sError );
if ( !pAcc )
return false;
if ( !iDocs )
return true;
assert ( pDocs && iDocs );
// !COMMIT should handle case when uDoc what inserted in current txn here
while ( iDocs-- )
pAcc->m_dAccumKlist.Add ( *pDocs++ );
return true;
}
//////////////////////////////////////////////////////////////////////////
// LOAD/SAVE
//////////////////////////////////////////////////////////////////////////
struct Checkpoint_t
{
uint64_t m_uWord;
uint64_t m_uOffset;
};
void RtIndex_t::DumpToDisk ( const char * sFilename )
{
MEMORY ( SPH_MEM_IDX_RT );
Verify ( m_tWriterMutex.Lock() );
Verify ( m_tRwlock.WriteLock() );
SaveDiskData ( sFilename );
Verify ( m_tRwlock.Unlock() );
Verify ( m_tWriterMutex.Unlock() );
}
// Here is the devil of saving id32 chunk from id64 binary daemon
template < typename DOCID, typename WORDID >
void RtIndex_t::SaveDiskDataImpl ( const char * sFilename ) const
{
typedef RtDoc_tmpl<DOCID> RTDOC;
typedef RtWord_tmpl<WORDID> RTWORD;
CSphString sName, sError;
CSphWriter wrHits, wrDocs, wrDict, wrRows;
sName.SetSprintf ( "%s.spp", sFilename ); wrHits.OpenFile ( sName.cstr(), sError );
sName.SetSprintf ( "%s.spd", sFilename ); wrDocs.OpenFile ( sName.cstr(), sError );
sName.SetSprintf ( "%s.spi", sFilename ); wrDict.OpenFile ( sName.cstr(), sError );
sName.SetSprintf ( "%s.spa", sFilename ); wrRows.OpenFile ( sName.cstr(), sError );
BYTE bDummy = 1;
wrDict.PutBytes ( &bDummy, 1 );
wrDocs.PutBytes ( &bDummy, 1 );
wrHits.PutBytes ( &bDummy, 1 );
// we don't have enough RAM to create new merged segments
// and have to do N-way merge kinda in-place
CSphVector<RtWordReader_tmpl<WORDID>*> pWordReaders;
CSphVector<RtDocReader_tmpl<DOCID>*> pDocReaders;
CSphVector<RtSegment_t*> pSegments;
CSphVector<const RTWORD*> pWords;
CSphVector<const RTDOC*> pDocs;
pWordReaders.Reserve ( m_pSegments.GetLength() );
pDocReaders.Reserve ( m_pSegments.GetLength() );
pSegments.Reserve ( m_pSegments.GetLength() );
pWords.Reserve ( m_pSegments.GetLength() );
pDocs.Reserve ( m_pSegments.GetLength() );
// OPTIMIZE? somehow avoid new on iterators maybe?
ARRAY_FOREACH ( i, m_pSegments )
pWordReaders.Add ( new RtWordReader_tmpl<WORDID> ( m_pSegments[i] ) );
ARRAY_FOREACH ( i, pWordReaders )
pWords.Add ( pWordReaders[i]->UnzipWord() );
// loop keywords
static const int WORDLIST_CHECKPOINT = 64;
CSphVector<Checkpoint_t> dCheckpoints;
int iWords = 0;
WORDID uLastWord = 0;
SphOffset_t uLastDocpos = 0;
for ( ;; )
{
// find keyword with min id
const RTWORD * pWord = NULL;
ARRAY_FOREACH ( i, pWords ) // OPTIMIZE? PQ or at least nulls removal here?!
if ( pWords[i] )
if ( !pWord || pWords[i]->m_uWordID < pWord->m_uWordID )
pWord = pWords[i];
if ( !pWord )
break;
// loop all segments that have this keyword
assert ( pSegments.GetLength()==0 );
assert ( pDocReaders.GetLength()==0 );
assert ( pDocs.GetLength()==0 );
ARRAY_FOREACH ( i, pWords )
if ( pWords[i] && pWords[i]->m_uWordID==pWord->m_uWordID )
{
pSegments.Add ( m_pSegments[i] );
pDocReaders.Add ( new RtDocReader_tmpl<DOCID> ( m_pSegments[i], *pWords[i] ) );
const RTDOC * pDoc = pDocReaders.Last()->UnzipDoc();
while ( pDoc && m_pSegments[i]->m_dKlist.BinarySearch ( pDoc->m_uDocID ) )
pDoc = pDocReaders.Last()->UnzipDoc();
pDocs.Add ( pDoc );
}
// loop documents
SphOffset_t uDocpos = wrDocs.GetPos();
DOCID uLastDoc = 0;
SphOffset_t uLastHitpos = 0;
int iDocs = 0;
int iHits = 0;
for ( ;; )
{
// find alive doc with min id
int iMinReader = -1;
ARRAY_FOREACH ( i, pDocs ) // OPTIMIZE?
{
if ( !pDocs[i] )
continue;
assert ( !pSegments[i]->m_dKlist.BinarySearch ( pDocs[i]->m_uDocID ) );
if ( iMinReader<0 || pDocs[i]->m_uDocID < pDocs[iMinReader]->m_uDocID )
iMinReader = i;
}
if ( iMinReader<0 )
break;
// write doclist entry
const RTDOC * pDoc = pDocs[iMinReader]; // shortcut
iDocs++;
iHits += pDoc->m_uHits;
wrDocs.ZipOffset ( pDoc->m_uDocID - uLastDoc );
wrDocs.ZipOffset ( wrHits.GetPos() - uLastHitpos );
wrDocs.ZipInt ( pDoc->m_dFields.GetMask32() );
wrDocs.ZipInt ( pDoc->m_uHits );
uLastDoc = pDoc->m_uDocID;
uLastHitpos = wrHits.GetPos();
// loop hits from most current segment
if ( pDoc->m_uHits>1 )
{
DWORD uLastHit = 0;
RtHitReader_t tInHit ( pSegments[iMinReader], pDoc );
for ( DWORD uValue=tInHit.UnzipHit(); uValue; uValue=tInHit.UnzipHit() )
{
wrHits.ZipInt ( uValue - uLastHit );
uLastHit = uValue;
}
} else
{
wrHits.ZipInt ( pDoc->m_uHit );
}
wrHits.ZipInt ( 0 );
// fast forward readers
DOCID uMinID = pDocs[iMinReader]->m_uDocID;
ARRAY_FOREACH ( i, pDocs )
while ( pDocs[i] && ( pDocs[i]->m_uDocID<=uMinID || pSegments[i]->m_dKlist.BinarySearch ( pDocs[i]->m_uDocID ) ) )
pDocs[i] = pDocReaders[i]->UnzipDoc();
}
// write dict entry if necessary
if ( wrDocs.GetPos()!=uDocpos )
{
wrDocs.ZipInt ( 0 );
if ( !iWords )
{
Checkpoint_t & tChk = dCheckpoints.Add ();
tChk.m_uWord = pWord->m_uWordID;
tChk.m_uOffset = wrDict.GetPos();
}
wrDict.ZipOffset ( pWord->m_uWordID - uLastWord );
wrDict.ZipOffset ( uDocpos - uLastDocpos );
wrDict.ZipInt ( iDocs );
wrDict.ZipInt ( iHits );
uLastWord = pWord->m_uWordID;
uLastDocpos = uDocpos;
if ( ++iWords==WORDLIST_CHECKPOINT )
{
wrDict.ZipInt ( 0 );
wrDict.ZipOffset ( wrDocs.GetPos() - uLastDocpos ); // store last hitlist length
uLastDocpos = 0;
uLastWord = 0;
iWords = 0;
}
}
// move words forward
WORDID uMinID = pWord->m_uWordID; // because pWord contents will move forward too!
ARRAY_FOREACH ( i, pWords )
if ( pWords[i] && pWords[i]->m_uWordID==uMinID )
pWords[i] = pWordReaders[i]->UnzipWord();
// cleanup
ARRAY_FOREACH ( i, pDocReaders )
SafeDelete ( pDocReaders[i] );
pSegments.Resize ( 0 );
pDocReaders.Resize ( 0 );
pDocs.Resize ( 0 );
}
// write checkpoints
wrDict.ZipInt ( 0 ); // indicate checkpoint
wrDict.ZipOffset ( wrDocs.GetPos() - uLastDocpos ); // store last doclist length
SphOffset_t iCheckpointsPosition = wrDict.GetPos();
ARRAY_FOREACH ( i, dCheckpoints )
{
wrDict.PutOffset ( dCheckpoints[i].m_uWord );
wrDict.PutOffset ( dCheckpoints[i].m_uOffset );
}
// write attributes
// the new, template-param aligned iStride instead of index-wide.
int iStride = DWSIZEOF(DOCID) + m_tSchema.GetRowSize();
CSphVector<RtRowIterator_tmpl<DOCID>*> pRowIterators ( m_pSegments.GetLength() );
ARRAY_FOREACH ( i, m_pSegments )
pRowIterators[i] = new RtRowIterator_tmpl<DOCID> ( m_pSegments[i], iStride, false, NULL );
CSphVector<const CSphRowitem*> pRows ( m_pSegments.GetLength() );
ARRAY_FOREACH ( i, pRowIterators )
pRows[i] = pRowIterators[i]->GetNextAliveRow();
// prepare to build min-max index for attributes too
int iTotalDocs = 0;
ARRAY_FOREACH ( i, m_pSegments )
iTotalDocs += m_pSegments[i]->m_iAliveRows;
AttrIndexBuilder_t<DOCID> tMinMaxBuilder ( m_tSchema );
CSphVector<DWORD> dMinMaxBuffer ( tMinMaxBuilder.GetExpectedSize ( iTotalDocs ) );
tMinMaxBuilder.Prepare ( dMinMaxBuffer.Begin(), dMinMaxBuffer.Begin() + dMinMaxBuffer.GetLength() );
sName.SetSprintf ( "%s.sps", sFilename );
CSphWriter tStrWriter;
tStrWriter.OpenFile ( sName.cstr(), sError );
tStrWriter.PutByte ( 0 ); // dummy byte, to reserve magic zero offset
sName.SetSprintf ( "%s.spm", sFilename );
CSphWriter tMvaWriter;
tMvaWriter.OpenFile ( sName.cstr(), sError );
tMvaWriter.PutDword ( 0 ); // dummy dword, to reserve magic zero offset
CSphRowitem * pFixedRow = new CSphRowitem[iStride];
#ifndef NDEBUG
int iStoredDocs = 0;
#endif
StorageStringWriter_t tStorageString ( m_tSchema, tStrWriter );
StorageMvaWriter_t tStorageMva ( m_tSchema, tMvaWriter );
for ( ;; )
{
// find min row
int iMinRow = -1;
ARRAY_FOREACH ( i, pRows )
if ( pRows[i] )
if ( iMinRow<0 || DOCINFO2ID_T<DOCID> ( pRows[i] ) < DOCINFO2ID_T<DOCID> ( pRows[iMinRow] ) )
iMinRow = i;
if ( iMinRow<0 )
break;
#ifndef NDEBUG
// verify that it's unique
int iDupes = 0;
ARRAY_FOREACH ( i, pRows )
if ( pRows[i] )
if ( DOCINFO2ID_T<DOCID> ( pRows[i] )==DOCINFO2ID_T<DOCID> ( pRows[iMinRow] ) )
iDupes++;
assert ( iDupes==1 );
#endif
const CSphRowitem * pRow = pRows[iMinRow];
// strings storage for stored row
assert ( iMinRow<m_pSegments.GetLength() );
const RtSegment_t * pSegment = m_pSegments[iMinRow];
#ifdef PARANOID // sanity check in PARANOID mode
VerifyEmptyStrings<DOCID> ( pSegment->m_dStrings, m_tSchema, pRow );
#endif
// collect min-max data
tMinMaxBuilder.Collect ( pRow, pSegment->m_dMvas.Begin(), pSegment->m_dMvas.GetLength(), sError );
if ( pSegment->m_dStrings.GetLength()>1 || pSegment->m_dMvas.GetLength()>1 ) // should be more then dummy zero elements
{
// copy row content as we'll fix up its attrs ( string offset for now )
memcpy ( pFixedRow, pRow, iStride*sizeof(CSphRowitem) );
pRow = pFixedRow;
CopyFixupStorageAttrs<DOCID> ( pSegment->m_dStrings, tStorageString, pFixedRow );
CopyFixupStorageAttrs<DOCID> ( pSegment->m_dMvas, tStorageMva, pFixedRow );
}
// emit it
wrRows.PutBytes ( pRow, iStride*sizeof(CSphRowitem) );
// fast forward
pRows[iMinRow] = pRowIterators[iMinRow]->GetNextAliveRow();
#ifndef NDEBUG
iStoredDocs++;
#endif
}
SafeDeleteArray ( pFixedRow );
assert ( iStoredDocs==iTotalDocs );
tMinMaxBuilder.FinishCollect ( false );
if ( tMinMaxBuilder.GetActualSize() )
wrRows.PutBytes ( dMinMaxBuffer.Begin(), sizeof(DWORD) * tMinMaxBuilder.GetActualSize() );
tStrWriter.CloseFile ();
// write dummy string attributes, mva and kill-list files
CSphWriter wrDummy;
// dump killlist
sName.SetSprintf ( "%s.spk", sFilename );
wrDummy.OpenFile ( sName.cstr(), sError );
m_tKlist.Flush();
m_tKlist.KillListLock();
DWORD uKlistSize = m_tKlist.GetKillListSize();
if ( uKlistSize )
wrDummy.PutBytes ( m_tKlist.GetKillList(), uKlistSize*sizeof ( SphAttr_t ) );
m_tKlist.Reset();
m_tKlist.KillListUnlock();
wrDummy.CloseFile ();
sName.SetSprintf ( "%s.spm", sFilename ); wrDummy.OpenFile ( sName.cstr(), sError ); wrDummy.CloseFile ();
// header
SaveDiskHeader ( sFilename, dCheckpoints.GetLength(), iCheckpointsPosition, uKlistSize, iTotalDocs*iStride, m_bId32to64 );
// cleanup
ARRAY_FOREACH ( i, pWordReaders )
SafeDelete ( pWordReaders[i] );
ARRAY_FOREACH ( i, pDocReaders )
SafeDelete ( pDocReaders[i] );
ARRAY_FOREACH ( i, pRowIterators )
SafeDelete ( pRowIterators[i] );
// done
wrHits.CloseFile ();
wrDocs.CloseFile ();
wrDict.CloseFile ();
wrRows.CloseFile ();
}
void RtIndex_t::SaveDiskData ( const char * sFilename ) const
{
if ( m_bId32to64 )
return SaveDiskDataImpl<DWORD,DWORD> (sFilename);
else
return SaveDiskDataImpl<SphDocID_t,SphWordID_t> (sFilename);
}
static void WriteFileInfo ( CSphWriter & tWriter, const CSphSavedFile & tInfo )
{
tWriter.PutOffset ( tInfo.m_uSize );
tWriter.PutOffset ( tInfo.m_uCTime );
tWriter.PutOffset ( tInfo.m_uMTime );
tWriter.PutDword ( tInfo.m_uCRC32 );
}
static void WriteSchemaColumn ( CSphWriter & tWriter, const CSphColumnInfo & tColumn )
{
int iLen = strlen ( tColumn.m_sName.cstr() );
tWriter.PutDword ( iLen );
tWriter.PutBytes ( tColumn.m_sName.cstr(), iLen );
ESphAttr eAttrType = tColumn.m_eAttrType;
if ( eAttrType==SPH_ATTR_WORDCOUNT )
eAttrType = SPH_ATTR_INTEGER;
tWriter.PutDword ( eAttrType );
tWriter.PutDword ( tColumn.m_tLocator.CalcRowitem() ); // for backwards compatibility
tWriter.PutDword ( tColumn.m_tLocator.m_iBitOffset );
tWriter.PutDword ( tColumn.m_tLocator.m_iBitCount );
tWriter.PutByte ( tColumn.m_bPayload );
}
void RtIndex_t::SaveDiskHeader ( const char * sFilename, int iCheckpoints, SphOffset_t iCheckpointsPosition, DWORD uKillListSize, DWORD uMinMaxSize, bool bForceID32 ) const
{
static const DWORD INDEX_MAGIC_HEADER = 0x58485053; ///< my magic 'SPHX' header
static const DWORD INDEX_FORMAT_VERSION = 26; ///< my format version
CSphWriter tWriter;
CSphString sName, sError;
sName.SetSprintf ( "%s.sph", sFilename );
tWriter.OpenFile ( sName.cstr(), sError );
// format
tWriter.PutDword ( INDEX_MAGIC_HEADER );
tWriter.PutDword ( INDEX_FORMAT_VERSION );
if ( bForceID32 )
tWriter.PutDword ( 0 ); // use-32bit
else
tWriter.PutDword ( USE_64BIT ); // use-64bit
tWriter.PutDword ( SPH_DOCINFO_EXTERN );
// schema
tWriter.PutDword ( m_tSchema.m_dFields.GetLength() );
ARRAY_FOREACH ( i, m_tSchema.m_dFields )
WriteSchemaColumn ( tWriter, m_tSchema.m_dFields[i] );
tWriter.PutDword ( m_tSchema.GetAttrsCount() );
for ( int i=0; i<m_tSchema.GetAttrsCount(); i++ )
WriteSchemaColumn ( tWriter, m_tSchema.GetAttr(i) );
tWriter.PutOffset ( 0 ); // min docid
// wordlist checkpoints
tWriter.PutOffset ( iCheckpointsPosition );
tWriter.PutDword ( iCheckpoints );
// stats
tWriter.PutDword ( m_tStats.m_iTotalDocuments );
tWriter.PutOffset ( m_tStats.m_iTotalBytes );
// index settings
tWriter.PutDword ( m_tSettings.m_iMinPrefixLen );
tWriter.PutDword ( m_tSettings.m_iMinInfixLen );
tWriter.PutByte ( m_tSettings.m_bHtmlStrip ? 1 : 0 );
tWriter.PutString ( m_tSettings.m_sHtmlIndexAttrs.cstr () );
tWriter.PutString ( m_tSettings.m_sHtmlRemoveElements.cstr () );
tWriter.PutByte ( m_tSettings.m_bIndexExactWords ? 1 : 0 );
tWriter.PutDword ( m_tSettings.m_eHitless );
tWriter.PutDword ( SPH_HIT_FORMAT_PLAIN );
tWriter.PutByte ( 0 ); // m_bIndexSP, v.21+
tWriter.PutString ( CSphString() ); // m_sZonePrefix, v.22+
tWriter.PutDword ( 0 ); // m_iBoundaryStep, v.23+
tWriter.PutDword ( 1 ); // m_iStopwordStep, v.23+
// tokenizer
assert ( m_pTokenizer );
const CSphTokenizerSettings & tSettings = m_pTokenizer->GetSettings ();
tWriter.PutByte ( tSettings.m_iType );
tWriter.PutString ( tSettings.m_sCaseFolding.cstr () );
tWriter.PutDword ( tSettings.m_iMinWordLen );
tWriter.PutString ( tSettings.m_sSynonymsFile.cstr () );
WriteFileInfo ( tWriter, m_pTokenizer->GetSynFileInfo () );
tWriter.PutString ( tSettings.m_sBoundary.cstr () );
tWriter.PutString ( tSettings.m_sIgnoreChars.cstr () );
tWriter.PutDword ( tSettings.m_iNgramLen );
tWriter.PutString ( tSettings.m_sNgramChars.cstr () );
tWriter.PutString ( tSettings.m_sBlendChars.cstr () );
tWriter.PutString ( tSettings.m_sBlendMode.cstr () );
// dictionary
assert ( m_pDict );
const CSphDictSettings & tDict = m_pDict->GetSettings ();
tWriter.PutString ( tDict.m_sMorphology.cstr () );
tWriter.PutString ( tDict.m_sStopwords.cstr () );
const CSphVector <CSphSavedFile> & dSWFileInfos = m_pDict->GetStopwordsFileInfos ();
tWriter.PutDword ( dSWFileInfos.GetLength () );
ARRAY_FOREACH ( i, dSWFileInfos )
{
tWriter.PutString ( dSWFileInfos[i].m_sFilename.cstr () );
WriteFileInfo ( tWriter, dSWFileInfos[i] );
}
const CSphSavedFile & tWFFileInfo = m_pDict->GetWordformsFileInfo ();
tWriter.PutString ( tDict.m_sWordforms.cstr () );
WriteFileInfo ( tWriter, tWFFileInfo );
tWriter.PutDword ( tDict.m_iMinStemmingLen );
tWriter.PutByte ( 0 ); // m_bWordDict flag, v.21+
// kill-list size
tWriter.PutDword ( uKillListSize );
// min-max count
tWriter.PutDword ( uMinMaxSize );
// done
tWriter.CloseFile ();
}
#if USE_WINDOWS
#undef rename
int rename_rt ( const char * sOld, const char * sNew )
{
if ( MoveFileEx ( sOld, sNew, MOVEFILE_REPLACE_EXISTING ) )
return 0;
errno = GetLastError();
return -1;
}
#else
#define rename_rt rename
#endif
void RtIndex_t::SaveMeta ( int iDiskChunks )
{
// sanity check
if ( m_iLockFD<0 )
return;
// write new meta
CSphString sMeta, sMetaNew;
sMeta.SetSprintf ( "%s.meta", m_sPath.cstr() );
sMetaNew.SetSprintf ( "%s.meta.new", m_sPath.cstr() );
CSphString sError;
CSphWriter wrMeta;
if ( !wrMeta.OpenFile ( sMetaNew, sError ) )
sphDie ( "failed to serialize meta: %s", sError.cstr() ); // !COMMIT handle this gracefully
wrMeta.PutDword ( META_HEADER_MAGIC );
wrMeta.PutDword ( META_VERSION );
wrMeta.PutDword ( iDiskChunks );
wrMeta.PutDword ( m_tStats.m_iTotalDocuments );
wrMeta.PutOffset ( m_tStats.m_iTotalBytes ); // FIXME? need PutQword ideally
wrMeta.PutOffset ( m_iTID );
wrMeta.CloseFile();
// rename
if ( ::rename_rt ( sMetaNew.cstr(), sMeta.cstr() ) )
sphDie ( "failed to rename meta (src=%s, dst=%s, errno=%d, error=%s)",
sMetaNew.cstr(), sMeta.cstr(), errno, strerror(errno) ); // !COMMIT handle this gracefully
}
void RtIndex_t::SaveDiskChunk ()
{
if ( !m_pSegments.GetLength() )
return;
MEMORY ( SPH_MEM_IDX_RT );
// dump it
CSphString sNewChunk;
sNewChunk.SetSprintf ( "%s.%d", m_sPath.cstr(), m_pDiskChunks.GetLength() );
SaveDiskData ( sNewChunk.cstr() );
// bring new disk chunk online
CSphIndex * pDiskChunk = LoadDiskChunk ( m_pDiskChunks.GetLength() );
assert ( pDiskChunk );
// save updated meta
SaveMeta ( m_pDiskChunks.GetLength()+1 );
m_iSavedTID = m_iTID;
m_iSavedRam = 0;
m_tmSaved = sphMicroTimer();
// FIXME! add binlog cleanup here once we have binlogs
// get exclusive lock again, gotta reset RAM chunk now
Verify ( m_tRwlock.WriteLock() );
ARRAY_FOREACH ( i, m_pSegments )
SafeDelete ( m_pSegments[i] );
m_pSegments.Reset();
m_pDiskChunks.Add ( pDiskChunk );
Verify ( m_tRwlock.Unlock() );
}
CSphIndex * RtIndex_t::LoadDiskChunk ( int iChunk )
{
MEMORY ( SPH_MEM_IDX_DISK );
CSphString sChunk, sError, sWarning;
sChunk.SetSprintf ( "%s.%d", m_sPath.cstr(), iChunk );
// !COMMIT handle errors gracefully instead of dying
CSphIndex * pDiskChunk = sphCreateIndexPhrase ( m_sIndexName.cstr(), sChunk.cstr() );
if ( !pDiskChunk )
sphDie ( "disk chunk %s: alloc failed", sChunk.cstr() );
pDiskChunk->SetWordlistPreload ( m_bPreloadWordlist );
if ( !pDiskChunk->Prealloc ( false, m_bPathStripped, sWarning ) )
sphDie ( "disk chunk %s: prealloc failed: %s", sChunk.cstr(), pDiskChunk->GetLastError().cstr() );
if ( !pDiskChunk->Preread() )
sphDie ( "disk chunk %s: preread failed: %s", sChunk.cstr(), pDiskChunk->GetLastError().cstr() );
ISphIndex_VLN * pChunk = dynamic_cast<ISphIndex_VLN*> ( pDiskChunk );
if ( !pChunk )
sphDie ( "disk chunk %s: internal error on load, dynamic_cast failed", sChunk.cstr() );
pChunk->SetDynamize ( m_dDynamize );
return pDiskChunk;
}
bool RtIndex_t::Prealloc ( bool, bool bStripPath, CSphString & )
{
MEMORY ( SPH_MEM_IDX_RT );
// locking uber alles
// in RT backed case, we just must be multi-threaded
// so we simply lock here, and ignore Lock/Unlock hassle caused by forks
assert ( m_iLockFD<0 );
CSphString sLock;
sLock.SetSprintf ( "%s.lock", m_sPath.cstr() );
m_iLockFD = ::open ( sLock.cstr(), SPH_O_NEW, 0644 );
if ( m_iLockFD<0 )
{
m_sLastError.SetSprintf ( "failed to open %s: %s", sLock.cstr(), strerror(errno) );
return false;
}
if ( !sphLockEx ( m_iLockFD, false ) )
{
m_sLastError.SetSprintf ( "failed to lock %s: %s", sLock.cstr(), strerror(errno) );
::close ( m_iLockFD );
return false;
}
// check if we have a meta file (kinda-header)
CSphString sMeta;
sMeta.SetSprintf ( "%s.meta", m_sPath.cstr() );
// no readable meta? no disk part yet
if ( !sphIsReadable ( sMeta.cstr() ) )
return true;
// opened and locked, lets read
CSphAutoreader rdMeta;
if ( !rdMeta.Open ( sMeta, m_sLastError ) )
return false;
if ( rdMeta.GetDword()!=META_HEADER_MAGIC )
{
m_sLastError.SetSprintf ( "invalid meta file %s", sMeta.cstr() );
return false;
}
DWORD uVersion = rdMeta.GetDword();
if ( uVersion==0 || uVersion>META_VERSION )
{
m_sLastError.SetSprintf ( "%s is v.%d, binary is v.%d", sMeta.cstr(), uVersion, META_VERSION );
return false;
}
const int iDiskChunks = rdMeta.GetDword();
m_tStats.m_iTotalDocuments = rdMeta.GetDword();
m_tStats.m_iTotalBytes = rdMeta.GetOffset();
if ( uVersion>=2 )
m_iTID = rdMeta.GetOffset();
m_bPathStripped = bStripPath;
// load disk chunks, if any
for ( int iChunk=0; iChunk<iDiskChunks; iChunk++ )
{
m_pDiskChunks.Add ( LoadDiskChunk ( iChunk ) );
// tricky bit
// outgoing match schema on disk chunk should be identical to our internal (!) schema
if ( !m_tSchema.CompareTo ( m_pDiskChunks.Last()->GetMatchSchema(), m_sLastError ) )
return false;
}
// load ram chunk
bool bRamLoaded = LoadRamChunk ( uVersion );
// set up values for on timer save
m_iSavedTID = m_iTID;
m_iSavedRam = GetUsedRam();
m_tmSaved = sphMicroTimer();
return bRamLoaded;
}
bool RtIndex_t::Preread ()
{
// !COMMIT move disk chunks prereading here
return true;
}
template < typename T > struct IsPodType { enum { Value = false }; };
template<> struct IsPodType<char> { enum { Value = true }; };
template<> struct IsPodType<BYTE> { enum { Value = true }; };
template<> struct IsPodType<int> { enum { Value = true }; };
template<> struct IsPodType<DWORD> { enum { Value = true }; };
template<> struct IsPodType<uint64_t> { enum { Value = true }; };
template<> struct IsPodType<float> { enum { Value = true }; };
template < typename T, typename P >
static void SaveVector ( CSphWriter & tWriter, const CSphVector < T, P > & tVector )
{
STATIC_ASSERT ( IsPodType<T>::Value, NON_POD_VECTORS_ARE_UNSERIALIZABLE );
tWriter.PutDword ( tVector.GetLength() );
if ( tVector.GetLength() )
tWriter.PutBytes ( tVector.Begin(), tVector.GetLength()*sizeof(T) );
}
template < typename T, typename P >
static void LoadVector ( CSphReader & tReader, CSphVector < T, P > & tVector )
{
STATIC_ASSERT ( IsPodType<T>::Value, NON_POD_VECTORS_ARE_UNSERIALIZABLE );
tVector.Resize ( tReader.GetDword() ); // FIXME? sanitize?
if ( tVector.GetLength() )
tReader.GetBytes ( tVector.Begin(), tVector.GetLength()*sizeof(T) );
}
template < typename T, typename P >
static void SaveVector ( BinlogWriter_c & tWriter, const CSphVector < T, P > & tVector )
{
STATIC_ASSERT ( IsPodType<T>::Value, NON_POD_VECTORS_ARE_UNSERIALIZABLE );
tWriter.ZipValue ( tVector.GetLength() );
if ( tVector.GetLength() )
tWriter.PutBytes ( tVector.Begin(), tVector.GetLength()*sizeof(T) );
}
template < typename T, typename P >
static bool LoadVector ( BinlogReader_c & tReader, CSphVector < T, P > & tVector )
{
STATIC_ASSERT ( IsPodType<T>::Value, NON_POD_VECTORS_ARE_UNSERIALIZABLE );
tVector.Resize ( (int) tReader.UnzipValue() ); // FIXME? sanitize?
if ( tVector.GetLength() )
tReader.GetBytes ( tVector.Begin(), tVector.GetLength()*sizeof(T) );
return !tReader.GetErrorFlag();
}
bool RtIndex_t::SaveRamChunk ()
{
MEMORY ( SPH_MEM_IDX_RT );
CSphString sChunk, sNewChunk;
sChunk.SetSprintf ( "%s.ram", m_sPath.cstr() );
sNewChunk.SetSprintf ( "%s.ram.new", m_sPath.cstr() );
m_tKlist.SaveToFile ( m_sPath.cstr() );
CSphWriter wrChunk;
if ( !wrChunk.OpenFile ( sNewChunk, m_sLastError ) )
return false;
wrChunk.PutDword ( USE_64BIT );
wrChunk.PutDword ( RtSegment_t::m_iSegments );
wrChunk.PutDword ( m_pSegments.GetLength() );
// no locks here, because it's only intended to be called from dtor
ARRAY_FOREACH ( iSeg, m_pSegments )
{
const RtSegment_t * pSeg = m_pSegments[iSeg];
wrChunk.PutDword ( pSeg->m_iTag );
SaveVector ( wrChunk, pSeg->m_dWords );
wrChunk.PutDword ( pSeg->m_dWordCheckpoints.GetLength() );
ARRAY_FOREACH ( i, pSeg->m_dWordCheckpoints )
{
wrChunk.PutOffset ( pSeg->m_dWordCheckpoints[i].m_iOffset );
wrChunk.PutOffset ( pSeg->m_dWordCheckpoints[i].m_uWordID );
}
SaveVector ( wrChunk, pSeg->m_dDocs );
SaveVector ( wrChunk, pSeg->m_dHits );
wrChunk.PutDword ( pSeg->m_iRows );
wrChunk.PutDword ( pSeg->m_iAliveRows );
SaveVector ( wrChunk, pSeg->m_dRows );
SaveVector ( wrChunk, pSeg->m_dKlist );
SaveVector ( wrChunk, pSeg->m_dStrings );
SaveVector ( wrChunk, pSeg->m_dMvas );
}
wrChunk.CloseFile();
if ( wrChunk.IsError() )
return false;
// rename
if ( ::rename_rt ( sNewChunk.cstr(), sChunk.cstr() ) )
sphDie ( "failed to rename ram chunk (src=%s, dst=%s, errno=%d, error=%s)",
sNewChunk.cstr(), sChunk.cstr(), errno, strerror(errno) ); // !COMMIT handle this gracefully
return true;
}
bool RtIndex_t::LoadRamChunk ( DWORD uVersion )
{
MEMORY ( SPH_MEM_IDX_RT );
CSphString sChunk;
sChunk.SetSprintf ( "%s.ram", m_sPath.cstr() );
if ( !sphIsReadable ( sChunk.cstr(), &m_sLastError ) )
return true;
m_tKlist.LoadFromFile ( m_sPath.cstr() );
CSphAutoreader rdChunk;
if ( !rdChunk.Open ( sChunk, m_sLastError ) )
return false;
bool bId64 = ( rdChunk.GetDword()!=0 );
if ( bId64!=USE_64BIT )
{
#if USE_64BIT
// #if 0
// TODO: may be do this param conditional and push it into the config?
m_bId32to64 = true;
#else
m_sLastError.SetSprintf ( "ram chunk dumped by %s binary; this binary is %s",
bId64 ? "id64" : "id32",
USE_64BIT ? "id64" : "id32" );
return false;
#endif
}
int iSegmentSeq = rdChunk.GetDword();
m_pSegments.Resize ( rdChunk.GetDword() ); // FIXME? sanitize
ARRAY_FOREACH ( iSeg, m_pSegments )
{
RtSegment_t * pSeg = new RtSegment_t ();
m_pSegments[iSeg] = pSeg;
pSeg->m_iTag = rdChunk.GetDword ();
LoadVector ( rdChunk, pSeg->m_dWords );
pSeg->m_dWordCheckpoints.Resize ( rdChunk.GetDword() );
ARRAY_FOREACH ( i, pSeg->m_dWordCheckpoints )
{
pSeg->m_dWordCheckpoints[i].m_iOffset = (int)rdChunk.GetOffset();
pSeg->m_dWordCheckpoints[i].m_uWordID = (SphWordID_t)rdChunk.GetOffset();
}
LoadVector ( rdChunk, pSeg->m_dDocs );
LoadVector ( rdChunk, pSeg->m_dHits );
pSeg->m_iRows = rdChunk.GetDword();
pSeg->m_iAliveRows = rdChunk.GetDword();
LoadVector ( rdChunk, pSeg->m_dRows );
LoadVector ( rdChunk, pSeg->m_dKlist );
LoadVector ( rdChunk, pSeg->m_dStrings );
if ( uVersion>=3 )
LoadVector ( rdChunk, pSeg->m_dMvas );
}
RtSegment_t::m_iSegments = iSegmentSeq;
if ( rdChunk.GetErrorFlag() )
return false;
return true;
}
void RtIndex_t::PostSetup()
{
if ( m_bId32to64 )
{
SaveDiskChunk();
// since the RAM chunk is just stored as id32, we are no more in compat mode
m_bId32to64 = false;
}
}
int RtIndex_t::DebugCheck ( FILE * fp )
{
int iFails = 0;
ARRAY_FOREACH ( i, m_pDiskChunks )
{
fprintf ( fp, "checking disk chunk %d(%d)...\n", i, m_pDiskChunks.GetLength() );
iFails += m_pDiskChunks[i]->DebugCheck ( fp );
}
return iFails;
}
//////////////////////////////////////////////////////////////////////////
// SEARCHING
//////////////////////////////////////////////////////////////////////////
struct RtQword_t : public ISphQword
{
friend struct RtIndex_t;
friend struct RtQwordSetup_t;
protected:
RtDocReader_t * m_pDocReader;
CSphMatch m_tMatch;
DWORD m_uNextHit;
RtHitReader2_t m_tHitReader;
RtSegment_t * m_pSeg;
public:
RtQword_t ()
: m_pDocReader ( NULL )
, m_uNextHit ( 0 )
, m_pSeg ( NULL )
{
m_tMatch.Reset ( 0 );
}
virtual ~RtQword_t ()
{
SafeDelete ( m_pDocReader );
}
virtual const CSphMatch & GetNextDoc ( DWORD * )
{
for ( ;; )
{
const RtDoc_t * pDoc = m_pDocReader->UnzipDoc();
if ( !pDoc )
{
m_tMatch.m_iDocID = 0;
return m_tMatch;
}
if ( m_pSeg->m_dKlist.BinarySearch ( pDoc->m_uDocID ) )
continue;
m_tMatch.m_iDocID = pDoc->m_uDocID;
m_dFields = pDoc->m_dFields;
m_uMatchHits = pDoc->m_uHits;
m_iHitlistPos = (uint64_t(pDoc->m_uHits)<<32) + pDoc->m_uHit;
return m_tMatch;
}
}
virtual void SeekHitlist ( SphOffset_t uOff )
{
int iHits = (int)(uOff>>32);
if ( iHits==1 )
{
m_uNextHit = DWORD(uOff);
} else
{
m_uNextHit = 0;
m_tHitReader.Seek ( DWORD(uOff), iHits );
}
}
virtual Hitpos_t GetNextHit ()
{
if ( m_uNextHit==0 )
{
return Hitpos_t ( m_tHitReader.UnzipHit() );
} else if ( m_uNextHit==0xffffffffUL )
{
return EMPTY_HIT;
} else
{
DWORD uRes = m_uNextHit;
m_uNextHit = 0xffffffffUL;
return Hitpos_t ( uRes );
}
}
};
struct RtQwordSetup_t : ISphQwordSetup
{
RtSegment_t * m_pSeg;
virtual ISphQword * QwordSpawn ( const XQKeyword_t & ) const;
virtual bool QwordSetup ( ISphQword * pQword ) const;
};
ISphQword * RtQwordSetup_t::QwordSpawn ( const XQKeyword_t & ) const
{
return new RtQword_t ();
}
bool RtQwordSetup_t::QwordSetup ( ISphQword * pQword ) const
{
RtQword_t * pMyWord = dynamic_cast<RtQword_t*> ( pQword );
if ( !pMyWord )
return false;
const RtIndex_t * pIndex = dynamic_cast< const RtIndex_t * > ( m_pIndex );
if ( !pIndex )
return false;
return pIndex->RtQwordSetup ( pMyWord, m_pSeg );
}
bool RtIndex_t::EarlyReject ( CSphQueryContext * pCtx, CSphMatch & tMatch ) const
{
// might be needed even when we do not have a filter!
if ( pCtx->m_bLookupFilter || pCtx->m_bLookupSort )
CopyDocinfo ( tMatch, FindDocinfo ( (RtSegment_t*)pCtx->m_pIndexData, tMatch.m_iDocID ) );
ARRAY_FOREACH ( i, m_dDynamize )
tMatch.SetAttr ( m_dDynamize[i].m_tTo, tMatch.GetAttr ( m_dDynamize[i].m_tFrom ) );
pCtx->CalcFilter ( tMatch );
return pCtx->m_pFilter ? !pCtx->m_pFilter->Eval ( tMatch ) : false;
}
void RtIndex_t::CopyDocinfo ( CSphMatch & tMatch, const DWORD * pFound ) const
{
if ( !pFound )
return;
// setup static pointer
assert ( DOCINFO2ID(pFound)==tMatch.m_iDocID );
tMatch.m_pStatic = DOCINFO2ATTRS(pFound);
// FIXME? implement overrides
}
const CSphRowitem * RtIndex_t::FindDocinfo ( const RtSegment_t * pSeg, SphDocID_t uDocID ) const
{
// FIXME! move to CSphIndex, and implement hashing
if ( pSeg->m_dRows.GetLength()==0 )
return NULL;
int iStride = m_iStride;
int iStart = 0;
int iEnd = pSeg->m_iRows-1;
assert ( iStride==( DOCINFO_IDSIZE + m_tSchema.GetRowSize() ) );
const CSphRowitem * pStorage = pSeg->m_dRows.Begin();
const CSphRowitem * pFound = NULL;
if ( uDocID==DOCINFO2ID ( &pStorage [ iStart*iStride ] ) )
{
pFound = &pStorage [ iStart*iStride ];
} else if ( uDocID==DOCINFO2ID ( &pStorage [ iEnd*iStride ] ) )
{
pFound = &pStorage [ iEnd*iStride ];
} else
{
while ( iEnd-iStart>1 )
{
// check if nothing found
if (
uDocID < DOCINFO2ID ( &pStorage [ iStart*iStride ] ) ||
uDocID > DOCINFO2ID ( &pStorage [ iEnd*iStride ] ) )
break;
assert ( uDocID > DOCINFO2ID ( &pStorage [ iStart*iStride ] ) );
assert ( uDocID < DOCINFO2ID ( &pStorage [ iEnd*iStride ] ) );
int iMid = iStart + (iEnd-iStart)/2;
if ( uDocID==DOCINFO2ID ( &pStorage [ iMid*iStride ] ) )
{
pFound = &pStorage [ iMid*iStride ];
break;
}
if ( uDocID<DOCINFO2ID ( &pStorage [ iMid*iStride ] ) )
iEnd = iMid;
else
iStart = iMid;
}
}
return pFound;
}
// WARNING, setup is pretty tricky
// for RT queries, we setup qwords several times
// first pass (with NULL segment arg) should sum all stats over all segments
// others passes (with non-NULL segments) should setup specific segment (including local stats)
bool RtIndex_t::RtQwordSetupSegment ( RtQword_t * pQword, RtSegment_t * pCurSeg, bool bSetup )
{
if ( !pCurSeg )
return false;
SphWordID_t uWordID = pQword->m_iWordID;
RtWordReader_t tReader ( pCurSeg );
// position reader to the right checkpoint
const CSphVector<RtWordCheckpoint_t> & dCheckpoints = pCurSeg->m_dWordCheckpoints;
if ( dCheckpoints.GetLength() )
{
if ( dCheckpoints.Begin()->m_uWordID > uWordID )
{
tReader.m_pMax = tReader.m_pCur + dCheckpoints.Begin()->m_iOffset;
} else if ( dCheckpoints.Last().m_uWordID<=uWordID )
{
tReader.m_pCur += dCheckpoints.Last().m_iOffset;
} else
{
int L = 0;
int R = dCheckpoints.GetLength()-1;
while ( L+1<R )
{
int M = L + (R-L)/2;
if ( uWordID < dCheckpoints[M].m_uWordID )
R = M;
else if ( uWordID > dCheckpoints[M].m_uWordID )
L = M;
else
{
L = M;
break;
}
}
assert ( dCheckpoints[L].m_uWordID<=uWordID );
if ( L < dCheckpoints.GetLength()-1 )
{
assert ( dCheckpoints[L+1].m_uWordID > uWordID );
tReader.m_pMax = tReader.m_pCur + dCheckpoints[L+1].m_iOffset;
}
tReader.m_pCur += dCheckpoints[L].m_iOffset;
}
}
// find the word between checkpoints
const RtWord_t * pWord = NULL;
while ( ( pWord = tReader.UnzipWord() )!=NULL )
{
if ( pWord->m_uWordID==uWordID )
{
pQword->m_iDocs += pWord->m_uDocs;
pQword->m_iHits += pWord->m_uHits;
if ( bSetup )
{
SafeDelete ( pQword->m_pDocReader );
pQword->m_pDocReader = new RtDocReader_t ( pCurSeg, *pWord );
pQword->m_tHitReader.m_pBase = NULL;
if ( pCurSeg->m_dHits.GetLength() )
pQword->m_tHitReader.m_pBase = pCurSeg->m_dHits.Begin();
pQword->m_pSeg = pCurSeg;
}
return true;
} else if ( pWord->m_uWordID > uWordID )
return false;
}
return false;
}
bool RtIndex_t::RtQwordSetup ( RtQword_t * pQword, RtSegment_t * pSeg ) const
{
// segment-specific setup pass
if ( pSeg )
return RtQwordSetupSegment ( pQword, pSeg, true );
// stat-only pass
// loop all segments, gather stats, do not setup anything
assert ( !pSeg );
pQword->m_iDocs = 0;
pQword->m_iHits = 0;
// we care about the results anyway though
// because if all (!) segments miss this word, we must notify the caller, right?
bool bRes = true;
ARRAY_FOREACH ( i, m_pSegments )
bRes &= RtQwordSetupSegment ( pQword, m_pSegments[i], false );
// sanity check
assert ( !( m_pSegments.GetLength()!=0 && bRes==true && pQword->m_iDocs==0 ) );
return bRes;
}
static void AddKillListFilter ( CSphVector<CSphFilterSettings> * pExtra, const SphAttr_t * pKillList, int nEntries )
{
assert ( nEntries && pKillList && pExtra );
CSphFilterSettings & tFilter = pExtra->Add();
tFilter.m_bExclude = true;
tFilter.m_eType = SPH_FILTER_VALUES;
tFilter.m_uMinValue = pKillList[0];
tFilter.m_uMaxValue = pKillList[nEntries-1];
tFilter.m_sAttrName = "@id";
tFilter.SetExternalValues ( pKillList, nEntries );
}
CSphDict * RtIndex_t::SetupExactDict ( CSphScopedPtr<CSphDict> & tContainer, CSphDict * pPrevDict, ISphTokenizer * pTokenizer ) const
{
assert ( pTokenizer );
if ( !m_tSettings.m_bIndexExactWords )
return pPrevDict;
tContainer = new CSphDictExact ( pPrevDict );
CSphRemapRange tStar ( '=', '=', '=' ); // FIXME? check and warn if star was already there
pTokenizer->AddCaseFolding ( tStar );
return tContainer.Ptr();
}
// FIXME! missing MVA, index_exact_words support
// FIXME? missing enable_star, legacy match modes support
// FIXME? any chance to factor out common backend agnostic code?
// FIXME? do we need to support pExtraFilters?
#ifndef NDEBUG
bool RtIndex_t::MultiQuery ( const CSphQuery * pQuery, CSphQueryResult * pResult, int iSorters, ISphMatchSorter ** ppSorters, const CSphVector<CSphFilterSettings> *, int iTag ) const
#else
bool RtIndex_t::MultiQuery ( const CSphQuery * pQuery, CSphQueryResult * pResult, int iSorters, ISphMatchSorter ** ppSorters, const CSphVector<CSphFilterSettings> *, int ) const
#endif
{
assert ( ppSorters );
// to avoid the checking of a ppSorters's element for NULL on every next step, just filter out all nulls right here
CSphVector<ISphMatchSorter*> dSorters;
dSorters.Reserve ( iSorters );
for ( int i=0; i<iSorters; i++ )
if ( ppSorters[i] )
dSorters.Add ( ppSorters[i] );
// if we have anything to work with
if ( dSorters.GetLength()==0 )
{
pResult->m_iQueryTime = 0;
return false;
}
// FIXME! too early (how low can you go?)
m_tRwlock.ReadLock ();
assert ( pQuery );
assert ( pResult );
assert ( iTag==0 );
MEMORY ( SPH_MEM_IDX_RT_MULTY_QUERY );
// start counting
pResult->m_iQueryTime = 0;
int64_t tmQueryStart = sphMicroTimer();
// force ext2 mode for them
// FIXME! eliminate this const breakage
const_cast<CSphQuery*> ( pQuery )->m_eMode = SPH_MATCH_EXTENDED2;
// wrappers
CSphScopedPtr<ISphTokenizer> pTokenizer ( m_pTokenizer->Clone ( false ) );
CSphScopedPtr<CSphDict> tDict2 ( NULL );
CSphDict * pDict = SetupExactDict ( tDict2, m_pDict, pTokenizer.Ptr() );
// FIXME! slow disk searches could lock out concurrent writes for too long
// FIXME! each result will point to its own MVA and string pools
// !COMMIT need to setup disk K-list here
//////////////////////
// search disk chunks
//////////////////////
bool m_bKlistLocked = false;
CSphVector<CSphFilterSettings> dExtra;
// first, collect all the killlists into a vector
for ( int iChunk = m_pDiskChunks.GetLength()-1; iChunk>=0; iChunk-- )
{
const int iOldLength = dExtra.GetLength();
if ( iChunk==m_pDiskChunks.GetLength()-1 )
{
// For the topmost chunk we add the killlist from the ram-index
m_tKlist.Flush();
m_tKlist.KillListLock();
if ( m_tKlist.GetKillListSize() )
{
// we don't lock in vain...
m_bKlistLocked = true;
AddKillListFilter ( &dExtra, m_tKlist.GetKillList(), m_tKlist.GetKillListSize() );
} else
m_tKlist.KillListUnlock();
} else
{
const CSphIndex * pDiskChunk = m_pDiskChunks[iChunk+1];
if ( pDiskChunk->GetKillListSize () )
AddKillListFilter ( &dExtra, pDiskChunk->GetKillList(), pDiskChunk->GetKillListSize() );
}
if ( dExtra.GetLength()==iOldLength )
dExtra.Add();
}
CSphVector<CSphString> dWrongWords;
SmallStringHash_T<CSphQueryResultMeta::WordStat_t> hDiskStats;
assert ( dExtra.GetLength()==m_pDiskChunks.GetLength() );
CSphVector<const BYTE *> dDiskStrings ( m_pDiskChunks.GetLength() );
CSphVector<const DWORD *> dDiskMva ( m_pDiskChunks.GetLength() );
ARRAY_FOREACH ( iChunk, m_pDiskChunks )
{
CSphQueryResult tChunkResult;
// storing index in matches tag for finding strings attrs offset later, biased against default zero and segments
const int iTag = m_pSegments.GetLength()+iChunk+1;
if ( !m_pDiskChunks[iChunk]->MultiQuery ( pQuery, &tChunkResult, iSorters, ppSorters, &dExtra, iTag ) )
{
// FIXME? maybe handle this more gracefully (convert to a warning)?
pResult->m_sError = tChunkResult.m_sError;
m_tRwlock.Unlock ();
if ( m_bKlistLocked )
m_tKlist.KillListUnlock();
return false;
}
// check terms inconsistency amongs disk chunks
const SmallStringHash_T<CSphQueryResultMeta::WordStat_t> & hSrcStats = tChunkResult.m_hWordStats;
if ( pResult->m_hWordStats.GetLength() )
{
hSrcStats.IterateStart();
while ( hSrcStats.IterateNext() )
{
const CSphQueryResultMeta::WordStat_t * pDstStat = pResult->m_hWordStats ( hSrcStats.IterateGetKey() );
const CSphQueryResultMeta::WordStat_t & tSrcStat = hSrcStats.IterateGet();
// all indexes should produce same words from the query
if ( !pDstStat && !tSrcStat.m_bExpanded )
{
dWrongWords.Add ( hSrcStats.IterateGetKey() );
}
pResult->AddStat ( hSrcStats.IterateGetKey(), tSrcStat.m_iDocs, tSrcStat.m_iHits, tSrcStat.m_bExpanded );
}
} else
{
pResult->m_hWordStats = hSrcStats;
}
dDiskStrings[iChunk] = tChunkResult.m_pStrings;
dDiskMva[iChunk] = tChunkResult.m_pMva;
dExtra.Pop();
// keep last chunk statistics to check vs rt settings
if ( iChunk==m_pDiskChunks.GetLength()-1 )
hDiskStats = hSrcStats;
}
if ( m_bKlistLocked )
m_tKlist.KillListUnlock();
////////////////////
// search RAM chunk
////////////////////
// select the sorter with max schema
int iMaxSchemaSize = -1;
int iMaxSchemaIndex = -1;
ARRAY_FOREACH ( i, dSorters )
if ( dSorters[i]->GetSchema().GetRowSize() > iMaxSchemaSize )
{
iMaxSchemaSize = dSorters[i]->GetSchema().GetRowSize();
iMaxSchemaIndex = i;
}
// setup calculations and result schema
CSphQueryContext tCtx;
if ( !tCtx.SetupCalc ( pResult, dSorters[iMaxSchemaIndex]->GetSchema(), m_tSchema, NULL ) )
{
m_tRwlock.Unlock ();
return false;
}
CSphScopedPtr<CSphDict> tDictCloned ( NULL );
CSphDict * pDictBase = pDict;
if ( pDictBase->HasState() )
{
tDictCloned = pDictBase = pDictBase->Clone();
}
// setup search terms
RtQwordSetup_t tTermSetup;
tTermSetup.m_pDict = pDictBase;
tTermSetup.m_pIndex = this;
tTermSetup.m_eDocinfo = m_tSettings.m_eDocinfo;
tTermSetup.m_iDynamicRowitems = pResult->m_tSchema.GetDynamicSize();
if ( pQuery->m_uMaxQueryMsec>0 )
tTermSetup.m_iMaxTimer = sphMicroTimer() + pQuery->m_uMaxQueryMsec*1000; // max_query_time
tTermSetup.m_pWarning = &pResult->m_sWarning;
tTermSetup.m_pSeg = NULL;
tTermSetup.m_pCtx = &tCtx;
int iIndexWeight = pQuery->GetIndexWeight ( m_sIndexName.cstr() );
// bind weights
tCtx.BindWeights ( pQuery, m_tOutboundSchema, iIndexWeight );
// parse query
XQQuery_t tParsed;
if ( !sphParseExtendedQuery ( tParsed, pQuery->m_sQuery.cstr(), pTokenizer.Ptr(), &m_tOutboundSchema, pDictBase, m_tSettings.m_iStopwordStep ) )
{
pResult->m_sError = tParsed.m_sParseError;
m_tRwlock.Unlock ();
return false;
}
// transform query if needed (quorum transform, keyword expansion, etc.)
sphTransformExtendedQuery ( &tParsed.m_pRoot );
if ( !sphCheckQueryHeight ( tParsed.m_pRoot, pResult->m_sError ) )
{
m_tRwlock.Unlock ();
return false;
}
// setup query
// must happen before index-level reject, in order to build proper keyword stats
CSphScopedPtr<ISphRanker> pRanker ( sphCreateRanker ( tParsed, pQuery, pResult, tTermSetup, tCtx ) );
if ( !pRanker.Ptr() )
{
m_tRwlock.Unlock ();
return false;
}
// check terms inconsistency disk chunks vs rt
if ( pResult->m_hWordStats.GetLength() && hDiskStats.GetLength() )
{
const SmallStringHash_T<CSphQueryResultMeta::WordStat_t> & hSrcStats = pResult->m_hWordStats;
hSrcStats.IterateStart();
while ( hSrcStats.IterateNext() )
{
const CSphQueryResultMeta::WordStat_t * pDstStat = hDiskStats ( hSrcStats.IterateGetKey() );
const CSphQueryResultMeta::WordStat_t & tSrcStat = hSrcStats.IterateGet();
// all indexes should produce same words from the query
if ( !pDstStat && !tSrcStat.m_bExpanded )
{
dWrongWords.Add ( hSrcStats.IterateGetKey() );
}
}
}
// make warning on terms inconsistency
if ( dWrongWords.GetLength() )
{
dWrongWords.Uniq();
pResult->m_sWarning.SetSprintf ( "index '%s': query word(s) mismatch: %s", m_sIndexName.cstr(), dWrongWords.Begin()->cstr() );
for ( int i=1; i<dWrongWords.GetLength(); i++ )
pResult->m_sWarning.SetSprintf ( "%s, %s", pResult->m_sWarning.cstr(), dWrongWords[i].cstr() );
}
// empty index, empty result
if ( !m_pSegments.GetLength() && !m_pDiskChunks.GetLength() )
{
pResult->m_iQueryTime = 0;
m_tRwlock.Unlock ();
return true;
}
if ( m_pSegments.GetLength() )
{
// setup filters
// FIXME! setup filters MVA pool
bool bFullscan = ( pQuery->m_eMode==SPH_MATCH_FULLSCAN || pQuery->m_sQuery.IsEmpty() );
if ( !tCtx.CreateFilters ( bFullscan, &pQuery->m_dFilters, pResult->m_tSchema, NULL, pResult->m_sError ) )
{
m_tRwlock.Unlock ();
return false;
}
// FIXME! OPTIMIZE! check if we can early reject the whole index
// setup lookup
// do pre-filter lookup as needed
// do pre-sort lookup in all cases
// post-sort lookup is complicated (because of many segments)
// pre-sort lookup is cheap now anyway, and almost always anyway
// (except maybe by stupid relevance-sorting-only benchmarks!!)
tCtx.m_bLookupFilter = ( pQuery->m_dFilters.GetLength() || tCtx.m_dCalcFilter.GetLength() );
tCtx.m_bLookupSort = true;
// FIXME! setup overrides
// do searching
bool bRandomize = dSorters[0]->m_bRandomize;
int iCutoff = pQuery->m_iCutoff;
if ( iCutoff<=0 )
iCutoff = -1;
if ( bFullscan )
{
// full scan
// FIXME? OPTIMIZE? add shortcuts here too?
CSphMatch tMatch;
tMatch.Reset ( pResult->m_tSchema.GetDynamicSize() );
tMatch.m_iWeight = pQuery->GetIndexWeight ( m_sIndexName.cstr() );
int iCutoff = pQuery->m_iCutoff;
if ( iCutoff<=0 )
iCutoff = -1;
ARRAY_FOREACH ( iSeg, m_pSegments )
{
// set string pool for string on_sort expression fix up
tCtx.SetStringPool ( m_pSegments[iSeg]->m_dStrings.Begin() );
tCtx.SetMVAPool ( m_pSegments[iSeg]->m_dMvas.Begin() );
ARRAY_FOREACH ( i, dSorters )
{
dSorters[i]->SetStringPool ( m_pSegments[iSeg]->m_dStrings.Begin() );
dSorters[i]->SetMVAPool ( m_pSegments[iSeg]->m_dMvas.Begin() );
}
RtRowIterator_t tIt ( m_pSegments[iSeg], m_iStride, false, NULL );
for ( ;; )
{
const CSphRowitem * pRow = tIt.GetNextAliveRow();
if ( !pRow )
break;
tMatch.m_iDocID = DOCINFO2ID(pRow);
tMatch.m_pStatic = DOCINFO2ATTRS(pRow); // FIXME! overrides
ARRAY_FOREACH ( j, m_dDynamize )
tMatch.SetAttr ( m_dDynamize[j].m_tTo, tMatch.GetAttr ( m_dDynamize[j].m_tFrom ) );
tCtx.CalcFilter ( tMatch );
if ( tCtx.m_pFilter && !tCtx.m_pFilter->Eval ( tMatch ) )
continue;
tCtx.CalcSort ( tMatch );
tCtx.CalcFinal ( tMatch ); // OPTIMIZE? could be possibly done later
// storing segment in matches tag for finding strings attrs offset later, biased against default zero
tMatch.m_iTag = iSeg+1;
bool bNewMatch = false;
ARRAY_FOREACH ( iSorter, dSorters )
bNewMatch |= dSorters[iSorter]->Push ( tMatch );
// handle cutoff
if ( bNewMatch )
if ( --iCutoff==0 )
break;
}
if ( iCutoff==0 )
break;
}
} else
{
// query matching
ARRAY_FOREACH ( iSeg, m_pSegments )
{
tTermSetup.m_pSeg = m_pSegments[iSeg];
pRanker->Reset ( tTermSetup );
// for lookups to work
tCtx.m_pIndexData = m_pSegments[iSeg];
// set string pool for string on_sort expression fix up
tCtx.SetStringPool ( m_pSegments[iSeg]->m_dStrings.Begin() );
tCtx.SetMVAPool ( m_pSegments[iSeg]->m_dMvas.Begin() );
ARRAY_FOREACH ( i, dSorters )
{
dSorters[i]->SetStringPool ( m_pSegments[iSeg]->m_dStrings.Begin() );
dSorters[i]->SetMVAPool ( m_pSegments[iSeg]->m_dMvas.Begin() );
}
CSphMatch * pMatch = pRanker->GetMatchesBuffer();
for ( ;; )
{
int iMatches = pRanker->GetMatches();
if ( iMatches<=0 )
break;
for ( int i=0; i<iMatches; i++ )
{
assert ( !tCtx.m_bLookupSort || FindDocinfo ( m_pSegments[iSeg], pMatch[i].m_iDocID ) );
if ( tCtx.m_bLookupSort )
CopyDocinfo ( pMatch[i], FindDocinfo ( m_pSegments[iSeg], pMatch[i].m_iDocID ) );
ARRAY_FOREACH ( j, m_dDynamize )
pMatch[i].SetAttr ( m_dDynamize[j].m_tTo, pMatch[i].GetAttr ( m_dDynamize[j].m_tFrom ) );
tCtx.CalcSort ( pMatch[i] );
tCtx.CalcFinal ( pMatch[i] ); // OPTIMIZE? could be possibly done later
if ( bRandomize )
pMatch[i].m_iWeight = ( sphRand() & 0xffff );
if ( tCtx.m_pWeightFilter && !tCtx.m_pWeightFilter->Eval ( pMatch[i] ) )
continue;
// storing segment in matches tag for finding strings attrs offset later, biased against default zero
pMatch[i].m_iTag = iSeg+1;
bool bNewMatch = false;
ARRAY_FOREACH ( iSorter, dSorters )
bNewMatch |= dSorters[iSorter]->Push ( pMatch[i] );
if ( bNewMatch )
if ( --iCutoff==0 )
break;
}
if ( iCutoff==0 )
{
iSeg = m_pSegments.GetLength();
break;
}
}
}
}
}
//////////////////////
// coping match's attributes to external storage in result set
//////////////////////
const int iSegmentsTotal = m_pSegments.GetLength();
const int iStaticSize = m_tSchema.GetStaticSize();
if ( iStaticSize>0 && iSegmentsTotal>0 )
{
MEMORY ( SPH_MEM_IDX_RT_RES_MATCHES );
// we need to count matches for allocating arena
// as we are going to fix match's m_pStatic pointers later
// and copy real match's data to arena
int iFixupCount = 0;
ARRAY_FOREACH ( iSorter, dSorters )
{
ISphMatchSorter * pSorter = dSorters[iSorter];
const int iMatchesCount = pSorter->GetLength();
const CSphMatch * pMatches = pSorter->First();
for ( int i=0; i<iMatchesCount; i++ )
{
const int iMatchSegment = pMatches[i].m_iTag-1;
if ( iMatchSegment>=0 && iMatchSegment< iSegmentsTotal )
iFixupCount++;
}
}
if ( iFixupCount>0 )
{
CSphRowitem * pAttr = new CSphRowitem [ iFixupCount * iStaticSize ];
pResult->m_dStorage2Free.Add ( pAttr );
#ifndef NDEBUG
CSphRowitem * pEnd = pAttr + iFixupCount * iStaticSize;
#endif
ARRAY_FOREACH ( iSorter, dSorters )
{
ISphMatchSorter * pSorter = dSorters[iSorter];
const int iMatchesCount = pSorter->GetLength();
CSphMatch * pMatches = pSorter->First();
for ( int i=0; i<iMatchesCount; i++ )
{
const int iMatchSegment = pMatches[i].m_iTag-1;
if ( iMatchSegment>=0 && iMatchSegment< iSegmentsTotal )
{
assert ( pAttr+iStaticSize<=pEnd );
memcpy ( pAttr, pMatches[i].m_pStatic, sizeof(CSphRowitem)*iStaticSize );
pMatches[i].m_pStatic = pAttr;
pAttr += iStaticSize;
}
}
}
}
}
//////////////////////
// fixing string offset and data in resulting matches
//////////////////////
MEMORY ( SPH_MEM_IDX_RT_RES_STRINGS );
CSphVector<CSphAttrLocator> dStringGetLoc;
CSphVector<CSphAttrLocator> dStringSetLoc;
CSphVector<CSphAttrLocator> dMvaGetLoc;
CSphVector<CSphAttrLocator> dMvaSetLoc;
for ( int i=0; i<pResult->m_tSchema.GetAttrsCount(); i++ )
{
const CSphColumnInfo & tSetInfo = pResult->m_tSchema.GetAttr(i);
if ( tSetInfo.m_eAttrType==SPH_ATTR_STRING )
{
const int iInLocator = m_tSchema.GetAttrIndex ( tSetInfo.m_sName.cstr() );
assert ( iInLocator>=0 );
dStringGetLoc.Add ( m_tSchema.GetAttr ( iInLocator ).m_tLocator );
dStringSetLoc.Add ( tSetInfo.m_tLocator );
} else if ( tSetInfo.m_eAttrType==SPH_ATTR_UINT32SET || tSetInfo.m_eAttrType==SPH_ATTR_UINT64SET )
{
const int iInLocator = m_tSchema.GetAttrIndex ( tSetInfo.m_sName.cstr() );
assert ( iInLocator>=0 );
dMvaGetLoc.Add ( m_tSchema.GetAttr ( iInLocator ).m_tLocator );
dMvaSetLoc.Add ( tSetInfo.m_tLocator );
}
assert ( ( tSetInfo.m_eAttrType!=SPH_ATTR_STRING && tSetInfo.m_eAttrType!=SPH_ATTR_UINT32SET && tSetInfo.m_eAttrType!=SPH_ATTR_UINT64SET )
|| tSetInfo.m_tLocator.m_bDynamic );
}
if ( dStringGetLoc.GetLength() || dMvaGetLoc.GetLength() )
{
assert ( !pResult->m_pStrings && !pResult->m_pMva );
CSphTightVector<BYTE> dStorageString;
CSphTightVector<DWORD> dStorageMva;
dStorageString.Add ( 0 );
dStorageMva.Add ( 0 );
ARRAY_FOREACH ( iSorter, dSorters )
{
ISphMatchSorter * pSorter = dSorters[iSorter];
const int iMatchesCount = pSorter->GetLength();
CSphMatch * pMatches = pSorter->First();
for ( int i=0; i<iMatchesCount; i++ )
{
CSphMatch & tMatch = pMatches[i];
const int iSegCount = m_pSegments.GetLength();
assert ( tMatch.m_iTag>=1 && tMatch.m_iTag<iSegCount+dDiskStrings.GetLength()+1 );
assert ( tMatch.m_pDynamic );
const int iStorageSrc = tMatch.m_iTag-1;
bool bSegmentMatch = ( iStorageSrc < iSegCount );
const BYTE * pBaseString = bSegmentMatch ? m_pSegments[iStorageSrc]->m_dStrings.Begin() : dDiskStrings[ iStorageSrc-iSegCount ];
const DWORD * pBaseMva = bSegmentMatch ? m_pSegments[iStorageSrc]->m_dMvas.Begin() : dDiskMva[ iStorageSrc-iSegCount ];
ARRAY_FOREACH ( i, dStringGetLoc )
{
const SphAttr_t uOff = tMatch.GetAttr ( dStringGetLoc[i] );
if ( uOff>0 ) // have to fix up only existed attribute
{
assert ( uOff<( I64C(1)<<32 ) ); // should be 32 bit offset
assert ( !bSegmentMatch || (int)uOff<m_pSegments[iStorageSrc]->m_dStrings.GetLength() );
DWORD uAttr = CopyPackedString ( pBaseString + uOff, dStorageString );
tMatch.SetAttr ( dStringSetLoc[i], uAttr );
} else
{
tMatch.SetAttr ( dStringSetLoc[i], 0 );
}
}
bool bIdSet = false;
ARRAY_FOREACH ( i, dMvaGetLoc )
{
const SphAttr_t uOff = tMatch.GetAttr ( dMvaGetLoc[i] );
if ( uOff>0 ) // have to fix up only existed attribute
{
assert ( uOff<( I64C(1)<<32 ) ); // should be 32 bit offset
assert ( !bSegmentMatch || (int)uOff<m_pSegments[iStorageSrc]->m_dMvas.GetLength() );
if ( !bIdSet )
{
CopyDocid ( tMatch.m_iDocID, dStorageMva );
bIdSet = true;
}
DWORD uAttr = CopyMva ( pBaseMva + uOff, dStorageMva );
tMatch.SetAttr ( dMvaSetLoc[i], uAttr );
} else
{
tMatch.SetAttr ( dMvaSetLoc[i], 0 );
}
}
}
}
if ( dStorageString.GetLength()>1 )
{
BYTE * pStrings = dStorageString.LeakData ();
pResult->m_dStorage2Free.Add ( pStrings );
pResult->m_pStrings = pStrings;
}
if ( dStorageMva.GetLength()>1 )
{
DWORD * pMva = dStorageMva.LeakData();
pResult->m_dStorage2Free.Add ( pMva );
pResult->m_pMva = pMva;
}
}
// query timer
pResult->m_iQueryTime = int ( ( sphMicroTimer()-tmQueryStart )/1000 );
m_tRwlock.Unlock ();
return true;
}
bool RtIndex_t::MultiQueryEx ( int iQueries, const CSphQuery * ppQueries, CSphQueryResult ** ppResults, ISphMatchSorter ** ppSorters, const CSphVector<CSphFilterSettings> * pExtraFilters, int iTag ) const
{
// FIXME! OPTIMIZE! implement common subtree cache here
bool bResult = false;
for ( int i=0; i<iQueries; i++ )
if ( MultiQuery ( &ppQueries[i], ppResults[i], 1, &ppSorters[i], pExtraFilters, iTag ) )
bResult = true;
else
ppResults[i]->m_iMultiplier = -1;
return bResult;
}
bool RtIndex_t::GetKeywords ( CSphVector<CSphKeywordInfo> & dKeywords, const char * sQuery, bool bGetStats, CSphString & sError ) const
{
m_tRwlock.ReadLock(); // this is actually needed only if they want stats
RtQword_t tQword;
CSphString sBuffer ( sQuery );
CSphScopedPtr<ISphTokenizer> pTokenizer ( m_pTokenizer->Clone ( false ) ); // avoid race
pTokenizer->SetBuffer ( (BYTE *)sBuffer.cstr(), sBuffer.Length() );
CSphScopedPtr<CSphDict> tDictCloned ( NULL );
CSphDict * pDictBase = m_pDict;
if ( pDictBase->HasState() )
{
tDictCloned = pDictBase = pDictBase->Clone();
}
while ( BYTE * pToken = pTokenizer->GetToken() )
{
const char * sToken = (const char *)pToken;
CSphString sWord ( sToken );
SphWordID_t iWord = pDictBase->GetWordID ( pToken );
if ( iWord )
{
CSphKeywordInfo & tInfo = dKeywords.Add();
tInfo.m_sTokenized = sWord;
tInfo.m_sNormalized = sToken;
tInfo.m_iDocs = 0;
tInfo.m_iHits = 0;
if ( !bGetStats )
continue;
tQword.m_iWordID = iWord;
tQword.m_iDocs = 0;
tQword.m_iHits = 0;
ARRAY_FOREACH ( iSeg, m_pSegments )
RtQwordSetupSegment ( &tQword, m_pSegments[iSeg], false );
tInfo.m_iDocs = tQword.m_iDocs;
tInfo.m_iHits = tQword.m_iHits;
}
}
// get stats from disk chunks too
if ( bGetStats )
ARRAY_FOREACH ( iChunk, m_pDiskChunks )
{
CSphVector<CSphKeywordInfo> dKeywords2;
if ( !m_pDiskChunks[iChunk]->GetKeywords ( dKeywords2, sQuery, bGetStats, sError ) )
{
m_tRwlock.Unlock();
return false;
}
if ( dKeywords.GetLength()!=dKeywords2.GetLength() )
{
sError.SetSprintf ( "INTERNAL ERROR: keyword count mismatch (ram=%d, disk[%d]=%d)",
dKeywords.GetLength(), iChunk, dKeywords2.GetLength() );
m_tRwlock.Unlock ();
break;
}
ARRAY_FOREACH ( i, dKeywords )
{
if ( dKeywords[i].m_sTokenized!=dKeywords2[i].m_sTokenized )
{
sError.SetSprintf ( "INTERNAL ERROR: tokenized keyword mismatch (n=%d, ram=%s, disk[%d]=%s)",
i, dKeywords[i].m_sTokenized.cstr(), iChunk, dKeywords2[i].m_sTokenized.cstr() );
m_tRwlock.Unlock ();
break;
}
if ( dKeywords[i].m_sNormalized!=dKeywords2[i].m_sNormalized )
{
sError.SetSprintf ( "INTERNAL ERROR: normalized keyword mismatch (n=%d, ram=%s, disk[%d]=%s)",
i, dKeywords[i].m_sTokenized.cstr(), iChunk, dKeywords2[i].m_sTokenized.cstr() );
m_tRwlock.Unlock ();
break;
}
dKeywords[i].m_iDocs += dKeywords2[i].m_iDocs;
dKeywords[i].m_iHits += dKeywords2[i].m_iHits;
}
}
m_tRwlock.Unlock();
return true;
}
// FIXME! might be inconsistent in case disk chunk update fails
int RtIndex_t::UpdateAttributes ( const CSphAttrUpdate & tUpd, int iIndex, CSphString & sError )
{
// check if we have to
assert ( tUpd.m_dDocids.GetLength()==tUpd.m_dRowOffset.GetLength() );
if ( !tUpd.m_dDocids.GetLength() )
return 0;
// remap update schema to index schema
CSphVector<CSphAttrLocator> dLocators;
uint64_t uDst64 = 0;
ARRAY_FOREACH ( i, tUpd.m_dAttrs )
{
int iIndex = m_tSchema.GetAttrIndex ( tUpd.m_dAttrs[i].m_sName.cstr() );
if ( iIndex<0 )
{
sError.SetSprintf ( "attribute '%s' not found", tUpd.m_dAttrs[i].m_sName.cstr() );
return -1;
}
// forbid updates on non-int columns
const CSphColumnInfo & tCol = m_tSchema.GetAttr(iIndex);
if ( !( tCol.m_eAttrType==SPH_ATTR_BOOL || tCol.m_eAttrType==SPH_ATTR_INTEGER || tCol.m_eAttrType==SPH_ATTR_TIMESTAMP
|| tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_UINT64SET ) )
{
sError.SetSprintf ( "attribute '%s' can not be updated (must be boolean, integer, or timestamp or MVA)", tUpd.m_dAttrs[i].m_sName.cstr() );
return -1;
}
bool bSrcMva = ( tCol.m_eAttrType==SPH_ATTR_UINT32SET || tCol.m_eAttrType==SPH_ATTR_UINT64SET );
bool bDstMva = ( tUpd.m_dAttrs[i].m_eAttrType==SPH_ATTR_UINT32SET || tUpd.m_dAttrs[i].m_eAttrType==SPH_ATTR_UINT64SET );
if ( bSrcMva!=bDstMva )
{
sError.SetSprintf ( "attribute '%s' MVA flag mismatch", tUpd.m_dAttrs[i].m_sName.cstr() );
return -1;
}
if ( tCol.m_eAttrType==SPH_ATTR_UINT32SET && tUpd.m_dAttrs[i].m_eAttrType==SPH_ATTR_UINT64SET )
{
sError.SetSprintf ( "attribute '%s' MVA bits (dst=%d, src=%d) mismatch", tUpd.m_dAttrs[i].m_sName.cstr(),
tCol.m_eAttrType, tUpd.m_dAttrs[i].m_eAttrType );
return -1;
}
if ( tCol.m_eAttrType==SPH_ATTR_UINT64SET )
uDst64 |= ( U64C(1)<<i );
dLocators.Add ( tCol.m_tLocator );
}
assert ( dLocators.GetLength()==tUpd.m_dAttrs.GetLength() );
// get that lock
m_tRwlock.WriteLock();
// check if we are empty
if ( !m_pSegments.GetLength() && !m_pDiskChunks.GetLength() )
{
m_tRwlock.Unlock();
return true;
}
// do the update
int iUpdated = 0;
DWORD uUpdateMask = 0;
int iFirst = ( iIndex<0 ) ? 0 : iIndex;
int iLast = ( iIndex<0 ) ? tUpd.m_dDocids.GetLength() : iIndex+1;
for ( int iUpd=iFirst; iUpd<iLast; iUpd++ )
{
// search segments first
bool bUpdated = false;
ARRAY_FOREACH ( iSeg, m_pSegments )
{
CSphRowitem * pRow = const_cast<CSphRowitem*> ( m_pSegments[iSeg]->FindAliveRow ( tUpd.m_dDocids[iUpd] ) );
if ( !pRow )
continue;
assert ( DOCINFO2ID(pRow)==tUpd.m_dDocids[iUpd] );
pRow = DOCINFO2ATTRS(pRow);
int iPos = tUpd.m_dRowOffset[iUpd];
ARRAY_FOREACH ( iCol, tUpd.m_dAttrs )
{
if ( !( tUpd.m_dAttrs[iCol].m_eAttrType==SPH_ATTR_UINT32SET || tUpd.m_dAttrs[iCol].m_eAttrType==SPH_ATTR_UINT64SET ) )
{
// plain update
uUpdateMask |= ATTRS_UPDATED;
SphAttr_t uValue = tUpd.m_dPool[iPos];
sphSetRowAttr ( pRow, dLocators[iCol], uValue );
iPos++;
} else
{
// MVA update
uUpdateMask |= ATTRS_MVA_UPDATED;
const DWORD * pSrc = tUpd.m_dPool.Begin()+iPos;
DWORD iLen = *pSrc;
if ( !iLen )
{
iPos++;
sphSetRowAttr ( pRow, dLocators[iCol], 0 );
continue;
}
iPos += iLen+1;
bool bDst64 = ( ( uDst64 & ( U64C(1) << iCol ) )!=0 );
assert ( ( iLen%2 )==0 );
DWORD uCount = ( bDst64 ? iLen : iLen/2 );
CSphTightVector<DWORD> & dMvas = m_pSegments[iSeg]->m_dMvas;
DWORD uMvaOff = (DWORD)sphGetRowAttr ( pRow, dLocators[iCol] );
assert ( !dMvas.Begin() || uMvaOff );
DWORD * pDst = dMvas.Begin() + uMvaOff;
if ( uCount>(*pDst) )
{
uMvaOff = dMvas.GetLength();
dMvas.Resize ( uMvaOff+uCount+1 );
pDst = dMvas.Begin()+uMvaOff;
}
sphSetRowAttr ( pRow, dLocators[iCol], uMvaOff );
if ( bDst64 )
{
memcpy ( pDst, pSrc, sizeof(DWORD)*(uCount+1) );
} else
{
*pDst++ = uCount; // MVA values counter first
pSrc++;
while ( uCount-- )
{
*pDst = *pSrc;
pDst++;
pSrc+=2;
}
}
}
}
bUpdated = true;
iUpdated++;
}
if ( bUpdated )
continue;
// check disk K-list now
// FIXME! optimize away flush
m_tKlist.Flush();
m_tKlist.KillListLock();
const SphAttr_t uRef = tUpd.m_dDocids[iUpd];
bUpdated = ( sphBinarySearch ( m_tKlist.GetKillList(), m_tKlist.GetKillList() + m_tKlist.GetKillListSize() - 1, uRef )!=NULL );
m_tKlist.KillListUnlock();
if ( bUpdated )
continue;
// finally, try disk chunks
for ( int iChunk = m_pDiskChunks.GetLength()-1; iChunk>=0; iChunk-- )
{
// run just this update
// FIXME! might be inefficient in case of big batches (redundant allocs in disk update)
int iRes = m_pDiskChunks[iChunk]->UpdateAttributes ( tUpd, iUpd, sError );
// errors are highly unlikely at this point
// FIXME! maybe emit a warning to client as well?
if ( iRes<0 )
{
sphWarn ( "INTERNAL ERROR: index %s chunk %d update failure: %s", m_sIndexName.cstr(), iChunk, sError.cstr() );
continue;
}
// update stats
iUpdated += iRes;
// we only need to update the most fresh chunk
if ( iRes>0 )
break;
}
}
// bump the counter, binlog the update!
assert ( iIndex<0 );
g_pBinlog->BinlogUpdateAttributes ( m_sIndexName.cstr(), ++m_iTID, tUpd );
// all done
m_tRwlock.Unlock ();
return iUpdated;
}
//////////////////////////////////////////////////////////////////////////
// BINLOG
//////////////////////////////////////////////////////////////////////////
extern DWORD g_dSphinxCRC32 [ 256 ];
static CSphString MakeBinlogName ( const char * sPath, int iExt )
{
CSphString sName;
sName.SetSprintf ( "%s/binlog.%03d", sPath, iExt );
return sName;
}
BinlogWriter_c::BinlogWriter_c ()
{
m_iLastWritePos = 0;
m_iLastFsyncPos = 0;
ResetCrc();
}
void BinlogWriter_c::ResetCrc ()
{
m_uCRC = ~((DWORD)0);
}
void BinlogWriter_c::PutBytes ( const void * pData, int iSize )
{
BYTE * b = (BYTE*) pData;
for ( int i=0; i<iSize; i++ )
m_uCRC = (m_uCRC >> 8) ^ g_dSphinxCRC32 [ (m_uCRC ^ *b++) & 0xff ];
CSphWriter::PutBytes ( pData, iSize );
}
void BinlogWriter_c::PutString ( const char * szString )
{
int iLen = szString ? strlen ( szString ) : 0;
ZipValue ( iLen );
if ( iLen )
PutBytes ( szString, iLen );
}
void BinlogWriter_c::ZipValue ( uint64_t uValue )
{
BYTE uBuf[16];
int iLen = 0;
while ( uValue>=0x80 )
{
uBuf[iLen++] = (BYTE)( 0x80 | ( uValue & 0x7f ) );
uValue >>= 7;
}
uBuf[iLen++] = (BYTE)uValue;
PutBytes ( uBuf, iLen );
}
void BinlogWriter_c::WriteCrc ()
{
m_uCRC = ~m_uCRC;
CSphWriter::PutDword ( m_uCRC );
m_uCRC = ~((DWORD)0);
}
void BinlogWriter_c::Flush ()
{
Write();
Fsync();
}
void BinlogWriter_c::Write ()
{
if ( m_iPoolUsed<=0 )
return;
CSphWriter::Flush();
m_iLastWritePos = GetPos();
}
#if USE_WINDOWS
int fsync ( int iFD )
{
// map fd to handle
HANDLE h = (HANDLE) _get_osfhandle ( iFD );
if ( h==INVALID_HANDLE_VALUE )
{
errno = EBADF;
return -1;
}
// do flush
if ( FlushFileBuffers(h) )
return 0;
// error handling
errno = EIO;
if ( GetLastError()==ERROR_INVALID_HANDLE )
errno = EINVAL;
return -1;
}
#endif
void BinlogWriter_c::Fsync ()
{
if ( !HasUnsyncedData() )
return;
m_bError = ( fsync ( m_iFD )!=0 );
if ( m_bError && m_pError )
m_pError->SetSprintf ( "failed to sync %s: %s" , m_sName.cstr(), strerror(errno) );
m_iLastFsyncPos = GetPos();
}
//////////////////////////////////////////////////////////////////////////
void BinlogReader_c::ResetCrc ()
{
m_uCRC = ~(DWORD(0));
}
void BinlogReader_c::GetBytes ( void * pData, int iSize )
{
CSphReader::GetBytes ( pData, iSize );
BYTE * b = (BYTE*) pData;
for ( int i=0; i<iSize; i++ )
m_uCRC = (m_uCRC >> 8) ^ g_dSphinxCRC32 [ (m_uCRC ^ *b++) & 0xff ];
}
DWORD BinlogReader_c::GetDword ()
{
DWORD uRes;
GetBytes ( &uRes, sizeof(DWORD) );
return uRes;
}
CSphString BinlogReader_c::GetString ()
{
CSphString sRes;
int iLen = (int) UnzipValue();
if ( iLen )
{
sRes.Reserve ( iLen );
GetBytes ( (BYTE*)sRes.cstr(), iLen );
}
return sRes;
}
uint64_t BinlogReader_c::UnzipValue ()
{
uint64_t uRes = 0;
int iOff = 0, iByte;
do
{
iByte = CSphReader::GetByte();
uRes += ( (uint64_t)( iByte & 0x7f ) << iOff );
iOff += 7;
m_uCRC = (m_uCRC >> 8) ^ g_dSphinxCRC32 [ (m_uCRC ^ (BYTE)iByte) & 0xff ];
} while ( iByte>=128 );
return uRes;
}
bool BinlogReader_c::CheckCrc ( const char * sOp, const char * sIndexName, int64_t iTid, int64_t iTxnPos )
{
DWORD uRef = CSphAutoreader::GetDword();
m_uCRC = ~m_uCRC;
if ( uRef!=m_uCRC )
sphWarning ( "binlog: %s: CRC mismatch (index=%s, tid="INT64_FMT", pos="INT64_FMT")", sOp, sIndexName ? sIndexName : "", iTid, iTxnPos );
return uRef==m_uCRC;
}
//////////////////////////////////////////////////////////////////////////
RtBinlog_c::RtBinlog_c ()
: m_iFlushTimeLeft ( 0 )
, m_iFlushPeriod ( BINLOG_AUTO_FLUSH )
, m_eOnCommit ( ACTION_NONE )
, m_iLockFD ( -1 )
, m_bReplayMode ( false )
, m_bDisabled ( true )
, m_iRestartSize ( 0 )
{
MEMORY ( SPH_MEM_BINLOG );
Verify ( m_tWriteLock.Init() );
m_tWriter.SetBufferSize ( BINLOG_WRITE_BUFFER );
}
RtBinlog_c::~RtBinlog_c ()
{
if ( !m_bDisabled )
{
m_iFlushPeriod = 0;
if ( m_eOnCommit!=ACTION_FSYNC )
sphThreadJoin ( &m_tUpdateTread );
DoCacheWrite();
m_tWriter.CloseFile();
LockFile ( false );
}
Verify ( m_tWriteLock.Done() );
}
void RtBinlog_c::BinlogCommit ( const char * sIndexName, int64_t iTID, const RtSegment_t * pSeg, const CSphVector<SphDocID_t> & dKlist )
{
if ( m_bReplayMode || m_bDisabled )
return;
MEMORY ( SPH_MEM_BINLOG );
Verify ( m_tWriteLock.Lock() );
const int64_t tmNow = sphMicroTimer();
const int uIndex = GetWriteIndexID ( sIndexName, iTID, tmNow );
// header
m_tWriter.PutDword ( BLOP_MAGIC );
m_tWriter.ResetCrc ();
m_tWriter.ZipValue ( BLOP_COMMIT );
m_tWriter.ZipValue ( uIndex );
m_tWriter.ZipValue ( iTID );
m_tWriter.ZipValue ( tmNow );
// save txn data
if ( !pSeg || !pSeg->m_iRows )
{
m_tWriter.ZipValue ( 0 );
} else
{
m_tWriter.ZipValue ( pSeg->m_iRows );
SaveVector ( m_tWriter, pSeg->m_dWords );
m_tWriter.ZipValue ( pSeg->m_dWordCheckpoints.GetLength() );
ARRAY_FOREACH ( i, pSeg->m_dWordCheckpoints )
{
m_tWriter.ZipValue ( pSeg->m_dWordCheckpoints[i].m_iOffset );
m_tWriter.ZipValue ( pSeg->m_dWordCheckpoints[i].m_uWordID );
}
SaveVector ( m_tWriter, pSeg->m_dDocs );
SaveVector ( m_tWriter, pSeg->m_dHits );
SaveVector ( m_tWriter, pSeg->m_dRows );
SaveVector ( m_tWriter, pSeg->m_dStrings );
SaveVector ( m_tWriter, pSeg->m_dMvas );
}
SaveVector ( m_tWriter, dKlist );
// checksum
m_tWriter.WriteCrc ();
// finalize
CheckDoFlush();
CheckDoRestart();
Verify ( m_tWriteLock.Unlock() );
}
void RtBinlog_c::BinlogUpdateAttributes ( const char * sIndexName, int64_t iTID, const CSphAttrUpdate & tUpd )
{
if ( m_bReplayMode || m_bDisabled )
return;
MEMORY ( SPH_MEM_BINLOG );
Verify ( m_tWriteLock.Lock() );
const int64_t tmNow = sphMicroTimer();
const int uIndex = GetWriteIndexID ( sIndexName, iTID, tmNow );
// header
m_tWriter.PutDword ( BLOP_MAGIC );
m_tWriter.ResetCrc ();
m_tWriter.ZipValue ( BLOP_UPDATE_ATTRS );
m_tWriter.ZipValue ( uIndex );
m_tWriter.ZipValue ( iTID );
m_tWriter.ZipValue ( tmNow );
// update data
m_tWriter.ZipValue ( tUpd.m_dAttrs.GetLength() );
ARRAY_FOREACH ( i, tUpd.m_dAttrs )
{
m_tWriter.PutString ( tUpd.m_dAttrs[i].m_sName.cstr() );
m_tWriter.ZipValue ( tUpd.m_dAttrs[i].m_eAttrType );
}
// POD vectors
SaveVector ( m_tWriter, tUpd.m_dPool );
SaveVector ( m_tWriter, tUpd.m_dDocids );
SaveVector ( m_tWriter, tUpd.m_dRowOffset );
// checksum
m_tWriter.WriteCrc ();
// finalize
CheckDoFlush();
CheckDoRestart();
Verify ( m_tWriteLock.Unlock() );
}
// here's been going binlogs with ALL closed indices removing
void RtBinlog_c::NotifyIndexFlush ( const char * sIndexName, int64_t iTID, bool bShutdown )
{
if ( m_bReplayMode )
sphInfo ( "index '%s': ramchunk saved. TID="INT64_FMT"", sIndexName, iTID );
if ( m_bReplayMode || m_bDisabled )
return;
MEMORY ( SPH_MEM_BINLOG );
assert ( bShutdown || m_dLogFiles.GetLength() );
Verify ( m_tWriteLock.Lock() );
bool bCurrentLogShut = false;
const int iPreflushFiles = m_dLogFiles.GetLength();
// loop through all log files, and check if we can unlink any
ARRAY_FOREACH ( iLog, m_dLogFiles )
{
BinlogFileDesc_t & tLog = m_dLogFiles[iLog];
bool bUsed = false;
// update index info for this log file
ARRAY_FOREACH ( i, tLog.m_dIndexInfos )
{
BinlogIndexInfo_t & tIndex = tLog.m_dIndexInfos[i];
// this index was just flushed, update flushed TID
if ( tIndex.m_sName==sIndexName )
{
assert ( iTID>=tIndex.m_iFlushedTID );
tIndex.m_iFlushedTID = Max ( tIndex.m_iFlushedTID, iTID );
}
// if max logged TID is greater than last flushed TID, log file still has needed recovery data
if ( tIndex.m_iFlushedTID < tIndex.m_iMaxTID )
bUsed = true;
}
// it's needed, keep looking
if ( bUsed )
continue;
// hooray, we can remove this log!
// if this is our current log, we have to close it first
if ( iLog==m_dLogFiles.GetLength()-1 )
{
m_tWriter.CloseFile ();
bCurrentLogShut = true;
}
// do unlink
CSphString sLog = MakeBinlogName ( m_sLogPath.cstr(), tLog.m_iExt );
if ( ::unlink ( sLog.cstr() ) )
sphWarning ( "binlog: failed to unlink %s: %s (remove it manually)", sLog.cstr(), strerror(errno) );
// we need to reset it, otherwise there might be leftover data after last Remove()
m_dLogFiles[iLog] = BinlogFileDesc_t();
// quit tracking it
m_dLogFiles.Remove ( iLog-- );
}
if ( bCurrentLogShut && !bShutdown )
{
// if current log was closed, we need a new one (it will automatically save meta, too)
OpenNewLog ();
} else if ( iPreflushFiles!=m_dLogFiles.GetLength() )
{
// if we unlinked any logs, we need to save meta, too
SaveMeta ();
}
Verify ( m_tWriteLock.Unlock() );
}
void RtBinlog_c::Configure ( const CSphConfigSection & hSearchd, bool bTestMode )
{
MEMORY ( SPH_MEM_BINLOG );
const int iMode = hSearchd.GetInt ( "binlog_flush", 2 );
switch ( iMode )
{
case 0: m_eOnCommit = ACTION_NONE; break;
case 1: m_eOnCommit = ACTION_FSYNC; break;
case 2: m_eOnCommit = ACTION_WRITE; break;
default: sphDie ( "unknown binlog flush mode %d (must be 0, 1, or 2)\n", iMode );
}
#ifndef DATADIR
#define DATADIR "."
#endif
m_sLogPath = hSearchd.GetStr ( "binlog_path", bTestMode ? "" : DATADIR );
m_bDisabled = m_sLogPath.IsEmpty();
m_iRestartSize = hSearchd.GetSize ( "binlog_max_log_size", m_iRestartSize );
if ( !m_bDisabled )
{
LockFile ( true );
LoadMeta();
}
}
void RtBinlog_c::Replay ( const SmallStringHash_T<CSphIndex*> & hIndexes, ProgressCallbackSimple_t * pfnProgressCallback )
{
if ( m_bDisabled || !hIndexes.GetLength() )
return;
// on replay started
if ( pfnProgressCallback )
pfnProgressCallback();
int64_t tmReplay = sphMicroTimer();
// do replay
m_bReplayMode = true;
int iLastLogState = 0;
ARRAY_FOREACH ( i, m_dLogFiles )
{
iLastLogState = ReplayBinlog ( hIndexes, i );
if ( pfnProgressCallback ) // on each replayed binlog
pfnProgressCallback();
}
if ( m_dLogFiles.GetLength()>0 )
{
tmReplay = sphMicroTimer() - tmReplay;
sphInfo ( "binlog: finished replaying total %d in %d.%03d sec",
m_dLogFiles.GetLength(),
(int)(tmReplay/1000000), (int)((tmReplay/1000)%1000) );
}
// FIXME?
// in some cases, indexes might had been flushed during replay
// and we might therefore want to update m_iFlushedTID everywhere
// but for now, let's just wait until next flush for simplicity
// resume normal operation
m_bReplayMode = false;
OpenNewLog ( iLastLogState );
}
void RtBinlog_c::CreateTimerThread ()
{
if ( !m_bDisabled && m_eOnCommit!=ACTION_FSYNC )
{
m_iFlushTimeLeft = sphMicroTimer() + m_iFlushPeriod;
sphThreadCreate ( &m_tUpdateTread, RtBinlog_c::DoAutoFlush, this );
}
}
void RtBinlog_c::DoAutoFlush ( void * pBinlog )
{
assert ( pBinlog );
RtBinlog_c * pLog = (RtBinlog_c *)pBinlog;
assert ( !pLog->m_bDisabled );
while ( pLog->m_iFlushPeriod>0 )
{
if ( pLog->m_iFlushTimeLeft < sphMicroTimer() )
{
MEMORY ( SPH_MEM_BINLOG );
pLog->m_iFlushTimeLeft = sphMicroTimer() + pLog->m_iFlushPeriod;
if ( pLog->m_eOnCommit==ACTION_NONE || pLog->m_tWriter.HasUnwrittenData() )
{
Verify ( pLog->m_tWriteLock.Lock() );
pLog->m_tWriter.Flush();
Verify ( pLog->m_tWriteLock.Unlock() );
}
if ( pLog->m_tWriter.HasUnsyncedData() )
pLog->m_tWriter.Fsync();
}
// sleep N msec before next iter or terminate because of shutdown
sphSleepMsec ( 100 );
}
}
int RtBinlog_c::GetWriteIndexID ( const char * sName, int64_t iTID, int64_t tmNow )
{
MEMORY ( SPH_MEM_BINLOG );
assert ( m_dLogFiles.GetLength() );
// OPTIMIZE? maybe hash them?
BinlogFileDesc_t & tLog = m_dLogFiles.Last();
ARRAY_FOREACH ( i, tLog.m_dIndexInfos )
{
BinlogIndexInfo_t & tIndex = tLog.m_dIndexInfos[i];
if ( tIndex.m_sName==sName )
{
tIndex.m_iMaxTID = Max ( tIndex.m_iMaxTID, iTID );
tIndex.m_tmMax = Max ( tIndex.m_tmMax, tmNow );
return i;
}
}
// create a new entry
int iID = tLog.m_dIndexInfos.GetLength();
BinlogIndexInfo_t & tIndex = tLog.m_dIndexInfos.Add(); // caller must hold a wlock
tIndex.m_sName = sName;
tIndex.m_iMinTID = iTID;
tIndex.m_iMaxTID = iTID;
tIndex.m_iFlushedTID = 0;
tIndex.m_tmMin = tmNow;
tIndex.m_tmMax = tmNow;
// log this new entry
m_tWriter.PutDword ( BLOP_MAGIC );
m_tWriter.ResetCrc ();
m_tWriter.ZipValue ( BLOP_ADD_INDEX );
m_tWriter.ZipValue ( iID );
m_tWriter.PutString ( sName );
m_tWriter.ZipValue ( iTID );
m_tWriter.ZipValue ( tmNow );
m_tWriter.WriteCrc ();
// return the index
return iID;
}
void RtBinlog_c::LoadMeta ()
{
MEMORY ( SPH_MEM_BINLOG );
CSphString sMeta;
sMeta.SetSprintf ( "%s/binlog.meta", m_sLogPath.cstr() );
if ( !sphIsReadable ( sMeta.cstr() ) )
return;
CSphString sError;
// opened and locked, lets read
CSphAutoreader rdMeta;
if ( !rdMeta.Open ( sMeta, sError ) )
sphDie ( "%s error: %s", sMeta.cstr(), sError.cstr() );
if ( rdMeta.GetDword()!=BINLOG_META_MAGIC )
sphDie ( "invalid meta file %s", sMeta.cstr() );
DWORD uVersion = rdMeta.GetDword();
if ( uVersion!=BINLOG_VERSION )
sphDie ( "binlog meta file %s is v.%d, binary is v.%d; recovery requires previous binary version", sMeta.cstr(), uVersion, BINLOG_VERSION );
const bool bLoaded64bit = ( rdMeta.GetByte()==1 );
if ( bLoaded64bit!=USE_64BIT )
sphDie ( "USE_64BIT inconsistency (binary=%d, binlog=%d); recovery requires previous binary version", USE_64BIT, bLoaded64bit );
// load list of active log files
m_dLogFiles.Resize ( rdMeta.UnzipInt() ); // FIXME! sanity check
ARRAY_FOREACH ( i, m_dLogFiles )
m_dLogFiles[i].m_iExt = rdMeta.UnzipInt(); // everything else is saved in logs themselves
}
void RtBinlog_c::SaveMeta ()
{
MEMORY ( SPH_MEM_BINLOG );
CSphString sMeta, sMetaOld;
sMeta.SetSprintf ( "%s/binlog.meta.new", m_sLogPath.cstr() );
sMetaOld.SetSprintf ( "%s/binlog.meta", m_sLogPath.cstr() );
CSphString sError;
// opened and locked, lets write
CSphWriter wrMeta;
if ( !wrMeta.OpenFile ( sMeta, sError ) )
sphDie ( "failed to open '%s': '%s'", sMeta.cstr(), sError.cstr() );
wrMeta.PutDword ( BINLOG_META_MAGIC );
wrMeta.PutDword ( BINLOG_VERSION );
wrMeta.PutByte ( USE_64BIT );
// save list of active log files
wrMeta.ZipInt ( m_dLogFiles.GetLength() );
ARRAY_FOREACH ( i, m_dLogFiles )
wrMeta.ZipInt ( m_dLogFiles[i].m_iExt ); // everything else is saved in logs themselves
wrMeta.CloseFile();
if ( ::rename_rt ( sMeta.cstr(), sMetaOld.cstr() ) )
sphDie ( "failed to rename meta (src=%s, dst=%s, errno=%d, error=%s)",
sMeta.cstr(), sMetaOld.cstr(), errno, strerror(errno) ); // !COMMIT handle this gracefully
sphLogDebug ( "SaveMeta: Done." );
}
void RtBinlog_c::LockFile ( bool bLock )
{
CSphString sName;
sName.SetSprintf ( "%s/binlog.lock", m_sLogPath.cstr() );
if ( bLock )
{
assert ( m_iLockFD==-1 );
const int iLockFD = ::open ( sName.cstr(), SPH_O_NEW, 0644 );
if ( iLockFD<0 )
sphDie ( "failed to open '%s': %u '%s'", sName.cstr(), errno, strerror(errno) );
if ( !sphLockEx ( iLockFD, false ) )
sphDie ( "failed to lock '%s': %u '%s'", sName.cstr(), errno, strerror(errno) );
m_iLockFD = iLockFD;
} else
{
SafeClose ( m_iLockFD );
::unlink ( sName.cstr() );
}
}
void RtBinlog_c::OpenNewLog ( int iLastState )
{
MEMORY ( SPH_MEM_BINLOG );
// calc new ext
int iExt = 1;
if ( m_dLogFiles.GetLength() )
{
iExt = m_dLogFiles.Last().m_iExt;
if ( !iLastState )
iExt++;
}
// create entry
// we need to reset it, otherwise there might be leftover data after last Remove()
BinlogFileDesc_t tLog;
tLog.m_iExt = iExt;
m_dLogFiles.Add ( tLog );
// create file
CSphString sLog = MakeBinlogName ( m_sLogPath.cstr(), tLog.m_iExt );
if ( !iLastState ) // reuse the last binlog since it is empty or useless.
::unlink ( sLog.cstr() );
if ( !m_tWriter.OpenFile ( sLog.cstr(), m_sWriterError ) )
sphDie ( "failed to create %s: errno=%d, error=%s", sLog.cstr(), errno, strerror(errno) );
// emit header
m_tWriter.PutDword ( BINLOG_HEADER_MAGIC );
m_tWriter.PutDword ( BINLOG_VERSION );
// update meta
SaveMeta();
}
void RtBinlog_c::DoCacheWrite ()
{
if ( !m_dLogFiles.GetLength() )
return;
const CSphVector<BinlogIndexInfo_t> & dIndexes = m_dLogFiles.Last().m_dIndexInfos;
m_tWriter.PutDword ( BLOP_MAGIC );
m_tWriter.ResetCrc ();
m_tWriter.ZipValue ( BLOP_ADD_CACHE );
m_tWriter.ZipValue ( dIndexes.GetLength() );
ARRAY_FOREACH ( i, dIndexes )
{
m_tWriter.PutString ( dIndexes[i].m_sName.cstr() );
m_tWriter.ZipValue ( dIndexes[i].m_iMinTID );
m_tWriter.ZipValue ( dIndexes[i].m_iMaxTID );
m_tWriter.ZipValue ( dIndexes[i].m_iFlushedTID );
m_tWriter.ZipValue ( dIndexes[i].m_tmMin );
m_tWriter.ZipValue ( dIndexes[i].m_tmMax );
}
m_tWriter.WriteCrc ();
}
void RtBinlog_c::CheckDoRestart ()
{
// restart on exceed file size limit
if ( m_iRestartSize>0 && m_tWriter.GetPos()>m_iRestartSize )
{
MEMORY ( SPH_MEM_BINLOG );
assert ( m_dLogFiles.GetLength() );
DoCacheWrite();
m_tWriter.CloseFile();
OpenNewLog();
}
}
void RtBinlog_c::CheckDoFlush ()
{
if ( m_eOnCommit==ACTION_NONE )
return;
if ( m_eOnCommit==ACTION_WRITE && m_tWriter.HasUnwrittenData() )
m_tWriter.Write();
if ( m_eOnCommit==ACTION_FSYNC && m_tWriter.HasUnsyncedData() )
{
if ( m_tWriter.HasUnwrittenData() )
m_tWriter.Write();
m_tWriter.Fsync();
}
}
int RtBinlog_c::ReplayBinlog ( const SmallStringHash_T<CSphIndex*> & hIndexes, int iBinlog )
{
assert ( iBinlog>=0 && iBinlog<m_dLogFiles.GetLength() );
CSphString sError;
const CSphString sLog ( MakeBinlogName ( m_sLogPath.cstr(), m_dLogFiles[iBinlog].m_iExt ) );
BinlogFileDesc_t & tLog = m_dLogFiles[iBinlog];
// open, check, play
sphInfo ( "binlog: replaying log %s", sLog.cstr() );
BinlogReader_c tReader;
if ( !tReader.Open ( sLog, sError ) )
sphDie ( "binlog: log open error: %s", sError.cstr() );
const SphOffset_t iFileSize = tReader.GetFilesize();
if ( !iFileSize )
{
sphWarning ( "binlog: empty binlog %s detected, skipping", sLog.cstr() );
return -1;
}
if ( tReader.GetDword()!=BINLOG_HEADER_MAGIC )
sphDie ( "binlog: log %s missing magic header (corrupted?)", sLog.cstr() );
DWORD uVersion = tReader.GetDword();
if ( uVersion!=BINLOG_VERSION || tReader.GetErrorFlag() )
sphDie ( "binlog: log %s is v.%d, binary is v.%d; recovery requires previous binary version", sLog.cstr(), uVersion, BINLOG_VERSION );
/////////////
// do replay
/////////////
int dTotal [ BLOP_TOTAL+1 ];
memset ( dTotal, 0, sizeof(dTotal) );
// !COMMIT
// instead of simply replaying everything, we should check whether this binlog is clean
// by loading and checking the cache stored at its very end
tLog.m_dIndexInfos.Reset();
bool bReplayOK = true;
bool bHaveCacheOp = false;
int64_t iPos = -1;
m_iReplayedRows = 0;
int64_t tmReplay = sphMicroTimer();
while ( iFileSize!=tReader.GetPos() && !tReader.GetErrorFlag() && bReplayOK )
{
iPos = tReader.GetPos();
if ( tReader.GetDword()!=BLOP_MAGIC )
{
sphDie ( "binlog: log missing txn marker at pos="INT64_FMT" (corrupted?)", iPos );
break;
}
tReader.ResetCrc ();
const uint64_t uOp = tReader.UnzipValue ();
if ( uOp<=0 || uOp>=BLOP_TOTAL )
sphDie ( "binlog: unexpected entry (blop="UINT64_FMT", pos="INT64_FMT")", uOp, iPos );
// FIXME! blop might be OK but skipped (eg. index that is no longer)
switch ( uOp )
{
case BLOP_COMMIT:
bReplayOK = ReplayCommit ( iBinlog, tReader );
break;
case BLOP_UPDATE_ATTRS:
bReplayOK = ReplayUpdateAttributes ( iBinlog, tReader );
break;
case BLOP_ADD_INDEX:
bReplayOK = ReplayIndexAdd ( iBinlog, hIndexes, tReader );
break;
case BLOP_ADD_CACHE:
if ( bHaveCacheOp )
sphDie ( "binlog: internal error, second BLOP_ADD_CACHE detected (corruption?)" );
bHaveCacheOp = true;
bReplayOK = ReplayCacheAdd ( iBinlog, tReader );
break;
default:
sphDie ( "binlog: internal error, unhandled entry (blop=%d)", (int)uOp );
}
dTotal [ uOp ] += bReplayOK?1:0;
dTotal [ BLOP_TOTAL ]++;
}
tmReplay = sphMicroTimer() - tmReplay;
if ( tReader.GetErrorFlag() )
sphWarning ( "binlog: log io error at pos="INT64_FMT": %s", iPos, sError.cstr() );
if ( !bReplayOK )
sphWarning ( "binlog: replay error at pos="INT64_FMT")", iPos );
// show additional replay statistics
ARRAY_FOREACH ( i, tLog.m_dIndexInfos )
{
const BinlogIndexInfo_t & tIndex = tLog.m_dIndexInfos[i];
if ( !hIndexes ( tIndex.m_sName.cstr() ) )
{
sphWarning ( "binlog: index %s: missing; tids "INT64_FMT" to "INT64_FMT" skipped!",
tIndex.m_sName.cstr(), tIndex.m_iMinTID, tIndex.m_iMaxTID );
} else if ( tIndex.m_iPreReplayTID < tIndex.m_iMaxTID )
{
sphInfo ( "binlog: index %s: recovered from tid "INT64_FMT" to tid "INT64_FMT,
tIndex.m_sName.cstr(), tIndex.m_iPreReplayTID, tIndex.m_iMaxTID );
} else
{
sphInfo ( "binlog: index %s: skipped at tid "INT64_FMT" and max binlog tid "INT64_FMT,
tIndex.m_sName.cstr(), tIndex.m_iPreReplayTID, tIndex.m_iMaxTID );
}
}
sphInfo ( "binlog: replay stats: %d rows in %d commits; %d updates; %d indexes",
m_iReplayedRows, dTotal[BLOP_COMMIT], dTotal[BLOP_UPDATE_ATTRS], dTotal[BLOP_ADD_INDEX] );
sphInfo ( "binlog: finished replaying %s; %d.%d MB in %d.%03d sec",
sLog.cstr(),
(int)(iFileSize/1048576), (int)((iFileSize*10/1048576)%10),
(int)(tmReplay/1000000), (int)((tmReplay/1000)%1000) );
if ( bHaveCacheOp && dTotal[BLOP_TOTAL]==1 ) // only one operation, that is Add Cache - by the fact, empty binlog
return 1;
return 0;
}
static BinlogIndexInfo_t & ReplayIndexID ( BinlogReader_c & tReader, BinlogFileDesc_t & tLog, const char * sPlace )
{
const int64_t iTxnPos = tReader.GetPos();
const int iVal = (int)tReader.UnzipValue();
if ( iVal<0 || iVal>=tLog.m_dIndexInfos.GetLength() )
sphDie ( "binlog: %s: unexpected index id (id=%d, max=%d, pos="INT64_FMT")",
sPlace, iVal, tLog.m_dIndexInfos.GetLength(), iTxnPos );
return tLog.m_dIndexInfos[iVal];
}
bool RtBinlog_c::ReplayCommit ( int iBinlog, BinlogReader_c & tReader ) const
{
// load and lookup index
const int64_t iTxnPos = tReader.GetPos();
BinlogFileDesc_t & tLog = m_dLogFiles[iBinlog];
BinlogIndexInfo_t & tIndex = ReplayIndexID ( tReader, tLog, "commit" );
// load transaction data
const int64_t iTID = (int64_t) tReader.UnzipValue();
const int64_t tmStamp = (int64_t) tReader.UnzipValue();
RtSegment_t * pSeg = NULL;
CSphVector<SphDocID_t> dKlist;
int iRows = (int)tReader.UnzipValue();
if ( iRows )
{
pSeg = new RtSegment_t();
pSeg->m_iRows = pSeg->m_iAliveRows = iRows;
m_iReplayedRows += iRows;
LoadVector ( tReader, pSeg->m_dWords );
pSeg->m_dWordCheckpoints.Resize ( (int) tReader.UnzipValue() ); // FIXME! sanity check
ARRAY_FOREACH ( i, pSeg->m_dWordCheckpoints )
{
pSeg->m_dWordCheckpoints[i].m_iOffset = (int) tReader.UnzipValue();
pSeg->m_dWordCheckpoints[i].m_uWordID = (SphWordID_t )tReader.UnzipValue();
}
LoadVector ( tReader, pSeg->m_dDocs );
LoadVector ( tReader, pSeg->m_dHits );
LoadVector ( tReader, pSeg->m_dRows );
LoadVector ( tReader, pSeg->m_dStrings );
LoadVector ( tReader, pSeg->m_dMvas );
}
LoadVector ( tReader, dKlist );
// checksum
if ( tReader.GetErrorFlag() || !tReader.CheckCrc ( "commit", tIndex.m_sName.cstr(), iTID, iTxnPos ) )
return false;
// check TID, time order in log
if ( iTID<tIndex.m_iMaxTID )
sphDie ( "binlog: commit: descending tid (index=%s, lasttid="INT64_FMT", logtid="INT64_FMT", pos="INT64_FMT")",
tIndex.m_sName.cstr(), tIndex.m_iMaxTID, iTID, iTxnPos );
if ( tmStamp<tIndex.m_tmMax )
sphDie ( "binlog: commit: descending time (index=%s, lasttime="INT64_FMT", logtime="INT64_FMT", pos="INT64_FMT")",
tIndex.m_sName.cstr(), tIndex.m_tmMax, tmStamp, iTxnPos );
// only replay transaction when index exists and does not have it yet (based on TID)
if ( tIndex.m_pRT && iTID > tIndex.m_pRT->m_iTID )
{
// we normally expect per-index TIDs to be sequential
// but let's be graceful about that
if ( iTID!=tIndex.m_pRT->m_iTID+1 )
sphWarning ( "binlog: commit: unexpected tid (index=%s, indextid="INT64_FMT", logtid="INT64_FMT", pos="INT64_FMT")",
tIndex.m_sName.cstr(), tIndex.m_pRT->m_iTID, iTID, iTxnPos );
// actually replay
tIndex.m_pRT->CommitReplayable ( pSeg, dKlist );
// update committed tid on replay in case of unexpected / mismatched tid
tIndex.m_pRT->m_iTID = iTID;
}
// update info
tIndex.m_iMinTID = Min ( tIndex.m_iMinTID, iTID );
tIndex.m_iMaxTID = Max ( tIndex.m_iMaxTID, iTID );
tIndex.m_tmMin = Min ( tIndex.m_tmMin, tmStamp );
tIndex.m_tmMax = Max ( tIndex.m_tmMax, tmStamp );
return true;
}
bool RtBinlog_c::ReplayIndexAdd ( int iBinlog, const SmallStringHash_T<CSphIndex*> & hIndexes, BinlogReader_c & tReader ) const
{
// load and check index
const int64_t iTxnPos = tReader.GetPos();
BinlogFileDesc_t & tLog = m_dLogFiles[iBinlog];
uint64_t uVal = tReader.UnzipValue();
if ( (int)uVal!=tLog.m_dIndexInfos.GetLength() )
sphDie ( "binlog: indexadd: unexpected index id (id="UINT64_FMT", expected=%d, pos="INT64_FMT")",
uVal, tLog.m_dIndexInfos.GetLength(), iTxnPos );
// load data
CSphString sName = tReader.GetString();
// FIXME? use this for double checking?
tReader.UnzipValue (); // TID
tReader.UnzipValue (); // time
if ( !tReader.CheckCrc ( "indexadd", sName.cstr(), 0, iTxnPos ) )
return false;
// check for index name dupes
ARRAY_FOREACH ( i, tLog.m_dIndexInfos )
if ( tLog.m_dIndexInfos[i].m_sName==sName )
sphDie ( "binlog: duplicate index name (name=%s, dupeid=%d, pos="INT64_FMT")",
sName.cstr(), i, iTxnPos );
// not a dupe, lets add
BinlogIndexInfo_t & tIndex = tLog.m_dIndexInfos.Add();
tIndex.m_sName = sName;
// lookup index in the list of currently served ones
CSphIndex ** ppIndex = hIndexes ( sName.cstr() );
CSphIndex * pIndex = ppIndex ? (*ppIndex) : NULL;
if ( pIndex )
{
tIndex.m_pIndex = pIndex;
if ( pIndex->IsRT() )
tIndex.m_pRT = (RtIndex_t*)pIndex;
tIndex.m_iPreReplayTID = pIndex->m_iTID;
tIndex.m_iFlushedTID = pIndex->m_iTID;
}
// all ok
// TID ranges will be now recomputed as we replay
return true;
}
bool RtBinlog_c::ReplayUpdateAttributes ( int iBinlog, BinlogReader_c & tReader ) const
{
// load and lookup index
const int64_t iTxnPos = tReader.GetPos();
BinlogFileDesc_t & tLog = m_dLogFiles[iBinlog];
BinlogIndexInfo_t & tIndex = ReplayIndexID ( tReader, tLog, "update" );
// load transaction data
CSphAttrUpdate tUpd;
int64_t iTID = (int64_t) tReader.UnzipValue();
int64_t tmStamp = (int64_t) tReader.UnzipValue();
tUpd.m_dAttrs.Resize ( (DWORD) tReader.UnzipValue() ); // FIXME! sanity check
ARRAY_FOREACH ( i, tUpd.m_dAttrs )
{
tUpd.m_dAttrs[i].m_sName = tReader.GetString();
tUpd.m_dAttrs[i].m_eAttrType = (ESphAttr) tReader.UnzipValue(); // safe, we'll crc check later
}
if ( tReader.GetErrorFlag()
|| !LoadVector ( tReader, tUpd.m_dPool )
|| !LoadVector ( tReader, tUpd.m_dDocids )
|| !LoadVector ( tReader, tUpd.m_dRowOffset )
|| !tReader.CheckCrc ( "update", tIndex.m_sName.cstr(), iTID, iTxnPos ) )
{
return false;
}
// check TID, time order in log
if ( iTID<tIndex.m_iMaxTID )
sphDie ( "binlog: update: descending tid (index=%s, lasttid="INT64_FMT", logtid="INT64_FMT", pos="INT64_FMT")",
tIndex.m_sName.cstr(), tIndex.m_iMaxTID, iTID, iTxnPos );
if ( tmStamp<tIndex.m_tmMax )
sphDie ( "binlog: update: descending time (index=%s, lasttime="INT64_FMT", logtime="INT64_FMT", pos="INT64_FMT")",
tIndex.m_sName.cstr(), tIndex.m_tmMax, tmStamp, iTxnPos );
if ( tIndex.m_pIndex && iTID > tIndex.m_pIndex->m_iTID )
{
// we normally expect per-index TIDs to be sequential
// but let's be graceful about that
if ( iTID!=tIndex.m_pIndex->m_iTID+1 )
sphWarning ( "binlog: update: unexpected tid (index=%s, indextid="INT64_FMT", logtid="INT64_FMT", pos="INT64_FMT")",
tIndex.m_sName.cstr(), tIndex.m_pIndex->m_iTID, iTID, iTxnPos );
CSphString sError;
tIndex.m_pIndex->UpdateAttributes ( tUpd, -1, sError ); // FIXME! check for errors
// update committed tid on replay in case of unexpected / mismatched tid
tIndex.m_pIndex->m_iTID = iTID;
}
// update info
tIndex.m_iMinTID = Min ( tIndex.m_iMinTID, iTID );
tIndex.m_iMaxTID = Max ( tIndex.m_iMaxTID, iTID );
tIndex.m_tmMin = Min ( tIndex.m_tmMin, tmStamp );
tIndex.m_tmMax = Max ( tIndex.m_tmMax, tmStamp );
return true;
}
bool RtBinlog_c::ReplayCacheAdd ( int iBinlog, BinlogReader_c & tReader ) const
{
const int64_t iTxnPos = tReader.GetPos();
BinlogFileDesc_t & tLog = m_dLogFiles[iBinlog];
// load data
CSphVector<BinlogIndexInfo_t> dCache;
dCache.Resize ( (int) tReader.UnzipValue() ); // FIXME! sanity check
ARRAY_FOREACH ( i, dCache )
{
dCache[i].m_sName = tReader.GetString();
dCache[i].m_iMinTID = tReader.UnzipValue();
dCache[i].m_iMaxTID = tReader.UnzipValue();
dCache[i].m_iFlushedTID = tReader.UnzipValue();
dCache[i].m_tmMin = tReader.UnzipValue();
dCache[i].m_tmMax = tReader.UnzipValue();
}
if ( !tReader.CheckCrc ( "cache", "", 0, iTxnPos ) )
return false;
// if we arrived here by replay, let's verify everything
// note that cached infos just passed checksumming, so the file is supposed to be clean!
// in any case, broken log or not, we probably managed to replay something
// so let's just report differences as warnings
if ( dCache.GetLength()!=tLog.m_dIndexInfos.GetLength() )
{
sphWarning ( "binlog: cache mismatch: %d indexes cached, %d replayed",
dCache.GetLength(), tLog.m_dIndexInfos.GetLength() );
return true;
}
ARRAY_FOREACH ( i, dCache )
{
BinlogIndexInfo_t & tCache = dCache[i];
BinlogIndexInfo_t & tIndex = tLog.m_dIndexInfos[i];
if ( tCache.m_sName!=tIndex.m_sName )
{
sphWarning ( "binlog: cache mismatch: index %d name mismatch (%s cached, %s replayed)",
i, tCache.m_sName.cstr(), tIndex.m_sName.cstr() );
continue;
}
if ( tCache.m_iMinTID!=tIndex.m_iMinTID || tCache.m_iMaxTID!=tIndex.m_iMaxTID )
{
sphWarning ( "binlog: cache mismatch: index %s tid ranges mismatch (cached "INT64_FMT" to "INT64_FMT", replayed "INT64_FMT" to "INT64_FMT")",
tCache.m_sName.cstr(), tCache.m_iMinTID, tCache.m_iMaxTID, tIndex.m_iMinTID, tIndex.m_iMaxTID );
}
}
return true;
}
//////////////////////////////////////////////////////////////////////////
ISphRtIndex * sphGetCurrentIndexRT()
{
RtAccum_t * pAcc = (RtAccum_t*) sphThreadGet ( g_tTlsAccumKey );
if ( pAcc )
return pAcc->m_pIndex;
return NULL;
}
ISphRtIndex * sphCreateIndexRT ( const CSphSchema & tSchema, const char * sIndexName, DWORD uRamSize, const char * sPath )
{
MEMORY ( SPH_MEM_IDX_RT );
return new RtIndex_t ( tSchema, sIndexName, uRamSize, sPath );
}
void sphRTInit ()
{
MEMORY ( SPH_MEM_BINLOG );
g_bRTChangesAllowed = false;
Verify ( RtSegment_t::m_tSegmentSeq.Init() );
Verify ( sphThreadKeyCreate ( &g_tTlsAccumKey ) );
g_pBinlog = new RtBinlog_c();
if ( !g_pBinlog )
sphDie ( "binlog: failed to create binlog" );
}
void sphRTConfigure ( const CSphConfigSection & hSearchd, bool bTestMode )
{
assert ( g_pBinlog );
g_pBinlog->Configure ( hSearchd, bTestMode );
g_iRtFlushPeriod = hSearchd.GetInt ( "rt_flush_period", (int)g_iRtFlushPeriod );
// clip period to range ( 10 sec, million years )
g_iRtFlushPeriod = Max ( g_iRtFlushPeriod, 10 );
g_iRtFlushPeriod = Min ( g_iRtFlushPeriod, INT64_MAX );
}
void sphRTDone ()
{
sphThreadKeyDelete ( g_tTlsAccumKey );
Verify ( RtSegment_t::m_tSegmentSeq.Done() );
// its valid for "searchd --stop" case
SafeDelete ( g_pBinlog );
}
void sphReplayBinlog ( const SmallStringHash_T<CSphIndex*> & hIndexes, ProgressCallbackSimple_t * pfnProgressCallback )
{
MEMORY ( SPH_MEM_BINLOG );
g_pBinlog->Replay ( hIndexes, pfnProgressCallback );
g_pBinlog->CreateTimerThread();
g_bRTChangesAllowed = true;
}
bool sphRTSchemaConfigure ( const CSphConfigSection & hIndex, CSphSchema * pSchema, CSphString * pError )
{
assert ( pSchema && pError );
CSphColumnInfo tCol;
// fields
for ( CSphVariant * v=hIndex("rt_field"); v; v=v->m_pNext )
{
tCol.m_sName = v->cstr();
tCol.m_sName.ToLower();
pSchema->m_dFields.Add ( tCol );
}
if ( !pSchema->m_dFields.GetLength() )
{
pError->SetSprintf ( "no fields configured (use rt_field directive)" );
return false;
}
if ( pSchema->m_dFields.GetLength()>SPH_MAX_FIELDS )
{
pError->SetSprintf ( "too many fields (fields=%d, max=%d)", pSchema->m_dFields.GetLength(), SPH_MAX_FIELDS );
return false;
}
// attrs
const int iNumTypes = 7;
const char * sTypes[iNumTypes] = { "rt_attr_uint", "rt_attr_bigint", "rt_attr_float", "rt_attr_timestamp", "rt_attr_string", "rt_attr_multi", "rt_attr_multi_64" };
const ESphAttr iTypes[iNumTypes] = { SPH_ATTR_INTEGER, SPH_ATTR_BIGINT, SPH_ATTR_FLOAT, SPH_ATTR_TIMESTAMP, SPH_ATTR_STRING, SPH_ATTR_UINT32SET, SPH_ATTR_UINT64SET };
for ( int iType=0; iType<iNumTypes; iType++ )
{
for ( CSphVariant * v = hIndex ( sTypes[iType] ); v; v = v->m_pNext )
{
tCol.m_sName = v->cstr();
tCol.m_sName.ToLower();
tCol.m_eAttrType = iTypes[iType];
pSchema->AddAttr ( tCol, false );
}
}
return true;
}
//
// $Id$
//