CC-2166: Packaging Improvements. Moved the Zend app into airtime_mvc. It is now installed to /var/www/airtime. Storage is now set to /srv/airtime/stor. Utils are now installed to /usr/lib/airtime/utils/. Added install/airtime-dircheck.php as a simple test to see if everything is installed/uninstalled correctly.

This commit is contained in:
Paul Baranowski 2011-04-14 18:55:04 -04:00
parent 514777e8d2
commit b11cbd8159
4546 changed files with 138 additions and 51 deletions

View file

@ -0,0 +1,268 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: DictionaryLoader.php 20096 2010-01-06 02:05:09Z bkarwin $
*/
/**
* Dictionary loader
*
* It's a helper class which is created to encapsulate poorly structured code.
* Manual "method inlining" is performed to speed up the dictionary index loading operation,
* which is a major bottleneck for search performance.
*
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Index_DictionaryLoader
{
    /**
     * Dictionary index loader.
     *
     * It takes a string which is actually <segment_name>.tii index file data and
     * returns two parallel lists:
     *  - terms:     each entry is array(fieldNumber, termText)
     *  - termInfos: each entry is array(docFreq, freqPointer, proxPointer, skipDelta, indexPointer)
     *
     * The first entry must be the special index mark (field number 0xFFFFFFFF);
     * on 64-bit builds its field number is normalised to -1 before returning.
     *
     * Every VInt/string read is inlined by hand (the original stream call is kept
     * as a comment above each inlined fragment).
     *
     * See Zend_Search_Lucene_Index_SegmentInfo class for details
     *
     * @param string $data  raw contents of the .tii file
     * @return array        array($termDictionary, $termInfos)
     * @throws Zend_Search_Lucene_Exception  on unknown format version, unsupported
     *                                       size (32-bit mode) or malformed dictionary
     */
    public static function load($data)
    {
        $termDictionary = array();
        $termInfos      = array();
        $pos = 0;

        // $tiVersion = $tiiFile->readInt();
        $tiVersion = ord($data[0]) << 24 | ord($data[1]) << 16 | ord($data[2]) << 8 | ord($data[3]);
        $pos += 4;
        if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */ &&
            $tiVersion != (int)0xFFFFFFFD /* 2.1+ format */) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Wrong TermInfoIndexFile file format');
        }

        // $indexTermCount = $tiiFile->readLong();
        if (PHP_INT_SIZE > 4) {
            $indexTermCount = ord($data[$pos]) << 56 |
                              ord($data[$pos+1]) << 48 |
                              ord($data[$pos+2]) << 40 |
                              ord($data[$pos+3]) << 32 |
                              ord($data[$pos+4]) << 24 |
                              ord($data[$pos+5]) << 16 |
                              ord($data[$pos+6]) << 8 |
                              ord($data[$pos+7]);
        } else {
            // 32-bit mode: the value must fit into a positive signed 32-bit integer
            if ((ord($data[$pos]) != 0) ||
                (ord($data[$pos+1]) != 0) ||
                (ord($data[$pos+2]) != 0) ||
                (ord($data[$pos+3]) != 0) ||
                ((ord($data[$pos+4]) & 0x80) != 0)) {
                require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
            }

            $indexTermCount = ord($data[$pos+4]) << 24 |
                              ord($data[$pos+5]) << 16 |
                              ord($data[$pos+6]) << 8 |
                              ord($data[$pos+7]);
        }
        $pos += 8;

        // $tiiFile->readInt(); // IndexInterval
        $pos += 4;

        // $skipInterval = $tiiFile->readInt();
        $skipInterval = ord($data[$pos]) << 24 | ord($data[$pos+1]) << 16 | ord($data[$pos+2]) << 8 | ord($data[$pos+3]);
        $pos += 4;
        if ($indexTermCount < 1) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Wrong number of terms in a term dictionary index');
        }

        if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) {
            /* Skip MaxSkipLevels value */
            $pos += 4;
        }

        $prevTerm     = '';
        $freqPointer  = 0;
        $proxPointer  = 0;
        $indexPointer = 0;
        for ($count = 0; $count < $indexTermCount; $count++) {
            //$termPrefixLength = $tiiFile->readVInt();
            $nbyte = ord($data[$pos++]);
            $termPrefixLength = $nbyte & 0x7F;
            for ($shift=7; ($nbyte & 0x80) != 0; $shift += 7) {
                $nbyte = ord($data[$pos++]);
                $termPrefixLength |= ($nbyte & 0x7F) << $shift;
            }

            // $termSuffix = $tiiFile->readString();
            $nbyte = ord($data[$pos++]);
            $len = $nbyte & 0x7F;
            for ($shift=7; ($nbyte & 0x80) != 0; $shift += 7) {
                $nbyte = ord($data[$pos++]);
                $len |= ($nbyte & 0x7F) << $shift;
            }
            if ($len == 0) {
                $termSuffix = '';
            } else {
                // $len starts as a character count; it is grown into a byte count
                // while multi-byte lead bytes are discovered below.
                $termSuffix = substr($data, $pos, $len);
                $pos += $len;
                for ($count1 = 0; $count1 < $len; $count1++) {
                    if ((ord($termSuffix[$count1]) & 0xC0) == 0xC0) {
                        $addBytes = 1;
                        if (ord($termSuffix[$count1]) & 0x20) {
                            $addBytes++;

                            // Never used for Java Lucene created index.
                            // Java2 doesn't encode strings in four bytes
                            if (ord($termSuffix[$count1]) & 0x10) {
                                $addBytes++;
                            }
                        }
                        $termSuffix .= substr($data, $pos, $addBytes);
                        $pos += $addBytes;
                        $len += $addBytes;

                        // Check for null character. Java2 encodes null character
                        // in two bytes.
                        if (ord($termSuffix[$count1]) == 0xC0 &&
                            ord($termSuffix[$count1+1]) == 0x80) {
                            // Collapse the two-byte form into a real NUL byte.
                            // (Assigning integer 0 to a string offset would store
                            // the character '0', not chr(0).)
                            $termSuffix = substr($termSuffix, 0, $count1)
                                        . "\0"
                                        . substr($termSuffix, $count1 + 2);
                            // The string became one byte shorter and the character
                            // now occupies a single byte, so there is no
                            // continuation byte to skip.
                            $len--;
                        } else {
                            // Skip continuation bytes of the current character
                            $count1 += $addBytes;
                        }
                    }
                }
            }

            // $termValue = Zend_Search_Lucene_Index_Term::getPrefix($prevTerm, $termPrefixLength) . $termSuffix;
            $pb = 0; $pc = 0;
            $prevTermLength = strlen($prevTerm);
            while ($pb < $prevTermLength && $pc < $termPrefixLength) {
                $charBytes = 1;
                if ((ord($prevTerm[$pb]) & 0xC0) == 0xC0) {
                    $charBytes++;
                    if (ord($prevTerm[$pb]) & 0x20) {
                        $charBytes++;
                        if (ord($prevTerm[$pb]) & 0x10) {
                            $charBytes++;
                        }
                    }
                }

                // The bound is the previous term itself (not the whole file data):
                // a character running past its end is malformed.
                if ($pb + $charBytes > $prevTermLength) {
                    // wrong character
                    break;
                }

                $pc++;
                $pb += $charBytes;
            }
            $termValue = substr($prevTerm, 0, $pb) . $termSuffix;

            // $termFieldNum = $tiiFile->readVInt();
            $nbyte = ord($data[$pos++]);
            $termFieldNum = $nbyte & 0x7F;
            for ($shift=7; ($nbyte & 0x80) != 0; $shift += 7) {
                $nbyte = ord($data[$pos++]);
                $termFieldNum |= ($nbyte & 0x7F) << $shift;
            }

            // $docFreq = $tiiFile->readVInt();
            $nbyte = ord($data[$pos++]);
            $docFreq = $nbyte & 0x7F;
            for ($shift=7; ($nbyte & 0x80) != 0; $shift += 7) {
                $nbyte = ord($data[$pos++]);
                $docFreq |= ($nbyte & 0x7F) << $shift;
            }

            // $freqPointer += $tiiFile->readVInt();
            $nbyte = ord($data[$pos++]);
            $vint = $nbyte & 0x7F;
            for ($shift=7; ($nbyte & 0x80) != 0; $shift += 7) {
                $nbyte = ord($data[$pos++]);
                $vint |= ($nbyte & 0x7F) << $shift;
            }
            $freqPointer += $vint;

            // $proxPointer += $tiiFile->readVInt();
            $nbyte = ord($data[$pos++]);
            $vint = $nbyte & 0x7F;
            for ($shift=7; ($nbyte & 0x80) != 0; $shift += 7) {
                $nbyte = ord($data[$pos++]);
                $vint |= ($nbyte & 0x7F) << $shift;
            }
            $proxPointer += $vint;

            // The skip delta is only present for terms frequent enough to have skip data
            if( $docFreq >= $skipInterval ) {
                // $skipDelta = $tiiFile->readVInt();
                $nbyte = ord($data[$pos++]);
                $vint = $nbyte & 0x7F;
                for ($shift=7; ($nbyte & 0x80) != 0; $shift += 7) {
                    $nbyte = ord($data[$pos++]);
                    $vint |= ($nbyte & 0x7F) << $shift;
                }
                $skipDelta = $vint;
            } else {
                $skipDelta = 0;
            }

            // $indexPointer += $tiiFile->readVInt();
            $nbyte = ord($data[$pos++]);
            $vint = $nbyte & 0x7F;
            for ($shift=7; ($nbyte & 0x80) != 0; $shift += 7) {
                $nbyte = ord($data[$pos++]);
                $vint |= ($nbyte & 0x7F) << $shift;
            }
            $indexPointer += $vint;

            // $this->_termDictionary[] = new Zend_Search_Lucene_Index_Term($termValue, $termFieldNum);
            $termDictionary[] = array($termFieldNum, $termValue);

            $termInfos[] =
                // new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipDelta, $indexPointer);
                array($docFreq, $freqPointer, $proxPointer, $skipDelta, $indexPointer);

            $prevTerm = $termValue;
        }

        // Check special index entry mark
        if ($termDictionary[0][0] != (int)0xFFFFFFFF) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Wrong TermInfoIndexFile file format');
        }

        if (PHP_INT_SIZE > 4) {
            // Treat 64-bit 0xFFFFFFFF as -1
            $termDictionary[0][0] = -1;
        }

        return array($termDictionary, $termInfos);
    }
}

View file

@ -0,0 +1,59 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: DocsFilter.php 20096 2010-01-06 02:05:09Z bkarwin $
*/
/**
* A Zend_Search_Lucene_Index_DocsFilter is used to filter documents while searching.
*
* It may or _may_not_ be used for actual filtering, so it's just a hint that upper query limits
* search result by specified list.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Index_DocsFilter
{
/**
* Set of segment filters, keyed by segment name. Each segment entry is itself
* an array keyed by document id; the stored values are unspecified:
*
* array( <segmentName> => array(<docId> => <undefined_value>,
* <docId> => <undefined_value>,
* <docId> => <undefined_value>,
* ... ),
* <segmentName> => array(<docId> => <undefined_value>,
* <docId> => <undefined_value>,
* <docId> => <undefined_value>,
* ... ),
* <segmentName> => array(<docId> => <undefined_value>,
* <docId> => <undefined_value>,
* <docId> => <undefined_value>,
* ... ),
* ...
* )
*
* The property is public and starts out empty.
*
* @var array
*/
public $segmentFilters = array();
}

View file

@ -0,0 +1,50 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: FieldInfo.php 20096 2010-01-06 02:05:09Z bkarwin $
*/
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Index_FieldInfo
{
    /**
     * Field name
     *
     * @var string
     */
    public $name;

    /**
     * Whether the field is indexed
     *
     * @var boolean
     */
    public $isIndexed;

    /**
     * Field number within a segment
     *
     * @var integer
     */
    public $number;

    /**
     * Whether a term vector is stored for the field
     *
     * @var boolean
     */
    public $storeTermVector;

    /**
     * Whether norms are omitted for the field
     *
     * @var boolean
     */
    public $normsOmitted;

    /**
     * Whether payloads are stored for the field
     *
     * @var boolean
     */
    public $payloadsStored;

    /**
     * Object constructor.
     *
     * Plain value holder: every argument is stored as-is in the public
     * property of the same name.
     *
     * @param string  $name
     * @param boolean $isIndexed
     * @param integer $number
     * @param boolean $storeTermVector
     * @param boolean $normsOmitted
     * @param boolean $payloadsStored
     */
    public function __construct($name, $isIndexed, $number, $storeTermVector, $normsOmitted = false, $payloadsStored = false)
    {
        // Identity of the field
        $this->number = $number;
        $this->name   = $name;

        // Indexing flags
        $this->payloadsStored  = $payloadsStored;
        $this->normsOmitted    = $normsOmitted;
        $this->storeTermVector = $storeTermVector;
        $this->isIndexed       = $isIndexed;
    }
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,271 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: SegmentMerger.php 20096 2010-01-06 02:05:09Z bkarwin $
*/
/** Zend_Search_Lucene_Index_SegmentInfo */
require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Index_SegmentMerger
{
    /**
     * Writer for the target (merged) segment
     *
     * @var Zend_Search_Lucene_Index_SegmentWriter_StreamWriter
     */
    private $_writer;

    /**
     * Number of non-deleted documents copied into the new segment
     *
     * @var integer
     */
    private $_docCount;

    /**
     * Source segments to be merged, keyed by segment name
     *
     * @var array Zend_Search_Lucene_Index_SegmentInfo
     */
    private $_segmentInfos = array();

    /**
     * Set once merge() has completed; a merger can be used only once
     *
     * @var boolean
     */
    private $_mergeDone = false;

    /**
     * Field map
     * [<segment_name>][<field_number>] => <target_field_number>
     *
     * @var array
     */
    private $_fieldsMap = array();


    /**
     * Object constructor.
     *
     * Creates a segment merger which writes the merged result into $directory
     * as a new segment called $name.
     *
     * @param Zend_Search_Lucene_Storage_Directory $directory
     * @param string $name
     */
    public function __construct($directory, $name)
    {
        /** Zend_Search_Lucene_Index_SegmentWriter_StreamWriter */
        require_once 'Zend/Search/Lucene/Index/SegmentWriter/StreamWriter.php';

        $this->_writer = new Zend_Search_Lucene_Index_SegmentWriter_StreamWriter($directory, $name);
    }


    /**
     * Register a segment as a merge source.
     *
     * Segments are keyed by name, so adding a segment with an already
     * registered name replaces the earlier entry.
     *
     * @param Zend_Search_Lucene_Index_SegmentInfo $segmentInfo
     */
    public function addSource(Zend_Search_Lucene_Index_SegmentInfo $segmentInfo)
    {
        $sourceName = $segmentInfo->getName();

        $this->_segmentInfos[$sourceName] = $segmentInfo;
    }


    /**
     * Do merge.
     *
     * Merges fields, norms, stored fields and terms (in that order), then
     * closes the writer and returns whatever the writer's close() returns.
     *
     * @return Zend_Search_Lucene_Index_SegmentInfo
     * @throws Zend_Search_Lucene_Exception  if called twice or without any source segment
     */
    public function merge()
    {
        if ($this->_mergeDone) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Merge is already done.');
        }

        $sourceCount = count($this->_segmentInfos);
        if ($sourceCount < 1) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Wrong number of segments to be merged ('
                                                 . $sourceCount
                                                 . ').');
        }

        $this->_mergeFields();
        $this->_mergeNorms();
        $this->_mergeStoredFields();
        $this->_mergeTerms();

        $this->_mergeDone = true;

        return $this->_writer->close();
    }


    /**
     * Merge fields information
     *
     * Registers every source field with the writer and records the mapping
     * from source field numbers to target field numbers.
     */
    private function _mergeFields()
    {
        foreach ($this->_segmentInfos as $segName => $sourceSegment) {
            foreach ($sourceSegment->getFieldInfos() as $sourceField) {
                $targetNumber = $this->_writer->addFieldInfo($sourceField);

                $this->_fieldsMap[$segName][$sourceField->number] = $targetNumber;
            }
        }
    }


    /**
     * Merge field's normalization factors
     *
     * For each indexed target field the norm vectors of all source segments
     * are appended in segment order; norm bytes of deleted documents are dropped.
     */
    private function _mergeNorms()
    {
        foreach ($this->_writer->getFieldInfos() as $targetField) {
            if (!$targetField->isIndexed) {
                continue;
            }

            foreach ($this->_segmentInfos as $segName => $sourceSegment) {
                if (!$sourceSegment->hasDeletions()) {
                    // Nothing to filter out: pass the norm vector through unchanged
                    $this->_writer->addNorm($targetField->name, $sourceSegment->normVector($targetField->name));
                    continue;
                }

                // Copy norm bytes of live documents only
                $sourceNorm   = $sourceSegment->normVector($targetField->name);
                $filteredNorm = '';
                $docTotal     = $sourceSegment->count();
                for ($docId = 0; $docId < $docTotal; $docId++) {
                    if (!$sourceSegment->isDeleted($docId)) {
                        $filteredNorm .= $sourceNorm[$docId];
                    }
                }
                $this->_writer->addNorm($targetField->name, $filteredNorm);
            }
        }
    }


    /**
     * Merge stored fields
     *
     * Reads every document from each source segment's '.fdt' data and passes
     * the stored fields of non-deleted documents to the writer. Deleted
     * documents are still read so that the file position advances past them.
     */
    private function _mergeStoredFields()
    {
        $this->_docCount = 0;

        foreach ($this->_segmentInfos as $segName => $sourceSegment) {
            $fdtFile = $sourceSegment->openCompoundFile('.fdt');

            for ($docId = 0; $docId < $sourceSegment->count(); $docId++) {
                $fieldTotal   = $fdtFile->readVInt();
                $storedFields = array();

                for ($fieldIdx = 0; $fieldIdx < $fieldTotal; $fieldIdx++) {
                    $fieldNum  = $fdtFile->readVInt();
                    $bits      = $fdtFile->readByte();
                    $fieldInfo = $sourceSegment->getField($fieldNum);

                    // Bit 0x02 marks binary data, bit 0x01 is passed on as the tokenized flag
                    $isBinary = ($bits & 2) != 0;

                    if ($isBinary) {
                        $storedFields[] =
                            new Zend_Search_Lucene_Field($fieldInfo->name,
                                                         $fdtFile->readBinary(),
                                                         '',
                                                         true,
                                                         $fieldInfo->isIndexed,
                                                         $bits & 1,
                                                         true);
                    } else {
                        // Text data
                        $storedFields[] =
                            new Zend_Search_Lucene_Field($fieldInfo->name,
                                                         $fdtFile->readString(),
                                                         'UTF-8',
                                                         true,
                                                         $fieldInfo->isIndexed,
                                                         $bits & 1);
                    }
                }

                if (!$sourceSegment->isDeleted($docId)) {
                    $this->_docCount++;
                    $this->_writer->addStoredFields($storedFields);
                }
            }
        }
    }


    /**
     * Merge terms
     *
     * Walks the term streams of all source segments through a priority queue,
     * accumulates the positions of equal terms and writes each term once.
     */
    private function _mergeTerms()
    {
        /** Zend_Search_Lucene_Index_TermsPriorityQueue */
        require_once 'Zend/Search/Lucene/Index/TermsPriorityQueue.php';

        $termQueue = new Zend_Search_Lucene_Index_TermsPriorityQueue();

        $segmentStartId = 0;
        foreach ($this->_segmentInfos as $segName => $sourceSegment) {
            // resetTermsStream() returns the start id to be used for the next segment
            $segmentStartId = $sourceSegment->resetTermsStream($segmentStartId, Zend_Search_Lucene_Index_SegmentInfo::SM_MERGE_INFO);

            // Skip "empty" segments
            if ($sourceSegment->currentTerm() !== null) {
                $termQueue->put($sourceSegment);
            }
        }

        $this->_writer->initializeDictionaryFiles();

        $termDocs = array();
        while (($currentSegment = $termQueue->pop()) !== null) {
            // Merge positions array
            $termDocs += $currentSegment->currentTermPositions();

            // The term is complete when the queue is empty or its head holds a different term
            $termIsComplete = $termQueue->top() === null
                           || $termQueue->top()->currentTerm()->key() != $currentSegment->currentTerm()->key();

            if ($termIsComplete) {
                // We got new term
                ksort($termDocs, SORT_NUMERIC);

                // Add term if it's contained in any document
                if (count($termDocs) > 0) {
                    $this->_writer->addTerm($currentSegment->currentTerm(), $termDocs);
                }
                $termDocs = array();
            }

            $currentSegment->nextTerm();

            // check, if segment dictionary is finished
            if ($currentSegment->currentTerm() !== null) {
                // Put segment back into the priority queue
                $termQueue->put($currentSegment);
            }
        }

        $this->_writer->closeDictionaryFiles();
    }
}

View file

@ -0,0 +1,634 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: SegmentWriter.php 20096 2010-01-06 02:05:09Z bkarwin $
*/
/** Zend_Search_Lucene_Index_FieldInfo */
require_once 'Zend/Search/Lucene/Index/FieldInfo.php';
/** Zend_Search_Lucene_Index_Term */
require_once 'Zend/Search/Lucene/Index/Term.php';
/** Zend_Search_Lucene_Index_TermInfo */
require_once 'Zend/Search/Lucene/Index/TermInfo.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
abstract class Zend_Search_Lucene_Index_SegmentWriter
{
/**
* Expert: The fraction of terms in the "dictionary" which should be stored
* in RAM. Smaller values use more memory, but make searching slightly
* faster, while larger values use less memory and make searching slightly
* slower. Searching is typically not dominated by dictionary lookup, so
* tweaking this is rarely useful.
*
* @var integer
*/
public static $indexInterval = 128;
/**
* Expert: The fraction of TermDocs entries stored in skip tables.
* Larger values result in smaller indexes, greater acceleration, but fewer
* accelerable cases, while smaller values result in bigger indexes,
* less acceleration and more
* accelerable cases. More detailed experiments would be useful here.
*
* 0x7FFFFFFF indicates that we don't use skip data
*
* Note: not used in current implementation
*
* @var integer
*/
public static $skipInterval = 0x7FFFFFFF;
/**
* Expert: The maximum number of skip levels. Smaller values result in
* slightly smaller indexes, but slower skipping in big posting lists.
*
* 0 indicates that we don't use skip data
*
* Note: not used in current implementation
*
* @var integer
*/
public static $maxSkipLevels = 0;
/**
* Number of docs in a segment
*
* @var integer
*/
protected $_docCount = 0;
/**
* Segment name
*
* @var string
*/
protected $_name;
/**
* File system adapter.
*
* @var Zend_Search_Lucene_Storage_Directory
*/
protected $_directory;
/**
* List of the index files.
* Used for automatic compound file generation
*
* @var unknown_type
*/
protected $_files = array();
/**
* Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment
*
* @var array
*/
protected $_fields = array();
/**
* Normalization factors.
* An array fieldName => normVector
* normVector is a binary string.
* Each byte corresponds to an indexed document in a segment and
* encodes normalization factor (float value, encoded by
* Zend_Search_Lucene_Search_Similarity::encodeNorm())
*
* @var array
*/
protected $_norms = array();
/**
* '.fdx' file - Stored Fields, the field index.
*
* @var Zend_Search_Lucene_Storage_File
*/
protected $_fdxFile = null;
/**
* '.fdt' file - Stored Fields, the field data.
*
* @var Zend_Search_Lucene_Storage_File
*/
protected $_fdtFile = null;
/**
* Object constructor.
*
* @param Zend_Search_Lucene_Storage_Directory $directory
* @param string $name
*/
public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name)
{
$this->_directory = $directory;
$this->_name = $name;
}
/**
* Add field to the segment
*
* Returns actual field number
*
* @param Zend_Search_Lucene_Field $field
* @return integer
*/
public function addField(Zend_Search_Lucene_Field $field)
{
if (!isset($this->_fields[$field->name])) {
$fieldNumber = count($this->_fields);
$this->_fields[$field->name] =
new Zend_Search_Lucene_Index_FieldInfo($field->name,
$field->isIndexed,
$fieldNumber,
$field->storeTermVector);
return $fieldNumber;
} else {
$this->_fields[$field->name]->isIndexed |= $field->isIndexed;
$this->_fields[$field->name]->storeTermVector |= $field->storeTermVector;
return $this->_fields[$field->name]->number;
}
}
/**
* Add fieldInfo to the segment
*
* Returns actual field number
*
* @param Zend_Search_Lucene_Index_FieldInfo $fieldInfo
* @return integer
*/
public function addFieldInfo(Zend_Search_Lucene_Index_FieldInfo $fieldInfo)
{
if (!isset($this->_fields[$fieldInfo->name])) {
$fieldNumber = count($this->_fields);
$this->_fields[$fieldInfo->name] =
new Zend_Search_Lucene_Index_FieldInfo($fieldInfo->name,
$fieldInfo->isIndexed,
$fieldNumber,
$fieldInfo->storeTermVector);
return $fieldNumber;
} else {
$this->_fields[$fieldInfo->name]->isIndexed |= $fieldInfo->isIndexed;
$this->_fields[$fieldInfo->name]->storeTermVector |= $fieldInfo->storeTermVector;
return $this->_fields[$fieldInfo->name]->number;
}
}
/**
* Returns array of FieldInfo objects.
*
* @return array
*/
public function getFieldInfos()
{
return $this->_fields;
}
/**
* Add stored fields information
*
* @param array $storedFields array of Zend_Search_Lucene_Field objects
*/
public function addStoredFields($storedFields)
{
if (!isset($this->_fdxFile)) {
$this->_fdxFile = $this->_directory->createFile($this->_name . '.fdx');
$this->_fdtFile = $this->_directory->createFile($this->_name . '.fdt');
$this->_files[] = $this->_name . '.fdx';
$this->_files[] = $this->_name . '.fdt';
}
$this->_fdxFile->writeLong($this->_fdtFile->tell());
$this->_fdtFile->writeVInt(count($storedFields));
foreach ($storedFields as $field) {
$this->_fdtFile->writeVInt($this->_fields[$field->name]->number);
$fieldBits = ($field->isTokenized ? 0x01 : 0x00) |
($field->isBinary ? 0x02 : 0x00) |
0x00; /* 0x04 - third bit, compressed (ZLIB) */
$this->_fdtFile->writeByte($fieldBits);
if ($field->isBinary) {
$this->_fdtFile->writeVInt(strlen($field->value));
$this->_fdtFile->writeBytes($field->value);
} else {
$this->_fdtFile->writeString($field->getUtf8Value());
}
}
$this->_docCount++;
}
/**
* Returns the total number of documents in this segment.
*
* @return integer
*/
public function count()
{
return $this->_docCount;
}
/**
* Return segment name
*
* @return string
*/
public function getName()
{
return $this->_name;
}
/**
* Dump Field Info (.fnm) segment file
*/
protected function _dumpFNM()
{
$fnmFile = $this->_directory->createFile($this->_name . '.fnm');
$fnmFile->writeVInt(count($this->_fields));
$nrmFile = $this->_directory->createFile($this->_name . '.nrm');
// Write header
$nrmFile->writeBytes('NRM');
// Write format specifier
$nrmFile->writeByte((int)0xFF);
foreach ($this->_fields as $field) {
$fnmFile->writeString($field->name);
$fnmFile->writeByte(($field->isIndexed ? 0x01 : 0x00) |
($field->storeTermVector ? 0x02 : 0x00)
// not supported yet 0x04 /* term positions are stored with the term vectors */ |
// not supported yet 0x08 /* term offsets are stored with the term vectors */ |
);
if ($field->isIndexed) {
// pre-2.1 index mode (not used now)
// $normFileName = $this->_name . '.f' . $field->number;
// $fFile = $this->_directory->createFile($normFileName);
// $fFile->writeBytes($this->_norms[$field->name]);
// $this->_files[] = $normFileName;
$nrmFile->writeBytes($this->_norms[$field->name]);
}
}
$this->_files[] = $this->_name . '.fnm';
$this->_files[] = $this->_name . '.nrm';
}
/**
* Term Dictionary file
*
* @var Zend_Search_Lucene_Storage_File
*/
private $_tisFile = null;
/**
* Term Dictionary index file
*
* @var Zend_Search_Lucene_Storage_File
*/
private $_tiiFile = null;
/**
* Frequencies file
*
* @var Zend_Search_Lucene_Storage_File
*/
private $_frqFile = null;
/**
* Positions file
*
* @var Zend_Search_Lucene_Storage_File
*/
private $_prxFile = null;
/**
* Number of written terms
*
* @var integer
*/
private $_termCount;
/**
* Last saved term
*
* @var Zend_Search_Lucene_Index_Term
*/
private $_prevTerm;
/**
* Last saved term info
*
* @var Zend_Search_Lucene_Index_TermInfo
*/
private $_prevTermInfo;
/**
* Last saved index term
*
* @var Zend_Search_Lucene_Index_Term
*/
private $_prevIndexTerm;
/**
* Last saved index term info
*
* @var Zend_Search_Lucene_Index_TermInfo
*/
private $_prevIndexTermInfo;
/**
* Last term dictionary file position
*
* @var integer
*/
private $_lastIndexPosition;
/**
* Create dicrionary, frequency and positions files and write necessary headers
*/
public function initializeDictionaryFiles()
{
$this->_tisFile = $this->_directory->createFile($this->_name . '.tis');
$this->_tisFile->writeInt((int)0xFFFFFFFD);
$this->_tisFile->writeLong(0 /* dummy data for terms count */);
$this->_tisFile->writeInt(self::$indexInterval);
$this->_tisFile->writeInt(self::$skipInterval);
$this->_tisFile->writeInt(self::$maxSkipLevels);
$this->_tiiFile = $this->_directory->createFile($this->_name . '.tii');
$this->_tiiFile->writeInt((int)0xFFFFFFFD);
$this->_tiiFile->writeLong(0 /* dummy data for terms count */);
$this->_tiiFile->writeInt(self::$indexInterval);
$this->_tiiFile->writeInt(self::$skipInterval);
$this->_tiiFile->writeInt(self::$maxSkipLevels);
/** Dump dictionary header */
$this->_tiiFile->writeVInt(0); // preffix length
$this->_tiiFile->writeString(''); // suffix
$this->_tiiFile->writeInt((int)0xFFFFFFFF); // field number
$this->_tiiFile->writeByte((int)0x0F);
$this->_tiiFile->writeVInt(0); // DocFreq
$this->_tiiFile->writeVInt(0); // FreqDelta
$this->_tiiFile->writeVInt(0); // ProxDelta
$this->_tiiFile->writeVInt(24); // IndexDelta
$this->_frqFile = $this->_directory->createFile($this->_name . '.frq');
$this->_prxFile = $this->_directory->createFile($this->_name . '.prx');
$this->_files[] = $this->_name . '.tis';
$this->_files[] = $this->_name . '.tii';
$this->_files[] = $this->_name . '.frq';
$this->_files[] = $this->_name . '.prx';
$this->_prevTerm = null;
$this->_prevTermInfo = null;
$this->_prevIndexTerm = null;
$this->_prevIndexTermInfo = null;
$this->_lastIndexPosition = 24;
$this->_termCount = 0;
}
/**
* Add term
*
* Term positions is an array( docId => array(pos1, pos2, pos3, ...), ... )
*
* @param Zend_Search_Lucene_Index_Term $termEntry
* @param array $termDocs
*/
public function addTerm($termEntry, $termDocs)
{
$freqPointer = $this->_frqFile->tell();
$proxPointer = $this->_prxFile->tell();
$prevDoc = 0;
foreach ($termDocs as $docId => $termPositions) {
$docDelta = ($docId - $prevDoc)*2;
$prevDoc = $docId;
if (count($termPositions) > 1) {
$this->_frqFile->writeVInt($docDelta);
$this->_frqFile->writeVInt(count($termPositions));
} else {
$this->_frqFile->writeVInt($docDelta + 1);
}
$prevPosition = 0;
foreach ($termPositions as $position) {
$this->_prxFile->writeVInt($position - $prevPosition);
$prevPosition = $position;
}
}
if (count($termDocs) >= self::$skipInterval) {
/**
* @todo Write Skip Data to a freq file.
* It's not used now, but make index more optimal
*/
$skipOffset = $this->_frqFile->tell() - $freqPointer;
} else {
$skipOffset = 0;
}
$term = new Zend_Search_Lucene_Index_Term($termEntry->text,
$this->_fields[$termEntry->field]->number);
$termInfo = new Zend_Search_Lucene_Index_TermInfo(count($termDocs),
$freqPointer, $proxPointer, $skipOffset);
$this->_dumpTermDictEntry($this->_tisFile, $this->_prevTerm, $term, $this->_prevTermInfo, $termInfo);
if (($this->_termCount + 1) % self::$indexInterval == 0) {
$this->_dumpTermDictEntry($this->_tiiFile, $this->_prevIndexTerm, $term, $this->_prevIndexTermInfo, $termInfo);
$indexPosition = $this->_tisFile->tell();
$this->_tiiFile->writeVInt($indexPosition - $this->_lastIndexPosition);
$this->_lastIndexPosition = $indexPosition;
}
$this->_termCount++;
}
/**
* Close dictionary
*/
public function closeDictionaryFiles()
{
$this->_tisFile->seek(4);
$this->_tisFile->writeLong($this->_termCount);
$this->_tiiFile->seek(4);
// + 1 is used to count an additional special index entry (empty term at the start of the list)
$this->_tiiFile->writeLong(($this->_termCount - $this->_termCount % self::$indexInterval)/self::$indexInterval + 1);
}
/**
* Dump Term Dictionary segment file entry.
* Used to write entry to .tis or .tii files
*
* @param Zend_Search_Lucene_Storage_File $dicFile
* @param Zend_Search_Lucene_Index_Term $prevTerm
* @param Zend_Search_Lucene_Index_Term $term
* @param Zend_Search_Lucene_Index_TermInfo $prevTermInfo
* @param Zend_Search_Lucene_Index_TermInfo $termInfo
*/
protected function _dumpTermDictEntry(Zend_Search_Lucene_Storage_File $dicFile,
&$prevTerm, Zend_Search_Lucene_Index_Term $term,
&$prevTermInfo, Zend_Search_Lucene_Index_TermInfo $termInfo)
{
if (isset($prevTerm) && $prevTerm->field == $term->field) {
$matchedBytes = 0;
$maxBytes = min(strlen($prevTerm->text), strlen($term->text));
while ($matchedBytes < $maxBytes &&
$prevTerm->text[$matchedBytes] == $term->text[$matchedBytes]) {
$matchedBytes++;
}
// Calculate actual matched UTF-8 pattern
$prefixBytes = 0;
$prefixChars = 0;
while ($prefixBytes < $matchedBytes) {
$charBytes = 1;
if ((ord($term->text[$prefixBytes]) & 0xC0) == 0xC0) {
$charBytes++;
if (ord($term->text[$prefixBytes]) & 0x20 ) {
$charBytes++;
if (ord($term->text[$prefixBytes]) & 0x10 ) {
$charBytes++;
}
}
}
if ($prefixBytes + $charBytes > $matchedBytes) {
// char crosses matched bytes boundary
// skip char
break;
}
$prefixChars++;
$prefixBytes += $charBytes;
}
// Write preffix length
$dicFile->writeVInt($prefixChars);
// Write suffix
$dicFile->writeString(substr($term->text, $prefixBytes));
} else {
// Write preffix length
$dicFile->writeVInt(0);
// Write suffix
$dicFile->writeString($term->text);
}
// Write field number
$dicFile->writeVInt($term->field);
// DocFreq (the count of documents which contain the term)
$dicFile->writeVInt($termInfo->docFreq);
$prevTerm = $term;
if (!isset($prevTermInfo)) {
// Write FreqDelta
$dicFile->writeVInt($termInfo->freqPointer);
// Write ProxDelta
$dicFile->writeVInt($termInfo->proxPointer);
} else {
// Write FreqDelta
$dicFile->writeVInt($termInfo->freqPointer - $prevTermInfo->freqPointer);
// Write ProxDelta
$dicFile->writeVInt($termInfo->proxPointer - $prevTermInfo->proxPointer);
}
// Write SkipOffset - it's not 0 when $termInfo->docFreq > self::$skipInterval
if ($termInfo->skipOffset != 0) {
$dicFile->writeVInt($termInfo->skipOffset);
}
$prevTermInfo = $termInfo;
}
/**
 * Generate compound index file
 *
 * Packs every file listed in $this->_files into "<segment name>.cfs" and
 * deletes the originals. The compound file starts with the file count,
 * followed by one (data offset, file name) entry per file, followed by the
 * contents of the files in the same order.
 */
protected function _generateCFS()
{
    $compound = $this->_directory->createFile($this->_name . '.cfs');
    $compound->writeVInt(count($this->_files));

    // First pass: write the directory with placeholder offsets and remember
    // where each placeholder lives
    $offsetSlots = array();
    foreach ($this->_files as $name) {
        $offsetSlots[$name] = $compound->tell();
        $compound->writeLong(0); // write dummy data
        $compound->writeString($name);
    }

    $chunkSize = 131072; // 128Kb

    // Second pass: patch the real offset into the directory, then append data
    foreach ($this->_files as $name) {
        $dataStart = $compound->tell();

        $compound->seek($offsetSlots[$name]);
        $compound->writeLong($dataStart);
        // Return to the end of the file before appending
        $compound->seek($dataStart);

        $source = $this->_directory->getFileObject($name);
        for ($remaining = $this->_directory->fileLength($name);
             $remaining > 0;
             $remaining -= strlen($chunk)) {
            $chunk = $source->readBytes(min($remaining, $chunkSize));
            $compound->writeBytes($chunk);
        }

        $this->_directory->deleteFile($name);
    }
}
/**
* Close segment, write it to disk and return segment info
*
* Abstract: every concrete segment writer provides its own implementation.
*
* @return Zend_Search_Lucene_Index_SegmentInfo
*/
abstract public function close();
}

View file

@ -0,0 +1,230 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: DocumentWriter.php 20096 2010-01-06 02:05:09Z bkarwin $
*/
/** Zend_Search_Lucene_Index_SegmentWriter */
require_once 'Zend/Search/Lucene/Index/SegmentWriter.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter extends Zend_Search_Lucene_Index_SegmentWriter
{
    /**
     * Term Dictionary
     * Array of the Zend_Search_Lucene_Index_Term objects, indexed by term key
     *
     * @var array
     */
    protected $_termDictionary;

    /**
     * Documents, which contain the term
     *
     * Structure: term key => array(document number => array of positions)
     *
     * @var array
     */
    protected $_termDocs;


    /**
     * Object constructor.
     *
     * @param Zend_Search_Lucene_Storage_Directory $directory
     * @param string $name
     */
    public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name)
    {
        parent::__construct($directory, $name);

        $this->_termDocs       = array();
        $this->_termDictionary = array();
    }


    /**
     * Adds a document to this segment.
     *
     * Every indexed field contributes its terms (with positions) to the
     * in-memory dictionary and one norm byte to the field's norm vector;
     * stored fields are handed over to addStoredFields().
     *
     * @param Zend_Search_Lucene_Document $document
     * @throws Zend_Search_Lucene_Exception  if a field requests term vector storing
     */
    public function addDocument(Zend_Search_Lucene_Document $document)
    {
        /** Zend_Search_Lucene_Search_Similarity */
        require_once 'Zend/Search/Lucene/Search/Similarity.php';

        $storedFields = array();
        $docNorms     = array();
        $similarity   = Zend_Search_Lucene_Search_Similarity::getDefault();

        foreach ($document->getFieldNames() as $fieldName) {
            $field = $document->getField($fieldName);

            if ($field->storeTermVector) {
                /**
                 * @todo term vector storing support
                 */
                require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('Store term vector functionality is not supported yet.');
            }

            if ($field->isIndexed) {
                if ($field->isTokenized) {
                    /** Zend_Search_Lucene_Analysis_Analyzer */
                    require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';

                    $analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
                    $analyzer->setInput($field->value, $field->encoding);

                    $position     = 0;
                    $tokenCounter = 0;
                    while (($token = $analyzer->nextToken()) !== null) {
                        $tokenCounter++;

                        $term      = new Zend_Search_Lucene_Index_Term($token->getTermText(), $field->name);
                        $position += $token->getPositionIncrement();

                        $this->_addTermPosition($term, $position);
                    }

                    if ($tokenCounter == 0) {
                        // Field contains empty value. Treat it as non-indexed and non-tokenized
                        $field = clone($field);
                        $field->isIndexed = $field->isTokenized = false;
                    } else {
                        $docNorms[$field->name] = chr($similarity->encodeNorm( $similarity->lengthNorm($field->name,
                                                                                                       $tokenCounter)*
                                                                               $document->boost*
                                                                               $field->boost ));
                    }
                } else if (($fieldUtf8Value = $field->getUtf8Value()) == '') {
                    // Field contains empty value. Treat it as non-indexed and non-tokenized
                    $field = clone($field);
                    $field->isIndexed = $field->isTokenized = false;
                } else {
                    // The whole un-tokenized value is a single term at position 0
                    $term = new Zend_Search_Lucene_Index_Term($fieldUtf8Value, $field->name);
                    $this->_addTermPosition($term, 0);

                    $docNorms[$field->name] = chr($similarity->encodeNorm( $similarity->lengthNorm($field->name, 1)*
                                                                           $document->boost*
                                                                           $field->boost ));
                }
            }

            if ($field->isStored) {
                $storedFields[] = $field;
            }

            $this->addField($field);
        }

        foreach ($this->_fields as $fieldName => $field) {
            if (!$field->isIndexed) {
                continue;
            }

            if (!isset($this->_norms[$fieldName])) {
                // First time this field is indexed: back-fill one norm byte
                // for every document already added to the segment
                $this->_norms[$fieldName] = str_repeat(chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) )),
                                                       $this->_docCount);
            }

            if (isset($docNorms[$fieldName])){
                $this->_norms[$fieldName] .= $docNorms[$fieldName];
            } else {
                $this->_norms[$fieldName] .= chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) ));
            }
        }

        $this->addStoredFields($storedFields);
    }


    /**
     * Record one occurrence of a term in the document currently being added.
     *
     * Registers the term in the dictionary when it is seen for the first time,
     * makes sure a position list exists for the current document number and
     * appends the position to it.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @param integer $position
     */
    private function _addTermPosition(Zend_Search_Lucene_Index_Term $term, $position)
    {
        $termKey = $term->key();

        if (!isset($this->_termDictionary[$termKey])) {
            // New term
            $this->_termDictionary[$termKey] = $term;
            $this->_termDocs[$termKey]       = array();
        }

        if (!isset($this->_termDocs[$termKey][$this->_docCount])) {
            // First occurrence of the term in this document
            $this->_termDocs[$termKey][$this->_docCount] = array();
        }

        $this->_termDocs[$termKey][$this->_docCount][] = $position;
    }


    /**
     * Dump Term Dictionary (.tis) and Term Dictionary Index (.tii) segment files
     */
    protected function _dumpDictionary()
    {
        // Terms are written in term key order
        ksort($this->_termDictionary, SORT_STRING);

        $this->initializeDictionaryFiles();

        foreach ($this->_termDictionary as $termId => $term) {
            $this->addTerm($term, $this->_termDocs[$termId]);
        }

        $this->closeDictionaryFiles();
    }


    /**
     * Close segment, write it to disk and return segment info
     *
     * Nothing is written when no document has been added.
     *
     * @return Zend_Search_Lucene_Index_SegmentInfo|null  null if the segment is empty
     */
    public function close()
    {
        if ($this->_docCount == 0) {
            return null;
        }

        $this->_dumpFNM();
        $this->_dumpDictionary();

        $this->_generateCFS();

        /** Zend_Search_Lucene_Index_SegmentInfo */
        require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';

        return new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
                                                        $this->_name,
                                                        $this->_docCount,
                                                        -1,
                                                        null,
                                                        true,
                                                        true);
    }
}

View file

@ -0,0 +1,94 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: StreamWriter.php 20096 2010-01-06 02:05:09Z bkarwin $
*/
/** Zend_Search_Lucene_Index_SegmentWriter */
require_once 'Zend/Search/Lucene/Index/SegmentWriter.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Index_SegmentWriter_StreamWriter extends Zend_Search_Lucene_Index_SegmentWriter
{
    /**
     * Object constructor.
     *
     * @param Zend_Search_Lucene_Storage_Directory $directory
     * @param string $name
     */
    public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name)
    {
        parent::__construct($directory, $name);
    }


    /**
     * Create stored fields files and open them for write
     *
     * Both files (.fdx and .fdt) are also added to the segment's file list.
     */
    public function createStoredFieldsFiles()
    {
        $fdxName = $this->_name . '.fdx';
        $fdtName = $this->_name . '.fdt';

        $this->_fdxFile = $this->_directory->createFile($fdxName);
        $this->_fdtFile = $this->_directory->createFile($fdtName);

        $this->_files[] = $fdxName;
        $this->_files[] = $fdtName;
    }

    /**
     * Append a norm vector to the norms collected for a field
     *
     * @param string $fieldName
     * @param string $normVector
     */
    public function addNorm($fieldName, $normVector)
    {
        if (!isset($this->_norms[$fieldName])) {
            // First vector for this field
            $this->_norms[$fieldName] = $normVector;
            return;
        }

        $this->_norms[$fieldName] .= $normVector;
    }

    /**
     * Close segment, write it to disk and return segment info
     *
     * Returns null without writing anything when the segment holds no documents.
     *
     * @return Zend_Search_Lucene_Index_SegmentInfo
     */
    public function close()
    {
        if ($this->_docCount == 0) {
            return null;
        }

        $this->_dumpFNM();
        $this->_generateCFS();

        /** Zend_Search_Lucene_Index_SegmentInfo */
        require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';

        $segmentInfo = new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
                                                                $this->_name,
                                                                $this->_docCount,
                                                                -1,
                                                                null,
                                                                true,
                                                                true);

        return $segmentInfo;
    }
}

View file

@ -0,0 +1,144 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: Term.php 20096 2010-01-06 02:05:09Z bkarwin $
*/
/**
* A Term represents a word from text. This is the unit of search. It is
* composed of two elements, the text of the word, as a string, and the name of
* the field that the text occurred in, an interned string.
*
* Note that terms may represent more than words from text fields, but also
* things like dates, email addresses, urls, etc.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Index_Term
{
    /**
     * Field name or field number (depending from context)
     *
     * @var mixed
     */
    public $field;

    /**
     * Term value
     *
     * @var string
     */
    public $text;


    /**
     * Object constructor
     *
     * @param string $text   term value
     * @param mixed  $field  field name or number; when null, the value of
     *                       Zend_Search_Lucene::getDefaultSearchField() is used
     */
    public function __construct($text, $field = null)
    {
        $this->field = ($field === null)?  Zend_Search_Lucene::getDefaultSearchField() : $field;
        $this->text  = $text;
    }


    /**
     * Returns term key
     *
     * The key is the field, a NUL byte and the term text concatenated.
     *
     * @return string
     */
    public function key()
    {
        return $this->field . chr(0) . $this->text;
    }

    /**
     * Get term prefix
     *
     * Returns the leading $length UTF-8 characters of $str (or fewer if the
     * string is shorter). A trailing character whose encoded width runs past
     * the end of the string is not included.
     *
     * @param string $str
     * @param integer $length  number of characters (not bytes)
     * @return string
     */
    public static function getPrefix($str, $length)
    {
        // Byte length is loop-invariant, so compute it once
        $strBytes    = strlen($str);
        $prefixBytes = 0;
        $prefixChars = 0;

        while ($prefixBytes < $strBytes  &&  $prefixChars < $length) {
            // Character width is derived from the leading byte
            $leadByte  = ord($str[$prefixBytes]);
            $charBytes = 1;
            if (($leadByte & 0xC0) == 0xC0) {
                $charBytes++;
                if ($leadByte & 0x20 ) {
                    $charBytes++;
                    if ($leadByte & 0x10 ) {
                        $charBytes++;
                    }
                }
            }

            if ($prefixBytes + $charBytes > $strBytes) {
                // wrong character
                break;
            }

            $prefixChars++;
            $prefixBytes += $charBytes;
        }

        return substr($str, 0, $prefixBytes);
    }

    /**
     * Get UTF-8 string length
     *
     * Counts characters rather than bytes. Counting stops at a trailing
     * character whose encoded width runs past the end of the string.
     *
     * @param string $str
     * @return integer
     */
    public static function getLength($str)
    {
        // Byte length is loop-invariant, so compute it once
        $strBytes = strlen($str);
        $bytes    = 0;
        $chars    = 0;

        while ($bytes < $strBytes) {
            // Character width is derived from the leading byte
            $leadByte  = ord($str[$bytes]);
            $charBytes = 1;
            if (($leadByte & 0xC0) == 0xC0) {
                $charBytes++;
                if ($leadByte & 0x20 ) {
                    $charBytes++;
                    if ($leadByte & 0x10 ) {
                        $charBytes++;
                    }
                }
            }

            if ($bytes + $charBytes > $strBytes) {
                // wrong character
                break;
            }

            $chars++;
            $bytes += $charBytes;
        }

        return $chars;
    }
}

View file

@ -0,0 +1,80 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: TermInfo.php 20096 2010-01-06 02:05:09Z bkarwin $
*/
/**
* A Zend_Search_Lucene_Index_TermInfo represents a record of information stored for a term.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Index_TermInfo
{
    /**
     * The number of documents which contain the term.
     *
     * @var integer
     */
    public $docFreq;

    /**
     * Data offset in a Frequencies file.
     *
     * @var integer
     */
    public $freqPointer;

    /**
     * Data offset in a Positions file.
     *
     * @var integer
     */
    public $proxPointer;

    /**
     * SkipData offset in a Frequencies file.
     *
     * @var integer
     */
    public $skipOffset;

    /**
     * Term offset of the _next_ term in a TermDictionary file.
     * Used only for Term Index
     *
     * @var integer
     */
    public $indexPointer;


    /**
     * Object constructor; stores every argument in the property of the same name.
     *
     * @param integer $docFreq
     * @param integer $freqPointer
     * @param integer $proxPointer
     * @param integer $skipOffset
     * @param integer|null $indexPointer
     */
    public function __construct($docFreq, $freqPointer, $proxPointer, $skipOffset, $indexPointer = null)
    {
        $this->indexPointer = $indexPointer;
        $this->skipOffset   = $skipOffset;
        $this->proxPointer  = $proxPointer;
        $this->freqPointer  = $freqPointer;
        $this->docFreq      = $docFreq;
    }
}

View file

@ -0,0 +1,49 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: TermsPriorityQueue.php 20096 2010-01-06 02:05:09Z bkarwin $
*/
/** Zend_Search_Lucene_PriorityQueue */
require_once 'Zend/Search/Lucene/PriorityQueue.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Index_TermsPriorityQueue extends Zend_Search_Lucene_PriorityQueue
{
    /**
     * Compare elements
     *
     * Streams are ordered by the key of their current term, compared as
     * binary strings. Returns true if $termsStream1 is "less" than
     * $termsStream2, false otherwise.
     *
     * @param mixed $termsStream1
     * @param mixed $termsStream2
     * @return boolean
     */
    protected function _less($termsStream1, $termsStream2)
    {
        $firstKey  = $termsStream1->currentTerm()->key();
        $secondKey = $termsStream2->currentTerm()->key();

        return strcmp($firstKey, $secondKey) < 0;
    }
}

View file

@ -0,0 +1,66 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: Interface.php 20096 2010-01-06 02:05:09Z bkarwin $
*/
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
interface Zend_Search_Lucene_Index_TermsStream_Interface
{
/**
* Reset terms stream.
*/
public function resetTermsStream();
/**
* Skip terms stream up to the specified term prefix.
*
* Prefix contains fully specified field info and portion of searched term
*
* @param Zend_Search_Lucene_Index_Term $prefix
*/
public function skipTo(Zend_Search_Lucene_Index_Term $prefix);
/**
* Scans terms dictionary and returns next term
*
* @return Zend_Search_Lucene_Index_Term|null
*/
public function nextTerm();
/**
* Returns term in current position
*
* @return Zend_Search_Lucene_Index_Term|null
*/
public function currentTerm();
/**
* Close terms stream
*
* Should be used for resources clean up if stream is not read up to the end
*/
public function closeTermsStream();
}

View file

@ -0,0 +1,841 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id: Writer.php 20096 2010-01-06 02:05:09Z bkarwin $
*/
/** Zend_Search_Lucene_LockManager */
require_once 'Zend/Search/Lucene/LockManager.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Index_Writer
{
/**
* @todo Implement Analyzer substitution
* @todo Implement Zend_Search_Lucene_Storage_DirectoryRAM and Zend_Search_Lucene_Storage_FileRAM to use it for
* temporary index files
* @todo Directory lock processing
*/
/**
* Number of documents required before the buffered in-memory
* documents are written into a new Segment
*
* addDocument() commits the current segment once its document count reaches
* this value. It is also the initial merge block size used when looking for
* segments to merge.
*
* Default value is 10
*
* @var integer
*/
public $maxBufferedDocs = 10;
/**
* Largest number of documents ever merged by addDocument().
* Small values (e.g., less than 10,000) are best for interactive indexing,
* as this limits the length of pauses while indexing to a few seconds.
* Larger values are best for batched indexing and speedier searches.
*
* The merge scan stops as soon as the merge block size grows beyond this value.
*
* Default value is PHP_INT_MAX
*
* @var integer
*/
public $maxMergeDocs = PHP_INT_MAX;
/**
* Determines how often segment indices are merged by addDocument().
*
* With smaller values, less RAM is used while indexing,
* and searches on unoptimized indices are faster,
* but indexing speed is slower.
*
* With larger values, more RAM is used during indexing,
* and while searches on unoptimized indices are slower,
* indexing is faster.
*
* Thus larger values (> 10) are best for batch index creation,
* and smaller values (< 10) for indices that are interactively maintained.
*
* The merge block size is multiplied by this factor each time a segment is
* too large for the current block.
*
* Default value is 10
*
* @var integer
*/
public $mergeFactor = 10;
/**
* File system adapter.
*
* @var Zend_Search_Lucene_Storage_Directory
*/
private $_directory = null;
/**
* Changes counter.
*
* Incremented by addDocument(); added to the index version and reset to 0
* when the segments file is rewritten.
*
* @var integer
*/
private $_versionUpdate = 0;
/**
* List of the segments, created by index writer
* Array of Zend_Search_Lucene_Index_SegmentInfo objects, indexed by segment name
*
* @var array
*/
private $_newSegments = array();
/**
* List of segments to be deleted on commit
*
* Segment names, indexed by segment name
*
* @var array
*/
private $_segmentsToDelete = array();
/**
* Current segment to add documents
*
* @var Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter
*/
private $_currentSegment = null;
/**
* Array of Zend_Search_Lucene_Index_SegmentInfo objects for this index.
*
* It's a reference to the corresponding Zend_Search_Lucene::$_segmentInfos array
* (bound by reference in the constructor)
*
* @var array Zend_Search_Lucene_Index_SegmentInfo
*/
private $_segmentInfos;
/**
* Index target format version
*
* @var integer
*/
private $_targetFormatVersion;
/**
* List of index file extensions
*
* Each extension is used both as key and as value, so that membership can be
* tested with isset() on the last four characters of a file name.
*
* @var array
*/
private static $_indexExtensions = array('.cfs' => '.cfs',
'.cfx' => '.cfx',
'.fnm' => '.fnm',
'.fdx' => '.fdx',
'.fdt' => '.fdt',
'.tis' => '.tis',
'.tii' => '.tii',
'.frq' => '.frq',
'.prx' => '.prx',
'.tvx' => '.tvx',
'.tvd' => '.tvd',
'.tvf' => '.tvf',
'.del' => '.del',
'.sti' => '.sti' );
/**
 * Create empty index
 *
 * Generation 0 selects the pre-2.1 layout: existing index files are removed
 * from the directory, then 'segments' and 'deletable' files are written.
 * Any other generation writes 'segments.gen' plus the segments file named
 * after that generation.
 *
 * @param Zend_Search_Lucene_Storage_Directory $directory
 * @param integer $generation
 * @param integer $nameCount
 */
public static function createIndex(Zend_Search_Lucene_Storage_Directory $directory, $generation, $nameCount)
{
    $isPre21 = ($generation == 0);

    if ($isPre21) {
        // Create index in pre-2.1 mode: clean out files of a previous index first
        foreach ($directory->fileList() as $fileName) {
            $belongsToIndex = $fileName == 'deletable'
                           || $fileName == 'segments'
                           || isset(self::$_indexExtensions[ substr($fileName, strlen($fileName)-4)])
                           || preg_match('/\.f\d+$/i', $fileName) /* matches <segment_name>.f<decimal_nmber> file names */;

            if ($belongsToIndex) {
                $directory->deleteFile($fileName);
            }
        }

        $segmentsFile = $directory->createFile('segments');
        $formatMarker = (int)0xFFFFFFFF;
    } else {
        $genFile = $directory->createFile('segments.gen');
        $genFile->writeInt((int)0xFFFFFFFE);
        // Write generation two times
        $genFile->writeLong($generation);
        $genFile->writeLong($generation);

        $segmentsFile = $directory->createFile(Zend_Search_Lucene::getSegmentFileName($generation));
        $formatMarker = (int)0xFFFFFFFD;
    }

    // The segments file body is the same for both layouts apart from the marker
    $segmentsFile->writeInt($formatMarker);
    // write version (initialized by current time)
    $segmentsFile->writeLong(round(microtime(true)));
    // write name counter
    $segmentsFile->writeInt($nameCount);
    // write segment counter
    $segmentsFile->writeInt(0);

    if ($isPre21) {
        $deletableFile = $directory->createFile('deletable');
        // write counter
        $deletableFile->writeInt(0);
    }
}
/**
 * Open the index for writing
 *
 * The segment info list is bound by reference, so changes made by the
 * writer are visible through the caller's array.
 *
 * @param Zend_Search_Lucene_Storage_Directory $directory
 * @param array $segmentInfos
 * @param integer $targetFormatVersion
 */
public function __construct(Zend_Search_Lucene_Storage_Directory $directory, &$segmentInfos, $targetFormatVersion)
{
    $this->_targetFormatVersion = $targetFormatVersion;
    $this->_directory           = $directory;
    $this->_segmentInfos        = &$segmentInfos;
}
/**
 * Adds a document to this index.
 *
 * The document goes into the current in-memory segment (created on demand).
 * The segment is committed once it holds maxBufferedDocs documents, after
 * which segments are merged if necessary.
 *
 * @param Zend_Search_Lucene_Document $document
 */
public function addDocument(Zend_Search_Lucene_Document $document)
{
    /** Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter */
    require_once 'Zend/Search/Lucene/Index/SegmentWriter/DocumentWriter.php';

    if ($this->_currentSegment === null) {
        $segmentName = $this->_newSegmentName();
        $this->_currentSegment =
            new Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter($this->_directory, $segmentName);
    }

    $segment = $this->_currentSegment;
    $segment->addDocument($document);

    if ($segment->count() >= $this->maxBufferedDocs) {
        $this->commit();
    }

    $this->_maybeMergeSegments();

    $this->_versionUpdate++;
}
/**
 * Check if we have anything to merge
 *
 * Walks the segment sizes in ascending order, collecting them into a pool.
 * The merge block size starts at maxBufferedDocs and is multiplied by
 * mergeFactor whenever a segment is too large for the current block.
 * A merge is due as soon as the pooled size reaches the block size.
 *
 * NOTE(review): with a mergeFactor of 1 the block size never grows, so the
 * inner loop would not terminate for a segment that is at least as large as
 * the block while the pool is smaller - confirm callers never configure that.
 *
 * @return boolean
 */
private function _hasAnythingToMerge()
{
    // Only the sizes matter here; segment names are not needed
    $segmentSizes = array();
    foreach ($this->_segmentInfos as $segmentInfo) {
        $segmentSizes[] = $segmentInfo->count();
    }
    sort($segmentSizes, SORT_NUMERIC);

    $poolSize    = 0;
    $sizeToMerge = $this->maxBufferedDocs;

    foreach ($segmentSizes as $size) {
        // Check, if segment comes into a new merging block
        while ($size >= $sizeToMerge) {
            // Previous block is large enough to be merged
            if ($poolSize >= $sizeToMerge) {
                return true;
            }

            $poolSize     = 0;
            $sizeToMerge *= $this->mergeFactor;

            if ($sizeToMerge > $this->maxMergeDocs) {
                return false;
            }
        }

        $poolSize += $size;
    }

    return $poolSize >= $sizeToMerge;
}
/**
 * Merge segments if necessary
 *
 * Runs under the optimization lock; does nothing if the lock cannot be
 * obtained. The lock is released on every exit path, including when
 * refreshing or merging segments throws.
 *
 * @throws Exception  whatever is raised while refreshing or merging segments
 *                    (re-thrown after the optimization lock is released)
 */
private function _maybeMergeSegments()
{
    if (Zend_Search_Lucene_LockManager::obtainOptimizationLock($this->_directory) === false) {
        return;
    }

    try {
        if ($this->_hasAnythingToMerge()) {
            // Update segments list to be sure all segments are not merged yet by another process
            //
            // Segment merging functionality is concentrated in this class and surrounded
            // by optimization lock obtaining/releasing.
            // _updateSegments() refreshes segments list from the latest index generation.
            // So only new segments can be added to the index while we are merging some already existing
            // segments.
            // Newly added segments will be also included into the index by the _updateSegments() call
            // either by another process or by the current process with the commit() call at the end of _mergeSegments() method.
            // That's guaranteed by the serialisation of _updateSegments() execution using exclusive locks.
            $this->_updateSegments();

            // Perform standard auto-optimization procedure
            $segmentSizes = array();
            foreach ($this->_segmentInfos as $segName => $segmentInfo) {
                $segmentSizes[$segName] = $segmentInfo->count();
            }

            $mergePool     = array();
            $poolSize      = 0;
            $sizeToMerge   = $this->maxBufferedDocs;
            $limitExceeded = false;
            asort($segmentSizes, SORT_NUMERIC);

            foreach ($segmentSizes as $segName => $size) {
                // Check, if segment comes into a new merging block
                while ($size >= $sizeToMerge) {
                    // Merge previous block if it's large enough
                    if ($poolSize >= $sizeToMerge) {
                        $this->_mergeSegments($mergePool);
                    }
                    $mergePool = array();
                    $poolSize  = 0;

                    $sizeToMerge *= $this->mergeFactor;

                    if ($sizeToMerge > $this->maxMergeDocs) {
                        // Block size limit reached: stop without a final merge
                        $limitExceeded = true;
                        break 2;
                    }
                }

                $mergePool[] = $this->_segmentInfos[$segName];
                $poolSize   += $size;
            }

            if (!$limitExceeded && $poolSize >= $sizeToMerge) {
                $this->_mergeSegments($mergePool);
            }
        }
    } catch (Exception $e) {
        // Do not leave the optimization lock held when merging fails
        Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
        throw $e;
    }

    Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
}
/**
 * Merge specified segments
 *
 * $segments is an array of SegmentInfo objects. The sources are queued for
 * deletion, the merged segment (if any) is registered as a new segment,
 * and the result is committed.
 *
 * @param array $segments
 */
private function _mergeSegments($segments)
{
    $mergedName = $this->_newSegmentName();

    /** Zend_Search_Lucene_Index_SegmentMerger */
    require_once 'Zend/Search/Lucene/Index/SegmentMerger.php';
    $merger = new Zend_Search_Lucene_Index_SegmentMerger($this->_directory, $mergedName);

    foreach ($segments as $source) {
        $merger->addSource($source);

        $sourceName = $source->getName();
        $this->_segmentsToDelete[$sourceName] = $sourceName;
    }

    $merged = $merger->merge();
    if ($merged !== null) {
        $this->_newSegments[$merged->getName()] = $merged;
    }

    $this->commit();
}
/**
* Update segments file by adding current segment to a list
*
* @throws Zend_Search_Lucene_Exception
*/
private function _updateSegments()
{
// Get an exclusive index lock
Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);
// Write down changes for the segments
foreach ($this->_segmentInfos as $segInfo) {
$segInfo->writeChanges();
}
$generation = Zend_Search_Lucene::getActualGeneration($this->_directory);
$segmentsFile = $this->_directory->getFileObject(Zend_Search_Lucene::getSegmentFileName($generation), false);
$newSegmentFile = $this->_directory->createFile(Zend_Search_Lucene::getSegmentFileName(++$generation), false);
try {
$genFile = $this->_directory->getFileObject('segments.gen', false);
} catch (Zend_Search_Lucene_Exception $e) {
if (strpos($e->getMessage(), 'is not readable') !== false) {
$genFile = $this->_directory->createFile('segments.gen');
} else {
throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
}
}
$genFile->writeInt((int)0xFFFFFFFE);
// Write generation (first copy)
$genFile->writeLong($generation);
try {
// Write format marker
if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_1) {
$newSegmentFile->writeInt((int)0xFFFFFFFD);
} else if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
$newSegmentFile->writeInt((int)0xFFFFFFFC);
}
// Read src file format identifier
$format = $segmentsFile->readInt();
if ($format == (int)0xFFFFFFFF) {
$srcFormat = Zend_Search_Lucene::FORMAT_PRE_2_1;
} else if ($format == (int)0xFFFFFFFD) {
$srcFormat = Zend_Search_Lucene::FORMAT_2_1;
} else if ($format == (int)0xFFFFFFFC) {
$srcFormat = Zend_Search_Lucene::FORMAT_2_3;
} else {
throw new Zend_Search_Lucene_Exception('Unsupported segments file format');
}
$version = $segmentsFile->readLong() + $this->_versionUpdate;
$this->_versionUpdate = 0;
$newSegmentFile->writeLong($version);
// Write segment name counter
$newSegmentFile->writeInt($segmentsFile->readInt());
// Get number of segments offset
$numOfSegmentsOffset = $newSegmentFile->tell();
// Write dummy data (segment counter)
$newSegmentFile->writeInt(0);
// Read number of segemnts
$segmentsCount = $segmentsFile->readInt();
$segments = array();
for ($count = 0; $count < $segmentsCount; $count++) {
$segName = $segmentsFile->readString();
$segSize = $segmentsFile->readInt();
if ($srcFormat == Zend_Search_Lucene::FORMAT_PRE_2_1) {
// pre-2.1 index format
$delGen = 0;
$hasSingleNormFile = false;
$numField = (int)0xFFFFFFFF;
$isCompoundByte = 0;
$docStoreOptions = null;
} else {
$delGen = $segmentsFile->readLong();
if ($srcFormat == Zend_Search_Lucene::FORMAT_2_3) {
$docStoreOffset = $segmentsFile->readInt();
if ($docStoreOffset != (int)0xFFFFFFFF) {
$docStoreSegment = $segmentsFile->readString();
$docStoreIsCompoundFile = $segmentsFile->readByte();
$docStoreOptions = array('offset' => $docStoreOffset,
'segment' => $docStoreSegment,
'isCompound' => ($docStoreIsCompoundFile == 1));
} else {
$docStoreOptions = null;
}
} else {
$docStoreOptions = null;
}
$hasSingleNormFile = $segmentsFile->readByte();
$numField = $segmentsFile->readInt();
$normGens = array();
if ($numField != (int)0xFFFFFFFF) {
for ($count1 = 0; $count1 < $numField; $count1++) {
$normGens[] = $segmentsFile->readLong();
}
}
$isCompoundByte = $segmentsFile->readByte();
}
if (!in_array($segName, $this->_segmentsToDelete)) {
// Load segment if necessary
if (!isset($this->_segmentInfos[$segName])) {
if ($isCompoundByte == 0xFF) {
// The segment is not a compound file
$isCompound = false;
} else if ($isCompoundByte == 0x00) {
// The status is unknown
$isCompound = null;
} else if ($isCompoundByte == 0x01) {
// The segment is a compound file
$isCompound = true;
}
/** Zend_Search_Lucene_Index_SegmentInfo */
require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
$this->_segmentInfos[$segName] =
new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
$segName,
$segSize,
$delGen,
$docStoreOptions,
$hasSingleNormFile,
$isCompound);
} else {
// Retrieve actual deletions file generation number
$delGen = $this->_segmentInfos[$segName]->getDelGen();
}
$newSegmentFile->writeString($segName);
$newSegmentFile->writeInt($segSize);
$newSegmentFile->writeLong($delGen);
if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
if ($docStoreOptions !== null) {
$newSegmentFile->writeInt($docStoreOffset);
$newSegmentFile->writeString($docStoreSegment);
$newSegmentFile->writeByte($docStoreIsCompoundFile);
} else {
// Set DocStoreOffset to -1
$newSegmentFile->writeInt((int)0xFFFFFFFF);
}
} else if ($docStoreOptions !== null) {
// Release index write lock
Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
throw new Zend_Search_Lucene_Exception('Index conversion to lower format version is not supported.');
}
$newSegmentFile->writeByte($hasSingleNormFile);
$newSegmentFile->writeInt($numField);
if ($numField != (int)0xFFFFFFFF) {
foreach ($normGens as $normGen) {
$newSegmentFile->writeLong($normGen);
}
}
$newSegmentFile->writeByte($isCompoundByte);
$segments[$segName] = $segSize;
}
}
$segmentsFile->close();
$segmentsCount = count($segments) + count($this->_newSegments);
foreach ($this->_newSegments as $segName => $segmentInfo) {
$newSegmentFile->writeString($segName);
$newSegmentFile->writeInt($segmentInfo->count());
// delete file generation: -1 (there is no delete file yet)
$newSegmentFile->writeInt((int)0xFFFFFFFF);$newSegmentFile->writeInt((int)0xFFFFFFFF);
if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
// docStoreOffset: -1 (segment doesn't use shared doc store)
$newSegmentFile->writeInt((int)0xFFFFFFFF);
}
// HasSingleNormFile
$newSegmentFile->writeByte($segmentInfo->hasSingleNormFile());
// NumField
$newSegmentFile->writeInt((int)0xFFFFFFFF);
// IsCompoundFile
$newSegmentFile->writeByte($segmentInfo->isCompound() ? 1 : -1);
$segments[$segmentInfo->getName()] = $segmentInfo->count();
$this->_segmentInfos[$segName] = $segmentInfo;
}
$this->_newSegments = array();
$newSegmentFile->seek($numOfSegmentsOffset);
$newSegmentFile->writeInt($segmentsCount); // Update segments count
$newSegmentFile->close();
} catch (Exception $e) {
/** Restore previous index generation */
$generation--;
$genFile->seek(4, SEEK_SET);
// Write generation number twice
$genFile->writeLong($generation); $genFile->writeLong($generation);
// Release index write lock
Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
// Throw the exception
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
}
// Write generation (second copy)
$genFile->writeLong($generation);
// Check if another update or read process is not running now
// If yes, skip clean-up procedure
if (Zend_Search_Lucene_LockManager::escalateReadLock($this->_directory)) {
/**
* Clean-up directory
*/
$filesToDelete = array();
$filesTypes = array();
$filesNumbers = array();
// list of .del files of currently used segments
// each segment can have several generations of .del files
// only last should not be deleted
$delFiles = array();
foreach ($this->_directory->fileList() as $file) {
if ($file == 'deletable') {
// 'deletable' file
$filesToDelete[] = $file;
$filesTypes[] = 0; // delete this file first, since it's not used starting from Lucene v2.1
$filesNumbers[] = 0;
} else if ($file == 'segments') {
// 'segments' file
$filesToDelete[] = $file;
$filesTypes[] = 1; // second file to be deleted "zero" version of segments file (Lucene pre-2.1)
$filesNumbers[] = 0;
} else if (preg_match('/^segments_[a-zA-Z0-9]+$/i', $file)) {
// 'segments_xxx' file
// Check if it's not a just created generation file
if ($file != Zend_Search_Lucene::getSegmentFileName($generation)) {
$filesToDelete[] = $file;
$filesTypes[] = 2; // first group of files for deletions
$filesNumbers[] = (int)base_convert(substr($file, 9), 36, 10); // ordered by segment generation numbers
}
} else if (preg_match('/(^_([a-zA-Z0-9]+))\.f\d+$/i', $file, $matches)) {
// one of per segment files ('<segment_name>.f<decimal_number>')
// Check if it's not one of the segments in the current segments set
if (!isset($segments[$matches[1]])) {
$filesToDelete[] = $file;
$filesTypes[] = 3; // second group of files for deletions
$filesNumbers[] = (int)base_convert($matches[2], 36, 10); // order by segment number
}
} else if (preg_match('/(^_([a-zA-Z0-9]+))(_([a-zA-Z0-9]+))\.del$/i', $file, $matches)) {
// one of per segment files ('<segment_name>_<del_generation>.del' where <segment_name> is '_<segment_number>')
// Check if it's not one of the segments in the current segments set
if (!isset($segments[$matches[1]])) {
$filesToDelete[] = $file;
$filesTypes[] = 3; // second group of files for deletions
$filesNumbers[] = (int)base_convert($matches[2], 36, 10); // order by segment number
} else {
$segmentNumber = (int)base_convert($matches[2], 36, 10);
$delGeneration = (int)base_convert($matches[4], 36, 10);
if (!isset($delFiles[$segmentNumber])) {
$delFiles[$segmentNumber] = array();
}
$delFiles[$segmentNumber][$delGeneration] = $file;
}
} else if (isset(self::$_indexExtensions[substr($file, strlen($file)-4)])) {
// one of per segment files ('<segment_name>.<ext>')
$segmentName = substr($file, 0, strlen($file) - 4);
// Check if it's not one of the segments in the current segments set
if (!isset($segments[$segmentName]) &&
($this->_currentSegment === null || $this->_currentSegment->getName() != $segmentName)) {
$filesToDelete[] = $file;
$filesTypes[] = 3; // second group of files for deletions
$filesNumbers[] = (int)base_convert(substr($file, 1 /* skip '_' */, strlen($file)-5), 36, 10); // order by segment number
}
}
}
$maxGenNumber = 0;
// process .del files of currently used segments
foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
ksort($delFiles[$segmentNumber], SORT_NUMERIC);
array_pop($delFiles[$segmentNumber]); // remove last delete file generation from candidates for deleting
end($delFiles[$segmentNumber]);
$lastGenNumber = key($delFiles[$segmentNumber]);
if ($lastGenNumber > $maxGenNumber) {
$maxGenNumber = $lastGenNumber;
}
}
foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
foreach ($segmentDelFiles as $delGeneration => $file) {
$filesToDelete[] = $file;
$filesTypes[] = 4; // third group of files for deletions
$filesNumbers[] = $segmentNumber*$maxGenNumber + $delGeneration; // order by <segment_number>,<del_generation> pair
}
}
// Reorder files for deleting
array_multisort($filesTypes, SORT_ASC, SORT_NUMERIC,
$filesNumbers, SORT_ASC, SORT_NUMERIC,
$filesToDelete, SORT_ASC, SORT_STRING);
foreach ($filesToDelete as $file) {
try {
/** Skip shared docstore segments deleting */
/** @todo Process '.cfx' files to check if them are already unused */
if (substr($file, strlen($file)-4) != '.cfx') {
$this->_directory->deleteFile($file);
}
} catch (Zend_Search_Lucene_Exception $e) {
if (strpos($e->getMessage(), 'Can\'t delete file') === false) {
// That's not "file is under processing or already deleted" exception
// Pass it through
throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
}
}
}
// Return read lock into the previous state
Zend_Search_Lucene_LockManager::deEscalateReadLock($this->_directory);
} else {
// Only release resources if another index reader is running now
foreach ($this->_segmentsToDelete as $segName) {
foreach (self::$_indexExtensions as $ext) {
$this->_directory->purgeFile($segName . $ext);
}
}
}
// Clean-up _segmentsToDelete container
$this->_segmentsToDelete = array();
// Release index write lock
Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
// Remove unused segments from segments list
foreach ($this->_segmentInfos as $segName => $segmentInfo) {
if (!isset($segments[$segName])) {
unset($this->_segmentInfos[$segName]);
}
}
}
/**
* Commit current changes
*/
public function commit()
{
    // Close the segment that is still being built (if there is one) and
    // queue the resulting segment so that it is picked up by the segments
    // list update below.
    if (null !== $this->_currentSegment) {
        $closedSegment = $this->_currentSegment->close();

        // close() may return null; only a real segment is registered
        if (null !== $closedSegment) {
            $segmentKey = $closedSegment->getName();
            $this->_newSegments[$segmentKey] = $closedSegment;
        }

        $this->_currentSegment = null;
    }

    // Write out the updated segments list
    $this->_updateSegments();
}
/**
* Merges the provided indexes into this index.
*
* NOTE: not implemented yet. The method body is currently empty, so calling it
* has no effect.
*
* @param array $readers
* @return void
*/
public function addIndexes($readers)
{
/**
* @todo implementation
*
* Intentionally empty for now: $readers is ignored and nothing is changed.
*/
}
/**
* Merges all segments together into new one
*
* Returns true on success and false if another optimization or auto-optimization process
* is running now
*
* @return boolean
*/
public function optimize()
{
    // Only one optimization may run at a time: give up immediately if the
    // optimization lock can't be obtained.
    if (Zend_Search_Lucene_LockManager::obtainOptimizationLock($this->_directory) === false) {
        return false;
    }

    try {
        // Update segments list to be sure all segments are not merged yet by another process
        //
        // Segment merging functionality is concentrated in this class and surrounded
        // by optimization lock obtaining/releasing.
        // _updateSegments() refreshes segments list from the latest index generation.
        // So only new segments can be added to the index while we are merging some already existing
        // segments.
        // Newly added segments will be also included into the index by the _updateSegments() call
        // either by another process or by the current process with the commit() call at the end of _mergeSegments() method.
        // That's guaranteed by the serialisation of _updateSegments() execution using exclusive locks.
        $this->_updateSegments();
        $this->_mergeSegments($this->_segmentInfos);
    } catch (Exception $e) {
        // Never leave the optimization lock held when refreshing or merging
        // fails; release it and pass the original exception through unchanged.
        Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
        throw $e;
    }

    Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);

    return true;
}
/**
* Get name for new segment
*
* @return string
*/
private function _newSegmentName()
{
    // Reads the segment name counter from the current segments file, stores
    // the incremented value back and returns the name built from the value
    // that was read ('_' followed by the counter in base 36).
    // The whole read-modify-write sequence runs under the index write lock.
    Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);

    try {
        $generation   = Zend_Search_Lucene::getActualGeneration($this->_directory);
        $segmentsFile = $this->_directory->getFileObject(Zend_Search_Lucene::getSegmentFileName($generation), false);

        $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)
        $segmentNameCounter = $segmentsFile->readInt();

        $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)
        $segmentsFile->writeInt($segmentNameCounter + 1);

        // Flush output to guarantee that wrong value will not be loaded between unlock and
        // return (which calls $segmentsFile destructor)
        $segmentsFile->flush();
    } catch (Exception $e) {
        // Do not keep the write lock if the segments file can't be read or
        // updated; release it and pass the original exception through unchanged.
        Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
        throw $e;
    }

    Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

    return '_' . base_convert($segmentNameCounter, 10, 36);
}
}