You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

538 lines
16 KiB

<?php
/**
* li₃: the most RAD framework for PHP (http://li3.me)
*
* Copyright 2009, Union of RAD. All rights reserved. This source
* code is distributed under the terms of the BSD 3-Clause License.
* The full license text can be found in the LICENSE.txt file.
*/
namespace lithium\g11n\catalog\adapter;
use RangeException;
use lithium\core\ConfigException;
use lithium\core\Libraries;
/**
* The `Gettext` class is an adapter for reading and writing PO and MO files without the
* requirement of having the gettext extension enabled or installed. Moreover it doesn't
* require the usage of the non thread safe `setlocale()`.
*
* The adapter works with the directory structure below. The example shows the structure
* for the directory as given by the `'path'` configuration setting. It closely resembles
* the standard gettext directory structure with a few slight adjustments to the way
* templates are being named.
*
* ```asciiart
* resources/g11n/po
* ├── <locale>
* | ├── LC_MESSAGES
* | | ├── default.po
* | | ├── default.mo
* | | ├── <scope>.po
* | | └── <scope>.mo
* | ├── LC_VALIDATION
* | | └── ...
* | └── ...
* ├── <locale>
* | └── ...
* ├── message_default.pot
* ├── message_<scope>.pot
* ├── validation_default.pot
* ├── validation_<scope>.pot
* └── ...
* ```
*
* @see lithium\g11n\Locale
* @link http://php.net/setlocale PHP Manual: setlocale()
* @link http://www.gnu.org/software/gettext/manual/gettext.html GNU Gettext Utilities
*/
class Gettext extends \lithium\g11n\catalog\Adapter {
/**
* Magic used for validating the format of a MO file as well as
* detecting if the machine used to create that file was little endian.
*
* @see lithium\g11n\catalog\adapter\Gettext::_parseMo()
* @var float
*/
const MO_LITTLE_ENDIAN_MAGIC = 0x950412de;
/**
* Magic used for validating the format of a MO file as well as
* detecting if the machine used to create that file was big endian.
*
* @see lithium\g11n\catalog\adapter\Gettext::_parseMo()
* @var float
*/
const MO_BIG_ENDIAN_MAGIC = 0xde120495;
/**
* The size of the header of a MO file in bytes.
*
* @see lithium\g11n\catalog\adapter\Gettext::_parseMo()
* @var integer Number of bytes.
*/
const MO_HEADER_SIZE = 28;
/**
 * Constructor. Supplies a `null` path when the configuration does not provide one.
 *
 * @param array $config Available configuration options are:
 *        - `'path'`: The path to the directory holding the data.
 * @return void
 */
public function __construct(array $config = []) {
	// Array union keeps every key already present in $config.
	parent::__construct($config + ['path' => null]);
}
/**
 * Initializer. Runs the parent initializer, then verifies that the configured
 * path points to an existing directory.
 *
 * @return void
 * @throws ConfigException If the configured path is not a directory.
 */
protected function _init() {
	parent::_init();

	if (is_dir($this->_config['path'])) {
		return;
	}
	$message = "Gettext directory does not exist at path `{$this->_config['path']}`.";
	throw new ConfigException($message);
}
/**
 * Reads data.
 *
 * Candidate files are tried in the order returned by `_files()`, so MO files are
 * preferred over PO files when existent. The first file that yields any data wins;
 * a plural rule entry is added to that data before it is returned.
 *
 * @see lithium\g11n\catalog\adapter\Gettext::_files()
 * @param string $category A category.
 * @param string $locale A locale identifier.
 * @param string $scope The scope for the current operation.
 * @return array|null The parsed data or `null` if no candidate file yielded any.
 */
public function read($category, $locale, $scope) {
	foreach ($this->_files($category, $locale, $scope) as $file) {
		if (!file_exists($file) || !is_readable($file)) {
			continue;
		}
		// A file may still fail to open (i.e. removed in the meantime); try the next one.
		if (!$stream = fopen($file, 'rb')) {
			continue;
		}
		$method = '_parse' . ucfirst(pathinfo($file, PATHINFO_EXTENSION));

		// Parsers may throw; make sure the stream is released in any case.
		try {
			$data = $this->{$method}($stream);
		} finally {
			fclose($stream);
		}
		if (!$data) {
			continue;
		}
		$data['pluralRule'] = [
			'id' => 'pluralRule',
			'translated' => function($count) {
				return $count !== 1;
			}
		];
		return $data;
	}
	return null;
}
/**
 * Writes data.
 *
 * Every file returned by `_files()` is (re)created and passed to the compiler matching
 * its extension. A target for which the compiler produced no output at all is removed
 * again, as an empty file would otherwise be picked up - and rejected - on a later read
 * instead of a sibling file that actually holds the data.
 *
 * @see lithium\g11n\catalog\adapter\Gettext::_files()
 * @param string $category A category.
 * @param string $locale A locale identifier.
 * @param string $scope The scope for the current operation.
 * @param array $data The data to write.
 * @return boolean `false` as soon as a target file cannot be opened, `true` otherwise.
 */
public function write($category, $locale, $scope, array $data) {
	foreach ($this->_files($category, $locale, $scope) as $file) {
		$method = '_compile' . ucfirst(pathinfo($file, PATHINFO_EXTENSION));

		if (!$stream = fopen($file, 'wb')) {
			return false;
		}
		// Release the stream even when the compiler throws.
		try {
			$this->{$method}($stream, $data);
			$written = ftell($stream);
		} finally {
			fclose($stream);
		}
		// The file was just truncated; position 0 means the compiler wrote nothing.
		if ($written === 0) {
			unlink($file);
		}
	}
	return true;
}
/**
 * Builds the candidate file paths for a category, locale and scope below the
 * configured `'path'`.
 *
 * Categories containing `Template` map to a single `<category>_<scope>.pot` file,
 * where `<category>` is the part of the name in front of `Template`. Any other
 * category maps to an MO and a PO file (in that order) inside the
 * `<locale>/LC_<CATEGORY>` directory; `message` is renamed to `messages` first.
 * An empty scope falls back to `default`.
 *
 * @param string $category
 * @param string $locale
 * @param string $scope
 * @return array
 */
protected function _files($category, $locale, $scope) {
	$base = $this->_config['path'];
	$name = $scope ?: 'default';
	$templateAt = strpos($category, 'Template');

	if ($templateAt !== false) {
		$prefix = substr($category, 0, $templateAt);
		return ["{$base}/{$prefix}_{$name}.pot"];
	}
	$directory = 'LC_' . strtoupper($category === 'message' ? 'messages' : $category);
	$stem = "{$base}/{$locale}/{$directory}/{$name}";

	return ["{$stem}.mo", "{$stem}.po"];
}
/**
 * Parses portable object (PO) format.
 *
 * This parser sacrifices some features of the reference implementation. The
 * differences to that implementation are as follows:
 * - No support for comments spanning multiple lines.
 * - Translator and extracted comments are treated as being the same type.
 * - Message IDs are allowed to have other encodings than just US-ASCII.
 * - The index of a `msgstr[N]` line must be a single digit.
 *
 * A flag line may hold several comma separated flags (i.e. `#, fuzzy, php-format`);
 * each of them is stored as its own flag. Quoted continuation lines are appended to
 * the field introduced by the most recent keyword line (`msgctxt`, `msgid`,
 * `msgid_plural`, `msgstr` or `msgstr[N]`).
 *
 * Items with an empty id are ignored. For more information see `_merge()`.
 *
 * @param resource $stream
 * @return array
 */
protected function _parsePo($stream) {
	$defaults = [
		'ids' => [],
		'translated' => null,
		'flags' => [],
		'comments' => [],
		'occurrences' => [],
		'context' => null
	];
	$data = [];
	$item = $defaults;

	// Field (and key within that field, if any) a continuation line belongs to.
	$continues = null;

	while (($line = fgets($stream)) !== false) {
		$line = trim($line);

		if ($line === '') {
			// A blank line terminates the current item.
			$data = $this->_merge($data, $item);
			$item = $defaults;
			$continues = null;
		} elseif (substr($line, 0, 3) === '#~ ') {
			$item['flags']['obsolete'] = true;
		} elseif (substr($line, 0, 3) === '#, ') {
			foreach (explode(',', substr($line, 3)) as $flag) {
				$flag = trim($flag);

				if ($flag !== '') {
					$item['flags'][$flag] = true;
				}
			}
		} elseif (substr($line, 0, 3) === '#: ') {
			$item['occurrences'][] = [
				'file' => strtok(substr($line, 3), ':'),
				'line' => strtok(':')
			];
		} elseif (substr($line, 0, 3) === '#. ') {
			$item['comments'][] = substr($line, 3);
		} elseif ($line[0] === '#') {
			$item['comments'][] = ltrim(substr($line, 1));
		} elseif (substr($line, 0, 7) === 'msgid "') {
			$item['ids']['singular'] = substr($line, 7, -1);
			$continues = ['ids', 'singular'];
		} elseif (substr($line, 0, 9) === 'msgctxt "') {
			$item['context'] = substr($line, 9, -1);
			$continues = ['context', null];
		} elseif (substr($line, 0, 8) === 'msgstr "') {
			$item['translated'] = substr($line, 8, -1);
			$continues = ['translated', null];
		} elseif ($line[0] === '"') {
			// Continuation lines without a preceding keyword line have no target.
			if ($continues !== null) {
				list($field, $key) = $continues;
				$chunk = substr($line, 1, -1);

				if ($key === null) {
					$item[$field] .= $chunk;
				} else {
					$item[$field][$key] .= $chunk;
				}
			}
		} elseif (substr($line, 0, 14) === 'msgid_plural "') {
			$item['ids']['plural'] = substr($line, 14, -1);
			$continues = ['ids', 'plural'];
		} elseif (substr($line, 0, 7) === 'msgstr[') {
			$index = (int) substr($line, 7, 1);
			$item['translated'][$index] = substr($line, 11, -1);
			$continues = ['translated', $index];
		}
	}
	// The last item is not necessarily followed by a blank line.
	return $this->_merge($data, $item);
}
/**
 * Parses portable object template (POT) format by handing the stream to the
 * PO parser.
 *
 * @see lithium\g11n\catalog\adapter\Gettext::_parsePo()
 * @param resource $stream
 * @return array
 */
protected function _parsePot($stream) {
	$parsed = $this->_parsePo($stream);
	return $parsed;
}
/**
 * Parses machine object (MO) format, independent of the endianness of the machine
 * it was created on. Both 32bit and 64bit systems are supported.
 *
 * Entries with a zero length id (i.e. the header entry) are skipped. Entries with a
 * zero length translation are kept with an empty string as translation.
 *
 * @link https://www.gnu.org/software/gettext/manual/html_node/MO-Files.html
 * @param resource $stream
 * @return array
 * @throws RangeException If stream content has an invalid format.
 */
protected function _parseMo($stream) {
	$stat = fstat($stream);

	if ($stat['size'] < static::MO_HEADER_SIZE) {
		throw new RangeException("MO stream content has an invalid format.");
	}
	// Normalize to an unsigned value, as unpack may yield a negative one on 32bit.
	$magic = unpack('V1', fread($stream, 4));
	$magic = hexdec(substr(dechex(current($magic)), -8));

	if ($magic == static::MO_LITTLE_ENDIAN_MAGIC) {
		$isBigEndian = false;
	} elseif ($magic == static::MO_BIG_ENDIAN_MAGIC) {
		$isBigEndian = true;
	} else {
		throw new RangeException("MO stream content has an invalid format.");
	}

	// Header fields follow the magic in this order: format revision, number of
	// strings, offset of the id table, offset of the translation table and finally
	// size and offset of the hash table. The latter two are not needed here.
	$this->_readLong($stream, $isBigEndian);
	$count = $this->_readLong($stream, $isBigEndian);
	$offsetId = $this->_readLong($stream, $isBigEndian);
	$offsetTranslated = $this->_readLong($stream, $isBigEndian);

	$data = [];

	for ($i = 0; $i < $count; $i++) {
		$pluralId = null;
		$context = null;

		// Each table entry consists of two longs: length and offset of the string.
		fseek($stream, $offsetId + $i * 8);
		$length = $this->_readLong($stream, $isBigEndian);
		$offset = $this->_readLong($stream, $isBigEndian);

		if ($length < 1) {
			continue;
		}
		fseek($stream, $offset);
		$singularId = fread($stream, $length);

		// Singular and plural id are separated by NUL, context and id by EOT.
		if (strpos($singularId, "\000") !== false) {
			list($singularId, $pluralId) = explode("\000", $singularId);
		}
		if (strpos($singularId, "\004") !== false) {
			list($context, $singularId) = explode("\004", $singularId);
		}

		fseek($stream, $offsetTranslated + $i * 8);
		$length = $this->_readLong($stream, $isBigEndian);
		$offset = $this->_readLong($stream, $isBigEndian);

		// fread() does not accept a length of 0; an empty translation needs no read.
		$translated = '';

		if ($length > 0) {
			fseek($stream, $offset);
			$translated = fread($stream, $length);
		}
		if (strpos($translated, "\000") !== false) {
			$translated = explode("\000", $translated);
		}

		$ids = ['singular' => $singularId, 'plural' => $pluralId];
		$data = $this->_merge($data, compact('ids', 'translated', 'context'));
	}
	return $data;
}
/**
 * Reads an unsigned long (4 bytes) from stream respecting endianness.
 *
 * The value is returned as unpacked. It is not shortened in any way, so lengths
 * and offsets with more than 8 decimal digits are returned intact.
 *
 * @param resource $stream
 * @param boolean $isBigEndian `true` to read the bytes as big endian, `false` to
 *        read them as little endian.
 * @return integer
 * @throws RangeException If less than 4 bytes could be read from the stream.
 */
protected function _readLong($stream, $isBigEndian) {
	$bytes = fread($stream, 4);

	// A short read means the stream ends in the middle of a value.
	if ($bytes === false || strlen($bytes) !== 4) {
		throw new RangeException("MO stream content has an invalid format.");
	}
	$result = unpack($isBigEndian ? 'N1' : 'V1', $bytes);

	return (int) current($result);
}
/**
 * Compiles data into portable object (PO) format.
 *
 * To improve portability across libraries the header is generated according
 * to the format of the output of `xgettext`. This means using the same names for
 * placeholders as well as including an empty entry. The empty entry at the
 * beginning aids in parsing the file as it _attracts_ the preceding comments and
 * following metadata when parsed which could otherwise be mistaken as a continued
 * translation. The only difference in the header format is the initial header which
 * just features one line of text.
 *
 * Each item is passed through `_prepareForWrite()` and then written as its own
 * paragraph: occurrences, comments, flags, the optional context, the id(s) and
 * finally the translation(s). Items with a plural id but no translation get two
 * empty plural forms.
 *
 * @param resource $stream
 * @param array $data
 * @return boolean `true` on success, `false` as soon as writing to the stream fails.
 */
protected function _compilePo($stream, array $data) {
	$header = [
		'# This file is distributed under the same license as the PACKAGE package.',
		'#',
		'msgctxt ""',
		'msgid ""',
		'msgstr ""',
		'"Project-Id-Version: PACKAGE VERSION\n"',
		'"POT-Creation-Date: YEAR-MO-DA HO:MI+ZONE\n"',
		'"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"',
		'"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"',
		'"Language-Team: LANGUAGE <EMAIL@ADDRESS>\n"',
		'"MIME-Version: 1.0\n"',
		'"Content-Type: text/plain; charset=UTF-8\n"',
		'"Content-Transfer-Encoding: 8bit\n"',
		'"Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;\n"',
		''
	];
	if (fwrite($stream, implode("\n", $header) . "\n") === false) {
		return false;
	}

	foreach ($data as $item) {
		$output = [];
		$item = $this->_prepareForWrite($item);

		foreach ($item['occurrences'] as $occurrence) {
			$output[] = "#: {$occurrence['file']}:{$occurrence['line']}";
		}
		foreach ($item['comments'] as $comment) {
			$output[] = "#. {$comment}";
		}
		foreach ($item['flags'] as $flag => $value) {
			$output[] = "#, {$flag}";
		}

		if (isset($item['context'])) {
			$output[] = "msgctxt \"{$item['context']}\"";
		}
		$output[] = "msgid \"{$item['ids']['singular']}\"";

		if (isset($item['ids']['plural'])) {
			$output[] = "msgid_plural \"{$item['ids']['plural']}\"";

			// Untranslated plural items still need their (empty) plural forms.
			$forms = (array) $item['translated'];

			if (!$forms) {
				$forms = [null, null];
			}
			foreach ($forms as $index => $form) {
				$output[] = "msgstr[{$index}] \"{$form}\"";
			}
		} else {
			// Without a plural id only one translation can be written; use the last.
			if (is_array($item['translated'])) {
				$item['translated'] = array_pop($item['translated']);
			}
			$output[] = "msgstr \"{$item['translated']}\"";
		}
		$output[] = '';

		if (fwrite($stream, implode("\n", $output) . "\n") === false) {
			return false;
		}
	}
	return true;
}
/**
 * Compiles data into portable object template (POT) format by handing stream
 * and data to the PO compiler.
 *
 * @see lithium\g11n\catalog\adapter\Gettext::_compilePo()
 * @param resource $stream
 * @param array $data
 * @return boolean Success.
 */
protected function _compilePot($stream, array $data) {
	$success = $this->_compilePo($stream, $data);
	return $success;
}
/**
 * Compiles data into machine object (MO) format.
 *
 * Not implemented: the body is empty, so nothing is written to `$stream` and
 * `$data` is ignored.
 *
 * @param resource $stream Stream the compiled data would be written to; left untouched.
 * @param array $data The data to compile; currently unused.
 * @return void
 * @todo Determine if needed and implement compiler.
 */
protected function _compileMo($stream, array $data) {}
/**
 * Prepares an item before it is being written and escapes fields.
 *
 * All characters from \000 to \037 (this includes new line and tab characters)
 * as well as the backslash (`\`) and the double quote (`"`) are escaped.
 *
 * Literal Windows CRLFs (`\r\n`) are converted to LFs (`\n`) to improve cross platform
 * compatibility. Escaped single quotes (`'`) are unescaped as they should not need to be.
 * Double escaped characters are maintained and not escaped once again.
 *
 * Values that are `null` (i.e. a missing plural id) are kept as `null` instead of
 * being turned into empty strings, so they can still be told apart from empty values.
 *
 * The `'id'`, `'ids'`, `'translated'` and `'context'` fields are escaped. If no
 * singular id is present the item's id is used as such. The path of the default
 * library is stripped from the files of all occurrences.
 *
 * @link http://www.asciitable.com
 * @see lithium\g11n\catalog\Adapter::_prepareForWrite()
 * @param array $item
 * @return array
 */
protected function _prepareForWrite(array $item) {
	$filter = function ($value) use (&$filter) {
		if (is_array($value)) {
			return array_map($filter, $value);
		}
		if ($value === null) {
			return null;
		}
		// Undo existing escaping first, so escaping below does not double it.
		$value = strtr($value, ["\\'" => "'", "\\\\" => "\\", "\r\n" => "\n"]);

		return addcslashes($value, "\0..\37\\\"");
	};

	foreach (['id', 'ids', 'translated', 'context'] as $field) {
		if (isset($item[$field])) {
			$item[$field] = $filter($item[$field]);
		}
	}
	if (!isset($item['ids']['singular'])) {
		$item['ids']['singular'] = $item['id'];
	}

	if (isset($item['occurrences'])) {
		$path = Libraries::get(true, 'path');

		foreach ($item['occurrences'] as &$occurrence) {
			$occurrence['file'] = str_replace($path, '', $occurrence['file']);
		}
		// Break the reference, so later use of the variable cannot alter the item.
		unset($occurrence);
	}
	return parent::_prepareForWrite($item);
}
/**
 * Merges an item into given data and unescapes fields.
 *
 * Please note that items with an id containing exclusively whitespace characters
 * or are empty are **not** being merged. Whitespace characters are space, tab, vertical
 * tab, line feed, carriage return and form feed. An id of `'0'` is neither empty nor
 * whitespace and is merged like any other id.
 *
 * The `'id'`, `'ids'`, `'translated'` and `'context'` fields are unescaped; values
 * that are `null` (i.e. a missing plural id) are kept as `null`. When a singular id
 * is present it becomes the id of the item.
 *
 * @see lithium\g11n\catalog\Adapter::_merge()
 * @param array $data Data to merge item into.
 * @param array $item Item to merge into $data.
 * @return array The merged data.
 */
protected function _merge(array $data, array $item) {
	$filter = function ($value) use (&$filter) {
		if (is_array($value)) {
			return array_map($filter, $value);
		}
		if ($value === null) {
			return null;
		}
		return stripcslashes($value);
	};

	foreach (['id', 'ids', 'translated', 'context'] as $field) {
		if (isset($item[$field])) {
			$item[$field] = $filter($item[$field]);
		}
	}
	if (isset($item['ids']['singular'])) {
		$item['id'] = $item['ids']['singular'];
	}

	// Compare as string; `empty()` would also reject the perfectly valid id `'0'`.
	$id = isset($item['id']) ? (string) $item['id'] : '';

	if ($id === '' || ctype_space($id)) {
		return $data;
	}
	return parent::_merge($data, $item);
}
}
?>