You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
538 lines
16 KiB
538 lines
16 KiB
<?php
|
|
/**
|
|
* li₃: the most RAD framework for PHP (http://li3.me)
|
|
*
|
|
* Copyright 2009, Union of RAD. All rights reserved. This source
|
|
* code is distributed under the terms of the BSD 3-Clause License.
|
|
* The full license text can be found in the LICENSE.txt file.
|
|
*/
|
|
|
|
namespace lithium\g11n\catalog\adapter;
|
|
|
|
use RangeException;
|
|
use lithium\core\ConfigException;
|
|
use lithium\core\Libraries;
|
|
|
|
/**
|
|
* The `Gettext` class is an adapter for reading and writing PO and MO files without the
|
|
* requirement of having the gettext extension enabled or installed. Moreover it doesn't
|
|
* require the usage of the non thread safe `setlocale()`.
|
|
*
|
|
* The adapter works with the directory structure below. The example shows the structure
|
|
* for the directory as given by the `'path'` configuration setting. It closely resembles
|
|
* the standard gettext directory structure with a few slight adjustments to the way
|
|
* templates are being named.
|
|
*
|
|
* ```asciiart
|
|
* resources/g11n/po
|
|
* ├── <locale>
|
|
* | ├── LC_MESSAGES
|
|
* | | ├── default.po
|
|
* | | ├── default.mo
|
|
* | | ├── <scope>.po
|
|
* | | └── <scope>.mo
|
|
* | ├── LC_VALIDATION
|
|
* | | └── ...
|
|
* | └── ...
|
|
* ├── <locale>
|
|
* | └── ...
|
|
* ├── message_default.pot
|
|
* ├── message_<scope>.pot
|
|
* ├── validation_default.pot
|
|
* ├── validation_<scope>.pot
|
|
* └── ...
|
|
* ```
|
|
*
|
|
* @see lithium\g11n\Locale
|
|
* @link http://php.net/setlocale PHP Manual: setlocale()
|
|
* @link http://www.gnu.org/software/gettext/manual/gettext.html GNU Gettext Utilities
|
|
*/
|
|
class Gettext extends \lithium\g11n\catalog\Adapter {
|
|
|
|
/**
 * Magic number used for validating the format of a MO file as well as
 * detecting if the machine used to create that file was little endian.
 *
 * The literal is larger than the biggest signed 32bit integer, so PHP
 * represents it as an integer on 64bit builds and as a float on 32bit builds.
 *
 * @see lithium\g11n\catalog\adapter\Gettext::_parseMo()
 * @var integer|float
 */
const MO_LITTLE_ENDIAN_MAGIC = 0x950412de;

/**
 * Magic number used for validating the format of a MO file as well as
 * detecting if the machine used to create that file was big endian. It is
 * the byte-swapped form of the little endian magic.
 *
 * The literal is larger than the biggest signed 32bit integer, so PHP
 * represents it as an integer on 64bit builds and as a float on 32bit builds.
 *
 * @see lithium\g11n\catalog\adapter\Gettext::_parseMo()
 * @var integer|float
 */
const MO_BIG_ENDIAN_MAGIC = 0xde120495;

/**
 * The size of the header of a MO file in bytes. Streams shorter than this
 * are rejected by the MO parser before anything else is read.
 *
 * @see lithium\g11n\catalog\adapter\Gettext::_parseMo()
 * @var integer Number of bytes.
 */
const MO_HEADER_SIZE = 28;
|
|
|
|
/**
 * Constructor. Guarantees that the `'path'` key is present in the
 * configuration before passing it on to the parent constructor.
 *
 * @param array $config Available configuration options are:
 *        - `'path'`: The path to the directory holding the data.
 *          Falls back to `null` when not given.
 * @return void
 */
public function __construct(array $config = []) {
	parent::__construct($config + ['path' => null]);
}
|
|
|
|
/**
 * Initializer. Checks if the configured path exists and is a directory.
 *
 * The `'path'` setting defaults to `null`. That case is rejected explicitly
 * instead of being handed to `is_dir()`, as passing `null` to a string
 * parameter of a built-in function is deprecated as of PHP 8.1.
 *
 * @return void
 * @throws ConfigException If no path is configured or it is not a directory.
 */
protected function _init() {
	parent::_init();
	$path = $this->_config['path'];

	if ($path === null || !is_dir($path)) {
		$message = "Gettext directory does not exist at path `{$path}`.";
		throw new ConfigException($message);
	}
}
|
|
|
|
/**
 * Reads data.
 *
 * The candidate files are tried in the order returned by `_files()`, which
 * lists MO files before PO files, so MO files are preferred when existent.
 * The first file that yields any data wins. A default `'pluralRule'` item is
 * added to that data; its callable returns `true` for every count which is
 * not identical to the integer `1`.
 *
 * @see lithium\g11n\catalog\adapter\Gettext::_files()
 * @param string $category A category.
 * @param string $locale A locale identifier.
 * @param string $scope The scope for the current operation.
 * @return array|null The data, or `null` if no file could be read or none
 *         of the files contained data.
 */
public function read($category, $locale, $scope) {
	$files = $this->_files($category, $locale, $scope);

	foreach ($files as $file) {
		$method = '_parse' . ucfirst(pathinfo($file, PATHINFO_EXTENSION));

		if (!file_exists($file) || !is_readable($file)) {
			continue;
		}
		// The file may vanish or become unreadable between the check above
		// and opening it; never hand a failed handle to a parser.
		if (!$stream = fopen($file, 'rb')) {
			continue;
		}
		try {
			$data = $this->{$method}($stream);
		} catch (\Exception $e) {
			// Parsers may throw (i.e. on malformed MO content); make sure the
			// handle is released before the exception propagates.
			fclose($stream);
			throw $e;
		}
		fclose($stream);

		if ($data) {
			$data['pluralRule'] = [
				'id' => 'pluralRule',
				'translated' => function($count) {
					return $count !== 1;
				}
			];
			return $data;
		}
	}
	return null;
}
|
|
|
|
/**
 * Writes data.
 *
 * Every file returned by `_files()` is opened for writing (mode `wb`, which
 * creates or truncates it) and handed to the `_compile*` method matching
 * the file extension. Writing stops at the first file that cannot be opened.
 *
 * Note that `_compileMo()` of this class is an empty stub. For categories
 * that map to a `.mo`/`.po` pair the `.mo` file is therefore created or
 * truncated but left without content.
 *
 * @see lithium\g11n\catalog\adapter\Gettext::_files()
 * @param string $category A category.
 * @param string $locale A locale identifier.
 * @param string $scope The scope for the current operation.
 * @param array $data The data to write.
 * @return boolean `false` as soon as a file cannot be opened, `true` otherwise.
 */
public function write($category, $locale, $scope, array $data) {
	foreach ($this->_files($category, $locale, $scope) as $target) {
		$compiler = '_compile' . ucfirst(pathinfo($target, PATHINFO_EXTENSION));
		$handle = fopen($target, 'wb');

		if (!$handle) {
			return false;
		}
		$this->{$compiler}($handle, $data);
		fclose($handle);
	}
	return true;
}
|
|
|
|
/**
 * Returns paths to files according to configuration. All paths are built
 * below the configured `'path'`.
 *
 * Categories containing `Template` resolve to a single POT file named
 * `<category without Template suffix>_<scope>.pot`. All other categories
 * resolve to a MO and a PO file (in that order) inside
 * `<locale>/LC_<CATEGORY>/`, where the `message` category is mapped to
 * `LC_MESSAGES`.
 *
 * @param string $category
 * @param string $locale
 * @param string $scope Falls back to `'default'` when empty.
 * @return array List of file paths; the files do not necessarily exist.
 */
protected function _files($category, $locale, $scope) {
	$base = $this->_config['path'];
	$name = $scope ?: 'default';
	$marker = strpos($category, 'Template');

	if ($marker !== false) {
		$prefix = substr($category, 0, $marker);
		return ["{$base}/{$prefix}_{$name}.pot"];
	}
	$directory = strtoupper($category === 'message' ? 'messages' : $category);
	$stem = "{$base}/{$locale}/LC_{$directory}/{$name}";

	return ["{$stem}.mo", "{$stem}.po"];
}
|
|
|
|
/**
 * Parses portable object (PO) format.
 *
 * This parser sacrifices some features of the reference implementation the
 * differences to that implementation are as follows.
 * - No support for comments spanning multiple lines.
 * - Translator and extracted comments are treated as being the same type.
 * - Message IDs are allowed to have other encodings as just US-ASCII.
 *
 * Flag comments (`#, `) may hold several comma separated flags; each flag
 * becomes its own key in the `'flags'` array. Reference comments (`#: `) may
 * hold several whitespace separated `file:line` references; each one becomes
 * its own entry in the `'occurrences'` array.
 *
 * Items with an empty id are ignored. For more information see `_merge()`.
 *
 * @param resource $stream
 * @return array
 */
protected function _parsePo($stream) {
	$defaults = [
		'ids' => [],
		'translated' => null,
		'flags' => [],
		'comments' => [],
		'occurrences' => [],
		'context' => null
	];
	$data = [];
	$item = $defaults;

	// Compare against `false` explicitly: a final line consisting only of
	// `0` (without newline) is falsy and would otherwise end the loop early.
	while (($line = fgets($stream)) !== false) {
		$line = trim($line);

		if ($line === '') {
			// A blank line terminates the current item.
			$data = $this->_merge($data, $item);
			$item = $defaults;
		} elseif (substr($line, 0, 3) === '#~ ') {
			$item['flags']['obsolete'] = true;
		} elseif (substr($line, 0, 3) === '#, ') {
			foreach (explode(',', substr($line, 3)) as $flag) {
				$flag = trim($flag);

				if ($flag !== '') {
					$item['flags'][$flag] = true;
				}
			}
		} elseif (substr($line, 0, 3) === '#: ') {
			$references = substr($line, 3);

			// Each reference ends in `:<digits>` followed by whitespace or the
			// end of the line. Matching lazily up to that point keeps colons
			// and spaces inside the file name intact.
			$pattern = '/(\S.*?):(\d+)(?=\s|$)/';

			if (preg_match_all($pattern, $references, $matches, PREG_SET_ORDER)) {
				foreach ($matches as $match) {
					$item['occurrences'][] = ['file' => $match[1], 'line' => $match[2]];
				}
			} else {
				// No numeric line part present, split on the first colon.
				$item['occurrences'][] = [
					'file' => strtok($references, ':'),
					'line' => strtok(':')
				];
			}
		} elseif (substr($line, 0, 3) === '#. ') {
			$item['comments'][] = substr($line, 3);
		} elseif ($line[0] === '#') {
			$item['comments'][] = ltrim(substr($line, 1));
		} elseif (substr($line, 0, 7) === 'msgid "') {
			$item['ids']['singular'] = substr($line, 7, -1);
		} elseif (substr($line, 0, 9) === 'msgctxt "') {
			$item['context'] = substr($line, 9, -1);
		} elseif (substr($line, 0, 8) === 'msgstr "') {
			$item['translated'] = substr($line, 8, -1);
		} elseif ($line[0] === '"') {
			// Continuation line: belongs to the translation once one has been
			// started, otherwise to the most recently added id.
			$continues = isset($item['translated']) ? 'translated' : 'ids';

			if (is_array($item[$continues])) {
				end($item[$continues]);
				$item[$continues][key($item[$continues])] .= substr($line, 1, -1);
			} else {
				$item[$continues] .= substr($line, 1, -1);
			}
		} elseif (substr($line, 0, 14) === 'msgid_plural "') {
			$item['ids']['plural'] = substr($line, 14, -1);
		} elseif (substr($line, 0, 7) === 'msgstr[') {
			// Locate the closing bracket so indices with more than one digit
			// work; fall back to the single digit layout if it is missing.
			$close = strpos($line, ']', 7);
			$close = $close === false ? 8 : $close;
			$index = (int) substr($line, 7, $close - 7);

			// Skip `] "` after the index and drop the closing quote.
			$item['translated'][$index] = substr($line, $close + 3, -1);
		}
	}
	return $this->_merge($data, $item);
}
|
|
|
|
/**
 * Parses portable object template (POT) format. Templates are handled
 * exactly like PO files; parsing is delegated to `_parsePo()`.
 *
 * @see lithium\g11n\catalog\adapter\Gettext::_parsePo()
 * @param resource $stream
 * @return array
 */
protected function _parsePot($stream) {
	$parsed = $this->_parsePo($stream);
	return $parsed;
}
|
|
|
|
/**
 * Parses machine object (MO) format, independent of the machine's endian it
 * was created on. Both 32bit and 64bit systems are supported.
 *
 * Entries with an empty id (this includes the metadata entry) are skipped.
 * Ids containing a NUL byte are split into singular and plural id, ids
 * containing an EOT byte (`\004`) are split into context and id. Translations
 * containing NUL bytes are split into an array of plural forms. Entries
 * with an empty translation get an empty string as translation.
 *
 * @link https://www.gnu.org/software/gettext/manual/html_node/MO-Files.html
 * @param resource $stream
 * @return array
 * @throws RangeException If stream content has an invalid format.
 */
protected function _parseMo($stream) {
	$stat = fstat($stream);

	if ($stat['size'] < static::MO_HEADER_SIZE) {
		throw new RangeException("MO stream content has an invalid format.");
	}
	// The magic is always read as little endian; a file created on a big
	// endian machine then shows up as the byte-swapped magic. Going through
	// the hex representation normalizes negative values on 32bit builds.
	$magic = unpack('V1', fread($stream, 4));
	$magic = hexdec(substr(dechex(current($magic)), -8));

	// Loose comparison on purpose: depending on the platform either side
	// may be an integer or a float.
	if ($magic == static::MO_LITTLE_ENDIAN_MAGIC) {
		$isBigEndian = false;
	} elseif ($magic == static::MO_BIG_ENDIAN_MAGIC) {
		$isBigEndian = true;
	} else {
		throw new RangeException("MO stream content has an invalid format.");
	}

	// The remaining header consists of six consecutive 32bit values.
	$fields = [
		'formatRevision', 'count', 'offsetId',
		'offsetTranslated', 'sizeHashes', 'offsetHashes'
	];
	$header = [];

	foreach ($fields as $field) {
		$header[$field] = $this->_readLong($stream, $isBigEndian);
	}
	$count = $header['count'];
	$offsetId = $header['offsetId'];
	$offsetTranslated = $header['offsetTranslated'];
	$data = [];

	for ($i = 0; $i < $count; $i++) {
		$pluralId = null;
		$context = null;

		// Each table entry is a pair of 32bit values: length and offset.
		fseek($stream, $offsetId + $i * 8);
		$length = $this->_readLong($stream, $isBigEndian);
		$offset = $this->_readLong($stream, $isBigEndian);

		if ($length < 1) {
			continue;
		}
		fseek($stream, $offset);
		$singularId = fread($stream, $length);

		if (strpos($singularId, "\000") !== false) {
			list($singularId, $pluralId) = explode("\000", $singularId);
		}
		if (strpos($singularId, "\004") !== false) {
			list($context, $singularId) = explode("\004", $singularId);
		}

		fseek($stream, $offsetTranslated + $i * 8);
		$length = $this->_readLong($stream, $isBigEndian);
		$offset = $this->_readLong($stream, $isBigEndian);

		// `fread()` must not be called with a length of 0: it throws a
		// `ValueError` on PHP 8 and returns `false` with a warning before.
		if ($length > 0) {
			fseek($stream, $offset);
			$translated = fread($stream, $length);
		} else {
			$translated = '';
		}
		if (strpos($translated, "\000") !== false) {
			$translated = explode("\000", $translated);
		}

		$ids = ['singular' => $singularId, 'plural' => $pluralId];
		$data = $this->_merge($data, compact('ids', 'translated', 'context'));
	}
	return $data;
}
|
|
|
|
/**
 * Reads an unsigned 32bit value from stream respecting endianness and
 * advances the stream by 4 bytes.
 *
 * The unpacked value is returned as is. On 32bit builds `unpack()` yields a
 * negative number for values above the signed integer range; those are
 * shifted back into the unsigned range (which makes them floats there).
 *
 * @param resource $stream
 * @param boolean $isBigEndian `true` to read in big endian byte order,
 *        `false` to read in little endian byte order.
 * @return integer
 * @throws RangeException If less than 4 bytes could be read from the stream.
 */
protected function _readLong($stream, $isBigEndian) {
	$bytes = fread($stream, 4);

	// A short read means the stream is truncated or an offset points
	// outside of it; `unpack()` would fail on such input.
	if ($bytes === false || strlen($bytes) !== 4) {
		throw new RangeException("MO stream content has an invalid format.");
	}
	$result = current(unpack($isBigEndian ? 'N1' : 'V1', $bytes));

	if ($result < 0) {
		$result += 4294967296;
	}
	return $result;
}
|
|
|
|
/**
 * Compiles data into portable object (PO) format and writes it to the stream.
 *
 * To improve portability across libraries the header is generated according
 * to the format of the output of `xgettext`. This means using the same names for
 * placeholders as well as including an empty entry. The empty entry at the
 * beginning aids in parsing the file as it _attracts_ the preceding comments and
 * following metadata when parsed which could otherwise be mistaken as a continued
 * translation. The only difference in the header format is the initial header which
 * just features one line of text.
 *
 * Each item is passed through `_prepareForWrite()` and then written as one
 * paragraph: occurrences, comments and flags first, followed by the optional
 * context, the id and either a single `msgstr` or - if a plural id is set -
 * one `msgstr[n]` per translated form (two empty forms when untranslated).
 *
 * @see lithium\g11n\catalog\adapter\Gettext::_prepareForWrite()
 * @param resource $stream
 * @param array $data
 * @return boolean Always `true`.
 */
protected function _compilePo($stream, array $data) {
	$header = [
		'# This file is distributed under the same license as the PACKAGE package.',
		'#',
		'msgctxt ""',
		'msgid ""',
		'msgstr ""',
		'"Project-Id-Version: PACKAGE VERSION\n"',
		'"POT-Creation-Date: YEAR-MO-DA HO:MI+ZONE\n"',
		'"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"',
		'"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"',
		'"Language-Team: LANGUAGE <EMAIL@ADDRESS>\n"',
		'"MIME-Version: 1.0\n"',
		'"Content-Type: text/plain; charset=UTF-8\n"',
		'"Content-Transfer-Encoding: 8bit\n"',
		'"Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;\n"',
		''
	];
	fwrite($stream, implode("\n", $header) . "\n");

	foreach ($data as $entry) {
		$entry = $this->_prepareForWrite($entry);
		$lines = [];

		foreach ($entry['occurrences'] as $reference) {
			$lines[] = "#: {$reference['file']}:{$reference['line']}";
		}
		foreach ($entry['comments'] as $text) {
			$lines[] = "#. {$text}";
		}
		foreach ($entry['flags'] as $name => $enabled) {
			$lines[] = "#, {$name}";
		}

		if (isset($entry['context'])) {
			$lines[] = "msgctxt \"{$entry['context']}\"";
		}
		$lines[] = "msgid \"{$entry['ids']['singular']}\"";

		if (!isset($entry['ids']['plural'])) {
			$single = $entry['translated'];

			// Without a plural id only one form can be written; of several
			// given forms the last one is used.
			if (is_array($single)) {
				$single = array_pop($single);
			}
			$lines[] = "msgstr \"{$single}\"";
		} else {
			$lines[] = "msgid_plural \"{$entry['ids']['plural']}\"";
			$forms = (array) $entry['translated'];

			if (!$forms) {
				$forms = [null, null];
			}
			foreach ($forms as $index => $form) {
				$lines[] = "msgstr[{$index}] \"{$form}\"";
			}
		}
		// The empty element yields the blank line separating the items.
		$lines[] = '';
		fwrite($stream, implode("\n", $lines) . "\n");
	}
	return true;
}
|
|
|
|
/**
 * Compiles data into portable object template (POT) format. Templates are
 * written exactly like PO files; compiling is delegated to `_compilePo()`.
 *
 * @see lithium\g11n\catalog\adapter\Gettext::_compilePo()
 * @param resource $stream
 * @param array $data
 * @return boolean Success.
 */
protected function _compilePot($stream, array $data) {
	$result = $this->_compilePo($stream, $data);
	return $result;
}
|
|
|
|
/**
 * Compiles data into machine object (MO) format.
 *
 * Currently a placeholder: nothing is written to the stream and nothing
 * is returned.
 *
 * @param resource $stream
 * @param array $data
 * @return void
 * @todo Determine if needed and implement compiler.
 */
protected function _compileMo($stream, array $data) {
	return;
}
|
|
|
|
/**
 * Prepares an item before it is being written and escapes fields.
 *
 * All characters from \000 to \037 (this includes new line and tab characters)
 * as well as the backslash (`\`) and the double quote (`"`) are escaped.
 *
 * Literal Windows CRLFs (`\r\n`) are converted to LFs (`\n`) to improve cross platform
 * compatibility. Escaped single quotes (`'`) are unescaped as they should not need to be.
 * Double escaped characters are maintained and not escaped once again.
 *
 * `null` values nested inside the escaped fields (i.e. a plural id of `null`)
 * are kept as `null` so that "not set" can still be told apart from an
 * empty string after escaping.
 *
 * If no singular id is set, the item's `'id'` is used for it. The path of the
 * default library - as returned by `Libraries::get(true, 'path')` - is
 * removed from the file names of all occurrences.
 *
 * @link http://www.asciitable.com
 * @see lithium\g11n\catalog\Adapter::_prepareForWrite()
 * @param array $item
 * @return array
 */
protected function _prepareForWrite(array $item) {
	$filter = function ($value) use (&$filter) {
		if (is_array($value)) {
			return array_map($filter, $value);
		}
		// Escaping `null` would turn it into an empty string (and is
		// deprecated as of PHP 8.1), making unset values look set.
		if ($value === null) {
			return null;
		}
		$value = strtr($value, ["\\'" => "'", "\\\\" => "\\", "\r\n" => "\n"]);
		$value = addcslashes($value, "\0..\37\\\"");
		return $value;
	};
	$fields = ['id', 'ids', 'translated', 'context'];

	foreach ($fields as $field) {
		if (isset($item[$field])) {
			$item[$field] = $filter($item[$field]);
		}
	}
	if (!isset($item['ids']['singular'])) {
		$item['ids']['singular'] = $item['id'];
	}
	$path = Libraries::get(true, 'path');

	if (isset($item['occurrences'])) {
		foreach ($item['occurrences'] as &$occurrence) {
			$occurrence['file'] = str_replace($path, '', $occurrence['file']);
		}
		// Break the reference so the last occurrence isn't aliased any longer.
		unset($occurrence);
	}
	return parent::_prepareForWrite($item);
}
|
|
|
|
/**
 * Merges an item into given data and unescapes fields.
 *
 * Please note that items with an id containing exclusively whitespace characters
 * or are empty are **not** being merged. Whitespace characters are space, tab, vertical
 * tab, line feed, carriage return and form feed. An id of `"0"` is a regular
 * id and is merged.
 *
 * If a singular id is set it becomes the item's `'id'`. `null` values nested
 * inside the unescaped fields (i.e. a plural id of `null`) are kept as `null`.
 *
 * @see lithium\g11n\catalog\Adapter::_merge()
 * @param array $data Data to merge item into.
 * @param array $item Item to merge into $data.
 * @return array The merged data.
 */
protected function _merge(array $data, array $item) {
	$filter = function ($value) use (&$filter) {
		if (is_array($value)) {
			return array_map($filter, $value);
		}
		// Unescaping `null` would turn it into an empty string (and is
		// deprecated as of PHP 8.1), making unset values look set.
		if ($value === null) {
			return null;
		}
		return stripcslashes($value);
	};
	$fields = ['id', 'ids', 'translated', 'context'];

	foreach ($fields as $field) {
		if (isset($item[$field])) {
			$item[$field] = $filter($item[$field]);
		}
	}
	if (isset($item['ids']['singular'])) {
		$item['id'] = $item['ids']['singular'];
	}
	if (!isset($item['id']) || !is_scalar($item['id'])) {
		return $data;
	}
	// Compare as string: `empty()` would also reject the valid id `"0"` and
	// `ctype_space()` treats integers as character codes.
	$id = (string) $item['id'];

	if ($id === '' || ctype_space($id)) {
		return $data;
	}
	return parent::_merge($data, $item);
}
|
|
}
|
|
|
|
?>
|