* | ├── LC_MESSAGES * | | ├── default.po * | | ├── default.mo * | | ├── .po * | | └── .mo * | ├── LC_VALIDATION * | | └── ... * | └── ... * ├── * | └── ... * ├── message_default.pot * ├── message_.pot * ├── validation_default.pot * ├── validation_.pot * └── ... * ``` * * @see lithium\g11n\Locale * @link http://php.net/setlocale PHP Manual: setlocale() * @link http://www.gnu.org/software/gettext/manual/gettext.html GNU Gettext Utilities */ class Gettext extends \lithium\g11n\catalog\Adapter { /** * Magic used for validating the format of a MO file as well as * detecting if the machine used to create that file was little endian. * * @see lithium\g11n\catalog\adapter\Gettext::_parseMo() * @var float */ const MO_LITTLE_ENDIAN_MAGIC = 0x950412de; /** * Magic used for validating the format of a MO file as well as * detecting if the machine used to create that file was big endian. * * @see lithium\g11n\catalog\adapter\Gettext::_parseMo() * @var float */ const MO_BIG_ENDIAN_MAGIC = 0xde120495; /** * The size of the header of a MO file in bytes. * * @see lithium\g11n\catalog\adapter\Gettext::_parseMo() * @var integer Number of bytes. */ const MO_HEADER_SIZE = 28; /** * Constructor. * * @param array $config Available configuration options are: * - `'path'`: The path to the directory holding the data. * @return void */ public function __construct(array $config = []) { $defaults = ['path' => null]; parent::__construct($config + $defaults); } /** * Initializer. Checks if the configured path exists. * * @return void * @throws ConfigException */ protected function _init() { parent::_init(); if (!is_dir($this->_config['path'])) { $message = "Gettext directory does not exist at path `{$this->_config['path']}`."; throw new ConfigException($message); } } /** * Reads data. * * MO files are preferred over PO files when existent. * * @see lithium\g11n\catalog\adapter\Gettext::_files() * @param string $category A category. * @param string $locale A locale identifier. * @param string $scope The scope for the current operation. * @return array */ public function read($category, $locale, $scope) { $files = $this->_files($category, $locale, $scope); foreach ($files as $file) { $method = '_parse' . ucfirst(pathinfo($file, PATHINFO_EXTENSION)); if (!file_exists($file) || !is_readable($file)) { continue; } $stream = fopen($file, 'rb'); $data = $this->{$method}($stream); fclose($stream); if ($data) { $data['pluralRule'] = [ 'id' => 'pluralRule', 'translated' => function($count) { return $count !== 1; } ]; return $data; } } } /** * Writes data. * * @param string $category A category. * @param string $locale A locale identifier. * @param string $scope The scope for the current operation. * @param array $data The data to write. * @return boolean */ public function write($category, $locale, $scope, array $data) { $files = $this->_files($category, $locale, $scope); foreach ($files as $file) { $method = '_compile' . ucfirst(pathinfo($file, PATHINFO_EXTENSION)); if (!$stream = fopen($file, 'wb')) { return false; } $this->{$method}($stream, $data); fclose($stream); } return true; } /** * Returns absolute paths to files according to configuration. * * @param string $category * @param string $locale * @param string $scope * @return array */ protected function _files($category, $locale, $scope) { $path = $this->_config['path']; $scope = $scope ?: 'default'; if (($pos = strpos($category, 'Template')) !== false) { $category = substr($category, 0, $pos); return ["{$path}/{$category}_{$scope}.pot"]; } if ($category === 'message') { $category = 'messages'; } $category = strtoupper($category); return [ "{$path}/{$locale}/LC_{$category}/{$scope}.mo", "{$path}/{$locale}/LC_{$category}/{$scope}.po" ]; } /** * Parses portable object (PO) format. * * This parser sacrifices some features of the reference implementation the * differences to that implementation are as follows. * - No support for comments spanning multiple lines. * - Translator and extracted comments are treated as being the same type. * - Message IDs are allowed to have other encodings as just US-ASCII. * * Items with an empty id are ignored. For more information see `_merge()`. * * @param resource $stream * @return array */ protected function _parsePo($stream) { $defaults = [ 'ids' => [], 'translated' => null, 'flags' => [], 'comments' => [], 'occurrences' => [], 'context' => null ]; $data = []; $item = $defaults; while ($line = fgets($stream)) { $line = trim($line); if ($line === '') { $data = $this->_merge($data, $item); $item = $defaults; } elseif (substr($line, 0, 3) === '#~ ') { $item['flags']['obsolete'] = true; } elseif (substr($line, 0, 3) === '#, ') { $item['flags'][substr($line, 3)] = true; } elseif (substr($line, 0, 3) === '#: ') { $item['occurrences'][] = [ 'file' => strtok(substr($line, 3), ':'), 'line' => strtok(':') ]; } elseif (substr($line, 0, 3) === '#. ') { $item['comments'][] = substr($line, 3); } elseif ($line[0] === '#') { $item['comments'][] = ltrim(substr($line, 1)); } elseif (substr($line, 0, 7) === 'msgid "') { $item['ids']['singular'] = substr($line, 7, -1); } elseif (substr($line, 0, 9) === 'msgctxt "') { $item['context'] = substr($line, 9, -1); } elseif (substr($line, 0, 8) === 'msgstr "') { $item['translated'] = substr($line, 8, -1); } elseif ($line[0] === '"') { $continues = isset($item['translated']) ? 'translated' : 'ids'; if (is_array($item[$continues])) { end($item[$continues]); $item[$continues][key($item[$continues])] .= substr($line, 1, -1); } else { $item[$continues] .= substr($line, 1, -1); } } elseif (substr($line, 0, 14) === 'msgid_plural "') { $item['ids']['plural'] = substr($line, 14, -1); } elseif (substr($line, 0, 7) === 'msgstr[') { $item['translated'][(integer) substr($line, 7, 1)] = substr($line, 11, -1); } } return $this->_merge($data, $item); } /** * Parses portable object template (POT) format. * * @param resource $stream * @return array */ protected function _parsePot($stream) { return $this->_parsePo($stream); } /** * Parses machine object (MO) format, independent of the machine's endian it * was created on. Both 32bit and 64bit systems are supported. * * @link https://www.gnu.org/software/gettext/manual/html_node/MO-Files.html * @param resource $stream * @return array * @throws RangeException If stream content has an invalid format. */ protected function _parseMo($stream) { $stat = fstat($stream); if ($stat['size'] < static::MO_HEADER_SIZE) { throw new RangeException("MO stream content has an invalid format."); } $magic = unpack('V1', fread($stream, 4)); $magic = hexdec(substr(dechex(current($magic)), -8)); if ($magic == static::MO_LITTLE_ENDIAN_MAGIC) { $isBigEndian = false; } elseif ($magic == static::MO_BIG_ENDIAN_MAGIC) { $isBigEndian = true; } else { throw new RangeException("MO stream content has an invalid format."); } $header = [ 'formatRevision' => null, 'count' => null, 'offsetId' => null, 'offsetTranslated' => null, 'sizeHashes' => null, 'offsetHashes' => null ]; foreach ($header as &$value) { $value = $this->_readLong($stream, $isBigEndian); } extract($header); $data = []; for ($i = 0; $i < $count; $i++) { $singularId = $pluralId = null; $translated = null; $context = null; fseek($stream, $offsetId + $i * 8); $length = $this->_readLong($stream, $isBigEndian); $offset = $this->_readLong($stream, $isBigEndian); if ($length < 1) { continue; } fseek($stream, $offset); $singularId = fread($stream, $length); if (strpos($singularId, "\000") !== false) { list($singularId, $pluralId) = explode("\000", $singularId); } if (strpos($singularId, "\004") !== false) { list($context, $singularId) = explode("\004", $singularId); } fseek($stream, $offsetTranslated + $i * 8); $length = $this->_readLong($stream, $isBigEndian); $offset = $this->_readLong($stream, $isBigEndian); fseek($stream, $offset); $translated = fread($stream, $length); if (strpos($translated, "\000") !== false) { $translated = explode("\000", $translated); } $ids = ['singular' => $singularId, 'plural' => $pluralId]; $data = $this->_merge($data, compact('ids', 'translated', 'context')); } return $data; } /** * Reads an unsigned long from stream respecting endianess. * * @param resource $stream * @param boolean $isBigEndian * @return integer */ protected function _readLong($stream, $isBigEndian) { $result = unpack($isBigEndian ? 'N1' : 'V1', fread($stream, 4)); $result = current($result); return (integer) substr($result, -8); } /** * Compiles data into portable object (PO) format. * * To improve portability accross libraries the header is generated according * to the format of the output of `xgettext`. This means using the same names for * placeholders as well as including an empty entry. The empty entry at the * beginning aids in parsing the file as it _attracts_ the preceding comments and * following metadata when parsed which could otherwise be mistaken as a continued * translation. The only difference in the header format is the initial header which * just features one line of text. * * @param resource $stream * @param array $data * @return boolean */ protected function _compilePo($stream, array $data) { $output[] = '# This file is distributed under the same license as the PACKAGE package.'; $output[] = '#'; $output[] = 'msgctxt ""'; $output[] = 'msgid ""'; $output[] = 'msgstr ""'; $output[] = '"Project-Id-Version: PACKAGE VERSION\n"'; $output[] = '"POT-Creation-Date: YEAR-MO-DA HO:MI+ZONE\n"'; $output[] = '"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"'; $output[] = '"Last-Translator: FULL NAME \n"'; $output[] = '"Language-Team: LANGUAGE \n"'; $output[] = '"MIME-Version: 1.0\n"'; $output[] = '"Content-Type: text/plain; charset=UTF-8\n"'; $output[] = '"Content-Transfer-Encoding: 8bit\n"'; $output[] = '"Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;\n"'; $output[] = ''; $output = implode("\n", $output) . "\n"; fwrite($stream, $output); foreach ($data as $key => $item) { $output = []; $item = $this->_prepareForWrite($item); foreach ($item['occurrences'] as $occurrence) { $output[] = "#: {$occurrence['file']}:{$occurrence['line']}"; } foreach ($item['comments'] as $comment) { $output[] = "#. {$comment}"; } foreach ($item['flags'] as $flag => $value) { $output[] = "#, {$flag}"; } if (isset($item['context'])) { $output[] = "msgctxt \"{$item['context']}\""; } $output[] = "msgid \"{$item['ids']['singular']}\""; if (isset($item['ids']['plural'])) { $output[] = "msgid_plural \"{$item['ids']['plural']}\""; foreach ((array) $item['translated'] ?: [null, null] as $key => $value) { $output[] = "msgstr[{$key}] \"{$value}\""; } } else { if (is_array($item['translated'])) { $item['translated'] = array_pop($item['translated']); } $output[] = "msgstr \"{$item['translated']}\""; } $output[] = ''; $output = implode("\n", $output) . "\n"; fwrite($stream, $output); } return true; } /** * Compiles data into portable object template (POT) format. * * @param resource $stream * @param array $data * @return boolean Success. */ protected function _compilePot($stream, array $data) { return $this->_compilePo($stream, $data); } /** * Compiles data into machine object (MO) format. * * @param resource $stream * @param array $data * @return void * @todo Determine if needed and implement compiler. */ protected function _compileMo($stream, array $data) {} /** * Prepares an item before it is being written and escapes fields. * * All characters from \000 to \037 (this includes new line and tab characters) * as well as the backslash (`\`) and the double quote (`"`) are escaped. * * Literal Windows CRLFs (`\r\n`) are converted to LFs (`\n`) to improve cross platform * compatibility. Escaped single quotes (`'`) are unescaped as they should not need to be. * Double escaped characters are maintained and not escaped once again. * * @link http://www.asciitable.com * @see lithium\g11n\catalog\Adapter::_prepareForWrite() * @param array $item * @return array */ protected function _prepareForWrite(array $item) { $filter = function ($value) use (&$filter) { if (is_array($value)) { return array_map($filter, $value); } $value = strtr($value, ["\\'" => "'", "\\\\" => "\\", "\r\n" => "\n"]); $value = addcslashes($value, "\0..\37\\\""); return $value; }; $fields = ['id', 'ids', 'translated', 'context']; foreach ($fields as $field) { if (isset($item[$field])) { $item[$field] = $filter($item[$field]); } } if (!isset($item['ids']['singular'])) { $item['ids']['singular'] = $item['id']; } $path = Libraries::get(true, 'path'); if (isset($item['occurrences'])) { foreach ($item['occurrences'] as &$occurrence) { $occurrence['file'] = str_replace($path, '', $occurrence['file']); } } return parent::_prepareForWrite($item); } /** * Merges an item into given data and unescapes fields. * * Please note that items with an id containing exclusively whitespace characters * or are empty are **not** being merged. Whitespace characters are space, tab, vertical * tab, line feed, carriage return and form feed. * * @see lithium\g11n\catalog\Adapter::_merge() * @param array $data Data to merge item into. * @param array $item Item to merge into $data. * @return array The merged data. */ protected function _merge(array $data, array $item) { $filter = function ($value) use (&$filter) { if (is_array($value)) { return array_map($filter, $value); } return stripcslashes($value); }; $fields = ['id', 'ids', 'translated', 'context']; foreach ($fields as $field) { if (isset($item[$field])) { $item[$field] = $filter($item[$field]); } } if (isset($item['ids']['singular'])) { $item['id'] = $item['ids']['singular']; } if (empty($item['id']) || ctype_space($item['id'])) { return $data; } return parent::_merge($data, $item); } } ?>