You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
318 lines
9.6 KiB
318 lines
9.6 KiB
<?php
|
|
/**
|
|
* li₃: the most RAD framework for PHP (http://li3.me)
|
|
*
|
|
* Copyright 2009, Union of RAD. All rights reserved. This source
|
|
* code is distributed under the terms of the BSD 3-Clause License.
|
|
* The full license text can be found in the LICENSE.txt file.
|
|
*/
|
|
|
|
namespace lithium\util;
|
|
|
|
use lithium\security\Random;
|
|
|
|
/**
|
|
* Text manipulation utility class. Includes functionality for generating UUIDs,
|
|
* {:tag} and regex replacement, and tokenization.
|
|
*/
|
|
class Text {

	/**
	 * UUID-related constant. Clears all bits of version byte (`00001111`).
	 */
	const UUID_CLEAR_VER = 15;

	/**
	 * UUID constant that sets the version bit for generated UUIDs (`01000000`).
	 */
	const UUID_VERSION_4 = 64;

	/**
	 * Clears relevant bits of variant byte (`00111111`).
	 */
	const UUID_CLEAR_VAR = 63;

	/**
	 * The RFC 4122 variant (`10000000`).
	 */
	const UUID_VAR_RFC = 128;

	/**
	 * Generates an RFC 4122-compliant version 4 UUID.
	 *
	 * Sixteen bytes are requested from `Random::generate()`; byte 6 is then masked to carry
	 * the version nibble and byte 8 is masked to carry the RFC 4122 variant bits before the
	 * bytes are hex-encoded into the usual 8-4-4-4-12 grouping.
	 *
	 * @return string The string representation of an RFC 4122-compliant, version 4 UUID.
	 * @link http://www.ietf.org/rfc/rfc4122.txt RFC 4122: UUID URN Namespace
	 */
	public static function uuid() {
		$uuid = Random::generate(16);

		// `&` binds tighter than `|`; the parentheses only make that explicit.
		$uuid[6] = chr((ord($uuid[6]) & static::UUID_CLEAR_VER) | static::UUID_VERSION_4);
		$uuid[8] = chr((ord($uuid[8]) & static::UUID_CLEAR_VAR) | static::UUID_VAR_RFC);

		return implode('-', [
			bin2hex(substr($uuid, 0, 4)),
			bin2hex(substr($uuid, 4, 2)),
			bin2hex(substr($uuid, 6, 2)),
			bin2hex(substr($uuid, 8, 2)),
			bin2hex(substr($uuid, 10, 6))
		]);
	}

	/**
	 * Replaces variable placeholders inside a string with any given data. Each key
	 * in the `$data` array corresponds to a variable placeholder name in `$str`.
	 *
	 * Usage:
	 * ```
	 * Text::insert(
	 *     'My name is {:name} and I am {:age} years old.',
	 *     ['name' => 'Bob', 'age' => '65']
	 * ); // returns 'My name is Bob and I am 65 years old.'
	 * ```
	 *
	 * When `$data` has an element at index `0` and `$str` contains `?` characters (and the
	 * plain named replacement described above was not taken), each `?` is replaced, in
	 * order, with the next value shifted off `$data`.
	 *
	 * Please note that optimizations have been applied to this method and parts of the code
	 * may look like they can be refactored or removed, but in fact they are part of the
	 * applied optimization. Please check the history for this section of code before
	 * refactoring.
	 *
	 * @param string $str A string containing variable place-holders.
	 * @param array $data A key, value array where each key stands for a place-holder variable
	 *        name to be replaced with value.
	 * @param array $options Available options are:
	 *        - `'after'`: The character or string after the name of the variable place-holder
	 *          (defaults to `'}'`).
	 *        - `'before'`: The character or string in front of the name of the variable
	 *          place-holder (defaults to `'{:'`).
	 *        - `'clean'`: A boolean or array with instructions for `Text::clean()`
	 *          (defaults to `false`, i.e. no cleaning).
	 *        - `'escape'`: The character or string used to escape the before character or
	 *          string (defaults to `null`). When set and no `'format'` is given, a regular
	 *          expression is built from `'before'`, `'after'` and `'escape'`.
	 *        - `'format'`: Either the string `'regex'`, which builds the expression from
	 *          `'before'`, `'after'` and `'escape'`, or a `sprintf()` template for a regular
	 *          expression in which `%s` receives the quoted place-holder name, e.g.
	 *          `'/(?<!\\)\:%s/'` (defaults to `null`). Please note that this option takes
	 *          precedence over all other options except `'clean'`.
	 * @return string
	 */
	public static function insert($str, array $data, array $options = []) {
		$defaults = [
			'before' => '{:',
			'after' => '}',
			'escape' => null,
			'format' => null,
			'clean' => false
		];
		$options += $defaults;
		$format = $options['format'];

		if ($format === 'regex' || (!$format && $options['escape'])) {
			// Literal `%` in the delimiters must survive the later `sprintf()` per key.
			$format = sprintf(
				'/(?<!%s)%s%%s%s/',
				preg_quote($options['escape'], '/'),
				str_replace('%', '%%', preg_quote($options['before'], '/')),
				str_replace('%', '%%', preg_quote($options['after'], '/'))
			);
		}

		// Fast path: plain named place-holders, no regular expression involved.
		if (!$format && key($data) !== 0) {
			$replace = [];

			foreach ($data as $key => $value) {
				if (!is_scalar($value)) {
					if (is_object($value) && method_exists($value, '__toString')) {
						$value = (string) $value;
					} else {
						$value = '';
					}
				}
				$replace["{$options['before']}{$key}{$options['after']}"] = $value;
			}
			$str = strtr($str, $replace);
			return $options['clean'] ? static::clean($str, $options) : $str;
		}

		// Positional `?` place-holders.
		if (strpos($str, '?') !== false && isset($data[0])) {
			$offset = 0;

			while (($pos = strpos($str, '?', $offset)) !== false) {
				$val = array_shift($data);
				// Continue searching behind the inserted value, so `?` inside it is kept.
				$offset = $pos + strlen($val);
				$str = substr_replace($str, $val, $pos, 1);
			}
			return $options['clean'] ? static::clean($str, $options) : $str;
		}

		// Regular expression based replacement, going through an intermediate hash.
		foreach ($data as $key => $value) {
			if (!$key = sprintf($format, preg_quote($key, '/'))) {
				continue;
			}
			$hash = crc32($key);

			$str = preg_replace($key, $hash, $str);
			$str = str_replace($hash, $value, $str);
		}

		// Strip the escape sequence from place-holders that were escaped.
		if (!isset($options['format']) && isset($options['before'])) {
			$str = str_replace($options['escape'] . $options['before'], $options['before'], $str);
		}
		return $options['clean'] ? static::clean($str, $options) : $str;
	}

	/**
	 * Cleans up a `Text::insert()` formatted string with given `$options` depending
	 * on the `'clean'` option. The goal of this function is to replace all whitespace
	 * and unneeded mark-up around place-holders that did not get replaced by `Text::insert()`.
	 *
	 * @param string $str The string to clean.
	 * @param array $options Available options are:
	 *        - `'after'`: characters marking the end of targeted substring
	 *          (defaults to `'}'`).
	 *        - `'before'`: characters marking the start of targeted substring
	 *          (defaults to `'{:'`).
	 *        - `'clean'`: `true` (the default), a method name, or an array of clean options:
	 *          - `'andText'`: For the `'html'` method, whether to run the `'text'` method
	 *            afterwards (defaults to `true`).
	 *          - `'gap'`: Regular expression matching gaps (`'text'` method only).
	 *          - `'method'`: Either `'text'` or `'html'` (defaults to `'text'`).
	 *          - `'replacement'`: Text to use for cleaned substrings (defaults to `''`).
	 *          - `'word'`: Regular expression matching words.
	 * @return string The cleaned string.
	 */
	public static function clean($str, array $options = []) {
		// `$options` is optional, so every key read below needs a default.
		$options += ['before' => '{:', 'after' => '}', 'clean' => true];

		if (is_array($options['clean'])) {
			$clean = $options['clean'] + ['method' => 'text'];
		} else {
			$clean = [
				'method' => is_bool($options['clean']) ? 'text' : $options['clean']
			];
		}

		switch ($clean['method']) {
			case 'text':
				$clean += [
					'word' => '[\w,.]+',
					'gap' => '[\s]*(?:(?:and|or|,)[\s]*)?',
					'replacement' => ''
				];
				$before = preg_quote($options['before'], '/');
				$after = preg_quote($options['after'], '/');

				// Place-holder followed by a gap, preceded by a gap, or surrounded by gaps.
				$kleenex = sprintf(
					'/(%s%s%s%s|%s%s%s%s|%s%s%s%s%s)/',
					$before, $clean['word'], $after, $clean['gap'],
					$clean['gap'], $before, $clean['word'], $after,
					$clean['gap'], $before, $clean['word'], $after, $clean['gap']
				);
				$str = preg_replace($kleenex, $clean['replacement'], $str);
			break;
			case 'html':
				$clean += [
					'word' => '[\w,.]+',
					'andText' => true,
					'replacement' => ''
				];
				// Double-quoted attributes whose value consists only of place-holders.
				$kleenex = sprintf(
					'/[\s]*[a-z]+=(")(%s%s%s[\s]*)+\\1/i',
					preg_quote($options['before'], '/'),
					$clean['word'],
					preg_quote($options['after'], '/')
				);
				$str = preg_replace($kleenex, $clean['replacement'], $str);

				if ($clean['andText']) {
					return static::clean($str, [
						'clean' => ['method' => 'text']
					] + $options);
				}
			break;
		}
		return $str;
	}

	/**
	 * Extract a part of a string based on a regular expression `$regex`.
	 *
	 * @param string $regex The regular expression to use.
	 * @param string $str The string to run the extraction on.
	 * @param integer $index The number of the part to return based on the regex.
	 * @return mixed The matched part, `null` if the expression matched but has no group
	 *         `$index`, or `false` if the expression did not match.
	 */
	public static function extract($regex, $str, $index = 0) {
		if (!preg_match($regex, $str, $match)) {
			return false;
		}
		return isset($match[$index]) ? $match[$index] : null;
	}

	/**
	 * Tokenizes a string using `$options['separator']`, ignoring any instances of
	 * `$options['separator']` that appear between `$options['leftBound']` and
	 * `$options['rightBound']`.
	 *
	 * The separator and both bounds are compared against single characters of `$data`,
	 * so each of them is expected to be one character long. When both bounds are the
	 * same character (i.e. a quote), occurrences alternately open and close a scope.
	 *
	 * @param string $data The data to tokenize.
	 * @param array $options Options to use when tokenizing:
	 *        -`'separator'` _string_: The token to split the data on (defaults to `','`).
	 *        -`'leftBound'` _string_: Left scope-enclosing boundary (defaults to `'('`).
	 *        -`'rightBound'` _string_: Right scope-enclosing boundary (defaults to `')'`).
	 * @return array Returns an array of trimmed tokens. If `$data` is empty or already an
	 *         array, it is returned unchanged.
	 */
	public static function tokenize($data, array $options = []) {
		$options += ['separator' => ',', 'leftBound' => '(', 'rightBound' => ')'];

		if (!$data || is_array($data)) {
			return $data;
		}

		$depth = 0;
		$offset = 0;
		$buffer = '';
		$results = [];
		$length = strlen($data);
		$open = false;

		while ($offset <= $length) {
			$tmpOffset = -1;
			$offsets = [
				strpos($data, $options['separator'], $offset),
				strpos($data, $options['leftBound'], $offset),
				strpos($data, $options['rightBound'], $offset)
			];

			// Pick the nearest upcoming separator or bound; `-1` means none is left.
			for ($i = 0; $i < 3; $i++) {
				if ($offsets[$i] !== false && ($offsets[$i] < $tmpOffset || $tmpOffset === -1)) {
					$tmpOffset = $offsets[$i];
				}
			}

			if ($tmpOffset === -1) {
				$results[] = $buffer . substr($data, $offset);
				$offset = $length + 1;
				continue;
			}
			$buffer .= substr($data, $offset, ($tmpOffset - $offset));
			$char = $data[$tmpOffset];

			if ($char === $options['separator'] && $depth === 0) {
				$results[] = $buffer;
				$buffer = '';
			} else {
				// Square brackets: the `{}` string offset syntax was removed in PHP 8.0.
				$buffer .= $char;
			}

			if ($options['leftBound'] !== $options['rightBound']) {
				if ($char === $options['leftBound']) {
					$depth++;
				}
				if ($char === $options['rightBound']) {
					$depth--;
				}
				$offset = ++$tmpOffset;
				continue;
			}

			// Identical bounds: toggle between entering and leaving the enclosed scope.
			if ($char === $options['leftBound']) {
				($open) ? $depth-- : $depth++;
				$open = !$open;
			}
			$offset = ++$tmpOffset;
		}

		if (!$results && $buffer) {
			$results[] = $buffer;
		}
		return $results ? array_map('trim', $results) : [];
	}
}
|
|
|
|
?>
|