Commit a8416dea authored by Kevin Ditscheid

Merge branch 'cdata' into 'master'

[BUGFIX] Fix wrong encoding of CDATA markers

Closes #18

See merge request !19
parents 1334b858 7335b6c1
......@@ -85,8 +85,6 @@ abstract class FileBaseService extends FileService {
$this->setWorkspace('base');
parent::init($file, $path, $metaFile);
$this->prepareLanguageFilesForFirstUsage();
}
/**
......@@ -142,62 +140,4 @@ abstract class FileBaseService extends FileService {
$this->localLang = $localLang;
$this->originLang = $originLang;
}
/**
 * Prepares this language file and its translations for the first usage.
 *
 * The base file is run through checkFileForFirstImport(); if that yields no
 * content, nothing happens. Otherwise the adapted base content is collected
 * together with the adapted content of every localized sibling file (one per
 * system language except 'default') that exists, passes checkLocalizedFile()
 * and also yields content. The collected set is then handed to
 * writeFilesWithContent() in a single call.
 *
 * @throws LFException
 * @return void
 */
protected function prepareLanguageFilesForFirstUsage() {
	$baseContent = $this->checkFileForFirstImport($this->absFile);
	if (!$baseContent) {
		// Nothing to adapt for the base file, so the translations are left alone as well.
		return;
	}

	// Map of absolute file path => adapted content that has to be written back.
	$pendingWrites = [$this->absFile => $baseContent];

	foreach (SgLib::getSystemLanguages() as $lang) {
		if ($lang === 'default') {
			continue;
		}

		$localizedPath = \dirname($this->absFile) . '/' . $this->nameLocalizedFile($lang);
		if (!file_exists($localizedPath) || !$this->checkLocalizedFile(basename($localizedPath), $lang)) {
			continue;
		}

		$localizedContent = $this->checkFileForFirstImport($localizedPath);
		if ($localizedContent) {
			$pendingWrites[$localizedPath] = $localizedContent;
		}
	}

	$this->writeFilesWithContent($pendingWrites);
}
/**
 * Checks if the given file is generated by this extension, if not the data will be prepared and returned.
 *
 * A file counts as generated by this extension when it contains the marker
 * '<generator>LFEditor</generator>'. For any other file the raw CDATA
 * markers are replaced by their entity-escaped forms and the adapted content
 * is returned.
 *
 * @param string $file Path of the file to inspect.
 * @return bool|string FALSE if the file does not exist, has the extension 'xml',
 *                     cannot be read, or already carries the generator marker;
 *                     otherwise the adapted file content.
 */
protected function checkFileForFirstImport($file) {
	if (!file_exists($file)) {
		return FALSE;
	}

	// Don't do this on a XML file, because the CDATA will be ignored, if it's done...
	// PATHINFO_EXTENSION returns an empty string for extension-less files, whereas the
	// array form of pathinfo() omits the 'extension' key and triggers an undefined-index notice.
	if (pathinfo($file, PATHINFO_EXTENSION) === 'xml') {
		return FALSE;
	}

	$fileContent = file_get_contents($file);
	if ($fileContent === FALSE) {
		// Unreadable file: report "nothing to adapt" instead of passing FALSE into the string functions.
		return FALSE;
	}

	if (\strpos($fileContent, '<generator>LFEditor</generator>') !== FALSE) {
		return FALSE;
	}

	return \str_replace(['<![CDATA[', ']]>'], ['&lt;![CDATA[', ']]&gt;'], $fileContent);
}
}
......@@ -77,18 +77,14 @@ class FileBaseXLFService extends FileBaseService {
throw new LFException('failure.select.noLangfile');
}
// read xml into array
// $xmlContent = simplexml_load_file($file);
// $xmlContent = json_decode(json_encode($xmlContent), TRUE);
// LIBXML_NOCDATA treat CDATA node as text node.
// htmlentities in CDATA are not modified,
// and those - normally encoded if XML is valid - outside CDATA will be decoded.
$contents = file_get_contents($file);
$xmlContent = simplexml_load_file($file,'SimpleXMLElement', LIBXML_NOCDATA);
// Remove CDATA tags and encode html entities
$contents = preg_replace_callback ("#&lt;\!\[CDATA\[(.*)\]\]&gt;#sU", function($matches) {
return htmlspecialchars($matches[1]);
}, $contents);
$xmlContent = simplexml_load_string($contents,'SimpleXMLElement', LIBXML_NOCDATA);
// Cast XML to associative array with json_decode/encode.
// This way, values in the XML are cast to strings.
$xmlContent = json_decode(json_encode($xmlContent), TRUE);
// check data
......@@ -268,29 +264,19 @@ class FileBaseXLFService extends FileBaseService {
$body = '<body/>';
if (is_array($phpArray['data']) && count($phpArray['data'])) {
$body = '<body>' . "\n";
$startCDATA = "<![CDATA[";
$endCDATA = "]]>";
$approved = ($targetLanguage !== 'default' ? ' approved="yes"' : '');
foreach ($phpArray['data'] as $constant => $value) {
$approved = ($targetLanguage !== 'default' ? ' approved="yes"' : '');
$enValue = $enLanguage[$constant];
$startCDATA = "<![CDATA[";
$endCDATA = "]]>";
//Detects if html is in the value and add CDATA tags
if(preg_match("#<(\w+).*>.*</(\w)>#isU", $value) ||
preg_match("#(\w+).*/>#isU", $value) ||
preg_match("#<!--.*-->#isU", $value)) {
$value = $startCDATA.$value.$endCDATA;
} else {
$value = htmlspecialchars($value);
}
//Detects if html is in the enValue and add CDATA tags
if(preg_match("#<(\w+).*>.*</(\w)>#isU", $enValue) ||
preg_match("#(\w+).*/>#isU", $value) ||
preg_match("#<!--.*-->#isU", $value)) {
$enValue = $startCDATA.$enValue.$endCDATA;
} else {
$enValue = htmlspecialchars($enLanguage[$constant]);
}
// CDATA markers are stripped when reading the XML file from the disk,
// and html entities are not decoded.
// So, for simplicity and safety, when writing back the data to the file,
// we will always enclose value in CDATA, so html entities are preserved
// and XML remains valid.
$value = $startCDATA.$value.$endCDATA;
$enValue = $startCDATA.$enValue.$endCDATA;
$body .= "\t\t\t" . '<trans-unit id="' . htmlspecialchars(
$constant
......
......@@ -181,23 +181,12 @@ class FileBaseXMLService extends FileBaseService {
$subNode->addAttribute('type', 'array');
$this->arrayToXml($value, $subNode, $parentTagMap, $tagName);
} else {
if (SgLib::checkForCdataInString($value)) {
$simpleSubNode = $xmlElement->addChildCData($tagName, $value);
} else {
$startCDATA = "<![CDATA[";
$endCDATA = "]]>";
//Detects if html is in the value and add CDATA tags
if(preg_match("#<(\w+).*>.*<\/(\w+)>#isU", $value) ||
preg_match("#(\w+).*/>#isU", $value) ||
preg_match("#<!--.*-->#isU", $value)) {
$value = $startCDATA.$value.$endCDATA;
} else {
$value = htmlspecialchars($value);
}
$simpleSubNode = $xmlElement->addChild($tagName, $value);
}
// CDATA markers are stripped when reading the XML file from the disk,
// and html entities are not decoded.
// So, for simplicity and safety, when writing back the data to the file,
// we will always enclose value in CDATA, so html entities are preserved
// and XML remains valid.
$simpleSubNode = $xmlElement->addChildCData($tagName, $value);
if ($indexAttributeValue !== NULL) {
$simpleSubNode->addAttribute('index', $indexAttributeValue);
}
......@@ -230,6 +219,7 @@ class FileBaseXMLService extends FileBaseService {
$this->arrayToXml($phpArray, $xmlElement, $parentTagMap);
$formattedXml = $this->formatXml($xmlElement);
return $formattedXml;
}
......@@ -350,8 +340,13 @@ class FileBaseXMLService extends FileBaseService {
//Search CDATA tags to decode the content inside
//Because $xmlElement->asXML() encodes html entities
$xmlRaw = preg_replace_callback ("#(&lt;\!\[CDATA\[.*\]\]&gt;)#sU", function($matches) {
return htmlspecialchars_decode($matches[1]);}, $xmlRaw);
$xmlRaw = preg_replace_callback (
"#(&lt;\!\[CDATA\[.*\]\]&gt;)#sU",
function($matches) {
return htmlspecialchars_decode($matches[1]);
},
$xmlRaw
);
$dom->loadXML($xmlRaw);
$formattedXml = $dom->saveXML();
......
......@@ -52,7 +52,9 @@ class CdataSupportingSimpleXMLElement extends \SimpleXMLElement {
public function addChildCData($name, $cdataText) {
	/** @var CdataSupportingSimpleXMLElement $child */
	$child = $this->addChild($name);

	// Hand the text to addCData() exactly once, and not at all for an empty string,
	// so that an empty value leaves the freshly added child without any CDATA payload.
	if ($cdataText !== '') {
		$child->addCData($cdataText);
	}

	return $child;
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment