Commit 518ad5c9 authored by Rémy DANIEL

[BUGFIX] Fix wrong encoding of CDATA markers when editing an XLF file

CDATA markers are properly and natively detected by SimpleXMLElement.
When writing back the file to the disk, all strings are enclosed in CDATA,
so the XML is valid and the html tags/entities are preserved and not altered.
parent 09482c8c
......@@ -77,18 +77,14 @@ class FileBaseXLFService extends FileBaseService {
throw new LFException('failure.select.noLangfile');
}
// read xml into array
// $xmlContent = simplexml_load_file($file);
// $xmlContent = json_decode(json_encode($xmlContent), TRUE);
// LIBXML_NOCDATA makes the parser treat CDATA nodes as text nodes.
// HTML entities inside CDATA are not modified,
// and those outside CDATA - normally encoded if the XML is valid - will be decoded.
$contents = file_get_contents($file);
$xmlContent = simplexml_load_file($file,'SimpleXMLElement', LIBXML_NOCDATA);
// Remove CDATA tags and encode html entities
$contents = preg_replace_callback ("#<\!\[CDATA\[(.*)\]\]>#sU", function($matches) {
return htmlspecialchars($matches[1]);
}, $contents);
$xmlContent = simplexml_load_string($contents,'SimpleXMLElement', LIBXML_NOCDATA);
// Cast the XML to an associative array with json_encode/json_decode.
// This way, the values in the XML are cast to strings.
$xmlContent = json_decode(json_encode($xmlContent), TRUE);
// check data
......@@ -268,29 +264,19 @@ class FileBaseXLFService extends FileBaseService {
$body = '<body/>';
if (is_array($phpArray['data']) && count($phpArray['data'])) {
$body = '<body>' . "\n";
$startCDATA = "<![CDATA[";
$endCDATA = "]]>";
$approved = ($targetLanguage !== 'default' ? ' approved="yes"' : '');
foreach ($phpArray['data'] as $constant => $value) {
$approved = ($targetLanguage !== 'default' ? ' approved="yes"' : '');
$enValue = $enLanguage[$constant];
$startCDATA = "<![CDATA[";
$endCDATA = "]]>";
// Detects whether the value contains HTML and, if so, adds CDATA tags
if(preg_match("#<(\w+).*>.*</(\w)>#isU", $value) ||
preg_match("#(\w+).*/>#isU", $value) ||
preg_match("#<!--.*-->#isU", $value)) {
$value = $startCDATA.$value.$endCDATA;
} else {
$value = htmlspecialchars($value);
}
//Detects if html is in the enValue and add CDATA tags
if(preg_match("#<(\w+).*>.*</(\w)>#isU", $enValue) ||
preg_match("#(\w+).*/>#isU", $value) ||
preg_match("#<!--.*-->#isU", $value)) {
$enValue = $startCDATA.$enValue.$endCDATA;
} else {
$enValue = htmlspecialchars($enLanguage[$constant]);
}
// CDATA markers are stripped when reading the XML file from the disk,
// and HTML entities are not decoded.
// So, for simplicity and safety, when writing the data back to the file,
// we always enclose each value in CDATA, so that HTML entities are preserved
// and the XML remains valid.
$value = $startCDATA.$value.$endCDATA;
$enValue = $startCDATA.$enValue.$endCDATA;
$body .= "\t\t\t" . '<trans-unit id="' . htmlspecialchars(
$constant
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment