/*
* Copyright (C) 2001-2008 Jacek Sieka, arnetheduck on gmail point com
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include "stdinc.h"
#include "DCPlusPlus.h"
#include "SimpleXML.h"
namespace dcpp {
const string SimpleXML::utf8Header = "\r\n";
string& SimpleXML::escape(string& aString, bool aAttrib, bool aLoading /* = false */, const string &encoding /* = "UTF-8" */) {
string::size_type i = 0;
const char* chars = aAttrib ? "<&>'\"" : "<&>";
if(aLoading) {
while((i = aString.find('&', i)) != string::npos) {
if(aString.compare(i+1, 3, "lt;") == 0) {
aString.replace(i, 4, 1, '<');
} else if(aString.compare(i+1, 4, "amp;") == 0) {
aString.replace(i, 5, 1, '&');
} else if(aString.compare(i+1, 3, "gt;") == 0) {
aString.replace(i, 4, 1, '>');
} else if(aAttrib) {
if(aString.compare(i+1, 5, "apos;") == 0) {
aString.replace(i, 6, 1, '\'');
} else if(aString.compare(i+1, 5, "quot;") == 0) {
aString.replace(i, 6, 1, '"');
}
}
i++;
}
i = 0;
if( (i = aString.find('\n')) != string::npos) {
if(i > 0 && aString[i-1] != '\r') {
// This is a unix \n thing...convert it...
i = 0;
while( (i = aString.find('\n', i) ) != string::npos) {
if(aString[i-1] != '\r')
aString.insert(i, 1, '\r');
i+=2;
}
}
}
aString = Text::toUtf8(aString, encoding);
} else {
while( (i = aString.find_first_of(chars, i)) != string::npos) {
switch(aString[i]) {
case '<': aString.replace(i, 1, "<"); i+=4; break;
case '&': aString.replace(i, 1, "&"); i+=5; break;
case '>': aString.replace(i, 1, ">"); i+=4; break;
case '\'': aString.replace(i, 1, "'"); i+=6; break;
case '"': aString.replace(i, 1, """); i+=6; break;
default: dcasserta(0);
}
}
// No need to convert back to acp since our utf8Header denotes we
// should store it as utf8.
}
return aString;
}
void SimpleXML::Tag::appendAttribString(string& tmp) {
for(StringPairIter i = attribs.begin(); i!= attribs.end(); ++i) {
tmp.append(i->first);
tmp.append("=\"", 2);
if(needsEscape(i->second, true)) {
string tmp2(i->second);
escape(tmp2, true);
tmp.append(tmp2);
} else {
tmp.append(i->second);
}
tmp.append("\" ", 2);
}
tmp.erase(tmp.size()-1);
}
/**
* The same as the version above, but writes to a file instead...yes, this could be made
* with streams and only one code set but streams are slow...the file f should be a buffered
* file, otherwise things will be very slow (I assume write is not expensive and call it a lot
*/
void SimpleXML::Tag::toXML(int indent, OutputStream* f) {
if(children.empty() && data.empty()) {
string tmp;
tmp.reserve(indent + name.length() + 30);
tmp.append(indent, '\t');
tmp.append(1, '<');
tmp.append(name);
tmp.append(1, ' ');
appendAttribString(tmp);
tmp.append("/>\r\n", 4);
f->write(tmp);
} else {
string tmp;
tmp.append(indent, '\t');
tmp.append(1, '<');
tmp.append(name);
tmp.append(1, ' ');
appendAttribString(tmp);
if(children.empty()) {
tmp.append(1, '>');
if(needsEscape(data, false)) {
string tmp2(data);
escape(tmp2, false);
tmp.append(tmp2);
} else {
tmp.append(data);
}
} else {
tmp.append(">\r\n", 3);
f->write(tmp);
tmp.clear();
for(Iter i = children.begin(); i!=children.end(); ++i) {
(*i)->toXML(indent + 1, f);
}
tmp.append(indent, '\t');
}
tmp.append("", 2);
tmp.append(name);
tmp.append(">\r\n", 3);
f->write(tmp);
}
}
bool SimpleXML::findChild(const string& aName) throw() {
dcassert(current != NULL);
if(found && currentChild != current->children.end())
currentChild++;
while(currentChild!=current->children.end()) {
if((*currentChild)->name == aName) {
found = true;
return true;
} else
currentChild++;
}
return false;
}
string::size_type SimpleXMLReader::loadAttribs(const string& name, const string& tmp, string::size_type start) throw(SimpleXMLException) {
string::size_type i = start;
string::size_type j;
for(;;) {
if((j = tmp.find_first_of("= \"'/>", i)) == string::npos) {
throw SimpleXMLException("Missing '=' in " + name);
}
if(tmp[j] != '=') {
throw SimpleXMLException("Missing '=' in " + name);
}
if(tmp[j+1] != '"' && tmp[j+1] != '\'') {
throw SimpleXMLException("Invalid character after '=' in " + name);
}
string::size_type x = j + 2;
string::size_type y;
if((y = tmp.find(tmp[j+1], x)) == string::npos) {
throw SimpleXMLException("Missing '" + string(1, tmp[j+1]) + "' in " + name);
}
// Ok, we have an attribute...
attribs.push_back(make_pair(tmp.substr(i, j-i), tmp.substr(x, y-x)));
SimpleXML::escape(attribs.back().second, true, true, encoding);
i = tmp.find_first_not_of(' ', y + 1);
if(tmp[i] == '/' || tmp[i] == '>') {
return i;
}
}
}
string::size_type SimpleXMLReader::fromXML(const string& tmp, const string& n, string::size_type start, int depth) throw(SimpleXMLException) {
string::size_type i = start;
string::size_type j;
bool hasChildren = false;
for(;;) {
if((j = tmp.find('<', i)) == string::npos) {
if(depth > 0) {
throw SimpleXMLException("Missing end tag in " + n);
}
return tmp.size();
} else if(depth > maxNesting) {
throw SimpleXMLException("Too many nested tags (depth >" + Util::toString(maxNesting) + ")");
}
// Check that we have at least 3 more characters as the shortest valid xml tag is ...
if((j + 3) > tmp.size()) {
throw SimpleXMLException("Missing end tag in " + n);
}
i = j + 1;
if(tmp[i] == '?') {
// processing instruction ?>, check encoding...
if((j = tmp.find("?>", i)) == string::npos) {
throw SimpleXMLException("Missing '?>' in " + n);
}
string str = tmp.substr(i, j - i);
if((i = str.find("encoding=\"")) != string::npos) {
string::size_type k = str.find('\"', i + 10);
if(k != string::npos && k < j) {
encoding = str.substr(i + 10, k - i - 10);
}
}
i = j + 2;
continue;
}
if(tmp[i] == '!' && tmp[i+1] == '-' && tmp[i+2] == '-') {
// , ignore...
if((j = tmp.find("-->", i)) == string::npos) {
throw SimpleXMLException("Missing '-->' in " + n);
}
i = j + 3;
continue;
}
// Check if we reached the end tag
if(tmp[i] == '/') {
i++;
if( (tmp.compare(i, n.length(), n) == 0) &&
(tmp[i + n.length()] == '>') )
{
if(!hasChildren) {
data = tmp.substr(start, i - start - 2);
SimpleXML::escape(data, false, true, encoding);
} else {
data.clear();
}
return i + n.length() + 1;
} else {
throw SimpleXMLException("Missing end tag in " + n);
}
}
// Alright, we have a real tag for sure...now get the name of it.
if((j = tmp.find_first_of(" />", i)) == string::npos) {
throw SimpleXMLException("Missing '>' in " + n);
}
string name = tmp.substr(i, j-i);
hasChildren = true;
if(tmp[j] == ' ') {
if((j = tmp.find_first_not_of(' ', j+1)) == string::npos) {
throw SimpleXMLException("Missing '>' in " + name);
}
}
attribs.clear();
if(tmp[j] != '/' && tmp[j] != '>') {
// We have attribs...
j = loadAttribs(name, tmp, j);
}
if(tmp[j] == '>') {
// This is a real tag with data etc...
cb->startTag(name, attribs, false);
j = fromXML(tmp, name, j+1, depth+1);
cb->endTag(name, data);
} else {
// A simple tag (
cb->startTag(name, attribs, true);
j++;
}
i = j;
}
}
void SimpleXML::addTag(const string& aName, const string& aData /* = "" */) throw(SimpleXMLException) {
if(aName.empty()) {
throw SimpleXMLException("Empty tag names not allowed");
}
if(current == &root && !current->children.empty()) {
throw SimpleXMLException("Only one root tag allowed");
} else {
current->children.push_back(new Tag(aName, aData, current));
currentChild = current->children.end() - 1;
}
}
void SimpleXML::addAttrib(const string& aName, const string& aData) throw(SimpleXMLException) {
if(current == &root)
throw SimpleXMLException("No tag is currently selected");
current->attribs.push_back(make_pair(aName, aData));
}
void SimpleXML::addChildAttrib(const string& aName, const string& aData) throw(SimpleXMLException) {
checkChildSelected();
(*currentChild)->attribs.push_back(make_pair(aName, aData));
}
void SimpleXML::fromXML(const string& aXML) throw(SimpleXMLException) {
if(!root.children.empty()) {
delete root.children[0];
root.children.clear();
}
TagReader t(&root);
SimpleXMLReader(&t).fromXML(aXML);
if(root.children.size() != 1) {
throw SimpleXMLException("Invalid XML file, missing or multiple root tags");
}
current = &root;
resetCurrentChild();
}
} // namespace dcpp