xml_element_forest.hpp
Go to the documentation of this file.
00001 /* 00002 Copyright 2007 Adobe Systems Incorporated 00003 Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt 00004 or a copy at http://opensource.adobe.com/licenses.html) 00005 */ 00006 00007 /**************************************************************************************************/ 00008 00009 #ifndef ADOBE_XML_ELEMENT_FOREST_HPP 00010 #define ADOBE_XML_ELEMENT_FOREST_HPP 00011 00012 /**************************************************************************************************/ 00013 00014 #include <adobe/config.hpp> 00015 00016 #include <algorithm> 00017 #include <iostream> 00018 #include <string> 00019 00020 #include <adobe/algorithm/find.hpp> 00021 #include <adobe/dictionary.hpp> 00022 #include <adobe/forest.hpp> 00023 #include <adobe/iterator.hpp> 00024 #include <adobe/string.hpp> 00025 #include <adobe/xml_parser.hpp> 00026 00027 #include <boost/bind.hpp> 00028 00029 /**************************************************************************************************/ 00030 00031 namespace adobe { 00032 00033 /**************************************************************************************************/ 00052 /**************************************************************************************************/ 00056 typedef forest<dictionary_t> element_forest_t; 00057 00058 /**************************************************************************************************/ 00059 00060 #define ADOBE_XML_NODE_METADATA_NAME(x) \ 00061 inline name_t name_##x() { static const name_t name_s(static_name_t("> "#x)); return name_s; } 00062 00063 // These names are prefixed with a '> ' to prevent collisions with XML 00064 // attribute names (which are not allowed to have the '>' in them). 00065 00066 ADOBE_XML_NODE_METADATA_NAME(type) 00067 ADOBE_XML_NODE_METADATA_NAME(chardata) 00068 ADOBE_XML_NODE_METADATA_NAME(element_name) 00069 00070 /**************************************************************************************************/ 00071 #if !defined(ADOBE_NO_DOCUMENTATION) 00072 namespace implementation { 00073 00074 /**************************************************************************************************/ 00075 00076 inline std::string token_to_string(const token_range_t& token) 00077 { 00078 return std::string(reinterpret_cast<const char*>(token.first), 00079 std::distance(token.first, token.second)); 00080 } 00081 00082 /**************************************************************************************************/ 00083 00084 inline name_t token_to_name(const token_range_t& token) 00085 { 00086 return name_t(token_to_string(token).c_str()); 00087 } 00088 00089 /**************************************************************************************************/ 00090 00091 inline dictionary_t attribute_set_to_dictionary(const attribute_set_t& set) 00092 { 00093 dictionary_t result; 00094 00095 for (attribute_set_t::iterator iter(set.begin()), last(set.end()); iter != last; ++iter) 00096 result[implementation::token_to_name(iter->first)] = 00097 any_regular_t(implementation::token_to_string(iter->second)); 00098 00099 return result; 00100 } 00101 00102 /**************************************************************************************************/ 00103 00104 struct converter_t 00105 { 00106 element_forest_t parse(const char* xml) 00107 { 00108 element_forest_t root; 00109 00110 token_range_t xml_range(static_token_range(xml)); 00111 00112 make_xml_parser(xml_range.first, xml_range.second, 00113 line_position_t("top level xml"), 00114 always_true<token_range_t>(), 00115 boost::bind(&converter_t::xml_element_node, 00116 boost::ref(*this), 00117 _1, _2, _3, _4, 00118 boost::ref(root), 00119 root.begin()), 00120 std::back_inserter(chardata_m)) 00121 .parse_content(); 00122 00123 return root; 00124 } 00125 00126 token_range_t xml_element_node(const token_range_t& /*entire_element_range*/, 00127 const token_range_t& name, 00128 const attribute_set_t& attribute_set, 00129 const token_range_t& value, 00130 element_forest_t& forest, 00131 element_forest_t::iterator parent_node) 00132 { 00133 push_chardata(forest, parent_node); 00134 00135 element_forest_t::iterator new_parent = 00136 forest.insert(trailing_of(parent_node), dictionary_t()); 00137 00138 (*new_parent) = implementation::attribute_set_to_dictionary(attribute_set); 00139 00140 (*new_parent)[name_type()] = any_regular_t(std::string("element")); 00141 (*new_parent)[name_element_name()] = any_regular_t(implementation::token_to_string(name)); 00142 00143 make_xml_parser(value.first, value.second, 00144 line_position_t("xml_node"), 00145 always_true<token_range_t>(), 00146 boost::bind(&converter_t::xml_element_node, 00147 boost::ref(*this), 00148 _1, _2, _3, _4, 00149 boost::ref(forest), 00150 new_parent), 00151 std::back_inserter(chardata_m)) 00152 .parse_content(); 00153 00154 push_chardata(forest, new_parent); 00155 00156 return token_range_t(); 00157 } 00158 00159 private: 00160 std::vector<char> chardata_m; 00161 00162 void push_chardata(element_forest_t& forest, element_forest_t::iterator parent) 00163 { 00164 if (chardata_m.empty()) 00165 return; 00166 00167 std::string chardata_str(repair_whitespace(std::string(&chardata_m[0], chardata_m.size()))); 00168 00169 if (chardata_str.empty() == false) 00170 { 00171 dictionary_t chardata; 00172 00173 chardata[name_type()] = any_regular_t(std::string("chardata")); 00174 chardata[name_chardata()] = any_regular_t(chardata_str); 00175 00176 forest.insert(trailing_of(parent), chardata); 00177 } 00178 00179 chardata_m = std::vector<char>(); 00180 } 00181 00182 static std::string repair_whitespace(const std::string& src) 00183 { 00184 std::string result; 00185 std::string::const_iterator iter(src.begin()); 00186 std::string::const_iterator last(src.end()); 00187 static const boost::function<bool (char)> isspace = 00188 boost::bind(&std::isspace<char>, _1, std::locale()); 00189 00190 while (true) 00191 { 00192 std::string::const_iterator ws_begin(std::find_if(iter, last, isspace)); 00193 00194 if (iter != ws_begin) 00195 result << std::string(iter, ws_begin); 00196 00197 if (ws_begin == last) 00198 break; 00199 00200 std::string::const_iterator ws_end(find_if_not(ws_begin, last, isspace)); 00201 00202 result += ' '; 00203 00204 iter = ws_end; 00205 } 00206 00207 return result; 00208 } 00209 }; 00210 00211 /**************************************************************************************************/ 00212 00213 template <typename T> 00214 inline void indent_stream(std::ostream& stream, T count) 00215 { for (; count != 0; --count) stream << " "; } 00216 00217 /**************************************************************************************************/ 00218 00219 } // namespace implementation 00220 #endif 00221 /**************************************************************************************************/ 00235 template <typename R> // R is a depth adaptor range 00236 void element_forest_to_xml(const R& f, std::ostream& output, bool verbose = true) 00237 { 00238 typedef typename boost::range_iterator<R>::type iterator; 00239 00240 for (iterator first(boost::begin(f)), last(boost::end(f)); first != last; ++first) 00241 { 00242 const dictionary_t& node(*first); 00243 const std::string& type(get_value(node, name_type()).cast<std::string>()); 00244 00245 if (type == "element") 00246 { 00247 if (first.edge() == forest_leading_edge) 00248 { 00249 if (verbose) 00250 implementation::indent_stream(output, first.depth()); 00251 00252 output << "<" << get_value(node, name_element_name()); 00253 00254 for (dictionary_t::const_iterator iter(node.begin()), last(node.end()); 00255 iter != last; ++iter) 00256 if (iter->first.c_str()[0] != '>') 00257 output << ' ' << iter->first.c_str() << "='" << iter->second << "'"; 00258 00259 if (has_children(first)) 00260 { 00261 output << ">"; 00262 00263 if (verbose) 00264 output << " <!-- " 00265 << std::distance(child_begin(first), child_end(first)) 00266 << " children -->"; 00267 } 00268 else 00269 { 00270 output << "/>"; 00271 } 00272 00273 if (verbose) 00274 output << std::endl; 00275 } 00276 else if (has_children(first)) 00277 { 00278 if (verbose) 00279 implementation::indent_stream(output, first.depth()); 00280 00281 output << "</" << get_value(node, name_element_name()) << ">"; 00282 00283 if (verbose) 00284 output << std::endl; 00285 } 00286 } 00287 else if (type == "chardata" && first.edge() == forest_leading_edge) 00288 { 00289 const std::string& chardata(get_value(node, name_chardata()).cast<std::string>()); 00290 00291 if (!verbose) 00292 { 00293 output << chardata; 00294 } 00295 else 00296 { 00297 if (adobe::find_if_not(chardata, 00298 boost::bind(&std::isspace<char>, 00299 _1, 00300 std::locale())) == chardata.end()) 00301 continue; 00302 00303 implementation::indent_stream(output, first.depth()); 00304 00305 output << chardata << std::endl; 00306 } 00307 } 00308 } 00309 } 00310 00311 /**************************************************************************************************/ 00322 element_forest_t xml_parse_to_forest(const char* xml) 00323 { 00324 return implementation::converter_t().parse(xml); 00325 } 00326 00327 /**************************************************************************************************/ 00328 00329 } // namespace adobe 00330 00331 /**************************************************************************************************/ 00332 00333 #endif 00334 00335 /**************************************************************************************************/ |