xstring.hpp
Go to the documentation of this file.
00001 /* 00002 Copyright 2005-2007 Adobe Systems Incorporated 00003 Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt 00004 or a copy at http://stlab.adobe.com/licenses.html) 00005 */ 00006 00007 /*************************************************************************************************/ 00008 00009 #ifndef ADOBE_XSTRING_HPP 00010 #define ADOBE_XSTRING_HPP 00011 00012 /*************************************************************************************************/ 00013 00014 #include <adobe/config.hpp> 00015 00016 #include <adobe/functional.hpp> 00017 #include <adobe/implementation/string_pool.hpp> 00018 #include <adobe/istream.hpp> 00019 #include <adobe/name.hpp> 00020 #include <adobe/string.hpp> 00021 #include <adobe/unicode.hpp> 00022 #include <adobe/xml_parser.hpp> 00023 00024 #include <boost/function.hpp> 00025 #include <boost/noncopyable.hpp> 00026 00027 #include <sstream> 00028 #include <vector> 00029 #include <map> 00030 #include <cassert> 00031 #include <cctype> 00032 00033 /*************************************************************************************************/ 00034 00035 namespace adobe { 00036 00037 /*************************************************************************************************/ 00038 00039 namespace implementation { 00040 00041 /****************************************************************************************************/ 00042 00043 inline bool xstring_preorder_predicate(const token_range_t& range) 00044 { 00045 // we want to check for both xstr and marker tags because both are 00046 // handled by the xstring system 00047 00048 return token_range_equal(range, static_token_range("xstr")) || 00049 token_range_equal(range, static_token_range("marker")); 00050 } 00051 00052 /****************************************************************************************************/ 00053 00054 struct null_output_t 00055 { 00056 typedef std::output_iterator_tag iterator_category; 00057 typedef null_output_t value_type; 00058 typedef std::ptrdiff_t difference_type; 00059 typedef value_type* pointer; 00060 typedef value_type& reference; 00061 00062 null_output_t& operator ++ (int) { return *this; } 00063 null_output_t& operator ++ () { return *this; } 00064 reference operator * () { return *this; } 00065 00066 template <typename T> 00067 null_output_t& operator = (const T&) { return *this; } 00068 }; 00069 00070 /****************************************************************************************************/ 00071 00072 token_range_t xml_xstr_store(const token_range_t& entire_element_range, 00073 const token_range_t& name, 00074 const attribute_set_t& attribute_set, 00075 const token_range_t& value); 00076 00077 token_range_t xml_xstr_lookup( const token_range_t& entire_element_range, 00078 const token_range_t& name, 00079 const attribute_set_t& attribute_set, 00080 const token_range_t& value); 00081 00082 token_range_t xml_element_finalize( const token_range_t& entire_element_range, 00083 const token_range_t& name, 00084 const attribute_set_t& attribute_set, 00085 const token_range_t& value); 00086 00087 /*************************************************************************************************/ 00088 00089 struct context_frame_t 00090 { 00091 struct comp_t 00092 { 00093 bool operator () (const token_range_t& x, const token_range_t& y) const 00094 { 00095 return token_range_less(x, y); 00096 } 00097 }; 00098 00099 typedef std::pair<attribute_set_t, token_range_t> element_t; 00100 typedef std::multimap<token_range_t, element_t, comp_t> store_t; 00101 typedef store_t::iterator store_iterator; 00102 typedef store_t::value_type store_value_type; 00103 typedef std::pair<store_iterator, store_iterator> store_range_pair_t; 00104 00105 typedef xml_parser_t<char*>::callback_proc_t callback_proc_t; 00106 typedef xml_parser_t<char*>::preorder_predicate_t preorder_predicate_t; 00107 00108 context_frame_t() : 00109 parse_info_m("xstring context_frame_t"), 00110 parsed_m(false) 00111 { } 00112 00113 context_frame_t(const context_frame_t& rhs) : 00114 parse_info_m(rhs.parse_info_m), 00115 parsed_m(rhs.parsed_m), 00116 attribute_set_m(rhs.attribute_set_m), 00117 glossary_m(rhs.glossary_m), 00118 callback_m(rhs.callback_m), 00119 predicate_m(rhs.predicate_m) 00120 //slurp_m(rhs.slurp_m), // not to be transferred from context to context 00121 //pool_m(rhs.pool_m), // not to be transferred from context to context 00122 { } 00123 00124 context_frame_t& operator = (const context_frame_t& rhs) 00125 { 00126 parse_info_m = rhs.parse_info_m; 00127 parsed_m = rhs.parsed_m; 00128 attribute_set_m = rhs.attribute_set_m; 00129 glossary_m = rhs.glossary_m; 00130 callback_m = rhs.callback_m; 00131 predicate_m = rhs.predicate_m; 00132 //slurp_m = rhs.slurp_m; // not to be transferred from context to context 00133 //pool_m = rhs.pool_m; // not to be transferred from context to context 00134 00135 return *this; 00136 } 00137 00138 ~context_frame_t() 00139 { if (slurp_m.first) delete [] slurp_m.first; } 00140 00141 inline store_range_pair_t range_for_key(const store_t::key_type& key) 00142 { return glossary_m.equal_range(key); } 00143 00144 std::pair<bool, store_iterator> exact_match_exists( const attribute_set_t& attribute_set, 00145 const token_range_t& value); 00146 00147 store_t::mapped_type* store( const store_t::key_type& key, 00148 const attribute_set_t& attribute_set, 00149 const token_range_t& value, 00150 bool copy = false); 00151 00152 store_iterator closest_match( store_range_pair_t range, 00153 const attribute_set_t& searching); 00154 00155 token_range_t element_handler( const token_range_t& entire_element_range, 00156 const token_range_t& name, 00157 const attribute_set_t& attribute_set, 00158 const token_range_t& value) const 00159 { 00160 if (xstring_preorder_predicate(name)) 00161 // Note that this implicitly handles "marker" elements (by echoing them) 00162 return xml_xstr_lookup(entire_element_range, name, attribute_set, value); 00163 else if (predicate_m && predicate_m(name)) 00164 return callback_m(entire_element_range, name, attribute_set, value); 00165 else 00166 return xml_element_strip(entire_element_range, name, attribute_set, value); 00167 } 00168 00169 token_range_t clone(const token_range_t& token); 00170 00171 line_position_t parse_info_m; 00172 bool parsed_m; 00173 attribute_set_t attribute_set_m; 00174 store_t glossary_m; 00175 callback_proc_t callback_m; 00176 preorder_predicate_t predicate_m; 00177 token_range_t slurp_m; 00178 unique_string_pool_t pool_m; 00179 }; 00180 00181 /*************************************************************************************************/ 00182 00183 inline bool operator == (const context_frame_t::element_t& x, const context_frame_t::element_t& y) 00184 { return x.first == y.first && token_range_equal(x.second, y.second); } 00185 00186 /*************************************************************************************************/ 00187 00188 implementation::context_frame_t& top_frame(); 00189 00190 /*************************************************************************************************/ 00191 00192 } // namespace implementation 00193 00194 /*************************************************************************************************/ 00195 #ifndef NDEBUG 00196 00197 void xstring_clear_glossary(); 00198 00199 #endif 00200 /*************************************************************************************************/ 00201 00202 // XML fragment parsing 00203 00204 template <typename O> // O models OutputIterator 00205 inline void parse_xml_fragment(uchar_ptr_t fragment, std::size_t n, O output) 00206 { 00207 const implementation::context_frame_t& context(implementation::top_frame()); 00208 00209 make_xml_parser( fragment, 00210 fragment + n, 00211 line_position_t("parse_xml_fragment"), 00212 always_true<token_range_t>(), 00213 boost::bind(&implementation::context_frame_t::element_handler, boost::cref(context), _1, _2, _3, _4), 00214 output) 00215 00216 .parse_content(); // REVISIT (fbrereto) : More or less legible than having it after the above declaration? 00217 } 00218 00219 template <typename O> // O models OutputIterator 00220 inline void parse_xml_fragment(const std::string& fragment, O output) 00221 { return parse_xml_fragment(reinterpret_cast<uchar_ptr_t>(fragment.c_str()), fragment.size(), output); } 00222 00223 template <typename O> // O models OutputIterator 00224 inline void parse_xml_fragment(const char* fragment, O output) 00225 { return parse_xml_fragment(reinterpret_cast<uchar_ptr_t>(fragment), std::strlen(fragment), output); } 00226 00227 /*************************************************************************************************/ 00228 00229 // xstring lookup with OutputIterator; all of these functions return a valid XML fragment 00230 00231 template <typename O> // O models OutputIterator; required: sizeof(value_type(O)) >= 21 bits 00232 inline void xstring(const char* xstr, std::size_t n, O output) 00233 { parse_xml_fragment(reinterpret_cast<uchar_ptr_t>(xstr), n, output); } 00234 00235 template <typename O> // O models OutputIterator; required: sizeof(value_type(O)) >= 21 bits 00236 inline void xstring(const char* xstr, O output) 00237 { xstring(xstr, std::strlen(xstr), output); } 00238 00239 /*************************************************************************************************/ 00240 00241 // xstring lookup; all of these functions return a valid XML fragment 00242 00243 inline std::string xstring(const char* xstr, std::size_t n) 00244 { 00245 std::string result; 00246 00247 xstring(xstr, n, std::back_inserter(result)); 00248 00249 return result; 00250 } 00251 00252 inline std::string xstring(const std::string& xstr) 00253 { return xstring(xstr.c_str(), xstr.size()); } 00254 00255 /*************************************************************************************************/ 00256 00257 // Context-sensitive marker replacement 00258 00259 std::string xstring_replace( const std::string& xstr, 00260 const std::string& marker); 00261 00262 std::string xstring_replace( const std::string& xstr, 00263 const std::string* first, 00264 const std::string* last); 00265 00266 std::string xstring_replace( const name_t& xstr_id, 00267 const std::string& marker); 00268 00269 std::string xstring_replace( const name_t& xstr_id, 00270 const std::string* first, 00271 const std::string* last); 00272 00273 /*************************************************************************************************/ 00274 00275 struct xstring_context_t : boost::noncopyable 00276 { 00277 typedef implementation::context_frame_t::callback_proc_t callback_proc_t; 00278 typedef implementation::context_frame_t::preorder_predicate_t preorder_predicate_t; 00279 00280 xstring_context_t( const char* parse_first, 00281 const char* parse_last, 00282 const line_position_t& parse_info = 00283 line_position_t("xstring_context_t")) : 00284 back_frame_m(implementation::top_frame()) // save snapshot of stack 00285 { 00286 implementation::context_frame_t& context(implementation::top_frame()); 00287 00288 context.slurp_m.first = reinterpret_cast<uchar_ptr_t>(parse_first); 00289 context.slurp_m.second = reinterpret_cast<uchar_ptr_t>(parse_last); 00290 context.parse_info_m = parse_info; 00291 context.parsed_m = false; 00292 00293 glossary_parse(); 00294 } 00295 00296 template <typename I> // I models InputIterator 00297 xstring_context_t( I first_attribute, 00298 I last_attribute) : 00299 back_frame_m(implementation::top_frame()) // save snapshot of stack 00300 { 00301 implementation::top_frame().attribute_set_m.insert(first_attribute, last_attribute); 00302 } 00303 00304 template <typename I> // I models InputIterator 00305 xstring_context_t( I first_attribute, 00306 I last_attribute, 00307 const unsigned char* parse_first, 00308 const unsigned char* parse_last, 00309 const line_position_t& parse_info = 00310 line_position_t("xstring_context_t")) : 00311 back_frame_m(implementation::top_frame()) // save snapshot of stack 00312 { 00313 implementation::context_frame_t& context(implementation::top_frame()); 00314 00315 context.attribute_set_m.insert(first_attribute, last_attribute); 00316 context.slurp_m.first = parse_first; 00317 context.slurp_m.second = parse_last; 00318 context.parse_info_m = parse_info; 00319 context.parsed_m = false; 00320 00321 glossary_parse(); 00322 } 00323 00324 void set_preorder_predicate(preorder_predicate_t proc) 00325 { implementation::top_frame().predicate_m = proc; } 00326 00327 void set_element_handler(callback_proc_t proc) 00328 { implementation::top_frame().callback_m = proc; } 00329 00330 ~xstring_context_t() 00331 { implementation::top_frame() = back_frame_m; } // restore stack as it was 00332 00333 private: 00334 void glossary_parse() 00335 { 00336 implementation::context_frame_t& context(implementation::top_frame()); 00337 00338 if (context.parsed_m || !boost::size(context.slurp_m)) 00339 return; 00340 00341 make_xml_parser( 00342 context.slurp_m.first, 00343 context.slurp_m.second, 00344 context.parse_info_m, 00345 implementation::xstring_preorder_predicate, 00346 &implementation::xml_xstr_store, 00347 implementation::null_output_t()) 00348 00349 .parse_element_sequence(); // REVISIT (fbrereto) : More or less legible than having it after the above declaration? 00350 00351 context.parsed_m = true; 00352 } 00353 00354 implementation::context_frame_t back_frame_m; 00355 }; 00356 00357 /*************************************************************************************************/ 00358 00359 } // namespace adobe 00360 00361 /*************************************************************************************************/ 00362 #ifdef __ADOBE_COMPILER_CONCEPTS__ 00363 namespace std { 00364 // It would be nice to be able to instantiate this for all T. Not sure why it doesn't work. 00365 concept_map OutputIterator<adobe::implementation::null_output_t, char> {}; 00366 concept_map OutputIterator<adobe::implementation::null_output_t, unsigned char> {}; 00367 } 00368 #endif 00369 00370 /*************************************************************************************************/ 00371 00372 #endif 00373 00374 /*************************************************************************************************/ |