stlab.adobe.com Adobe Systems Incorporated

xstring.hpp

Go to the documentation of this file.
00001 /*
00002     Copyright 2005-2007 Adobe Systems Incorporated
00003     Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
00004     or a copy at http://stlab.adobe.com/licenses.html)
00005 */
00006 
00007 /*************************************************************************************************/
00008 
00009 #ifndef ADOBE_XSTRING_HPP
00010 #define ADOBE_XSTRING_HPP
00011 
00012 /*************************************************************************************************/
00013 
00014 #include <adobe/config.hpp>
00015 
00016 #include <adobe/functional.hpp>
00017 #include <adobe/implementation/string_pool.hpp>
00018 #include <adobe/istream.hpp>
00019 #include <adobe/name.hpp>
00020 #include <adobe/string.hpp>
00021 #include <adobe/unicode.hpp>
00022 #include <adobe/xml_parser.hpp>
00023 
00024 #include <boost/function.hpp>
00025 #include <boost/noncopyable.hpp>
00026 
00027 #include <sstream>
00028 #include <vector>
00029 #include <map>
00030 #include <cassert>
00031 #include <cctype>
00032 
00033 /*************************************************************************************************/
00034 
00035 namespace adobe {
00036 
00037 /*************************************************************************************************/
00038 
00039 namespace implementation {
00040 
00041 /****************************************************************************************************/
00042 
00043 inline bool xstring_preorder_predicate(const token_range_t& range)
00044 {
00045     // we want to check for both xstr and marker tags because both are
00046     // handled by the xstring system
00047 
00048     return  token_range_equal(range, static_token_range("xstr")) ||
00049             token_range_equal(range, static_token_range("marker"));
00050 }
00051 
00052 /****************************************************************************************************/
00053 
// Output iterator that discards everything written through it. Dereferencing
// and incrementing all hand back the iterator itself, and assigning a value of
// any type to it does nothing, so `*out++ = x` compiles and has no effect.
struct null_output_t
{
    typedef std::output_iterator_tag    iterator_category;
    typedef null_output_t               value_type;
    typedef std::ptrdiff_t              difference_type;
    typedef value_type*                 pointer;
    typedef value_type&                 reference;

    // Accepts (and ignores) a value of any type.
    template <typename T>
    null_output_t& operator = (const T&)
        { return *this; }

    // Dereferencing yields the iterator, so the assignment above is what runs.
    reference operator * ()
        { return *this; }

    // Neither increment advances anything.
    null_output_t& operator ++ ()
        { return *this; }

    null_output_t& operator ++ (int)
        { return *this; }
};
00069 
00070 /****************************************************************************************************/
00071 
00072 token_range_t xml_xstr_store(const token_range_t&     entire_element_range,
00073                                     const token_range_t&     name,
00074                                     const attribute_set_t&   attribute_set,
00075                                     const token_range_t&     value);
00076 
00077 token_range_t xml_xstr_lookup(   const token_range_t&     entire_element_range,
00078                                         const token_range_t&     name,
00079                                         const attribute_set_t&   attribute_set,
00080                                         const token_range_t&     value);
00081 
00082 token_range_t xml_element_finalize(  const token_range_t&     entire_element_range,
00083                                             const token_range_t&     name,
00084                                             const attribute_set_t&   attribute_set,
00085                                             const token_range_t&     value);
00086 
00087 /*************************************************************************************************/
00088 
00089 struct context_frame_t
00090 {
00091     struct comp_t
00092     {
00093         bool operator () (const token_range_t& x, const token_range_t& y) const
00094         {
00095             return token_range_less(x, y);
00096         }
00097     };
00098 
00099     typedef std::pair<attribute_set_t, token_range_t> element_t;
00100     typedef std::multimap<token_range_t, element_t, comp_t>  store_t;
00101     typedef store_t::iterator                                       store_iterator;
00102     typedef store_t::value_type                                     store_value_type;
00103     typedef std::pair<store_iterator, store_iterator>               store_range_pair_t;
00104 
00105     typedef xml_parser_t<char*>::callback_proc_t                    callback_proc_t;
00106     typedef xml_parser_t<char*>::preorder_predicate_t               preorder_predicate_t;
00107 
00108     context_frame_t() :
00109         parse_info_m("xstring context_frame_t"),
00110         parsed_m(false)
00111     { }
00112 
00113     context_frame_t(const context_frame_t& rhs) :
00114         parse_info_m(rhs.parse_info_m),
00115         parsed_m(rhs.parsed_m),
00116         attribute_set_m(rhs.attribute_set_m),
00117         glossary_m(rhs.glossary_m),
00118         callback_m(rhs.callback_m),
00119         predicate_m(rhs.predicate_m)
00120         //slurp_m(rhs.slurp_m), // not to be transferred from context to context
00121         //pool_m(rhs.pool_m), // not to be transferred from context to context
00122     { }
00123 
00124     context_frame_t& operator = (const context_frame_t& rhs)
00125     {
00126         parse_info_m = rhs.parse_info_m;
00127         parsed_m = rhs.parsed_m;
00128         attribute_set_m = rhs.attribute_set_m;
00129         glossary_m = rhs.glossary_m;
00130         callback_m = rhs.callback_m;
00131         predicate_m = rhs.predicate_m;
00132         //slurp_m = rhs.slurp_m; // not to be transferred from context to context
00133         //pool_m = rhs.pool_m; // not to be transferred from context to context
00134 
00135         return *this;
00136     }
00137 
00138     ~context_frame_t()
00139         { if (slurp_m.first) delete [] slurp_m.first; }
00140 
00141     inline store_range_pair_t range_for_key(const store_t::key_type& key)
00142         { return glossary_m.equal_range(key); }
00143 
00144     std::pair<bool, store_iterator> exact_match_exists( const attribute_set_t&   attribute_set,
00145                                                         const token_range_t&     value);
00146 
00147     store_t::mapped_type*   store(  const store_t::key_type&        key,
00148                                     const attribute_set_t&   attribute_set,
00149                                     const token_range_t&     value,
00150                                     bool                            copy = false);
00151 
00152     store_iterator          closest_match(  store_range_pair_t              range,
00153                                             const attribute_set_t&   searching);
00154 
00155     token_range_t element_handler(   const token_range_t&     entire_element_range,
00156                                             const token_range_t&     name,
00157                                             const attribute_set_t&   attribute_set,
00158                                             const token_range_t&     value) const
00159     {
00160         if (xstring_preorder_predicate(name))
00161             // Note that this implicitly handles "marker" elements (by echoing them)
00162             return xml_xstr_lookup(entire_element_range, name, attribute_set, value);
00163         else if (predicate_m && predicate_m(name))
00164             return callback_m(entire_element_range, name, attribute_set, value);
00165         else
00166             return xml_element_strip(entire_element_range, name, attribute_set, value);
00167     }
00168 
00169     token_range_t   clone(const token_range_t& token);
00170 
00171     line_position_t      parse_info_m;
00172     bool                        parsed_m;
00173     attribute_set_t      attribute_set_m;
00174     store_t                     glossary_m;
00175     callback_proc_t             callback_m;
00176     preorder_predicate_t        predicate_m;
00177     token_range_t        slurp_m;
00178     unique_string_pool_t pool_m;
00179 };
00180 
00181 /*************************************************************************************************/
00182 
00183 inline bool operator == (const context_frame_t::element_t& x, const context_frame_t::element_t& y)
00184     { return x.first == y.first && token_range_equal(x.second, y.second); }
00185 
00186 /*************************************************************************************************/
00187 
00188 implementation::context_frame_t& top_frame();
00189 
00190 /*************************************************************************************************/
00191 
00192 } // namespace implementation
00193 
00194 /*************************************************************************************************/
00195 #ifndef NDEBUG
00196 
// Only declared when NDEBUG is not defined. The definition is not in this
// header.
// NOTE(review): presumably empties the glossary, going by the name -- confirm
// against the definition.
void xstring_clear_glossary();
00198 
00199 #endif
00200 /*************************************************************************************************/
00201 
00202 // XML fragment parsing
00203 
00204 template <typename O> // O models OutputIterator
00205 inline void parse_xml_fragment(uchar_ptr_t fragment, std::size_t n, O output)
00206 {
00207     const implementation::context_frame_t& context(implementation::top_frame());
00208 
00209     make_xml_parser( fragment,
00210                             fragment + n,
00211                             line_position_t("parse_xml_fragment"),
00212                             always_true<token_range_t>(),
00213                             boost::bind(&implementation::context_frame_t::element_handler, boost::cref(context), _1, _2, _3, _4),
00214                             output)
00215 
00216     .parse_content(); // REVISIT (fbrereto) : More or less legible than having it after the above declaration?
00217 }
00218 
00219 template <typename O> // O models OutputIterator
00220 inline void parse_xml_fragment(const std::string& fragment, O output)
00221     { return parse_xml_fragment(reinterpret_cast<uchar_ptr_t>(fragment.c_str()), fragment.size(), output); }
00222 
00223 template <typename O> // O models OutputIterator
00224 inline void parse_xml_fragment(const char* fragment, O output)
00225     { return parse_xml_fragment(reinterpret_cast<uchar_ptr_t>(fragment), std::strlen(fragment), output); }
00226 
00227 /*************************************************************************************************/
00228 
00229 // xstring lookup with OutputIterator; all of these functions return a valid XML fragment
00230 
00231 template <typename O> // O models OutputIterator; required: sizeof(value_type(O)) >= 21 bits
00232 inline void xstring(const char* xstr, std::size_t n, O output)
00233     { parse_xml_fragment(reinterpret_cast<uchar_ptr_t>(xstr), n, output); }
00234 
// Null-terminated form of the lookup above: measures `xstr` with std::strlen
// (so it must not be null) and forwards to the pointer-plus-length overload.
template <typename O> // O models OutputIterator; required: sizeof(value_type(O)) >= 21 bits
inline void xstring(const char* xstr, O output)
{
    const std::size_t length(std::strlen(xstr));

    xstring(xstr, length, output);
}
00238 
00239 /*************************************************************************************************/
00240 
00241 // xstring lookup; all of these functions return a valid XML fragment
00242 
00243 inline std::string xstring(const char* xstr, std::size_t n)
00244 {
00245     std::string result;
00246 
00247     xstring(xstr, n, std::back_inserter(result));
00248 
00249     return result;
00250 }
00251 
00252 inline std::string xstring(const std::string& xstr)
00253     { return xstring(xstr.c_str(), xstr.size()); }
00254 
00255 /*************************************************************************************************/
00256 
00257 // Context-sensitive marker replacement
00258 
00259 std::string xstring_replace(    const std::string& xstr,
00260                                 const std::string& marker);
00261 
00262 std::string xstring_replace(    const std::string& xstr,
00263                                 const std::string* first,
00264                                 const std::string* last);
00265 
00266 std::string xstring_replace(    const name_t& xstr_id,
00267                                 const std::string& marker);
00268 
00269 std::string xstring_replace(    const name_t& xstr_id,
00270                                 const std::string* first,
00271                                 const std::string* last);
00272 
00273 /*************************************************************************************************/
00274 
00275 struct xstring_context_t : boost::noncopyable
00276 {
00277     typedef implementation::context_frame_t::callback_proc_t         callback_proc_t;
00278     typedef implementation::context_frame_t::preorder_predicate_t    preorder_predicate_t;
00279 
00280     xstring_context_t(  const char*                     parse_first,
00281                         const char*                     parse_last,
00282                         const line_position_t&   parse_info =
00283                             line_position_t("xstring_context_t")) :
00284         back_frame_m(implementation::top_frame()) // save snapshot of stack
00285     {
00286         implementation::context_frame_t& context(implementation::top_frame());
00287 
00288         context.slurp_m.first = reinterpret_cast<uchar_ptr_t>(parse_first);
00289         context.slurp_m.second = reinterpret_cast<uchar_ptr_t>(parse_last);
00290         context.parse_info_m = parse_info;
00291         context.parsed_m = false;
00292 
00293         glossary_parse();
00294     }
00295 
00296     template <typename I> // I models InputIterator
00297     xstring_context_t(  I   first_attribute,
00298                         I   last_attribute) :
00299         back_frame_m(implementation::top_frame()) // save snapshot of stack
00300     {
00301         implementation::top_frame().attribute_set_m.insert(first_attribute, last_attribute);
00302     }
00303 
00304     template <typename I> // I models InputIterator
00305     xstring_context_t(  I                               first_attribute,
00306                         I                               last_attribute,
00307                         const unsigned char*            parse_first,
00308                         const unsigned char*            parse_last,
00309                         const line_position_t&   parse_info =
00310                             line_position_t("xstring_context_t")) :
00311         back_frame_m(implementation::top_frame()) // save snapshot of stack
00312     {
00313         implementation::context_frame_t& context(implementation::top_frame());
00314 
00315         context.attribute_set_m.insert(first_attribute, last_attribute);
00316         context.slurp_m.first = parse_first;
00317         context.slurp_m.second = parse_last;
00318         context.parse_info_m = parse_info;
00319         context.parsed_m = false;
00320 
00321         glossary_parse();
00322     }
00323 
00324     void set_preorder_predicate(preorder_predicate_t proc)
00325     { implementation::top_frame().predicate_m = proc; }
00326 
00327     void set_element_handler(callback_proc_t proc)
00328     { implementation::top_frame().callback_m = proc; }
00329 
00330     ~xstring_context_t()
00331     { implementation::top_frame() = back_frame_m; } // restore stack as it was
00332 
00333 private:
00334     void glossary_parse()
00335     {
00336         implementation::context_frame_t& context(implementation::top_frame());
00337 
00338         if (context.parsed_m || !boost::size(context.slurp_m))
00339             return;
00340 
00341         make_xml_parser(
00342             context.slurp_m.first,
00343             context.slurp_m.second,
00344             context.parse_info_m,
00345             implementation::xstring_preorder_predicate,
00346             &implementation::xml_xstr_store,
00347             implementation::null_output_t())
00348 
00349         .parse_element_sequence(); // REVISIT (fbrereto) : More or less legible than having it after the above declaration?
00350 
00351         context.parsed_m = true;
00352     }
00353 
00354     implementation::context_frame_t back_frame_m;
00355 };
00356 
00357 /*************************************************************************************************/
00358 
00359 } // namespace adobe
00360 
00361 /*************************************************************************************************/
00362 #ifdef __ADOBE_COMPILER_CONCEPTS__
00363 namespace std {
00364     // It would be nice to be able to instantiate this for all T. Not sure why it doesn't work.
00365     concept_map OutputIterator<adobe::implementation::null_output_t, char> {};
00366     concept_map OutputIterator<adobe::implementation::null_output_t, unsigned char> {};
00367 }
00368 #endif
00369 
00370 /*************************************************************************************************/
00371 
00372 #endif
00373 
00374 /*************************************************************************************************/

Copyright © 2006-2007 Adobe Systems Incorporated.

Use of this website signifies your agreement to the Terms of Use and Online Privacy Policy.

Search powered by Google