1 module gumbo.capi;
2 
3 extern (C) {
4 
/**
 * A location inside the original input text buffer.
 *
 * `line` and `column` count from 1 while `offset` counts from 0, which is the
 * convention most editors and command-line tools follow.  `column` is measured
 * in characters, because it is typically used to address a column on a
 * printable (nowadays usually UTF-8) display; `offset` is measured in bytes,
 * because it is typically used for pointer arithmetic when pulling a region of
 * text out of a byte buffer.
 */
struct GumboSourcePosition {
  uint line;    /// Line number, 1-based.
  uint column;  /// Column number, 1-based, counted in characters.
  uint offset;  /// Offset into the buffer, 0-based, counted in bytes.
}
20 
/**
 * The SourcePosition used for elements that have no source position of their
 * own, i.e. parser-inserted elements.
 *
 * Declared `__gshared` because this names a global that lives in the C
 * library's global storage; without it D would treat the declaration as a
 * thread-local variable.
 */
extern __gshared const GumboSourcePosition kGumboEmptySourcePosition;
26 
27 
/**
 * A string, or a slice of a string, expressed as a pointer plus a length.
 *
 * The pointer refers into a data buffer owned by some other code (often the
 * original input).  By convention a GumboStringPiece is treated as immutable,
 * because several pieces may share the same data; use GumboStringBuffer when a
 * string has to be constructed.  Clients must assume the data is not
 * NUL-terminated and always work with the explicit length.
 */
struct GumboStringPiece {
  /**
   * A pointer to the beginning of the string.  NULL iff length == 0.
   *
   * Typed as a mutable pointer to const chars (C's `const char*`).  Writing
   * `const char*` in D would make the field itself const and the whole struct
   * non-assignable.
   */
  const(char)* data;

  /** The length of the string fragment, in bytes.  May be zero. */
  size_t length;
}
44 
/**
 * A constant representing a 0-length null string.
 *
 * Declared `__gshared` so that D refers to the C library's global storage
 * rather than to a thread-local variable.
 */
extern __gshared const GumboStringPiece kGumboEmptyString;
47 
/**
 * Tests two GumboStringPieces for equality.
 *
 * Returns: true if the two pieces are equal, false otherwise.
 */
bool gumbo_string_equals(const GumboStringPiece* str1,
                         const GumboStringPiece* str2);
54 
/**
 * Tests two GumboStringPieces for equality, ignoring case.
 *
 * Returns: true if the two pieces are equal apart from case, false otherwise.
 */
bool gumbo_string_equals_ignore_case(const GumboStringPiece* str1,
                                     const GumboStringPiece* str2);
61 
62 
/**
 * A simple vector: a pointer to a data array plus a length and a capacity.
 *
 * Every element is stored as a void*, so client code has to cast to the
 * appropriate type.  When an addition overflows the array, the array is
 * reallocated with double the size, which keeps the amortized cost O(1).
 * No removal function exists, since none of the operations in this library
 * needs one.  To iterate, inspect the structure directly in a for-loop.
 */
struct GumboVector {
  /**
   * Data elements: a dynamically-allocated array of `capacity` slots, each
   * holding a void* to the element itself.
   */
  void** data;

  /** How many elements the vector currently holds. */
  uint length;

  /** How many elements the array currently has room for. */
  uint capacity;
}
84 
/**
 * An empty (0-length, 0-capacity) GumboVector.
 *
 * Declared `__gshared` so that D refers to the C library's global storage
 * rather than to a thread-local variable.
 */
extern __gshared const GumboVector kGumboEmptyVector;
87 
/**
 * Looks for `element` in `vector`, comparing by pointer equality.
 *
 * Returns: the first index at which the element appears, or -1 if it never
 * does.
 */
int gumbo_vector_index_of(GumboVector* vector, void* element);
93 
94 
/**
 * One enum constant per tag defined in the HTML5 standard, named after the
 * tag itself.
 *
 * Constants exist only for tags that appear in the spec (or that get special
 * handling in the SVG and MathML namespaces).  Every other tag is reported as
 * GUMBO_TAG_UNKNOWN, and its actual name can be obtained through original_tag.
 *
 * The enum is mostly an API convenience: clients do not need a strcasecmp to
 * find the normalized tag name.  It also helps efficiency, since the parser
 * can work with enums instead of strings.
 *
 * The numeric values follow declaration order, so the order of the members
 * must not be changed.
 */
enum GumboTag {
  // The root element:
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/semantics.html#the-root-element
  GUMBO_TAG_HTML,

  // Document metadata:
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/semantics.html#document-metadata
  GUMBO_TAG_HEAD,
  GUMBO_TAG_TITLE,
  GUMBO_TAG_BASE,
  GUMBO_TAG_LINK,
  GUMBO_TAG_META,
  GUMBO_TAG_STYLE,

  // Scripting:
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#scripting-1
  GUMBO_TAG_SCRIPT,
  GUMBO_TAG_NOSCRIPT,

  // Sections:
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/sections.html#sections
  GUMBO_TAG_BODY,
  GUMBO_TAG_SECTION,
  GUMBO_TAG_NAV,
  GUMBO_TAG_ARTICLE,
  GUMBO_TAG_ASIDE,
  GUMBO_TAG_H1,
  GUMBO_TAG_H2,
  GUMBO_TAG_H3,
  GUMBO_TAG_H4,
  GUMBO_TAG_H5,
  GUMBO_TAG_H6,
  GUMBO_TAG_HGROUP,
  GUMBO_TAG_HEADER,
  GUMBO_TAG_FOOTER,
  GUMBO_TAG_ADDRESS,

  // Grouping content:
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/grouping-content.html#grouping-content
  GUMBO_TAG_P,
  GUMBO_TAG_HR,
  GUMBO_TAG_PRE,
  GUMBO_TAG_BLOCKQUOTE,
  GUMBO_TAG_OL,
  GUMBO_TAG_UL,
  GUMBO_TAG_LI,
  GUMBO_TAG_DL,
  GUMBO_TAG_DT,
  GUMBO_TAG_DD,
  GUMBO_TAG_FIGURE,
  GUMBO_TAG_FIGCAPTION,
  GUMBO_TAG_DIV,

  // Text-level semantics:
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/text-level-semantics.html#text-level-semantics
  GUMBO_TAG_A,
  GUMBO_TAG_EM,
  GUMBO_TAG_STRONG,
  GUMBO_TAG_SMALL,
  GUMBO_TAG_S,
  GUMBO_TAG_CITE,
  GUMBO_TAG_Q,
  GUMBO_TAG_DFN,
  GUMBO_TAG_ABBR,
  GUMBO_TAG_TIME,
  GUMBO_TAG_CODE,
  GUMBO_TAG_VAR,
  GUMBO_TAG_SAMP,
  GUMBO_TAG_KBD,
  GUMBO_TAG_SUB,
  GUMBO_TAG_SUP,
  GUMBO_TAG_I,
  GUMBO_TAG_B,
  GUMBO_TAG_MARK,
  GUMBO_TAG_RUBY,
  GUMBO_TAG_RT,
  GUMBO_TAG_RP,
  GUMBO_TAG_BDI,
  GUMBO_TAG_BDO,
  GUMBO_TAG_SPAN,
  GUMBO_TAG_BR,
  GUMBO_TAG_WBR,

  // Edits:
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/edits.html#edits
  GUMBO_TAG_INS,
  GUMBO_TAG_DEL,

  // Embedded content:
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/embedded-content-1.html#embedded-content-1
  GUMBO_TAG_IMAGE,
  GUMBO_TAG_IMG,
  GUMBO_TAG_IFRAME,
  GUMBO_TAG_EMBED,
  GUMBO_TAG_OBJECT,
  GUMBO_TAG_PARAM,
  GUMBO_TAG_VIDEO,
  GUMBO_TAG_AUDIO,
  GUMBO_TAG_SOURCE,
  GUMBO_TAG_TRACK,
  GUMBO_TAG_CANVAS,
  GUMBO_TAG_MAP,
  GUMBO_TAG_AREA,

  // MathML:
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-map-element.html#mathml
  GUMBO_TAG_MATH,
  GUMBO_TAG_MI,
  GUMBO_TAG_MO,
  GUMBO_TAG_MN,
  GUMBO_TAG_MS,
  GUMBO_TAG_MTEXT,
  GUMBO_TAG_MGLYPH,
  GUMBO_TAG_MALIGNMARK,
  GUMBO_TAG_ANNOTATION_XML,

  // SVG:
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-map-element.html#svg-0
  GUMBO_TAG_SVG,
  GUMBO_TAG_FOREIGNOBJECT,
  GUMBO_TAG_DESC,
  // SVG title tags will have GUMBO_TAG_TITLE as with HTML.

  // Tabular data:
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tabular-data.html#tabular-data
  GUMBO_TAG_TABLE,
  GUMBO_TAG_CAPTION,
  GUMBO_TAG_COLGROUP,
  GUMBO_TAG_COL,
  GUMBO_TAG_TBODY,
  GUMBO_TAG_THEAD,
  GUMBO_TAG_TFOOT,
  GUMBO_TAG_TR,
  GUMBO_TAG_TD,
  GUMBO_TAG_TH,

  // Forms:
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/forms.html#forms
  GUMBO_TAG_FORM,
  GUMBO_TAG_FIELDSET,
  GUMBO_TAG_LEGEND,
  GUMBO_TAG_LABEL,
  GUMBO_TAG_INPUT,
  GUMBO_TAG_BUTTON,
  GUMBO_TAG_SELECT,
  GUMBO_TAG_DATALIST,
  GUMBO_TAG_OPTGROUP,
  GUMBO_TAG_OPTION,
  GUMBO_TAG_TEXTAREA,
  GUMBO_TAG_KEYGEN,
  GUMBO_TAG_OUTPUT,
  GUMBO_TAG_PROGRESS,
  GUMBO_TAG_METER,

  // Interactive elements:
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/interactive-elements.html#interactive-elements
  GUMBO_TAG_DETAILS,
  GUMBO_TAG_SUMMARY,
  GUMBO_TAG_COMMAND,
  GUMBO_TAG_MENU,

  // Non-conforming elements that nonetheless appear in the HTML5 spec:
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/obsolete.html#non-conforming-features
  GUMBO_TAG_APPLET,
  GUMBO_TAG_ACRONYM,
  GUMBO_TAG_BGSOUND,
  GUMBO_TAG_DIR,
  GUMBO_TAG_FRAME,
  GUMBO_TAG_FRAMESET,
  GUMBO_TAG_NOFRAMES,
  GUMBO_TAG_ISINDEX,
  GUMBO_TAG_LISTING,
  GUMBO_TAG_XMP,
  GUMBO_TAG_NEXTID,
  GUMBO_TAG_NOEMBED,
  GUMBO_TAG_PLAINTEXT,
  GUMBO_TAG_RB,
  GUMBO_TAG_STRIKE,
  GUMBO_TAG_BASEFONT,
  GUMBO_TAG_BIG,
  GUMBO_TAG_BLINK,
  GUMBO_TAG_CENTER,
  GUMBO_TAG_FONT,
  GUMBO_TAG_MARQUEE,
  GUMBO_TAG_MULTICOL,
  GUMBO_TAG_NOBR,
  GUMBO_TAG_SPACER,
  GUMBO_TAG_TT,
  GUMBO_TAG_U,

  // Used for all tags that don't have special handling in HTML.
  GUMBO_TAG_UNKNOWN,

  // Marks the end of the enum, so that it can be iterated over.  Also serves
  // as the terminator for varargs functions that take tags.
  GUMBO_TAG_LAST,
}
276 
/**
 * Looks up the normalized tag name for a GumboTag enum value.  The name is
 * usually all-lowercase, except for foreign content.
 *
 * Returns: the tag name; the returned string is static data owned by the
 * library.
 */
char* gumbo_normalized_tagname(GumboTag tag);
283 
/**
 * Reduces the original_text field of an element or token to just its tag
 * name, by stripping off the </> characters and any attributes and adjusting
 * the passed-in GumboStringPiece accordingly.
 *
 * The resulting tag name keeps its original case and shares a buffer with the
 * original text, which simplifies memory management.
 *
 * Passing a string piece that does not represent an HTML tag (<tagname> or
 * </tagname>) results in undefined behavior.  A completely empty string piece
 * (NULL data pointer) is accepted: the function exits successfully as a no-op.
 */
void gumbo_tag_from_original_text(GumboStringPiece* text);
295 
/**
 * Fixes the case of SVG element names that are not all lowercase.
 * http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-inforeign
 *
 * This is a helper for clients rather than a parse-time step, because the
 * parse tree has no place to store a mutated tag name: tag_name is an enum
 * (TAG_UNKNOWN for most SVG tags without special handling) and
 * original_tag_name is a pointer into the original buffer.  Clients can call
 * this function to rename SVG tags as appropriate.
 *
 * Returns: the case-normalized SVG tag name if a replacement is found, or NULL
 * if no normalization is called for.  The returned string is static data
 * owned by the library.
 */
char* gumbo_normalize_svg_tagname(const GumboStringPiece* tagname);
309 
/**
 * Maps a tag name string to its tag enum.  The name may be in upper, lower or
 * mixed case.
 */
GumboTag gumbo_tag_enum(const char* tagname);
315 
/**
 * Attribute namespaces.
 *
 * HTML gives attributes in the XLink, XML and XMLNS namespaces special
 * handling.  Every other attribute goes in the generic "NONE" namespace.
 */
enum GumboAttributeNamespaceEnum {
  GUMBO_ATTR_NAMESPACE_NONE,
  GUMBO_ATTR_NAMESPACE_XLINK,
  GUMBO_ATTR_NAMESPACE_XML,
  GUMBO_ATTR_NAMESPACE_XMLNS,
}
327 
/**
 * A single attribute on an HTML tag.  Besides the name-value pair itself,
 * this carries information about source locations and the original source
 * text.
 *
 * The string fields are typed `const(char)*` (C's `const char*`).  Writing
 * `const char*` in D would make the fields themselves const and the struct
 * non-assignable.
 */
struct GumboAttribute {
  /**
   * The namespace for the attribute.  This will usually be
   * GUMBO_ATTR_NAMESPACE_NONE, but some XLink/XMLNS/XML attributes take special
   * values, per:
   * http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adjust-foreign-attributes
   */
  GumboAttributeNamespaceEnum attr_namespace;

  /**
   * The name of the attribute.  This is in a freshly-allocated buffer to deal
   * with case-normalization, and is null-terminated.
   */
  const(char)* name;

  /**
   * The original text of the attribute name, as a pointer into the original
   * source buffer.
   */
  GumboStringPiece original_name;

  /**
   * The value of the attribute.  This is in a freshly-allocated buffer to deal
   * with unescaping, and is null-terminated.  It does not include any quotes
   * that surround the attribute.  If the attribute has no value (for example,
   * 'selected' on a checkbox), this will be an empty string.
   */
  const(char)* value;

  /**
   * The original text of the value of the attribute.  This points into the
   * original source buffer.  It includes any quotes that surround the
   * attribute, and you can look at original_value.data[0] and
   * original_value.data[original_value.length - 1] to determine what the quote
   * characters were.  If the attribute has no value, this will be a 0-length
   * string.
   */
  GumboStringPiece original_value;

  /** The starting position of the attribute name. */
  GumboSourcePosition name_start;

  /**
   * The ending position of the attribute name.  This is not always derivable
   * from the starting position of the value because of the possibility of
   * whitespace around the = sign.
   */
  GumboSourcePosition name_end;

  /** The starting position of the attribute value. */
  GumboSourcePosition value_start;

  /** The ending position of the attribute value. */
  GumboSourcePosition value_end;
}
388 
/**
 * Searches a vector of GumboAttributes for the attribute with the given name.
 * Names are matched case-insensitively, as HTML is case-insensitive.
 *
 * Returns: the matching attribute, or NULL if no such attribute exists.
 */
GumboAttribute* gumbo_get_attribute(const GumboVector* attrs,
                                    const char* name);
396 
/**
 * The kind of a node.  The kind determines which member of the node.v union
 * is in use.
 */
enum GumboNodeType {
  /** A document node; v is a GumboDocument. */
  GUMBO_NODE_DOCUMENT,
  /** An element node; v is a GumboElement. */
  GUMBO_NODE_ELEMENT,
  /** A text node; v is a GumboText. */
  GUMBO_NODE_TEXT,
  /** A CDATA node; v is a GumboText. */
  GUMBO_NODE_CDATA,
  /** A comment node; v is a GumboText, excluding comment delimiters. */
  GUMBO_NODE_COMMENT,
  /** A text node whose entire contents is whitespace; v is a GumboText. */
  GUMBO_NODE_WHITESPACE
}
415 
// The C declaration below forward-declares GumboNode so that it can be used
// recursively in GumboNode.parent.  It is commented out in this module and is
// kept for reference only.
//typedef struct _GumboNode GumboNode;
421 
/**
 * The quirks mode of a document.  See:
 * http://www.whatwg.org/specs/web-apps/current-work/complete/dom.html#quirks-mode
 */
enum GumboQuirksModeEnum {
  GUMBO_DOCTYPE_NO_QUIRKS,
  GUMBO_DOCTYPE_QUIRKS,
  GUMBO_DOCTYPE_LIMITED_QUIRKS
}
428 
/**
 * Element namespaces.
 *
 * Unlike X(HT)ML, HTML5 does not denote namespaces by a prefix.  Instead,
 * anything inside an <svg> tag belongs to the SVG namespace, anything inside
 * a <math> tag belongs to the MathML namespace, and everything else belongs
 * to the HTML namespace.  No other namespaces are supported, which is why a
 * plain enum suffices.
 */
enum GumboNamespaceEnum {
  GUMBO_NAMESPACE_HTML,
  GUMBO_NAMESPACE_SVG,
  GUMBO_NAMESPACE_MATHML
}
441 
/**
 * Parse flags.
 *
 * The parser records why it inserted a node and stores those reasons as a
 * bitvector in the node itself.  Client code can use this to optimize out
 * nodes that are implied by the HTML structure of the document, to flag
 * constructs that a style guide may not allow, or to track how common
 * incorrect or tricky HTML code is.
 */
enum GumboParseFlags {
  /**
   * A normal node: both the start and the end tag appear in the source, and
   * nothing has been reparented.
   */
  GUMBO_INSERTION_NORMAL = 0,

  /**
   * The node was inserted by the parser to fulfill some implicit insertion
   * rule.  This generic catch-all means "the start tag for this node did not
   * appear in the document source"; it is usually set together with another
   * flag that gives a more specific insertion reason.
   */
  GUMBO_INSERTION_BY_PARSER = 1 << 0,

  /**
   * The end tag for this node did not appear in the document source.
   *
   * A parser-inserted node can still have an explicit end tag: for example,
   * "Text</html>" has GUMBO_INSERTION_BY_PARSER set on the <html> node but
   * leaves GUMBO_INSERTION_IMPLICIT_END_TAG unset, as the </html> tag actually
   * exists.  The flag is set only if the end tag is completely missing.  A
   * merely misplaced end tag (eg. a </body> tag with text afterwards) leaves
   * the flag unset, and clients have to inspect the parse errors to detect
   * that case.
   */
  GUMBO_INSERTION_IMPLICIT_END_TAG = 1 << 1,

  // Value 1 << 2 was for a flag that has since been removed.

  /**
   * The node was inserted because its presence is implied by other tags,
   * eg. <html>, <head>, <body>, <tbody>, etc.
   */
  GUMBO_INSERTION_IMPLIED = 1 << 3,

  /**
   * The node was converted from its end tag equivalent.  For example, </p>
   * when no paragraph is open implies that the parser should create a <p> tag
   * and immediately close it, while </br> means the same thing as <br>.
   */
  GUMBO_INSERTION_CONVERTED_FROM_END_TAG = 1 << 4,

  /** The node was converted from the parse of an <isindex> tag. */
  GUMBO_INSERTION_FROM_ISINDEX = 1 << 5,

  /** The node is an <image> tag that was rewritten as <img>. */
  GUMBO_INSERTION_FROM_IMAGE = 1 << 6,

  /**
   * The node was cloned as a result of the reconstruction of active
   * formatting elements.  Only the clone carries this flag; the initial
   * portion of the formatting run is a NORMAL node with an IMPLICIT_END_TAG.
   */
  GUMBO_INSERTION_RECONSTRUCTED_FORMATTING_ELEMENT = 1 << 7,

  /** The node was cloned by the adoption agency algorithm. */
  GUMBO_INSERTION_ADOPTION_AGENCY_CLONED = 1 << 8,

  /** The node was moved by the adoption agency algorithm. */
  GUMBO_INSERTION_ADOPTION_AGENCY_MOVED = 1 << 9,

  /**
   * The node has been foster-parented out of a table (or should've been
   * foster-parented, if verbatim mode is set).
   */
  GUMBO_INSERTION_FOSTER_PARENTED = 1 << 10,
}
519 
520 
/**
 * Information specific to document nodes.
 *
 * The doctype string fields are typed `const(char)*` (C's `const char*`).
 * Writing `const char*` in D would make the fields themselves const and the
 * struct non-assignable.
 */
struct GumboDocument {
  /**
   * An array of GumboNodes, containing the children of this element.  This will
   * normally consist of the <html> element and any comment nodes found.
   * Pointers are owned.
   */
  GumboVector /* GumboNode* */ children;

  /**
   * True if there was an explicit doctype token as opposed to it being
   * omitted.
   */
  bool has_doctype;

  // Fields from the doctype token, copied verbatim.
  const(char)* name;
  const(char)* public_identifier;
  const(char)* system_identifier;

  /**
   * Whether or not the document is in QuirksMode, as determined by the values
   * in the GumboTokenDocType template.
   */
  GumboQuirksModeEnum doc_type_quirks_mode;
}
546 
/**
 * The struct used to represent TEXT, CDATA, COMMENT, and WHITESPACE elements.
 * This contains just a block of text and its position.
 */
struct GumboText {
  /**
   * The text of this node, after entities have been parsed and decoded.  For
   * comment/cdata nodes, this does not include the comment delimiters.
   *
   * Typed as a mutable pointer to const chars (C's `const char*`).  Writing
   * `const char*` in D would make the field itself const and the struct
   * non-assignable.
   */
  const(char)* text;

  /**
   * The original text of this node, as a pointer into the original buffer.  For
   * comment/cdata nodes, this includes the comment delimiters.
   */
  GumboStringPiece original_text;

  /**
   * The starting position of this node.  This corresponds to the position of
   * original_text, before entities are decoded.
   */
  GumboSourcePosition start_pos;
}
570 
/**
 * The representation of every HTML element: its tag, its attributes, and its
 * child nodes.
 */
struct GumboElement {
  /**
   * The children of this element, as an array of GumboNodes.  Pointers are
   * owned.
   */
  GumboVector /* GumboNode* */ children;

  /** This element's GumboTag enum. */
  GumboTag tag;

  /** This element's GumboNamespaceEnum. */
  GumboNamespaceEnum tag_namespace;

  /**
   * The original tag text for this element, as a GumboStringPiece pointing
   * directly into the source buffer.  This is a zero-length string if the tag
   * was inserted algorithmically (for example, <head> or <tbody> insertion).
   */
  GumboStringPiece original_tag;

  /**
   * The original end tag text for this element, as a GumboStringPiece.  This
   * is a zero-length string if the end tag was inserted algorithmically (for
   * example, closing a self-closing tag).
   */
  GumboStringPiece original_end_tag;

  /** Where the start tag starts in the source. */
  GumboSourcePosition start_pos;

  /** Where the end tag starts in the source. */
  GumboSourcePosition end_pos;

  /**
   * The attributes of this tag, as an array of GumboAttributes in the order
   * in which they were parsed.  Pointers are owned.
   */
  GumboVector /* GumboAttribute* */ attributes;
}
615 
/**
 * The common supertype of GumboElement and GumboText.  Having one generic
 * type lets lists of children hold any node, to be cast to the subtypes as
 * necessary.
 */
struct GumboNode {
  /** Which kind of node this is. */
  GumboNodeType type;

  /** The parent node.  Not owned. */
  GumboNode* parent;

  /** This node's index within the parent's children vector. */
  size_t index_within_parent;

  /**
   * A bitvector of flags describing why this element was inserted into the
   * parse tree, including a variety of special parse situations.
   */
  GumboParseFlags parse_flags;

  /** The payload variants; `type` tells which member is in use. */
  private union NodeData {
    GumboDocument document;  // For GUMBO_NODE_DOCUMENT.
    GumboElement element;    // For GUMBO_NODE_ELEMENT.
    GumboText text;          // For everything else.
  }

  /** The actual node data. */
  NodeData v;
}
645 
/**
 * The type of an allocator function.
 *
 * The first argument is the 'userdata' member of the GumboParser struct.  The
 * semantics should match malloc: return a block of `size` bytes on success,
 * or NULL on failure.  A request for 0 bytes behaves as per malloc.
 */
// TODO(jdtang): Add checks throughout the codebase for out-of-memory condition.
// C original: typedef void* (*GumboAllocatorFunction)(void* userdata, size_t size);
alias void* function(void* userdata, size_t size) GumboAllocatorFunction;
655 
/**
 * The type of a deallocator function.
 *
 * The first argument is the 'userdata' member of the GumboParser struct.
 */
// C original: typedef void (*GumboDeallocatorFunction)(void* userdata, void* ptr);
alias void function(void* userdata, void* ptr) GumboDeallocatorFunction;
662 
/**
 * The configuration options passed in to the parser.
 *
 * Through these, a client can specify alternate memory managers, provide
 * different error handling, etc.  Start from kGumboDefaultOptions for
 * sensible defaults and override only what is needed.
 */
struct GumboOptions {
  /** The function used to allocate memory.  Default: malloc. */
  GumboAllocatorFunction allocator;

  /** The function used to deallocate memory.  Default: free. */
  GumboDeallocatorFunction deallocator;

  /**
   * An opaque object that is handed to every callback used by this library
   * as its first argument.  Default: NULL.
   */
  void* userdata;

  /**
   * The size of a tab stop, used to compute positions in source code that
   * uses tabs.  Default: 8.
   */
  int tab_stop;

  /**
   * Whether parsing should stop at the first error encountered.
   * Default: false.
   */
  bool stop_on_first_error;

  /**
   * The maximum number of errors the parser records before it stops recording
   * them.  This keeps a totally borked page from completely filling up the
   * errors vector and exhausting memory with useless redundant errors.  Set
   * to -1 to disable the limit.
   * Default: -1
   */
  int max_errors;
}
703 
/**
 * Default options struct; use this with gumbo_parse_with_options.
 *
 * Declared `__gshared` so that D refers to the C library's global storage
 * rather than to a thread-local variable.
 */
extern __gshared const GumboOptions kGumboDefaultOptions;
706 
/** The results of a parse. */
struct GumboOutput {
  /**
   * The document node: a GumboNode of type NODE_DOCUMENT that contains the
   * entire document as its child.
   */
  GumboNode* document;

  /**
   * The root node: the <html> tag that forms the root of the document.
   */
  GumboNode* root;

  /**
   * The errors that occurred during the parse.
   * NOTE: In version 1.0 of this library, the API for errors hasn't been fully
   * fleshed out and may change in the future.  For this reason, the GumboError
   * header isn't part of the public API.  Contact us if you need errors
   * reported so we can work out something appropriate for your use-case.
   */
  GumboVector /* GumboError */ errors;
}
730 
/**
 * Parses a buffer of UTF8 text into a GumboNode parse tree.
 *
 * Some fields of the tree (eg. original_text) point directly into the
 * original buffer, so the buffer must live at least as long as the parse
 * tree.
 *
 * Buffers longer than 4 gigabytes are not supported.
 */
GumboOutput* gumbo_parse(const char* buffer);
739 
/**
 * The extended form of gumbo_parse: takes an explicit options structure, a
 * buffer, and the length of that buffer.
 */
GumboOutput* gumbo_parse_with_options(const GumboOptions* options,
                                      const char* buffer,
                                      size_t buffer_length);
746 
/** Releases the memory used for the parse tree and the parse errors. */
void gumbo_destroy_output(const GumboOptions* options, GumboOutput* output);
750 
751 }