-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | XML parser and renderer with HTML 5 quirks mode
--   
--   Contains renderers and parsers for both XML and HTML 5 document
--   fragments, which share data structures so that it's easy to work with
--   both. Document fragments are bits of documents, which are not
--   constrained by some of the high-level structure rules (in particular,
--   they may contain more than one root element).
--   
--   Note that this is not a compliant HTML 5 parser. Rather, it is a
--   parser for HTML 5 compliant documents. It does not implement the HTML
--   5 parsing algorithm, and should generally be expected to perform
--   correctly only on documents that you trust to conform to HTML 5. This
--   is not a suitable library for implementing web crawlers or other
--   software that will be exposed to documents from outside sources. The
--   result is also not the HTML 5 node structure, but rather something
--   closer to the physical structure. For example, omitted start tags are
--   not inserted (and so, their corresponding end tags must also be
--   omitted).
@package xmlhtml
@version 0.2.5.4

module Text.XmlHtml.HTML.Meta

-- | Void elements as defined by the HTML5 spec.
voidTags :: HashSet Text

-- | Elements that XmlHtml treats as raw text by default. Raw text elements
--   are not allowed to have any other tags in them. This is necessary to
--   support the JavaScript less-than operator inside a script tag, for
--   example.
--   
--   The library uses the <a>isRawText</a> function everywhere instead of
--   checking this set directly because that gives us an escape hatch to
--   avoid the default behavior if necessary.
rawTextTags :: HashSet Text

-- | Determine whether a tag should be treated as raw text. Raw text
--   elements are not allowed to have any other tags in them. This is
--   necessary to support the JavaScript less-than operator inside a script
--   tag, for example.
--   
--   If a tag is in the <a>rawTextTags</a> set, this function allows you to
--   override that behavior by adding the <tt>xmlhtmlNotRaw</tt> attribute.
--   Conversely, if a tag is not in the <a>rawTextTags</a> set, this
--   function allows you to override that by adding the <tt>xmlhtmlRaw</tt>
--   attribute to the tag.
--   
--   This is the function that is actually used in the parser and renderer.
--   <a>rawTextTags</a> is not used any more, but is still provided for
--   backwards compatibility and to let you see which tags are treated as
--   raw by default.
isRawText :: Text -> [(Text, Text)] -> Bool

-- | List of elements with omittable end tags.
endOmittableLast :: HashSet Text

-- | Tags which should be considered automatically ended in case one of a
--   certain set of tags pops up.
endOmittableNext :: HashMap Text (HashSet Text)

-- | Tags and attributes which should always be rendered with an explicit
--   value, even when the value is empty. This is required by some web
--   browsers for tags that are typically non-empty.
explicitAttributes :: HashMap Text (HashSet Text)

-- | Predefined character entity references as defined by the HTML5 spec.
predefinedRefs :: Map Text Text

-- | Reverse lookup of HTML entities. The values in this map should be the
--   "canonical" entity names that are most widely supported by browsers,
--   email clients, etc. If you encounter a situation where the value in
--   this map is not the most widely supported, please open a pull request
--   to change the order in the appropriate ent2names table below.
reversePredefinedRefs :: Map Text Text


-- | Parsers and renderers for XML and HTML 5. Although the formats are
--   treated differently, the data types used by each are the same, which
--   makes it easy to write code that works with the element structure of
--   either XML or HTML 5 documents.
--   
--   Limitations:
--   
--   <ul>
--   <li>The XML parser does not parse internal DOCTYPE subsets. They are
--   just stored as blocks of text, with minimal scanning done to match
--   quotes and brackets to determine the end.</li>
--   <li>Since DTDs are not parsed, the XML parser fails on entity
--   references, except for those defined internally. You cannot use this
--   library for parsing XML documents with entity references outside the
--   predefined set.</li>
--   <li>The HTML 5 parser is not a compliant HTML parser. Instead, it is a
--   parser for valid HTML 5 content. It should only be used on content
--   that you have reason to believe is probably correct, since the
--   compatibility features of HTML 5 are missing. This is the wrong
--   library on which to build a web spider.</li>
--   <li>Both parsers accept fragments of documents, by which is meant that
--   they do not enforce the top-level structure of the document. Files may
--   contain more than one root element, for example.</li>
--   </ul>
module Text.XmlHtml

-- | Represents a document fragment, including the format, encoding, and
--   document type declaration as well as its content.
data Document
XmlDocument :: !Encoding -> !Maybe DocType -> ![Node] -> Document
[docEncoding] :: Document -> !Encoding
[docType] :: Document -> !Maybe DocType
[docContent] :: Document -> ![Node]
HtmlDocument :: !Encoding -> !Maybe DocType -> ![Node] -> Document
[docEncoding] :: Document -> !Encoding
[docType] :: Document -> !Maybe DocType
[docContent] :: Document -> ![Node]

-- | A node of a document structure. A node can be text, a comment, or an
--   element. XML processing instructions are intentionally omitted as a
--   simplification, and CDATA and plain text are both text nodes, since
--   they ought to be semantically interchangeable.
data Node
TextNode :: !Text -> Node
Comment :: !Text -> Node
Element :: !Text -> ![(Text, Text)] -> ![Node] -> Node
[elementTag] :: Node -> !Text
[elementAttrs] :: Node -> ![(Text, Text)]
[elementChildren] :: Node -> ![Node]

-- | A document type declaration. Note that DTD internal subsets are
--   currently unimplemented.
data DocType
DocType :: !Text -> !ExternalID -> !InternalSubset -> DocType

-- | An external ID, as in a document type declaration. This can be a
--   SYSTEM identifier, or a PUBLIC identifier, or can be omitted.
data ExternalID
Public :: !Text -> !Text -> ExternalID
System :: !Text -> ExternalID
NoExternalID :: ExternalID

-- | The internal subset is unparsed, but preserved in case it's actually
--   wanted.
data InternalSubset
InternalText :: !Text -> InternalSubset
NoInternalSubset :: InternalSubset

-- | The character encoding of a document. Currently only the required
--   character encodings are implemented.
data Encoding
UTF8 :: Encoding
UTF16BE :: Encoding
UTF16LE :: Encoding
ISO_8859_1 :: Encoding

-- | Rendering options
data RenderOptions
RenderOptions :: AttrSurround -> AttrResolveInternalQuotes -> Maybe (HashMap Text (HashSet Text)) -> RenderOptions

-- | Single or double quotes used around attribute values.
[roAttributeSurround] :: RenderOptions -> AttrSurround

-- | Quotes inside attribute values that conflict with the surrounding
--   quotes are either escaped, or the outer quotes are changed to avoid
--   conflicting with the internal ones.
[roAttributeResolveInternal] :: RenderOptions -> AttrResolveInternalQuotes

-- | Attributes in the whitelist with empty values are rendered as
--   <a>example=""</a>. <a>Nothing</a> applies this rule to all attributes
--   with empty values.
[roExplicitEmptyAttrs] :: RenderOptions -> Maybe (HashMap Text (HashSet Text))
data AttrSurround
SurroundDoubleQuote :: AttrSurround
SurroundSingleQuote :: AttrSurround
data AttrResolveInternalQuotes
AttrResolveByEscape :: AttrResolveInternalQuotes
AttrResolveAvoidEscape :: AttrResolveInternalQuotes

-- | Determines whether the node is text or not.
isTextNode :: Node -> Bool

-- | Determines whether the node is a comment or not.
isComment :: Node -> Bool

-- | Determines whether the node is an element or not.
isElement :: Node -> Bool

-- | Gives the tag name of an element, or <a>Nothing</a> if the node isn't
--   an element.
tagName :: Node -> Maybe Text

-- | Retrieves the attribute with the given name. If the <a>Node</a> is not
--   an element, the result is always <a>Nothing</a>.
getAttribute :: Text -> Node -> Maybe Text

-- | Checks if a given attribute exists in a <a>Node</a>.
hasAttribute :: Text -> Node -> Bool

-- | Sets the attribute name to the given value. If the <a>Node</a> is not
--   an element, this is the identity.
setAttribute :: Text -> Text -> Node -> Node

-- | Gives the entire text content of a node, ignoring markup.
nodeText :: Node -> Text

-- | Gives the child nodes of the given node. Only elements have child
--   nodes.
childNodes :: Node -> [Node]

-- | Gives the child elements of the given node.
childElements :: Node -> [Node]

-- | Gives all of the child elements of the node with the given tag name.
childElementsTag :: Text -> Node -> [Node]

-- | Gives the first child element of the node with the given tag name, or
--   <a>Nothing</a> if there is no such child element.
childElementTag :: Text -> Node -> Maybe Node

-- | Gives the descendants of the given node in the order that they begin
--   in the document.
descendantNodes :: Node -> [Node]

-- | Gives the descendant elements of the given node, in the order that
--   their start tags appear in the document.
descendantElements :: Node -> [Node]

-- | Gives the descendant elements with a given tag name.
descendantElementsTag :: Text -> Node -> [Node]

-- | Gives the first descendant element of the node with the given tag
--   name, or <a>Nothing</a> if there is no such element.
descendantElementTag :: Text -> Node -> Maybe Node

-- | Parses the given XML fragment.
parseXML :: String -> ByteString -> Either String Document

-- | Parses the given HTML fragment. This enables HTML quirks mode, which
--   changes the parsing algorithm to parse valid HTML 5 documents
--   correctly.
parseHTML :: String -> ByteString -> Either String Document
render :: Document -> Builder

-- | Renders a <a>Document</a>.
renderWithOptions :: RenderOptions -> Document -> Builder
defaultRenderOptions :: RenderOptions
renderXmlFragment :: Encoding -> [Node] -> Builder

-- | Function for rendering XML nodes without the overhead of creating a
--   Document structure.
renderXmlFragmentWithOptions :: RenderOptions -> Encoding -> [Node] -> Builder

-- | Function for rendering HTML nodes without the overhead of creating a
--   Document structure, using default rendering options
renderHtmlFragment :: Encoding -> [Node] -> Builder

-- | Function for rendering HTML nodes without the overhead of creating a
--   Document structure.
renderHtmlFragmentWithOptions :: RenderOptions -> Encoding -> [Node] -> Builder
renderDocType :: Encoding -> Maybe DocType -> Builder


-- | A zipper for navigating and modifying XML trees. This is nearly the
--   same exposed interface as the <tt>xml</tt> package in
--   <tt>Text.XML.Light.Cursor</tt>, with modifications as needed to adapt
--   to different types.
module Text.XmlHtml.Cursor

-- | A zipper for XML document forests.
data Cursor

-- | Builds a <a>Cursor</a> for navigating a tree. That is, a forest with a
--   single root <a>Node</a>.
fromNode :: Node -> Cursor

-- | Builds a <a>Cursor</a> for navigating a forest with the given list of
--   roots. The cursor is initially positioned at the left-most node. Gives
--   <a>Nothing</a> if the list is empty.
fromNodes :: [Node] -> Maybe Cursor

-- | Retrieves the root node containing the current cursor position.
topNode :: Cursor -> Node

-- | Retrieves the entire forest of <a>Node</a>s corresponding to a
--   <a>Cursor</a>.
topNodes :: Cursor -> [Node]

-- | Retrieves the current node of a <a>Cursor</a>.
current :: Cursor -> Node

-- | Retrieves a list of the <a>Node</a>s at the same level as the current
--   position of a cursor, including the current node.
siblings :: Cursor -> [Node]

-- | Navigates a <a>Cursor</a> to its parent in the document.
parent :: Cursor -> Maybe Cursor

-- | Navigates a <a>Cursor</a> up through parents to reach the root level.
root :: Cursor -> Cursor

-- | Navigates a <a>Cursor</a> down to the indicated child index.
getChild :: Int -> Cursor -> Maybe Cursor

-- | Navigates a <a>Cursor</a> down to its first child.
firstChild :: Cursor -> Maybe Cursor

-- | Navigates a <a>Cursor</a> down to its last child.
lastChild :: Cursor -> Maybe Cursor

-- | Moves a <a>Cursor</a> to its left sibling.
left :: Cursor -> Maybe Cursor

-- | Moves a <a>Cursor</a> to its right sibling.
right :: Cursor -> Maybe Cursor

-- | Moves a <a>Cursor</a> to the next node encountered in a depth-first
--   search. If it has children, this is equivalent to <a>firstChild</a>.
--   Otherwise, if it has a right sibling, then this is equivalent to
--   <a>right</a>. Otherwise, the cursor moves to the first right sibling
--   of one of its parents.
nextDF :: Cursor -> Maybe Cursor

-- | Navigates a <a>Cursor</a> to the first child that matches the
--   predicate.
findChild :: (Cursor -> Bool) -> Cursor -> Maybe Cursor

-- | Navigates a <a>Cursor</a> to the nearest left sibling that matches a
--   predicate.
findLeft :: (Cursor -> Bool) -> Cursor -> Maybe Cursor

-- | Navigates a <a>Cursor</a> to the nearest right sibling that matches a
--   predicate.
findRight :: (Cursor -> Bool) -> Cursor -> Maybe Cursor

-- | Does a depth-first search for a descendant matching the predicate.
--   This can match the current cursor position.
findRec :: (Cursor -> Bool) -> Cursor -> Maybe Cursor

-- | Determines if the <a>Cursor</a> is at a root node.
isRoot :: Cursor -> Bool

-- | Determines if the <a>Cursor</a> is at a first child.
isFirst :: Cursor -> Bool

-- | Determines if the <a>Cursor</a> is at a last child.
isLast :: Cursor -> Bool

-- | Determines if the <a>Cursor</a> is at a leaf node.
isLeaf :: Cursor -> Bool

-- | Determines if the <a>Cursor</a> is at a child node (i.e., if it has a
--   parent).
isChild :: Cursor -> Bool

-- | Determines if the <a>Cursor</a> is at a non-leaf node (i.e., if it has
--   children).
hasChildren :: Cursor -> Bool

-- | Gets the index of the <a>Cursor</a> among its siblings.
getNodeIndex :: Cursor -> Int

-- | Replaces the current node.
setNode :: Node -> Cursor -> Cursor

-- | Modifies the current node by applying a function.
modifyNode :: (Node -> Node) -> Cursor -> Cursor

-- | Modifies the current node by applying an action in some functor.
modifyNodeM :: Functor m => (Node -> m Node) -> Cursor -> m Cursor

-- | Inserts a new <a>Node</a> to the left of the current position.
insertLeft :: Node -> Cursor -> Cursor

-- | Inserts a new <a>Node</a> to the right of the current position.
insertRight :: Node -> Cursor -> Cursor

-- | Inserts a list of new <a>Node</a>s to the left of the current
--   position.
insertManyLeft :: [Node] -> Cursor -> Cursor

-- | Inserts a list of new <a>Node</a>s to the right of the current
--   position.
insertManyRight :: [Node] -> Cursor -> Cursor

-- | Inserts a <a>Node</a> as the first child of the current element.
insertFirstChild :: Node -> Cursor -> Maybe Cursor

-- | Inserts a <a>Node</a> as the last child of the current element.
insertLastChild :: Node -> Cursor -> Maybe Cursor

-- | Inserts a list of <a>Node</a>s as the first children of the current
--   element.
insertManyFirstChild :: [Node] -> Cursor -> Maybe Cursor

-- | Inserts a list of <a>Node</a>s as the last children of the current
--   element.
insertManyLastChild :: [Node] -> Cursor -> Maybe Cursor

-- | Inserts a new <a>Node</a> to the left of the current position, and
--   moves left to the new node.
insertGoLeft :: Node -> Cursor -> Cursor

-- | Inserts a new <a>Node</a> to the right of the current position, and
--   moves right to the new node.
insertGoRight :: Node -> Cursor -> Cursor

-- | Removes the <a>Node</a> to the left of the current position, if any.
removeLeft :: Cursor -> Maybe (Node, Cursor)

-- | Removes the <a>Node</a> to the right of the current position, if any.
removeRight :: Cursor -> Maybe (Node, Cursor)

-- | Removes the current <a>Node</a>, and moves the Cursor to its left
--   sibling, if any.
removeGoLeft :: Cursor -> Maybe Cursor

-- | Removes the current <a>Node</a>, and moves the Cursor to its right
--   sibling, if any.
removeGoRight :: Cursor -> Maybe Cursor

-- | Removes the current <a>Node</a>, and moves the Cursor to its parent,
--   if any.
removeGoUp :: Cursor -> Maybe Cursor
instance GHC.Classes.Eq Text.XmlHtml.Cursor.Cursor


-- | Renderer that supports rendering to xmlhtml forests. This is a port of
--   the Hexpat renderer.
--   
--   Warning: because this renderer doesn't directly create the output, but
--   rather an XML tree representation, it is impossible to render
--   pre-escaped text.
module Text.Blaze.Renderer.XmlHtml

-- | Render HTML to an xmlhtml <a>Document</a>
renderHtml :: Html -> Document

-- | Render HTML to a list of xmlhtml nodes
renderHtmlNodes :: Html -> [Node]
