-- ------------------------------------------------------------

{- |
   Module     : Text.XML.HXT.Arrow.TagSoupInterface
   Copyright  : Copyright (C) 2010 Uwe Schmidt
   License    : MIT

   Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
   Stability  : stable
   Portability: portable

   Interface for TagSoup Parser

-}

-- ------------------------------------------------------------

module Text.XML.HXT.Arrow.TagSoupInterface
where

import Control.Arrow
import Control.Arrow.ArrowList
import Control.Arrow.ArrowTree

import Data.String.Unicode            ( normalizeNL )

import Text.XML.HXT.DOM.Interface

import Text.XML.HXT.Arrow.XmlArrow
import Text.XML.HXT.Arrow.XmlState
import Text.XML.HXT.Arrow.XmlState.TypeDefs

import qualified Text.XML.HXT.Parser.TagSoup as TS

-- ------------------------------------------------------------

{- |
   The system config option to enable the tagsoup parser

Here is an example, how to use it:

> ...
> import Text.HXT.XML.Core
> import Text.HXT.XML.TagSoup
> ...
>
> readDocument [ withTagSoup ] "some-file.xml"
> ...

reads the given document and parses it with the lazy tagsoup parser.
There is no validation enabled.
-}


withTagSoup                     :: SysConfig
withTagSoup :: SysConfig
withTagSoup                     = Selector XIOSysState (Bool, (Bool, IOSArrow XmlTree XmlTree))
-> (Bool, (Bool, IOSArrow XmlTree XmlTree)) -> SysConfig
forall s a. Selector s a -> a -> s -> s
setS (Selector XIOSysState Bool
theTagSoup       Selector XIOSysState Bool
-> Selector XIOSysState (Bool, IOSArrow XmlTree XmlTree)
-> Selector XIOSysState (Bool, (Bool, IOSArrow XmlTree XmlTree))
forall s a b. Selector s a -> Selector s b -> Selector s (a, b)
.&&&.
                                        Selector XIOSysState Bool
theExpat         Selector XIOSysState Bool
-> Selector XIOSysState (IOSArrow XmlTree XmlTree)
-> Selector XIOSysState (Bool, IOSArrow XmlTree XmlTree)
forall s a b. Selector s a -> Selector s b -> Selector s (a, b)
.&&&.
                                        Selector XIOSysState (IOSArrow XmlTree XmlTree)
theTagSoupParser
                                       ) (Bool
True, (Bool
False, IOSArrow XmlTree XmlTree
parseHtmlTagSoup))

-- | Turns off tagsoup parsing. The build in HXT parser will be used.

withoutTagSoup                  :: SysConfig
withoutTagSoup :: SysConfig
withoutTagSoup                  = Selector XIOSysState Bool -> Bool -> SysConfig
forall s a. Selector s a -> a -> s -> s
setS Selector XIOSysState Bool
theTagSoup Bool
False

-- ------------------------------------------------------------

-- | The Tagsoup parser arrow

parseHtmlTagSoup                :: IOSArrow XmlTree XmlTree
parseHtmlTagSoup :: IOSArrow XmlTree XmlTree
parseHtmlTagSoup                = (Bool, (Bool, (Bool, (Bool, Bool)))) -> IOSArrow XmlTree XmlTree
forall {s}.
(Bool, (Bool, (Bool, (Bool, Bool))))
-> IOSLA (XIOState s) XmlTree XmlTree
parse
                                  ((Bool, (Bool, (Bool, (Bool, Bool)))) -> IOSArrow XmlTree XmlTree)
-> IOSLA (XIOState ()) XmlTree (Bool, (Bool, (Bool, (Bool, Bool))))
-> IOSArrow XmlTree XmlTree
forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< Selector XIOSysState (Bool, (Bool, (Bool, (Bool, Bool))))
-> IOSLA (XIOState ()) XmlTree (Bool, (Bool, (Bool, (Bool, Bool))))
forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar
                                     (Selector XIOSysState Bool
theCheckNamespaces Selector XIOSysState Bool
-> Selector XIOSysState (Bool, (Bool, (Bool, Bool)))
-> Selector XIOSysState (Bool, (Bool, (Bool, (Bool, Bool))))
forall s a b. Selector s a -> Selector s b -> Selector s (a, b)
.&&&.
                                      Selector XIOSysState Bool
theWarnings        Selector XIOSysState Bool
-> Selector XIOSysState (Bool, (Bool, Bool))
-> Selector XIOSysState (Bool, (Bool, (Bool, Bool)))
forall s a b. Selector s a -> Selector s b -> Selector s (a, b)
.&&&.
                                      Selector XIOSysState Bool
thePreserveComment Selector XIOSysState Bool
-> Selector XIOSysState (Bool, Bool)
-> Selector XIOSysState (Bool, (Bool, Bool))
forall s a b. Selector s a -> Selector s b -> Selector s (a, b)
.&&&.
                                      Selector XIOSysState Bool
theRemoveWS        Selector XIOSysState Bool
-> Selector XIOSysState Bool -> Selector XIOSysState (Bool, Bool)
forall s a b. Selector s a -> Selector s b -> Selector s (a, b)
.&&&.
                                      Selector XIOSysState Bool
theLowerCaseNames
                                     )
    where
    parse :: (Bool, (Bool, (Bool, (Bool, Bool))))
-> IOSLA (XIOState s) XmlTree XmlTree
parse (Bool
withNamespaces', (Bool
withWarnings', (Bool
preserveCmt', (Bool
removeWS', Bool
lowerCaseNames'))))
                                = Int -> String -> IOSLA (XIOState s) XmlTree XmlTree
forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
1 (String
"parse document with tagsoup " String -> String -> String
forall a. [a] -> [a] -> [a]
++
                                              ( if Bool
lowerCaseNames' then String
"HT" else String
"X" ) String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"ML parser"
                                             )
                                  IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                                  IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
forall (a :: * -> * -> *) (t :: * -> *) b.
(ArrowTree a, Tree t) =>
a (t b) (t b) -> a (t b) (t b)
replaceChildren
                                  ( ( String -> IOSLA (XIOState s) XmlTree String
forall (a :: * -> * -> *). ArrowXml a => String -> a XmlTree String
getAttrValue String
a_source               -- get source name
                                      IOSLA (XIOState s) XmlTree String
-> IOSLA (XIOState s) XmlTree String
-> IOSLA (XIOState s) XmlTree (String, String)
forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&&
                                      (IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree String
forall (a :: * -> * -> *) n.
ArrowXml a =>
a n XmlTree -> a n String
xshow IOSLA (XIOState s) XmlTree XmlTree
forall (a :: * -> * -> *) (t :: * -> *) b.
(ArrowTree a, Tree t) =>
a (t b) (t b)
getChildren IOSLA (XIOState s) XmlTree String
-> (String -> String) -> IOSLA (XIOState s) XmlTree String
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> (c -> d) -> a b d
>>^ String -> String
normalizeNL) -- get string to be parsed and normalize newline char
                                    )
                                    IOSLA (XIOState s) XmlTree (String, String)
-> IOSLA (XIOState s) (String, String) XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                                    (String -> String -> [XmlTree])
-> IOSLA (XIOState s) (String, String) XmlTree
forall (a :: * -> * -> *) b c d.
ArrowList a =>
(b -> c -> [d]) -> a (b, c) d
arr2L (Bool
-> Bool -> Bool -> Bool -> Bool -> String -> String -> [XmlTree]
TS.parseHtmlTagSoup Bool
withNamespaces' Bool
withWarnings' Bool
preserveCmt' Bool
removeWS' Bool
lowerCaseNames')
                                  )

-- ------------------------------------------------------------