{-# LANGUAGE BangPatterns #-}
-- |
-- Module         : Data.ByteString.Lazy.Search.DFA
-- Copyright      : Daniel Fischer
-- Licence        : BSD3
-- Maintainer     : Daniel Fischer <daniel.is.fischer@googlemail.com>
-- Stability      : Provisional
-- Portability    : non-portable (BangPatterns)
--
-- Fast search of lazy 'L.ByteString' values. Breaking,
-- splitting and replacing using a deterministic finite automaton.

module Data.ByteString.Lazy.Search.DFA ( -- * Overview
                                         -- $overview

                                         -- ** Complexity and performance
                                         -- $complexity

                                         -- ** Partial application
                                         -- $partial

                                         -- * Finding substrings
                                         indices
                                       , nonOverlappingIndices
                                         -- * Breaking on substrings
                                       , breakOn
                                       , breakAfter
                                       , breakFindAfter
                                         -- * Replacing
                                       , replace
                                         -- * Splitting
                                       , split
                                       , splitKeepEnd
                                       , splitKeepFront
                                       ) where

import Data.ByteString.Search.Internal.Utils (automaton, keep, ldrop, lsplit)
import Data.ByteString.Search.Substitution

import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Internal as LI
import Data.ByteString.Unsafe (unsafeIndex)

import Data.Array.Base (unsafeAt)
--import Data.Array.Unboxed (UArray)

import Data.Bits
import Data.Int (Int64)

-- $overview
--
-- This module provides functions related to searching a substring within
-- a string. The searching algorithm uses a deterministic finite automaton
-- based on the Knuth-Morris-Pratt algorithm.
-- The automaton is implemented as an array of @(patternLength + 1) * &#963;@
-- state transitions, where &#963; is the alphabet size (256), so it is only
-- suitable for short enough patterns, therefore the patterns in this module
-- are required to be strict 'S.ByteString's.
--
-- When searching a pattern in a UTF-8-encoded 'L.ByteString', be aware that
-- these functions work on bytes, not characters, so the indices are
-- byte-offsets, not character offsets.

-- $complexity
--
-- The time and space complexity of the preprocessing phase is
-- /O/(@patternLength * &#963;@).
-- The searching phase is /O/(@targetLength@), each target character is
-- inspected only once.
--
-- In general the functions in this module have about the same performance as
-- the corresponding functions using the Knuth-Morris-Pratt algorithm but
-- are considerably slower than the Boyer-Moore functions. For very short
-- patterns or, in the case of 'indices', patterns with a short period
-- which occur often, however, times are close to or even below the
-- Boyer-Moore times.

-- $partial
--
-- All functions can usefully be partially applied. Given only a pattern,
-- the automaton is constructed only once, allowing efficient re-use.

------------------------------------------------------------------------------
--                            Exported Functions                            --
------------------------------------------------------------------------------

-- | @'indices'@ finds the starting indices of all possibly overlapping
--   occurrences of the pattern in the target string.
--   If the pattern is empty, the result is @[0 .. 'length' target]@.
{-# INLINE indices #-}
indices :: S.ByteString     -- ^ Strict pattern to find
        -> L.ByteString     -- ^ Lazy string to search
        -> [Int64]          -- ^ Offsets of matches
indices :: ByteString -> ByteString -> [Int64]
indices !ByteString
pat = Bool -> ByteString -> [ByteString] -> [Int64]
lazySearcher Bool
True ByteString
pat ([ByteString] -> [Int64])
-> (ByteString -> [ByteString]) -> ByteString -> [Int64]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
L.toChunks

-- | @'nonOverlappingIndices'@ finds the starting indices of all
--   non-overlapping occurrences of the pattern in the target string.
--   It is more efficient than removing indices from the list produced
--   by 'indices'.
{-# INLINE nonOverlappingIndices #-}
nonOverlappingIndices :: S.ByteString   -- ^ Strict pattern to find
                      -> L.ByteString   -- ^ Lazy string to search
                      -> [Int64]        -- ^ Offsets of matches
nonOverlappingIndices :: ByteString -> ByteString -> [Int64]
nonOverlappingIndices !ByteString
pat = Bool -> ByteString -> [ByteString] -> [Int64]
lazySearcher Bool
False ByteString
pat ([ByteString] -> [Int64])
-> (ByteString -> [ByteString]) -> ByteString -> [Int64]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
L.toChunks

-- | @'breakOn' pattern target@ splits @target@ at the first occurrence
--   of @pattern@. If the pattern does not occur in the target, the
--   second component of the result is empty, otherwise it starts with
--   @pattern@. If the pattern is empty, the first component is empty.
--   For a non-empty pattern, the first component is generated lazily,
--   thus the first parts of it can be available before the pattern has
--   been found or determined to be absent.
--
-- @
--   'uncurry' 'L.append' . 'breakOn' pattern = 'id'
-- @
breakOn :: S.ByteString  -- ^ Strict pattern to search for
        -> L.ByteString  -- ^ Lazy string to search in
        -> (L.ByteString, L.ByteString)
                         -- ^ Head and tail of string broken at substring
breakOn :: ByteString -> ByteString -> (ByteString, ByteString)
breakOn ByteString
pat = [ByteString] -> (ByteString, ByteString)
breaker ([ByteString] -> (ByteString, ByteString))
-> (ByteString -> [ByteString])
-> ByteString
-> (ByteString, ByteString)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
L.toChunks
  where
    lbrk :: [ByteString] -> ([ByteString], [ByteString])
lbrk = Bool -> ByteString -> [ByteString] -> ([ByteString], [ByteString])
lazyBreaker Bool
True ByteString
pat
    breaker :: [ByteString] -> (ByteString, ByteString)
breaker [ByteString]
strs = let ([ByteString]
f, [ByteString]
b) = [ByteString] -> ([ByteString], [ByteString])
lbrk [ByteString]
strs
                   in ([ByteString] -> ByteString
L.fromChunks [ByteString]
f, [ByteString] -> ByteString
L.fromChunks [ByteString]
b)

-- | @'breakAfter' pattern target@ splits @target@ behind the first occurrence
--   of @pattern@. An empty second component means that either the pattern
--   does not occur in the target or the first occurrence of pattern is at
--   the very end of target. If you need to discriminate between those cases,
--   use breakFindAfter.
--   If the pattern is empty, the first component is empty.
--   For a non-empty pattern, the first component is generated lazily,
--   thus the first parts of it can be available before the pattern has
--   been found or determined to be absent.
-- @
--   'uncurry' 'L.append' . 'breakAfter' pattern = 'id'
-- @
breakAfter :: S.ByteString  -- ^ Strict pattern to search for
           -> L.ByteString  -- ^ Lazy string to search in
           -> (L.ByteString, L.ByteString)
                            -- ^ Head and tail of string broken after substring
breakAfter :: ByteString -> ByteString -> (ByteString, ByteString)
breakAfter ByteString
pat = [ByteString] -> (ByteString, ByteString)
breaker ([ByteString] -> (ByteString, ByteString))
-> (ByteString -> [ByteString])
-> ByteString
-> (ByteString, ByteString)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
L.toChunks
  where
    lbrk :: [ByteString] -> ([ByteString], [ByteString])
lbrk = Bool -> ByteString -> [ByteString] -> ([ByteString], [ByteString])
lazyBreaker Bool
False ByteString
pat
    breaker :: [ByteString] -> (ByteString, ByteString)
breaker [ByteString]
strs = let ([ByteString]
f, [ByteString]
b) = [ByteString] -> ([ByteString], [ByteString])
lbrk [ByteString]
strs
                   in ([ByteString] -> ByteString
L.fromChunks [ByteString]
f, [ByteString] -> ByteString
L.fromChunks [ByteString]
b)

-- | @'breakFindAfter'@ does the same as 'breakAfter' but additionally indicates
--   whether the pattern is present in the target.
--
-- @
--   'fst' . 'breakFindAfter' pat = 'breakAfter' pat
-- @
breakFindAfter :: S.ByteString  -- ^ Strict pattern to search for
               -> L.ByteString  -- ^ Lazy string to search in
               -> ((L.ByteString, L.ByteString), Bool)
                            -- ^ Head and tail of string broken after substring
                            --   and presence of pattern
breakFindAfter :: ByteString -> ByteString -> ((ByteString, ByteString), Bool)
breakFindAfter ByteString
pat
  | ByteString -> Bool
S.null ByteString
pat  = \ByteString
str -> ((ByteString
L.empty, ByteString
str), Bool
True)
breakFindAfter ByteString
pat = [ByteString] -> ((ByteString, ByteString), Bool)
breaker ([ByteString] -> ((ByteString, ByteString), Bool))
-> (ByteString -> [ByteString])
-> ByteString
-> ((ByteString, ByteString), Bool)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
L.toChunks
  where
    !patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
    lbrk :: [ByteString] -> ([ByteString], [ByteString])
lbrk = Bool -> ByteString -> [ByteString] -> ([ByteString], [ByteString])
lazyBreaker Bool
True ByteString
pat
    breaker :: [ByteString] -> ((ByteString, ByteString), Bool)
breaker [ByteString]
strs = let ([ByteString]
f, [ByteString]
b) = [ByteString] -> ([ByteString], [ByteString])
lbrk [ByteString]
strs
                       ([ByteString]
f1, [ByteString]
b1) = Int -> [ByteString] -> ([ByteString], [ByteString])
lsplit Int
patLen [ByteString]
b
                       mbpat :: ByteString
mbpat = [ByteString] -> ByteString
L.fromChunks [ByteString]
f1
                   in (((ByteString -> ByteString -> ByteString)
-> ByteString -> [ByteString] -> ByteString
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr ByteString -> ByteString -> ByteString
LI.chunk ByteString
mbpat [ByteString]
f, [ByteString] -> ByteString
L.fromChunks [ByteString]
b1), Bool -> Bool
not ([ByteString] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ByteString]
b))

-- | @'replace' pat sub text@ replaces all (non-overlapping) occurrences of
--   @pat@ in @text@ with @sub@. If occurrences of @pat@ overlap, the first
--   occurrence that does not overlap with a replaced previous occurrence
--   is substituted. Occurrences of @pat@ arising from a substitution
--   will not be substituted. For example:
--
-- @
--   'replace' \"ana\" \"olog\" \"banana\" = \"bologna\"
--   'replace' \"ana\" \"o\" \"bananana\" = \"bono\"
--   'replace' \"aab\" \"abaa\" \"aaabb\" = \"aabaab\"
-- @
--
--   The result is a lazy 'L.ByteString',
--   which is lazily produced, without copying.
--   Equality of pattern and substitution is not checked, but
--
-- @
--   'replace' pat pat text == text
-- @
--
--   holds (the internal structure is generally different).
--   If the pattern is empty but not the substitution, the result
--   is equivalent to (were they 'String's) @cycle sub@.
--
--   For non-empty @pat@ and @sub@ a lazy 'L.ByteString',
--
-- @
--   'L.concat' . 'Data.List.intersperse' sub . 'split' pat = 'replace' pat sub
-- @
--
--   and analogous relations hold for other types of @sub@.
replace :: Substitution rep
        => S.ByteString     -- ^ Strict pattern to replace
        -> rep              -- ^ Replacement string
        -> L.ByteString     -- ^ Lazy string to modify
        -> L.ByteString     -- ^ Lazy result
replace :: forall rep.
Substitution rep =>
ByteString -> rep -> ByteString -> ByteString
replace ByteString
pat
  | ByteString -> Bool
S.null ByteString
pat = \rep
sub -> rep -> ByteString -> ByteString
forall a. Substitution a => a -> ByteString -> ByteString
prependCycle rep
sub
  | Bool
otherwise =
    let !patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
        breaker :: [ByteString] -> ([ByteString], [ByteString])
breaker = Bool -> ByteString -> [ByteString] -> ([ByteString], [ByteString])
lazyBreaker Bool
True ByteString
pat
        repl :: ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
repl [ByteString] -> [ByteString]
subst [ByteString]
strs
          | [ByteString] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ByteString]
strs   = []
          | Bool
otherwise   =
            let ([ByteString]
pre, [ByteString]
mtch) = [ByteString] -> ([ByteString], [ByteString])
breaker [ByteString]
strs
            in [ByteString]
pre [ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++ case [ByteString]
mtch of
                        [] -> []
                        [ByteString]
_  -> [ByteString] -> [ByteString]
subst (([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
repl [ByteString] -> [ByteString]
subst (Int -> [ByteString] -> [ByteString]
ldrop Int
patLen [ByteString]
mtch))
    in \rep
sub -> let {-# NOINLINE subst #-}
                   !subst :: [ByteString] -> [ByteString]
subst = rep -> [ByteString] -> [ByteString]
forall a. Substitution a => a -> [ByteString] -> [ByteString]
substitution rep
sub
                   repl1 :: [ByteString] -> [ByteString]
repl1 = ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
repl [ByteString] -> [ByteString]
subst
               in [ByteString] -> ByteString
L.fromChunks ([ByteString] -> ByteString)
-> (ByteString -> [ByteString]) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [ByteString] -> [ByteString]
repl1 ([ByteString] -> [ByteString])
-> (ByteString -> [ByteString]) -> ByteString -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
L.toChunks


-- | @'split' pattern target@ splits @target@ at each (non-overlapping)
--   occurrence of @pattern@, removing @pattern@. If @pattern@ is empty,
--   the result is an infinite list of empty 'L.ByteString's, if @target@
--   is empty but not @pattern@, the result is an empty list, otherwise
--   the following relations hold (where @patL@ is the lazy 'L.ByteString'
--   corresponding to @pat@):
--
-- @
--   'L.concat' . 'Data.List.intersperse' patL . 'split' pat = 'id',
--   'length' ('split' pattern target) ==
--               'length' ('nonOverlappingIndices' pattern target) + 1,
-- @
--
--   no fragment in the result contains an occurrence of @pattern@.
split :: S.ByteString   -- ^ Strict pattern to split on
      -> L.ByteString   -- ^ Lazy string to split
      -> [L.ByteString] -- ^ Fragments of string
split :: ByteString -> ByteString -> [ByteString]
split ByteString
pat
  | ByteString -> Bool
S.null ByteString
pat  = [ByteString] -> ByteString -> [ByteString]
forall a b. a -> b -> a
const (ByteString -> [ByteString]
forall a. a -> [a]
repeat ByteString
L.empty)
split ByteString
pat = ([ByteString] -> ByteString) -> [[ByteString]] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map [ByteString] -> ByteString
L.fromChunks ([[ByteString]] -> [ByteString])
-> (ByteString -> [[ByteString]]) -> ByteString -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [ByteString] -> [[ByteString]]
splitter ([ByteString] -> [[ByteString]])
-> (ByteString -> [ByteString]) -> ByteString -> [[ByteString]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
L.toChunks
  where
    !patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
    breaker :: [ByteString] -> ([ByteString], [ByteString])
breaker = Bool -> ByteString -> [ByteString] -> ([ByteString], [ByteString])
lazyBreaker Bool
True ByteString
pat
    splitter :: [ByteString] -> [[ByteString]]
splitter [ByteString]
strs
      | [ByteString] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ByteString]
strs  = []
      | Bool
otherwise  = [ByteString] -> [[ByteString]]
splitter' [ByteString]
strs
    splitter' :: [ByteString] -> [[ByteString]]
splitter' [ByteString]
strs
      | [ByteString] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ByteString]
strs  = [[]]
      | Bool
otherwise  =
        case [ByteString] -> ([ByteString], [ByteString])
breaker [ByteString]
strs of
          ([ByteString]
pre, [ByteString]
mtch) ->
            [ByteString]
pre [ByteString] -> [[ByteString]] -> [[ByteString]]
forall a. a -> [a] -> [a]
: case [ByteString]
mtch of
                    [] -> []
                    [ByteString]
_  -> [ByteString] -> [[ByteString]]
splitter' (Int -> [ByteString] -> [ByteString]
ldrop Int
patLen [ByteString]
mtch)

-- | @'splitKeepEnd' pattern target@ splits @target@ after each (non-overlapping)
--   occurrence of @pattern@. If @pattern@ is empty, the result is an
--   infinite list of empty 'L.ByteString's, otherwise the following
--   relations hold:
--
-- @
--   'L.concat' . 'splitKeepEnd' pattern = 'id,'
-- @
--
--   all fragments in the result except possibly the last end with
--   @pattern@, no fragment contains more than one occurrence of @pattern@.
splitKeepEnd :: S.ByteString    -- ^ Strict pattern to split on
             -> L.ByteString    -- ^ Lazy string to split
             -> [L.ByteString]  -- ^ Fragments of string
splitKeepEnd :: ByteString -> ByteString -> [ByteString]
splitKeepEnd ByteString
pat
  | ByteString -> Bool
S.null ByteString
pat = [ByteString] -> ByteString -> [ByteString]
forall a b. a -> b -> a
const (ByteString -> [ByteString]
forall a. a -> [a]
repeat ByteString
L.empty)
splitKeepEnd ByteString
pat = ([ByteString] -> ByteString) -> [[ByteString]] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map [ByteString] -> ByteString
L.fromChunks ([[ByteString]] -> [ByteString])
-> (ByteString -> [[ByteString]]) -> ByteString -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [ByteString] -> [[ByteString]]
splitter ([ByteString] -> [[ByteString]])
-> (ByteString -> [ByteString]) -> ByteString -> [[ByteString]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
L.toChunks
  where
    breaker :: [ByteString] -> ([ByteString], [ByteString])
breaker = Bool -> ByteString -> [ByteString] -> ([ByteString], [ByteString])
lazyBreaker Bool
False ByteString
pat
    splitter :: [ByteString] -> [[ByteString]]
splitter [] = []
    splitter [ByteString]
strs =
      case [ByteString] -> ([ByteString], [ByteString])
breaker [ByteString]
strs of
        ([ByteString]
pre, [ByteString]
mtch) -> [ByteString]
pre [ByteString] -> [[ByteString]] -> [[ByteString]]
forall a. a -> [a] -> [a]
: [ByteString] -> [[ByteString]]
splitter [ByteString]
mtch

-- | @'splitKeepFront'@ is like 'splitKeepEnd', except that @target@ is split
--   before each occurrence of @pattern@ and hence all fragments
--   with the possible exception of the first begin with @pattern@.
--   No fragment contains more than one non-overlapping occurrence
--   of @pattern@.
splitKeepFront :: S.ByteString    -- ^ Strict pattern to split on
               -> L.ByteString    -- ^ Lazy string to split
               -> [L.ByteString]  -- ^ Fragments of string
splitKeepFront :: ByteString -> ByteString -> [ByteString]
splitKeepFront ByteString
pat
  | ByteString -> Bool
S.null ByteString
pat  = [ByteString] -> ByteString -> [ByteString]
forall a b. a -> b -> a
const (ByteString -> [ByteString]
forall a. a -> [a]
repeat ByteString
L.empty)
splitKeepFront ByteString
pat = ([ByteString] -> ByteString) -> [[ByteString]] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map [ByteString] -> ByteString
L.fromChunks ([[ByteString]] -> [ByteString])
-> (ByteString -> [[ByteString]]) -> ByteString -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [ByteString] -> [[ByteString]]
splitter ([ByteString] -> [[ByteString]])
-> (ByteString -> [ByteString]) -> ByteString -> [[ByteString]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
L.toChunks
  where
    !patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
    breaker :: [ByteString] -> ([ByteString], [ByteString])
breaker = Bool -> ByteString -> [ByteString] -> ([ByteString], [ByteString])
lazyBreaker Bool
True ByteString
pat
    splitter :: [ByteString] -> [[ByteString]]
splitter [ByteString]
strs = case [ByteString] -> [[ByteString]]
splitter' [ByteString]
strs of
                      ([] : [[ByteString]]
rst) -> [[ByteString]]
rst
                      [[ByteString]]
other -> [[ByteString]]
other
    splitter' :: [ByteString] -> [[ByteString]]
splitter' []    = []
    splitter' [ByteString]
strs  =
      case [ByteString] -> ([ByteString], [ByteString])
breaker [ByteString]
strs of
        ([ByteString]
pre, [ByteString]
mtch) ->
          [ByteString]
pre [ByteString] -> [[ByteString]] -> [[ByteString]]
forall a. a -> [a] -> [a]
: case [ByteString]
mtch of
                  [] -> []
                  [ByteString]
_  -> case Int -> [ByteString] -> ([ByteString], [ByteString])
lsplit Int
patLen [ByteString]
mtch of
                          ([ByteString]
pt, [ByteString]
rst) ->
                            if [ByteString] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ByteString]
rst
                              then [[ByteString]
pt]
                              else let ([ByteString]
h : [[ByteString]]
t) = [ByteString] -> [[ByteString]]
splitter' [ByteString]
rst
                                   in ([ByteString]
pt [ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++ [ByteString]
h) [ByteString] -> [[ByteString]] -> [[ByteString]]
forall a. a -> [a] -> [a]
: [[ByteString]]
t

------------------------------------------------------------------------------
--                            Searching Function                            --
------------------------------------------------------------------------------

lazySearcher :: Bool -> S.ByteString -> [S.ByteString] -> [Int64]
lazySearcher :: Bool -> ByteString -> [ByteString] -> [Int64]
lazySearcher Bool
_ !ByteString
pat
    | ByteString -> Bool
S.null ByteString
pat        =
      let zgo :: t -> [ByteString] -> [t]
zgo t
_ [] = []
          zgo !t
prior (!ByteString
str : [ByteString]
rest) =
              let !l :: Int
l = ByteString -> Int
S.length ByteString
str
                  !prior' :: t
prior' = t
prior t -> t -> t
forall a. Num a => a -> a -> a
+ Int -> t
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
l
              in [t
prior t -> t -> t
forall a. Num a => a -> a -> a
+ Int -> t
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
i | Int
i <- [Int
1 .. Int
l]] [t] -> [t] -> [t]
forall a. [a] -> [a] -> [a]
++ t -> [ByteString] -> [t]
zgo t
prior' [ByteString]
rest
      in (Int64
0Int64 -> [Int64] -> [Int64]
forall a. a -> [a] -> [a]
:) ([Int64] -> [Int64])
-> ([ByteString] -> [Int64]) -> [ByteString] -> [Int64]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int64 -> [ByteString] -> [Int64]
forall {t}. Num t => t -> [ByteString] -> [t]
zgo Int64
0
    | ByteString -> Int
S.length ByteString
pat Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 =
      let !w :: Word8
w = ByteString -> Word8
S.head ByteString
pat
          ixes :: ByteString -> [Int]
ixes = Word8 -> ByteString -> [Int]
S.elemIndices Word8
w
          go :: t -> [ByteString] -> [t]
go t
_ [] = []
          go !t
prior (!ByteString
str : [ByteString]
rest)
            = let !prior' :: t
prior' = t
prior t -> t -> t
forall a. Num a => a -> a -> a
+ Int -> t
forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int
S.length ByteString
str)
              in (Int -> t) -> [Int] -> [t]
forall a b. (a -> b) -> [a] -> [b]
map ((t -> t -> t
forall a. Num a => a -> a -> a
+ t
prior) (t -> t) -> (Int -> t) -> Int -> t
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> t
forall a b. (Integral a, Num b) => a -> b
fromIntegral) (ByteString -> [Int]
ixes ByteString
str) [t] -> [t] -> [t]
forall a. [a] -> [a] -> [a]
++ t -> [ByteString] -> [t]
go t
prior' [ByteString]
rest
      in Int64 -> [ByteString] -> [Int64]
forall {t}. Num t => t -> [ByteString] -> [t]
go Int64
0
lazySearcher !Bool
overlap ByteString
pat = Int64 -> Int -> [ByteString] -> [Int64]
forall {p}. Num p => p -> Int -> [ByteString] -> [p]
search Int64
0 Int
0
  where
    !patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
    !auto :: UArray Int Int
auto   = ByteString -> UArray Int Int
automaton ByteString
pat
    !p0 :: Word8
p0     = ByteString -> Int -> Word8
unsafeIndex ByteString
pat Int
0
    !ams :: Int
ams    = if Bool
overlap then Int
patLen else Int
0
    search :: p -> Int -> [ByteString] -> [p]
search p
_ Int
_ [] = []
    search !p
prior Int
st (!ByteString
str:[ByteString]
rest) = Int -> Int -> [p]
match Int
st Int
0
      where
        !strLen :: Int
strLen = ByteString -> Int
S.length ByteString
str
        {-# INLINE strAt #-}
        strAt :: Int -> Int
        strAt :: Int -> Int
strAt Int
i = Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString
str ByteString -> Int -> Word8
`unsafeIndex` Int
i)
        match :: Int -> Int -> [p]
match Int
0 !Int
idx
          | Int
idx Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
strLen = p -> Int -> [ByteString] -> [p]
search (p
prior p -> p -> p
forall a. Num a => a -> a -> a
+ Int -> p
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
strLen) Int
0 [ByteString]
rest
          | ByteString -> Int -> Word8
unsafeIndex ByteString
str Int
idx Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
p0   = Int -> Int -> [p]
match Int
1 (Int
idx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
          | Bool
otherwise     = Int -> Int -> [p]
match Int
0 (Int
idx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
        match Int
state Int
idx
          | Int
idx Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
strLen = p -> Int -> [ByteString] -> [p]
search (p
prior p -> p -> p
forall a. Num a => a -> a -> a
+ Int -> p
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
strLen) Int
state [ByteString]
rest
          | Bool
otherwise     =
            let nstate :: Int
nstate = UArray Int Int -> Int -> Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
unsafeAt UArray Int Int
auto ((Int
state Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
8) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int -> Int
strAt Int
idx)
                !nxtIdx :: Int
nxtIdx = Int
idx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
            in if Int
nstate Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
patLen
                then (p
prior p -> p -> p
forall a. Num a => a -> a -> a
+ Int -> p
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
nxtIdx Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
patLen)) p -> [p] -> [p]
forall a. a -> [a] -> [a]
:
                            Int -> Int -> [p]
match Int
ams Int
nxtIdx
                else Int -> Int -> [p]
match Int
nstate Int
nxtIdx

------------------------------------------------------------------------------
--                                 Breaking                                 --
------------------------------------------------------------------------------

-- Code duplication :(
-- Needed for reasonable performance.
lazyBreaker :: Bool -> S.ByteString -> [S.ByteString]
                    -> ([S.ByteString], [S.ByteString])
lazyBreaker :: Bool -> ByteString -> [ByteString] -> ([ByteString], [ByteString])
lazyBreaker Bool
before ByteString
pat
  | ByteString -> Bool
S.null ByteString
pat  = \[ByteString]
strs -> ([], [ByteString]
strs)
  | ByteString -> Int
S.length ByteString
pat Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 =
    let !w :: Word8
w = ByteString -> Word8
S.head ByteString
pat
        !a :: Int
a = if Bool
before then Int
0 else Int
1
        ixes :: ByteString -> [Int]
ixes = Word8 -> ByteString -> [Int]
S.elemIndices Word8
w
        scan :: [ByteString] -> ([ByteString], [ByteString])
scan [] = ([], [])
        scan (!ByteString
str:[ByteString]
rest) =
            let !strLen :: Int
strLen = ByteString -> Int
S.length ByteString
str
            in case ByteString -> [Int]
ixes ByteString
str of
                []  -> let ([ByteString]
fr, [ByteString]
bk) = [ByteString] -> ([ByteString], [ByteString])
scan [ByteString]
rest in (ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: [ByteString]
fr, [ByteString]
bk)
                (Int
i:[Int]
_) -> let !j :: Int
j = Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
a
                         in if Int
j Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
strLen
                              then ([ByteString
str],[ByteString]
rest)
                              else ([Int -> ByteString -> ByteString
S.take Int
j ByteString
str], Int -> ByteString -> ByteString
S.drop Int
j ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: [ByteString]
rest)
    in [ByteString] -> ([ByteString], [ByteString])
scan
lazyBreaker !Bool
before ByteString
pat = [ByteString] -> Int -> [ByteString] -> ([ByteString], [ByteString])
bscan [] Int
0
  where
    !patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
    !auto :: UArray Int Int
auto   = ByteString -> UArray Int Int
automaton ByteString
pat
    !p0 :: Word8
p0     = ByteString -> Int -> Word8
unsafeIndex ByteString
pat Int
0
    bscan :: [ByteString] -> Int -> [ByteString] -> ([ByteString], [ByteString])
bscan [ByteString]
_ Int
_ [] = ([], [])
    bscan ![ByteString]
past !Int
sta (!ByteString
str:[ByteString]
rest) = Int -> Int -> ([ByteString], [ByteString])
match Int
sta Int
0
      where
        !strLen :: Int
strLen = ByteString -> Int
S.length ByteString
str
        {-# INLINE strAt #-}
        strAt :: Int -> Int
        strAt :: Int -> Int
strAt Int
i = Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString
str ByteString -> Int -> Word8
`unsafeIndex` Int
i)
        match :: Int -> Int -> ([ByteString], [ByteString])
match Int
0 Int
idx
          | Int
idx Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
strLen =
            let ([ByteString]
fr, [ByteString]
bk) = [ByteString] -> Int -> [ByteString] -> ([ByteString], [ByteString])
bscan [] Int
0 [ByteString]
rest
            in ((ByteString
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
-> [ByteString]
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr ((([ByteString] -> [ByteString])
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
forall a b c. (a -> b -> c) -> b -> a -> c
flip ([ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
(.) (([ByteString] -> [ByteString])
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> (ByteString -> [ByteString] -> [ByteString])
-> ByteString
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (:)) [ByteString] -> [ByteString]
forall a. a -> a
id [ByteString]
past (ByteString
strByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:[ByteString]
fr), [ByteString]
bk)
          | ByteString -> Int -> Word8
unsafeIndex ByteString
str Int
idx Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
p0 = Int -> Int -> ([ByteString], [ByteString])
match Int
1 (Int
idx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
          | Bool
otherwise = Int -> Int -> ([ByteString], [ByteString])
match Int
0 (Int
idx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
        match Int
state Int
idx
          | Int
idx Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
strLen =
            let ([ByteString]
kp, ![ByteString]
rl) = if Bool
before
                                then Int -> [ByteString] -> ([ByteString], [ByteString])
keep Int
state (ByteString
strByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:[ByteString]
past)
                                else ([], ByteString
strByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:[ByteString]
past)
                ([ByteString]
fr, [ByteString]
bk) = [ByteString] -> Int -> [ByteString] -> ([ByteString], [ByteString])
bscan [ByteString]
kp Int
state [ByteString]
rest
            in ((ByteString
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
-> [ByteString]
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr ((([ByteString] -> [ByteString])
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
forall a b c. (a -> b -> c) -> b -> a -> c
flip ([ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
(.) (([ByteString] -> [ByteString])
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> (ByteString -> [ByteString] -> [ByteString])
-> ByteString
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (:)) [ByteString] -> [ByteString]
forall a. a -> a
id [ByteString]
rl [ByteString]
fr, [ByteString]
bk)
          | Bool
otherwise =
            let !nstate :: Int
nstate = UArray Int Int -> Int -> Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
unsafeAt UArray Int Int
auto ((Int
state Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
8) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int -> Int
strAt Int
idx)
                !nxtIdx :: Int
nxtIdx = Int
idx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
            in if Int
nstate Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
patLen
                then case if Bool
before then Int
nxtIdx Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
patLen else Int
nxtIdx of
                       Int
0 -> ((ByteString
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
-> [ByteString]
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr ((([ByteString] -> [ByteString])
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
forall a b c. (a -> b -> c) -> b -> a -> c
flip ([ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
(.) (([ByteString] -> [ByteString])
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> (ByteString -> [ByteString] -> [ByteString])
-> ByteString
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (:)) [ByteString] -> [ByteString]
forall a. a -> a
id [ByteString]
past [], ByteString
strByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:[ByteString]
rest)
                       Int
stIx | Int
stIx Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 -> Int -> [ByteString] -> [ByteString] -> ([ByteString], [ByteString])
rgo (-Int
stIx) (ByteString
strByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:[ByteString]
rest) [ByteString]
past
                            | Int
stIx Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
strLen ->
                              ((ByteString
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
-> [ByteString]
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr ((([ByteString] -> [ByteString])
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
forall a b c. (a -> b -> c) -> b -> a -> c
flip ([ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
(.) (([ByteString] -> [ByteString])
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> (ByteString -> [ByteString] -> [ByteString])
-> ByteString
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (:)) [ByteString] -> [ByteString]
forall a. a -> a
id [ByteString]
past [ByteString
str],[ByteString]
rest)
                            | Bool
otherwise ->
                              ((ByteString
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
-> [ByteString]
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr ((([ByteString] -> [ByteString])
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
forall a b c. (a -> b -> c) -> b -> a -> c
flip ([ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
(.) (([ByteString] -> [ByteString])
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> (ByteString -> [ByteString] -> [ByteString])
-> ByteString
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (:)) [ByteString] -> [ByteString]
forall a. a -> a
id [ByteString]
past
                                    [Int -> ByteString -> ByteString
S.take Int
stIx ByteString
str], Int -> ByteString -> ByteString
S.drop Int
stIx ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: [ByteString]
rest)
                else Int -> Int -> ([ByteString], [ByteString])
match Int
nstate Int
nxtIdx


-- Did I already mention that I suck at finding names?
{-# INLINE rgo #-}
rgo :: Int -> [S.ByteString] -> [S.ByteString]
    -> ([S.ByteString], [S.ByteString])
rgo :: Int -> [ByteString] -> [ByteString] -> ([ByteString], [ByteString])
rgo !Int
kp [ByteString]
acc (!ByteString
str:[ByteString]
more)
  | Int
sl Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
kp    = ([ByteString] -> [ByteString]
forall a. [a] -> [a]
reverse [ByteString]
more, ByteString
strByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:[ByteString]
acc)
  | Int
sl Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
kp     = Int -> [ByteString] -> [ByteString] -> ([ByteString], [ByteString])
rgo (Int
kp Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
sl) (ByteString
strByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:[ByteString]
acc) [ByteString]
more
  | Bool
otherwise   = case Int -> ByteString -> (ByteString, ByteString)
S.splitAt (Int
sl Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
kp) ByteString
str of
                    (ByteString
fr, ByteString
bk) ->
                      ((ByteString
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
-> [ByteString]
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr ((([ByteString] -> [ByteString])
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
forall a b c. (a -> b -> c) -> b -> a -> c
flip ([ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
(.) (([ByteString] -> [ByteString])
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> (ByteString -> [ByteString] -> [ByteString])
-> ByteString
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (:)) [ByteString] -> [ByteString]
forall a. a -> a
id [ByteString]
more [ByteString
fr], ByteString
bkByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:[ByteString]
acc)
    where
      !sl :: Int
sl = ByteString -> Int
S.length ByteString
str
rgo Int
_ [ByteString]
_ [] = [Char] -> ([ByteString], [ByteString])
forall a. HasCallStack => [Char] -> a
error [Char]
"Not enough past!"
-- If that error is ever encountered, I screwed up badly.