{-# LANGUAGE BangPatterns #-}
{-# OPTIONS_HADDOCK hide, prune #-}
-- |
-- Module         : Data.ByteString.Lazy.Search.Internal.BoyerMoore
-- Copyright      : Daniel Fischer
--                  Chris Kuklewicz
-- Licence        : BSD3
-- Maintainer     : Daniel Fischer <daniel.is.fischer@googlemail.com>
-- Stability      : Provisional
-- Portability    : non-portable (BangPatterns)
--
-- Fast overlapping Boyer-Moore search of both strict and lazy
-- 'S.ByteString' values. Breaking, splitting and replacing
-- using the Boyer-Moore algorithm.
--
-- Descriptions of the algorithm can be found at
-- <http://www-igm.univ-mlv.fr/~lecroq/string/node14.html#SECTION00140>
-- and
-- <http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm>
--
-- Original authors: Daniel Fischer (daniel.is.fischer at googlemail.com) and
-- Chris Kuklewicz (haskell at list.mightyreason.com).

module Data.ByteString.Lazy.Search.Internal.BoyerMoore (
                                           matchLL
                                         , matchSL

                                           --  Non-overlapping
                                         , matchNOL

                                            --  Replacing substrings
                                            -- replacing
                                         , replaceAllL
                                            --  Breaking on substrings
                                            -- breaking
                                         , breakSubstringL
                                         , breakAfterL
                                         , breakFindAfterL
                                            --  Splitting on substrings
                                            -- splitting
                                         , splitKeepEndL
                                         , splitKeepFrontL
                                         , splitDropL
                                         ) where


import Data.ByteString.Search.Internal.Utils
                (occurs, suffShifts, ldrop, lsplit, keep, release, strictify)
import Data.ByteString.Search.Substitution

import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import Data.ByteString.Unsafe (unsafeIndex)

import Data.Array.Base (unsafeAt)

import Data.Word (Word8)
import Data.Int (Int64)

-- overview
--
-- This module exports three search functions for searching in lazy
-- ByteSrings, one for searching non-overlapping occurrences of a strict
-- pattern, and one each for searchin overlapping occurrences of a strict
-- resp. lazy pattern. The common base name is @match@, the suffix
-- indicates the type of search. These functions
-- return (for a non-empty pattern) a list of all the indices of the target
-- string where an occurrence of the pattern begins, if some occurrences
-- overlap, all starting indices are reported. The list is produced lazily,
-- so not necessarily the entire target string is searched.
--
-- The behaviour of these functions when given an empty pattern has changed.
-- Formerly, the @matchXY@ functions returned an empty list then, now it's
-- @[0 .. 'length' target]@.
--
-- Newly added are functions to replace all (non-overlapping) occurrences
-- of a pattern within a string, functions to break ByteStrings at the first
-- occurrence of a pattern and functions to split ByteStrings at each
-- occurrence of a pattern. None of these functions does copying, so they
-- don't introduce large memory overhead.
--
-- Internally, a lazy pattern is always converted to a strict ByteString,
-- which is necessary for an efficient implementation of the algorithm.
-- The limit this imposes on the length of the pattern is probably
-- irrelevant in practice, but perhaps it should be mentioned.
-- This also means that the @matchL*@ functions are mere convenience wrappers.
-- Except for the initial 'strictify'ing, there's no difference between lazy
-- and strict patterns, they call the same workers. There is, however, a
-- difference between strict and lazy target strings.
-- For the new functions, no such wrappers are provided, you have to
-- 'strictify' lazy patterns yourself.

-- caution
--
-- When working with a lazy target string, the relation between the pattern
-- length and the chunk size can play a big r&#244;le.
-- Crossing chunk boundaries is relatively expensive, so when that becomes
-- a frequent occurrence, as may happen when the pattern length is close
-- to or larger than the chunk size, performance is likely to degrade.
-- If it is needed, steps can be taken to ameliorate that effect, but unless
-- entirely separate functions are introduced, that would hurt the
-- performance for the more common case of patterns much shorter than
-- the default chunk size.

-- performance
--
-- In general, the Boyer-Moore algorithm is the most efficient method to
-- search for a pattern inside a string, so most of the time, you'll want
-- to use the functions of this module, hence this is where the most work
-- has gone. Very short patterns are an exception to this, for those you
-- should consider using a finite automaton
-- ("Data.ByteString.Search.DFA.Array"). That is also often the better
-- choice for searching longer periodic patterns in a lazy ByteString
-- with many matches.
--
-- Operating on a strict target string is mostly faster than on a lazy
-- target string, but the difference is usually small (according to my
-- tests).
--
-- The known exceptions to this rule of thumb are
--
-- [long targets] Then the smaller memory footprint of a lazy target often
-- gives (much) better performance.
--
-- [high number of matches] When there are very many matches, strict target
-- strings are much faster, especially if the pattern is periodic.
--
-- If both conditions hold, either may outweigh the other.

-- complexity
--
-- Preprocessing the pattern is /O/(@patternLength@ + &#963;) in time and
-- space (&#963; is the alphabet size, 256 here) for all functions.
-- The time complexity of the searching phase for @matchXY@
-- is /O/(@targetLength@ \/ @patternLength@) in the best case.
-- For non-periodic patterns, the worst case complexity is
-- /O/(@targetLength@), but for periodic patterns, the worst case complexity
-- is /O/(@targetLength@ * @patternLength@) for the original Boyer-Moore
-- algorithm.
--
-- The searching functions in this module now contain a modification which
-- drastically improves the performance for periodic patterns.
-- I believe that for strict target strings, the worst case is now
-- /O/(@targetLength@) also for periodic patterns and for lazy target strings,
-- my semi-educated guess is
-- /O/(@targetLength@ * (1 + @patternLength@ \/ @chunkSize@)).
-- I may be very wrong, though.
--
-- The other functions don't have to deal with possible overlapping
-- patterns, hence the worst case complexity for the processing phase
-- is /O/(@targetLength@) (respectively /O/(@firstIndex + patternLength@)
-- for the breaking functions if the pattern occurs).

-- currying
--
-- These functions can all be usefully curried. Given only a pattern
-- the curried version will compute the supporting lookup tables only
-- once, allowing for efficient re-use.  Similarly, the curried
-- 'matchLL' and 'matchLS' will compute the concatenated pattern only
-- once.

-- overflow
--
-- The current code uses @Int@ to keep track of the locations in the
-- target string.  If the length of the pattern plus the length of any
-- strict chunk of the target string is greater than
-- @'maxBound' :: 'Int'@ then this will overflow causing an error.  We
-- try to detect this and call 'error' before a segfault occurs.

------------------------------------------------------------------------------
--                                 Wrappers                                 --
------------------------------------------------------------------------------

-- matching
--
-- These functions find the indices of all (possibly overlapping)
-- occurrences of a pattern in a target string.
-- If the pattern is empty, the result is @[0 .. length target]@.
-- If the pattern is much shorter than the target string
-- and the pattern does not occur very near the beginning of the target,
--
-- > not . null $ matchSS pattern target
--
-- is a much more efficient version of 'S.isInfixOf'.

-- | @'matchLL'@ finds the starting indices of all possibly overlapping
--   occurrences of the pattern in the target string.
--   It is a simple wrapper for 'Data.ByteString.Lazy.Search.indices'.
--   If the pattern is empty, the result is @[0 .. 'length' target]@.
{-# INLINE matchLL #-}
matchLL :: L.ByteString     -- ^ Lazy pattern
        -> L.ByteString     -- ^ Lazy target string
        -> [Int64]          -- ^ Offsets of matches
matchLL :: ByteString -> ByteString -> [Int64]
matchLL ByteString
pat = [ByteString] -> [Int64]
search ([ByteString] -> [Int64])
-> (ByteString -> [ByteString]) -> ByteString -> [Int64]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
L.toChunks
  where
    search :: [ByteString] -> [Int64]
search  = Bool -> ByteString -> [ByteString] -> [Int64]
lazySearcher Bool
True (ByteString -> ByteString
strictify ByteString
pat)

-- | @'matchSL'@ finds the starting indices of all possibly overlapping
--   occurrences of the pattern in the target string.
--   It is an alias for 'Data.ByteString.Lazy.Search.indices'.
--   If the pattern is empty, the result is @[0 .. 'length' target]@.
{-# INLINE matchSL #-}
matchSL :: S.ByteString     -- ^ Strict pattern
        -> L.ByteString     -- ^ Lazy target string
        -> [Int64]          -- ^ Offsets of matches
matchSL :: ByteString -> ByteString -> [Int64]
matchSL ByteString
pat = [ByteString] -> [Int64]
search ([ByteString] -> [Int64])
-> (ByteString -> [ByteString]) -> ByteString -> [Int64]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
L.toChunks
  where
    search :: [ByteString] -> [Int64]
search = Bool -> ByteString -> [ByteString] -> [Int64]
lazySearcher Bool
True ByteString
pat

-- | @'matchNOL'@ finds the indices of all non-overlapping occurrences
--   of the pattern in the lazy target string.
{-# INLINE matchNOL #-}
matchNOL :: S.ByteString    -- ^ Strict pattern
         -> L.ByteString    -- ^ Lazy target string
         -> [Int64]         -- ^ Offsets of matches
matchNOL :: ByteString -> ByteString -> [Int64]
matchNOL ByteString
pat = [ByteString] -> [Int64]
search ([ByteString] -> [Int64])
-> (ByteString -> [ByteString]) -> ByteString -> [Int64]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
L.toChunks
  where
    search :: [ByteString] -> [Int64]
search = Bool -> ByteString -> [ByteString] -> [Int64]
lazySearcher Bool
False ByteString
pat

-- replacing
--
--   These functions replace all (non-overlapping) occurrences of a pattern
--   in the target string. If some occurrences overlap, the earliest is
--   replaced and replacing continues at the index after the replaced
--   occurrence, for example
--
-- > replaceAllL \"ana\" \"olog\" \"banana\" == \"bologna\",
-- > replaceAllS \"abacab\" \"u\" \"abacabacabacab\" == \"uacu\",
-- > replaceAllS \"aa\" \"aaa\" \"aaaa\" == \"aaaaaa\".
--
--   Equality of pattern and substitution is not checked, but
--
-- > pat == sub => 'strictify' (replaceAllS pat sub str) == str,
-- > pat == sub => replaceAllL pat sub str == str.
--
--   The result is a lazily generated lazy ByteString, the first chunks will
--   generally be available before the entire target has been scanned.
--   If the pattern is empty, but not the substitution, the result is
--   equivalent to @'cycle' sub@.

{-# INLINE replaceAllL #-}
replaceAllL :: Substitution rep
            => S.ByteString  -- ^ Pattern to replace
            -> rep           -- ^ Substitution string
            -> L.ByteString  -- ^ Target string
            -> L.ByteString  -- ^ Lazy result
replaceAllL :: forall rep.
Substitution rep =>
ByteString -> rep -> ByteString -> ByteString
replaceAllL ByteString
pat
    | ByteString -> Bool
S.null ByteString
pat = \rep
sub -> rep -> ByteString -> ByteString
forall a. Substitution a => a -> ByteString -> ByteString
prependCycle rep
sub
    | ByteString -> Int
S.length ByteString
pat Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 =
      let breaker :: [ByteString] -> ([ByteString], [ByteString])
breaker = ByteString -> [ByteString] -> ([ByteString], [ByteString])
lazyBreak ByteString
pat
          repl :: ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
repl [ByteString] -> [ByteString]
subst [ByteString]
strs
              | [ByteString] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ByteString]
strs = []
              | Bool
otherwise =
                case [ByteString] -> ([ByteString], [ByteString])
breaker [ByteString]
strs of
                  ([ByteString]
pre, [ByteString]
mtch) ->
                        [ByteString]
pre [ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++ case [ByteString]
mtch of
                                [] -> []
                                [ByteString]
_  -> [ByteString] -> [ByteString]
subst (([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
repl [ByteString] -> [ByteString]
subst (Int -> [ByteString] -> [ByteString]
ldrop Int
1 [ByteString]
mtch))
      in \rep
sub -> let repl1 :: [ByteString] -> [ByteString]
repl1 = ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
repl (rep -> [ByteString] -> [ByteString]
forall a. Substitution a => a -> [ByteString] -> [ByteString]
substitution rep
sub)
                 in [ByteString] -> ByteString
L.fromChunks ([ByteString] -> ByteString)
-> (ByteString -> [ByteString]) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [ByteString] -> [ByteString]
repl1 ([ByteString] -> [ByteString])
-> (ByteString -> [ByteString]) -> ByteString -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
L.toChunks
    | Bool
otherwise =
      let repl :: ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
repl = ByteString
-> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
lazyRepl ByteString
pat
      in \rep
sub -> let repl1 :: [ByteString] -> [ByteString]
repl1 = ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
repl (rep -> [ByteString] -> [ByteString]
forall a. Substitution a => a -> [ByteString] -> [ByteString]
substitution rep
sub)
                 in [ByteString] -> ByteString
L.fromChunks ([ByteString] -> ByteString)
-> (ByteString -> [ByteString]) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [ByteString] -> [ByteString]
repl1 ([ByteString] -> [ByteString])
-> (ByteString -> [ByteString]) -> ByteString -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
L.toChunks

-- breaking
--
-- Break a string on a pattern. The first component of the result
-- contains the prefix of the string before the first occurrence of the
-- pattern, the second component contains the remainder.
-- The following relations hold:
--
-- > breakSubstringX \"\" str = (\"\", str)
-- > not (pat `isInfixOf` str) == null (snd $ breakSunbstringX pat str)
-- > True == case breakSubstringX pat str of
-- >          (x, y) -> not (pat `isInfixOf` x)
-- >                       && (null y || pat `isPrefixOf` y)

-- | The analogous function for a lazy target string.
--   The first component is generated lazily, so parts of it can be
--   available before the pattern is detected (or found to be absent).
{-# INLINE breakSubstringL #-}
breakSubstringL :: S.ByteString  -- ^ Pattern to break on
                -> L.ByteString  -- ^ String to break up
                -> (L.ByteString, L.ByteString)
                    -- ^ Prefix and remainder of broken string
breakSubstringL :: ByteString -> ByteString -> (ByteString, ByteString)
breakSubstringL ByteString
pat = [ByteString] -> (ByteString, ByteString)
breaker ([ByteString] -> (ByteString, ByteString))
-> (ByteString -> [ByteString])
-> ByteString
-> (ByteString, ByteString)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
L.toChunks
  where
    lbrk :: [ByteString] -> ([ByteString], [ByteString])
lbrk = ByteString -> [ByteString] -> ([ByteString], [ByteString])
lazyBreak ByteString
pat
    breaker :: [ByteString] -> (ByteString, ByteString)
breaker [ByteString]
strs = let ([ByteString]
f, [ByteString]
b) = [ByteString] -> ([ByteString], [ByteString])
lbrk [ByteString]
strs
                   in ([ByteString] -> ByteString
L.fromChunks [ByteString]
f, [ByteString] -> ByteString
L.fromChunks [ByteString]
b)

breakAfterL :: S.ByteString
            -> L.ByteString
            -> (L.ByteString, L.ByteString)
breakAfterL :: ByteString -> ByteString -> (ByteString, ByteString)
breakAfterL ByteString
pat
  | ByteString -> Bool
S.null ByteString
pat      = \ByteString
str -> (ByteString
L.empty, ByteString
str)
breakAfterL ByteString
pat     = [ByteString] -> (ByteString, ByteString)
breaker' ([ByteString] -> (ByteString, ByteString))
-> (ByteString -> [ByteString])
-> ByteString
-> (ByteString, ByteString)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
L.toChunks
  where
    !patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
    breaker :: [ByteString] -> ([ByteString], [ByteString])
breaker = ByteString -> [ByteString] -> ([ByteString], [ByteString])
lazyBreak ByteString
pat
    breaker' :: [ByteString] -> (ByteString, ByteString)
breaker' [ByteString]
strs =
      let ([ByteString]
pre, [ByteString]
mtch) = [ByteString] -> ([ByteString], [ByteString])
breaker [ByteString]
strs
          ([ByteString]
pl, [ByteString]
a) = if [ByteString] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ByteString]
mtch then ([],[]) else Int -> [ByteString] -> ([ByteString], [ByteString])
lsplit Int
patLen [ByteString]
mtch
      in ([ByteString] -> ByteString
L.fromChunks ([ByteString]
pre [ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++ [ByteString]
pl), [ByteString] -> ByteString
L.fromChunks [ByteString]
a)

breakFindAfterL :: S.ByteString
                -> L.ByteString
                -> ((L.ByteString, L.ByteString), Bool)
breakFindAfterL :: ByteString -> ByteString -> ((ByteString, ByteString), Bool)
breakFindAfterL ByteString
pat
  | ByteString -> Bool
S.null ByteString
pat  = \ByteString
str -> ((ByteString
L.empty, ByteString
str), Bool
True)
breakFindAfterL ByteString
pat = [ByteString] -> ((ByteString, ByteString), Bool)
breaker' ([ByteString] -> ((ByteString, ByteString), Bool))
-> (ByteString -> [ByteString])
-> ByteString
-> ((ByteString, ByteString), Bool)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
L.toChunks
  where
    !patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
    breaker :: [ByteString] -> ([ByteString], [ByteString])
breaker = ByteString -> [ByteString] -> ([ByteString], [ByteString])
lazyBreak ByteString
pat
    breaker' :: [ByteString] -> ((ByteString, ByteString), Bool)
breaker' [ByteString]
strs =
      let ([ByteString]
pre, [ByteString]
mtch) = [ByteString] -> ([ByteString], [ByteString])
breaker [ByteString]
strs
          ([ByteString]
pl, [ByteString]
a) = if [ByteString] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ByteString]
mtch then ([],[]) else Int -> [ByteString] -> ([ByteString], [ByteString])
lsplit Int
patLen [ByteString]
mtch
      in (([ByteString] -> ByteString
L.fromChunks ([ByteString]
pre [ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++ [ByteString]
pl), [ByteString] -> ByteString
L.fromChunks [ByteString]
a), Bool -> Bool
not ([ByteString] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ByteString]
mtch))

-- splitting
--
-- These functions implement various splitting strategies.
--
-- If the pattern to split on is empty, all functions return an
-- infinite list of empty ByteStrings.
-- Otherwise, the names are rather self-explanatory.
--
-- For nonempty patterns, the following relations hold:
--
-- > concat (splitKeepXY pat str) == str
-- > concat ('Data.List.intersperse' pat (splitDropX pat str)) == str.
--
-- All fragments except possibly the last in the result of
-- @splitKeepEndX pat@ end with @pat@, none of the fragments contains
-- more than one occurrence of @pat@ or is empty.
--
-- All fragments except possibly the first in the result of
-- @splitKeepFrontX pat@ begin with @pat@, none of the fragments
-- contains more than one occurrence of @patq or is empty.
--
-- > splitDropX pat str == map dropPat (splitKeepFrontX pat str)
-- >   where
-- >     patLen = length pat
-- >     dropPat frag
-- >        | pat `isPrefixOf` frag = drop patLen frag
-- >        | otherwise             = frag
--
-- but @splitDropX@ is a little more efficient than that.

{-# INLINE splitKeepEndL #-}
splitKeepEndL :: S.ByteString    -- ^ Pattern to split on
              -> L.ByteString    -- ^ String to split
              -> [L.ByteString]  -- ^ List of fragments
splitKeepEndL :: ByteString -> ByteString -> [ByteString]
splitKeepEndL ByteString
pat
    | ByteString -> Bool
S.null ByteString
pat    = [ByteString] -> ByteString -> [ByteString]
forall a b. a -> b -> a
const (ByteString -> [ByteString]
forall a. a -> [a]
repeat ByteString
L.empty)
    | Bool
otherwise     =
      let splitter :: [ByteString] -> [[ByteString]]
splitter = ByteString -> [ByteString] -> [[ByteString]]
lazySplitKeepEnd ByteString
pat
      in  ([ByteString] -> ByteString) -> [[ByteString]] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map [ByteString] -> ByteString
L.fromChunks ([[ByteString]] -> [ByteString])
-> (ByteString -> [[ByteString]]) -> ByteString -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [ByteString] -> [[ByteString]]
splitter ([ByteString] -> [[ByteString]])
-> (ByteString -> [ByteString]) -> ByteString -> [[ByteString]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
L.toChunks

{-# INLINE splitKeepFrontL #-}
splitKeepFrontL :: S.ByteString    -- ^ Pattern to split on
                -> L.ByteString    -- ^ String to split
                -> [L.ByteString]  -- ^ List of fragments
splitKeepFrontL :: ByteString -> ByteString -> [ByteString]
splitKeepFrontL ByteString
pat
    | ByteString -> Bool
S.null ByteString
pat    = [ByteString] -> ByteString -> [ByteString]
forall a b. a -> b -> a
const (ByteString -> [ByteString]
forall a. a -> [a]
repeat ByteString
L.empty)
    | Bool
otherwise     =
      let splitter :: [ByteString] -> [[ByteString]]
splitter = ByteString -> [ByteString] -> [[ByteString]]
lazySplitKeepFront ByteString
pat
      in  ([ByteString] -> ByteString) -> [[ByteString]] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map [ByteString] -> ByteString
L.fromChunks ([[ByteString]] -> [ByteString])
-> (ByteString -> [[ByteString]]) -> ByteString -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [ByteString] -> [[ByteString]]
splitter ([ByteString] -> [[ByteString]])
-> (ByteString -> [ByteString]) -> ByteString -> [[ByteString]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
L.toChunks


{-# INLINE splitDropL #-}
splitDropL :: S.ByteString    -- ^ Pattern to split on
           -> L.ByteString    -- ^ String to split
           -> [L.ByteString]  -- ^ List of fragments
splitDropL :: ByteString -> ByteString -> [ByteString]
splitDropL ByteString
pat
    | ByteString -> Bool
S.null ByteString
pat    = [ByteString] -> ByteString -> [ByteString]
forall a b. a -> b -> a
const (ByteString -> [ByteString]
forall a. a -> [a]
repeat ByteString
L.empty)
    | Bool
otherwise     =
      let splitter :: [ByteString] -> [[ByteString]]
splitter = ByteString -> [ByteString] -> [[ByteString]]
lazySplitDrop ByteString
pat
      in ([ByteString] -> ByteString) -> [[ByteString]] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map [ByteString] -> ByteString
L.fromChunks ([[ByteString]] -> [ByteString])
-> (ByteString -> [[ByteString]]) -> ByteString -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [ByteString] -> [[ByteString]]
splitter ([ByteString] -> [[ByteString]])
-> (ByteString -> [ByteString]) -> ByteString -> [[ByteString]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
L.toChunks

------------------------------------------------------------------------------
--                             Search Functions                             --
------------------------------------------------------------------------------

lazySearcher :: Bool -> S.ByteString -> [S.ByteString] -> [Int64]
lazySearcher :: Bool -> ByteString -> [ByteString] -> [Int64]
lazySearcher Bool
_ !ByteString
pat
    | ByteString -> Bool
S.null ByteString
pat        =
      let zgo :: t -> [ByteString] -> [t]
zgo !t
prior [] = [t
prior]
          zgo t
prior (!ByteString
str : [ByteString]
rest) =
              let !l :: Int
l = ByteString -> Int
S.length ByteString
str
                  !prior' :: t
prior' = t
prior t -> t -> t
forall a. Num a => a -> a -> a
+ Int -> t
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
l
              in [t
prior t -> t -> t
forall a. Num a => a -> a -> a
+ Int -> t
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
i | Int
i <- [Int
0 .. Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1]] [t] -> [t] -> [t]
forall a. [a] -> [a] -> [a]
++ t -> [ByteString] -> [t]
zgo t
prior' [ByteString]
rest
      in Int64 -> [ByteString] -> [Int64]
forall {t}. Num t => t -> [ByteString] -> [t]
zgo Int64
0
    | ByteString -> Int
S.length ByteString
pat Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 =
      let !w :: Word8
w = ByteString -> Word8
S.head ByteString
pat
          ixes :: ByteString -> [Int]
ixes = Word8 -> ByteString -> [Int]
S.elemIndices Word8
w
          go :: t -> [ByteString] -> [t]
go t
_ [] = []
          go !t
prior (!ByteString
str : [ByteString]
rest)
            = let !prior' :: t
prior' = t
prior t -> t -> t
forall a. Num a => a -> a -> a
+ Int -> t
forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int
S.length ByteString
str)
              in (Int -> t) -> [Int] -> [t]
forall a b. (a -> b) -> [a] -> [b]
map ((t -> t -> t
forall a. Num a => a -> a -> a
+ t
prior) (t -> t) -> (Int -> t) -> Int -> t
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> t
forall a b. (Integral a, Num b) => a -> b
fromIntegral) (ByteString -> [Int]
ixes ByteString
str) [t] -> [t] -> [t]
forall a. [a] -> [a] -> [a]
++ t -> [ByteString] -> [t]
go t
prior' [ByteString]
rest
      in Int64 -> [ByteString] -> [Int64]
forall {t}. Num t => t -> [ByteString] -> [t]
go Int64
0
lazySearcher !Bool
overlap ByteString
pat = [ByteString] -> [Int64]
searcher
  where
    {-# INLINE patAt #-}
    patAt :: Int -> Word8
    patAt :: Int -> Word8
patAt !Int
i = ByteString -> Int -> Word8
unsafeIndex ByteString
pat Int
i

    !patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
    !patEnd :: Int
patEnd = Int
patLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
    {-# INLINE preEnd #-}
    preEnd :: Int
preEnd  = Int
patEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
    !maxLen :: Int
maxLen = Int
forall a. Bounded a => a
maxBound Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
patLen
    !occT :: UArray Int Int
occT   = ByteString -> UArray Int Int
occurs ByteString
pat        -- for bad-character-shift
    !suffT :: UArray Int Int
suffT  = ByteString -> UArray Int Int
suffShifts ByteString
pat    -- for good-suffix-shift
    !skip :: Int
skip   = if Bool
overlap then UArray Int Int -> Int -> Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
unsafeAt UArray Int Int
suffT Int
0 else Int
patLen
    -- shift after a complete match
    !kept :: Int
kept   = Int
patLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
skip     -- length of known prefix after full match
    !pe :: Word8
pe     = Int -> Word8
patAt Int
patEnd      -- last pattern byte for fast comparison

    {-# INLINE occ #-}
    occ :: a -> Int
occ !a
w = UArray Int Int -> Int -> Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
unsafeAt UArray Int Int
occT (a -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
w)

    {-# INLINE suff #-}
    suff :: Int -> Int
suff !Int
i = UArray Int Int -> Int -> Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
unsafeAt UArray Int Int
suffT Int
i

    searcher :: [ByteString] -> [Int64]
searcher [ByteString]
lst = case [ByteString]
lst of
                    []      -> []
                    (ByteString
h : [ByteString]
t) ->
                      if Int
maxLen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< ByteString -> Int
S.length ByteString
h
                        then [Char] -> [Int64]
forall a. HasCallStack => [Char] -> a
error [Char]
"Overflow in BoyerMoore.lazySearcher"
                        else Int64
-> [ByteString]
-> ByteString
-> [ByteString]
-> Int
-> Int
-> [Int64]
seek Int64
0 [] ByteString
h [ByteString]
t Int
0 Int
patEnd

    -- seek is used to position the "zipper" of (past, str, future) to the
    -- correct S.ByteString to search. This is done by ensuring that
    -- 0 <= strPos < strLen, where strPos = diffPos + patPos.
    -- Note that future is not a strict parameter. The bytes being compared
    -- will then be (strAt strPos) and (patAt patPos).
    -- Splitting this into specialised versions is possible, but it would
    -- only be useful if the pattern length is close to (or larger than)
    -- the chunk size. For ordinary patterns of at most a few hundred bytes,
    -- the overhead of yet more code-paths and larger code size will probably
    -- outweigh the small gains in the relatively rare calls to seek.
    seek :: Int64 -> [S.ByteString] -> S.ByteString
            -> [S.ByteString] -> Int -> Int -> [Int64]
    seek :: Int64
-> [ByteString]
-> ByteString
-> [ByteString]
-> Int
-> Int
-> [Int64]
seek !Int64
prior ![ByteString]
past !ByteString
str [ByteString]
future !Int
diffPos !Int
patPos
        | Int
strPos Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 =  -- need to look at previous chunk
            case [ByteString]
past of
                (ByteString
h : [ByteString]
t) ->
                    let !hLen :: Int
hLen = ByteString -> Int
S.length ByteString
h
                    in Int64
-> [ByteString]
-> ByteString
-> [ByteString]
-> Int
-> Int
-> [Int64]
seek (Int64
prior Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
- Int -> Int64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
hLen) [ByteString]
t ByteString
h (ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: [ByteString]
future)
                                (Int
diffPos Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
hLen) Int
patPos
                []      -> [Char] -> [Int64]
forall a. HasCallStack => [Char] -> a
error [Char]
"seek back too far!"
        | Int
strEnd Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
strPos =  -- need to look at next chunk if there is
            case [ByteString]
future of
                (ByteString
h : [ByteString]
t) ->
                    let {-# INLINE prior' #-}
                        prior' :: Int64
prior' = Int64
prior Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+ Int -> Int64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
strLen
                        !diffPos' :: Int
diffPos' = Int
diffPos Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
strLen
                        {-# INLINE past' #-}
                        past' :: [ByteString]
past' = Int -> [ByteString] -> [ByteString]
release (-Int
diffPos') (ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: [ByteString]
past)
                    in if Int
maxLen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< ByteString -> Int
S.length ByteString
h
                        then [Char] -> [Int64]
forall a. HasCallStack => [Char] -> a
error [Char]
"Overflow in BoyerMoore.lazySearcher"
                        else Int64
-> [ByteString]
-> ByteString
-> [ByteString]
-> Int
-> Int
-> [Int64]
seek Int64
prior' [ByteString]
past' ByteString
h [ByteString]
t Int
diffPos' Int
patPos
                []      -> []
        | Int
patPos Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
patEnd  = Int -> [Int64]
checkEnd Int
strPos
        | Int
diffPos Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0       = Int -> Int -> [Int64]
matcherN Int
diffPos Int
patPos
        | Bool
otherwise         = Int -> Int -> [Int64]
matcherP Int
diffPos Int
patPos
          where
            !strPos :: Int
strPos  = Int
diffPos Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patPos
            !strLen :: Int
strLen  = ByteString -> Int
S.length ByteString
str
            !strEnd :: Int
strEnd  = Int
strLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
            !maxDiff :: Int
maxDiff = Int
strLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
patLen

            {-# INLINE strAt #-}
            strAt :: Int -> Word8
strAt !Int
i = ByteString -> Int -> Word8
unsafeIndex ByteString
str Int
i

            -- While comparing the last byte of the pattern, the bad-
            -- character-shift is always at least as large as the good-
            -- suffix-shift. Eliminating the unnecessary memory reads and
            -- comparison speeds things up noticeably.
            checkEnd :: Int -> [Int64]
checkEnd !Int
sI  -- index in string to compare to last of pattern
              | Int
strEnd Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
sI = Int64
-> [ByteString]
-> ByteString
-> [ByteString]
-> Int
-> Int
-> [Int64]
seek Int64
prior [ByteString]
past ByteString
str [ByteString]
future (Int
sI Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
patEnd) Int
patEnd
              | Bool
otherwise   =
                case Int -> Word8
strAt Int
sI of
                  !Word8
c | Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
pe   ->
                       if Int
sI Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
patEnd
                        then case Int
sI of
                              Int
0 -> Int64
-> [ByteString]
-> ByteString
-> [ByteString]
-> Int
-> Int
-> [Int64]
seek Int64
prior [ByteString]
past ByteString
str [ByteString]
future (-Int
patEnd) Int
preEnd
                              Int
_ -> Int -> Int -> [Int64]
matcherN (Int
sI Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
patEnd) Int
preEnd
                        else Int -> Int -> [Int64]
matcherP (Int
sI Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
patEnd) Int
preEnd
                     | Bool
otherwise -> Int -> [Int64]
checkEnd (Int
sI Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall {a}. Integral a => a -> Int
occ Word8
c)

            -- Once the last byte has matched, we enter the full matcher
            -- diff is the offset of the window, patI the index of the
            -- pattern byte to compare next.

            -- matcherN is the tight loop that walks backwards from the end
            -- of the pattern checking for matching bytes. The offset is
            -- always negative, so no complete match can occur here.
            -- When a byte matches, we need to check whether we've reached
            -- the front of this chunk, otherwise whether we need the next.
            matcherN :: Int -> Int -> [Int64]
matcherN !Int
diff !Int
patI =
              case Int -> Word8
strAt (Int
diff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patI) of
                !Word8
c  | Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Int -> Word8
patAt Int
patI   ->
                        if Int
diff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patI Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
                            then Int64
-> [ByteString]
-> ByteString
-> [ByteString]
-> Int
-> Int
-> [Int64]
seek Int64
prior [ByteString]
past ByteString
str [ByteString]
future Int
diff (Int
patI Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
                            else Int -> Int -> [Int64]
matcherN Int
diff (Int
patI Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
                    | Bool
otherwise         ->
                        let {-# INLINE badShift #-}
                            badShift :: Int
badShift = Int
patI Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall {a}. Integral a => a -> Int
occ Word8
c
                            {-# INLINE goodShift #-}
                            goodShift :: Int
goodShift = Int -> Int
suff Int
patI
                            !diff' :: Int
diff' = Int
diff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
badShift Int
goodShift
                        in if Int
maxDiff Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
diff'
                            then Int64
-> [ByteString]
-> ByteString
-> [ByteString]
-> Int
-> Int
-> [Int64]
seek Int64
prior [ByteString]
past ByteString
str [ByteString]
future Int
diff' Int
patEnd
                            else Int -> [Int64]
checkEnd (Int
diff' Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patEnd)

            -- matcherP is the tight loop for non-negative offsets.
            -- When the pattern is shifted, we must check whether we leave
            -- the current chunk, otherwise we only need to check for a
            -- complete match.
            matcherP :: Int -> Int -> [Int64]
matcherP !Int
diff !Int
patI =
              case Int -> Word8
strAt (Int
diff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patI) of
                !Word8
c  | Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Int -> Word8
patAt Int
patI   ->
                      if Int
patI Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
                        then Int64
prior Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+ Int -> Int64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
diff Int64 -> [Int64] -> [Int64]
forall a. a -> [a] -> [a]
:
                              let !diff' :: Int
diff' = Int
diff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
skip
                              in if Int
maxDiff Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
diff'
                                then Int64
-> [ByteString]
-> ByteString
-> [ByteString]
-> Int
-> Int
-> [Int64]
seek Int64
prior [ByteString]
past ByteString
str [ByteString]
future Int
diff' Int
patEnd
                                else
                                  if Int
skip Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
patLen
                                    then
                                      Int -> [Int64]
checkEnd (Int
diff' Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patEnd)
                                    else
                                      Int -> Int -> [Int64]
afterMatch Int
diff' Int
patEnd
                        else Int -> Int -> [Int64]
matcherP Int
diff (Int
patI Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
                    | Bool
otherwise         ->
                        let {-# INLINE badShift #-}
                            badShift :: Int
badShift = Int
patI Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall {a}. Integral a => a -> Int
occ Word8
c
                            {-# INLINE goodShift #-}
                            goodShift :: Int
goodShift = Int -> Int
suff Int
patI
                            !diff' :: Int
diff' = Int
diff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
badShift Int
goodShift
                        in if Int
maxDiff Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
diff'
                            then Int64
-> [ByteString]
-> ByteString
-> [ByteString]
-> Int
-> Int
-> [Int64]
seek Int64
prior [ByteString]
past ByteString
str [ByteString]
future Int
diff' Int
patEnd
                            else Int -> [Int64]
checkEnd (Int
diff' Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patEnd)

            -- After a full match, we know how long a prefix of the pattern
            -- still matches. Do not re-compare the prefix to prevent O(m*n)
            -- behaviour for periodic patterns.
            -- This breaks down at chunk boundaries, but except for long
            -- patterns with a short period, that shouldn't matter much.
            afterMatch :: Int -> Int -> [Int64]
afterMatch !Int
diff !Int
patI =
              case Int -> Word8
strAt (Int
diff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patI) of
                !Word8
c  | Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Int -> Word8
patAt Int
patI ->
                      if Int
patI Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
kept
                        then Int64
prior Int64 -> Int64 -> Int64
forall a. Num a => a -> a -> a
+ Int -> Int64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
diff Int64 -> [Int64] -> [Int64]
forall a. a -> [a] -> [a]
:
                            let !diff' :: Int
diff' = Int
diff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
skip
                            in if Int
maxDiff Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
diff'
                                then Int64
-> [ByteString]
-> ByteString
-> [ByteString]
-> Int
-> Int
-> [Int64]
seek Int64
prior [ByteString]
past ByteString
str [ByteString]
future Int
diff' Int
patEnd
                                else Int -> Int -> [Int64]
afterMatch Int
diff' Int
patEnd
                        else Int -> Int -> [Int64]
afterMatch Int
diff (Int
patI Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
                    | Int
patI Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
patEnd  ->
                        Int -> [Int64]
checkEnd (Int
diff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
2Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
patEnd) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall {a}. Integral a => a -> Int
occ Word8
c)
                    | Bool
otherwise       ->
                        let {-# INLINE badShift #-}
                            badShift :: Int
badShift = Int
patI Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall {a}. Integral a => a -> Int
occ Word8
c
                            {-# INLINE goodShift #-}
                            goodShift :: Int
goodShift = Int -> Int
suff Int
patI
                            !diff' :: Int
diff' = Int
diff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
badShift Int
goodShift
                        in if Int
maxDiff Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
diff'
                            then Int64
-> [ByteString]
-> ByteString
-> [ByteString]
-> Int
-> Int
-> [Int64]
seek Int64
prior [ByteString]
past ByteString
str [ByteString]
future Int
diff' Int
patEnd
                            else Int -> [Int64]
checkEnd (Int
diff' Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patEnd)

------------------------------------------------------------------------------
--                            Breaking Functions                            --
------------------------------------------------------------------------------

-- Ugh! Code duplication ahead!
-- But we want to get the first component lazily, so it's no good to find
-- the first index (if any) and then split.
-- Therefore bite the bullet and copy most of the code of lazySearcher.
-- No need for afterMatch here, fortunately.
lazyBreak ::S.ByteString -> [S.ByteString] -> ([S.ByteString], [S.ByteString])
lazyBreak :: ByteString -> [ByteString] -> ([ByteString], [ByteString])
lazyBreak !ByteString
pat
  | ByteString -> Bool
S.null ByteString
pat  = \[ByteString]
lst -> ([],[ByteString]
lst)
  | ByteString -> Int
S.length ByteString
pat Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 =
    let !w :: Word8
w = ByteString -> Word8
S.head ByteString
pat
        go :: [ByteString] -> ([ByteString], [ByteString])
go [] = ([], [])
        go (!ByteString
str : [ByteString]
rest) =
            case Word8 -> ByteString -> [Int]
S.elemIndices Word8
w ByteString
str of
                []    -> let ([ByteString]
pre, [ByteString]
post) = [ByteString] -> ([ByteString], [ByteString])
go [ByteString]
rest in (ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: [ByteString]
pre, [ByteString]
post)
                (Int
i:[Int]
_) -> if Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
                            then ([], ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: [ByteString]
rest)
                            else ([Int -> ByteString -> ByteString
S.take Int
i ByteString
str], Int -> ByteString -> ByteString
S.drop Int
i ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: [ByteString]
rest)
    in [ByteString] -> ([ByteString], [ByteString])
go
lazyBreak ByteString
pat = [ByteString] -> ([ByteString], [ByteString])
breaker
  where
    !patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
    !patEnd :: Int
patEnd = Int
patLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
    !occT :: UArray Int Int
occT   = ByteString -> UArray Int Int
occurs ByteString
pat
    !suffT :: UArray Int Int
suffT  = ByteString -> UArray Int Int
suffShifts ByteString
pat
    !maxLen :: Int
maxLen = Int
forall a. Bounded a => a
maxBound Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
patLen
    !pe :: Word8
pe     = Int -> Word8
patAt Int
patEnd

    {-# INLINE patAt #-}
    patAt :: Int -> Word8
patAt !Int
i = ByteString -> Int -> Word8
unsafeIndex ByteString
pat Int
i

    {-# INLINE occ #-}
    occ :: a -> Int
occ !a
w = UArray Int Int -> Int -> Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
unsafeAt UArray Int Int
occT (a -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
w)

    {-# INLINE suff #-}
    suff :: Int -> Int
suff !Int
i = UArray Int Int -> Int -> Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
unsafeAt UArray Int Int
suffT Int
i

    breaker :: [ByteString] -> ([ByteString], [ByteString])
breaker [ByteString]
lst =
      case [ByteString]
lst of
        []    -> ([],[])
        (ByteString
h:[ByteString]
t) ->
          if Int
maxLen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< ByteString -> Int
S.length ByteString
h
            then [Char] -> ([ByteString], [ByteString])
forall a. HasCallStack => [Char] -> a
error [Char]
"Overflow in BoyerMoore.lazyBreak"
            else [ByteString]
-> ByteString
-> [ByteString]
-> Int
-> Int
-> ([ByteString], [ByteString])
seek [] ByteString
h [ByteString]
t Int
0 Int
patEnd

    seek :: [S.ByteString] -> S.ByteString -> [S.ByteString]
                -> Int -> Int -> ([S.ByteString], [S.ByteString])
    seek :: [ByteString]
-> ByteString
-> [ByteString]
-> Int
-> Int
-> ([ByteString], [ByteString])
seek ![ByteString]
past !ByteString
str [ByteString]
future !Int
offset !Int
patPos
      | Int
strPos Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 =
        case [ByteString]
past of
          [] -> [Char] -> ([ByteString], [ByteString])
forall a. HasCallStack => [Char] -> a
error [Char]
"not enough past!"
          (ByteString
h : [ByteString]
t) -> [ByteString]
-> ByteString
-> [ByteString]
-> Int
-> Int
-> ([ByteString], [ByteString])
seek [ByteString]
t ByteString
h (ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: [ByteString]
future) (Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ ByteString -> Int
S.length ByteString
h) Int
patPos
      | Int
strEnd Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
strPos =
        case [ByteString]
future of
          []      -> ((ByteString
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
-> [ByteString]
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr ((([ByteString] -> [ByteString])
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
forall a b c. (a -> b -> c) -> b -> a -> c
flip ([ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
(.) (([ByteString] -> [ByteString])
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> (ByteString -> [ByteString] -> [ByteString])
-> ByteString
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (:)) [ByteString] -> [ByteString]
forall a. a -> a
id [ByteString]
past [ByteString
str], [])
          (ByteString
h : [ByteString]
t) ->
            let !off' :: Int
off' = Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
strLen
                ([ByteString]
past', ![ByteString]
discharge) = Int -> [ByteString] -> ([ByteString], [ByteString])
keep (-Int
off') (ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: [ByteString]
past)
            in if Int
maxLen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< ByteString -> Int
S.length ByteString
h
                then [Char] -> ([ByteString], [ByteString])
forall a. HasCallStack => [Char] -> a
error [Char]
"Overflow in BoyerMoore.lazyBreak (future)"
                else let ([ByteString]
pre,[ByteString]
post) = [ByteString]
-> ByteString
-> [ByteString]
-> Int
-> Int
-> ([ByteString], [ByteString])
seek [ByteString]
past' ByteString
h [ByteString]
t Int
off' Int
patPos
                     in ((ByteString
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
-> [ByteString]
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr ((([ByteString] -> [ByteString])
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
forall a b c. (a -> b -> c) -> b -> a -> c
flip ([ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
(.) (([ByteString] -> [ByteString])
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> (ByteString -> [ByteString] -> [ByteString])
-> ByteString
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (:)) [ByteString] -> [ByteString]
forall a. a -> a
id [ByteString]
discharge [ByteString]
pre, [ByteString]
post)
      | Int
patPos Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
patEnd = Int -> ([ByteString], [ByteString])
checkEnd Int
strPos
      | Int
offset Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 = Int -> Int -> ([ByteString], [ByteString])
matcherN Int
offset Int
patPos
      | Bool
otherwise  = Int -> Int -> ([ByteString], [ByteString])
matcherP Int
offset Int
patPos
      where
        {-# INLINE strAt #-}
        strAt :: Int -> Word8
strAt !Int
i = ByteString -> Int -> Word8
unsafeIndex ByteString
str Int
i

        !strLen :: Int
strLen = ByteString -> Int
S.length ByteString
str
        !strEnd :: Int
strEnd = Int
strLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
        !maxOff :: Int
maxOff = Int
strLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
patLen
        !strPos :: Int
strPos = Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patPos

        checkEnd :: Int -> ([ByteString], [ByteString])
checkEnd !Int
sI
          | Int
strEnd Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
sI = [ByteString]
-> ByteString
-> [ByteString]
-> Int
-> Int
-> ([ByteString], [ByteString])
seek [ByteString]
past ByteString
str [ByteString]
future (Int
sI Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
patEnd) Int
patEnd
          | Bool
otherwise   =
            case Int -> Word8
strAt Int
sI of
              !Word8
c  | Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
pe   ->
                    if Int
sI Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
patEnd
                      then (if Int
sI Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
                              then [ByteString]
-> ByteString
-> [ByteString]
-> Int
-> Int
-> ([ByteString], [ByteString])
seek [ByteString]
past ByteString
str [ByteString]
future (-Int
patEnd) (Int
patEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
                              else Int -> Int -> ([ByteString], [ByteString])
matcherN (Int
sI Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
patEnd) (Int
patEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1))
                      else Int -> Int -> ([ByteString], [ByteString])
matcherP (Int
sI Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
patEnd) (Int
patEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
                  | Bool
otherwise -> Int -> ([ByteString], [ByteString])
checkEnd (Int
sI Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall {a}. Integral a => a -> Int
occ Word8
c)

        matcherN :: Int -> Int -> ([ByteString], [ByteString])
matcherN !Int
off !Int
patI =
          case Int -> Word8
strAt (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patI) of
            !Word8
c  | Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Int -> Word8
patAt Int
patI ->
                  if Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patI Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
                    then [ByteString]
-> ByteString
-> [ByteString]
-> Int
-> Int
-> ([ByteString], [ByteString])
seek [ByteString]
past ByteString
str [ByteString]
future Int
off (Int
patI Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
                    else Int -> Int -> ([ByteString], [ByteString])
matcherN Int
off (Int
patI Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
                | Bool
otherwise ->
                    let !off' :: Int
off' = Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int -> Int -> Int
forall a. Ord a => a -> a -> a
max (Int -> Int
suff Int
patI) (Int
patI Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall {a}. Integral a => a -> Int
occ Word8
c)
                    in if Int
maxOff Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
off'
                        then [ByteString]
-> ByteString
-> [ByteString]
-> Int
-> Int
-> ([ByteString], [ByteString])
seek [ByteString]
past ByteString
str [ByteString]
future Int
off' Int
patEnd
                        else Int -> ([ByteString], [ByteString])
checkEnd (Int
off' Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patEnd)

        matcherP :: Int -> Int -> ([ByteString], [ByteString])
matcherP !Int
off !Int
patI =
          case Int -> Word8
strAt (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patI) of
            !Word8
c  | Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Int -> Word8
patAt Int
patI ->
                  if Int
patI Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
                    then let !pre :: [ByteString]
pre = if Int
off Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 then [] else [Int -> ByteString -> ByteString
S.take Int
off ByteString
str]
                             !post :: ByteString
post = Int -> ByteString -> ByteString
S.drop Int
off ByteString
str
                         in ((ByteString
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
-> [ByteString]
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr ((([ByteString] -> [ByteString])
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
forall a b c. (a -> b -> c) -> b -> a -> c
flip ([ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
(.) (([ByteString] -> [ByteString])
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> (ByteString -> [ByteString] -> [ByteString])
-> ByteString
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (:)) [ByteString] -> [ByteString]
forall a. a -> a
id [ByteString]
past [ByteString]
pre, ByteString
postByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:[ByteString]
future)
                    else Int -> Int -> ([ByteString], [ByteString])
matcherP Int
off (Int
patI Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
                | Bool
otherwise ->
                    let !off' :: Int
off' = Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int -> Int -> Int
forall a. Ord a => a -> a -> a
max (Int -> Int
suff Int
patI) (Int
patI Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall {a}. Integral a => a -> Int
occ Word8
c)
                    in if Int
maxOff Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
off'
                        then [ByteString]
-> ByteString
-> [ByteString]
-> Int
-> Int
-> ([ByteString], [ByteString])
seek [ByteString]
past ByteString
str [ByteString]
future Int
off' Int
patEnd
                        else Int -> ([ByteString], [ByteString])
checkEnd (Int
off' Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patEnd)


------------------------------------------------------------------------------
--                            Splitting Functions                           --
------------------------------------------------------------------------------

-- non-empty pattern
lazySplitKeepFront :: S.ByteString -> [S.ByteString] -> [[S.ByteString]]
lazySplitKeepFront :: ByteString -> [ByteString] -> [[ByteString]]
lazySplitKeepFront ByteString
pat = [ByteString] -> [[ByteString]]
splitter'
  where
    !patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
    breaker :: [ByteString] -> ([ByteString], [ByteString])
breaker = ByteString -> [ByteString] -> ([ByteString], [ByteString])
lazyBreak ByteString
pat
    splitter' :: [ByteString] -> [[ByteString]]
splitter' [ByteString]
strs = case [ByteString] -> [[ByteString]]
splitter [ByteString]
strs of
                        ([]:[[ByteString]]
rest) -> [[ByteString]]
rest
                        [[ByteString]]
other -> [[ByteString]]
other
    splitter :: [ByteString] -> [[ByteString]]
splitter [] = []
    splitter [ByteString]
strs =
      case [ByteString] -> ([ByteString], [ByteString])
breaker [ByteString]
strs of
        ([ByteString]
pre, [ByteString]
mtch) ->
           [ByteString]
pre [ByteString] -> [[ByteString]] -> [[ByteString]]
forall a. a -> [a] -> [a]
: case [ByteString]
mtch of
                    [] -> []
                    [ByteString]
_  -> case Int -> [ByteString] -> ([ByteString], [ByteString])
lsplit Int
patLen [ByteString]
mtch of
                            ([ByteString]
pt, [ByteString]
rst) ->
                              if [ByteString] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ByteString]
rst
                                then [[ByteString]
pt]
                                else let ([ByteString]
h : [[ByteString]]
t) = [ByteString] -> [[ByteString]]
splitter [ByteString]
rst
                                     in ([ByteString]
pt [ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++ [ByteString]
h) [ByteString] -> [[ByteString]] -> [[ByteString]]
forall a. a -> [a] -> [a]
: [[ByteString]]
t

-- non-empty pattern
lazySplitKeepEnd :: S.ByteString -> [S.ByteString] -> [[S.ByteString]]
lazySplitKeepEnd :: ByteString -> [ByteString] -> [[ByteString]]
lazySplitKeepEnd ByteString
pat = [ByteString] -> [[ByteString]]
splitter
  where
    !patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
    breaker :: [ByteString] -> ([ByteString], [ByteString])
breaker = ByteString -> [ByteString] -> ([ByteString], [ByteString])
lazyBreak ByteString
pat
    splitter :: [ByteString] -> [[ByteString]]
splitter [] = []
    splitter [ByteString]
strs =
      case [ByteString] -> ([ByteString], [ByteString])
breaker [ByteString]
strs of
        ([ByteString]
pre, [ByteString]
mtch) ->
            let ([ByteString]
h : [[ByteString]]
t) = if [ByteString] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ByteString]
mtch
                            then [[]]
                            else case Int -> [ByteString] -> ([ByteString], [ByteString])
lsplit Int
patLen [ByteString]
mtch of
                                    ([ByteString]
pt, [ByteString]
rst) -> [ByteString]
pt [ByteString] -> [[ByteString]] -> [[ByteString]]
forall a. a -> [a] -> [a]
: [ByteString] -> [[ByteString]]
splitter [ByteString]
rst
            in ([ByteString]
pre [ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++ [ByteString]
h) [ByteString] -> [[ByteString]] -> [[ByteString]]
forall a. a -> [a] -> [a]
: [[ByteString]]
t

lazySplitDrop :: S.ByteString -> [S.ByteString] -> [[S.ByteString]]
lazySplitDrop :: ByteString -> [ByteString] -> [[ByteString]]
lazySplitDrop ByteString
pat = [ByteString] -> [[ByteString]]
splitter
  where
    !patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
    breaker :: [ByteString] -> ([ByteString], [ByteString])
breaker = ByteString -> [ByteString] -> ([ByteString], [ByteString])
lazyBreak ByteString
pat
    splitter :: [ByteString] -> [[ByteString]]
splitter [] = []
    splitter [ByteString]
strs = [ByteString] -> [[ByteString]]
splitter' [ByteString]
strs
    splitter' :: [ByteString] -> [[ByteString]]
splitter' [] = [[]]
    splitter' [ByteString]
strs = case [ByteString] -> ([ByteString], [ByteString])
breaker [ByteString]
strs of
                        ([ByteString]
pre,[ByteString]
mtch) ->
                            [ByteString]
pre [ByteString] -> [[ByteString]] -> [[ByteString]]
forall a. a -> [a] -> [a]
: case [ByteString]
mtch of
                                    [] -> []
                                    [ByteString]
_  -> [ByteString] -> [[ByteString]]
splitter' (Int -> [ByteString] -> [ByteString]
ldrop Int
patLen [ByteString]
mtch)

------------------------------------------------------------------------------
--                            Replacing Functions                           --
------------------------------------------------------------------------------

{-

These would be really nice.
Unfortunately they're too slow, so instead, there's another instance of
almost the same code as in lazySearcher below.

-- variant of below
lazyFRepl :: S.ByteString -> ([S.ByteString] -> [S.ByteString])
                -> [S.ByteString] -> [S.ByteString]
lazyFRepl pat = repl
  where
    !patLen = S.length pat
    breaker = lazyBreak pat
    repl sub = replacer
      where
        replacer [] = []
        replacer strs =
          let (pre, mtch) = breaker strs
          in pre ++ case mtch of
                      [] -> []
                      _  -> sub (replacer (ldrop patLen mtch))

-- This is nice and short. I really hope it's performing well!
lazyBRepl :: S.ByteString -> S.ByteString -> [S.ByteString] -> [S.ByteString]
lazyBRepl pat !sub = replacer
  where
    !patLen = S.length pat
    breaker = lazyBreak pat
    replacer [] = []
    replacer strs = let (pre, mtch) = breaker strs
                    in pre ++ case mtch of
                                [] -> []
                                _  -> sub : replacer (ldrop patLen mtch)
-}

-- Yet more code duplication.
--
-- Benchmark it against an implementation using lazyBreak and,
-- unless it's significantly faster, NUKE IT!!
--
-- Sigh, it is significantly faster. 10 - 25 %.
-- I could live with the 10, but 25 is too much.
--
-- Hmm, maybe an implementation via
-- replace pat sub = L.intercalate sub . split pat
-- would be competitive now.
-- TODO: test speed and space usage.
--
-- replacing loop for lazy ByteStrings as list of chunks,
-- called only for non-empty patterns
lazyRepl :: S.ByteString -> ([S.ByteString] -> [S.ByteString])
            -> [S.ByteString] -> [S.ByteString]
lazyRepl :: ByteString
-> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
lazyRepl ByteString
pat = ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
replacer
 where
  !patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
  !patEnd :: Int
patEnd = Int
patLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
  !occT :: UArray Int Int
occT   = ByteString -> UArray Int Int
occurs ByteString
pat
  !suffT :: UArray Int Int
suffT  = ByteString -> UArray Int Int
suffShifts ByteString
pat
  !maxLen :: Int
maxLen = Int
forall a. Bounded a => a
maxBound Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
patLen
  !pe :: Word8
pe     = Int -> Word8
patAt Int
patEnd

  {-# INLINE patAt #-}
  patAt :: Int -> Word8
patAt !Int
i = ByteString -> Int -> Word8
unsafeIndex ByteString
pat Int
i

  {-# INLINE occ #-}
  occ :: a -> Int
occ !a
w = UArray Int Int -> Int -> Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
unsafeAt UArray Int Int
occT (a -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
w)

  {-# INLINE suff #-}
  suff :: Int -> Int
suff !Int
i = UArray Int Int -> Int -> Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
unsafeAt UArray Int Int
suffT Int
i

  replacer :: ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
replacer [ByteString] -> [ByteString]
sub [ByteString]
lst =
      case [ByteString]
lst of
        []    -> []
        (ByteString
h:[ByteString]
t) ->
          if Int
maxLen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< ByteString -> Int
S.length ByteString
h
            then [Char] -> [ByteString]
forall a. HasCallStack => [Char] -> a
error [Char]
"Overflow in BoyerMoore.lazyRepl"
            else [ByteString]
-> ByteString -> [ByteString] -> Int -> Int -> [ByteString]
seek [] ByteString
h [ByteString]
t Int
0 Int
patEnd
   where
        chop :: Int -> [ByteString] -> [ByteString]
chop Int
_ [] = []
        chop !Int
k (!ByteString
str : [ByteString]
rest)
          | Int
k Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
s     =
            if Int
maxLen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< (Int
s Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
k)
                then [Char] -> [ByteString]
forall a. HasCallStack => [Char] -> a
error [Char]
"Overflow in BoyerMoore.lazyRepl (chop)"
                else [ByteString]
-> ByteString -> [ByteString] -> Int -> Int -> [ByteString]
seek [] (Int -> ByteString -> ByteString
S.drop Int
k ByteString
str) [ByteString]
rest Int
0 Int
patEnd
          | Bool
otherwise = Int -> [ByteString] -> [ByteString]
chop (Int
kInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
s) [ByteString]
rest
            where
              !s :: Int
s = ByteString -> Int
S.length ByteString
str

        seek :: [S.ByteString] -> S.ByteString -> [S.ByteString]
                                    -> Int -> Int -> [S.ByteString]
        seek :: [ByteString]
-> ByteString -> [ByteString] -> Int -> Int -> [ByteString]
seek ![ByteString]
past !ByteString
str [ByteString]
fut !Int
offset !Int
patPos
          | Int
strPos Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 =
            case [ByteString]
past of
              [] -> [Char] -> [ByteString]
forall a. HasCallStack => [Char] -> a
error [Char]
"not enough past!"
              (ByteString
h : [ByteString]
t) -> [ByteString]
-> ByteString -> [ByteString] -> Int -> Int -> [ByteString]
seek [ByteString]
t ByteString
h (ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: [ByteString]
fut) (Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ ByteString -> Int
S.length ByteString
h) Int
patPos
          | Int
strEnd Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
strPos =
            case [ByteString]
fut of
              []      -> (ByteString
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
-> [ByteString]
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr ((([ByteString] -> [ByteString])
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
forall a b c. (a -> b -> c) -> b -> a -> c
flip ([ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
(.) (([ByteString] -> [ByteString])
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> (ByteString -> [ByteString] -> [ByteString])
-> ByteString
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (:)) [ByteString] -> [ByteString]
forall a. a -> a
id [ByteString]
past [ByteString
str]
              (ByteString
h : [ByteString]
t) ->
                let !off' :: Int
off' = Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
strLen
                    ([ByteString]
past', ![ByteString]
discharge) = Int -> [ByteString] -> ([ByteString], [ByteString])
keep (-Int
off') (ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: [ByteString]
past)
                in if Int
maxLen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< ByteString -> Int
S.length ByteString
h
                    then [Char] -> [ByteString]
forall a. HasCallStack => [Char] -> a
error [Char]
"Overflow in BoyerMoore.lazyRepl (future)"
                    else (ByteString
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
-> [ByteString]
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr ((([ByteString] -> [ByteString])
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
forall a b c. (a -> b -> c) -> b -> a -> c
flip ([ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
(.) (([ByteString] -> [ByteString])
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> (ByteString -> [ByteString] -> [ByteString])
-> ByteString
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (:)) [ByteString] -> [ByteString]
forall a. a -> a
id [ByteString]
discharge ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
forall a b. (a -> b) -> a -> b
$
                                            [ByteString]
-> ByteString -> [ByteString] -> Int -> Int -> [ByteString]
seek [ByteString]
past' ByteString
h [ByteString]
t Int
off' Int
patPos
          | Int
patPos Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
patEnd = Int -> [ByteString]
checkEnd Int
strPos
          | Int
offset Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 = Int -> Int -> [ByteString]
matcherN Int
offset Int
patPos
          | Bool
otherwise  = Int -> Int -> [ByteString]
matcherP Int
offset Int
patPos
            where
              {-# INLINE strAt #-}
              strAt :: Int -> Word8
strAt !Int
i = ByteString -> Int -> Word8
unsafeIndex ByteString
str Int
i

              !strLen :: Int
strLen = ByteString -> Int
S.length ByteString
str
              !strEnd :: Int
strEnd = Int
strLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
              !maxOff :: Int
maxOff = Int
strLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
patLen
              !strPos :: Int
strPos = Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patPos

              checkEnd :: Int -> [ByteString]
checkEnd !Int
sI
                | Int
strEnd Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
sI = [ByteString]
-> ByteString -> [ByteString] -> Int -> Int -> [ByteString]
seek [ByteString]
past ByteString
str [ByteString]
fut (Int
sI Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
patEnd) Int
patEnd
                | Bool
otherwise   =
                  case Int -> Word8
strAt Int
sI of
                    !Word8
c  | Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
pe   ->
                          if Int
sI Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
patEnd
                            then (if Int
sI Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
                              then [ByteString]
-> ByteString -> [ByteString] -> Int -> Int -> [ByteString]
seek [ByteString]
past ByteString
str [ByteString]
fut (-Int
patEnd) (Int
patEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
                              else Int -> Int -> [ByteString]
matcherN (Int
sI Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
patEnd) (Int
patEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1))
                          else Int -> Int -> [ByteString]
matcherP (Int
sI Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
patEnd) (Int
patEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
                        | Bool
otherwise -> Int -> [ByteString]
checkEnd (Int
sI Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall {a}. Integral a => a -> Int
occ Word8
c)

              matcherN :: Int -> Int -> [ByteString]
matcherN !Int
off !Int
patI =
                case Int -> Word8
strAt (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patI) of
                  !Word8
c  | Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Int -> Word8
patAt Int
patI ->
                        if Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patI Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
                          then [ByteString]
-> ByteString -> [ByteString] -> Int -> Int -> [ByteString]
seek [ByteString]
past ByteString
str [ByteString]
fut Int
off (Int
patI Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
                          else Int -> Int -> [ByteString]
matcherN Int
off (Int
patI Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
                      | Bool
otherwise ->
                        let !off' :: Int
off' = Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int -> Int -> Int
forall a. Ord a => a -> a -> a
max (Int -> Int
suff Int
patI) (Int
patI Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall {a}. Integral a => a -> Int
occ Word8
c)
                        in if Int
maxOff Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
off'
                            then [ByteString]
-> ByteString -> [ByteString] -> Int -> Int -> [ByteString]
seek [ByteString]
past ByteString
str [ByteString]
fut Int
off' Int
patEnd
                            else Int -> [ByteString]
checkEnd (Int
off' Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patEnd)

              matcherP :: Int -> Int -> [ByteString]
matcherP !Int
off !Int
patI =
                case Int -> Word8
strAt (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patI) of
                  !Word8
c  | Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Int -> Word8
patAt Int
patI ->
                        if Int
patI Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
                          then (ByteString
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
-> [ByteString]
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr ((([ByteString] -> [ByteString])
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
forall a b c. (a -> b -> c) -> b -> a -> c
flip ([ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
(.) (([ByteString] -> [ByteString])
 -> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString])
-> (ByteString -> [ByteString] -> [ByteString])
-> ByteString
-> ([ByteString] -> [ByteString])
-> [ByteString]
-> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (:)) [ByteString] -> [ByteString]
forall a. a -> a
id [ByteString]
past ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
forall a b. (a -> b) -> a -> b
$
                            let pre :: [ByteString] -> [ByteString]
pre = if Int
off Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
                                        then [ByteString] -> [ByteString]
forall a. a -> a
id
                                        else (Int -> ByteString -> ByteString
S.take Int
off ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:)
                            in [ByteString] -> [ByteString]
pre ([ByteString] -> [ByteString])
-> ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [ByteString] -> [ByteString]
sub ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
forall a b. (a -> b) -> a -> b
$
                                let !p :: Int
p = Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patLen
                                in if Int
p Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
strLen
                                    then [ByteString]
-> ByteString -> [ByteString] -> Int -> Int -> [ByteString]
seek [] (Int -> ByteString -> ByteString
S.drop Int
p ByteString
str) [ByteString]
fut Int
0 Int
patEnd
                                    else Int -> [ByteString] -> [ByteString]
chop (Int
p Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
strLen) [ByteString]
fut
                        else Int -> Int -> [ByteString]
matcherP Int
off (Int
patI Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
                      | Bool
otherwise ->
                        let !off' :: Int
off' = Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int -> Int -> Int
forall a. Ord a => a -> a -> a
max (Int -> Int
suff Int
patI) (Int
patI Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall {a}. Integral a => a -> Int
occ Word8
c)
                        in if Int
maxOff Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
off'
                            then [ByteString]
-> ByteString -> [ByteString] -> Int -> Int -> [ByteString]
seek [ByteString]
past ByteString
str [ByteString]
fut Int
off' Int
patEnd
                            else Int -> [ByteString]
checkEnd (Int
off' Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patEnd)