{-# LANGUAGE BangPatterns #-}
{-# OPTIONS_HADDOCK hide, prune #-}
-- |
-- Module         : Data.ByteString.Search.Internal.BoyerMoore
-- Copyright      : Daniel Fischer
--                  Chris Kuklewicz
-- Licence        : BSD3
-- Maintainer     : Daniel Fischer <daniel.is.fischer@googlemail.com>
-- Stability      : Provisional
-- Portability    : non-portable (BangPatterns)
--
-- Fast overlapping Boyer-Moore search of both strict and lazy
-- 'S.ByteString' values. Breaking, splitting and replacing
-- using the Boyer-Moore algorithm.
--
-- Descriptions of the algorithm can be found at
-- <http://www-igm.univ-mlv.fr/~lecroq/string/node14.html#SECTION00140>
-- and
-- <http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm>
--
-- Original authors: Daniel Fischer (daniel.is.fischer at googlemail.com) and
-- Chris Kuklewicz (haskell at list.mightyreason.com).

module Data.ByteString.Search.Internal.BoyerMoore (
                                           matchLS
                                         , matchSS

                                           --  Non-overlapping
                                         , matchNOS

                                            --  Replacing substrings
                                            -- replacing
                                         , replaceAllS
                                            --  Breaking on substrings
                                            -- breaking
                                         , breakSubstringS
                                         , breakAfterS
                                            --  Splitting on substrings
                                            -- splitting
                                         , splitKeepEndS
                                         , splitKeepFrontS
                                         , splitDropS
                                         ) where


import Data.ByteString.Search.Internal.Utils
                (occurs, suffShifts, strictify)
import Data.ByteString.Search.Substitution

import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Internal as LI
import Data.ByteString.Unsafe (unsafeIndex)

import Data.Array.Base (unsafeAt)

import Data.Word (Word8)

-- overview
--
-- This module exports three search functions for searching in strict
-- ByteStrings. One for searching non-overlapping occurrences of a strict
-- pattern and one each for possibly overlapping occurrences of a lazy
-- resp. strict pattern. The common base name is @match@, the suffix
-- indicates the type of search to perform. These functions
-- return (for a non-empty pattern) a list of all the indices of the target
-- string where an occurrence of the pattern begins, if some occurrences
-- overlap, all starting indices are reported. The list is produced lazily,
-- so not necessarily the entire target string is searched.
--
-- The behaviour of these functions when given an empty pattern has changed.
-- Formerly, the @matchXY@ functions returned an empty list then, now it's
-- @[0 .. 'length' target]@.
--
-- Newly added are functions to replace all (non-overlapping) occurrences
-- of a pattern within a string, functions to break ByteStrings at the first
-- occurrence of a pattern and functions to split ByteStrings at each
-- occurrence of a pattern. None of these functions does copying, so they
-- don't introduce large memory overhead.
--
-- Internally, a lazy pattern is always converted to a strict ByteString,
-- which is necessary for an efficient implementation of the algorithm.
-- The limit this imposes on the length of the pattern is probably
-- irrelevant in practice, but perhaps it should be mentioned.
-- This also means that the @matchL*@ functions are mere convenience wrappers.
-- Except for the initial 'strictify'ing, there's no difference between lazy
-- and strict patterns, they call the same workers. There is, however, a
-- difference between strict and lazy target strings.
-- For the new functions, no such wrappers are provided, you have to
-- 'strictify' lazy patterns yourself.

-- caution
--
-- When working with a lazy target string, the relation between the pattern
-- length and the chunk size can play a big r&#244;le.
-- Crossing chunk boundaries is relatively expensive, so when that becomes
-- a frequent occurrence, as may happen when the pattern length is close
-- to or larger than the chunk size, performance is likely to degrade.
-- If it is needed, steps can be taken to ameliorate that effect, but unless
-- entirely separate functions are introduced, that would hurt the
-- performance for the more common case of patterns much shorter than
-- the default chunk size.

-- performance
--
-- In general, the Boyer-Moore algorithm is the most efficient method to
-- search for a pattern inside a string, so most of the time, you'll want
-- to use the functions of this module, hence this is where the most work
-- has gone. Very short patterns are an exception to this, for those you
-- should consider using a finite automaton
-- ("Data.ByteString.Search.DFA.Array"). That is also often the better
-- choice for searching longer periodic patterns in a lazy ByteString
-- with many matches.
--
-- Operating on a strict target string is mostly faster than on a lazy
-- target string, but the difference is usually small (according to my
-- tests).
--
-- The known exceptions to this rule of thumb are
--
-- [long targets] Then the smaller memory footprint of a lazy target often
-- gives (much) better performance.
--
-- [high number of matches] When there are very many matches, strict target
-- strings are much faster, especially if the pattern is periodic.
--
-- If both conditions hold, either may outweigh the other.

-- complexity
--
-- Preprocessing the pattern is /O/(@patternLength@ + &#963;) in time and
-- space (&#963; is the alphabet size, 256 here) for all functions.
-- The time complexity of the searching phase for @matchXY@
-- is /O/(@targetLength@ \/ @patternLength@) in the best case.
-- For non-periodic patterns, the worst case complexity is
-- /O/(@targetLength@), but for periodic patterns, the worst case complexity
-- is /O/(@targetLength@ * @patternLength@) for the original Boyer-Moore
-- algorithm.
--
-- The searching functions in this module now contain a modification which
-- drastically improves the performance for periodic patterns.
-- I believe that for strict target strings, the worst case is now
-- /O/(@targetLength@) also for periodic patterns and for lazy target strings,
-- my semi-educated guess is
-- /O/(@targetLength@ * (1 + @patternLength@ \/ @chunkSize@)).
-- I may be very wrong, though.
--
-- The other functions don't have to deal with possible overlapping
-- patterns, hence the worst case complexity for the processing phase
-- is /O/(@targetLength@) (respectively /O/(@firstIndex + patternLength@)
-- for the breaking functions if the pattern occurs).

-- currying
--
-- These functions can all be usefully curried. Given only a pattern
-- the curried version will compute the supporting lookup tables only
-- once, allowing for efficient re-use.  Similarly, the curried
-- 'matchLL' and 'matchLS' will compute the concatenated pattern only
-- once.

-- overflow
--
-- The current code uses @Int@ to keep track of the locations in the
-- target string.  If the length of the pattern plus the length of any
-- strict chunk of the target string is greater than
-- @'maxBound' :: 'Int'@ then this will overflow causing an error.  We
-- try to detect this and call 'error' before a segfault occurs.

------------------------------------------------------------------------------
--                                 Wrappers                                 --
------------------------------------------------------------------------------

-- matching
--
-- These functions find the indices of all (possibly overlapping)
-- occurrences of a pattern in a target string.
-- If the pattern is empty, the result is @[0 .. length target]@.
-- If the pattern is much shorter than the target string
-- and the pattern does not occur very near the beginning of the target,
--
-- > not . null $ matchSS pattern target
--
-- is a much more efficient version of 'S.isInfixOf'.

-- | @'matchLS'@ finds the starting indices of all possibly overlapping
--   occurrences of the pattern in the target string.
--   It is a simple wrapper for 'Data.ByteString.Search.indices'.
--   If the pattern is empty, the result is @[0 .. 'length' target]@.
{-# INLINE matchLS #-}
matchLS :: L.ByteString     -- ^ Lazy pattern
        -> S.ByteString     -- ^ Strict target string
        -> [Int]            -- ^ Offsets of matches
matchLS :: ByteString -> ByteString -> [Int]
matchLS ByteString
pat = ByteString -> [Int]
search
  where
    search :: ByteString -> [Int]
search = Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
True (ByteString -> ByteString
strictify ByteString
pat)

-- | @'matchSS'@ finds the starting indices of all possibly overlapping
--   occurrences of the pattern in the target string.
--   It is an alias for 'Data.ByteString.Search.indices'.
--   If the pattern is empty, the result is @[0 .. 'length' target]@.
{-# INLINE matchSS #-}
matchSS :: S.ByteString     -- ^ Strict pattern
        -> S.ByteString     -- ^ Strict target string
        -> [Int]            -- ^ Offsets of matches
matchSS :: ByteString -> ByteString -> [Int]
matchSS ByteString
pat = ByteString -> [Int]
search
  where
    search :: ByteString -> [Int]
search = Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
True ByteString
pat

-- | @'matchNOS'@ finds the indices of all non-overlapping occurrences
--   of the pattern in the Strict target string.
{-# INLINE matchNOS #-}
matchNOS :: S.ByteString    -- ^ Strict pattern
         -> S.ByteString    -- ^ Strict target string
         -> [Int]           -- ^ Offsets of matches
matchNOS :: ByteString -> ByteString -> [Int]
matchNOS ByteString
pat = ByteString -> [Int]
search
  where
    search :: ByteString -> [Int]
search = Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
False ByteString
pat

-- replacing
--
--   These functions replace all (non-overlapping) occurrences of a pattern
--   in the target string. If some occurrences overlap, the earliest is
--   replaced and replacing continues at the index after the replaced
--   occurrence, for example
--
-- > replaceAllL \"ana\" \"olog\" \"banana\" == \"bologna\",
-- > replaceAllS \"abacab\" \"u\" \"abacabacabacab\" == \"uacu\",
-- > replaceAllS \"aa\" \"aaa\" \"aaaa\" == \"aaaaaa\".
--
--   Equality of pattern and substitution is not checked, but
--
-- > pat == sub => 'strictify' (replaceAllS pat sub str) == str,
-- > pat == sub => replaceAllL pat sub str == str.
--
--   The result is a lazily generated lazy ByteString, the first chunks will
--   generally be available before the entire target has been scanned.
--   If the pattern is empty, but not the substitution, the result is
--   equivalent to @'cycle' sub@.

{-# INLINE replaceAllS #-}
replaceAllS :: Substitution rep
            => S.ByteString  -- ^ Pattern to replace
            -> rep           -- ^ Substitution string
            -> S.ByteString  -- ^ Target string
            -> L.ByteString  -- ^ Lazy result
replaceAllS :: forall rep.
Substitution rep =>
ByteString -> rep -> ByteString -> ByteString
replaceAllS ByteString
pat
    | ByteString -> Bool
S.null ByteString
pat = \rep
sub -> rep -> ByteString -> ByteString
forall a. Substitution a => a -> ByteString -> ByteString
prependCycle rep
sub (ByteString -> ByteString)
-> (ByteString -> ByteString) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString -> ByteString -> ByteString)
-> ByteString -> ByteString -> ByteString
forall a b c. (a -> b -> c) -> b -> a -> c
flip ByteString -> ByteString -> ByteString
LI.chunk ByteString
LI.Empty
    | Bool
otherwise =
      let repl :: ([ByteString] -> [ByteString]) -> ByteString -> [ByteString]
repl = ByteString
-> ([ByteString] -> [ByteString]) -> ByteString -> [ByteString]
strictRepl ByteString
pat
      in \rep
sub -> [ByteString] -> ByteString
L.fromChunks ([ByteString] -> ByteString)
-> (ByteString -> [ByteString]) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([ByteString] -> [ByteString]) -> ByteString -> [ByteString]
repl (rep -> [ByteString] -> [ByteString]
forall a. Substitution a => a -> [ByteString] -> [ByteString]
substitution rep
sub)

-- breaking
--
-- Break a string on a pattern. The first component of the result
-- contains the prefix of the string before the first occurrence of the
-- pattern, the second component contains the remainder.
-- The following relations hold:
--
-- > breakSubstringX \"\" str = (\"\", str)
-- > not (pat `isInfixOf` str) == null (snd $ breakSunbstringX pat str)
-- > True == case breakSubstringX pat str of
-- >          (x, y) -> not (pat `isInfixOf` x)
-- >                       && (null y || pat `isPrefixOf` y)

-- | This function has the same semantics as 'S.breakSubstring'
--   but is generally much faster.
{-# INLINE breakSubstringS #-}
breakSubstringS :: S.ByteString  -- ^ Pattern to break on
                -> S.ByteString  -- ^ String to break up
                -> (S.ByteString, S.ByteString)
                    -- ^ Prefix and remainder of broken string
breakSubstringS :: ByteString -> ByteString -> (ByteString, ByteString)
breakSubstringS = ByteString -> ByteString -> (ByteString, ByteString)
strictBreak

breakAfterS :: S.ByteString
            -> S.ByteString
            -> (S.ByteString, S.ByteString)
breakAfterS :: ByteString -> ByteString -> (ByteString, ByteString)
breakAfterS ByteString
pat
  | ByteString -> Bool
S.null ByteString
pat = \ByteString
str -> (ByteString
S.empty, ByteString
str)
breakAfterS ByteString
pat = ByteString -> (ByteString, ByteString)
breaker
  where
    !patLen :: Int
patLen  = ByteString -> Int
S.length ByteString
pat
    searcher :: ByteString -> [Int]
searcher = Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
False ByteString
pat
    breaker :: ByteString -> (ByteString, ByteString)
breaker ByteString
str = case ByteString -> [Int]
searcher ByteString
str of
                    []    -> (ByteString
str, ByteString
S.empty)
                    (Int
i:[Int]
_) -> Int -> ByteString -> (ByteString, ByteString)
S.splitAt (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patLen) ByteString
str

-- splitting
--
-- These functions implement various splitting strategies.
--
-- If the pattern to split on is empty, all functions return an
-- infinite list of empty ByteStrings.
-- Otherwise, the names are rather self-explanatory.
--
-- For nonempty patterns, the following relations hold:
--
-- > concat (splitKeepXY pat str) == str
-- > concat ('Data.List.intersperse' pat (splitDropX pat str)) == str.
--
-- All fragments except possibly the last in the result of
-- @splitKeepEndX pat@ end with @pat@, none of the fragments contains
-- more than one occurrence of @pat@ or is empty.
--
-- All fragments except possibly the first in the result of
-- @splitKeepFrontX pat@ begin with @pat@, none of the fragments
-- contains more than one occurrence of @patq or is empty.
--
-- > splitDropX pat str == map dropPat (splitKeepFrontX pat str)
-- >   where
-- >     patLen = length pat
-- >     dropPat frag
-- >        | pat `isPrefixOf` frag = drop patLen frag
-- >        | otherwise             = frag
--
-- but @splitDropX@ is a little more efficient than that.


{-# INLINE splitKeepEndS #-}
splitKeepEndS :: S.ByteString    -- ^ Pattern to split on
              -> S.ByteString    -- ^ String to split
              -> [S.ByteString]  -- ^ List of fragments
splitKeepEndS :: ByteString -> ByteString -> [ByteString]
splitKeepEndS = ByteString -> ByteString -> [ByteString]
strictSplitKeepEnd

{-# INLINE splitKeepFrontS #-}
splitKeepFrontS :: S.ByteString    -- ^ Pattern to split on
                -> S.ByteString    -- ^ String to split
                -> [S.ByteString]  -- ^ List of fragments
splitKeepFrontS :: ByteString -> ByteString -> [ByteString]
splitKeepFrontS = ByteString -> ByteString -> [ByteString]
strictSplitKeepFront

{-# INLINE splitDropS #-}
splitDropS :: S.ByteString    -- ^ Pattern to split on
           -> S.ByteString    -- ^ String to split
           -> [S.ByteString]  -- ^ List of fragments
splitDropS :: ByteString -> ByteString -> [ByteString]
splitDropS = ByteString -> ByteString -> [ByteString]
strictSplitDrop

------------------------------------------------------------------------------
--                             Search Functions                             --
------------------------------------------------------------------------------

strictSearcher :: Bool -> S.ByteString -> S.ByteString -> [Int]
strictSearcher :: Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
_ !ByteString
pat
    | ByteString -> Bool
S.null ByteString
pat = Int -> Int -> [Int]
forall a. Enum a => a -> a -> [a]
enumFromTo Int
0 (Int -> [Int]) -> (ByteString -> Int) -> ByteString -> [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Int
S.length
    | ByteString -> Int
S.length ByteString
pat Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 = let !w :: Word8
w = ByteString -> Word8
S.head ByteString
pat in Word8 -> ByteString -> [Int]
S.elemIndices Word8
w
strictSearcher !Bool
overlap ByteString
pat = ByteString -> [Int]
searcher
  where
    {-# INLINE patAt #-}
    patAt :: Int -> Word8
    patAt :: Int -> Word8
patAt !Int
i = ByteString -> Int -> Word8
unsafeIndex ByteString
pat Int
i

    !patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
    !patEnd :: Int
patEnd = Int
patLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
    !maxLen :: Int
maxLen = Int
forall a. Bounded a => a
maxBound Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
patLen
    !occT :: UArray Int Int
occT   = ByteString -> UArray Int Int
occurs ByteString
pat        -- for bad-character-shift
    !suffT :: UArray Int Int
suffT  = ByteString -> UArray Int Int
suffShifts ByteString
pat    -- for good-suffix-shift
    !skip :: Int
skip   = if Bool
overlap then UArray Int Int -> Int -> Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
unsafeAt UArray Int Int
suffT Int
0 else Int
patLen
    -- shift after a complete match
    !kept :: Int
kept   = Int
patLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
skip     -- length of known prefix after full match
    !pe :: Word8
pe     = Int -> Word8
patAt Int
patEnd      -- last pattern byte for fast comparison

    {-# INLINE occ #-}
    occ :: a -> Int
occ !a
w = UArray Int Int -> Int -> Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
unsafeAt UArray Int Int
occT (a -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
w)

    {-# INLINE suff #-}
    suff :: Int -> Int
suff !Int
i = UArray Int Int -> Int -> Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
unsafeAt UArray Int Int
suffT Int
i

    searcher :: ByteString -> [Int]
searcher ByteString
str
        | Int
maxLen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
strLen
            = [Char] -> [Int]
forall a. HasCallStack => [Char] -> a
error [Char]
"Overflow in BoyerMoore.strictSearcher"
        | Int
maxDiff Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0   = []
        | Bool
otherwise     = Int -> [Int]
checkEnd Int
patEnd
          where
            !strLen :: Int
strLen  = ByteString -> Int
S.length ByteString
str
            !strEnd :: Int
strEnd  = Int
strLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
            !maxDiff :: Int
maxDiff = Int
strLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
patLen

            {-# INLINE strAt #-}
            strAt :: Int -> Word8
strAt !Int
i = ByteString -> Int -> Word8
unsafeIndex ByteString
str Int
i

            -- After a full match, we know how long a prefix of the pattern
            -- still matches. Do not re-compare the prefix to prevent O(m*n)
            -- behaviour for periodic patterns.
            afterMatch :: Int -> Int -> [Int]
afterMatch !Int
diff !Int
patI =
              case Int -> Word8
strAt (Int
diff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patI) of
                !Word8
c  | Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Int -> Word8
patAt Int
patI ->
                      if Int
patI Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
kept
                        then Int
diff Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: let !diff' :: Int
diff' = Int
diff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
skip
                                    in if Int
maxDiff Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
diff'
                                        then []
                                        else Int -> Int -> [Int]
afterMatch Int
diff' Int
patEnd
                        else Int -> Int -> [Int]
afterMatch Int
diff (Int
patI Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
                    | Int
patI Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
patEnd  ->
                            Int -> [Int]
checkEnd (Int
diff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
patEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall {a}. Integral a => a -> Int
occ Word8
c)
                    | Bool
otherwise       ->
                            let {-# INLINE badShift #-}
                                badShift :: Int
badShift = Int
patI Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall {a}. Integral a => a -> Int
occ Word8
c
                                {-# INLINE goodShift #-}
                                goodShift :: Int
goodShift = Int -> Int
suff Int
patI
                                !diff' :: Int
diff' = Int
diff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
badShift Int
goodShift
                            in if Int
maxDiff Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
diff'
                                then []
                                else Int -> [Int]
checkEnd (Int
diff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patEnd)

            -- While comparing the last byte of the pattern, the bad-
            -- character-shift is always at least as large as the good-
            -- suffix-shift. Eliminating the unnecessary memory reads and
            -- comparison speeds things up noticeably.
            checkEnd :: Int -> [Int]
checkEnd !Int
sI  -- index in string to compare to last of pattern
                | Int
strEnd Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
sI   = []
                | Bool
otherwise     =
                  case Int -> Word8
strAt Int
sI of
                    !Word8
c  | Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
pe   -> Int -> Int -> [Int]
findMatch (Int
sI Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
patEnd) (Int
patEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
                        | Bool
otherwise -> Int -> [Int]
checkEnd (Int
sI Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall {a}. Integral a => a -> Int
occ Word8
c)

            -- Once the last byte has matched, we enter the full matcher
            -- diff is the offset of the window, patI the index of the
            -- pattern byte to compare next.
            findMatch :: Int -> Int -> [Int]
findMatch !Int
diff !Int
patI =
                case Int -> Word8
strAt (Int
diff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patI) of
                    !Word8
c  | Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Int -> Word8
patAt Int
patI ->
                            if Int
patI Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0    -- full match, report
                                then Int
diff Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: let !diff' :: Int
diff' = Int
diff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
skip
                                            in if Int
maxDiff Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
diff'
                                                then []
                                                else
                                                  if Int
skip Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
patLen
                                                    then
                                                      Int -> [Int]
checkEnd (Int
diff' Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patEnd)
                                                    else
                                                      Int -> Int -> [Int]
afterMatch Int
diff' Int
patEnd
                                else Int -> Int -> [Int]
findMatch Int
diff (Int
patI Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
                        | Bool
otherwise       ->
                            let !diff' :: Int
diff' = Int
diff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int -> Int -> Int
forall a. Ord a => a -> a -> a
max (Int
patI Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall {a}. Integral a => a -> Int
occ Word8
c) (Int -> Int
suff Int
patI)
                            in if Int
maxDiff Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
diff'
                                then []
                                else Int -> [Int]
checkEnd (Int
diff' Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patEnd)

------------------------------------------------------------------------------
--                            Breaking Functions                            --
------------------------------------------------------------------------------

strictBreak :: S.ByteString -> S.ByteString -> (S.ByteString, S.ByteString)
strictBreak :: ByteString -> ByteString -> (ByteString, ByteString)
strictBreak ByteString
pat
    | ByteString -> Bool
S.null ByteString
pat    = \ByteString
str -> (ByteString
S.empty, ByteString
str)
    | Bool
otherwise     = ByteString -> (ByteString, ByteString)
breaker
      where
        searcher :: ByteString -> [Int]
searcher = Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
False ByteString
pat
        breaker :: ByteString -> (ByteString, ByteString)
breaker ByteString
str = case ByteString -> [Int]
searcher ByteString
str of
                        []      -> (ByteString
str, ByteString
S.empty)
                        (Int
i:[Int]
_)   -> Int -> ByteString -> (ByteString, ByteString)
S.splitAt Int
i ByteString
str

------------------------------------------------------------------------------
--                            Splitting Functions                           --
------------------------------------------------------------------------------

strictSplitKeepFront :: S.ByteString -> S.ByteString -> [S.ByteString]
strictSplitKeepFront :: ByteString -> ByteString -> [ByteString]
strictSplitKeepFront ByteString
pat
  | ByteString -> Bool
S.null ByteString
pat  = [ByteString] -> ByteString -> [ByteString]
forall a b. a -> b -> a
const (ByteString -> [ByteString]
forall a. a -> [a]
repeat ByteString
S.empty)
strictSplitKeepFront ByteString
pat = ByteString -> [ByteString]
splitter
  where
    !patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
    searcher :: ByteString -> [Int]
searcher = Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
False ByteString
pat
    splitter :: ByteString -> [ByteString]
splitter ByteString
str
        | ByteString -> Bool
S.null ByteString
str    = []
        | Bool
otherwise     =
          case ByteString -> [Int]
searcher ByteString
str of
            []            -> [ByteString
str]
            (Int
i:[Int]
_)
              | Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0    -> ByteString -> [ByteString]
psplitter ByteString
str
              | Bool
otherwise -> Int -> ByteString -> ByteString
S.take Int
i ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: ByteString -> [ByteString]
psplitter (Int -> ByteString -> ByteString
S.drop Int
i ByteString
str)
    psplitter :: ByteString -> [ByteString]
psplitter !ByteString
str
        | ByteString -> Bool
S.null ByteString
str    = []
        | Bool
otherwise     =
          case ByteString -> [Int]
searcher (Int -> ByteString -> ByteString
S.drop Int
patLen ByteString
str) of
            []      -> [ByteString
str]
            (Int
i:[Int]
_)   -> Int -> ByteString -> ByteString
S.take (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patLen) ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:
                        ByteString -> [ByteString]
psplitter (Int -> ByteString -> ByteString
S.drop (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patLen) ByteString
str)

strictSplitKeepEnd :: S.ByteString -> S.ByteString -> [S.ByteString]
strictSplitKeepEnd :: ByteString -> ByteString -> [ByteString]
strictSplitKeepEnd ByteString
pat
  | ByteString -> Bool
S.null ByteString
pat  = [ByteString] -> ByteString -> [ByteString]
forall a b. a -> b -> a
const (ByteString -> [ByteString]
forall a. a -> [a]
repeat ByteString
S.empty)
strictSplitKeepEnd ByteString
pat = ByteString -> [ByteString]
splitter
  where
    !patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
    searcher :: ByteString -> [Int]
searcher = Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
False ByteString
pat
    splitter :: ByteString -> [ByteString]
splitter ByteString
str
        | ByteString -> Bool
S.null ByteString
str    = []
        | Bool
otherwise     =
          case ByteString -> [Int]
searcher ByteString
str of
            [] -> [ByteString
str]
            (Int
i:[Int]
_) -> Int -> ByteString -> ByteString
S.take (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patLen) ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:
                        ByteString -> [ByteString]
splitter (Int -> ByteString -> ByteString
S.drop (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patLen) ByteString
str)

strictSplitDrop :: S.ByteString -> S.ByteString -> [S.ByteString]
strictSplitDrop :: ByteString -> ByteString -> [ByteString]
strictSplitDrop ByteString
pat
    | ByteString -> Bool
S.null ByteString
pat    = [ByteString] -> ByteString -> [ByteString]
forall a b. a -> b -> a
const (ByteString -> [ByteString]
forall a. a -> [a]
repeat ByteString
S.empty)
strictSplitDrop ByteString
pat = ByteString -> [ByteString]
splitter'
  where
    !patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
    searcher :: ByteString -> [Int]
searcher = Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
False ByteString
pat
    splitter' :: ByteString -> [ByteString]
splitter' ByteString
str
        | ByteString -> Bool
S.null ByteString
str    = []
        | Bool
otherwise     = ByteString -> [ByteString]
splitter ByteString
str
    splitter :: ByteString -> [ByteString]
splitter ByteString
str
        | ByteString -> Bool
S.null ByteString
str    = [ByteString
S.empty]
        | Bool
otherwise     =
          case ByteString -> [Int]
searcher ByteString
str of
            []            -> [ByteString
str]
            (Int
i:[Int]
_) -> Int -> ByteString -> ByteString
S.take Int
i ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: ByteString -> [ByteString]
splitter (Int -> ByteString -> ByteString
S.drop (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patLen) ByteString
str)

------------------------------------------------------------------------------
--                            Replacing Functions                           --
------------------------------------------------------------------------------

-- replacing loop for strict ByteStrings, called only for
-- non-empty patterns and substitutions
strictRepl :: S.ByteString -> ([S.ByteString] -> [S.ByteString])
            -> S.ByteString -> [S.ByteString]
strictRepl :: ByteString
-> ([ByteString] -> [ByteString]) -> ByteString -> [ByteString]
strictRepl ByteString
pat  = ([ByteString] -> [ByteString]) -> ByteString -> [ByteString]
repl
  where
    !patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
    searcher :: ByteString -> [Int]
searcher = Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
False ByteString
pat
    repl :: ([ByteString] -> [ByteString]) -> ByteString -> [ByteString]
repl [ByteString] -> [ByteString]
sub = ByteString -> [ByteString]
replacer
      where
        replacer :: ByteString -> [ByteString]
replacer ByteString
str
          | ByteString -> Bool
S.null ByteString
str    = []
          | Bool
otherwise     =
            case ByteString -> [Int]
searcher ByteString
str of
              []            -> [ByteString
str]
              (Int
i:[Int]
_)
                | Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0    -> [ByteString] -> [ByteString]
sub ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
forall a b. (a -> b) -> a -> b
$ ByteString -> [ByteString]
replacer (Int -> ByteString -> ByteString
S.drop Int
patLen ByteString
str)
                | Bool
otherwise ->
                  Int -> ByteString -> ByteString
S.take Int
i ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: [ByteString] -> [ByteString]
sub (ByteString -> [ByteString]
replacer (Int -> ByteString -> ByteString
S.drop (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patLen) ByteString
str))