{-# LANGUAGE BangPatterns, FlexibleContexts #-}
{-# OPTIONS_HADDOCK hide, prune #-}
-- |
-- Module         : Data.ByteString.Search.Internal.Utils
-- Copyright      : Daniel Fischer
-- Licence        : BSD3
-- Maintainer     : Daniel Fischer <daniel.is.fischer@googlemail.com>
-- Stability      : Provisional
-- Portabiltity   : non-portable
--
-- Author         : Daniel Fischer
--
-- Utilities for several searching algorithms.

module Data.ByteString.Search.Internal.Utils ( kmpBorders
                                             , automaton
                                             , occurs
                                             , suffShifts
                                             , ldrop
                                             , ltake
                                             , lsplit
                                             , release
                                             , keep
                                             , strictify
                                             ) where

import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import Data.ByteString.Unsafe (unsafeIndex)

import Data.Array.Base (unsafeRead, unsafeWrite, unsafeAt)
import Data.Array.ST
import Data.Array.Unboxed
import Control.Monad (when)

import Data.Bits
import Data.Word (Word8)

------------------------------------------------------------------------------
--                              Preprocessing                               --
------------------------------------------------------------------------------

{-# INLINE automaton #-}
automaton :: S.ByteString -> UArray Int Int
automaton :: ByteString -> UArray Int Int
automaton !ByteString
pat = (forall s. ST s (STUArray s Int Int)) -> UArray Int Int
forall i e. (forall s. ST s (STUArray s i e)) -> UArray i e
runSTUArray (do
    let !patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
        {-# INLINE patAt #-}
        patAt :: Int -> b
patAt !Int
i = Word8 -> b
forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int -> Word8
unsafeIndex ByteString
pat Int
i)
        !bord :: UArray Int Int
bord = ByteString -> UArray Int Int
kmpBorders ByteString
pat
    STUArray s Int Int
aut <- (Int, Int) -> Int -> ST s (STUArray s Int Int)
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
(i, i) -> e -> m (a i e)
newArray (Int
0, (Int
patLen Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
256 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) Int
0
    STUArray s Int Int -> Int -> Int -> ST s ()
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> Int -> e -> m ()
unsafeWrite STUArray s Int Int
aut (Int -> Int
forall {b}. Num b => Int -> b
patAt Int
0) Int
1
    let loop :: Int -> m (STUArray s Int Int)
loop !Int
state = do
            let !base :: Int
base = Int
state Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
8
                inner :: Int -> m (STUArray s Int Int)
inner Int
j
                    | Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0     = if Int
state Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
patLen
                                    then STUArray s Int Int -> m (STUArray s Int Int)
forall (m :: * -> *) a. Monad m => a -> m a
return STUArray s Int Int
aut
                                    else Int -> m (STUArray s Int Int)
loop (Int
stateInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
                    | Bool
otherwise = do
                        let !i :: Int
i = Int
base Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int -> Int
forall {b}. Num b => Int -> b
patAt Int
j
                        Int
s <- STUArray s Int Int -> Int -> m Int
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> Int -> m e
unsafeRead STUArray s Int Int
aut Int
i
                        Bool -> m () -> m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
s Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0) (STUArray s Int Int -> Int -> Int -> m ()
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> Int -> e -> m ()
unsafeWrite STUArray s Int Int
aut Int
i (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1))
                        Int -> m (STUArray s Int Int)
inner (UArray Int Int -> Int -> Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
unsafeAt UArray Int Int
bord Int
j)
            if Int
state Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
patLen
                then Int -> m (STUArray s Int Int)
inner (UArray Int Int -> Int -> Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
unsafeAt UArray Int Int
bord Int
state)
                else Int -> m (STUArray s Int Int)
inner Int
state
    Int -> ST s (STUArray s Int Int)
forall {m :: * -> *}.
MArray (STUArray s) Int m =>
Int -> m (STUArray s Int Int)
loop Int
1)

-- kmpBorders calculates the width of the widest borders of the prefixes
-- of the pattern which are not extensible to borders of the next
-- longer prefix. Most entries will be 0.
{-# INLINE kmpBorders #-}
kmpBorders :: S.ByteString -> UArray Int Int
kmpBorders :: ByteString -> UArray Int Int
kmpBorders ByteString
pat = (forall s. ST s (STUArray s Int Int)) -> UArray Int Int
forall i e. (forall s. ST s (STUArray s i e)) -> UArray i e
runSTUArray (do
    let !patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
        {-# INLINE patAt #-}
        patAt :: Int -> Word8
        patAt :: Int -> Word8
patAt Int
i = ByteString -> Int -> Word8
unsafeIndex ByteString
pat Int
i
    STUArray s Int Int
ar <- (Int, Int) -> ST s (STUArray s Int Int)
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
(i, i) -> m (a i e)
newArray_ (Int
0, Int
patLen)
    STUArray s Int Int -> Int -> Int -> ST s ()
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> Int -> e -> m ()
unsafeWrite STUArray s Int Int
ar Int
0 (-Int
1)
    let dec :: Word8 -> Int -> m Int
dec Word8
w Int
j
            | Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 Bool -> Bool -> Bool
|| Word8
w Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Int -> Word8
patAt Int
j = Int -> m Int
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> m Int) -> Int -> m Int
forall a b. (a -> b) -> a -> b
$! Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1
            | Bool
otherwise = STUArray s Int Int -> Int -> m Int
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> Int -> m e
unsafeRead STUArray s Int Int
ar Int
j m Int -> (Int -> m Int) -> m Int
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Word8 -> Int -> m Int
dec Word8
w
        bordLoop :: Int -> Int -> m (STUArray s Int Int)
bordLoop !Int
i !Int
j
            | Int
patLen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
i    = STUArray s Int Int -> m (STUArray s Int Int)
forall (m :: * -> *) a. Monad m => a -> m a
return STUArray s Int Int
ar
            | Bool
otherwise     = do
                let !w :: Word8
w = Int -> Word8
patAt (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)
                Int
j' <- Word8 -> Int -> m Int
forall {m :: * -> *}.
MArray (STUArray s) Int m =>
Word8 -> Int -> m Int
dec Word8
w Int
j
                if Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
patLen Bool -> Bool -> Bool
&& Int -> Word8
patAt Int
j' Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Int -> Word8
patAt Int
i
                    then STUArray s Int Int -> Int -> m Int
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> Int -> m e
unsafeRead STUArray s Int Int
ar Int
j' m Int -> (Int -> m ()) -> m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= STUArray s Int Int -> Int -> Int -> m ()
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> Int -> e -> m ()
unsafeWrite STUArray s Int Int
ar Int
i
                    else STUArray s Int Int -> Int -> Int -> m ()
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> Int -> e -> m ()
unsafeWrite STUArray s Int Int
ar Int
i Int
j'
                Int -> Int -> m (STUArray s Int Int)
bordLoop (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Int
j'
    Int -> Int -> ST s (STUArray s Int Int)
forall {m :: * -> *}.
MArray (STUArray s) Int m =>
Int -> Int -> m (STUArray s Int Int)
bordLoop Int
1 (-Int
1))

------------------------------------------------------------------------------
--                        Boyer-Moore Preprocessing                         --
------------------------------------------------------------------------------

{- Table of last occurrences of bytes in the pattern.

For each byte we record the (negated) position of its last
occurrence in the pattern except at the last position.

Thus, if byte b gives a mismatch at pattern position patPos,
we know that we can shift the window right by at least

patPos - (last occurrence of b in init pat)

or, since we negated the positions,

patPos + (occurs pat)

If the byte doesn't occur in the pattern, we can shift the window
so that the start of the pattern is aligned with the byte after this,
hence the default value of 1.

Complexity: O(patLen + size of alphabet)

-}
{- Precondition: non-empty pattern

This invariant is guaranteed by not exporting occurs,
inside this module, we don't call it for empty patterns.

-}
{-# INLINE occurs #-}
occurs :: S.ByteString -> UArray Int Int
occurs :: ByteString -> UArray Int Int
occurs ByteString
pat = (forall s. ST s (STUArray s Int Int)) -> UArray Int Int
forall i e. (forall s. ST s (STUArray s i e)) -> UArray i e
runSTUArray (do
    let !patEnd :: Int
patEnd = ByteString -> Int
S.length ByteString
pat Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
        {-# INLINE patAt #-}
        patAt :: Int -> Int
        patAt :: Int -> Int
patAt Int
i = Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int -> Word8
unsafeIndex ByteString
pat Int
i)
    STUArray s Int Int
ar <- (Int, Int) -> Int -> ST s (STUArray s Int Int)
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
(i, i) -> e -> m (a i e)
newArray (Int
0, Int
255) Int
1
    let loop :: Int -> m (STUArray s Int Int)
loop !Int
i
            | Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
patEnd   = STUArray s Int Int -> m (STUArray s Int Int)
forall (m :: * -> *) a. Monad m => a -> m a
return STUArray s Int Int
ar
            | Bool
otherwise     = do
                STUArray s Int Int -> Int -> Int -> m ()
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> Int -> e -> m ()
unsafeWrite STUArray s Int Int
ar (Int -> Int
patAt Int
i) (-Int
i)
                Int -> m (STUArray s Int Int)
loop (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
    Int -> ST s (STUArray s Int Int)
forall {m :: * -> *}.
MArray (STUArray s) Int m =>
Int -> m (STUArray s Int Int)
loop Int
0)

{- Table of suffix-shifts.

When a mismatch occurs at pattern position patPos, assumed to be not the
last position in the pattern, the suffix u of length (patEnd - patPos)
has been successfully matched.
Let c be the byte in the pattern at position patPos.

If the sub-pattern u also occurs in the pattern somewhere *not* preceded
by c, let uPos be the position of the last byte in u for the last of
all such occurrences. Then there can be no match if the window is shifted
less than (patEnd - uPos) places, because either the part of the string
which matched the suffix u is not aligned with an occurrence of u in the
pattern, or it is aligned with an occurrence of u which is preceded by
the same byte c as the originally matched suffix.

If the complete sub-pattern u does not occur again in the pattern, or all
of its occurrences are preceded by the byte c, then we can align the
pattern with the string so that a suffix v of u matches a prefix of the
pattern. If v is chosen maximal, no smaller shift can give a match, so
we can shift by at least (patLen - length v).

If a complete match is encountered, we can shift by at least the same
amount as if the first byte of the pattern was a mismatch, no complete
match is possible between these positions.

For non-periodic patterns, only very short suffixes will usually occur
again in the pattern, so if a longer suffix has been matched before a
mismatch, the window can then be shifted entirely past the partial
match, so that part of the string will not be re-compared.
For periodic patterns, the suffix shifts will be shorter in general,
leading to an O(strLen * patLen) worst-case performance.

To compute the suffix-shifts, we use an array containing the lengths of
the longest common suffixes of the entire pattern and its prefix ending
with position pos.

-}
{- Precondition: non-empty pattern -}
{-# INLINE suffShifts #-}
suffShifts :: S.ByteString -> UArray Int Int
suffShifts :: ByteString -> UArray Int Int
suffShifts ByteString
pat = (forall s. ST s (STUArray s Int Int)) -> UArray Int Int
forall i e. (forall s. ST s (STUArray s i e)) -> UArray i e
runSTUArray (do
    let !patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
        !patEnd :: Int
patEnd = Int
patLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
        !suff :: UArray Int Int
suff   = ByteString -> UArray Int Int
suffLengths ByteString
pat
    STUArray s Int Int
ar <- (Int, Int) -> Int -> ST s (STUArray s Int Int)
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
(i, i) -> e -> m (a i e)
newArray (Int
0,Int
patEnd) Int
patLen
    let preShift :: Int -> Int -> m ()
preShift !Int
idx !Int
j
            | Int
idx Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0   = () -> m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
            | UArray Int Int
suff UArray Int Int -> Int -> Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
`unsafeAt` Int
idx Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
idx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 = do
                let !shf :: Int
shf = Int
patEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
idx
                    fillToShf :: Int -> m ()
fillToShf !Int
i
                        | Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
shf  = () -> m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
                        | Bool
otherwise = do
                            STUArray s Int Int -> Int -> Int -> m ()
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> Int -> e -> m ()
unsafeWrite STUArray s Int Int
ar Int
i Int
shf
                            Int -> m ()
fillToShf (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
                Int -> m ()
forall {m :: * -> *}. MArray (STUArray s) Int m => Int -> m ()
fillToShf Int
j
                Int -> Int -> m ()
preShift (Int
idx Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) Int
shf
            | Bool
otherwise = Int -> Int -> m ()
preShift (Int
idx Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) Int
j
        sufShift :: Int -> m (STUArray s Int Int)
sufShift !Int
idx
            | Int
idx Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
patEnd = STUArray s Int Int -> m (STUArray s Int Int)
forall (m :: * -> *) a. Monad m => a -> m a
return STUArray s Int Int
ar
            | Bool
otherwise     = do
                STUArray s Int Int -> Int -> Int -> m ()
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> Int -> e -> m ()
unsafeWrite STUArray s Int Int
ar (Int
patEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- UArray Int Int -> Int -> Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
unsafeAt UArray Int Int
suff Int
idx) (Int
patEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
idx)
                Int -> m (STUArray s Int Int)
sufShift (Int
idx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
    Int -> Int -> ST s ()
forall {m :: * -> *}.
MArray (STUArray s) Int m =>
Int -> Int -> m ()
preShift (Int
patEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) Int
0
    Int -> ST s (STUArray s Int Int)
forall {m :: * -> *}.
MArray (STUArray s) Int m =>
Int -> m (STUArray s Int Int)
sufShift Int
0)

{- Table of suffix-lengths.

The value of this array at place i is the length of the longest common
suffix of the entire pattern and the prefix of the pattern ending at
position i.

Usually, most of the entries will be 0. Only if the byte at position i
is the same as the last byte of the pattern can the value be positive.
In any case the value at index patEnd is patLen (since the pattern is
identical to itself) and 0 <= value at i <= (i + 1).

To keep this part of preprocessing linear in the length of the pattern,
the implementation must be non-obvious (the obvious algorithm for this
is quadratic).

When the index under consideration is inside a previously identified
common suffix, we align that suffix with the end of the pattern and
check whether the suffix ending at the position corresponding to idx
is shorter than the part of the suffix up to idx. If that is the case,
the length of the suffix ending at idx is that of the suffix at the
corresponding position. Otherwise extend the suffix as far as possible.
If the index under consideration is not inside a previously identified
common suffix, compare with the last byte of the pattern. If that gives
a suffix of length > 1, for the next index we're in the previous
situation, otherwise we're back in the same situation for the next
index.

-}
{- Precondition: non-empty pattern -}
{-# INLINE suffLengths #-}
suffLengths :: S.ByteString -> UArray Int Int
suffLengths :: ByteString -> UArray Int Int
suffLengths ByteString
pat = (forall s. ST s (STUArray s Int Int)) -> UArray Int Int
forall i e. (forall s. ST s (STUArray s i e)) -> UArray i e
runSTUArray (do
    let !patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
        !patEnd :: Int
patEnd = Int
patLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
        !preEnd :: Int
preEnd = Int
patEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
        {-# INLINE patAt #-}
        patAt :: Int -> Word8
patAt Int
i = ByteString -> Int -> Word8
unsafeIndex ByteString
pat Int
i
        -- last byte for comparisons
        !pe :: Word8
pe     = Int -> Word8
patAt Int
patEnd
        -- find index preceding the longest suffix
        dec :: Int -> Int -> Int
dec !Int
diff !Int
j
            | Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 Bool -> Bool -> Bool
|| Int -> Word8
patAt Int
j Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Int -> Word8
patAt (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
diff) = Int
j
            | Bool
otherwise = Int -> Int -> Int
dec Int
diff (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
    STUArray s Int Int
ar <- (Int, Int) -> ST s (STUArray s Int Int)
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
(i, i) -> m (a i e)
newArray_ (Int
0, Int
patEnd)
    STUArray s Int Int -> Int -> Int -> ST s ()
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> Int -> e -> m ()
unsafeWrite STUArray s Int Int
ar Int
patEnd Int
patLen
    let noSuff :: Int -> m (STUArray s Int Int)
noSuff !Int
i
            | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0     = STUArray s Int Int -> m (STUArray s Int Int)
forall (m :: * -> *) a. Monad m => a -> m a
return STUArray s Int Int
ar
            | Int -> Word8
patAt Int
i Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
pe = do
                let !diff :: Int
diff  = Int
patEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i
                    !nextI :: Int
nextI = Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
                    !prevI :: Int
prevI = Int -> Int -> Int
dec Int
diff Int
nextI
                if Int
prevI Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
nextI
                    then STUArray s Int Int -> Int -> Int -> m ()
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> Int -> e -> m ()
unsafeWrite STUArray s Int Int
ar Int
i Int
1 m () -> m (STUArray s Int Int) -> m (STUArray s Int Int)
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Int -> m (STUArray s Int Int)
noSuff Int
nextI
                    else do STUArray s Int Int -> Int -> Int -> m ()
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> Int -> e -> m ()
unsafeWrite STUArray s Int Int
ar Int
i (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
prevI)
                            Int -> Int -> Int -> m (STUArray s Int Int)
suffLoop Int
prevI Int
preEnd Int
nextI
            | Bool
otherwise = do
                STUArray s Int Int -> Int -> Int -> m ()
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> Int -> e -> m ()
unsafeWrite STUArray s Int Int
ar Int
i Int
0
                Int -> m (STUArray s Int Int)
noSuff (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
        suffLoop :: Int -> Int -> Int -> m (STUArray s Int Int)
suffLoop !Int
pre !Int
end !Int
idx
            | Int
idx Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0   = STUArray s Int Int -> m (STUArray s Int Int)
forall (m :: * -> *) a. Monad m => a -> m a
return STUArray s Int Int
ar
            | Int
pre Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
idx =
              if Int -> Word8
patAt Int
idx Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
pe
                then STUArray s Int Int -> Int -> Int -> m ()
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> Int -> e -> m ()
unsafeWrite STUArray s Int Int
ar Int
idx Int
0 m () -> m (STUArray s Int Int) -> m (STUArray s Int Int)
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Int -> Int -> Int -> m (STUArray s Int Int)
suffLoop Int
pre (Int
end Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) (Int
idx Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
                else do
                    Int
prevS <- STUArray s Int Int -> Int -> m Int
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> Int -> m e
unsafeRead STUArray s Int Int
ar Int
end
                    if Int
pre Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
prevS Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
idx
                        then do STUArray s Int Int -> Int -> Int -> m ()
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> Int -> e -> m ()
unsafeWrite STUArray s Int Int
ar Int
idx Int
prevS
                                Int -> Int -> Int -> m (STUArray s Int Int)
suffLoop Int
pre (Int
end Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) (Int
idx Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
                        else do let !prI :: Int
prI = Int -> Int -> Int
dec (Int
patEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
idx) Int
pre
                                STUArray s Int Int -> Int -> Int -> m ()
forall (a :: * -> * -> *) e (m :: * -> *) i.
(MArray a e m, Ix i) =>
a i e -> Int -> e -> m ()
unsafeWrite STUArray s Int Int
ar Int
idx (Int
idx Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
prI)
                                Int -> Int -> Int -> m (STUArray s Int Int)
suffLoop Int
prI Int
preEnd (Int
idx Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
            | Bool
otherwise = Int -> m (STUArray s Int Int)
noSuff Int
idx
    Int -> ST s (STUArray s Int Int)
forall {m :: * -> *}.
MArray (STUArray s) Int m =>
Int -> m (STUArray s Int Int)
noSuff Int
preEnd)

------------------------------------------------------------------------------
--                             Helper Functions                             --
------------------------------------------------------------------------------

{-# INLINE strictify #-}
strictify :: L.ByteString -> S.ByteString
strictify :: ByteString -> ByteString
strictify = [ByteString] -> ByteString
S.concat ([ByteString] -> ByteString)
-> (ByteString -> [ByteString]) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
L.toChunks

-- drop k bytes from a list of strict ByteStrings
{-# INLINE ldrop #-}
ldrop :: Int -> [S.ByteString] -> [S.ByteString]
ldrop :: Int -> [ByteString] -> [ByteString]
ldrop Int
_ [] = []
ldrop Int
k (!ByteString
h : [ByteString]
t)
  | Int
k Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
l     = Int -> ByteString -> ByteString
S.drop Int
k ByteString
h ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: [ByteString]
t
  | Bool
otherwise = Int -> [ByteString] -> [ByteString]
ldrop (Int
k Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
l) [ByteString]
t
    where
      !l :: Int
l = ByteString -> Int
S.length ByteString
h

-- take k bytes from a list of strict ByteStrings
{-# INLINE ltake #-}
ltake :: Int -> [S.ByteString] -> [S.ByteString]
ltake :: Int -> [ByteString] -> [ByteString]
ltake Int
_ [] = []
ltake !Int
k (!ByteString
h : [ByteString]
t)
  | Int
l Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
k     = ByteString
h ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: Int -> [ByteString] -> [ByteString]
ltake (Int
k Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
l) [ByteString]
t
  | Bool
otherwise = [Int -> ByteString -> ByteString
S.take Int
k ByteString
h]
    where
      !l :: Int
l = ByteString -> Int
S.length ByteString
h

-- split a list of strict ByteStrings at byte k
{-# INLINE lsplit #-}
lsplit :: Int -> [S.ByteString] -> ([S.ByteString], [S.ByteString])
lsplit :: Int -> [ByteString] -> ([ByteString], [ByteString])
lsplit Int
_ [] = ([],[])
lsplit !Int
k (!ByteString
h : [ByteString]
t)
  = case Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Int
k Int
l of
      Ordering
LT -> ([Int -> ByteString -> ByteString
S.take Int
k ByteString
h], Int -> ByteString -> ByteString
S.drop Int
k ByteString
h ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: [ByteString]
t)
      Ordering
EQ -> ([ByteString
h], [ByteString]
t)
      Ordering
GT -> let ([ByteString]
u, [ByteString]
v) = Int -> [ByteString] -> ([ByteString], [ByteString])
lsplit (Int
k Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
l) [ByteString]
t in (ByteString
h ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: [ByteString]
u, [ByteString]
v)
  where
    !l :: Int
l = ByteString -> Int
S.length ByteString
h


-- release is used to keep the zipper in lazySearcher from remembering
-- the leading part of the searched string.  The deep parameter is the
-- number of characters that the past needs to hold.  This ensures
-- lazy streaming consumption of the searched string.
{-# INLINE release #-}
release :: Int ->  [S.ByteString] -> [S.ByteString]
release :: Int -> [ByteString] -> [ByteString]
release !Int
deep [ByteString]
_
    | Int
deep Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0 = []
release !Int
deep (!ByteString
x:[ByteString]
xs) = let !rest :: [ByteString]
rest = Int -> [ByteString] -> [ByteString]
release (Int
deepInt -> Int -> Int
forall a. Num a => a -> a -> a
-ByteString -> Int
S.length ByteString
x) [ByteString]
xs in ByteString
x ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: [ByteString]
rest
release Int
_ [] = [Char] -> [ByteString]
forall a. HasCallStack => [Char] -> a
error [Char]
"stringsearch.release could not find enough past!"

-- keep is like release, only we mustn't forget the part of the past
-- we don't need anymore for matching but have to keep it for
-- breaking, splitting and replacing.
-- The names would be more appropriate the other way round, but that's
-- a historical accident, so what?
{-# INLINE keep #-}
keep :: Int -> [S.ByteString] -> ([S.ByteString],[S.ByteString])
keep :: Int -> [ByteString] -> ([ByteString], [ByteString])
keep !Int
deep [ByteString]
xs
    | Int
deep Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
1    = ([],[ByteString]
xs)
keep Int
deep (!ByteString
x:[ByteString]
xs) = let (![ByteString]
p,[ByteString]
d) = Int -> [ByteString] -> ([ByteString], [ByteString])
keep (Int
deep Int -> Int -> Int
forall a. Num a => a -> a -> a
- ByteString -> Int
S.length ByteString
x) [ByteString]
xs in (ByteString
xByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:[ByteString]
p,[ByteString]
d)
keep Int
_ [] = [Char] -> ([ByteString], [ByteString])
forall a. HasCallStack => [Char] -> a
error [Char]
"Forgot too much"