{-# LANGUAGE BangPatterns #-}
{-# OPTIONS_HADDOCK hide, prune #-}
module Data.ByteString.Search.Internal.BoyerMoore (
matchLS
, matchSS
, matchNOS
, replaceAllS
, breakSubstringS
, breakAfterS
, splitKeepEndS
, splitKeepFrontS
, splitDropS
) where
import Data.ByteString.Search.Internal.Utils
(occurs, suffShifts, strictify)
import Data.ByteString.Search.Substitution
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Internal as LI
import Data.ByteString.Unsafe (unsafeIndex)
import Data.Array.Base (unsafeAt)
import Data.Word (Word8)
{-# INLINE matchLS #-}
matchLS :: L.ByteString
-> S.ByteString
-> [Int]
matchLS :: ByteString -> ByteString -> [Int]
matchLS ByteString
pat = ByteString -> [Int]
search
where
search :: ByteString -> [Int]
search = Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
True (ByteString -> ByteString
strictify ByteString
pat)
{-# INLINE matchSS #-}
matchSS :: S.ByteString
-> S.ByteString
-> [Int]
matchSS :: ByteString -> ByteString -> [Int]
matchSS ByteString
pat = ByteString -> [Int]
search
where
search :: ByteString -> [Int]
search = Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
True ByteString
pat
{-# INLINE matchNOS #-}
matchNOS :: S.ByteString
-> S.ByteString
-> [Int]
matchNOS :: ByteString -> ByteString -> [Int]
matchNOS ByteString
pat = ByteString -> [Int]
search
where
search :: ByteString -> [Int]
search = Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
False ByteString
pat
{-# INLINE replaceAllS #-}
replaceAllS :: Substitution rep
=> S.ByteString
-> rep
-> S.ByteString
-> L.ByteString
replaceAllS :: forall rep.
Substitution rep =>
ByteString -> rep -> ByteString -> ByteString
replaceAllS ByteString
pat
| ByteString -> Bool
S.null ByteString
pat = \rep
sub -> rep -> ByteString -> ByteString
forall a. Substitution a => a -> ByteString -> ByteString
prependCycle rep
sub (ByteString -> ByteString)
-> (ByteString -> ByteString) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString -> ByteString -> ByteString)
-> ByteString -> ByteString -> ByteString
forall a b c. (a -> b -> c) -> b -> a -> c
flip ByteString -> ByteString -> ByteString
LI.chunk ByteString
LI.Empty
| Bool
otherwise =
let repl :: ([ByteString] -> [ByteString]) -> ByteString -> [ByteString]
repl = ByteString
-> ([ByteString] -> [ByteString]) -> ByteString -> [ByteString]
strictRepl ByteString
pat
in \rep
sub -> [ByteString] -> ByteString
L.fromChunks ([ByteString] -> ByteString)
-> (ByteString -> [ByteString]) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([ByteString] -> [ByteString]) -> ByteString -> [ByteString]
repl (rep -> [ByteString] -> [ByteString]
forall a. Substitution a => a -> [ByteString] -> [ByteString]
substitution rep
sub)
{-# INLINE breakSubstringS #-}
breakSubstringS :: S.ByteString
-> S.ByteString
-> (S.ByteString, S.ByteString)
breakSubstringS :: ByteString -> ByteString -> (ByteString, ByteString)
breakSubstringS = ByteString -> ByteString -> (ByteString, ByteString)
strictBreak
breakAfterS :: S.ByteString
-> S.ByteString
-> (S.ByteString, S.ByteString)
breakAfterS :: ByteString -> ByteString -> (ByteString, ByteString)
breakAfterS ByteString
pat
| ByteString -> Bool
S.null ByteString
pat = \ByteString
str -> (ByteString
S.empty, ByteString
str)
breakAfterS ByteString
pat = ByteString -> (ByteString, ByteString)
breaker
where
!patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
searcher :: ByteString -> [Int]
searcher = Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
False ByteString
pat
breaker :: ByteString -> (ByteString, ByteString)
breaker ByteString
str = case ByteString -> [Int]
searcher ByteString
str of
[] -> (ByteString
str, ByteString
S.empty)
(Int
i:[Int]
_) -> Int -> ByteString -> (ByteString, ByteString)
S.splitAt (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patLen) ByteString
str
{-# INLINE splitKeepEndS #-}
splitKeepEndS :: S.ByteString
-> S.ByteString
-> [S.ByteString]
splitKeepEndS :: ByteString -> ByteString -> [ByteString]
splitKeepEndS = ByteString -> ByteString -> [ByteString]
strictSplitKeepEnd
{-# INLINE splitKeepFrontS #-}
splitKeepFrontS :: S.ByteString
-> S.ByteString
-> [S.ByteString]
splitKeepFrontS :: ByteString -> ByteString -> [ByteString]
splitKeepFrontS = ByteString -> ByteString -> [ByteString]
strictSplitKeepFront
{-# INLINE splitDropS #-}
splitDropS :: S.ByteString
-> S.ByteString
-> [S.ByteString]
splitDropS :: ByteString -> ByteString -> [ByteString]
splitDropS = ByteString -> ByteString -> [ByteString]
strictSplitDrop
strictSearcher :: Bool -> S.ByteString -> S.ByteString -> [Int]
strictSearcher :: Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
_ !ByteString
pat
| ByteString -> Bool
S.null ByteString
pat = Int -> Int -> [Int]
forall a. Enum a => a -> a -> [a]
enumFromTo Int
0 (Int -> [Int]) -> (ByteString -> Int) -> ByteString -> [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Int
S.length
| ByteString -> Int
S.length ByteString
pat Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 = let !w :: Word8
w = ByteString -> Word8
S.head ByteString
pat in Word8 -> ByteString -> [Int]
S.elemIndices Word8
w
strictSearcher !Bool
overlap ByteString
pat = ByteString -> [Int]
searcher
where
{-# INLINE patAt #-}
patAt :: Int -> Word8
patAt :: Int -> Word8
patAt !Int
i = ByteString -> Int -> Word8
unsafeIndex ByteString
pat Int
i
!patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
!patEnd :: Int
patEnd = Int
patLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
!maxLen :: Int
maxLen = Int
forall a. Bounded a => a
maxBound Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
patLen
!occT :: UArray Int Int
occT = ByteString -> UArray Int Int
occurs ByteString
pat
!suffT :: UArray Int Int
suffT = ByteString -> UArray Int Int
suffShifts ByteString
pat
!skip :: Int
skip = if Bool
overlap then UArray Int Int -> Int -> Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
unsafeAt UArray Int Int
suffT Int
0 else Int
patLen
!kept :: Int
kept = Int
patLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
skip
!pe :: Word8
pe = Int -> Word8
patAt Int
patEnd
{-# INLINE occ #-}
occ :: a -> Int
occ !a
w = UArray Int Int -> Int -> Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
unsafeAt UArray Int Int
occT (a -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
w)
{-# INLINE suff #-}
suff :: Int -> Int
suff !Int
i = UArray Int Int -> Int -> Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
unsafeAt UArray Int Int
suffT Int
i
searcher :: ByteString -> [Int]
searcher ByteString
str
| Int
maxLen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
strLen
= [Char] -> [Int]
forall a. HasCallStack => [Char] -> a
error [Char]
"Overflow in BoyerMoore.strictSearcher"
| Int
maxDiff Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 = []
| Bool
otherwise = Int -> [Int]
checkEnd Int
patEnd
where
!strLen :: Int
strLen = ByteString -> Int
S.length ByteString
str
!strEnd :: Int
strEnd = Int
strLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
!maxDiff :: Int
maxDiff = Int
strLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
patLen
{-# INLINE strAt #-}
strAt :: Int -> Word8
strAt !Int
i = ByteString -> Int -> Word8
unsafeIndex ByteString
str Int
i
afterMatch :: Int -> Int -> [Int]
afterMatch !Int
diff !Int
patI =
case Int -> Word8
strAt (Int
diff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patI) of
!Word8
c | Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Int -> Word8
patAt Int
patI ->
if Int
patI Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
kept
then Int
diff Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: let !diff' :: Int
diff' = Int
diff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
skip
in if Int
maxDiff Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
diff'
then []
else Int -> Int -> [Int]
afterMatch Int
diff' Int
patEnd
else Int -> Int -> [Int]
afterMatch Int
diff (Int
patI Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
| Int
patI Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
patEnd ->
Int -> [Int]
checkEnd (Int
diff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
patEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall {a}. Integral a => a -> Int
occ Word8
c)
| Bool
otherwise ->
let {-# INLINE badShift #-}
badShift :: Int
badShift = Int
patI Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall {a}. Integral a => a -> Int
occ Word8
c
{-# INLINE goodShift #-}
goodShift :: Int
goodShift = Int -> Int
suff Int
patI
!diff' :: Int
diff' = Int
diff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
badShift Int
goodShift
in if Int
maxDiff Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
diff'
then []
else Int -> [Int]
checkEnd (Int
diff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patEnd)
checkEnd :: Int -> [Int]
checkEnd !Int
sI
| Int
strEnd Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
sI = []
| Bool
otherwise =
case Int -> Word8
strAt Int
sI of
!Word8
c | Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
pe -> Int -> Int -> [Int]
findMatch (Int
sI Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
patEnd) (Int
patEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
| Bool
otherwise -> Int -> [Int]
checkEnd (Int
sI Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall {a}. Integral a => a -> Int
occ Word8
c)
findMatch :: Int -> Int -> [Int]
findMatch !Int
diff !Int
patI =
case Int -> Word8
strAt (Int
diff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patI) of
!Word8
c | Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Int -> Word8
patAt Int
patI ->
if Int
patI Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
then Int
diff Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: let !diff' :: Int
diff' = Int
diff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
skip
in if Int
maxDiff Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
diff'
then []
else
if Int
skip Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
patLen
then
Int -> [Int]
checkEnd (Int
diff' Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patEnd)
else
Int -> Int -> [Int]
afterMatch Int
diff' Int
patEnd
else Int -> Int -> [Int]
findMatch Int
diff (Int
patI Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
| Bool
otherwise ->
let !diff' :: Int
diff' = Int
diff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int -> Int -> Int
forall a. Ord a => a -> a -> a
max (Int
patI Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall {a}. Integral a => a -> Int
occ Word8
c) (Int -> Int
suff Int
patI)
in if Int
maxDiff Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
diff'
then []
else Int -> [Int]
checkEnd (Int
diff' Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patEnd)
strictBreak :: S.ByteString -> S.ByteString -> (S.ByteString, S.ByteString)
strictBreak :: ByteString -> ByteString -> (ByteString, ByteString)
strictBreak ByteString
pat
| ByteString -> Bool
S.null ByteString
pat = \ByteString
str -> (ByteString
S.empty, ByteString
str)
| Bool
otherwise = ByteString -> (ByteString, ByteString)
breaker
where
searcher :: ByteString -> [Int]
searcher = Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
False ByteString
pat
breaker :: ByteString -> (ByteString, ByteString)
breaker ByteString
str = case ByteString -> [Int]
searcher ByteString
str of
[] -> (ByteString
str, ByteString
S.empty)
(Int
i:[Int]
_) -> Int -> ByteString -> (ByteString, ByteString)
S.splitAt Int
i ByteString
str
strictSplitKeepFront :: S.ByteString -> S.ByteString -> [S.ByteString]
strictSplitKeepFront :: ByteString -> ByteString -> [ByteString]
strictSplitKeepFront ByteString
pat
| ByteString -> Bool
S.null ByteString
pat = [ByteString] -> ByteString -> [ByteString]
forall a b. a -> b -> a
const (ByteString -> [ByteString]
forall a. a -> [a]
repeat ByteString
S.empty)
strictSplitKeepFront ByteString
pat = ByteString -> [ByteString]
splitter
where
!patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
searcher :: ByteString -> [Int]
searcher = Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
False ByteString
pat
splitter :: ByteString -> [ByteString]
splitter ByteString
str
| ByteString -> Bool
S.null ByteString
str = []
| Bool
otherwise =
case ByteString -> [Int]
searcher ByteString
str of
[] -> [ByteString
str]
(Int
i:[Int]
_)
| Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 -> ByteString -> [ByteString]
psplitter ByteString
str
| Bool
otherwise -> Int -> ByteString -> ByteString
S.take Int
i ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: ByteString -> [ByteString]
psplitter (Int -> ByteString -> ByteString
S.drop Int
i ByteString
str)
psplitter :: ByteString -> [ByteString]
psplitter !ByteString
str
| ByteString -> Bool
S.null ByteString
str = []
| Bool
otherwise =
case ByteString -> [Int]
searcher (Int -> ByteString -> ByteString
S.drop Int
patLen ByteString
str) of
[] -> [ByteString
str]
(Int
i:[Int]
_) -> Int -> ByteString -> ByteString
S.take (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patLen) ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:
ByteString -> [ByteString]
psplitter (Int -> ByteString -> ByteString
S.drop (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patLen) ByteString
str)
strictSplitKeepEnd :: S.ByteString -> S.ByteString -> [S.ByteString]
strictSplitKeepEnd :: ByteString -> ByteString -> [ByteString]
strictSplitKeepEnd ByteString
pat
| ByteString -> Bool
S.null ByteString
pat = [ByteString] -> ByteString -> [ByteString]
forall a b. a -> b -> a
const (ByteString -> [ByteString]
forall a. a -> [a]
repeat ByteString
S.empty)
strictSplitKeepEnd ByteString
pat = ByteString -> [ByteString]
splitter
where
!patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
searcher :: ByteString -> [Int]
searcher = Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
False ByteString
pat
splitter :: ByteString -> [ByteString]
splitter ByteString
str
| ByteString -> Bool
S.null ByteString
str = []
| Bool
otherwise =
case ByteString -> [Int]
searcher ByteString
str of
[] -> [ByteString
str]
(Int
i:[Int]
_) -> Int -> ByteString -> ByteString
S.take (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patLen) ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:
ByteString -> [ByteString]
splitter (Int -> ByteString -> ByteString
S.drop (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patLen) ByteString
str)
strictSplitDrop :: S.ByteString -> S.ByteString -> [S.ByteString]
strictSplitDrop :: ByteString -> ByteString -> [ByteString]
strictSplitDrop ByteString
pat
| ByteString -> Bool
S.null ByteString
pat = [ByteString] -> ByteString -> [ByteString]
forall a b. a -> b -> a
const (ByteString -> [ByteString]
forall a. a -> [a]
repeat ByteString
S.empty)
strictSplitDrop ByteString
pat = ByteString -> [ByteString]
splitter'
where
!patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
searcher :: ByteString -> [Int]
searcher = Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
False ByteString
pat
splitter' :: ByteString -> [ByteString]
splitter' ByteString
str
| ByteString -> Bool
S.null ByteString
str = []
| Bool
otherwise = ByteString -> [ByteString]
splitter ByteString
str
splitter :: ByteString -> [ByteString]
splitter ByteString
str
| ByteString -> Bool
S.null ByteString
str = [ByteString
S.empty]
| Bool
otherwise =
case ByteString -> [Int]
searcher ByteString
str of
[] -> [ByteString
str]
(Int
i:[Int]
_) -> Int -> ByteString -> ByteString
S.take Int
i ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: ByteString -> [ByteString]
splitter (Int -> ByteString -> ByteString
S.drop (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patLen) ByteString
str)
strictRepl :: S.ByteString -> ([S.ByteString] -> [S.ByteString])
-> S.ByteString -> [S.ByteString]
strictRepl :: ByteString
-> ([ByteString] -> [ByteString]) -> ByteString -> [ByteString]
strictRepl ByteString
pat = ([ByteString] -> [ByteString]) -> ByteString -> [ByteString]
repl
where
!patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
searcher :: ByteString -> [Int]
searcher = Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
False ByteString
pat
repl :: ([ByteString] -> [ByteString]) -> ByteString -> [ByteString]
repl [ByteString] -> [ByteString]
sub = ByteString -> [ByteString]
replacer
where
replacer :: ByteString -> [ByteString]
replacer ByteString
str
| ByteString -> Bool
S.null ByteString
str = []
| Bool
otherwise =
case ByteString -> [Int]
searcher ByteString
str of
[] -> [ByteString
str]
(Int
i:[Int]
_)
| Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 -> [ByteString] -> [ByteString]
sub ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
forall a b. (a -> b) -> a -> b
$ ByteString -> [ByteString]
replacer (Int -> ByteString -> ByteString
S.drop Int
patLen ByteString
str)
| Bool
otherwise ->
Int -> ByteString -> ByteString
S.take Int
i ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: [ByteString] -> [ByteString]
sub (ByteString -> [ByteString]
replacer (Int -> ByteString -> ByteString
S.drop (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patLen) ByteString
str))