{-# LANGUAGE BangPatterns #-}
module Data.ByteString.Search.DFA (
indices
, nonOverlappingIndices
, breakOn
, breakAfter
, replace
, split
, splitKeepEnd
, splitKeepFront
) where
import Data.ByteString.Search.Internal.Utils (automaton)
import Data.ByteString.Search.Substitution
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Internal as LI
import Data.ByteString.Unsafe (unsafeIndex)
import Data.Array.Base (unsafeAt)
import Data.Bits
{-# INLINE indices #-}
indices :: S.ByteString
-> S.ByteString
-> [Int]
indices :: ByteString -> ByteString -> [Int]
indices = Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
True
{-# INLINE nonOverlappingIndices #-}
nonOverlappingIndices :: S.ByteString
-> S.ByteString
-> [Int]
nonOverlappingIndices :: ByteString -> ByteString -> [Int]
nonOverlappingIndices = Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
False
breakOn :: S.ByteString
-> S.ByteString
-> (S.ByteString, S.ByteString)
breakOn :: ByteString -> ByteString -> (ByteString, ByteString)
breakOn ByteString
pat = ByteString -> (ByteString, ByteString)
breaker
where
searcher :: ByteString -> [Int]
searcher = Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
False ByteString
pat
breaker :: ByteString -> (ByteString, ByteString)
breaker ByteString
str = case ByteString -> [Int]
searcher ByteString
str of
[] -> (ByteString
str, ByteString
S.empty)
(Int
i:[Int]
_) -> Int -> ByteString -> (ByteString, ByteString)
S.splitAt Int
i ByteString
str
breakAfter :: S.ByteString
-> S.ByteString
-> (S.ByteString, S.ByteString)
breakAfter :: ByteString -> ByteString -> (ByteString, ByteString)
breakAfter ByteString
pat = ByteString -> (ByteString, ByteString)
breaker
where
!patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
searcher :: ByteString -> [Int]
searcher = Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
False ByteString
pat
breaker :: ByteString -> (ByteString, ByteString)
breaker ByteString
str = case ByteString -> [Int]
searcher ByteString
str of
[] -> (ByteString
str, ByteString
S.empty)
(Int
i:[Int]
_) -> Int -> ByteString -> (ByteString, ByteString)
S.splitAt (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patLen) ByteString
str
replace :: Substitution rep
=> S.ByteString
-> rep
-> S.ByteString
-> L.ByteString
replace :: forall rep.
Substitution rep =>
ByteString -> rep -> ByteString -> ByteString
replace ByteString
pat
| ByteString -> Bool
S.null ByteString
pat = \rep
sub -> rep -> ByteString -> ByteString
forall a. Substitution a => a -> ByteString -> ByteString
prependCycle rep
sub (ByteString -> ByteString)
-> (ByteString -> ByteString) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString -> ByteString -> ByteString)
-> ByteString -> ByteString -> ByteString
forall a b c. (a -> b -> c) -> b -> a -> c
flip ByteString -> ByteString -> ByteString
LI.chunk ByteString
LI.Empty
| Bool
otherwise =
let !patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
searcher :: ByteString -> [Int]
searcher = Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
False ByteString
pat
repl :: p -> ByteString -> [ByteString]
repl p
sub =
let {-# NOINLINE subst #-}
!subst :: [ByteString] -> [ByteString]
subst = p -> [ByteString] -> [ByteString]
forall a. Substitution a => a -> [ByteString] -> [ByteString]
substitution p
sub
replacer :: ByteString -> [ByteString]
replacer ByteString
str
| ByteString -> Bool
S.null ByteString
str = []
| Bool
otherwise =
case ByteString -> [Int]
searcher ByteString
str of
[] -> [ByteString
str]
(Int
i:[Int]
_)
| Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 -> [ByteString] -> [ByteString]
subst ([ByteString] -> [ByteString]) -> [ByteString] -> [ByteString]
forall a b. (a -> b) -> a -> b
$ ByteString -> [ByteString]
replacer (Int -> ByteString -> ByteString
S.drop Int
patLen ByteString
str)
| Bool
otherwise -> Int -> ByteString -> ByteString
S.take Int
i ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: [ByteString] -> [ByteString]
subst
(ByteString -> [ByteString]
replacer (Int -> ByteString -> ByteString
S.drop (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patLen) ByteString
str))
in ByteString -> [ByteString]
replacer
in \rep
sub -> [ByteString] -> ByteString
L.fromChunks ([ByteString] -> ByteString)
-> (ByteString -> [ByteString]) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. rep -> ByteString -> [ByteString]
forall {p}. Substitution p => p -> ByteString -> [ByteString]
repl rep
sub
split :: S.ByteString
-> S.ByteString
-> [S.ByteString]
split :: ByteString -> ByteString -> [ByteString]
split ByteString
pat
| ByteString -> Bool
S.null ByteString
pat = [ByteString] -> ByteString -> [ByteString]
forall a b. a -> b -> a
const (ByteString -> [ByteString]
forall a. a -> [a]
repeat ByteString
S.empty)
split ByteString
pat = ByteString -> [ByteString]
splitter
where
!patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
searcher :: ByteString -> [Int]
searcher = Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
False ByteString
pat
splitter :: ByteString -> [ByteString]
splitter ByteString
str
| ByteString -> Bool
S.null ByteString
str = []
| Bool
otherwise = ByteString -> [ByteString]
splitter' ByteString
str
splitter' :: ByteString -> [ByteString]
splitter' ByteString
str
| ByteString -> Bool
S.null ByteString
str = [ByteString
S.empty]
| Bool
otherwise =
case ByteString -> [Int]
searcher ByteString
str of
[] -> [ByteString
str]
(Int
i:[Int]
_) -> Int -> ByteString -> ByteString
S.take Int
i ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: ByteString -> [ByteString]
splitter' (Int -> ByteString -> ByteString
S.drop (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patLen) ByteString
str)
splitKeepEnd :: S.ByteString
-> S.ByteString
-> [S.ByteString]
splitKeepEnd :: ByteString -> ByteString -> [ByteString]
splitKeepEnd ByteString
pat
| ByteString -> Bool
S.null ByteString
pat = [ByteString] -> ByteString -> [ByteString]
forall a b. a -> b -> a
const (ByteString -> [ByteString]
forall a. a -> [a]
repeat ByteString
S.empty)
splitKeepEnd ByteString
pat = ByteString -> [ByteString]
splitter
where
!patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
searcher :: ByteString -> [Int]
searcher = Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
False ByteString
pat
splitter :: ByteString -> [ByteString]
splitter ByteString
str
| ByteString -> Bool
S.null ByteString
str = []
| Bool
otherwise =
case ByteString -> [Int]
searcher ByteString
str of
[] -> [ByteString
str]
(Int
i:[Int]
_) -> Int -> ByteString -> ByteString
S.take (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patLen) ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:
ByteString -> [ByteString]
splitter (Int -> ByteString -> ByteString
S.drop (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patLen) ByteString
str)
splitKeepFront :: S.ByteString
-> S.ByteString
-> [S.ByteString]
splitKeepFront :: ByteString -> ByteString -> [ByteString]
splitKeepFront ByteString
pat
| ByteString -> Bool
S.null ByteString
pat = [ByteString] -> ByteString -> [ByteString]
forall a b. a -> b -> a
const (ByteString -> [ByteString]
forall a. a -> [a]
repeat ByteString
S.empty)
splitKeepFront ByteString
pat = ByteString -> [ByteString]
splitter
where
!patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
searcher :: ByteString -> [Int]
searcher = Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
False ByteString
pat
splitter :: ByteString -> [ByteString]
splitter ByteString
str
| ByteString -> Bool
S.null ByteString
str = []
| Bool
otherwise =
case ByteString -> [Int]
searcher ByteString
str of
[] -> [ByteString
str]
(Int
i:[Int]
rst)
| Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 -> case [Int]
rst of
[] -> [ByteString
str]
(Int
j:[Int]
_) -> Int -> ByteString -> ByteString
S.take Int
j ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: ByteString -> [ByteString]
splitter' (Int -> ByteString -> ByteString
S.drop Int
j ByteString
str)
| Bool
otherwise -> Int -> ByteString -> ByteString
S.take Int
i ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: ByteString -> [ByteString]
splitter' (Int -> ByteString -> ByteString
S.drop Int
i ByteString
str)
splitter' :: ByteString -> [ByteString]
splitter' ByteString
str
| ByteString -> Bool
S.null ByteString
str = []
| Bool
otherwise =
case ByteString -> [Int]
searcher (Int -> ByteString -> ByteString
S.drop Int
patLen ByteString
str) of
[] -> [ByteString
str]
(Int
i:[Int]
_) -> Int -> ByteString -> ByteString
S.take (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patLen) ByteString
str ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:
ByteString -> [ByteString]
splitter' (Int -> ByteString -> ByteString
S.drop (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patLen) ByteString
str)
strictSearcher :: Bool -> S.ByteString -> S.ByteString -> [Int]
strictSearcher :: Bool -> ByteString -> ByteString -> [Int]
strictSearcher Bool
_ !ByteString
pat
| ByteString -> Bool
S.null ByteString
pat = Int -> Int -> [Int]
forall a. Enum a => a -> a -> [a]
enumFromTo Int
0 (Int -> [Int]) -> (ByteString -> Int) -> ByteString -> [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Int
S.length
| ByteString -> Int
S.length ByteString
pat Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 = let !w :: Word8
w = ByteString -> Word8
S.head ByteString
pat in Word8 -> ByteString -> [Int]
S.elemIndices Word8
w
strictSearcher !Bool
overlap ByteString
pat = ByteString -> [Int]
search
where
!patLen :: Int
patLen = ByteString -> Int
S.length ByteString
pat
!auto :: UArray Int Int
auto = ByteString -> UArray Int Int
automaton ByteString
pat
!p0 :: Word8
p0 = ByteString -> Int -> Word8
unsafeIndex ByteString
pat Int
0
!ams :: Int
ams = if Bool
overlap then Int
patLen else Int
0
search :: ByteString -> [Int]
search ByteString
str = Int -> Int -> [Int]
match Int
0 Int
0
where
!strLen :: Int
strLen = ByteString -> Int
S.length ByteString
str
{-# INLINE strAt #-}
strAt :: Int -> Int
strAt :: Int -> Int
strAt !Int
i = Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int -> Word8
unsafeIndex ByteString
str Int
i)
match :: Int -> Int -> [Int]
match Int
0 Int
idx
| Int
idx Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
strLen = []
| ByteString -> Int -> Word8
unsafeIndex ByteString
str Int
idx Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
p0 = Int -> Int -> [Int]
match Int
1 (Int
idx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
| Bool
otherwise = Int -> Int -> [Int]
match Int
0 (Int
idx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
match Int
state Int
idx
| Int
idx Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
strLen = []
| Bool
otherwise =
let !nstate :: Int
nstate = UArray Int Int -> Int -> Int
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
unsafeAt UArray Int Int
auto ((Int
state Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
8) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int -> Int
strAt Int
idx)
!nxtIdx :: Int
nxtIdx = Int
idx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
in if Int
nstate Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
patLen
then (Int
nxtIdx Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
patLen) Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: Int -> Int -> [Int]
match Int
ams Int
nxtIdx
else Int -> Int -> [Int]
match Int
nstate Int
nxtIdx