{-# LANGUAGE BangPatterns #-}
module Data.ByteString.Search.KarpRabin (
indicesOfAny
) where
import qualified Data.ByteString as S
import Data.ByteString.Unsafe (unsafeIndex)
import qualified Data.IntMap as IM
import Data.Array
import Data.Array.Base (unsafeAt)
import Data.Word (Word8)
import Data.Bits
import Data.List (foldl')
{-# INLINE indicesOfAny #-}
indicesOfAny :: [S.ByteString]
-> S.ByteString
-> [(Int,[Int])]
indicesOfAny :: [ByteString] -> ByteString -> [(Int, [Int])]
indicesOfAny [ByteString]
pats
| [ByteString] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ByteString]
nepats = [(Int, [Int])] -> ByteString -> [(Int, [Int])]
forall a b. a -> b -> a
const []
| Bool
otherwise = [ByteString] -> ByteString -> [(Int, [Int])]
strictMatcher [ByteString]
nepats
where
nepats :: [ByteString]
nepats = (ByteString -> Bool) -> [ByteString] -> [ByteString]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (ByteString -> Bool) -> ByteString -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Bool
S.null) [ByteString]
pats
{-# INLINE rehash1 #-}
rehash1 :: Int -> Int -> Word8 -> Word8 -> Int
rehash1 :: Int -> Int -> Word8 -> Word8 -> Int
rehash1 Int
out Int
h Word8
o Word8
n =
(Int
h Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
1 Int -> Int -> Int
forall a. Num a => a -> a -> a
- (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
o Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
out)) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
n
{-# INLINE rehash2 #-}
rehash2 :: Int -> Int -> Word8 -> Word8 -> Int
rehash2 :: Int -> Int -> Word8 -> Word8 -> Int
rehash2 Int
out Int
h Word8
o Word8
n =
(Int
h Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
2 Int -> Int -> Int
forall a. Num a => a -> a -> a
- (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
o Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
out)) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
n
{-# INLINE rehash3 #-}
rehash3 :: Int -> Int -> Word8 -> Word8 -> Int
rehash3 :: Int -> Int -> Word8 -> Word8 -> Int
rehash3 Int
out Int
h Word8
o Word8
n =
(Int
h Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
3 Int -> Int -> Int
forall a. Num a => a -> a -> a
- (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
o Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
out)) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
n
{-# INLINE rehash4 #-}
rehash4 :: Int -> Int -> Word8 -> Word8 -> Int
rehash4 :: Int -> Int -> Word8 -> Word8 -> Int
rehash4 Int
out Int
h Word8
o Word8
n =
(Int
h Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
4 Int -> Int -> Int
forall a. Num a => a -> a -> a
- (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
o Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
out)) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
n
strictMatcher :: [S.ByteString] -> S.ByteString -> [(Int,[Int])]
strictMatcher :: [ByteString] -> ByteString -> [(Int, [Int])]
strictMatcher [ByteString]
pats = ByteString -> [(Int, [Int])]
search
where
!hLen :: Int
hLen = [Int] -> Int
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
minimum (Int
32 Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: (ByteString -> Int) -> [ByteString] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map ByteString -> Int
S.length [ByteString]
pats)
!shDi :: Int
shDi = case Int
32 Int -> Int -> Int
forall a. Integral a => a -> a -> a
`quot` Int
hLen of
Int
q | Int
q Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
4 -> Int
q
| Bool
otherwise -> Int
4
!outS :: Int
outS = Int
shDiInt -> Int -> Int
forall a. Num a => a -> a -> a
*Int
hLen
!patNum :: Int
patNum = [ByteString] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [ByteString]
pats
!patArr :: Array Int ByteString
patArr = (Int, Int) -> [ByteString] -> Array Int ByteString
forall i e. Ix i => (i, i) -> [e] -> Array i e
listArray (Int
0, Int
patNum Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) [ByteString]
pats
{-# INLINE rehash #-}
rehash :: Int -> Word8 -> Word8 -> Int
rehash :: Int -> Word8 -> Word8 -> Int
rehash = case Int
shDi of
Int
1 -> Int -> Int -> Word8 -> Word8 -> Int
rehash1 Int
hLen
Int
2 -> Int -> Int -> Word8 -> Word8 -> Int
rehash2 Int
outS
Int
3 -> Int -> Int -> Word8 -> Word8 -> Int
rehash3 Int
outS
Int
_ -> Int -> Int -> Word8 -> Word8 -> Int
rehash4 Int
outS
hash :: S.ByteString -> Int
hash :: ByteString -> Int
hash = (Int -> Word8 -> Int) -> Int -> ByteString -> Int
forall a. (a -> Word8 -> a) -> a -> ByteString -> a
S.foldl' (\Int
h Word8
w -> (Int
h Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
shDi) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w) Int
0 (ByteString -> Int)
-> (ByteString -> ByteString) -> ByteString -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> ByteString -> ByteString
S.take Int
hLen
!hashMap :: IntMap [Int]
hashMap =
(IntMap [Int] -> (Int, Int) -> IntMap [Int])
-> IntMap [Int] -> [(Int, Int)] -> IntMap [Int]
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\IntMap [Int]
mp (Int
h,Int
i) -> ([Int] -> [Int] -> [Int])
-> Int -> [Int] -> IntMap [Int] -> IntMap [Int]
forall a. (a -> a -> a) -> Int -> a -> IntMap a -> IntMap a
IM.insertWith (([Int] -> [Int] -> [Int]) -> [Int] -> [Int] -> [Int]
forall a b c. (a -> b -> c) -> b -> a -> c
flip [Int] -> [Int] -> [Int]
forall a. [a] -> [a] -> [a]
(++)) Int
h [Int
i] IntMap [Int]
mp) IntMap [Int]
forall a. IntMap a
IM.empty ([(Int, Int)] -> IntMap [Int]) -> [(Int, Int)] -> IntMap [Int]
forall a b. (a -> b) -> a -> b
$
[Int] -> [Int] -> [(Int, Int)]
forall a b. [a] -> [b] -> [(a, b)]
zip ((ByteString -> Int) -> [ByteString] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map ByteString -> Int
hash [ByteString]
pats) [Int
0 :: Int .. ]
search :: ByteString -> [(Int, [Int])]
search ByteString
str
| Int
strLen Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
hLen = []
| Bool
otherwise = Int -> Int -> [(Int, [Int])]
go Int
0 Int
shash
where
!strLen :: Int
strLen = ByteString -> Int
S.length ByteString
str
!maxIdx :: Int
maxIdx = Int
strLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
hLen
{-# INLINE strAt #-}
strAt :: Int -> Word8
strAt !Int
i = ByteString -> Int -> Word8
unsafeIndex ByteString
str Int
i
!shash :: Int
shash = ByteString -> Int
hash ByteString
str
go :: Int -> Int -> [(Int, [Int])]
go !Int
sI !Int
h =
case Int -> IntMap [Int] -> Maybe [Int]
forall a. Int -> IntMap a -> Maybe a
IM.lookup Int
h IntMap [Int]
hashMap of
Maybe [Int]
Nothing ->
if Int
sI Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
maxIdx
then []
else Int -> Int -> [(Int, [Int])]
go (Int
sI Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Int -> Word8 -> Word8 -> Int
rehash Int
h (Int -> Word8
strAt Int
sI) (Int -> Word8
strAt (Int
sI Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
hLen)))
Just [Int]
ps ->
let !rst :: ByteString
rst = Int -> ByteString -> ByteString
S.drop Int
sI ByteString
str
{-# INLINE hd #-}
hd :: Word8
hd = Int -> Word8
strAt Int
sI
{-# INLINE more #-}
more :: [(Int, [Int])]
more = if Int
sI Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
maxIdx then [] else
Int -> Int -> [(Int, [Int])]
go (Int
sI Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Int -> Word8 -> Word8 -> Int
rehash Int
h Word8
hd (Int -> Word8
strAt (Int
sI Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
hLen)))
{-# INLINE okay #-}
okay :: ByteString -> Bool
okay ByteString
bs = ByteString -> ByteString -> Bool
S.isPrefixOf ByteString
bs ByteString
rst
in case (Int -> Bool) -> [Int] -> [Int]
forall a. (a -> Bool) -> [a] -> [a]
filter (ByteString -> Bool
okay (ByteString -> Bool) -> (Int -> ByteString) -> Int -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Array Int ByteString
patArr Array Int ByteString -> Int -> ByteString
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
`unsafeAt`)) [Int]
ps of
[] -> [(Int, [Int])]
more
[Int]
qs -> Int -> [(Int, [Int])] -> [(Int, [Int])]
seq ([Int] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Int]
qs) ([(Int, [Int])] -> [(Int, [Int])])
-> [(Int, [Int])] -> [(Int, [Int])]
forall a b. (a -> b) -> a -> b
$
(Int
sI,[Int]
qs) (Int, [Int]) -> [(Int, [Int])] -> [(Int, [Int])]
forall a. a -> [a] -> [a]
: [(Int, [Int])]
more