{-# LANGUAGE BangPatterns #-}

module Unison.Util.Text.Pattern where

import Data.Char (isAlphaNum, isControl, isLetter, isLower, isMark, isNumber, isPrint, isPunctuation, isSeparator, isSpace, isSymbol, isUpper)
import Data.Text qualified as DT
import Unison.Util.Text (Text)
import Unison.Util.Text qualified as Text

data Pattern
  = Join [Pattern] -- sequencing of patterns
  | Or Pattern Pattern -- left-biased choice: tries second pattern only if first fails
  | Capture Pattern -- capture all the text consumed by the inner pattern, discarding its subcaptures
  | CaptureAs Text Pattern -- capture the given text, discarding its subcaptures, and name the capture
  | Many Bool Pattern -- zero or more repetitions (at least 1 can be written: Join [p, Many p]); boolean determines whether it's the correct version (True) or the original (False).
  | Replicate Int Int Pattern -- m to n occurrences of a pattern, optional = 0-1
  | Eof -- succeed if given the empty text, fail otherwise
  | Literal Text -- succeed if input starts with the given text, advance by that text
  | Char CharPattern -- succeed if input starts with a char matching the given pattern, advance by 1 char
  | Lookahead Pattern -- Succeed if the given pattern matches the input, but don't consume any input
  | NegativeLookahead Pattern -- Succeed if the given pattern does not match the input, but don't consume any input
  | Lookbehind1 CharPattern -- Succeed if the previous char matches.
  | NegativeLookbehind1 CharPattern -- Succeed if the previous char does not match. Needed because lookbehind with negation is not exactly semantically equivalent.
  deriving (Int -> Pattern -> ShowS
[Pattern] -> ShowS
Pattern -> String
(Int -> Pattern -> ShowS)
-> (Pattern -> String) -> ([Pattern] -> ShowS) -> Show Pattern
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Pattern -> ShowS
showsPrec :: Int -> Pattern -> ShowS
$cshow :: Pattern -> String
show :: Pattern -> String
$cshowList :: [Pattern] -> ShowS
showList :: [Pattern] -> ShowS
Show, Pattern -> Pattern -> Bool
(Pattern -> Pattern -> Bool)
-> (Pattern -> Pattern -> Bool) -> Eq Pattern
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Pattern -> Pattern -> Bool
== :: Pattern -> Pattern -> Bool
$c/= :: Pattern -> Pattern -> Bool
/= :: Pattern -> Pattern -> Bool
Eq, Eq Pattern
Eq Pattern =>
(Pattern -> Pattern -> Ordering)
-> (Pattern -> Pattern -> Bool)
-> (Pattern -> Pattern -> Bool)
-> (Pattern -> Pattern -> Bool)
-> (Pattern -> Pattern -> Bool)
-> (Pattern -> Pattern -> Pattern)
-> (Pattern -> Pattern -> Pattern)
-> Ord Pattern
Pattern -> Pattern -> Bool
Pattern -> Pattern -> Ordering
Pattern -> Pattern -> Pattern
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: Pattern -> Pattern -> Ordering
compare :: Pattern -> Pattern -> Ordering
$c< :: Pattern -> Pattern -> Bool
< :: Pattern -> Pattern -> Bool
$c<= :: Pattern -> Pattern -> Bool
<= :: Pattern -> Pattern -> Bool
$c> :: Pattern -> Pattern -> Bool
> :: Pattern -> Pattern -> Bool
$c>= :: Pattern -> Pattern -> Bool
>= :: Pattern -> Pattern -> Bool
$cmax :: Pattern -> Pattern -> Pattern
max :: Pattern -> Pattern -> Pattern
$cmin :: Pattern -> Pattern -> Pattern
min :: Pattern -> Pattern -> Pattern
Ord)

data CharPattern
  = Any -- any char
  | Not CharPattern -- negation of the given pattern
  | Union CharPattern CharPattern -- match if either pattern matches
  | Intersect CharPattern CharPattern -- match if both patterns match
  | CharRange Char Char -- match if char is in the given range
  | CharSet [Char] -- match if char is in the given set
  | CharClass CharClass -- match if char is in the given class
  deriving (Int -> CharPattern -> ShowS
[CharPattern] -> ShowS
CharPattern -> String
(Int -> CharPattern -> ShowS)
-> (CharPattern -> String)
-> ([CharPattern] -> ShowS)
-> Show CharPattern
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> CharPattern -> ShowS
showsPrec :: Int -> CharPattern -> ShowS
$cshow :: CharPattern -> String
show :: CharPattern -> String
$cshowList :: [CharPattern] -> ShowS
showList :: [CharPattern] -> ShowS
Show, CharPattern -> CharPattern -> Bool
(CharPattern -> CharPattern -> Bool)
-> (CharPattern -> CharPattern -> Bool) -> Eq CharPattern
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: CharPattern -> CharPattern -> Bool
== :: CharPattern -> CharPattern -> Bool
$c/= :: CharPattern -> CharPattern -> Bool
/= :: CharPattern -> CharPattern -> Bool
Eq, Eq CharPattern
Eq CharPattern =>
(CharPattern -> CharPattern -> Ordering)
-> (CharPattern -> CharPattern -> Bool)
-> (CharPattern -> CharPattern -> Bool)
-> (CharPattern -> CharPattern -> Bool)
-> (CharPattern -> CharPattern -> Bool)
-> (CharPattern -> CharPattern -> CharPattern)
-> (CharPattern -> CharPattern -> CharPattern)
-> Ord CharPattern
CharPattern -> CharPattern -> Bool
CharPattern -> CharPattern -> Ordering
CharPattern -> CharPattern -> CharPattern
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: CharPattern -> CharPattern -> Ordering
compare :: CharPattern -> CharPattern -> Ordering
$c< :: CharPattern -> CharPattern -> Bool
< :: CharPattern -> CharPattern -> Bool
$c<= :: CharPattern -> CharPattern -> Bool
<= :: CharPattern -> CharPattern -> Bool
$c> :: CharPattern -> CharPattern -> Bool
> :: CharPattern -> CharPattern -> Bool
$c>= :: CharPattern -> CharPattern -> Bool
>= :: CharPattern -> CharPattern -> Bool
$cmax :: CharPattern -> CharPattern -> CharPattern
max :: CharPattern -> CharPattern -> CharPattern
$cmin :: CharPattern -> CharPattern -> CharPattern
min :: CharPattern -> CharPattern -> CharPattern
Ord)

data CharClass
  = AlphaNum -- alphabetic or numeric characters
  | Upper -- uppercase alphabetic characters
  | Lower -- lowercase alphabetic characters
  | Whitespace -- whitespace characters (space, tab, newline, etc.)
  | Control -- non-printing control characters
  | Printable -- letters, numbers, punctuation, symbols, spaces
  | MarkChar -- accents, diacritics, etc.
  | Number -- numeric characters in any script
  | Punctuation -- connectors, brackets, quotes
  | Symbol -- symbols (math, currency, etc.)
  | Separator -- spaces, line separators, paragraph separators
  | Letter -- letters in any script
  deriving (Int -> CharClass -> ShowS
[CharClass] -> ShowS
CharClass -> String
(Int -> CharClass -> ShowS)
-> (CharClass -> String)
-> ([CharClass] -> ShowS)
-> Show CharClass
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> CharClass -> ShowS
showsPrec :: Int -> CharClass -> ShowS
$cshow :: CharClass -> String
show :: CharClass -> String
$cshowList :: [CharClass] -> ShowS
showList :: [CharClass] -> ShowS
Show, CharClass -> CharClass -> Bool
(CharClass -> CharClass -> Bool)
-> (CharClass -> CharClass -> Bool) -> Eq CharClass
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: CharClass -> CharClass -> Bool
== :: CharClass -> CharClass -> Bool
$c/= :: CharClass -> CharClass -> Bool
/= :: CharClass -> CharClass -> Bool
Eq, Eq CharClass
Eq CharClass =>
(CharClass -> CharClass -> Ordering)
-> (CharClass -> CharClass -> Bool)
-> (CharClass -> CharClass -> Bool)
-> (CharClass -> CharClass -> Bool)
-> (CharClass -> CharClass -> Bool)
-> (CharClass -> CharClass -> CharClass)
-> (CharClass -> CharClass -> CharClass)
-> Ord CharClass
CharClass -> CharClass -> Bool
CharClass -> CharClass -> Ordering
CharClass -> CharClass -> CharClass
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: CharClass -> CharClass -> Ordering
compare :: CharClass -> CharClass -> Ordering
$c< :: CharClass -> CharClass -> Bool
< :: CharClass -> CharClass -> Bool
$c<= :: CharClass -> CharClass -> Bool
<= :: CharClass -> CharClass -> Bool
$c> :: CharClass -> CharClass -> Bool
> :: CharClass -> CharClass -> Bool
$c>= :: CharClass -> CharClass -> Bool
>= :: CharClass -> CharClass -> Bool
$cmax :: CharClass -> CharClass -> CharClass
max :: CharClass -> CharClass -> CharClass
$cmin :: CharClass -> CharClass -> CharClass
min :: CharClass -> CharClass -> CharClass
Ord)

-- Wrapper type. Holds a pattern together with its compilation. This is used as
-- the semantic value of a unison `Pattern a`. Laziness avoids building the
-- matcher until it actually needs to be used, and also avoids recalculating the
-- match function if a `CPattern` is 'run' multiple times, while allowing the
-- builtin runner to just take two arguments, and not try to build a partial
-- application by hand.
--
-- In the future, this can existentially quantify over the type being matched.
data CPattern = CP Pattern (Text -> Maybe ([Text], Text))

instance Eq CPattern where
  CP Pattern
p Text -> Maybe ([Text], Text)
_ == :: CPattern -> CPattern -> Bool
== CP Pattern
q Text -> Maybe ([Text], Text)
_ = Pattern
p Pattern -> Pattern -> Bool
forall a. Eq a => a -> a -> Bool
== Pattern
q

instance Ord CPattern where
  CP Pattern
p Text -> Maybe ([Text], Text)
_ compare :: CPattern -> CPattern -> Ordering
`compare` CP Pattern
q Text -> Maybe ([Text], Text)
_ = Pattern -> Pattern -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Pattern
p Pattern
q

cpattern :: Pattern -> CPattern
cpattern :: Pattern -> CPattern
cpattern Pattern
p = Pattern -> (Text -> Maybe ([Text], Text)) -> CPattern
CP Pattern
p (Pattern -> Text -> Maybe ([Text], Text)
run Pattern
p)

run :: Pattern -> Text -> Maybe ([Text], Text)
run :: Pattern -> Text -> Maybe ([Text], Text)
run Pattern
p =
  let cp :: Stack -> Maybe Char -> Text -> Maybe ([Text], Text)
cp = Pattern -> Compiled (Maybe ([Text], Text))
forall r. Pattern -> Compiled r
compile Pattern
p (\Stack
_ Maybe Char
_ Text
_ -> Maybe ([Text], Text)
forall a. Maybe a
Nothing) (\Stack
acc Maybe Char
_ Text
rem -> ([Text], Text) -> Maybe ([Text], Text)
forall a. a -> Maybe a
Just (Stack -> [Text]
s Stack
acc, Text
rem))
      s :: Stack -> [Text]
s = [Text] -> [Text]
forall a. [a] -> [a]
reverse ([Text] -> [Text]) -> (Stack -> [Text]) -> Stack -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([Text] -> [Text]) -> [Text]
capturesToList (([Text] -> [Text]) -> [Text])
-> (Stack -> [Text] -> [Text]) -> Stack -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Stack -> [Text] -> [Text]
stackCaptures
   in \Text
t -> Stack -> Maybe Char -> Text -> Maybe ([Text], Text)
cp (([Text] -> [Text]) -> Stack
Empty [Text] -> [Text]
emptyCaptures) Maybe Char
forall a. Maybe a
Nothing Text
t

-- Stack used to track captures and to support backtracking.
-- A `try` will push a `Mark` that allows the old state
-- (both the list of captures and the current remainder)
-- to be restored on failure.
data Stack = Empty !Captures | Mark !Captures !(Maybe Char) !Text !Stack

-- A difference list for representing the captures of a pattern.
-- So that capture lists can be appended in O(1).
type Captures = [Text] -> [Text]

stackCaptures :: Stack -> Captures
stackCaptures :: Stack -> [Text] -> [Text]
stackCaptures (Mark [Text] -> [Text]
cs Maybe Char
_ Text
_ Stack
_) = [Text] -> [Text]
cs
stackCaptures (Empty [Text] -> [Text]
cs) = [Text] -> [Text]
cs
{-# INLINE stackCaptures #-}

pushCaptures :: Captures -> Stack -> Stack
pushCaptures :: ([Text] -> [Text]) -> Stack -> Stack
pushCaptures [Text] -> [Text]
c (Empty [Text] -> [Text]
cs) = ([Text] -> [Text]) -> Stack
Empty (([Text] -> [Text]) -> ([Text] -> [Text]) -> [Text] -> [Text]
appendCaptures [Text] -> [Text]
c [Text] -> [Text]
cs)
pushCaptures [Text] -> [Text]
c (Mark [Text] -> [Text]
cs Maybe Char
oc Text
t Stack
s) = ([Text] -> [Text]) -> Maybe Char -> Text -> Stack -> Stack
Mark (([Text] -> [Text]) -> ([Text] -> [Text]) -> [Text] -> [Text]
appendCaptures [Text] -> [Text]
c [Text] -> [Text]
cs) Maybe Char
oc Text
t Stack
s
{-# INLINE pushCaptures #-}

pushCapture :: Text -> Stack -> Stack
pushCapture :: Text -> Stack -> Stack
pushCapture Text
txt = ([Text] -> [Text]) -> Stack -> Stack
pushCaptures (Text
txt Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
:)
{-# INLINE pushCapture #-}

appendCaptures :: Captures -> Captures -> Captures
appendCaptures :: ([Text] -> [Text]) -> ([Text] -> [Text]) -> [Text] -> [Text]
appendCaptures [Text] -> [Text]
c1 [Text] -> [Text]
c2 = [Text] -> [Text]
c1 ([Text] -> [Text]) -> ([Text] -> [Text]) -> [Text] -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Text] -> [Text]
c2
{-# INLINE appendCaptures #-}

emptyCaptures :: Captures
emptyCaptures :: [Text] -> [Text]
emptyCaptures = [Text] -> [Text]
forall a. a -> a
id

capturesToList :: Captures -> [Text]
capturesToList :: ([Text] -> [Text]) -> [Text]
capturesToList [Text] -> [Text]
c = [Text] -> [Text]
c []

type Compiled r = (Stack -> Maybe Char -> Text -> r) -> (Stack -> Maybe Char -> Text -> r) -> Stack -> Maybe Char -> Text -> r

compile :: Pattern -> Compiled r
compile :: forall r. Pattern -> Compiled r
compile Pattern
Eof !Stack -> Maybe Char -> Text -> r
err !Stack -> Maybe Char -> Text -> r
success = Stack -> Maybe Char -> Text -> r
go
  where
    go :: Stack -> Maybe Char -> Text -> r
go Stack
acc Maybe Char
c Text
t
      | Text -> Int
Text.size Text
t Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Stack -> Maybe Char -> Text -> r
success Stack
acc Maybe Char
c Text
t
      | Bool
otherwise = Stack -> Maybe Char -> Text -> r
err Stack
acc Maybe Char
c Text
t
compile (Literal Text
txt) !Stack -> Maybe Char -> Text -> r
err !Stack -> Maybe Char -> Text -> r
success = Stack -> Maybe Char -> Text -> r
go
  where
    go :: Stack -> Maybe Char -> Text -> r
go Stack
acc Maybe Char
oc Text
t =
      let candidate :: Text
candidate = Int -> Text -> Text
Text.take (Text -> Int
Text.size Text
txt) Text
t
       in if Text
candidate Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
txt
            then
              let t' :: Text
t' = Int -> Text -> Text
Text.drop (Text -> Int
Text.size Text
txt) Text
t
               in case Text -> Maybe (Text, Char)
Text.unsnoc Text
candidate of
                    Just (Text
_, Char
c) -> Stack -> Maybe Char -> Text -> r
success Stack
acc (Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c) Text
t'
                    Maybe (Text, Char)
Nothing -> Stack -> Maybe Char -> Text -> r
success Stack
acc Maybe Char
oc Text
t'
            else
              Stack -> Maybe Char -> Text -> r
err Stack
acc Maybe Char
oc Text
t
compile (Char CharPattern
Any) !Stack -> Maybe Char -> Text -> r
err !Stack -> Maybe Char -> Text -> r
success = Stack -> Maybe Char -> Text -> r
go
  where
    go :: Stack -> Maybe Char -> Text -> r
go Stack
acc Maybe Char
oc Text
t = case Text -> Maybe (Char, Text)
Text.uncons Text
t of
      Just (Char
c', Text
rem) -> Stack -> Maybe Char -> Text -> r
success Stack
acc (Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c') Text
rem
      Maybe (Char, Text)
Nothing -> Stack -> Maybe Char -> Text -> r
err Stack
acc Maybe Char
oc Text
t
compile (CaptureAs Text
t Pattern
p) !Stack -> Maybe Char -> Text -> r
err !Stack -> Maybe Char -> Text -> r
success = Stack -> Maybe Char -> Text -> r
go
  where
    err' :: Stack -> Maybe Char -> Text -> Stack -> Maybe Char -> Text -> r
err' Stack
_ Maybe Char
_ Text
_ Stack
acc0 Maybe Char
c0 Text
t0 = Stack -> Maybe Char -> Text -> r
err Stack
acc0 Maybe Char
c0 Text
t0
    success' :: Stack -> Maybe Char -> Text -> Stack -> Maybe Char -> Text -> r
success' Stack
_ Maybe Char
cr Text
rem Stack
acc0 Maybe Char
_ Text
_ = Stack -> Maybe Char -> Text -> r
success (Text -> Stack -> Stack
pushCapture Text
t Stack
acc0) Maybe Char
cr Text
rem
    compiled :: Stack -> Maybe Char -> Text -> Stack -> Maybe Char -> Text -> r
compiled = Pattern -> Compiled (Stack -> Maybe Char -> Text -> r)
forall r. Pattern -> Compiled r
compile Pattern
p Stack -> Maybe Char -> Text -> Stack -> Maybe Char -> Text -> r
err' Stack -> Maybe Char -> Text -> Stack -> Maybe Char -> Text -> r
success'
    go :: Stack -> Maybe Char -> Text -> r
go Stack
acc Maybe Char
c Text
t = Stack -> Maybe Char -> Text -> Stack -> Maybe Char -> Text -> r
compiled Stack
acc Maybe Char
c Text
t Stack
acc Maybe Char
c Text
t
compile (Capture (Many Bool
_ (Char CharPattern
Any))) !Stack -> Maybe Char -> Text -> r
_ !Stack -> Maybe Char -> Text -> r
success = \Stack
acc Maybe Char
c Text
t ->
  case Text -> Maybe (Text, Char)
Text.unsnoc Text
t of
    Just (Text
_, Char
c) -> Stack -> Maybe Char -> Text -> r
success (Text -> Stack -> Stack
pushCapture Text
t Stack
acc) (Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c) Text
Text.empty
    Maybe (Text, Char)
Nothing -> Stack -> Maybe Char -> Text -> r
success (Text -> Stack -> Stack
pushCapture Text
t Stack
acc) Maybe Char
c Text
t
compile (Capture Pattern
c) !Stack -> Maybe Char -> Text -> r
err !Stack -> Maybe Char -> Text -> r
success = Stack -> Maybe Char -> Text -> r
go
  where
    err' :: Stack -> Maybe Char -> Text -> Stack -> Maybe Char -> Text -> r
err' Stack
_ Maybe Char
_ Text
_ Stack
acc0 Maybe Char
c0 Text
t0 = Stack -> Maybe Char -> Text -> r
err Stack
acc0 Maybe Char
c0 Text
t0
    success' :: Stack -> Maybe Char -> Text -> Stack -> Maybe Char -> Text -> r
success' Stack
_ Maybe Char
cr Text
rem Stack
acc0 Maybe Char
_ Text
t0 = Stack -> Maybe Char -> Text -> r
success (Text -> Stack -> Stack
pushCapture (Int -> Text -> Text
Text.take (Text -> Int
Text.size Text
t0 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Text -> Int
Text.size Text
rem) Text
t0) Stack
acc0) Maybe Char
cr Text
rem
    compiled :: Stack -> Maybe Char -> Text -> Stack -> Maybe Char -> Text -> r
compiled = Pattern -> Compiled (Stack -> Maybe Char -> Text -> r)
forall r. Pattern -> Compiled r
compile Pattern
c Stack -> Maybe Char -> Text -> Stack -> Maybe Char -> Text -> r
err' Stack -> Maybe Char -> Text -> Stack -> Maybe Char -> Text -> r
success'
    go :: Stack -> Maybe Char -> Text -> r
go Stack
acc Maybe Char
c Text
t = Stack -> Maybe Char -> Text -> Stack -> Maybe Char -> Text -> r
compiled Stack
acc Maybe Char
c Text
t Stack
acc Maybe Char
c Text
t
compile (Or Pattern
p1 Pattern
p2) Stack -> Maybe Char -> Text -> r
err Stack -> Maybe Char -> Text -> r
success = Stack -> Maybe Char -> Text -> r
cp1
  where
    cp2 :: Stack -> Maybe Char -> Text -> r
cp2 = Pattern -> Compiled r
forall r. Pattern -> Compiled r
compile Pattern
p2 Stack -> Maybe Char -> Text -> r
err Stack -> Maybe Char -> Text -> r
success
    cp1 :: Stack -> Maybe Char -> Text -> r
cp1 = String -> Compiled r -> Compiled r
forall r. String -> Compiled r -> Compiled r
try String
"Or" (Pattern -> Compiled r
forall r. Pattern -> Compiled r
compile Pattern
p1) Stack -> Maybe Char -> Text -> r
cp2 Stack -> Maybe Char -> Text -> r
success
compile (Join [Pattern]
ps) !Stack -> Maybe Char -> Text -> r
err !Stack -> Maybe Char -> Text -> r
success = [Pattern] -> Stack -> Maybe Char -> Text -> r
go [Pattern]
ps
  where
    go :: [Pattern] -> Stack -> Maybe Char -> Text -> r
go [] = Stack -> Maybe Char -> Text -> r
success
    go (Pattern
p : [Pattern]
ps) =
      let pc :: Stack -> Maybe Char -> Text -> r
pc = Pattern -> Compiled r
forall r. Pattern -> Compiled r
compile Pattern
p Stack -> Maybe Char -> Text -> r
err Stack -> Maybe Char -> Text -> r
psc
          psc :: Stack -> Maybe Char -> Text -> r
psc = Pattern -> Compiled r
forall r. Pattern -> Compiled r
compile ([Pattern] -> Pattern
Join [Pattern]
ps) Stack -> Maybe Char -> Text -> r
err Stack -> Maybe Char -> Text -> r
success
       in Stack -> Maybe Char -> Text -> r
pc
compile (Char CharPattern
cp) !Stack -> Maybe Char -> Text -> r
err !Stack -> Maybe Char -> Text -> r
success = Stack -> Maybe Char -> Text -> r
go
  where
    ok :: Char -> Bool
ok = CharPattern -> Char -> Bool
charPatternPred CharPattern
cp
    go :: Stack -> Maybe Char -> Text -> r
go Stack
acc Maybe Char
c Text
t = case Text -> Maybe (Char, Text)
Text.uncons Text
t of
      Just (Char
ch, Text
rem) | Char -> Bool
ok Char
ch -> Stack -> Maybe Char -> Text -> r
success Stack
acc (Char -> Maybe Char
forall a. a -> Maybe a
Just Char
ch) Text
rem
      Maybe (Char, Text)
_ -> Stack -> Maybe Char -> Text -> r
err Stack
acc Maybe Char
c Text
t
compile (Many Bool
correct Pattern
p) !Stack -> Maybe Char -> Text -> r
_ !Stack -> Maybe Char -> Text -> r
success = case Pattern
p of
  Char CharPattern
Any ->
    ( \Stack
acc Maybe Char
c Text
t -> case Text -> Maybe (Text, Char)
Text.unsnoc Text
t of
        Just (Text
_, Char
c) -> Stack -> Maybe Char -> Text -> r
success Stack
acc (Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c) Text
Text.empty
        Maybe (Text, Char)
Nothing -> Stack -> Maybe Char -> Text -> r
success Stack
acc Maybe Char
c Text
t
    )
  Char CharPattern
cp -> (Char -> Bool) -> Stack -> Maybe Char -> Text -> r
walker (CharPattern -> Char -> Bool
charPatternPred CharPattern
cp)
  Pattern
p -> Stack -> Maybe Char -> Text -> r
go
    where
      go :: Stack -> Maybe Char -> Text -> r
go
        | Bool
correct = String -> Compiled r -> Compiled r
forall r. String -> Compiled r -> Compiled r
try String
"Many" (Pattern -> Compiled r
forall r. Pattern -> Compiled r
compile Pattern
p) Stack -> Maybe Char -> Text -> r
success Stack -> Maybe Char -> Text -> r
success'
        | Bool
otherwise = Pattern -> Compiled r
forall r. Pattern -> Compiled r
compile Pattern
p Stack -> Maybe Char -> Text -> r
success Stack -> Maybe Char -> Text -> r
success'
      success' :: Stack -> Maybe Char -> Text -> r
success' Stack
acc Maybe Char
c Text
rem =
        if Text -> Int
Text.size Text
rem Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
          then
            Stack -> Maybe Char -> Text -> r
success Stack
acc Maybe Char
c Text
rem
          else Stack -> Maybe Char -> Text -> r
go Stack
acc Maybe Char
c Text
rem
  where
    walker :: (Char -> Bool) -> Stack -> Maybe Char -> Text -> r
walker Char -> Bool
ok = Stack -> Maybe Char -> Text -> r
go
      where
        go :: Stack -> Maybe Char -> Text -> r
go Stack
acc Maybe Char
c Text
t = case Text -> Maybe (Chunk, Text)
Text.unconsChunk Text
t of
          Maybe (Chunk, Text)
Nothing -> Stack -> Maybe Char -> Text -> r
success Stack
acc Maybe Char
c Text
t
          Just (Chunk -> Text
Text.chunkToText -> Text
txt, Text
t) -> case (Char -> Bool) -> Text -> (Text, Text)
DT.span Char -> Bool
ok Text
txt of
            (Text
prefix, Text
rem) -> case Text -> Maybe (Text, Char)
DT.unsnoc Text
prefix of
              -- moving the remainder to the root of the tree is much more efficient
              -- since the next uncons will be O(1) rather than O(log n)
              -- this can't unbalance the tree too badly since these promoted chunks
              -- are being consumed and will get removed by a subsequent uncons
              Just (Text
_, Char
c)
                | Text -> Bool
DT.null Text
rem -> Stack -> Maybe Char -> Text -> r
go Stack
acc (Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c) Text
t
                | Bool
otherwise -> Stack -> Maybe Char -> Text -> r
success Stack
acc (Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c) (Text -> Text -> Text
Text.appendUnbalanced (Text -> Text
Text.fromText Text
rem) Text
t)
              Maybe (Text, Char)
Nothing
                | Text -> Bool
DT.null Text
rem -> Stack -> Maybe Char -> Text -> r
go Stack
acc Maybe Char
c Text
t
                | Bool
otherwise -> Stack -> Maybe Char -> Text -> r
success Stack
acc Maybe Char
c (Text -> Text -> Text
Text.appendUnbalanced (Text -> Text
Text.fromText Text
rem) Text
t)
    {-# INLINE walker #-}
compile (Replicate Int
m Int
n Pattern
p) !Stack -> Maybe Char -> Text -> r
err !Stack -> Maybe Char -> Text -> r
success = case Pattern
p of
  Char CharPattern
Any -> \Stack
acc Maybe Char
oc Text
t ->
    let sz :: Int
sz = Text -> Int
Text.size Text
t
     in if Int
sz Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
m
          then Stack -> Maybe Char -> Text -> r
err Stack
acc Maybe Char
oc Text
t
          else
            if Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
1
              then Stack -> Maybe Char -> Text -> r
success Stack
acc Maybe Char
oc Text
t
              else case Text -> Maybe (Char, Text)
Text.uncons (Int -> Text -> Text
Text.drop (Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) (Int
sz Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)) Text
t) of
                Just (Char
c, Text
rem) -> Stack -> Maybe Char -> Text -> r
success Stack
acc (Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c) Text
rem
                Maybe (Char, Text)
Nothing -> Stack -> Maybe Char -> Text -> r
success Stack
acc Maybe Char
oc Text
Text.empty
  Char CharPattern
cp -> (Char -> Bool) -> Stack -> Maybe Char -> Text -> r
dropper (CharPattern -> Char -> Bool
charPatternPred CharPattern
cp)
  Pattern
_ -> String -> Compiled r -> Compiled r
forall r. String -> Compiled r -> Compiled r
try String
"Replicate" (Int -> Compiled r
go1 Int
m) Stack -> Maybe Char -> Text -> r
err (Int -> Stack -> Maybe Char -> Text -> r
go2 (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
m))
  where
    go1 :: Int -> Compiled r
go1 Int
0 = \Stack -> Maybe Char -> Text -> r
_err Stack -> Maybe Char -> Text -> r
success Stack
stk Maybe Char
oc Text
rem -> Stack -> Maybe Char -> Text -> r
success Stack
stk Maybe Char
oc Text
rem
    go1 Int
n = \Stack -> Maybe Char -> Text -> r
err Stack -> Maybe Char -> Text -> r
success -> Pattern -> Compiled r
forall r. Pattern -> Compiled r
compile Pattern
p Stack -> Maybe Char -> Text -> r
err (Int -> Compiled r
go1 (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) Stack -> Maybe Char -> Text -> r
err Stack -> Maybe Char -> Text -> r
success)
    go2 :: Int -> Stack -> Maybe Char -> Text -> r
go2 Int
0 = Stack -> Maybe Char -> Text -> r
success
    go2 Int
n = String -> Compiled r -> Compiled r
forall r. String -> Compiled r -> Compiled r
try String
"Replicate" (Pattern -> Compiled r
forall r. Pattern -> Compiled r
compile Pattern
p) Stack -> Maybe Char -> Text -> r
success (Int -> Stack -> Maybe Char -> Text -> r
go2 (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1))

    dropper :: (Char -> Bool) -> Stack -> Maybe Char -> Text -> r
dropper Char -> Bool
ok Stack
acc Maybe Char
oc Text
t
      | (Int
i, Text
rest) <- (Char -> Bool) -> Int -> Text -> (Int, Text)
Text.dropWhileMax Char -> Bool
ok Int
n Text
t,
        Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
m =
          let lastDropped :: Maybe Char
lastDropped = if Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0 then Int -> Text -> Maybe Char
Text.at (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) Text
t else Maybe Char
oc
           in Stack -> Maybe Char -> Text -> r
success Stack
acc Maybe Char
lastDropped Text
rest
      | Bool
otherwise = Stack -> Maybe Char -> Text -> r
err Stack
acc Maybe Char
oc Text
t
compile (Lookahead Pattern
p) !Stack -> Maybe Char -> Text -> r
err !Stack -> Maybe Char -> Text -> r
success = Stack -> Maybe Char -> Text -> r
cp
  where
    cp :: Stack -> Maybe Char -> Text -> r
cp = String -> Compiled r -> Compiled r
forall r. String -> Compiled r -> Compiled r
lookahead String
"Lookahead" (Pattern -> Compiled r
forall r. Pattern -> Compiled r
compile Pattern
p) Stack -> Maybe Char -> Text -> r
err Stack -> Maybe Char -> Text -> r
success
compile (NegativeLookahead Pattern
p) !Stack -> Maybe Char -> Text -> r
err !Stack -> Maybe Char -> Text -> r
success = Stack -> Maybe Char -> Text -> r
cp
  where
    cp :: Stack -> Maybe Char -> Text -> r
cp = String -> Compiled r -> Compiled r
forall r. String -> Compiled r -> Compiled r
lookahead String
"NegativeLookahead" (Pattern -> Compiled r
forall r. Pattern -> Compiled r
compile Pattern
p) Stack -> Maybe Char -> Text -> r
success Stack -> Maybe Char -> Text -> r
err
compile (Lookbehind1 CharPattern
cp) !Stack -> Maybe Char -> Text -> r
err !Stack -> Maybe Char -> Text -> r
success = \Stack
acc Maybe Char
oc Text
t ->
  case Maybe Char
oc of
    Just Char
c ->
      if CharPattern -> Char -> Bool
charPatternPred CharPattern
cp Char
c
        then
          Stack -> Maybe Char -> Text -> r
success Stack
acc Maybe Char
oc Text
t
        else
          Stack -> Maybe Char -> Text -> r
err Stack
acc Maybe Char
oc Text
t
    Maybe Char
Nothing -> Stack -> Maybe Char -> Text -> r
err Stack
acc Maybe Char
oc Text
t
compile (NegativeLookbehind1 CharPattern
cp) !Stack -> Maybe Char -> Text -> r
err !Stack -> Maybe Char -> Text -> r
success = \Stack
acc Maybe Char
oc Text
t ->
  case Maybe Char
oc of
    Just Char
c ->
      if CharPattern -> Char -> Bool
charPatternPred CharPattern
cp Char
c
        then
          Stack -> Maybe Char -> Text -> r
err Stack
acc Maybe Char
oc Text
t
        else
          Stack -> Maybe Char -> Text -> r
success Stack
acc Maybe Char
oc Text
t
    Maybe Char
Nothing -> Stack -> Maybe Char -> Text -> r
success Stack
acc Maybe Char
oc Text
t

charInPred, charNotInPred :: [Char] -> Char -> Bool
charInPred :: String -> Char -> Bool
charInPred [] = Bool -> Char -> Bool
forall a b. a -> b -> a
const Bool
False
charInPred (Char
c : String
chs) = let ok :: Char -> Bool
ok = String -> Char -> Bool
charInPred String
chs in \Char
ci -> Char
ci Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
c Bool -> Bool -> Bool
|| Char -> Bool
ok Char
ci
charNotInPred :: String -> Char -> Bool
charNotInPred [] = Bool -> Char -> Bool
forall a b. a -> b -> a
const Bool
True
charNotInPred (Char
c : String
chs) = let ok :: Char -> Bool
ok = String -> Char -> Bool
charNotInPred String
chs in (\Char
ci -> Char
ci Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
c Bool -> Bool -> Bool
&& Char -> Bool
ok Char
ci)

charPatternPred :: CharPattern -> Char -> Bool
charPatternPred :: CharPattern -> Char -> Bool
charPatternPred CharPattern
Any = Bool -> Char -> Bool
forall a b. a -> b -> a
const Bool
True
charPatternPred (Not CharPattern
cp) = let notOk :: Char -> Bool
notOk = CharPattern -> Char -> Bool
charPatternPred CharPattern
cp in \Char
ci -> Bool -> Bool
not (Char -> Bool
notOk Char
ci)
charPatternPred (Union CharPattern
cp1 CharPattern
cp2) = let ok1 :: Char -> Bool
ok1 = CharPattern -> Char -> Bool
charPatternPred CharPattern
cp1; ok2 :: Char -> Bool
ok2 = CharPattern -> Char -> Bool
charPatternPred CharPattern
cp2 in \Char
ci -> Char -> Bool
ok1 Char
ci Bool -> Bool -> Bool
|| Char -> Bool
ok2 Char
ci
charPatternPred (Intersect CharPattern
cp1 CharPattern
cp2) = let ok1 :: Char -> Bool
ok1 = CharPattern -> Char -> Bool
charPatternPred CharPattern
cp1; ok2 :: Char -> Bool
ok2 = CharPattern -> Char -> Bool
charPatternPred CharPattern
cp2 in \Char
ci -> Char -> Bool
ok1 Char
ci Bool -> Bool -> Bool
&& Char -> Bool
ok2 Char
ci
charPatternPred (CharRange Char
c1 Char
c2) = \Char
ci -> Char
ci Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
c1 Bool -> Bool -> Bool
&& Char
ci Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c2
charPatternPred (CharSet String
cs) = String -> Char -> Bool
charInPred String
cs
charPatternPred (CharClass CharClass
cc) = CharClass -> Char -> Bool
charClassPred CharClass
cc

charClassPred :: CharClass -> Char -> Bool
charClassPred :: CharClass -> Char -> Bool
charClassPred CharClass
AlphaNum = Char -> Bool
isAlphaNum
charClassPred CharClass
Upper = Char -> Bool
isUpper
charClassPred CharClass
Lower = Char -> Bool
isLower
charClassPred CharClass
Whitespace = Char -> Bool
isSpace
charClassPred CharClass
Control = Char -> Bool
isControl
charClassPred CharClass
Printable = Char -> Bool
isPrint
charClassPred CharClass
MarkChar = Char -> Bool
isMark
charClassPred CharClass
Number = Char -> Bool
isNumber
charClassPred CharClass
Punctuation = Char -> Bool
isPunctuation
charClassPred CharClass
Symbol = Char -> Bool
isSymbol
charClassPred CharClass
Separator = Char -> Bool
isSeparator
charClassPred CharClass
Letter = Char -> Bool
isLetter

-- runs c and if it fails, restores state to what it was before
try :: String -> Compiled r -> Compiled r
try :: forall r. String -> Compiled r -> Compiled r
try String
msg Compiled r
c Stack -> Maybe Char -> Text -> r
err Stack -> Maybe Char -> Text -> r
success Stack
stk Maybe Char
oc Text
rem =
  Compiled r
c Stack -> Maybe Char -> Text -> r
err' Stack -> Maybe Char -> Text -> r
success' (([Text] -> [Text]) -> Maybe Char -> Text -> Stack -> Stack
Mark [Text] -> [Text]
forall a. a -> a
id Maybe Char
oc Text
rem Stack
stk) Maybe Char
oc Text
rem
  where
    success' :: Stack -> Maybe Char -> Text -> r
success' Stack
stk Maybe Char
oc Text
rem = case Stack
stk of
      Mark [Text] -> [Text]
caps Maybe Char
_ Text
_ Stack
stk -> Stack -> Maybe Char -> Text -> r
success (([Text] -> [Text]) -> Stack -> Stack
pushCaptures [Text] -> [Text]
caps Stack
stk) Maybe Char
oc Text
rem
      Stack
_ -> String -> r
forall a. HasCallStack => String -> a
error (String -> r) -> String -> r
forall a b. (a -> b) -> a -> b
$ String
"Pattern compiler error in: " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
msg
    err' :: Stack -> Maybe Char -> Text -> r
err' Stack
stk Maybe Char
_ Text
_ = case Stack
stk of
      Mark [Text] -> [Text]
_ Maybe Char
oc Text
rem Stack
stk -> Stack -> Maybe Char -> Text -> r
err Stack
stk Maybe Char
oc Text
rem
      Stack
_ -> String -> r
forall a. HasCallStack => String -> a
error (String -> r) -> String -> r
forall a b. (a -> b) -> a -> b
$ String
"Pattern compiler error in: " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
msg
{-# INLINE try #-}

-- runs c and restores state to what it was before,
-- regardless of whether it succeeds or not
lookahead :: String -> Compiled r -> Compiled r
lookahead :: forall r. String -> Compiled r -> Compiled r
lookahead String
msg Compiled r
c Stack -> Maybe Char -> Text -> r
err Stack -> Maybe Char -> Text -> r
success Stack
stk Maybe Char
oc Text
rem =
  Compiled r
c Stack -> Maybe Char -> Text -> r
err' Stack -> Maybe Char -> Text -> r
success' (([Text] -> [Text]) -> Maybe Char -> Text -> Stack -> Stack
Mark [Text] -> [Text]
forall a. a -> a
id Maybe Char
oc Text
rem Stack
stk) Maybe Char
oc Text
rem
  where
    success' :: Stack -> Maybe Char -> Text -> r
success' Stack
stk Maybe Char
_ Text
_ = case Stack
stk of
      Mark [Text] -> [Text]
caps Maybe Char
oc Text
rem Stack
stk -> Stack -> Maybe Char -> Text -> r
success (([Text] -> [Text]) -> Stack -> Stack
pushCaptures [Text] -> [Text]
caps Stack
stk) Maybe Char
oc Text
rem
      Stack
_ -> String -> r
forall a. HasCallStack => String -> a
error (String -> r) -> String -> r
forall a b. (a -> b) -> a -> b
$ String
"Pattern compiler error in: " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
msg
    err' :: Stack -> Maybe Char -> Text -> r
err' Stack
stk Maybe Char
_ Text
_ = case Stack
stk of
      Mark [Text] -> [Text]
_ Maybe Char
oc Text
rem Stack
stk -> Stack -> Maybe Char -> Text -> r
err Stack
stk Maybe Char
oc Text
rem
      Stack
_ -> String -> r
forall a. HasCallStack => String -> a
error (String -> r) -> String -> r
forall a b. (a -> b) -> a -> b
$ String
"Pattern compiler error in: " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
msg
{-# INLINE lookahead #-}