-- |
-- Copyright:   (c) 2021-2022 Andrew Lelechenko
-- Licence:     BSD3
-- Maintainer:  Andrew Lelechenko <andrew.lelechenko@gmail.com>

{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnliftedFFITypes #-}

module Data.Text.Lines.Internal
  ( TextLines(..)
  , fromText
  , null
  -- * Lines
  , lines
  , lengthInLines
  , splitAtLine
  -- * Code points
  , length
  , span
  , splitAt
  , Position(..)
  , lengthAsPosition
  , splitAtPosition
  -- * Utils
  , textLines
  , binarySearch
  , wordToInt
  , intToWord
  ) where

import Prelude ((+), (-), (*), subtract, quot, fromIntegral, seq, error)
import Control.DeepSeq (NFData, rnf)
import Data.Bits (toIntegralSized)
import Data.Bool (Bool, otherwise, not)
import Data.Char (Char)
import Data.Eq (Eq, (==))
import Data.Foldable (foldMap)
import Data.Function (on, (.), ($))
import Data.Int (Int)
import Data.List (map, mapAccumL, filter)
import Data.List.NonEmpty (NonEmpty(..))
import Data.Maybe (Maybe(..))
import Data.Monoid (Monoid(..))
import Data.Ord (Ord, compare, (<=), (<), (>))
import Data.Semigroup (Semigroup(..))
import Data.String (IsString(..))
import qualified Data.Text.Array as TA
import Data.Text.Internal (Text(..))
import qualified Data.Text as T
import Data.Tuple (snd)
import qualified Data.Vector.Unboxed as U
import Data.Word (Word)
import Foreign.C.Types (CSize(..))
import GHC.Exts (ByteArray#)
import System.IO (IO)
import System.IO.Unsafe (unsafeDupablePerformIO)
import System.Posix.Types (CSsize(..))
import Text.Show (Show, show)

#if MIN_VERSION_text(2,0,0)
#else
import Data.Bits (shiftR)
#endif

#ifdef DEBUG
import Data.Bool ((&&))
import Data.Char (generalCategory, GeneralCategory(..))
import Data.Eq ((/=))
import Data.List ((++))
import Data.Ord ((>=))
import Data.Text.Internal (showText)
import GHC.Stack (HasCallStack)
#else
#define HasCallStack ()
#endif

-- | A wrapper around 'Text' for fast line/column navigation.
-- Concatenation takes linear time.
--
-- This is a building block for 'Data.Text.Rope.Rope',
-- which provides logarithmic concatenation.
data TextLines = TextLines
  { TextLines -> Text
toText     :: !Text
  -- ^ Extract 'Text', O(1).
  , TextLines -> Vector Int
_nlIndices :: !(U.Vector Int)
  }

instance NFData TextLines where
  rnf :: TextLines -> ()
rnf = (TextLines -> () -> ()
forall a b. a -> b -> b
`seq` ())

instance Eq TextLines where
  == :: TextLines -> TextLines -> Bool
(==) = Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
(==) (Text -> Text -> Bool)
-> (TextLines -> Text) -> TextLines -> TextLines -> Bool
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` TextLines -> Text
toText

instance Ord TextLines where
  compare :: TextLines -> TextLines -> Ordering
compare = Text -> Text -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (Text -> Text -> Ordering)
-> (TextLines -> Text) -> TextLines -> TextLines -> Ordering
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` TextLines -> Text
toText

instance Show TextLines where
#ifdef DEBUG
  show (TextLines x y) = "TextLines { " ++ showText x ++ ", " ++ show y ++ " }"
#else
  show :: TextLines -> String
show = Text -> String
forall a. Show a => a -> String
show (Text -> String) -> (TextLines -> Text) -> TextLines -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TextLines -> Text
toText
#endif

instance IsString TextLines where
  fromString :: String -> TextLines
fromString = Text -> TextLines
fromText (Text -> TextLines) -> (String -> Text) -> String -> TextLines
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
forall a. IsString a => String -> a
fromString

-- | Create from 'Text', linear time.
fromText :: HasCallStack => Text -> TextLines
fromText :: Text -> TextLines
fromText Text
t = Text -> Vector Int -> TextLines
textLines Text
t ([Int] -> Vector Int
forall a. Unbox a => [a] -> Vector a
U.fromList ([Int] -> Vector Int) -> [Int] -> Vector Int
forall a b. (a -> b) -> a -> b
$ Text -> [Int]
nlIndices Text
t)

nlIndices :: Text -> [Int]
#if MIN_VERSION_text(2,0,0)
nlIndices :: Text -> [Int]
nlIndices (Text (TA.ByteArray ByteArray#
arr#) Int
off Int
len) = Int -> [Int]
go Int
off
  where
    go :: Int -> [Int]
go !Int
n
      | Int
delta Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 = []
      | Bool
otherwise = (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
delta) Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: Int -> [Int]
go (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
delta Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
      where
        delta :: Int
delta = CSsize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (CSsize -> Int) -> CSsize -> Int
forall a b. (a -> b) -> a -> b
$ IO CSsize -> CSsize
forall a. IO a -> a
unsafeDupablePerformIO (IO CSsize -> CSsize) -> IO CSsize -> CSsize
forall a b. (a -> b) -> a -> b
$
          ByteArray# -> CSize -> CSize -> IO CSsize
memchr ByteArray#
arr# (Int -> CSize
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n) (Int -> CSize
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
n))
#else
nlIndices (Text arr off len) = go off
  where
    go !n
      | delta < 0 = []
      | TA.unsafeIndex arr (n + delta) == 0x0A = (n + delta) : go (n + delta + 1)
      | otherwise = go (n + delta + 1)
      where
        delta = fromIntegral (unsafeDupablePerformIO $
          memchr (TA.aBA arr) (2 * fromIntegral n) (2 * fromIntegral (len + off - n))) `shiftR` 1
#endif

foreign import ccall unsafe "_hs_text_lines_memchr0A" memchr
  :: ByteArray# -> CSize -> CSize -> IO CSsize

-- | Check whether a text is empty, O(1).
null :: TextLines -> Bool
null :: TextLines -> Bool
null = Text -> Bool
T.null (Text -> Bool) -> (TextLines -> Text) -> TextLines -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TextLines -> Text
toText

concat :: [TextLines] -> TextLines
concat :: [TextLines] -> TextLines
concat [TextLines]
ts = case [TextLines]
ts' of
  [] -> TextLines
forall a. Monoid a => a
mempty
  [TextLines
x] -> TextLines
x
  [TextLines]
_ -> Text -> Vector Int -> TextLines
textLines
    ([Text] -> Text
T.concat ((TextLines -> Text) -> [TextLines] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map TextLines -> Text
toText [TextLines]
ts'))
    ([Vector Int] -> Vector Int
forall a. Unbox a => [Vector a] -> Vector a
U.concat ((Int, [Vector Int]) -> [Vector Int]
forall a b. (a, b) -> b
snd ((Int -> TextLines -> (Int, Vector Int))
-> Int -> [TextLines] -> (Int, [Vector Int])
forall (t :: * -> *) s a b.
Traversable t =>
(s -> a -> (s, b)) -> s -> t a -> (s, t b)
mapAccumL Int -> TextLines -> (Int, Vector Int)
f Int
0 [TextLines]
ts')))
  where
    ts' :: [TextLines]
ts' = (TextLines -> Bool) -> [TextLines] -> [TextLines]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (TextLines -> Bool) -> TextLines -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TextLines -> Bool
null) [TextLines]
ts
    f :: Int -> TextLines -> (Int, Vector Int)
f Int
l (TextLines (Text Array
_ Int
off Int
len) Vector Int
nls) = (Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len, (Int -> Int) -> Vector Int -> Vector Int
forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
U.map (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
off)) Vector Int
nls)

instance Semigroup TextLines where
  TextLines t1 :: Text
t1@(Text Array
_ Int
off1 Int
len1) Vector Int
s1 <> :: TextLines -> TextLines -> TextLines
<> TextLines t2 :: Text
t2@(Text Array
_ Int
off2 Int
_) Vector Int
s2
    | Text -> Bool
T.null Text
t1 = Text -> Vector Int -> TextLines
textLines Text
t2 Vector Int
s2
    | Text -> Bool
T.null Text
t2 = Text -> Vector Int -> TextLines
textLines Text
t1 Vector Int
s1
    | Bool
otherwise = Text -> Vector Int -> TextLines
textLines
      (Text
t1 Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
t2)
      ((Int -> Int) -> Vector Int -> Vector Int
forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
U.map (Int -> Int -> Int
forall a. Num a => a -> a -> a
subtract Int
off1) Vector Int
s1 Vector Int -> Vector Int -> Vector Int
forall a. Semigroup a => a -> a -> a
<> (Int -> Int) -> Vector Int -> Vector Int
forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
U.map (Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
len1 Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
off2)) Vector Int
s2)
      -- This relies on specific implementation of instance Semigroup Text!

  sconcat :: NonEmpty TextLines -> TextLines
sconcat (TextLines
x :| [TextLines]
xs) = [TextLines] -> TextLines
concat (TextLines
x TextLines -> [TextLines] -> [TextLines]
forall a. a -> [a] -> [a]
: [TextLines]
xs)

  stimes :: forall b. Integral b => b -> TextLines -> TextLines
stimes b
1 TextLines
tl = TextLines
tl
  stimes b
n (TextLines t :: Text
t@(Text Array
_ Int
off Int
len) Vector Int
nls)
    | b
n b -> b -> Bool
forall a. Eq a => a -> a -> Bool
== Int -> b
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n' = Text -> Vector Int -> TextLines
textLines Text
t' Vector Int
nls'
    | Bool
otherwise = String -> TextLines
forall a. HasCallStack => String -> a
error String
"Data.Text.Lines: stimes argument is too large"
    where
      n' :: Int
n' = b -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral b
n
      t' :: Text
t' = Int -> Text -> Text
T.replicate Int
n' Text
t
      nls' :: Vector Int
nls' = (Int -> Vector Int) -> [Int] -> Vector Int
forall m a. Monoid m => (a -> m) -> [a] -> m
forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap (\Int
i -> (Int -> Int) -> Vector Int -> Vector Int
forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
U.map (\Int
j -> Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
i) Vector Int
nls) [Int
0..Int
n'Int -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1]

instance Monoid TextLines where
  mempty :: TextLines
mempty = Text -> Vector Int -> TextLines
textLines Text
forall a. Monoid a => a
mempty Vector Int
forall a. Monoid a => a
mempty
  mappend :: TextLines -> TextLines -> TextLines
mappend = TextLines -> TextLines -> TextLines
forall a. Semigroup a => a -> a -> a
(<>)
  mconcat :: [TextLines] -> TextLines
mconcat = [TextLines] -> TextLines
concat

-- | Equivalent to 'Data.List.length' . 'lines', but in O(1).
--
-- >>> :set -XOverloadedStrings
-- >>> lengthInLines ""
-- 0
-- >>> lengthInLines "foo"
-- 1
-- >>> lengthInLines "foo\n"
-- 1
-- >>> lengthInLines "foo\n\n"
-- 2
-- >>> lengthInLines "foo\nbar"
-- 2
--
lengthInLines :: TextLines -> Word
lengthInLines :: TextLines -> Word
lengthInLines (TextLines Text
t Vector Int
nls) = case Text -> Maybe (Text, Char)
T.unsnoc Text
t of
  Maybe (Text, Char)
Nothing -> Word
0
  Just (Text
_, Char
ch) -> Int -> Word
intToWord (Int -> Word) -> Int -> Word
forall a b. (a -> b) -> a -> b
$ Vector Int -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector Int
nls Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (if Char
ch Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n' then Int
0 else Int
1)

-- | Split into lines by @\\n@, similar to @Data.Text.@'Data.Text.lines'.
-- Each line is produced in O(1).
--
-- >>> :set -XOverloadedStrings
-- >>> lines ""
-- []
-- >>> lines "foo"
-- ["foo"]
-- >>> lines "foo\n"
-- ["foo"]
-- >>> lines "foo\n\n"
-- ["foo",""]
-- >>> lines "foo\nbar"
-- ["foo","bar"]
--
lines :: TextLines -> [Text]
lines :: TextLines -> [Text]
lines (TextLines (Text Array
arr Int
off Int
len) Vector Int
nls) = Int -> [Int] -> [Text]
go Int
off (Vector Int -> [Int]
forall a. Unbox a => Vector a -> [a]
U.toList Vector Int
nls)
  where
    arrLen :: Int
arrLen = Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len
    go :: Int -> [Int] -> [Text]
go Int
i [] = [Array -> Int -> Int -> Text
Text Array
arr Int
i (Int
arrLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i) | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
arrLen]
    go Int
i (Int
x : [Int]
xs) = Array -> Int -> Int -> Text
Text Array
arr Int
i (Int
x Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i) Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: Int -> [Int] -> [Text]
go (Int
x Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) [Int]
xs

-- | Split at given line, O(1).
--
-- >>> :set -XOverloadedStrings
-- >>> map (\l -> splitAtLine l "foo\nbar") [0..3]
-- [("","foo\nbar"),("foo\n","bar"),("foo\nbar",""),("foo\nbar","")]
--
splitAtLine :: HasCallStack => Word -> TextLines -> (TextLines, TextLines)
splitAtLine :: Word -> TextLines -> (TextLines, TextLines)
splitAtLine Word
k = Position -> TextLines -> (TextLines, TextLines)
splitAtPosition (Word -> Word -> Position
Position Word
k Word
0)

-------------------------------------------------------------------------------
-- Unicode code points

-- | Length in code points, similar to @Data.Text.@'Data.Text.length'.
-- Takes linear time.
--
-- >>> :set -XOverloadedStrings
-- >>> length "fя𐀀"
-- 3
-- >>> Data.Text.Utf16.Lines.length "fя𐀀"
-- 4
--
length :: TextLines -> Word
length :: TextLines -> Word
length = Int -> Word
intToWord (Int -> Word) -> (TextLines -> Int) -> TextLines -> Word
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Int
T.length (Text -> Int) -> (TextLines -> Text) -> TextLines -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TextLines -> Text
toText

-- | Represent a position in a text.
data Position = Position
  { Position -> Word
posLine   :: !Word -- ^ Line.
  , Position -> Word
posColumn :: !Word -- ^ Column in code points.
  } deriving (Position -> Position -> Bool
(Position -> Position -> Bool)
-> (Position -> Position -> Bool) -> Eq Position
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Position -> Position -> Bool
== :: Position -> Position -> Bool
$c/= :: Position -> Position -> Bool
/= :: Position -> Position -> Bool
Eq, Eq Position
Eq Position =>
(Position -> Position -> Ordering)
-> (Position -> Position -> Bool)
-> (Position -> Position -> Bool)
-> (Position -> Position -> Bool)
-> (Position -> Position -> Bool)
-> (Position -> Position -> Position)
-> (Position -> Position -> Position)
-> Ord Position
Position -> Position -> Bool
Position -> Position -> Ordering
Position -> Position -> Position
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: Position -> Position -> Ordering
compare :: Position -> Position -> Ordering
$c< :: Position -> Position -> Bool
< :: Position -> Position -> Bool
$c<= :: Position -> Position -> Bool
<= :: Position -> Position -> Bool
$c> :: Position -> Position -> Bool
> :: Position -> Position -> Bool
$c>= :: Position -> Position -> Bool
>= :: Position -> Position -> Bool
$cmax :: Position -> Position -> Position
max :: Position -> Position -> Position
$cmin :: Position -> Position -> Position
min :: Position -> Position -> Position
Ord, Int -> Position -> ShowS
[Position] -> ShowS
Position -> String
(Int -> Position -> ShowS)
-> (Position -> String) -> ([Position] -> ShowS) -> Show Position
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Position -> ShowS
showsPrec :: Int -> Position -> ShowS
$cshow :: Position -> String
show :: Position -> String
$cshowList :: [Position] -> ShowS
showList :: [Position] -> ShowS
Show)

instance NFData Position where
  rnf :: Position -> ()
rnf = (Position -> () -> ()
forall a b. a -> b -> b
`seq` ())

-- | Associativity does not hold when 'posLine' overflows.
instance Semigroup Position where
  Position Word
l1 Word
c1 <> :: Position -> Position -> Position
<> Position Word
l2 Word
c2 =
    Word -> Word -> Position
Position (Word
l1 Word -> Word -> Word
forall a. Num a => a -> a -> a
+ Word
l2) (if Word
l2 Word -> Word -> Bool
forall a. Eq a => a -> a -> Bool
== Word
0 then Word
c1 Word -> Word -> Word
forall a. Num a => a -> a -> a
+ Word
c2 else Word
c2)

instance Monoid Position where
  mempty :: Position
mempty = Word -> Word -> Position
Position Word
0 Word
0
  mappend :: Position -> Position -> Position
mappend = Position -> Position -> Position
forall a. Semigroup a => a -> a -> a
(<>)

-- | Measure text length as an amount of lines and columns.
-- Time is proportional to the length of the last line.
--
-- >>> :set -XOverloadedStrings
-- >>> lengthAsPosition "f𐀀"
-- Position {posLine = 0, posColumn = 2}
-- >>> lengthAsPosition "f\n𐀀"
-- Position {posLine = 1, posColumn = 1}
-- >>> lengthAsPosition "f\n𐀀\n"
-- Position {posLine = 2, posColumn = 0}
--
lengthAsPosition
  :: TextLines
  -> Position
lengthAsPosition :: TextLines -> Position
lengthAsPosition (TextLines (Text Array
arr Int
off Int
len) Vector Int
nls) = Position
  { posLine :: Word
posLine   = Int -> Word
intToWord (Int -> Word) -> Int -> Word
forall a b. (a -> b) -> a -> b
$ Vector Int -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector Int
nls
  , posColumn :: Word
posColumn = Int -> Word
intToWord (Int -> Word) -> Int -> Word
forall a b. (a -> b) -> a -> b
$ Text -> Int
T.length (Text -> Int) -> Text -> Int
forall a b. (a -> b) -> a -> b
$ Array -> Int -> Int -> Text
Text Array
arr Int
nl (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
nl)
  }
  where
    nl :: Int
nl = if Vector Int -> Bool
forall a. Unbox a => Vector a -> Bool
U.null Vector Int
nls then Int
off else Vector Int -> Int
forall a. Unbox a => Vector a -> a
U.last Vector Int
nls Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1

-- | Span by a predicate, similar to @Data.Text.@'Data.Text.span'.
-- Takes linear (by length of the prefix satisfying the predicate) time.
span
  :: HasCallStack
  => (Char -> Bool)
  -> TextLines
  -> (TextLines, TextLines)
span :: (Char -> Bool) -> TextLines -> (TextLines, TextLines)
span Char -> Bool
f tl :: TextLines
tl@(TextLines tx :: Text
tx@(Text Array
arr Int
off Int
_) Vector Int
nls)
  | Int
len' Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = (TextLines
forall a. Monoid a => a
mempty, TextLines
tl)
  | Bool
otherwise = (TextLines
y, TextLines
z)
  where
    (Text Array
_ Int
off' Int
len', Text
tz) = (Char -> Bool) -> Text -> (Text, Text)
T.span Char -> Bool
f Text
tx
    -- This assumes that offset is the same as in tx
    n :: Int
n = Vector Int -> Int -> Int
forall a. (Ord a, Unbox a) => Vector a -> a -> Int
binarySearch Vector Int
nls (Int
off' Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len')
    y :: TextLines
y = Text -> Vector Int -> TextLines
textLines (Array -> Int -> Int -> Text
Text Array
arr Int
off (Int
off' Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len' Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
off)) (Int -> Vector Int -> Vector Int
forall a. Unbox a => Int -> Vector a -> Vector a
U.take Int
n Vector Int
nls)
    z :: TextLines
z = Text -> Vector Int -> TextLines
textLines Text
tz (Int -> Vector Int -> Vector Int
forall a. Unbox a => Int -> Vector a -> Vector a
U.drop Int
n Vector Int
nls)

-- | Combination of 'splitAtLine' and subsequent 'splitAt'.
-- Time is linear in 'posColumn', but does not depend on 'posLine'.
--
-- >>> :set -XOverloadedStrings
-- >>> splitAtPosition (Position 1 0) "f\n𐀀я"
-- ("f\n","𐀀я")
-- >>> splitAtPosition (Position 1 1) "f\n𐀀я"
-- ("f\n𐀀","я")
-- >>> splitAtPosition (Position 1 2) "f\n𐀀я"
-- ("f\n𐀀я","")
-- >>> splitAtPosition (Position 0 2) "f\n𐀀я"
-- ("f\n","𐀀я")
-- >>> splitAtPosition (Position 0 3) "f\n𐀀я"
-- ("f\n𐀀","я")
-- >>> splitAtPosition (Position 0 4) "f\n𐀀я"
-- ("f\n𐀀я","")
--
splitAtPosition
  :: HasCallStack
  => Position
  -> TextLines
  -> (TextLines, TextLines)
splitAtPosition :: Position -> TextLines -> (TextLines, TextLines)
splitAtPosition (Position Word
line Word
column) (TextLines (Text Array
arr Int
off Int
len) Vector Int
nls) = (TextLines
y, TextLines
z)
  where
    arrLen :: Int
arrLen = Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len
    nl :: Int
nl
      | Word
line Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
0 = Int
off
      | Word
line Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
> Int -> Word
intToWord (Vector Int -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector Int
nls) = Int
arrLen
      | Bool
otherwise = Vector Int
nls Vector Int -> Int -> Int
forall a. Unbox a => Vector a -> Int -> a
U.! (Word -> Int
wordToInt Word
line Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
    tx :: Text
tx = Array -> Int -> Int -> Text
Text Array
arr Int
nl (Int
arrLen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
nl)
    (Text Array
_ Int
off' Int
len', Text
tz)
      | Word
column Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
0 = (Array -> Int -> Int -> Text
Text Array
arr Int
nl Int
0, Text
tx)
      | Bool
otherwise = case Word -> Maybe Int
forall a b.
(Integral a, Integral b, Bits a, Bits b) =>
a -> Maybe b
toIntegralSized Word
column of
        Maybe Int
Nothing -> (Text
tx, Text
forall a. Monoid a => a
mempty)
        Just Int
column' -> Int -> Text -> (Text, Text)
T.splitAt Int
column' Text
tx
    -- This assumes that offset is the same as in tx
    n :: Int
n = Vector Int -> Int -> Int
forall a. (Ord a, Unbox a) => Vector a -> a -> Int
binarySearch Vector Int
nls (Int
off' Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len')
    y :: TextLines
y = Text -> Vector Int -> TextLines
textLines (Array -> Int -> Int -> Text
Text Array
arr Int
off (Int
off' Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len' Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
off)) (Int -> Vector Int -> Vector Int
forall a. Unbox a => Int -> Vector a -> Vector a
U.take Int
n Vector Int
nls)
    z :: TextLines
z = Text -> Vector Int -> TextLines
textLines Text
tz (Int -> Vector Int -> Vector Int
forall a. Unbox a => Int -> Vector a -> Vector a
U.drop Int
n Vector Int
nls)

-- | Split at given code point, similar to @Data.Text.@'Data.Text.splitAt'.
-- Takes linear time.
--
-- >>> :set -XOverloadedStrings
-- >>> map (\c -> splitAt c "fя𐀀") [0..4]
-- [("","fя𐀀"),("f","я𐀀"),("fя","𐀀"),("fя𐀀",""),("fя𐀀","")]
--
splitAt :: HasCallStack => Word -> TextLines -> (TextLines, TextLines)
splitAt :: Word -> TextLines -> (TextLines, TextLines)
splitAt = Position -> TextLines -> (TextLines, TextLines)
splitAtPosition (Position -> TextLines -> (TextLines, TextLines))
-> (Word -> Position)
-> Word
-> TextLines
-> (TextLines, TextLines)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word -> Word -> Position
Position Word
0

-------------------------------------------------------------------------------
-- Utils

binarySearch
  :: (Ord a, U.Unbox a)
  => U.Vector a
  -> a
  -> Int
binarySearch :: forall a. (Ord a, Unbox a) => Vector a -> a -> Int
binarySearch Vector a
vec a
el
  | Vector a -> Bool
forall a. Unbox a => Vector a -> Bool
U.null Vector a
vec = Int
0
  | a
el a -> a -> Bool
forall a. Ord a => a -> a -> Bool
<= Vector a -> a
forall a. Unbox a => Vector a -> a
U.head Vector a
vec = Int
0
  | Vector a -> a
forall a. Unbox a => Vector a -> a
U.last Vector a
vec a -> a -> Bool
forall a. Ord a => a -> a -> Bool
< a
el = Vector a -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector a
vec
  | Bool
otherwise = Int -> Int -> Int
go Int
0 (Vector a -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector a
vec Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
  where
    go :: Int -> Int -> Int
go Int
i Int
j
      | Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
j = Int
j
      | Vector a
vec Vector a -> Int -> a
forall a. Unbox a => Vector a -> Int -> a
U.! Int
k a -> a -> Bool
forall a. Ord a => a -> a -> Bool
< a
el = Int -> Int -> Int
go Int
k Int
j
      | Bool
otherwise = Int -> Int -> Int
go Int
i Int
k
      where
        k :: Int
k = (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
j) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`quot` Int
2
{-# SPECIALIZE binarySearch :: U.Vector Int -> Int -> Int #-}

intToWord :: Int -> Word
intToWord :: Int -> Word
intToWord = Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral

wordToInt :: Word -> Int
wordToInt :: Word -> Int
wordToInt = Word -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral

-------------------------------------------------------------------------------
-- Debug

#ifdef DEBUG

isValid :: TextLines -> Bool
isValid (TextLines t@(Text arr off len) stops) =
  not containsSurrogates && len >= 0 && go off (U.toList stops)
  where
    arrLen = off + len
    go i [] = T.all (/= '\n') (Text arr i (arrLen - i))
    go i (x : xs) = i <= x
                 && T.all (/= '\n') (Text arr i (x - i))
                 && T.head (Text arr x (arrLen - x)) == '\n'
                 && go (x + 1) xs
    containsSurrogates = T.any ((== Surrogate) . generalCategory) t

textLines :: HasCallStack => Text -> U.Vector Int -> TextLines
textLines x y
  | isValid t = t
  | otherwise = error $ "Data.Text.Lines: violated internal invariant in " ++ show t
  where
    t = TextLines x y

#else

textLines :: HasCallStack => Text -> U.Vector Int -> TextLines
textLines :: Text -> Vector Int -> TextLines
textLines = Text -> Vector Int -> TextLines
TextLines

#endif