module Unison.Hashing.V2.Tokenizable
( Tokenizable (..),
Hashable1 (..),
Token (..),
hashTokenizable,
accumulate,
accumulateToken,
)
where
import Crypto.Hash qualified as CH
import Data.ByteArray qualified as BA
import Data.ByteString qualified as B
import Data.ByteString.Builder (doubleBE, int64BE, toLazyByteString, word64BE)
import Data.ByteString.Lazy qualified as BL
import Data.Map qualified as Map
import Data.Set qualified as Set
import Unison.Hash (Hash)
import Unison.Hash qualified as Hash
import Unison.Prelude
import Unison.Util.Relation (Relation)
import Unison.Util.Relation qualified as Relation
import Unison.Util.Relation3 (Relation3)
import Unison.Util.Relation3 qualified as Relation3
import Unison.Util.Relation4 (Relation4)
import Unison.Util.Relation4 qualified as Relation4
hashingVersion :: Token
hashingVersion :: Token
hashingVersion = Word8 -> Token
Tag Word8
2
data Token
= Tag !Word8
| Bytes !ByteString
| Int !Int64
| Text !Text
| Double !Double
| Hashed !Hash
| Nat !Word64
accumulateToken :: (Tokenizable t) => t -> Token
accumulateToken :: forall t. Tokenizable t => t -> Token
accumulateToken = Hash -> Token
Hashed (Hash -> Token) -> (t -> Hash) -> t -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. t -> Hash
forall t. Tokenizable t => t -> Hash
hashTokenizable
hashTokenizable :: (Tokenizable t) => t -> Hash
hashTokenizable :: forall t. Tokenizable t => t -> Hash
hashTokenizable = [Token] -> Hash
accumulate ([Token] -> Hash) -> (t -> [Token]) -> t -> Hash
forall b c a. (b -> c) -> (a -> b) -> a -> c
. t -> [Token]
forall t. Tokenizable t => t -> [Token]
tokens
class Tokenizable t where
tokens :: t -> [Token]
instance (Tokenizable a) => Tokenizable [a] where
tokens :: [a] -> [Token]
tokens = (a -> Token) -> [a] -> [Token]
forall a b. (a -> b) -> [a] -> [b]
map a -> Token
forall t. Tokenizable t => t -> Token
accumulateToken
instance (Tokenizable a, Tokenizable b) => Tokenizable (a, b) where
tokens :: (a, b) -> [Token]
tokens (a
a, b
b) = [a -> Token
forall t. Tokenizable t => t -> Token
accumulateToken a
a, b -> Token
forall t. Tokenizable t => t -> Token
accumulateToken b
b]
instance (Tokenizable a) => Tokenizable (Set.Set a) where
tokens :: Set a -> [Token]
tokens = [a] -> [Token]
forall t. Tokenizable t => t -> [Token]
tokens ([a] -> [Token]) -> (Set a -> [a]) -> Set a -> [Token]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Set a -> [a]
forall a. Set a -> [a]
Set.toList
instance (Tokenizable k, Tokenizable v) => Tokenizable (Map.Map k v) where
tokens :: Map k v -> [Token]
tokens = [(k, v)] -> [Token]
forall t. Tokenizable t => t -> [Token]
tokens ([(k, v)] -> [Token])
-> (Map k v -> [(k, v)]) -> Map k v -> [Token]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Map k v -> [(k, v)]
forall k a. Map k a -> [(k, a)]
Map.toList
instance (Tokenizable a, Tokenizable b) => Tokenizable (Relation a b) where
tokens :: Relation a b -> [Token]
tokens = [(a, b)] -> [Token]
forall t. Tokenizable t => t -> [Token]
tokens ([(a, b)] -> [Token])
-> (Relation a b -> [(a, b)]) -> Relation a b -> [Token]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Relation a b -> [(a, b)]
forall a b. Relation a b -> [(a, b)]
Relation.toList
instance (Tokenizable d1, Tokenizable d2, Tokenizable d3) => Tokenizable (Relation3 d1 d2 d3) where
tokens :: Relation3 d1 d2 d3 -> [Token]
tokens Relation3 d1 d2 d3
s = [[(d1, (d2, d3))] -> Token
forall t. Tokenizable t => t -> Token
accumulateToken ([(d1, (d2, d3))] -> Token) -> [(d1, (d2, d3))] -> Token
forall a b. (a -> b) -> a -> b
$ Relation3 d1 d2 d3 -> [(d1, (d2, d3))]
forall a b c. Relation3 a b c -> [(a, (b, c))]
Relation3.toNestedList Relation3 d1 d2 d3
s]
instance (Tokenizable d1, Tokenizable d2, Tokenizable d3, Tokenizable d4) => Tokenizable (Relation4 d1 d2 d3 d4) where
tokens :: Relation4 d1 d2 d3 d4 -> [Token]
tokens Relation4 d1 d2 d3 d4
s = [[(d1, (d2, (d3, d4)))] -> Token
forall t. Tokenizable t => t -> Token
accumulateToken ([(d1, (d2, (d3, d4)))] -> Token)
-> [(d1, (d2, (d3, d4)))] -> Token
forall a b. (a -> b) -> a -> b
$ Relation4 d1 d2 d3 d4 -> [(d1, (d2, (d3, d4)))]
forall a b c d. Relation4 a b c d -> [(a, (b, (c, d)))]
Relation4.toNestedList Relation4 d1 d2 d3 d4
s]
instance Tokenizable () where
tokens :: () -> [Token]
tokens ()
_ = []
instance Tokenizable Double where
tokens :: Double -> [Token]
tokens Double
d = [Double -> Token
Double Double
d]
instance Tokenizable Text where
tokens :: Text -> [Token]
tokens Text
s = [Text -> Token
Text Text
s]
instance Tokenizable Char where
tokens :: Char -> [Token]
tokens Char
c = [Word64 -> Token
Nat (Word64 -> Token) -> Word64 -> Token
forall a b. (a -> b) -> a -> b
$ Int -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word64) -> Int -> Word64
forall a b. (a -> b) -> a -> b
$ Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
c]
instance Tokenizable ByteString where
tokens :: ByteString -> [Token]
tokens ByteString
bs = [ByteString -> Token
Bytes ByteString
bs]
instance Tokenizable Word64 where
tokens :: Word64 -> [Token]
tokens Word64
w = [Word64 -> Token
Nat Word64
w]
instance Tokenizable Int64 where
tokens :: Int64 -> [Token]
tokens Int64
w = [Int64 -> Token
Int Int64
w]
instance Tokenizable Bool where
tokens :: Bool -> [Token]
tokens Bool
b = [Word8 -> Token
Tag (Word8 -> Token) -> (Int -> Word8) -> Int -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Token) -> Int -> Token
forall a b. (a -> b) -> a -> b
$ Bool -> Int
forall a. Enum a => a -> Int
fromEnum Bool
b]
instance Tokenizable Hash where
tokens :: Hash -> [Token]
tokens Hash
h = [ByteString -> Token
Bytes (Hash -> ByteString
Hash.toByteString Hash
h)]
accumulate :: [Token] -> Hash
accumulate :: [Token] -> Hash
accumulate = ByteString -> Hash
Hash.fromByteString (ByteString -> Hash) -> ([Token] -> ByteString) -> [Token] -> Hash
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Digest SHA3_512 -> ByteString
forall bin bout.
(ByteArrayAccess bin, ByteArray bout) =>
bin -> bout
BA.convert (Digest SHA3_512 -> ByteString)
-> ([Token] -> Digest SHA3_512) -> [Token] -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Context SHA3_512 -> Digest SHA3_512
forall a. HashAlgorithm a => Context a -> Digest a
CH.hashFinalize (Context SHA3_512 -> Digest SHA3_512)
-> ([Token] -> Context SHA3_512) -> [Token] -> Digest SHA3_512
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Context SHA3_512 -> [Token] -> Context SHA3_512
go Context SHA3_512
forall a. HashAlgorithm a => Context a
CH.hashInit
where
go :: CH.Context CH.SHA3_512 -> [Token] -> CH.Context CH.SHA3_512
go :: Context SHA3_512 -> [Token] -> Context SHA3_512
go Context SHA3_512
acc [Token]
tokens = Context SHA3_512 -> [ByteString] -> Context SHA3_512
forall a ba.
(HashAlgorithm a, ByteArrayAccess ba) =>
Context a -> [ba] -> Context a
CH.hashUpdates Context SHA3_512
acc (Token
hashingVersion Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: [Token]
tokens [Token] -> (Token -> [ByteString]) -> [ByteString]
forall a b. [a] -> (a -> [b]) -> [b]
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Token -> [ByteString]
toBS)
toBS :: Token -> [ByteString]
toBS (Tag Word8
b) = [Word8 -> ByteString
B.singleton Word8
b]
toBS (Bytes ByteString
bs) = [Int -> ByteString
forall n. Integral n => n -> ByteString
encodeLength (Int -> ByteString) -> Int -> ByteString
forall a b. (a -> b) -> a -> b
$ ByteString -> Int
B.length ByteString
bs, ByteString
bs]
toBS (Int Int64
i) = [ByteString -> ByteString
BL.toStrict (ByteString -> ByteString)
-> (Int64 -> ByteString) -> Int64 -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> ByteString
toLazyByteString (Builder -> ByteString)
-> (Int64 -> Builder) -> Int64 -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int64 -> Builder
int64BE (Int64 -> ByteString) -> Int64 -> ByteString
forall a b. (a -> b) -> a -> b
$ Int64
i]
toBS (Nat Word64
i) = [ByteString -> ByteString
BL.toStrict (ByteString -> ByteString)
-> (Word64 -> ByteString) -> Word64 -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> ByteString
toLazyByteString (Builder -> ByteString)
-> (Word64 -> Builder) -> Word64 -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word64 -> Builder
word64BE (Word64 -> ByteString) -> Word64 -> ByteString
forall a b. (a -> b) -> a -> b
$ Word64
i]
toBS (Double Double
d) = [ByteString -> ByteString
BL.toStrict (ByteString -> ByteString)
-> (Double -> ByteString) -> Double -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> ByteString
toLazyByteString (Builder -> ByteString)
-> (Double -> Builder) -> Double -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Double -> Builder
doubleBE (Double -> ByteString) -> Double -> ByteString
forall a b. (a -> b) -> a -> b
$ Double
d]
toBS (Text Text
txt) =
let tbytes :: ByteString
tbytes = Text -> ByteString
encodeUtf8 Text
txt
in [Int -> ByteString
forall n. Integral n => n -> ByteString
encodeLength (ByteString -> Int
B.length ByteString
tbytes), ByteString
tbytes]
toBS (Hashed Hash
h) = [Hash -> ByteString
Hash.toByteString Hash
h]
encodeLength :: (Integral n) => n -> B.ByteString
encodeLength :: forall n. Integral n => n -> ByteString
encodeLength = ByteString -> ByteString
BL.toStrict (ByteString -> ByteString) -> (n -> ByteString) -> n -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> ByteString
toLazyByteString (Builder -> ByteString) -> (n -> Builder) -> n -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word64 -> Builder
word64BE (Word64 -> Builder) -> (n -> Word64) -> n -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. n -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral
class Hashable1 f where
hash1 :: ([a] -> ([Hash], a -> Hash)) -> (a -> Hash) -> f a -> Hash