module Unison.Hashing.V2.Tokenizable
  ( Tokenizable (..),
    Hashable1 (..),
    Token (..),
    hashTokenizable,
    accumulate,
    accumulateToken,
  )
where

import Crypto.Hash qualified as CH
import Data.ByteArray qualified as BA
import Data.ByteString qualified as B
import Data.ByteString.Builder (doubleBE, int64BE, toLazyByteString, word64BE)
import Data.ByteString.Lazy qualified as BL
import Data.Map qualified as Map
import Data.Set qualified as Set
import Unison.Hash (Hash)
import Unison.Hash qualified as Hash
import Unison.Prelude
import Unison.Util.Relation (Relation)
import Unison.Util.Relation qualified as Relation
import Unison.Util.Relation3 (Relation3)
import Unison.Util.Relation3 qualified as Relation3
import Unison.Util.Relation4 (Relation4)
import Unison.Util.Relation4 qualified as Relation4

-- | The version of the current hashing function.
-- This should be incremented every time the hashing function is changed.
--
-- The reasoning is that, if a change to the hashing function changes the hashes for _some_
-- values, it should change it for _all_ values so that we don't have collisions between
-- different hashing function versions. If we don't do this, it's possible for the hashes of
-- simple types (like an Int for example) to keep the same hashes, which would lead to
-- collisions in the `hash` table, since each hash has a different hash version but the same
-- base32 representation.
hashingVersion :: Token
hashingVersion :: Token
hashingVersion = Word8 -> Token
Tag Word8
2

data Token
  = Tag !Word8
  | Bytes !ByteString
  | Int !Int64
  | Text !Text
  | Double !Double
  | Hashed !Hash
  | Nat !Word64

accumulateToken :: (Tokenizable t) => t -> Token
accumulateToken :: forall t. Tokenizable t => t -> Token
accumulateToken = Hash -> Token
Hashed (Hash -> Token) -> (t -> Hash) -> t -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. t -> Hash
forall t. Tokenizable t => t -> Hash
hashTokenizable

-- | Tokenize then accumulate a type into a Hash.
hashTokenizable :: (Tokenizable t) => t -> Hash
hashTokenizable :: forall t. Tokenizable t => t -> Hash
hashTokenizable = [Token] -> Hash
accumulate ([Token] -> Hash) -> (t -> [Token]) -> t -> Hash
forall b c a. (b -> c) -> (a -> b) -> a -> c
. t -> [Token]
forall t. Tokenizable t => t -> [Token]
tokens

-- | Tokenizable converts a value into a set of hashing tokens which will later be accumulated
-- into a Hash. Be very careful when adding or altering instances of this typeclass, changing
-- the hash of a value is a major breaking change and requires a complete codebase migration.
--
-- If you simply want to provide a convenience instance for a type which wraps some Hashable
-- type, write an instance of 'Hashable' which calls through to the inner instance instead.
--
-- E.g. If I want to be able to hash a @TaggedBranch@ using its Branch0 hashable instance:
--
-- @@
-- data TaggedBranch = TaggedBranch String Branch
--
-- instance Hashable TaggedBranch where
--   hash (TaggedBranch _ b) = hash b
-- @@
class Tokenizable t where
  tokens :: t -> [Token]

instance (Tokenizable a) => Tokenizable [a] where
  tokens :: [a] -> [Token]
tokens = (a -> Token) -> [a] -> [Token]
forall a b. (a -> b) -> [a] -> [b]
map a -> Token
forall t. Tokenizable t => t -> Token
accumulateToken

instance (Tokenizable a, Tokenizable b) => Tokenizable (a, b) where
  tokens :: (a, b) -> [Token]
tokens (a
a, b
b) = [a -> Token
forall t. Tokenizable t => t -> Token
accumulateToken a
a, b -> Token
forall t. Tokenizable t => t -> Token
accumulateToken b
b]

instance (Tokenizable a) => Tokenizable (Set.Set a) where
  tokens :: Set a -> [Token]
tokens = [a] -> [Token]
forall t. Tokenizable t => t -> [Token]
tokens ([a] -> [Token]) -> (Set a -> [a]) -> Set a -> [Token]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Set a -> [a]
forall a. Set a -> [a]
Set.toList

instance (Tokenizable k, Tokenizable v) => Tokenizable (Map.Map k v) where
  tokens :: Map k v -> [Token]
tokens = [(k, v)] -> [Token]
forall t. Tokenizable t => t -> [Token]
tokens ([(k, v)] -> [Token])
-> (Map k v -> [(k, v)]) -> Map k v -> [Token]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Map k v -> [(k, v)]
forall k a. Map k a -> [(k, a)]
Map.toList

instance (Tokenizable a, Tokenizable b) => Tokenizable (Relation a b) where
  tokens :: Relation a b -> [Token]
tokens = [(a, b)] -> [Token]
forall t. Tokenizable t => t -> [Token]
tokens ([(a, b)] -> [Token])
-> (Relation a b -> [(a, b)]) -> Relation a b -> [Token]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Relation a b -> [(a, b)]
forall a b. Relation a b -> [(a, b)]
Relation.toList

instance (Tokenizable d1, Tokenizable d2, Tokenizable d3) => Tokenizable (Relation3 d1 d2 d3) where
  tokens :: Relation3 d1 d2 d3 -> [Token]
tokens Relation3 d1 d2 d3
s = [[(d1, (d2, d3))] -> Token
forall t. Tokenizable t => t -> Token
accumulateToken ([(d1, (d2, d3))] -> Token) -> [(d1, (d2, d3))] -> Token
forall a b. (a -> b) -> a -> b
$ Relation3 d1 d2 d3 -> [(d1, (d2, d3))]
forall a b c. Relation3 a b c -> [(a, (b, c))]
Relation3.toNestedList Relation3 d1 d2 d3
s]

instance (Tokenizable d1, Tokenizable d2, Tokenizable d3, Tokenizable d4) => Tokenizable (Relation4 d1 d2 d3 d4) where
  tokens :: Relation4 d1 d2 d3 d4 -> [Token]
tokens Relation4 d1 d2 d3 d4
s = [[(d1, (d2, (d3, d4)))] -> Token
forall t. Tokenizable t => t -> Token
accumulateToken ([(d1, (d2, (d3, d4)))] -> Token)
-> [(d1, (d2, (d3, d4)))] -> Token
forall a b. (a -> b) -> a -> b
$ Relation4 d1 d2 d3 d4 -> [(d1, (d2, (d3, d4)))]
forall a b c d. Relation4 a b c d -> [(a, (b, (c, d)))]
Relation4.toNestedList Relation4 d1 d2 d3 d4
s]

instance Tokenizable () where
  tokens :: () -> [Token]
tokens ()
_ = []

instance Tokenizable Double where
  tokens :: Double -> [Token]
tokens Double
d = [Double -> Token
Double Double
d]

instance Tokenizable Text where
  tokens :: Text -> [Token]
tokens Text
s = [Text -> Token
Text Text
s]

instance Tokenizable Char where
  tokens :: Char -> [Token]
tokens Char
c = [Word64 -> Token
Nat (Word64 -> Token) -> Word64 -> Token
forall a b. (a -> b) -> a -> b
$ Int -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word64) -> Int -> Word64
forall a b. (a -> b) -> a -> b
$ Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
c]

instance Tokenizable ByteString where
  tokens :: ByteString -> [Token]
tokens ByteString
bs = [ByteString -> Token
Bytes ByteString
bs]

instance Tokenizable Word64 where
  tokens :: Word64 -> [Token]
tokens Word64
w = [Word64 -> Token
Nat Word64
w]

instance Tokenizable Int64 where
  tokens :: Int64 -> [Token]
tokens Int64
w = [Int64 -> Token
Int Int64
w]

instance Tokenizable Bool where
  tokens :: Bool -> [Token]
tokens Bool
b = [Word8 -> Token
Tag (Word8 -> Token) -> (Int -> Word8) -> Int -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Token) -> Int -> Token
forall a b. (a -> b) -> a -> b
$ Bool -> Int
forall a. Enum a => a -> Int
fromEnum Bool
b]

instance Tokenizable Hash where
  tokens :: Hash -> [Token]
tokens Hash
h = [ByteString -> Token
Bytes (Hash -> ByteString
Hash.toByteString Hash
h)]

accumulate :: [Token] -> Hash
accumulate :: [Token] -> Hash
accumulate = ByteString -> Hash
Hash.fromByteString (ByteString -> Hash) -> ([Token] -> ByteString) -> [Token] -> Hash
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Digest SHA3_512 -> ByteString
forall bin bout.
(ByteArrayAccess bin, ByteArray bout) =>
bin -> bout
BA.convert (Digest SHA3_512 -> ByteString)
-> ([Token] -> Digest SHA3_512) -> [Token] -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Context SHA3_512 -> Digest SHA3_512
forall a. HashAlgorithm a => Context a -> Digest a
CH.hashFinalize (Context SHA3_512 -> Digest SHA3_512)
-> ([Token] -> Context SHA3_512) -> [Token] -> Digest SHA3_512
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Context SHA3_512 -> [Token] -> Context SHA3_512
go Context SHA3_512
forall a. HashAlgorithm a => Context a
CH.hashInit
  where
    go :: CH.Context CH.SHA3_512 -> [Token] -> CH.Context CH.SHA3_512
    go :: Context SHA3_512 -> [Token] -> Context SHA3_512
go Context SHA3_512
acc [Token]
tokens = Context SHA3_512 -> [ByteString] -> Context SHA3_512
forall a ba.
(HashAlgorithm a, ByteArrayAccess ba) =>
Context a -> [ba] -> Context a
CH.hashUpdates Context SHA3_512
acc (Token
hashingVersion Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
: [Token]
tokens [Token] -> (Token -> [ByteString]) -> [ByteString]
forall a b. [a] -> (a -> [b]) -> [b]
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Token -> [ByteString]
toBS)
    toBS :: Token -> [ByteString]
toBS (Tag Word8
b) = [Word8 -> ByteString
B.singleton Word8
b]
    toBS (Bytes ByteString
bs) = [Int -> ByteString
forall n. Integral n => n -> ByteString
encodeLength (Int -> ByteString) -> Int -> ByteString
forall a b. (a -> b) -> a -> b
$ ByteString -> Int
B.length ByteString
bs, ByteString
bs]
    toBS (Int Int64
i) = [ByteString -> ByteString
BL.toStrict (ByteString -> ByteString)
-> (Int64 -> ByteString) -> Int64 -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> ByteString
toLazyByteString (Builder -> ByteString)
-> (Int64 -> Builder) -> Int64 -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int64 -> Builder
int64BE (Int64 -> ByteString) -> Int64 -> ByteString
forall a b. (a -> b) -> a -> b
$ Int64
i]
    toBS (Nat Word64
i) = [ByteString -> ByteString
BL.toStrict (ByteString -> ByteString)
-> (Word64 -> ByteString) -> Word64 -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> ByteString
toLazyByteString (Builder -> ByteString)
-> (Word64 -> Builder) -> Word64 -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word64 -> Builder
word64BE (Word64 -> ByteString) -> Word64 -> ByteString
forall a b. (a -> b) -> a -> b
$ Word64
i]
    toBS (Double Double
d) = [ByteString -> ByteString
BL.toStrict (ByteString -> ByteString)
-> (Double -> ByteString) -> Double -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> ByteString
toLazyByteString (Builder -> ByteString)
-> (Double -> Builder) -> Double -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Double -> Builder
doubleBE (Double -> ByteString) -> Double -> ByteString
forall a b. (a -> b) -> a -> b
$ Double
d]
    toBS (Text Text
txt) =
      let tbytes :: ByteString
tbytes = Text -> ByteString
encodeUtf8 Text
txt
       in [Int -> ByteString
forall n. Integral n => n -> ByteString
encodeLength (ByteString -> Int
B.length ByteString
tbytes), ByteString
tbytes]
    toBS (Hashed Hash
h) = [Hash -> ByteString
Hash.toByteString Hash
h]
    encodeLength :: (Integral n) => n -> B.ByteString
    encodeLength :: forall n. Integral n => n -> ByteString
encodeLength = ByteString -> ByteString
BL.toStrict (ByteString -> ByteString) -> (n -> ByteString) -> n -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Builder -> ByteString
toLazyByteString (Builder -> ByteString) -> (n -> Builder) -> n -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word64 -> Builder
word64BE (Word64 -> Builder) -> (n -> Word64) -> n -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. n -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral

class Hashable1 f where
  -- | Produce a hash for an `f a`, given a hashing function for `a`.
  -- If there is a notion of order-independence in some aspect of a subterm
  -- of `f`, then the first argument (`hashUnordered :: [a] -> ([h], a -> h)`)
  -- should be used to impose an order, and then apply that order in further hashing.
  -- Otherwise the second argument (`hash :: a -> h`) should be used.
  --
  -- Example 1: A simple functor with no unordered components. Hashable1 instance
  --            just uses `hash`:
  --
  --   data T a = One a | Two a a deriving Functor
  --
  --   instance Hashable1 T where
  --     hash1 _ hash t = case t of
  --       One a -> accumulate [Tag 0, Hashed (hash a)]
  --       Two a a2 -> accumulate [Tag 1, Hashed (hash a), Hashed (hash a2)]
  --
  -- Example 2: A functor with unordered components. For hashing, we need to
  --            pick a canonical ordering of the unordered components, so we
  --            use `hashUnordered`:
  --
  --   data U a = U { unordered :: [a], uno :: a, dos :: a } deriving Functor
  --
  --   instance Hashable1 U where
  --     hash1 hashUnordered _ (U unordered uno dos) =
  --       let (hs, hash) = hashUnordered unordered
  --       in accumulate $ map Hashed hs ++ [Hashed (hash uno), Hashed (hash dos)]
  hash1 :: ([a] -> ([Hash], a -> Hash)) -> (a -> Hash) -> f a -> Hash