MD5> and [md5s (get_word_32s $ replicate x 'a', fromIntegral (8*x) :: Zord64) == (md5s $ replicate x 'a') | x <- [0..1030]] True MD5> and [md5s (take (x*8) $ cycle [False, True, True, False, False, False, False, True]) == (md5s $ replicate x 'a') | x <- [0..1030]] True MD5> > module MD5 (md5, md5s, md5i, > md5M, md5sM, md5iM, > ABCD) where > import Char > import Bits > import Word Nasty kludge to create a type Zord64 which is really a Word64 but works how we want in hugs too... #ifdef __GLASGOW_HASKELL__ #include "ZordGHC.hs" #else > import ZordHUGS #endif > type ABCD = (Word32, Word32, Word32, Word32) > type XYZ = (Word32, Word32, Word32) > type Rotation = Int > monad_test :: (Int, String) -> IO ((Int, String), String) > monad_test (0, s) = return ((1, s), s) > monad_test e = return (e, "") > class (Show a) => MD5 a where > get_next :: a -> ([Word32], Int, a) -- get the next blocks worth > -- \ \ \------ the rest of the input > -- \ \--------- the number of bits returned > -- \--------------- the bits returned in 32bit words > len_pad :: Zord64 -> a -> a -- add the padding and length to the end > combine :: a -> a -> a -- combine 2 sets of data into 1 > leng :: a -> Zord64 -- return the length of the data > anull :: a -- the null data element > instance MD5 [Bool] where > get_next s = (bool_to_words ys, length ys, zs) > where (ys, zs) = takeDrop 512 s > len_pad l bs = bs ++ [True] > ++ replicate (fromIntegral $ (447 - l) .&. 511) False > ++ [l .&. (shiftL 1 x) > 0 | x <- list] > where list = f [0..63] > f (b1:b2:b3:b4:b5:b6:b7:b8:xs) = b8:b7:b6:b5:b4:b3:b2:b1:f xs > f [] = [] > f _ = undefined > combine s t = s ++ t > leng s = fromIntegral $ length s > anull = [] The string instance is fairly straightforward > instance MD5 String where > get_next s = (get_word_32s ys, 8 * length ys, zs) > where (ys, zs) = takeDrop 64 s > len_pad = len_pads > combine s t = s ++ t > leng s = fromIntegral $ length s > anull = "" No guarantees about this one... > instance MD5 ([Word32], Zord64) where > get_next (ws, l) = (xs, fromIntegral taken, (ys, l - taken)) > where (xs, ys) = takeDrop 16 ws > taken = if l > 511 then 512 else l .&. 511 > len_pad = len_padw > leng (_, i) = i > anull = ([], 0) > combine (w1, l1) (w2, l2) = (w, l1 + l2) > where w = init w1 ++ fst (foldl (comb offset) ([], last w1) w2) > offset = fromIntegral $ l1 .&. 31 > comb :: Int -> ([Word32], Word32) -> Word32 -> ([Word32], Word32) > comb o (ws, i) x = (ws ++ [i + shiftL x o], shiftR x (32 - o)) The simplest function, gives you the MD5 of a string as 4-tuple of 32bit words. > md5 :: (MD5 a) => a -> ABCD > md5 m = md5_main False 0 magic_numbers m Returns a hex number ala the md5sum program > md5s :: (MD5 a) => a -> String > md5s = md5_display . md5 Returns an integer equivalent to the above hex number > md5i :: (MD5 a) => a -> Integer > md5i = abcd_to_integer . md5 The first argument is a function that takes an a to a new a (the a is basically the state of the function) and a bit more of the string. If the function doesn't return any string when it is called then the string is assumed to be complete. The next argument is the initial state. Other than that, and that they clearly need to return values wrapped in a monad, these functions are equivalent to the above 3. > md5M :: (Monad m, MD5 d) => (a -> m (a, d)) -> a -> m ABCD > md5M f e = do (_, abcd) <- md5M_main False 0 f e (anull, magic_numbers) > return abcd > md5sM :: (Monad m, MD5 d) => (a -> m (a, d)) -> a -> m String > md5sM f e = do abcd <- md5M f e > return $ md5_display abcd > md5iM :: (Monad m, MD5 d) => (a -> m (a, d)) -> a -> m Integer > md5iM f e = do abcd <- md5M f e > return $ abcd_to_integer abcd The magic numbers from the RFC. > magic_numbers :: ABCD > magic_numbers = (0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476) Given a bit of input and it's length (mod 2^64), append the padding and length. > len_pads :: Zord64 -> String -> String > len_pads c64 s = s ++ padding ++ l > where padding = '\128':replicate (fromIntegral bytes) '\000' > bits = (440 - c64) .&. 511 > bytes = shiftR bits 3 > l = map chr $ size_split 8 c64 > len_padw :: Zord64 -> ([Word32], Zord64) -> ([Word32], Zord64) > len_padw c64 (ws, _) = (beginning ++ nextish ++ blanks ++ size, -- 0 > {- that 0 is a lie, but we don't care any more -} > if c64 .&. 511 >= 448 then 1024 else 512) > where beginning = if length ws > 0 then start ++ lastone' else [] > start = init ws > lastone = last ws > offset = c64 .&. 31 > lastone' = [if offset > 0 then lastone + theone else lastone] > theone = shiftL (shiftR 128 (fromIntegral $ offset .&. 7)) > (fromIntegral $ offset .&. (31 - 7)) > nextish = if offset == 0 then [128] else [] > c64' = c64 + (32 - offset) > num_blanks = (fromIntegral $ shiftR ((448 - c64') .&. 511) 5) > blanks = replicate num_blanks 0 > lowsize = fromIntegral $ c64 .&. (fromIntegral $ 2^32 - 1) > topsize = fromIntegral $ shiftR c64 32 > size = [lowsize, topsize] Split the size into character size chunks. > size_split :: Int -> Zord64 -> [Int] > size_split 0 _ = [] > size_split p n = fromIntegral d:size_split (p-1) n' > where n' = shiftR n 8 > d = n .&. 255 Decides what to do. The first argument indicates if padding has been added. The second is the length mod 2^64 so far. Then we have the starting state, the rest of the string and the final state. > md5_main :: (MD5 a) => Bool -> Zord64 -> ABCD -> a -> ABCD > md5_main padded ilen (a, b, c, d) m > = if leng m == 0 && padded then (a, b, c, d) > else md5_main padded' (ilen + 512) abcd m'' > where (m16, l, m') = get_next m > len' = ilen + fromIntegral l > ((m16', _, m''), padded') = if not padded && l < 512 > then (get_next $ len_pad len' m, True) > else ((m16, l, m'), padded) > (a', b', c', d') = md5_do_block (a, b, c, d) m16' > abcd = (a + a', b + b', c + c', d + d') Equivalent to md5_main for the monadic functions. > md5M_main :: (Monad m, MD5 d) => Bool -> Zord64 -> (a -> m (a, d)) -> a > -> (d, ABCD) > -> m (d, ABCD) > md5M_main padded len f e (m, (a, b, c, d)) > = if l < 512 > then if not padded > then do (e', m') <- f e > if leng m' == 0 > then md5M_main True 0 f e (len_pad len m, (a, b, c, d)) > else let extra_length = leng m' > len' = (len + shiftL extra_length 3) in > md5M_main False len' f e' (combine m m', (a, b, c, d)) > else return (m, (a, b, c, d)) > else md5M_main padded len f e (rest, (a + a', b + b', c + c', d + d')) > where (block, l, rest) = get_next m > (a', b', c', d') = md5_do_block (a, b, c, d) block Process a 64-character (512 bit) block > md5_do_block :: ABCD -> [Word32] -> ABCD > md5_do_block abcd0 w = abcd4 > where (r1, r2, r3, r4) = rounds > {- s1 = map (\x -> s0 !! x) [1,6,11,0,5,10,15,4,9,14,3,8,13,2,7,12] > {- [(5 * x + 1) `mod` 16 | x <- [0..15]] -} > s2 = map (\x -> s0 !! x) [5,8,11,14,1,4,7,10,13,0,3,6,9,12,15,2] > {- [(3 * x + 5) `mod` 16 | x <- [0..15]] -} > s3 = map (\x -> s0 !! x) [0,7,14,5,12,3,10,1,8,15,6,13,4,11,2,9] > {- [(7 * x) `mod` 16 | x <- [0..15]] -} -} > perm5 [c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15] > = [c1,c6,c11,c0,c5,c10,c15,c4,c9,c14,c3,c8,c13,c2,c7,c12] > perm5 _ = undefined > perm3 [c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15] > = [c5,c8,c11,c14,c1,c4,c7,c10,c13,c0,c3,c6,c9,c12,c15,c2] > perm3 _ = undefined > perm7 [c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15] > = [c0,c7,c14,c5,c12,c3,c10,c1,c8,c15,c6,c13,c4,c11,c2,c9] > perm7 _ = undefined > abcd1 = md5_round md5_f abcd0 w r1 > abcd2 = md5_round md5_g abcd1 (perm5 w) r2 > abcd3 = md5_round md5_h abcd2 (perm3 w) r3 > abcd4 = md5_round md5_i abcd3 (perm7 w) r4 Convert a 64 byte string to 16 32 bit words > get_word_32s :: String -> [Word32] > get_word_32s "" = [] > get_word_32s ss = this:get_word_32s ss' > where (s, ss') = takeDrop 4 ss > this = foldr (\c i -> shiftL i 8 + (fromIntegral.ord) c) 0 s Do one of the rounds > md5_round :: (XYZ -> Word32) -> ABCD -> [Word32] > -> [(Rotation, Word32)] -> ABCD > md5_round f (a, b, c, d) s ns = foldl (doit f) (a, b, c, d) ns' > where ns' = zipWith (\x (y, z) -> (y, x + z)) s ns Apply one of the functions md5_[fghi] and put the new ABCD together > doit :: (XYZ -> Word32) -> ABCD -> (Rotation, Word32) -> ABCD > doit f (a, b, c, d) (s, ki) = (d, a', b, c) > where mid_a = a + f(b,c,d) + ki > rot_a = rotL mid_a s > a' = b + rot_a > md5_f :: XYZ -> Word32 > md5_f (x, y, z) = z `xor` (x .&. (y `xor` z)) > {- optimised version of: (x .&. y) .|. ((complement x) .&. z) -} > md5_g :: XYZ -> Word32 > md5_g (x, y, z) = md5_f (z, x, y) > {- was: (x .&. z) .|. (y .&. (complement z)) -} > md5_h :: XYZ -> Word32 > md5_h (x, y, z) = x `xor` y `xor` z > md5_i :: XYZ -> Word32 > md5_i (x, y, z) = y `xor` (x .|. (complement z)) > rounds :: ([(Rotation, Word32)], > [(Rotation, Word32)], > [(Rotation, Word32)], > [(Rotation, Word32)]) > rounds = (r1, r2, r3, r4) > where r1 = [(s11, 0xd76aa478), (s12, 0xe8c7b756), (s13, 0x242070db), > (s14, 0xc1bdceee), (s11, 0xf57c0faf), (s12, 0x4787c62a), > (s13, 0xa8304613), (s14, 0xfd469501), (s11, 0x698098d8), > (s12, 0x8b44f7af), (s13, 0xffff5bb1), (s14, 0x895cd7be), > (s11, 0x6b901122), (s12, 0xfd987193), (s13, 0xa679438e), > (s14, 0x49b40821)] > r2 = [(s21, 0xf61e2562), (s22, 0xc040b340), (s23, 0x265e5a51), > (s24, 0xe9b6c7aa), (s21, 0xd62f105d), (s22, 0x2441453), > (s23, 0xd8a1e681), (s24, 0xe7d3fbc8), (s21, 0x21e1cde6), > (s22, 0xc33707d6), (s23, 0xf4d50d87), (s24, 0x455a14ed), > (s21, 0xa9e3e905), (s22, 0xfcefa3f8), (s23, 0x676f02d9), > (s24, 0x8d2a4c8a)] > r3 = [(s31, 0xfffa3942), (s32, 0x8771f681), (s33, 0x6d9d6122), > (s34, 0xfde5380c), (s31, 0xa4beea44), (s32, 0x4bdecfa9), > (s33, 0xf6bb4b60), (s34, 0xbebfbc70), (s31, 0x289b7ec6), > (s32, 0xeaa127fa), (s33, 0xd4ef3085), (s34, 0x4881d05), > (s31, 0xd9d4d039), (s32, 0xe6db99e5), (s33, 0x1fa27cf8), > (s34, 0xc4ac5665)] > r4 = [(s41, 0xf4292244), (s42, 0x432aff97), (s43, 0xab9423a7), > (s44, 0xfc93a039), (s41, 0x655b59c3), (s42, 0x8f0ccc92), > (s43, 0xffeff47d), (s44, 0x85845dd1), (s41, 0x6fa87e4f), > (s42, 0xfe2ce6e0), (s43, 0xa3014314), (s44, 0x4e0811a1), > (s41, 0xf7537e82), (s42, 0xbd3af235), (s43, 0x2ad7d2bb), > (s44, 0xeb86d391)] > s11 = 7 > s12 = 12 > s13 = 17 > s14 = 22 > s21 = 5 > s22 = 9 > s23 = 14 > s24 = 20 > s31 = 4 > s32 = 11 > s33 = 16 > s34 = 23 > s41 = 6 > s42 = 10 > s43 = 15 > s44 = 21 > takeDrop :: Int -> [a] -> ([a], [a]) > takeDrop _ [] = ([], []) > takeDrop 0 xs = ([], xs) > takeDrop n (x:xs) = (x:ys, zs) > where (ys, zs) = takeDrop (n-1) xs Turn the 4 32 bit words into a string representing the hex number they represent. > md5_display :: ABCD -> String > md5_display (a,b,c,d) = concat $ map display_32bits_as_hex [a,b,c,d] Split the 32 bit word up, swap the chunks over and convert the numbers to their hex equivalents. > display_32bits_as_hex :: Word32 -> String > display_32bits_as_hex w = map getc [y2,y1,y4,y3,y6,y5,y8,y7] > where [y1,y2,y3,y4,y5,y6,y7,y8] > = map (\x -> (shiftR w (4*x)) .&. 15) [0..7] > getc n = (['0'..'9'] ++ ['a'..'f']) !! (fromIntegral n) Convert to an integer, performing endianness magic as we go > abcd_to_integer :: ABCD -> Integer > abcd_to_integer (a, b, c, d) = toInteger (rev_num a) * 2^(96 :: Int) > + toInteger (rev_num b) * 2^(64 :: Int) > + toInteger (rev_num c) * 2^(32 :: Int) > + toInteger (rev_num d) > where rev_num i > = foldl (\so_far next -> shiftL so_far 8 > + (shiftR i next .&. 255)) 0 [0,8,16,24] hugs doesn't implement rotateL on Word32s so we write our own > rotL :: Word32 -> Rotation -> Word32 > rotL a s = shiftL a s .|. shiftL a (s-32) > bool_to_word :: [Bool] -> Word32 > bool_to_word = foldl (\w b -> shiftL w 1 + if b then 1 else 0) 0 > bool_to_words :: [Bool] -> [Word32] > bool_to_words [] = [] > bool_to_words bs = foldr (\w8 wn -> shiftL wn 8 + bool_to_word w8) 0 [bs1, bs2, bs3, bs4]:bool_to_words bs4' > where (bs1, bs1') = takeDrop 8 bs > (bs2, bs2') = takeDrop 8 bs1' > (bs3, bs3') = takeDrop 8 bs2' > (bs4, bs4') = takeDrop 8 bs3'