> module MD5 (md5, md5s, md5i, > md5M, md5sM, md5iM, > ABCD, MD5(..), Zord64) where > import Char > import Bits > import Word Nasty kludge to create a type Zord64 which is really a Word64 but works how we want in hugs too... #ifdef __GLASGOW_HASKELL__ #include "ZordGHC.hs" #else > import ZordHUGS #endif ======================== TYPES AND CLASS DEFINTIONS ======================== > type ABCD = (Word32, Word32, Word32, Word32) > type XYZ = (Word32, Word32, Word32) > type Rotation = Int Anything we want to work out the MD5 of must be an instance of class MD5 > class MD5 a where > get_next :: a -> ([Word32], Int, a) -- get the next blocks worth > -- \ \ \------ the rest of the input > -- \ \--------- the number of bits returned > -- \--------------- the bits returned in 32bit words > len_pad :: Zord64 -> a -> a -- append the padding and length > combine :: a -> a -> a -- combine 2 sets of data into 1 > leng :: a -> Zord64 -- return the length of the data > zero :: a -- the element of 0 length > -- (the identity for combine) Mainly exists because it's fairly easy to do MD5s on input where the length is not a multiple of 8 > instance MD5 [Bool] where > get_next s = (bools_to_word32s ys, length ys, zs) > where (ys, zs) = splitAt 512 s > len_pad l bs = bs ++ [True] > ++ replicate (fromIntegral $ (447 - l) .&. 511) False > ++ [l .&. (shiftL 1 x) > 0 | x <- (mangle [0..63])] > where mangle [] = [] > mangle xs = reverse ys ++ mangle zs > where (ys, zs) = splitAt 8 xs > combine s t = s ++ t > leng s = fromIntegral $ length s > zero = [] The string instance is fairly straightforward > instance MD5 String where > get_next s = (string_to_word32s ys, 8 * length ys, zs) > where (ys, zs) = splitAt 64 s > len_pad c64 s = s ++ padding ++ l > where padding = '\128':replicate (fromIntegral zeros) '\000' > zeros = shiftR ((440 - c64) .&. 511) 3 > l = length_to_chars 8 c64 > combine s t = s ++ t > leng s = fromIntegral $ 8 * length s > zero = "" YA instance that is believed will be useful > instance MD5 ([Word32], Zord64) where > get_next (ws, l) = (xs, fromIntegral taken, (ys, l - taken)) > where (xs, ys) = splitAt 16 ws > taken = if l > 511 then 512 else l .&. 511 > len_pad c64 (ws, l) = (beginning ++ nextish ++ blanks ++ size, newlen) > where beginning = if length ws > 0 then start ++ lastone' else [] > start = init ws > lastone = last ws > offset = c64 .&. 31 > lastone' = [if offset > 0 then lastone + theone else lastone] > theone = shiftL (shiftR 128 (fromIntegral $ offset .&. 7)) > (fromIntegral $ offset .&. (31 - 7)) > nextish = if offset == 0 then [128] else [] > c64' = c64 + (32 - offset) > num_blanks = (fromIntegral $ shiftR ((448 - c64') .&. 511) 5) > blanks = replicate num_blanks 0 > lowsize = fromIntegral $ c64 .&. (shiftL 1 32 - 1) > topsize = fromIntegral $ shiftR c64 32 > size = [lowsize, topsize] > newlen = l .&. (complement 511) > + if c64 .&. 511 >= 448 then 1024 else 512 > leng = snd > zero = ([], 0) > combine (w1, l1) (w2, l2) = > if l1 == 0 > then (w2, l2) > else if offset == 0 > then (w1 ++ w2, l') -- \/-- round up > else (take (fromIntegral $ shiftR (l' + 31) 5) w, l') > where w = init w1 ++ combine_word32s offset (bit_fiddle $ last w1) w2 > l' = l1 + l2 > offset = fromIntegral $ l1 .&. 31 Combine a bit_fiddled word32 with a list of word32s with a given offset > combine_word32s :: Int -- The offset through the word where we > -- join them up > -> Word32 -- A bit_fiddled word with offset > -- significant bits > -> [Word32] -- The list of other words we are combining with > -> [Word32] -- The resulting list of combined words > combine_word32s _ w ([]) = [bit_fiddle w] > combine_word32s o w (x:xs) = w':rest > where x' = bit_fiddle x > w' = bit_fiddle $ w + shiftR x' o > rest = combine_word32s o (shiftL x' (32 - o)) xs Rearrange a 32bit word so the bits are in significance order > bit_fiddle :: Word32 -> Word32 > bit_fiddle w = shiftL w 24 + (shiftL w 8 .&. shiftL 255 16) > + shiftR w 24 + (shiftR w 8 .&. shiftL 255 8) > instance Num ABCD where > (a1, b1, c1, d1) + (a2, b2, c2, d2) = (a1 + a2, b1 + b2, c1 + c2, d1 + d2) ======================== EXPORTED FUNCTIONS ======================== The simplest function, gives you the MD5 of a string as 4-tuple of 32bit words. > md5 :: (MD5 a) => a -> ABCD > md5 m = md5_main False 0 magic_numbers m Returns a hex number ala the md5sum program > md5s :: (MD5 a) => a -> String > md5s = abcd_to_string . md5 Returns an integer equivalent to the above hex number > md5i :: (MD5 a) => a -> Integer > md5i = abcd_to_integer . md5 The first argument is a function that takes an a to a new a (the a is basically the state of the function) and a bit more of the string. If the function doesn't return any string when it is called then the string is assumed to be complete. The next argument is the initial state. Other than that, and that they clearly need to return values wrapped in a monad, these functions are equivalent to the above 3. > md5M :: (Monad m, MD5 d) => (a -> m (a, d)) -> a -> m ABCD > md5M f e = do (_, abcd) <- md5M_main False 0 f e (zero, magic_numbers) > return abcd > md5sM :: (Monad m, MD5 d) => (a -> m (a, d)) -> a -> m String > md5sM f e = do abcd <- md5M f e > return $ abcd_to_string abcd > md5iM :: (Monad m, MD5 d) => (a -> m (a, d)) -> a -> m Integer > md5iM f e = do abcd <- md5M f e > return $ abcd_to_integer abcd ======================== THE CORE ALGORITHM ======================== Decides what to do. The first argument indicates if padding has been added. The second is the length mod 2^64 so far. Then we have the starting state, the rest of the string and the final state. > md5_main :: (MD5 a) => > Bool -- Have we added padding yet? > -> Zord64 -- The length so far mod 2^64 > -> ABCD -- The initial state > -> a -- The non-processed portion of the message > -> ABCD -- The resulting state > md5_main padded ilen abcd m > = if leng m == 0 && padded > then abcd > else md5_main padded' (ilen + 512) (abcd + abcd') m'' > where (m16, l, m') = get_next m > len' = ilen + fromIntegral l > ((m16', _, m''), padded') = if not padded && l < 512 > then (get_next $ len_pad len' m, True) > else ((m16, l, m'), padded) > abcd' = md5_do_block abcd m16' Equivalent to md5_main for the monadic functions. > md5M_main :: (Monad m, MD5 d) => > Bool -- Have we added padding and length yet? > -> Zord64 -- The length so far mod 2^64 > -> (a -> m (a, d)) -- A function that takes a current state to > -- a (monadic) new state and the next bit of > -- the message > -> a -- The state of the monadic function > -> (d, ABCD) -- The bit of message not processed yet and > -- the initial state > -> m (d, ABCD) -- The bit of message not processed at the > -- end and the resulting state > md5M_main padded len f e (m, abcd) > = if l < 512 > then if not padded > then do (e', m') <- f e > if leng m' == 0 > then md5M_main True 0 f e (len_pad len m, abcd) > else let extra_length = leng m' > len' = len + extra_length in > md5M_main False len' f e' (combine m m', abcd) > else return (m, abcd) > else md5M_main padded len f e (rest, abcd + abcd') > where (block, l, rest) = get_next m > abcd' = md5_do_block abcd block md5_do_block processes a 512 bit block by calling md5_round 4 times to apply each round with the correct constants and permutations of the block > md5_do_block :: ABCD -- Initial state > -> [Word32] -- The block to be processed - 16 32bit words > -> ABCD -- Resulting state > md5_do_block abcd0 w = abcd4 > where (r1, r2, r3, r4) = rounds > {- > map (\x -> w !! x) [1,6,11,0,5,10,15,4,9,14,3,8,13,2,7,12] > -- [(5 * x + 1) `mod` 16 | x <- [0..15]] > map (\x -> w !! x) [5,8,11,14,1,4,7,10,13,0,3,6,9,12,15,2] > -- [(3 * x + 5) `mod` 16 | x <- [0..15]] > map (\x -> w !! x) [0,7,14,5,12,3,10,1,8,15,6,13,4,11,2,9] > -- [(7 * x) `mod` 16 | x <- [0..15]] > -} > perm5 [c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15] > = [c1,c6,c11,c0,c5,c10,c15,c4,c9,c14,c3,c8,c13,c2,c7,c12] > perm5 _ = error "broke at perm5" > perm3 [c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15] > = [c5,c8,c11,c14,c1,c4,c7,c10,c13,c0,c3,c6,c9,c12,c15,c2] > perm3 _ = error "broke at perm3" > perm7 [c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15] > = [c0,c7,c14,c5,c12,c3,c10,c1,c8,c15,c6,c13,c4,c11,c2,c9] > perm7 _ = error "broke at perm7" > abcd1 = md5_round md5_f abcd0 w r1 > abcd2 = md5_round md5_g abcd1 (perm5 w) r2 > abcd3 = md5_round md5_h abcd2 (perm3 w) r3 > abcd4 = md5_round md5_i abcd3 (perm7 w) r4 md5_round does one of the rounds. It takes an auxiliary function and foldls (md5_inner_function f) to repeatedly apply it to the initial state with the correct constants > md5_round :: (XYZ -> Word32) -- Auxiliary function (F, G, H or I > -- for those of you with a copy of > -- the prayer book^W^WRFC) > -> ABCD -- Initial state > -> [Word32] -- The 16 32bit words of input > -> [(Rotation, Word32)] -- The list of 16 rotations and > -- additive constants > -> ABCD -- Resulting state > md5_round f abcd s ns = foldl (md5_inner_function f) abcd ns' > where ns' = zipWith (\x (y, z) -> (y, x + z)) s ns Apply one of the functions md5_[fghi] and put the new ABCD together > md5_inner_function :: (XYZ -> Word32) -- Auxiliary function > -> ABCD -- Initial state > -> (Rotation, Word32) -- The rotation and additive > -- constant (X[i] + T[j]) > -> ABCD -- Resulting state > md5_inner_function f (a, b, c, d) (s, ki) = (d, a', b, c) > where mid_a = a + f(b,c,d) + ki > rot_a = rotL mid_a s > a' = b + rot_a The 4 auxiliary functions > md5_f :: XYZ -> Word32 > md5_f (x, y, z) = z `xor` (x .&. (y `xor` z)) > {- optimised version of: (x .&. y) .|. ((complement x) .&. z) -} > md5_g :: XYZ -> Word32 > md5_g (x, y, z) = md5_f (z, x, y) > {- was: (x .&. z) .|. (y .&. (complement z)) -} > md5_h :: XYZ -> Word32 > md5_h (x, y, z) = x `xor` y `xor` z > md5_i :: XYZ -> Word32 > md5_i (x, y, z) = y `xor` (x .|. (complement z)) The magic numbers from the RFC. > magic_numbers :: ABCD > magic_numbers = (0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476) The 4 lists of (rotation, additive constant) tuples, one for each round > rounds :: ([(Rotation, Word32)], > [(Rotation, Word32)], > [(Rotation, Word32)], > [(Rotation, Word32)]) > rounds = (r1, r2, r3, r4) > where r1 = [(s11, 0xd76aa478), (s12, 0xe8c7b756), (s13, 0x242070db), > (s14, 0xc1bdceee), (s11, 0xf57c0faf), (s12, 0x4787c62a), > (s13, 0xa8304613), (s14, 0xfd469501), (s11, 0x698098d8), > (s12, 0x8b44f7af), (s13, 0xffff5bb1), (s14, 0x895cd7be), > (s11, 0x6b901122), (s12, 0xfd987193), (s13, 0xa679438e), > (s14, 0x49b40821)] > r2 = [(s21, 0xf61e2562), (s22, 0xc040b340), (s23, 0x265e5a51), > (s24, 0xe9b6c7aa), (s21, 0xd62f105d), (s22, 0x2441453), > (s23, 0xd8a1e681), (s24, 0xe7d3fbc8), (s21, 0x21e1cde6), > (s22, 0xc33707d6), (s23, 0xf4d50d87), (s24, 0x455a14ed), > (s21, 0xa9e3e905), (s22, 0xfcefa3f8), (s23, 0x676f02d9), > (s24, 0x8d2a4c8a)] > r3 = [(s31, 0xfffa3942), (s32, 0x8771f681), (s33, 0x6d9d6122), > (s34, 0xfde5380c), (s31, 0xa4beea44), (s32, 0x4bdecfa9), > (s33, 0xf6bb4b60), (s34, 0xbebfbc70), (s31, 0x289b7ec6), > (s32, 0xeaa127fa), (s33, 0xd4ef3085), (s34, 0x4881d05), > (s31, 0xd9d4d039), (s32, 0xe6db99e5), (s33, 0x1fa27cf8), > (s34, 0xc4ac5665)] > r4 = [(s41, 0xf4292244), (s42, 0x432aff97), (s43, 0xab9423a7), > (s44, 0xfc93a039), (s41, 0x655b59c3), (s42, 0x8f0ccc92), > (s43, 0xffeff47d), (s44, 0x85845dd1), (s41, 0x6fa87e4f), > (s42, 0xfe2ce6e0), (s43, 0xa3014314), (s44, 0x4e0811a1), > (s41, 0xf7537e82), (s42, 0xbd3af235), (s43, 0x2ad7d2bb), > (s44, 0xeb86d391)] > s11 = 7 > s12 = 12 > s13 = 17 > s14 = 22 > s21 = 5 > s22 = 9 > s23 = 14 > s24 = 20 > s31 = 4 > s32 = 11 > s33 = 16 > s34 = 23 > s41 = 6 > s42 = 10 > s43 = 15 > s44 = 21 ======================== CONVERSION FUNCTIONS ======================== Turn the 4 32 bit words into a string representing the hex number they represent. > abcd_to_string :: ABCD -> String > abcd_to_string (a,b,c,d) = concat $ map display_32bits_as_hex [a,b,c,d] Split the 32 bit word up, swap the chunks over and convert the numbers to their hex equivalents. > display_32bits_as_hex :: Word32 -> String > display_32bits_as_hex w = map getc [y2,y1,y4,y3,y6,y5,y8,y7] > where [y1,y2,y3,y4,y5,y6,y7,y8] > = map (\x -> (shiftR w (4*x)) .&. 15) [0..7] > getc n = (['0'..'9'] ++ ['a'..'f']) !! (fromIntegral n) Convert to an integer, performing endianness magic as we go > abcd_to_integer :: ABCD -> Integer > abcd_to_integer (a, b, c, d) = toInteger (rev_num a) * 2^(96 :: Int) > + toInteger (rev_num b) * 2^(64 :: Int) > + toInteger (rev_num c) * 2^(32 :: Int) > + toInteger (rev_num d) > where rev_num i > = foldl (\so_far next -> shiftL so_far 8 > + (shiftR i next .&. 255)) 0 [0,8,16,24] Used to convert a 64 byte string to 16 32bit words > string_to_word32s :: String -> [Word32] > string_to_word32s "" = [] > string_to_word32s ss = this:string_to_word32s ss' > where (s, ss') = splitAt 4 ss > this = foldr (\c i -> shiftL i 8 + (fromIntegral.ord) c) 0 s Used to convert a list of 512 bools to 16 32bit words > bools_to_word32s :: [Bool] -> [Word32] > bools_to_word32s [] = [] > bools_to_word32s bs = this:bools_to_word32s rest > where (bs1, bs1') = splitAt 8 bs > (bs2, bs2') = splitAt 8 bs1' > (bs3, bs3') = splitAt 8 bs2' > (bs4, rest) = splitAt 8 bs3' > this = boolss_to_word32 [bs1, bs2, bs3, bs4] > bools_to_word8 = foldl (\w b -> shiftL w 1 + if b then 1 else 0) 0 > boolss_to_word32 = foldr (\w8 w -> shiftL w 8 + bools_to_word8 w8) 0 Convert the size into a list of characters used by the len_pad function for strings > length_to_chars :: Int -> Zord64 -> String > length_to_chars 0 _ = [] > length_to_chars p n = this:length_to_chars (p-1) (shiftR n 8) > where this = chr $ fromIntegral $ n .&. 255 ======================== MISC ======================== hugs doesn't implement rotateL on Word32s so we write our own > rotL :: Word32 -> Rotation -> Word32 > rotL a s = shiftL a s .|. shiftL a (s-32)