{-# LANGUAGE CPP, MagicHash, TypeSynonymInstances, FlexibleInstances #-}
module Codec.Binary.UTF8.Light (
UTF8(..)
, lenUTF8
, lenUTF16
, countUTF8
, decodeUTF8
, encodeUTF8
, encodeUTF8'
, withUTF8
, putUTF8
, putUTF8Ln
, hPutUTF8
, hPutUTF8Ln
, readUTF8File
, writeUTF8File
, appendUTF8File
, hGetUTF8Line
, hGetUTF8Contents
, hGetUTF8
, hGetUTF8NonBlocking
, c2w
, w2c
, i2w
, w2i
, flipUTF8
, unflipUTF8
, flipTab
, unflipTab
, showHex
, toBits
, fromBits
, Int8,Int16,Int32
, Word,Word8,Word16,Word32
) where
import Data.Bits
import Data.List(foldl')
import Data.Char(chr,ord)
import Data.Monoid(Monoid(..))
import Data.ByteString(ByteString)
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Internal as B
import Data.ByteString.Unsafe
import System.IO(Handle)
#if defined(__GLASGOW_HASKELL__)
import GHC.Exts
(Int(I#),Word(W#),Char(C#)
,Ptr(Ptr),FunPtr(FunPtr))
import GHC.Int
(Int8(I8#),Int16(I16#),Int32(I32#))
import GHC.Word
(Word8(W8#),Word16(W16#),Word32(W32#))
import GHC.Prim
(Char#,Int#,Word#,Addr#
,ord#,chr#,int2Word#,word2Int#
,and#,or#,xor#,not#
,gtWord#,geWord#,eqWord#
,neWord#,ltWord#,leWord#
,uncheckedShiftL#,uncheckedShiftRL#
,narrow8Int#,narrow16Int#,narrow32Int#
,narrow8Word#,narrow16Word#,narrow32Word#)
#else
import Data.Word
(Word,Word8,Word16,Word32)
import Data.Int(Int32)
#endif
-- | Shorthand for 'fromIntegral', used for every integral conversion in
-- this module.
fi :: (Num b, Integral a) => a -> b
fi = fromIntegral
-- | Types that can be converted to and from a UTF-8 encoded 'ByteString'.
class UTF8 a where
  -- | Serialise a value to its 'ByteString' form.
  encode :: a -> ByteString
  -- | Rebuild a value from its 'ByteString' form.
  decode :: ByteString -> a
-- | A 'ByteString' is taken to hold UTF-8 already, so both directions are
-- the identity.
instance UTF8 ByteString where
  encode = id
  decode = id
-- | Code points as 'Word32'; delegates directly to 'encodeUTF8' and
-- 'decodeUTF8'.
instance UTF8 [Word32] where
  encode = encodeUTF8
  decode = decodeUTF8
-- | Code points as 'Word'; each element is converted to or from 'Word32'
-- with 'fi' around 'encodeUTF8' and 'decodeUTF8'.
instance UTF8 [Word] where
  encode = encodeUTF8 . fmap fi
  decode = fmap fi . decodeUTF8
-- | Code points as 'Int32'; each element is converted to or from 'Word32'
-- with 'fi' around 'encodeUTF8' and 'decodeUTF8'.
instance UTF8 [Int32] where
  encode = encodeUTF8 . fmap fi
  decode = fmap fi . decodeUTF8
-- | Code points as 'Int'; each element is converted to or from 'Word32'
-- with 'fi' around 'encodeUTF8' and 'decodeUTF8'.
instance UTF8 [Int] where
  encode = encodeUTF8 . fmap fi
  decode = fmap fi . decodeUTF8
-- | 'String' goes through the @[Int]@ instance: characters are mapped to
-- code points with 'ord' before encoding and back with 'chr' after
-- decoding.
instance UTF8 String where
  encode = encode . fmap ord
  decode = fmap chr . decode
-- | Encode a value and hand the resulting bytes to a continuation.
withUTF8 :: (UTF8 a) => a -> (ByteString -> b) -> b
withUTF8 a k = k (encode a)
-- | Encode a value and write the bytes with 'B.putStr'.
putUTF8 :: (UTF8 a) => a -> IO ()
putUTF8 = flip withUTF8 B.putStr
-- | Encode a value and write the bytes with 'B8.putStrLn'.
putUTF8Ln :: (UTF8 a) => a -> IO ()
putUTF8Ln = flip withUTF8 B8.putStrLn
-- | Encode a value and write the bytes to the handle with 'B.hPut'.
hPutUTF8 :: (UTF8 a) => Handle -> a -> IO ()
hPutUTF8 h = flip withUTF8 (B.hPut h)
-- | Encode a value and write the bytes to the handle with 'B8.hPutStrLn'.
hPutUTF8Ln :: (UTF8 a) => Handle -> a -> IO ()
hPutUTF8Ln h = flip withUTF8 (B8.hPutStrLn h)
-- | Read a whole file with 'B.readFile' and decode its contents.
readUTF8File :: (UTF8 a) => FilePath -> IO a
readUTF8File path = fmap decode (B.readFile path)
-- | Encode a value and write the bytes to a file with 'B.writeFile'.
writeUTF8File :: (UTF8 a) => FilePath -> a -> IO ()
writeUTF8File p = B.writeFile p . encode
-- | Encode a value and append the bytes to a file with 'B.appendFile'.
appendUTF8File :: (UTF8 a) => FilePath -> a -> IO ()
appendUTF8File p = B.appendFile p . encode
-- | Read one line from the handle with 'B.hGetLine' and decode it.
hGetUTF8Line :: (UTF8 a) => Handle -> IO a
hGetUTF8Line h = fmap decode (B.hGetLine h)
-- | Read everything from the handle with 'B.hGetContents' and decode it.
hGetUTF8Contents :: (UTF8 a) => Handle -> IO a
hGetUTF8Contents h = fmap decode (B.hGetContents h)
-- | Read from the handle with 'B.hGet' and decode the result.
--
-- The 'Int' is passed to 'B.hGet' unchanged, so it counts bytes, not
-- characters; a multi-byte sequence split by the read is cut short by the
-- decoder.
hGetUTF8 :: (UTF8 a) => Handle -> Int -> IO a
hGetUTF8 h n = fmap decode (B.hGet h n)
-- | Read from the handle with 'B.hGetNonBlocking' and decode the result.
--
-- The 'Int' is passed to 'B.hGetNonBlocking' unchanged, so it counts
-- bytes, not characters.
hGetUTF8NonBlocking :: (UTF8 a) => Handle -> Int -> IO a
hGetUTF8NonBlocking h n = fmap decode (B.hGetNonBlocking h n)
-- | Length in bytes of the UTF-8 sequence introduced by the given first
-- byte, or @0@ when the byte cannot start a sequence.
--
-- Bytes @0x80..0xBF@ are continuation bytes and bytes @>= 0xF8@ are not
-- part of the 1-4 byte forms handled here; both yield @0@.
lenUTF8 :: Word8 -> Int
{-# INLINE lenUTF8 #-}
lenUTF8 w8
  | w8 < 0x80 = 1
  | w8 < 0xc0 = 0 -- 10xxxxxx: continuation byte, never a lead byte
  | w8 < 0xe0 = 2
  | w8 < 0xf0 = 3
  | w8 < 0xf8 = 4
  | otherwise = 0
-- | Number of 16-bit units in the UTF-16 sequence introduced by the given
-- first unit.
--
-- A unit in @0xD800..0xDBFF@ (top six bits @0x36@) gives @2@, a unit in
-- @0xDC00..0xDFFF@ (top six bits @0x37@) gives @0@, anything else @1@.
lenUTF16 :: Word16 -> Int
lenUTF16 w16
  | w16 `shiftR` 10 == 0x36 = 2
  | w16 `shiftR` 10 == 0x37 = 0
  | otherwise = 1
-- | Byte lengths of the successive UTF-8 sequences in the input, as
-- reported by 'lenUTF8' for each lead byte.
--
-- The list ends at the end of input or at the first byte for which
-- 'lenUTF8' returns @0@. Only lead bytes are inspected, so the last length
-- may run past the end of a truncated input.
countUTF8 :: ByteString -> [Int]
countUTF8 s = go 0 (B.length s) s
  where
    go :: Int -> Int -> ByteString -> [Int]
    go i len bs
      | len <= i = []
      | otherwise =
          case lenUTF8 (unsafeIndex bs i) of
            0 -> []
            n -> n : go (i + n) len bs
-- | Encode a list of code points as UTF-8 by concatenating the per-code
-- point byte lists produced by @encodeUTF8'@ and packing them.
encodeUTF8 :: [Word32] -> ByteString
encodeUTF8 = B.pack . concat . encodeUTF8'
#if !defined(__GLASGOW_HASKELL__)
-- | Encode each code point separately, giving one byte list per input
-- element (lazily, in input order).
--
-- The form is chosen by magnitude: below @0x80@ one byte, below @0x800@
-- two, below @0x10000@ three, below @0x200000@ four. Larger values do not
-- fit in 21 bits and yield an empty list. No further validation is done
-- (surrogates and values above U+10FFFF are encoded as they are).
encodeUTF8' :: [Word32] -> [[Word8]]
encodeUTF8' = map encodeOne
  where
    encodeOne :: Word32 -> [Word8]
    encodeOne x
      | x < 0x80 = [fromIntegral x]
      | x < 0x800 = [lead 0xc0 6, cont 0]
      | x < 0x10000 = [lead 0xe0 12, cont 6, cont 0]
      | x < 0x200000 = [lead 0xf0 18, cont 12, cont 6, cont 0]
      | otherwise = []
      where
        -- First byte: length tag OR-ed with the topmost payload bits.
        lead :: Word32 -> Int -> Word8
        lead tag n = fromIntegral (x `shiftR` n .|. tag)
        -- Continuation byte: 10xxxxxx with six payload bits from offset n.
        cont :: Int -> Word8
        cont n = fromIntegral (x `shiftR` n .&. 0x3f .|. 0x80)
-- | Decode UTF-8 bytes into a lazy list of code points.
--
-- Each lead byte is classified with 'lenUTF8'. Decoding stops (the list
-- ends) at the end of input, at a lead byte for which 'lenUTF8' returns
-- @0@, or at a sequence cut short by the end of input. Continuation bytes
-- are not validated; only their low six bits are used.
decodeUTF8 :: ByteString -> [Word32]
decodeUTF8 s = go 0
  where
    len :: Int
    len = B.length s

    -- Byte at offset k, masked to its payload bits and widened.
    bits :: Word8 -> Int -> Word32
    bits mask k = fromIntegral (unsafeIndex s k .&. mask)

    go :: Int -> [Word32]
    go i
      | len <= i = []
      | otherwise =
          case lenUTF8 (unsafeIndex s i) of
            1 -> bits 0xff i : go (i + 1)
            2 | i + 1 < len ->
                  ( bits 0x1f i `shiftL` 6
                      `xor` bits 0x3f (i + 1) )
                    : go (i + 2)
            3 | i + 2 < len ->
                  ( bits 0x0f i `shiftL` 12
                      `xor` bits 0x3f (i + 1) `shiftL` 6
                      `xor` bits 0x3f (i + 2) )
                    : go (i + 3)
            -- A 4-byte lead is 11110xxx: only three payload bits. Masking
            -- with 0x1f would leak the tag bit into bit 22 of the result.
            4 | i + 3 < len ->
                  ( bits 0x07 i `shiftL` 18
                      `xor` bits 0x3f (i + 1) `shiftL` 12
                      `xor` bits 0x3f (i + 2) `shiftL` 6
                      `xor` bits 0x3f (i + 3) )
                    : go (i + 4)
            -- Invalid lead byte or truncated sequence.
            _ -> []
#else
-- | Encode each code point separately, giving one byte list per input
-- element (lazily, in input order).
--
-- The form is chosen by magnitude: below @0x80@ one byte, below @0x800@
-- two, below @0x10000@ three, below @0x200000@ four. Larger values do not
-- fit in 21 bits and yield an empty list. No further validation is done
-- (surrogates and values above U+10FFFF are encoded as they are).
--
-- Written with ordinary 'Data.Bits' operations rather than by unboxing
-- @W32#@ / boxing @W8#@ around @Word#@ primops: those constructors carry
-- sized primitive types since GHC 9.2, and 'fromIntegral' narrows to
-- 'Word8' properly.
encodeUTF8' :: [Word32] -> [[Word8]]
encodeUTF8' = map encodeOne
  where
    encodeOne :: Word32 -> [Word8]
    encodeOne x
      | x < 0x80 = [fromIntegral x]
      | x < 0x800 = [lead 0xc0 6, cont 0]
      | x < 0x10000 = [lead 0xe0 12, cont 6, cont 0]
      | x < 0x200000 = [lead 0xf0 18, cont 12, cont 6, cont 0]
      | otherwise = []
      where
        -- First byte: length tag OR-ed with the topmost payload bits.
        lead :: Word32 -> Int -> Word8
        lead tag n = fromIntegral (x `shiftR` n .|. tag)
        -- Continuation byte: 10xxxxxx with six payload bits from offset n.
        cont :: Int -> Word8
        cont n = fromIntegral (x `shiftR` n .&. 0x3f .|. 0x80)
-- | Decode UTF-8 bytes into a lazy list of code points.
--
-- Each lead byte is classified with 'lenUTF8'. Decoding stops (the list
-- ends) at the end of input, at a lead byte for which 'lenUTF8' returns
-- @0@, or at a sequence cut short by the end of input. Continuation bytes
-- are not validated; only their low six bits are used.
decodeUTF8 :: ByteString -> [Word32]
decodeUTF8 s = go 0
  where
    len :: Int
    len = B.length s

    -- Byte at offset k, masked to its payload bits and widened.
    bits :: Word8 -> Int -> Word32
    bits mask k = fromIntegral (unsafeIndex s k .&. mask)

    go :: Int -> [Word32]
    go i
      | len <= i = []
      | otherwise =
          case lenUTF8 (unsafeIndex s i) of
            1 -> bits 0xff i : go (i + 1)
            2 | i + 1 < len ->
                  ( bits 0x1f i `shiftL` 6
                      `xor` bits 0x3f (i + 1) )
                    : go (i + 2)
            3 | i + 2 < len ->
                  ( bits 0x0f i `shiftL` 12
                      `xor` bits 0x3f (i + 1) `shiftL` 6
                      `xor` bits 0x3f (i + 2) )
                    : go (i + 3)
            -- A 4-byte lead is 11110xxx: only three payload bits. Masking
            -- with 0x1f would leak the tag bit into bit 22 of the result.
            4 | i + 3 < len ->
                  ( bits 0x07 i `shiftL` 18
                      `xor` bits 0x3f (i + 1) `shiftL` 12
                      `xor` bits 0x3f (i + 2) `shiftL` 6
                      `xor` bits 0x3f (i + 3) )
                    : go (i + 4)
            -- Invalid lead byte or truncated sequence.
            _ -> []
#endif
-- | Convert a code point to a 'Char'.
--
-- On GHC this goes through the @chr#@ primop and performs no range check,
-- so the argument must be a valid code point.
w2c :: Word32 -> Char
{-# INLINE w2c #-}
#if defined(__GLASGOW_HASKELL__)
-- Widen through 'Int' instead of matching on @W32#@: since GHC 9.2 that
-- constructor holds a @Word32#@, which 'word2Int#' does not accept.
w2c w = case fromIntegral w of
  I# i -> C# (chr# i)
#else
-- 'chr' is the only conversion in scope here; unlike the GHC path it
-- range-checks its argument.
w2c = chr . fromIntegral
#endif
-- | Code point of a 'Char' as a 'Word32'.
--
-- Written with 'ord' and 'fromIntegral' for every compiler: unboxing via
-- @C#@ and reboxing with @W32#@ around @int2Word#@ no longer type-checks
-- since GHC 9.2, where @W32#@ holds a @Word32#@.
c2w :: Char -> Word32
{-# INLINE c2w #-}
c2w = fromIntegral . ord
-- | Convert an 'Int' to a 'Word32', wrapping modulo 2^32.
--
-- Uses 'fromIntegral' for every compiler. Wrapping @int2Word#@ directly in
-- @W32#@ skipped the narrowing step on 64-bit platforms and no longer
-- type-checks since GHC 9.2, where @W32#@ holds a @Word32#@.
i2w :: Int -> Word32
{-# INLINE i2w #-}
i2w = fromIntegral
-- | Convert a 'Word32' to an 'Int'.
--
-- Uses 'fromIntegral' for every compiler: matching on @W32#@ and applying
-- @word2Int#@ no longer type-checks since GHC 9.2, where @W32#@ holds a
-- @Word32#@.
w2i :: Word32 -> Int
{-# INLINE w2i #-}
w2i = fromIntegral
-- | The eight bits of a byte, most significant first, each as @0@ or @1@.
toBits :: Word8 -> [Word8]
toBits w8 = fmap ((.&. 0x01) . (w8 `shiftR`)) [7, 6, 5, 4, 3, 2, 1, 0]
-- | Assemble a byte from a list of bits given most significant first.
--
-- The last element is shifted by 0, the one before it by 1, and so on;
-- only the last eight elements contribute. Elements are OR-ed in as they
-- are, so they are expected to be @0@ or @1@.
fromBits :: [Word8] -> Word8
fromBits =
  foldl' (\a (n, b) -> a .|. b `shiftL` n) 0
    . reverse
    . zip [0 .. 7]
    . reverse
-- | The sixteen lowercase hexadecimal digits, indexed by nibble value.
hexTab :: ByteString
hexTab = B.pack . fmap B.c2w $ "0123456789abcdef"
-- | Render an 'Int' as @0x@ followed by eight lowercase hex digits.
--
-- The digits are the nibbles at bit offsets 28 down to 0, so only the low
-- 32 bits of the argument appear in the output.
showHex :: Int -> String
showHex i = "0x" ++ fmap nibble [28, 24, 20, 16, 12, 8, 4, 0]
  where
    -- Masking with 0xf keeps the index within the 16 entries of 'hexTab'.
    nibble :: Int -> Char
    nibble n = B.w2c (unsafeIndex hexTab (i `shiftR` n .&. 0xf))
-- | Encode a value, run the bytes through 'flipString' with 'flipTab', and
-- decode the result back to the same type.
flipUTF8 :: (UTF8 a) => a -> a
flipUTF8 = decode . flipString flipTab . encode
-- | Encode a value, run the bytes through 'flipString' with 'unflipTab',
-- and decode the result back to the same type.
unflipUTF8 :: (UTF8 a) => a -> a
unflipUTF8 = decode . flipString unflipTab . encode
-- | Substitute every code point through the given table and reverse the
-- text.
--
-- The input is decoded to code points, each one is looked up in the table
-- (a code point with no entry becomes a space), the resulting characters
-- are reversed, and the string is encoded again.
flipString :: [(Int, Int)] -> ByteString -> ByteString
flipString tab =
  encode
    . reverse
    . fmap (maybe ' ' chr . flip lookup tab)
    . decode
-- | 'flipTab' with the two components of every pair swapped.
unflipTab :: [(Int, Int)]
unflipTab = fmap (uncurry (flip (,))) flipTab
-- | Association list from a character's code point to the code point that
-- 'flipUTF8' substitutes for it (presumably its upside-down look-alike).
flipTab :: [(Int, Int)]
flipTab =
  fmap
    (\(a, b) -> (ord a, b))
    [ ('a', 0x250)
    , ('b', ord 'q')
    , ('c', 0x254)
    , ('d', ord 'p')
    , ('e', 0x1dd)
    , ('f', 0x25f)
    , ('g', 0x183)
    , ('h', 0x265)
    , ('i', 0x131)
    , ('j', 0x27e)
    , ('k', 0x29e)
    , ('l', ord 'l')
    , ('m', 0x26f)
    , ('n', ord 'u')
    , ('o', ord 'o')
    , ('p', ord 'b')
    , ('q', ord 'd')
    , ('r', 0x279)
    , ('s', ord 's')
    , ('t', 0x287)
    , ('u', ord 'n')
    , ('v', 0x28c)
    , ('w', 0x28d)
    , ('x', ord 'x')
    , ('y', 0x28e)
    , ('z', ord 'z')
    , ('.', 0x2d9)
    , ('[', ord ']')
    , (']', ord '[')
    , ('{', ord '}')
    , ('}', ord '{')
    , ('<', ord '>')
    , ('>', ord '<')
    , ('?', 0xbf)
    , ('!', 0xa1)
    , ('\'', ord ',')
    , ('_', 0x203e)
    , (';', 0x061b)
    ]