%% @doc Minimal UTF-8 encoder/decoder working on lists of integer code points.
-module(utf8).

-export([encode/1, decode/1]).

%% @doc Encode a list of code points as a UTF-8 binary.
%%
%% Returns the encoded binary on success. Returns
%% `{error, invalid_surrogate_char}' as soon as a code point in the range
%% 16#D800..16#DFFF is met, and `{error, no_chars_above_10ffff_hex}' as soon
%% as an element compares greater than 16#10FFFF. Anything encoded before the
%% offending element is discarded.
-spec encode([integer()]) ->
    binary() | {error, invalid_surrogate_char | no_chars_above_10ffff_hex}.
encode(List) ->
    to_bin(List, []).

%% @doc Decode a UTF-8 binary into a list of code points.
%%
%% Only the four well-formed lead/continuation byte layouts are matched; any
%% other byte sequence (stray continuation byte, truncated sequence, ...)
%% fails with a `function_clause' error. Decoded values are not range-checked,
%% so overlong forms and surrogate code points are passed through as-is.
-spec decode(binary()) -> [non_neg_integer()].
decode(Bin) ->
    from_bin(Bin, []).

%% Encoder loop. Out accumulates the encoded characters in reverse order, one
%% small binary per character; it is reversed and flattened once at the end.
to_bin([], Out) ->
    list_to_binary(lists:reverse(Out));
%% One byte: 0xxxxxxx
to_bin([Char | Rest], Out) when Char < 16#80 ->
    to_bin(Rest, [<<Char>> | Out]);
%% Two bytes: 110xxxxx 10yyyyyy (11 payload bits)
to_bin([Char | Rest], Out) when Char < 16#800 ->
    <<_:5, X:5, Y:6>> = <<Char:16>>,
    to_bin(Rest, [<<2#110:3, X:5, 2#10:2, Y:6>> | Out]);
%% Surrogates are rejected; this clause must stay ahead of the three-byte
%% clause, whose guard would otherwise accept them.
to_bin([Char | _], _) when Char > 16#D7FF, Char < 16#E000 ->
    {error, invalid_surrogate_char};
%% Three bytes: 1110xxxx 10yyyyyy 10zzzzzz (16 payload bits)
to_bin([Char | Rest], Out) when Char < 16#10000 ->
    <<X:4, Y:6, Z:6>> = <<Char:16>>,
    to_bin(Rest, [<<2#1110:4, X:4, 2#10:2, Y:6, 2#10:2, Z:6>> | Out]);
%% Four bytes: 11110www 10xxxxxx 10yyyyyy 10zzzzzz (21 payload bits)
to_bin([Char | Rest], Out) when Char < 16#110000 ->
    <<_:3, W:3, X:6, Y:6, Z:6>> = <<Char:24>>,
    Enc = <<2#11110:5, W:3, 2#10:2, X:6, 2#10:2, Y:6, 2#10:2, Z:6>>,
    to_bin(Rest, [Enc | Out]);
to_bin([Char | _], _) when Char > 16#10FFFF ->
    {error, no_chars_above_10ffff_hex}.

%% Decoder loop. Out accumulates code points in reverse order. Rest is kept
%% as the trailing /binary segment of every pattern.
from_bin(<<>>, Out) ->
    lists:reverse(Out);
%% One byte: 0xxxxxxx
from_bin(<<0:1, X:7, Rest/binary>>, Out) ->
    from_bin(Rest, [X | Out]);
%% Two bytes: 110xxxxx 10yyyyyy
from_bin(<<2#110:3, X:5, 2#10:2, Y:6, Rest/binary>>, Out) ->
    <<New:16>> = <<0:5, X:5, Y:6>>,
    from_bin(Rest, [New | Out]);
%% Three bytes: 1110xxxx 10yyyyyy 10zzzzzz
from_bin(<<2#1110:4, X:4, 2#10:2, Y:6, 2#10:2, Z:6, Rest/binary>>, Out) ->
    <<New:16>> = <<X:4, Y:6, Z:6>>,
    from_bin(Rest, [New | Out]);
%% Four bytes: 11110www 10xxxxxx 10yyyyyy 10zzzzzz
from_bin(<<2#11110:5, W:3, 2#10:2, X:6, 2#10:2, Y:6, 2#10:2, Z:6, Rest/binary>>, Out) ->
    <<New:24>> = <<0:3, W:3, X:6, Y:6, Z:6>>,
    from_bin(Rest, [New | Out]).