Wednesday, September 25, 2013

Function To Check ASCII or UTF-8 Format


/// <summary>
/// Classifies a string as plain ASCII or as text that needs UTF-8.
/// </summary>
/// <param name="input">The text to classify. Must not be null.</param>
/// <returns>
/// "ASCII" when the string survives an ASCII encode/decode round trip unchanged
/// (this includes the empty string); "UTF-8" when its UTF-8 encoding starts with
/// the byte order mark or is structurally valid and contains at least one
/// multi-byte sequence; otherwise <see cref="String.Empty"/>.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
public string CheckEncoding(string input)
{
    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    // Any character the ASCII encoder cannot represent is substituted during
    // encoding, so the round trip only matches for pure-ASCII text.
    string asciiRoundTrip = Encoding.ASCII.GetString(Encoding.ASCII.GetBytes(input));
    if (input == asciiRoundTrip)
    {
        return "ASCII";
    }

    byte[] b = Encoding.UTF8.GetBytes(input);
    int length = b.Length;

    // A leading UTF-8 byte order mark (EF BB BF) is accepted without scanning the rest.
    if (length >= 3 && b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF)
    {
        return "UTF-8";
    }

    bool sawMultiByte = false;
    int i = 0;

    // Scan every byte up to the end of the buffer; the per-sequence bounds check
    // below is what guards the look-ahead, so no bytes at the tail are skipped.
    while (i < length)
    {
        byte lead = b[i];
        if (lead <= 0x7F)
        {
            // Single-byte (ASCII) code unit.
            i += 1;
            continue;
        }

        int sequenceLength;
        if (lead >= 0xC2 && lead <= 0xDF)
        {
            sequenceLength = 2;
        }
        else if (lead >= 0xE0 && lead <= 0xEF)
        {
            // Upper bound is 0xEF: 0xF0 starts a four-byte sequence, not a three-byte one.
            sequenceLength = 3;
        }
        else if (lead >= 0xF0 && lead <= 0xF4)
        {
            sequenceLength = 4;
        }
        else
        {
            // Stray continuation byte or a lead byte that never occurs in UTF-8.
            return String.Empty;
        }

        if (i + sequenceLength > length)
        {
            // Sequence is truncated by the end of the buffer.
            return String.Empty;
        }

        // Every trailing byte of the sequence must be a continuation byte (0x80..0xBF).
        for (int k = 1; k < sequenceLength; k++)
        {
            byte continuation = b[i + k];
            if (continuation < 0x80 || continuation > 0xBF)
            {
                return String.Empty;
            }
        }

        i += sequenceLength;
        sawMultiByte = true;
    }

    return sawMultiByte ? "UTF-8" : String.Empty;
}

No comments:

Post a Comment