Escaper.java

/*
 * Copyright 2017 Michael Mackenzie High
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.mackenziehigh.sexpr.internal;

import java.util.Arrays;

/**
 * Escaper.
 *
 * <p>
 * Herein, an escape sequence is any one of the following substrings:
 * </p>
 * <ul>
 * <li>\b</li>
 * <li>\t</li>
 * <li>\n</li>
 * <li>\f</li>
 * <li>\r</li>
 * <li>\'</li>
 * <li>\"</li>
 * <li>\\</li>
 * <li>\ uWXYZ, where WXYZ is a four-digit hexadecimal Unicode character code.</li>
 * </ul>
 */
public final class Escaper
{
    /**
     * This is the singleton instance of this class.
     */
    public static final Escaper instance = new Escaper();

    /**
     * Lookup Table.
     */
    private static final String[] ASCII = new String[128];

    static
    {
        ASCII['\b'] = "\\b";
        ASCII['\t'] = "\\t";
        ASCII['\n'] = "\\n";
        ASCII['\f'] = "\\f";
        ASCII['\r'] = "\\r";
        ASCII[34] = "\\\"";
        ASCII[39] = "\\'";
        ASCII[92] = "\\\\";

        /**
         * Normal Characters.
         */
        ASCII[32] = Character.toString((char) 32);
        ASCII[33] = "!";
        //
        ASCII[35] = "#";
        ASCII[36] = "$";
        ASCII[37] = "%";
        ASCII[38] = "&";
        //
        ASCII[40] = "(";
        ASCII[41] = ")";
        ASCII[42] = "*";
        ASCII[43] = "+";
        ASCII[44] = ",";
        ASCII[45] = "-";
        ASCII[46] = ".";
        ASCII[47] = "/";
        ASCII[48] = "0";
        ASCII[49] = "1";
        ASCII[50] = "2";
        ASCII[51] = "3";
        ASCII[52] = "4";
        ASCII[53] = "5";
        ASCII[54] = "6";
        ASCII[55] = "7";
        ASCII[56] = "8";
        ASCII[57] = "9";
        ASCII[58] = ":";
        ASCII[59] = ";";
        ASCII[60] = "<";
        ASCII[61] = "=";
        ASCII[62] = ">";
        ASCII[63] = "?";
        ASCII[64] = "@";
        ASCII[65] = "A";
        ASCII[66] = "B";
        ASCII[67] = "C";
        ASCII[68] = "D";
        ASCII[69] = "E";
        ASCII[70] = "F";
        ASCII[71] = "G";
        ASCII[72] = "H";
        ASCII[73] = "I";
        ASCII[74] = "J";
        ASCII[75] = "K";
        ASCII[76] = "L";
        ASCII[77] = "M";
        ASCII[78] = "N";
        ASCII[79] = "O";
        ASCII[80] = "P";
        ASCII[81] = "Q";
        ASCII[82] = "R";
        ASCII[83] = "S";
        ASCII[84] = "T";
        ASCII[85] = "U";
        ASCII[86] = "V";
        ASCII[87] = "W";
        ASCII[88] = "X";
        ASCII[89] = "Y";
        ASCII[90] = "Z";
        ASCII[91] = "[";
        //
        ASCII[93] = "]";
        ASCII[94] = "^";
        ASCII[95] = "_";
        ASCII[96] = "`";
        ASCII[97] = "a";
        ASCII[98] = "b";
        ASCII[99] = "c";
        ASCII[100] = "d";
        ASCII[101] = "e";
        ASCII[102] = "f";
        ASCII[103] = "g";
        ASCII[104] = "h";
        ASCII[105] = "i";
        ASCII[106] = "j";
        ASCII[107] = "k";
        ASCII[108] = "l";
        ASCII[109] = "m";
        ASCII[110] = "n";
        ASCII[111] = "o";
        ASCII[112] = "p";
        ASCII[113] = "q";
        ASCII[114] = "r";
        ASCII[115] = "s";
        ASCII[116] = "t";
        ASCII[117] = "u";
        ASCII[118] = "v";
        ASCII[119] = "w";
        ASCII[120] = "x";
        ASCII[121] = "y";
        ASCII[122] = "z";
        ASCII[123] = "{";
        ASCII[124] = "|";
        ASCII[125] = "}";
        ASCII[126] = "~";
    }

    /**
     * Sole Constructor.
     */
    private Escaper ()
    {
        // Pass
    }

    /**
     * This method creates a string from a char-array,
     * with each special-character replaced with
     * a relevant escape sequence.
     *
     * @param input will be converted to a string.
     * @return the new string.
     */
    public String escape (final char[] input)
    {
        final StringBuilder str = new StringBuilder();

        for (int i = 0; i < input.length; i++)
        {
            final char chr = input[i];

            if (chr == '\b')
            {
                str.append("\\b");
            }
            else if (chr == '\t')
            {
                str.append("\\t");
            }
            else if (chr == '\n')
            {
                str.append("\\n");
            }
            else if (chr == '\f')
            {
                str.append("\\f");
            }
            else if (chr == '\r')
            {
                str.append("\\r");
            }
            else if (chr == '\'')
            {
                str.append("\\\'");
            }
            else if (chr == '\"')
            {
                str.append("\\\"");
            }
            else if (chr == '\\')
            {
                str.append("\\\\");
            }
            else if (chr < ASCII.length && ASCII[chr] != null)
            {
                str.append(ASCII[chr]);
            }
            else
            {
                str.append(String.format("\\u%04X", (int) chr));
            }
        }

        return str.toString();
    }

    /**
     * This method replaces escape sequences in a string
     * with the equivalent special-characters and then
     * returns the equivalent char-array.
     *
     * <p>
     * In short, X.equals(expand(escape(X))).
     * </p>
     *
     * @param input is an escaped string.
     * @return the non-escaped char-array.
     */
    public char[] expand (final String input)
    {
        final char[] chars = input.toCharArray();

        final char[] temp = new char[chars.length];

        int i = 0; // position in the input
        int k = 0; // position in the output

        while (i < chars.length)
        {
            final int remaining = chars.length - i;

            final boolean slash = remaining >= 1 && chars[i] == '\\';

            if (slash && remaining == 0)
            {
                throw new IllegalArgumentException("Escape Slash at End of String");
            }

            /**
             * Detect the two-character escape-sequences.
             */
            final boolean s1 = slash && remaining >= 2 && chars[i + 1] == 'b';
            final boolean s2 = slash && remaining >= 2 && chars[i + 1] == 't';
            final boolean s3 = slash && remaining >= 2 && chars[i + 1] == 'n';
            final boolean s4 = slash && remaining >= 2 && chars[i + 1] == 'f';
            final boolean s5 = slash && remaining >= 2 && chars[i + 1] == 'r';
            final boolean s6 = slash && remaining >= 2 && chars[i + 1] == '\\';
            final boolean s7 = slash && remaining >= 2 && chars[i + 1] == '\'';
            final boolean s8 = slash && remaining >= 2 && chars[i + 1] == '\"';

            /**
             * Detect the hexadecimal Unicode escape-sequences.
             */
            final boolean d0 = slash && remaining >= 6 && chars[i + 1] == 'u';
            final boolean d1 = slash && remaining >= 6 && isHex(chars[i + 2]);
            final boolean d2 = slash && remaining >= 6 && isHex(chars[i + 3]);
            final boolean d3 = slash && remaining >= 6 && isHex(chars[i + 4]);
            final boolean d4 = slash && remaining >= 6 && isHex(chars[i + 5]);

            if (slash && !d0 && !s1 && !s2 && !s3 && !s4 && !s5 && !s6 && !s7 && !s8)
            {
                throw new IllegalArgumentException("No Such Escape Sequence");
            }

            if (slash && d0 && (!d1 || !d2 || !d3 || !d4))
            {
                throw new IllegalArgumentException("Invalid Unicode Escape Sequence");
            }

            /**
             * If the character is not part of an escape sequence,
             * then take the character itself.
             */
            char r = chars[i];

            /**
             * Replace escape-sequences with their expansions.
             */
            r = s1 ? '\b' : r;
            r = s2 ? '\t' : r;
            r = s3 ? '\n' : r;
            r = s4 ? '\f' : r;
            r = s5 ? '\r' : r;
            r = s6 ? '\\' : r;
            r = s7 ? '\'' : r;
            r = s8 ? '\"' : r;
            r = d0 && d1 && d2 && d3 && d4 ? (char) Integer.parseInt("" + chars[i + 2] + chars[i + 3] + chars[i + 4] + chars[i + 5], 16) : r;

            /**
             * Append the character onto the output.
             */
            temp[k++] = r;

            /**
             * Advance forward.
             * 6 = Unicode Escape Sequence
             * 2 = Two Character Escape Sequence
             * 1 = No Escape Sequence
             */
            i += slash ? (d0 ? 6 : 2) : 1;
        }

        final char[] result = Arrays.copyOf(temp, k);

        return result;
    }

    private boolean isHex (final char value)
    {
        switch (value)
        {
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
            case 'A':
            case 'B':
            case 'C':
            case 'D':
            case 'E':
            case 'F':
                return true;
            default:
                return false;
        }
    }
}