001/*
002 * CDDL HEADER START
003 *
004 * The contents of this file are subject to the terms of the
005 * Common Development and Distribution License, Version 1.0 only
006 * (the "License").  You may not use this file except in compliance
007 * with the License.
008 *
009 * You can obtain a copy of the license at legal-notices/CDDLv1_0.txt
010 * or http://forgerock.org/license/CDDLv1.0.html.
011 * See the License for the specific language governing permissions
012 * and limitations under the License.
013 *
014 * When distributing Covered Code, include this CDDL HEADER in each
015 * file and include the License file at legal-notices/CDDLv1_0.txt.
016 * If applicable, add the following below this CDDL HEADER, with the
017 * fields enclosed by brackets "[]" replaced with your own identifying
018 * information:
019 *      Portions Copyright [yyyy] [name of copyright owner]
020 *
021 * CDDL HEADER END
022 *
023 *
024 *      Copyright 2013-2014 Manuel Gaupp
025 */
026package org.forgerock.opendj.ldap;
027
028import java.math.BigInteger;
029import java.util.regex.Matcher;
030import java.util.regex.Pattern;
031
032import org.forgerock.util.Reject;
033import org.forgerock.i18n.LocalizableMessage;
034
035import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_NO_VALID_IDENTIFIEDCHOICE;
036import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_NO_VALID_IDENTIFIER;
037import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_NO_VALID_INTEGER;
038import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_NO_VALID_SEPARATOR;
039import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_NO_VALID_STRING;
040import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_PATTERN_NO_MATCH;
041import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_SPACE_CHAR_EXPECTED;
042
043/**
044 * This class implements a parser for strings which are encoded using the
045 * Generic String Encoding Rules (GSER) defined in RFC 3641.
046 *
047 * @see <a href="http://tools.ietf.org/html/rfc3641">RFC 3641 - Generic String
048 * Encoding Rules (GSER) for ASN.1 Types</a>
049 */
050public final class GSERParser {
051
052    private final String gserValue;
053
054    private int pos;
055
056    private final int length;
057
058    /**
059     * Pattern to match an identifier defined in RFC 3641, section 3.4.
060     * <pre>
061     * An &lt;identifier&gt; conforms to the definition of an identifier in ASN.1
062     * notation (Clause 11.3 of X.680 [8]).  It begins with a lowercase
063     * letter and is followed by zero or more letters, digits, and hyphens.
064     * A hyphen is not permitted to be the last character, nor is it to be
065     * followed by another hyphen.  The case of letters in an identifier is
066     * always significant.
067     *
068     *    identifier    = lowercase *alphanumeric *(hyphen 1*alphanumeric)
069     *    alphanumeric  = uppercase / lowercase / decimal-digit
070     *    uppercase     = %x41-5A  ; "A" to "Z"
071     *    lowercase     = %x61-7A  ; "a" to "z"
072     *    decimal-digit = %x30-39  ; "0" to "9"
073     *    hyphen        = "-"
074     * </pre>
075     */
076    private static final Pattern GSER_IDENTIFIER = Pattern.compile("^([a-z]([A-Za-z0-9]|(-[A-Za-z0-9]))*)");
077
078    /**
079     * Pattern to match the identifier part (including the colon) of an
080     * IdentifiedChoiceValue defined in RFC 3641, section 3.12.
081     * <pre>
082     *    IdentifiedChoiceValue = identifier ":" Value
083     * </pre>
084     */
085    private static final Pattern GSER_CHOICE_IDENTIFIER = Pattern.compile("^([a-z]([A-Za-z0-9]|(-[A-Za-z0-9]))*:)");
086
087    /**
088     * Pattern to match "sp", containing zero, one or more space characters.
089     * <pre>
090     *    sp = *%x20  ; zero, one or more space characters
091     * </pre>
092     */
093    private static final Pattern GSER_SP = Pattern.compile("^( *)");
094
095    /**
096     * Pattern to match "msp", containing at least one space character.
097     * <pre>
098     *    msp = 1*%x20  ; one or more space characters
099     * </pre>
100     */
101    private static final Pattern GSER_MSP = Pattern.compile("^( +)");
102
103    /**
104     * Pattern to match an Integer value.
105     */
106    private static final Pattern GSER_INTEGER = Pattern.compile("^(\\d+)");
107
108    /**
109     * Pattern to match a GSER StringValue, defined in RFC 3641, section 3.2:
110     * <pre>
111     * Any embedded double quotes in the resulting UTF-8 character string
112     * are escaped by repeating the double quote characters.
113     *
114     * [...]
115     *
116     *    StringValue       = dquote *SafeUTF8Character dquote
117     *    dquote            = %x22 ; &quot; (double quote)
118     * </pre>
119     */
120    private static final Pattern GSER_STRING = Pattern.compile("^(\"([^\"]|(\"\"))*\")");
121
122    /**
123     * Pattern to match the beginning of a GSER encoded Sequence.
124     * <pre>
125     *    SequenceValue = ComponentList
126     *    ComponentList = "{" [ sp NamedValue *( "," sp NamedValue) ] sp "}"
127     * </pre>
128     */
129    private static final Pattern GSER_SEQUENCE_START = Pattern.compile("^(\\{)");
130
131    /**
132     * Pattern to match the end of a GSER encoded Sequence.
133     * <pre>
134     *    SequenceValue = ComponentList
135     *    ComponentList = "{" [ sp NamedValue *( "," sp NamedValue) ] sp "}"
136     * </pre>
137     */
138    private static final Pattern GSER_SEQUENCE_END = Pattern.compile("^(\\})");
139
140    /**
141     * Pattern to match the separator used in GSER encoded sequences.
142     */
143    private static final Pattern GSER_SEP = Pattern.compile("^(,)");
144
145    /**
146     * Creates a new GSER Parser.
147     *
148     * @param value the GSER encoded String value
149     */
150    public GSERParser(CharSequence value) {
151        Reject.checkNotNull(value);
152        this.gserValue = value.toString();
153        this.pos = 0;
154        this.length = value.length();
155    }
156
157    /**
158     * Determines if the GSER String contains at least one character to be read.
159     *
160     * @return <code>true</code> if there is at least one remaining character or
161     * <code>false</code> otherwise.
162     */
163    public boolean hasNext() {
164        return pos < length;
165    }
166
167    /**
168     * Determines if the remaining GSER String matches the provided pattern.
169     *
170     * @param pattern the pattern to search for
171     *
172     * @return <code>true</code> if the remaining string matches the pattern or
173     * <code>false</code> otherwise.
174     */
175    private boolean hasNext(Pattern pattern) {
176        if (!hasNext()) {
177            return false;
178        }
179
180        Matcher matcher = pattern.matcher(gserValue.substring(pos, length));
181
182        return matcher.find();
183    }
184
185    /**
186     * Returns the String matched by the first capturing group of the pattern.
187     * The parser advances past the input matched by the first capturing group.
188     *
189     * @param pattern the pattern to search for
190     *
191     * @return the String matched by the first capturing group of the pattern
192     *
193     * @throws DecodeException If no match could be found
194     */
195    private String next(Pattern pattern) throws DecodeException {
196        Matcher matcher = pattern.matcher(gserValue.substring(pos, length));
197        if (matcher.find() && matcher.groupCount() >= 1) {
198            pos += matcher.end(1);
199            return matcher.group(1);
200        } else {
201            final LocalizableMessage msg =
202                    WARN_GSER_PATTERN_NO_MATCH.get(pattern.pattern(),
203                                                   gserValue.substring(pos, length));
204            throw DecodeException.error(msg);
205        }
206    }
207
208    /**
209     * Skips the input matched by the first capturing group.
210     *
211     * @param pattern the pattern to search for
212     *
213     * @throws DecodeException If no match could be found
214     */
215    private void skip(Pattern pattern) throws DecodeException {
216        Matcher matcher = pattern.matcher(gserValue.substring(pos, length));
217
218        if (matcher.find() && matcher.groupCount() >= 1) {
219            pos += matcher.end(1);
220        } else {
221            final LocalizableMessage msg =
222                    WARN_GSER_PATTERN_NO_MATCH.get(pattern.pattern(),
223                                                   gserValue.substring(pos, length));
224            throw DecodeException.error(msg);
225        }
226    }
227
228    /**
229     * Skips the input matching zero, one or more space characters.
230     *
231     * @return reference to this GSERParser
232     *
233     * @throws DecodeException If no match could be found
234     */
235    public GSERParser skipSP() throws DecodeException {
236        skip(GSER_SP);
237        return this;
238    }
239
240    /**
241     * Skips the input matching one or more space characters.
242     *
243     * @return reference to this GSERParser
244     *
245     * @throws DecodeException If no match could be found
246     */
247    public GSERParser skipMSP() throws DecodeException {
248        skip(GSER_MSP);
249        return this;
250    }
251
252    /**
253     * Skips the input matching the start of a sequence and subsequent space
254     * characters.
255     *
256     * @return reference to this GSERParser
257     *
258     * @throws DecodeException If the input does not match the start of a
259     * sequence
260     */
261    public GSERParser readStartSequence() throws DecodeException {
262        next(GSER_SEQUENCE_START);
263        skip(GSER_SP);
264        return this;
265    }
266
267    /**
268     * Skips the input matching the end of a sequence and preceding space
269     * characters.
270     *
271     * @return reference to this GSERParser
272     *
273     * @throws DecodeException If the input does not match the end of a sequence
274     */
275    public GSERParser readEndSequence() throws DecodeException {
276        skip(GSER_SP);
277        next(GSER_SEQUENCE_END);
278        return this;
279    }
280
281    /**
282     * Skips the input matching the separator pattern (",") and subsequenct
283     * space characters.
284     *
285     * @return reference to this GSERParser
286     *
287     * @throws DecodeException If the input does not match the separator
288     * pattern.
289     */
290    public GSERParser skipSeparator() throws DecodeException {
291        if (!hasNext(GSER_SEP)) {
292            final LocalizableMessage msg =
293                    WARN_GSER_NO_VALID_SEPARATOR.get(gserValue.substring(pos, length));
294            throw DecodeException.error(msg);
295        }
296        skip(GSER_SEP);
297        skip(GSER_SP);
298        return this;
299    }
300
301    /**
302     * Returns the next element as a String.
303     *
304     * @return the input matching the String pattern
305     *
306     * @throws DecodeException If the input does not match the string pattern.
307     */
308    public String nextString() throws DecodeException {
309        if (!hasNext(GSER_STRING)) {
310            final LocalizableMessage msg =
311                    WARN_GSER_NO_VALID_STRING.get(gserValue.substring(pos, length));
312            throw DecodeException.error(msg);
313        }
314
315        String str = next(GSER_STRING);
316
317        // Strip leading and trailing dquotes; unescape double dquotes
318        return str.substring(1, str.length() - 1).replace("\"\"", "\"");
319    }
320
321    /**
322     * Returns the next element as an Integer.
323     *
324     * @return the input matching the integer pattern
325     *
326     * @throws DecodeException If the input does not match the integer pattern
327     */
328    public int nextInteger() throws DecodeException {
329        if (!hasNext(GSER_INTEGER)) {
330            final LocalizableMessage msg =
331                    WARN_GSER_NO_VALID_INTEGER.get(gserValue.substring(pos, length));
332            throw DecodeException.error(msg);
333        }
334        return Integer.valueOf(next(GSER_INTEGER)).intValue();
335    }
336
337    /**
338     * Returns the next element as a BigInteger.
339     *
340     * @return the input matching the integer pattern
341     *
342     * @throws DecodeException If the input does not match the integer pattern
343     */
344    public BigInteger nextBigInteger() throws DecodeException {
345        if (!hasNext(GSER_INTEGER)) {
346            final LocalizableMessage msg =
347                    WARN_GSER_NO_VALID_INTEGER.get(gserValue.substring(pos, length));
348            throw DecodeException.error(msg);
349        }
350        return new BigInteger(next(GSER_INTEGER));
351    }
352
353    /**
354     * Returns the identifier of the next NamedValue element.
355     *
356     * @return the identifier of the NamedValue element
357     *
358     * @throws DecodeException If the input does not match the identifier
359     * pattern of a NamedValue
360     */
361    public String nextNamedValueIdentifier() throws DecodeException {
362        if (!hasNext(GSER_IDENTIFIER)) {
363            final LocalizableMessage msg =
364                    WARN_GSER_NO_VALID_IDENTIFIER.get(gserValue.substring(pos, length));
365            throw DecodeException.error(msg);
366        }
367        String identifier = next(GSER_IDENTIFIER);
368        if (!hasNext(GSER_MSP)) {
369            final LocalizableMessage msg =
370                    WARN_GSER_SPACE_CHAR_EXPECTED.get(gserValue.substring(pos, length));
371            throw DecodeException.error(msg);
372        }
373        skipMSP();
374        return identifier;
375    }
376
377    /**
378     * Return the identifier of the next IdentifiedChoiceValue element.
379     *
380     * @return the identifier of the IdentifiedChoiceValue element
381     *
382     * @throws DecodeException If the input does not match the identifier
383     * pattern of an IdentifiedChoiceValue
384     */
385    public String nextChoiceValueIdentifier() throws DecodeException {
386        if (!hasNext(GSER_CHOICE_IDENTIFIER)) {
387            final LocalizableMessage msg =
388                    WARN_GSER_NO_VALID_IDENTIFIEDCHOICE.get(gserValue.substring(pos, length));
389            throw DecodeException.error(msg);
390        }
391        String identifier = next(GSER_CHOICE_IDENTIFIER);
392
393        // Remove the colon at the end of the identifier
394        return identifier.substring(0, identifier.length() - 1);
395    }
396
397    /**
398     * Returns the GSER encoded String value.
399     *
400     * @return The GSER encoded String value.
401     */
402    @Override
403    public String toString() {
404        return gserValue;
405    }
406}