001/* 002 * CDDL HEADER START 003 * 004 * The contents of this file are subject to the terms of the 005 * Common Development and Distribution License, Version 1.0 only 006 * (the "License"). You may not use this file except in compliance 007 * with the License. 008 * 009 * You can obtain a copy of the license at legal-notices/CDDLv1_0.txt 010 * or http://forgerock.org/license/CDDLv1.0.html. 011 * See the License for the specific language governing permissions 012 * and limitations under the License. 013 * 014 * When distributing Covered Code, include this CDDL HEADER in each 015 * file and include the License file at legal-notices/CDDLv1_0.txt. 016 * If applicable, add the following below this CDDL HEADER, with the 017 * fields enclosed by brackets "[]" replaced with your own identifying 018 * information: 019 * Portions Copyright [yyyy] [name of copyright owner] 020 * 021 * CDDL HEADER END 022 * 023 * 024 * Copyright 2013-2014 Manuel Gaupp 025 */ 026package org.forgerock.opendj.ldap; 027 028import java.math.BigInteger; 029import java.util.regex.Matcher; 030import java.util.regex.Pattern; 031 032import org.forgerock.util.Reject; 033import org.forgerock.i18n.LocalizableMessage; 034 035import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_NO_VALID_IDENTIFIEDCHOICE; 036import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_NO_VALID_IDENTIFIER; 037import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_NO_VALID_INTEGER; 038import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_NO_VALID_SEPARATOR; 039import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_NO_VALID_STRING; 040import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_PATTERN_NO_MATCH; 041import static com.forgerock.opendj.ldap.CoreMessages.WARN_GSER_SPACE_CHAR_EXPECTED; 042 043/** 044 * This class implements a parser for strings which are encoded using the 045 * Generic String Encoding Rules (GSER) defined in RFC 3641. 046 * 047 * @see <a href="http://tools.ietf.org/html/rfc3641">RFC 3641 - Generic String 048 * Encoding Rules (GSER) for ASN.1 Types</a> 049 */ 050public final class GSERParser { 051 052 private final String gserValue; 053 054 private int pos; 055 056 private final int length; 057 058 /** 059 * Pattern to match an identifier defined in RFC 3641, section 3.4. 060 * <pre> 061 * An <identifier> conforms to the definition of an identifier in ASN.1 062 * notation (Clause 11.3 of X.680 [8]). It begins with a lowercase 063 * letter and is followed by zero or more letters, digits, and hyphens. 064 * A hyphen is not permitted to be the last character, nor is it to be 065 * followed by another hyphen. The case of letters in an identifier is 066 * always significant. 067 * 068 * identifier = lowercase *alphanumeric *(hyphen 1*alphanumeric) 069 * alphanumeric = uppercase / lowercase / decimal-digit 070 * uppercase = %x41-5A ; "A" to "Z" 071 * lowercase = %x61-7A ; "a" to "z" 072 * decimal-digit = %x30-39 ; "0" to "9" 073 * hyphen = "-" 074 * </pre> 075 */ 076 private static final Pattern GSER_IDENTIFIER = Pattern.compile("^([a-z]([A-Za-z0-9]|(-[A-Za-z0-9]))*)"); 077 078 /** 079 * Pattern to match the identifier part (including the colon) of an 080 * IdentifiedChoiceValue defined in RFC 3641, section 3.12. 081 * <pre> 082 * IdentifiedChoiceValue = identifier ":" Value 083 * </pre> 084 */ 085 private static final Pattern GSER_CHOICE_IDENTIFIER = Pattern.compile("^([a-z]([A-Za-z0-9]|(-[A-Za-z0-9]))*:)"); 086 087 /** 088 * Pattern to match "sp", containing zero, one or more space characters. 089 * <pre> 090 * sp = *%x20 ; zero, one or more space characters 091 * </pre> 092 */ 093 private static final Pattern GSER_SP = Pattern.compile("^( *)"); 094 095 /** 096 * Pattern to match "msp", containing at least one space character. 097 * <pre> 098 * msp = 1*%x20 ; one or more space characters 099 * </pre> 100 */ 101 private static final Pattern GSER_MSP = Pattern.compile("^( +)"); 102 103 /** 104 * Pattern to match an Integer value. 105 */ 106 private static final Pattern GSER_INTEGER = Pattern.compile("^(\\d+)"); 107 108 /** 109 * Pattern to match a GSER StringValue, defined in RFC 3641, section 3.2: 110 * <pre> 111 * Any embedded double quotes in the resulting UTF-8 character string 112 * are escaped by repeating the double quote characters. 113 * 114 * [...] 115 * 116 * StringValue = dquote *SafeUTF8Character dquote 117 * dquote = %x22 ; " (double quote) 118 * </pre> 119 */ 120 private static final Pattern GSER_STRING = Pattern.compile("^(\"([^\"]|(\"\"))*\")"); 121 122 /** 123 * Pattern to match the beginning of a GSER encoded Sequence. 124 * <pre> 125 * SequenceValue = ComponentList 126 * ComponentList = "{" [ sp NamedValue *( "," sp NamedValue) ] sp "}" 127 * </pre> 128 */ 129 private static final Pattern GSER_SEQUENCE_START = Pattern.compile("^(\\{)"); 130 131 /** 132 * Pattern to match the end of a GSER encoded Sequence. 133 * <pre> 134 * SequenceValue = ComponentList 135 * ComponentList = "{" [ sp NamedValue *( "," sp NamedValue) ] sp "}" 136 * </pre> 137 */ 138 private static final Pattern GSER_SEQUENCE_END = Pattern.compile("^(\\})"); 139 140 /** 141 * Pattern to match the separator used in GSER encoded sequences. 142 */ 143 private static final Pattern GSER_SEP = Pattern.compile("^(,)"); 144 145 /** 146 * Creates a new GSER Parser. 147 * 148 * @param value the GSER encoded String value 149 */ 150 public GSERParser(CharSequence value) { 151 Reject.checkNotNull(value); 152 this.gserValue = value.toString(); 153 this.pos = 0; 154 this.length = value.length(); 155 } 156 157 /** 158 * Determines if the GSER String contains at least one character to be read. 159 * 160 * @return <code>true</code> if there is at least one remaining character or 161 * <code>false</code> otherwise. 162 */ 163 public boolean hasNext() { 164 return pos < length; 165 } 166 167 /** 168 * Determines if the remaining GSER String matches the provided pattern. 169 * 170 * @param pattern the pattern to search for 171 * 172 * @return <code>true</code> if the remaining string matches the pattern or 173 * <code>false</code> otherwise. 174 */ 175 private boolean hasNext(Pattern pattern) { 176 if (!hasNext()) { 177 return false; 178 } 179 180 Matcher matcher = pattern.matcher(gserValue.substring(pos, length)); 181 182 return matcher.find(); 183 } 184 185 /** 186 * Returns the String matched by the first capturing group of the pattern. 187 * The parser advances past the input matched by the first capturing group. 188 * 189 * @param pattern the pattern to search for 190 * 191 * @return the String matched by the first capturing group of the pattern 192 * 193 * @throws DecodeException If no match could be found 194 */ 195 private String next(Pattern pattern) throws DecodeException { 196 Matcher matcher = pattern.matcher(gserValue.substring(pos, length)); 197 if (matcher.find() && matcher.groupCount() >= 1) { 198 pos += matcher.end(1); 199 return matcher.group(1); 200 } else { 201 final LocalizableMessage msg = 202 WARN_GSER_PATTERN_NO_MATCH.get(pattern.pattern(), 203 gserValue.substring(pos, length)); 204 throw DecodeException.error(msg); 205 } 206 } 207 208 /** 209 * Skips the input matched by the first capturing group. 210 * 211 * @param pattern the pattern to search for 212 * 213 * @throws DecodeException If no match could be found 214 */ 215 private void skip(Pattern pattern) throws DecodeException { 216 Matcher matcher = pattern.matcher(gserValue.substring(pos, length)); 217 218 if (matcher.find() && matcher.groupCount() >= 1) { 219 pos += matcher.end(1); 220 } else { 221 final LocalizableMessage msg = 222 WARN_GSER_PATTERN_NO_MATCH.get(pattern.pattern(), 223 gserValue.substring(pos, length)); 224 throw DecodeException.error(msg); 225 } 226 } 227 228 /** 229 * Skips the input matching zero, one or more space characters. 230 * 231 * @return reference to this GSERParser 232 * 233 * @throws DecodeException If no match could be found 234 */ 235 public GSERParser skipSP() throws DecodeException { 236 skip(GSER_SP); 237 return this; 238 } 239 240 /** 241 * Skips the input matching one or more space characters. 242 * 243 * @return reference to this GSERParser 244 * 245 * @throws DecodeException If no match could be found 246 */ 247 public GSERParser skipMSP() throws DecodeException { 248 skip(GSER_MSP); 249 return this; 250 } 251 252 /** 253 * Skips the input matching the start of a sequence and subsequent space 254 * characters. 255 * 256 * @return reference to this GSERParser 257 * 258 * @throws DecodeException If the input does not match the start of a 259 * sequence 260 */ 261 public GSERParser readStartSequence() throws DecodeException { 262 next(GSER_SEQUENCE_START); 263 skip(GSER_SP); 264 return this; 265 } 266 267 /** 268 * Skips the input matching the end of a sequence and preceding space 269 * characters. 270 * 271 * @return reference to this GSERParser 272 * 273 * @throws DecodeException If the input does not match the end of a sequence 274 */ 275 public GSERParser readEndSequence() throws DecodeException { 276 skip(GSER_SP); 277 next(GSER_SEQUENCE_END); 278 return this; 279 } 280 281 /** 282 * Skips the input matching the separator pattern (",") and subsequenct 283 * space characters. 284 * 285 * @return reference to this GSERParser 286 * 287 * @throws DecodeException If the input does not match the separator 288 * pattern. 289 */ 290 public GSERParser skipSeparator() throws DecodeException { 291 if (!hasNext(GSER_SEP)) { 292 final LocalizableMessage msg = 293 WARN_GSER_NO_VALID_SEPARATOR.get(gserValue.substring(pos, length)); 294 throw DecodeException.error(msg); 295 } 296 skip(GSER_SEP); 297 skip(GSER_SP); 298 return this; 299 } 300 301 /** 302 * Returns the next element as a String. 303 * 304 * @return the input matching the String pattern 305 * 306 * @throws DecodeException If the input does not match the string pattern. 307 */ 308 public String nextString() throws DecodeException { 309 if (!hasNext(GSER_STRING)) { 310 final LocalizableMessage msg = 311 WARN_GSER_NO_VALID_STRING.get(gserValue.substring(pos, length)); 312 throw DecodeException.error(msg); 313 } 314 315 String str = next(GSER_STRING); 316 317 // Strip leading and trailing dquotes; unescape double dquotes 318 return str.substring(1, str.length() - 1).replace("\"\"", "\""); 319 } 320 321 /** 322 * Returns the next element as an Integer. 323 * 324 * @return the input matching the integer pattern 325 * 326 * @throws DecodeException If the input does not match the integer pattern 327 */ 328 public int nextInteger() throws DecodeException { 329 if (!hasNext(GSER_INTEGER)) { 330 final LocalizableMessage msg = 331 WARN_GSER_NO_VALID_INTEGER.get(gserValue.substring(pos, length)); 332 throw DecodeException.error(msg); 333 } 334 return Integer.valueOf(next(GSER_INTEGER)).intValue(); 335 } 336 337 /** 338 * Returns the next element as a BigInteger. 339 * 340 * @return the input matching the integer pattern 341 * 342 * @throws DecodeException If the input does not match the integer pattern 343 */ 344 public BigInteger nextBigInteger() throws DecodeException { 345 if (!hasNext(GSER_INTEGER)) { 346 final LocalizableMessage msg = 347 WARN_GSER_NO_VALID_INTEGER.get(gserValue.substring(pos, length)); 348 throw DecodeException.error(msg); 349 } 350 return new BigInteger(next(GSER_INTEGER)); 351 } 352 353 /** 354 * Returns the identifier of the next NamedValue element. 355 * 356 * @return the identifier of the NamedValue element 357 * 358 * @throws DecodeException If the input does not match the identifier 359 * pattern of a NamedValue 360 */ 361 public String nextNamedValueIdentifier() throws DecodeException { 362 if (!hasNext(GSER_IDENTIFIER)) { 363 final LocalizableMessage msg = 364 WARN_GSER_NO_VALID_IDENTIFIER.get(gserValue.substring(pos, length)); 365 throw DecodeException.error(msg); 366 } 367 String identifier = next(GSER_IDENTIFIER); 368 if (!hasNext(GSER_MSP)) { 369 final LocalizableMessage msg = 370 WARN_GSER_SPACE_CHAR_EXPECTED.get(gserValue.substring(pos, length)); 371 throw DecodeException.error(msg); 372 } 373 skipMSP(); 374 return identifier; 375 } 376 377 /** 378 * Return the identifier of the next IdentifiedChoiceValue element. 379 * 380 * @return the identifier of the IdentifiedChoiceValue element 381 * 382 * @throws DecodeException If the input does not match the identifier 383 * pattern of an IdentifiedChoiceValue 384 */ 385 public String nextChoiceValueIdentifier() throws DecodeException { 386 if (!hasNext(GSER_CHOICE_IDENTIFIER)) { 387 final LocalizableMessage msg = 388 WARN_GSER_NO_VALID_IDENTIFIEDCHOICE.get(gserValue.substring(pos, length)); 389 throw DecodeException.error(msg); 390 } 391 String identifier = next(GSER_CHOICE_IDENTIFIER); 392 393 // Remove the colon at the end of the identifier 394 return identifier.substring(0, identifier.length() - 1); 395 } 396 397 /** 398 * Returns the GSER encoded String value. 399 * 400 * @return The GSER encoded String value. 401 */ 402 @Override 403 public String toString() { 404 return gserValue; 405 } 406}