Home | History | Annotate | Download | only in native
      1 /*
      2  * Copyright (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #define LOG_TAG "Pattern"
     18 
     19 #include <stdlib.h>
     20 
     21 #include "JNIHelp.h"
     22 #include "JniConstants.h"
     23 #include "ScopedJavaUnicodeString.h"
     24 #include "jni.h"
     25 #include "unicode/parseerr.h"
     26 #include "unicode/regex.h"
     27 
     28 // ICU documentation: http://icu-project.org/apiref/icu4c/classRegexPattern.html
     29 
     30 static icu::RegexPattern* toRegexPattern(jlong addr) {
     31     return reinterpret_cast<icu::RegexPattern*>(static_cast<uintptr_t>(addr));
     32 }
     33 
     34 static const char* regexDetailMessage(UErrorCode status) {
     35     // These human-readable error messages were culled from "utypes.h", and then slightly tuned
     36     // to make more sense in context.
     37     // If we don't have a special-case, we'll just return the textual name of
     38     // the enum value (such as U_REGEX_RULE_SYNTAX), which is better than nothing.
     39     switch (status) {
     40     case U_REGEX_INTERNAL_ERROR: return "An internal error was detected";
     41     case U_REGEX_RULE_SYNTAX: return "Syntax error in regexp pattern";
     42     case U_REGEX_INVALID_STATE: return "Matcher in invalid state for requested operation";
     43     case U_REGEX_BAD_ESCAPE_SEQUENCE: return "Unrecognized backslash escape sequence in pattern";
     44     case U_REGEX_PROPERTY_SYNTAX: return "Incorrect Unicode property";
     45     case U_REGEX_UNIMPLEMENTED: return "Use of unimplemented feature";
     46     case U_REGEX_MISMATCHED_PAREN: return "Incorrectly nested parentheses in regexp pattern";
     47     case U_REGEX_NUMBER_TOO_BIG: return "Decimal number is too large";
     48     case U_REGEX_BAD_INTERVAL: return "Error in {min,max} interval";
     49     case U_REGEX_MAX_LT_MIN: return "In {min,max}, max is less than min";
     50     case U_REGEX_INVALID_BACK_REF: return "Back-reference to a non-existent capture group";
     51     case U_REGEX_INVALID_FLAG: return "Invalid value for match mode flags";
     52     case U_REGEX_LOOK_BEHIND_LIMIT: return "Look-behind pattern matches must have a bounded maximum length";
     53     case U_REGEX_SET_CONTAINS_STRING: return "Regular expressions cannot have UnicodeSets containing strings";
     54     case U_REGEX_OCTAL_TOO_BIG: return "Octal character constants must be <= 0377.";
     55     case U_REGEX_MISSING_CLOSE_BRACKET: return "Missing closing bracket in character class";
     56     case U_REGEX_INVALID_RANGE: return "In a character range [x-y], x is greater than y";
     57     case U_REGEX_STACK_OVERFLOW: return "Regular expression backtrack stack overflow";
     58     case U_REGEX_TIME_OUT: return "Maximum allowed match time exceeded";
     59     case U_REGEX_STOPPED_BY_CALLER: return "Matching operation aborted by user callback function";
     60     default:
     61         return u_errorName(status);
     62     }
     63 }
     64 
     65 static void throwPatternSyntaxException(JNIEnv* env, UErrorCode status, jstring pattern, UParseError error) {
     66     static jmethodID method = env->GetMethodID(JniConstants::patternSyntaxExceptionClass,
     67             "<init>", "(Ljava/lang/String;Ljava/lang/String;I)V");
     68     jstring message = env->NewStringUTF(regexDetailMessage(status));
     69     jclass exceptionClass = JniConstants::patternSyntaxExceptionClass;
     70     jobject exception = env->NewObject(exceptionClass, method, message, pattern, error.offset);
     71     env->Throw(reinterpret_cast<jthrowable>(exception));
     72 }
     73 
     74 static void Pattern_closeImpl(JNIEnv*, jclass, jlong addr) {
     75     delete toRegexPattern(addr);
     76 }
     77 
     78 static jlong Pattern_compileImpl(JNIEnv* env, jclass, jstring javaRegex, jint flags) {
     79     flags |= UREGEX_ERROR_ON_UNKNOWN_ESCAPES;
     80 
     81     UErrorCode status = U_ZERO_ERROR;
     82     UParseError error;
     83     error.offset = -1;
     84 
     85     ScopedJavaUnicodeString regex(env, javaRegex);
     86     if (!regex.valid()) {
     87         return 0;
     88     }
     89     icu::UnicodeString& regexString(regex.unicodeString());
     90     icu::RegexPattern* result = icu::RegexPattern::compile(regexString, flags, error, status);
     91     if (!U_SUCCESS(status)) {
     92         throwPatternSyntaxException(env, status, javaRegex, error);
     93     }
     94     return static_cast<jlong>(reinterpret_cast<uintptr_t>(result));
     95 }
     96 
     97 static JNINativeMethod gMethods[] = {
     98     NATIVE_METHOD(Pattern, closeImpl, "(J)V"),
     99     NATIVE_METHOD(Pattern, compileImpl, "(Ljava/lang/String;I)J"),
    100 };
    101 void register_java_util_regex_Pattern(JNIEnv* env) {
    102     jniRegisterNativeMethods(env, "java/util/regex/Pattern", gMethods, NELEM(gMethods));
    103 }
    104