Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/nodejs/node.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'deps/v8/src/regexp/regexp.cc')
-rw-r--r-- deps/v8/src/regexp/regexp.cc 160
1 file changed, 45 insertions, 115 deletions
diff --git a/deps/v8/src/regexp/regexp.cc b/deps/v8/src/regexp/regexp.cc
index 4319990a398..7b8da4d8eae 100644
--- a/deps/v8/src/regexp/regexp.cc
+++ b/deps/v8/src/regexp/regexp.cc
@@ -34,8 +34,7 @@ class RegExpImpl final : public AllStatic {
// Prepares a JSRegExp object with Irregexp-specific data.
static void IrregexpInitialize(Isolate* isolate, Handle<JSRegExp> re,
Handle<String> pattern, JSRegExp::Flags flags,
- int capture_register_count,
- uint32_t backtrack_limit);
+ int capture_count, uint32_t backtrack_limit);
static void AtomCompile(Isolate* isolate, Handle<JSRegExp> re,
Handle<String> pattern, JSRegExp::Flags flags,
@@ -86,7 +85,6 @@ class RegExpImpl final : public AllStatic {
static void SetIrregexpCaptureNameMap(FixedArray re,
Handle<FixedArray> value);
static int IrregexpNumberOfCaptures(FixedArray re);
- static int IrregexpNumberOfRegisters(FixedArray re);
static ByteArray IrregexpByteCode(FixedArray re, bool is_one_byte);
static Code IrregexpNativeCode(FixedArray re, bool is_one_byte);
};
@@ -422,7 +420,7 @@ bool RegExpImpl::CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re,
Handle<FixedArray> data =
Handle<FixedArray>(FixedArray::cast(re->data()), isolate);
if (compile_data.compilation_target == RegExpCompilationTarget::kNative) {
- data->set(JSRegExp::code_index(is_one_byte), compile_data.code);
+ data->set(JSRegExp::code_index(is_one_byte), *compile_data.code);
// Reset bytecode to uninitialized. In case we use tier-up we know that
// tier-up has happened this way.
data->set(JSRegExp::bytecode_index(is_one_byte),
@@ -432,7 +430,7 @@ bool RegExpImpl::CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re,
RegExpCompilationTarget::kBytecode);
// Store code generated by compiler in bytecode and trampoline to
// interpreter in code.
- data->set(JSRegExp::bytecode_index(is_one_byte), compile_data.code);
+ data->set(JSRegExp::bytecode_index(is_one_byte), *compile_data.code);
Handle<Code> trampoline =
BUILTIN_CODE(isolate, RegExpInterpreterTrampoline);
data->set(JSRegExp::code_index(is_one_byte), *trampoline);
@@ -456,7 +454,7 @@ bool RegExpImpl::CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re,
}
int RegExpImpl::IrregexpMaxRegisterCount(FixedArray re) {
- return Smi::cast(re.get(JSRegExp::kIrregexpMaxRegisterCountIndex)).value();
+ return Smi::ToInt(re.get(JSRegExp::kIrregexpMaxRegisterCountIndex));
}
void RegExpImpl::SetIrregexpMaxRegisterCount(FixedArray re, int value) {
@@ -476,10 +474,6 @@ int RegExpImpl::IrregexpNumberOfCaptures(FixedArray re) {
return Smi::ToInt(re.get(JSRegExp::kIrregexpCaptureCountIndex));
}
-int RegExpImpl::IrregexpNumberOfRegisters(FixedArray re) {
- return Smi::ToInt(re.get(JSRegExp::kIrregexpMaxRegisterCountIndex));
-}
-
ByteArray RegExpImpl::IrregexpByteCode(FixedArray re, bool is_one_byte) {
return ByteArray::cast(re.get(JSRegExp::bytecode_index(is_one_byte)));
}
@@ -509,35 +503,23 @@ int RegExp::IrregexpPrepare(Isolate* isolate, Handle<JSRegExp> regexp,
return -1;
}
- DisallowHeapAllocation no_gc;
- FixedArray data = FixedArray::cast(regexp->data());
- if (regexp->ShouldProduceBytecode()) {
- // Byte-code regexp needs space allocated for all its registers.
- // The result captures are copied to the start of the registers array
- // if the match succeeds. This way those registers are not clobbered
- // when we set the last match info from last successful match.
- return RegExpImpl::IrregexpNumberOfRegisters(data) +
- (RegExpImpl::IrregexpNumberOfCaptures(data) + 1) * 2;
- } else {
- // Native regexp only needs room to output captures. Registers are handled
- // internally.
- return (RegExpImpl::IrregexpNumberOfCaptures(data) + 1) * 2;
- }
+ // Only reserve room for output captures. Internal registers are allocated by
+ // the engine.
+ return JSRegExp::RegistersForCaptureCount(regexp->CaptureCount());
}
int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
Handle<String> subject, int index,
int32_t* output, int output_size) {
- Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate);
-
DCHECK_LE(0, index);
DCHECK_LE(index, subject->length());
DCHECK(subject->IsFlat());
+ DCHECK_GE(output_size,
+ JSRegExp::RegistersForCaptureCount(regexp->CaptureCount()));
bool is_one_byte = String::IsOneByteRepresentationUnderneath(*subject);
if (!regexp->ShouldProduceBytecode()) {
- DCHECK(output_size >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
do {
EnsureCompiledIrregexp(isolate, regexp, subject, is_one_byte);
// The stack is used to allocate registers for the compiled regexp code.
@@ -568,27 +550,16 @@ int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
UNREACHABLE();
} else {
DCHECK(regexp->ShouldProduceBytecode());
- DCHECK(output_size >= IrregexpNumberOfRegisters(*irregexp));
- // We must have done EnsureCompiledIrregexp, so we can get the number of
- // registers.
- int number_of_capture_registers =
- (IrregexpNumberOfCaptures(*irregexp) + 1) * 2;
- int32_t* raw_output = &output[number_of_capture_registers];
do {
IrregexpInterpreter::Result result =
IrregexpInterpreter::MatchForCallFromRuntime(
- isolate, regexp, subject, raw_output, number_of_capture_registers,
- index);
+ isolate, regexp, subject, output, output_size, index);
DCHECK_IMPLIES(result == IrregexpInterpreter::EXCEPTION,
isolate->has_pending_exception());
switch (result) {
case IrregexpInterpreter::SUCCESS:
- // Copy capture results to the start of the registers array.
- MemCopy(output, raw_output,
- number_of_capture_registers * sizeof(int32_t));
- return result;
case IrregexpInterpreter::EXCEPTION:
case IrregexpInterpreter::FAILURE:
return result;
@@ -596,9 +567,7 @@ int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
// The string has changed representation, and we must restart the
// match.
// We need to reset the tier up to start over with compilation.
- if (FLAG_regexp_tier_up) {
- regexp->ResetLastTierUpTick();
- }
+ if (FLAG_regexp_tier_up) regexp->ResetLastTierUpTick();
is_one_byte = String::IsOneByteRepresentationUnderneath(*subject);
EnsureCompiledIrregexp(isolate, regexp, subject, is_one_byte);
break;
@@ -659,8 +628,7 @@ MaybeHandle<Object> RegExpImpl::IrregexpExec(
output_registers, required_registers);
if (res == RegExp::RE_SUCCESS) {
- int capture_count =
- IrregexpNumberOfCaptures(FixedArray::cast(regexp->data()));
+ int capture_count = regexp->CaptureCount();
return RegExp::SetLastMatchInfo(isolate, last_match_info, subject,
capture_count, output_registers);
}
@@ -692,7 +660,8 @@ Handle<RegExpMatchInfo> RegExp::SetLastMatchInfo(
}
}
- int capture_register_count = (capture_count + 1) * 2;
+ int capture_register_count =
+ JSRegExp::RegistersForCaptureCount(capture_count);
DisallowHeapAllocation no_allocation;
if (match != nullptr) {
for (int i = 0; i < capture_register_count; i += 2) {
@@ -746,14 +715,12 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
JSRegExp::Flags flags, Handle<String> pattern,
Handle<String> sample_subject, bool is_one_byte,
uint32_t backtrack_limit) {
- if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
+ if (JSRegExp::RegistersForCaptureCount(data->capture_count) >
+ RegExpMacroAssembler::kMaxRegisterCount) {
data->error = RegExpError::kTooLarge;
return false;
}
- bool is_sticky = IsSticky(flags);
- bool is_global = IsGlobal(flags);
- bool is_unicode = IsUnicode(flags);
RegExpCompiler compiler(isolate, zone, data->capture_count, is_one_byte);
if (compiler.optimize()) {
@@ -772,50 +739,8 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
compiler.frequency_collator()->CountCharacter(sample_subject->Get(i));
}
- // Wrap the body of the regexp in capture #0.
- RegExpNode* captured_body =
- RegExpCapture::ToNode(data->tree, 0, &compiler, compiler.accept());
- RegExpNode* node = captured_body;
- bool is_end_anchored = data->tree->IsAnchoredAtEnd();
- bool is_start_anchored = data->tree->IsAnchoredAtStart();
- int max_length = data->tree->max_match();
- if (!is_start_anchored && !is_sticky) {
- // Add a .*? at the beginning, outside the body capture, unless
- // this expression is anchored at the beginning or sticky.
- JSRegExp::Flags default_flags = JSRegExp::Flags();
- RegExpNode* loop_node = RegExpQuantifier::ToNode(
- 0, RegExpTree::kInfinity, false,
- new (zone) RegExpCharacterClass('*', default_flags), &compiler,
- captured_body, data->contains_anchor);
-
- if (data->contains_anchor) {
- // Unroll loop once, to take care of the case that might start
- // at the start of input.
- ChoiceNode* first_step_node = new (zone) ChoiceNode(2, zone);
- first_step_node->AddAlternative(GuardedAlternative(captured_body));
- first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode(
- new (zone) RegExpCharacterClass('*', default_flags), false,
- loop_node)));
- node = first_step_node;
- } else {
- node = loop_node;
- }
- }
- if (is_one_byte) {
- node = node->FilterOneByte(RegExpCompiler::kMaxRecursion);
- // Do it again to propagate the new nodes to places where they were not
- // put because they had not been calculated yet.
- if (node != nullptr) {
- node = node->FilterOneByte(RegExpCompiler::kMaxRecursion);
- }
- } else if (is_unicode && (is_global || is_sticky)) {
- node = RegExpCompiler::OptionallyStepBackToLeadSurrogate(&compiler, node,
- flags);
- }
-
- if (node == nullptr) node = new (zone) EndNode(EndNode::BACKTRACK, zone);
- data->node = node;
- data->error = AnalyzeRegExp(isolate, is_one_byte, node);
+ data->node = compiler.PreprocessRegExp(data, flags, is_one_byte);
+ data->error = AnalyzeRegExp(isolate, is_one_byte, data->node);
if (data->error != RegExpError::kNone) {
return false;
}
@@ -830,30 +755,32 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
is_one_byte ? NativeRegExpMacroAssembler::LATIN1
: NativeRegExpMacroAssembler::UC16;
+ const int output_register_count =
+ JSRegExp::RegistersForCaptureCount(data->capture_count);
#if V8_TARGET_ARCH_IA32
- macro_assembler.reset(new RegExpMacroAssemblerIA32(
- isolate, zone, mode, (data->capture_count + 1) * 2));
+ macro_assembler.reset(new RegExpMacroAssemblerIA32(isolate, zone, mode,
+ output_register_count));
#elif V8_TARGET_ARCH_X64
- macro_assembler.reset(new RegExpMacroAssemblerX64(
- isolate, zone, mode, (data->capture_count + 1) * 2));
+ macro_assembler.reset(new RegExpMacroAssemblerX64(isolate, zone, mode,
+ output_register_count));
#elif V8_TARGET_ARCH_ARM
- macro_assembler.reset(new RegExpMacroAssemblerARM(
- isolate, zone, mode, (data->capture_count + 1) * 2));
+ macro_assembler.reset(new RegExpMacroAssemblerARM(isolate, zone, mode,
+ output_register_count));
#elif V8_TARGET_ARCH_ARM64
- macro_assembler.reset(new RegExpMacroAssemblerARM64(
- isolate, zone, mode, (data->capture_count + 1) * 2));
+ macro_assembler.reset(new RegExpMacroAssemblerARM64(isolate, zone, mode,
+ output_register_count));
#elif V8_TARGET_ARCH_S390
- macro_assembler.reset(new RegExpMacroAssemblerS390(
- isolate, zone, mode, (data->capture_count + 1) * 2));
+ macro_assembler.reset(new RegExpMacroAssemblerS390(isolate, zone, mode,
+ output_register_count));
#elif V8_TARGET_ARCH_PPC || V8_TARGET_ARCH_PPC64
- macro_assembler.reset(new RegExpMacroAssemblerPPC(
- isolate, zone, mode, (data->capture_count + 1) * 2));
+ macro_assembler.reset(new RegExpMacroAssemblerPPC(isolate, zone, mode,
+ output_register_count));
#elif V8_TARGET_ARCH_MIPS
- macro_assembler.reset(new RegExpMacroAssemblerMIPS(
- isolate, zone, mode, (data->capture_count + 1) * 2));
+ macro_assembler.reset(new RegExpMacroAssemblerMIPS(isolate, zone, mode,
+ output_register_count));
#elif V8_TARGET_ARCH_MIPS64
- macro_assembler.reset(new RegExpMacroAssemblerMIPS(
- isolate, zone, mode, (data->capture_count + 1) * 2));
+ macro_assembler.reset(new RegExpMacroAssemblerMIPS(isolate, zone, mode,
+ output_register_count));
#else
#error "Unsupported architecture"
#endif
@@ -868,17 +795,20 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
// Inserted here, instead of in Assembler, because it depends on information
// in the AST that isn't replicated in the Node structure.
+ bool is_end_anchored = data->tree->IsAnchoredAtEnd();
+ bool is_start_anchored = data->tree->IsAnchoredAtStart();
+ int max_length = data->tree->max_match();
static const int kMaxBacksearchLimit = 1024;
- if (is_end_anchored && !is_start_anchored && !is_sticky &&
+ if (is_end_anchored && !is_start_anchored && !IsSticky(flags) &&
max_length < kMaxBacksearchLimit) {
macro_assembler->SetCurrentPositionFromEnd(max_length);
}
- if (is_global) {
+ if (IsGlobal(flags)) {
RegExpMacroAssembler::GlobalMode mode = RegExpMacroAssembler::GLOBAL;
if (data->tree->min_match() > 0) {
mode = RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK;
- } else if (is_unicode) {
+ } else if (IsUnicode(flags)) {
mode = RegExpMacroAssembler::GLOBAL_UNICODE;
}
macro_assembler->set_global_mode(mode);
@@ -895,7 +825,7 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
#endif
RegExpCompiler::CompilationResult result = compiler.Assemble(
- isolate, macro_assembler_ptr, node, data->capture_count, pattern);
+ isolate, macro_assembler_ptr, data->node, data->capture_count, pattern);
// Code / bytecode printing.
{
@@ -904,14 +834,14 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
data->compilation_target == RegExpCompilationTarget::kNative) {
CodeTracer::Scope trace_scope(isolate->GetCodeTracer());
OFStream os(trace_scope.file());
- Handle<Code> c(Code::cast(result.code), isolate);
+ Handle<Code> c = Handle<Code>::cast(result.code);
auto pattern_cstring = pattern->ToCString();
c->Disassemble(pattern_cstring.get(), os, isolate);
}
#endif
if (FLAG_print_regexp_bytecode &&
data->compilation_target == RegExpCompilationTarget::kBytecode) {
- Handle<ByteArray> bytecode(ByteArray::cast(result.code), isolate);
+ Handle<ByteArray> bytecode = Handle<ByteArray>::cast(result.code);
auto pattern_cstring = pattern->ToCString();
RegExpBytecodeDisassemble(bytecode->GetDataStartAddress(),
bytecode->length(), pattern_cstring.get());