Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/miloyip/rapidjson.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMilo Yip <miloyip@gmail.com>2016-09-16 07:13:02 +0300
committerMilo Yip <miloyip@gmail.com>2016-09-16 07:13:02 +0300
commit769185d68b0ca3ab8a976b6e625f0c5edff6a3c6 (patch)
treef29fd80ae63a03662524b8dc870127c01d52668d /include/rapidjson/internal
parent328ead0e173a960e0fc164e102cf8e1734c16380 (diff)
Refactor regex
Remove mutable which causes reentrant issue
Diffstat (limited to 'include/rapidjson/internal')
-rw-r--r--include/rapidjson/internal/regex.h204
1 files changed, 117 insertions, 87 deletions
diff --git a/include/rapidjson/internal/regex.h b/include/rapidjson/internal/regex.h
index 422a5240..8530cd77 100644
--- a/include/rapidjson/internal/regex.h
+++ b/include/rapidjson/internal/regex.h
@@ -44,11 +44,39 @@ RAPIDJSON_NAMESPACE_BEGIN
namespace internal {
///////////////////////////////////////////////////////////////////////////////
+// DecodedStream
+
+template <typename SourceStream, typename Encoding>
+class DecodedStream {
+public:
+ DecodedStream(SourceStream& ss) : ss_(ss), codepoint_() { Decode(); }
+ unsigned Peek() { return codepoint_; }
+ unsigned Take() {
+ unsigned c = codepoint_;
+ if (c) // No further decoding when '\0'
+ Decode();
+ return c;
+ }
+
+private:
+ void Decode() {
+ if (!Encoding::Decode(ss_, &codepoint_))
+ codepoint_ = 0;
+ }
+
+ SourceStream& ss_;
+ unsigned codepoint_;
+};
+
+///////////////////////////////////////////////////////////////////////////////
// GenericRegex
static const SizeType kRegexInvalidState = ~SizeType(0); //!< Represents an invalid index in GenericRegex::State::out, out1
static const SizeType kRegexInvalidRange = ~SizeType(0);
+template <typename Encoding, typename Allocator>
+class GenericRegexSearch;
+
//! Regular expression engine with subset of ECMAscript grammar.
/*!
Supported regular expression syntax:
@@ -84,45 +112,25 @@ static const SizeType kRegexInvalidRange = ~SizeType(0);
template <typename Encoding, typename Allocator = CrtAllocator>
class GenericRegex {
public:
+ typedef Encoding EncodingType;
typedef typename Encoding::Ch Ch;
+ template <typename, typename> friend class GenericRegexSearch;
GenericRegex(const Ch* source, Allocator* allocator = 0) :
states_(allocator, 256), ranges_(allocator, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(),
- stateSet_(), state0_(allocator, 0), state1_(allocator, 0), anchorBegin_(), anchorEnd_()
+ anchorBegin_(), anchorEnd_()
{
GenericStringStream<Encoding> ss(source);
- DecodedStream<GenericStringStream<Encoding> > ds(ss);
+ DecodedStream<GenericStringStream<Encoding>, Encoding> ds(ss);
Parse(ds);
}
- ~GenericRegex() {
- Allocator::Free(stateSet_);
- }
+ ~GenericRegex() {}
bool IsValid() const {
return root_ != kRegexInvalidState;
}
- template <typename InputStream>
- bool Match(InputStream& is) const {
- return SearchWithAnchoring(is, true, true);
- }
-
- bool Match(const Ch* s) const {
- GenericStringStream<Encoding> is(s);
- return Match(is);
- }
-
- template <typename InputStream>
- bool Search(InputStream& is) const {
- return SearchWithAnchoring(is, anchorBegin_, anchorEnd_);
- }
-
- bool Search(const Ch* s) const {
- GenericStringStream<Encoding> is(s);
- return Search(is);
- }
-
private:
enum Operator {
kZeroOrOne,
@@ -157,28 +165,6 @@ private:
SizeType minIndex;
};
- template <typename SourceStream>
- class DecodedStream {
- public:
- DecodedStream(SourceStream& ss) : ss_(ss), codepoint_() { Decode(); }
- unsigned Peek() { return codepoint_; }
- unsigned Take() {
- unsigned c = codepoint_;
- if (c) // No further decoding when '\0'
- Decode();
- return c;
- }
-
- private:
- void Decode() {
- if (!Encoding::Decode(ss_, &codepoint_))
- codepoint_ = 0;
- }
-
- SourceStream& ss_;
- unsigned codepoint_;
- };
-
State& GetState(SizeType index) {
RAPIDJSON_ASSERT(index < stateCount_);
return states_.template Bottom<State>()[index];
@@ -200,7 +186,7 @@ private:
}
template <typename InputStream>
- void Parse(DecodedStream<InputStream>& ds) {
+ void Parse(DecodedStream<InputStream, Encoding>& ds) {
Allocator allocator;
Stack<Allocator> operandStack(&allocator, 256); // Frag
Stack<Allocator> operatorStack(&allocator, 256); // Operator
@@ -327,14 +313,6 @@ private:
printf("\n");
#endif
}
-
- // Preallocate buffer for SearchWithAnchoring()
- RAPIDJSON_ASSERT(stateSet_ == 0);
- if (stateCount_ > 0) {
- stateSet_ = static_cast<unsigned*>(states_.GetAllocator().Malloc(GetStateSetSize()));
- state0_.template Reserve<SizeType>(stateCount_);
- state1_.template Reserve<SizeType>(stateCount_);
- }
}
SizeType NewState(SizeType out, SizeType out1, unsigned codepoint) {
@@ -483,7 +461,7 @@ private:
}
template <typename InputStream>
- bool ParseUnsigned(DecodedStream<InputStream>& ds, unsigned* u) {
+ bool ParseUnsigned(DecodedStream<InputStream, Encoding>& ds, unsigned* u) {
unsigned r = 0;
if (ds.Peek() < '0' || ds.Peek() > '9')
return false;
@@ -497,7 +475,7 @@ private:
}
template <typename InputStream>
- bool ParseRange(DecodedStream<InputStream>& ds, SizeType* range) {
+ bool ParseRange(DecodedStream<InputStream, Encoding>& ds, SizeType* range) {
bool isBegin = true;
bool negate = false;
int step = 0;
@@ -575,7 +553,7 @@ private:
}
template <typename InputStream>
- bool CharacterEscape(DecodedStream<InputStream>& ds, unsigned* escapedCodepoint) {
+ bool CharacterEscape(DecodedStream<InputStream, Encoding>& ds, unsigned* escapedCodepoint) {
unsigned codepoint;
switch (codepoint = ds.Take()) {
case '^':
@@ -603,34 +581,93 @@ private:
}
}
+ Stack<Allocator> states_;
+ Stack<Allocator> ranges_;
+ SizeType root_;
+ SizeType stateCount_;
+ SizeType rangeCount_;
+
+ static const unsigned kInfinityQuantifier = ~0u;
+
+ // For SearchWithAnchoring()
+ bool anchorBegin_;
+ bool anchorEnd_;
+};
+
+template <typename RegexType, typename Allocator = CrtAllocator>
+class GenericRegexSearch {
+public:
+ typedef typename RegexType::EncodingType Encoding;
+ typedef typename Encoding::Ch Ch;
+
+ GenericRegexSearch(const RegexType& regex, Allocator* allocator = 0) :
+ regex_(regex), allocator_(allocator), ownAllocator_(0),
+ state0_(allocator, 0), state1_(allocator, 0), stateSet_()
+ {
+ RAPIDJSON_ASSERT(regex_.IsValid());
+ if (!allocator_)
+ ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator());
+ stateSet_ = static_cast<unsigned*>(allocator_->Malloc(GetStateSetSize()));
+ state0_.template Reserve<SizeType>(regex_.stateCount_);
+ state1_.template Reserve<SizeType>(regex_.stateCount_);
+ }
+
+ ~GenericRegexSearch() {
+ Allocator::Free(stateSet_);
+ RAPIDJSON_DELETE(ownAllocator_);
+ }
+
template <typename InputStream>
- bool SearchWithAnchoring(InputStream& is, bool anchorBegin, bool anchorEnd) const {
- RAPIDJSON_ASSERT(IsValid());
- DecodedStream<InputStream> ds(is);
+ bool Match(InputStream& is) {
+ return SearchWithAnchoring(is, true, true);
+ }
+
+ bool Match(const Ch* s) {
+ GenericStringStream<Encoding> is(s);
+ return Match(is);
+ }
+
+ template <typename InputStream>
+ bool Search(InputStream& is) {
+ return SearchWithAnchoring(is, regex_.anchorBegin_, regex_.anchorEnd_);
+ }
+
+ bool Search(const Ch* s) {
+ GenericStringStream<Encoding> is(s);
+ return Search(is);
+ }
+
+private:
+ typedef typename RegexType::State State;
+ typedef typename RegexType::Range Range;
+
+ template <typename InputStream>
+ bool SearchWithAnchoring(InputStream& is, bool anchorBegin, bool anchorEnd) {
+ DecodedStream<InputStream, Encoding> ds(is);
state0_.Clear();
Stack<Allocator> *current = &state0_, *next = &state1_;
const size_t stateSetSize = GetStateSetSize();
std::memset(stateSet_, 0, stateSetSize);
- bool matched = AddState(*current, root_);
+ bool matched = AddState(*current, regex_.root_);
unsigned codepoint;
while (!current->Empty() && (codepoint = ds.Take()) != 0) {
std::memset(stateSet_, 0, stateSetSize);
next->Clear();
matched = false;
for (const SizeType* s = current->template Bottom<SizeType>(); s != current->template End<SizeType>(); ++s) {
- const State& sr = GetState(*s);
+ const State& sr = regex_.GetState(*s);
if (sr.codepoint == codepoint ||
- sr.codepoint == kAnyCharacterClass ||
- (sr.codepoint == kRangeCharacterClass && MatchRange(sr.rangeStart, codepoint)))
+ sr.codepoint == RegexType::kAnyCharacterClass ||
+ (sr.codepoint == RegexType::kRangeCharacterClass && MatchRange(sr.rangeStart, codepoint)))
{
matched = AddState(*next, sr.out) || matched;
if (!anchorEnd && matched)
return true;
}
if (!anchorBegin)
- AddState(*next, root_);
+ AddState(*next, regex_.root_);
}
internal::Swap(current, next);
}
@@ -639,14 +676,14 @@ private:
}
size_t GetStateSetSize() const {
- return (stateCount_ + 31) / 32 * 4;
+ return (regex_.stateCount_ + 31) / 32 * 4;
}
// Return whether the added states is a match state
- bool AddState(Stack<Allocator>& l, SizeType index) const {
+ bool AddState(Stack<Allocator>& l, SizeType index) {
RAPIDJSON_ASSERT(index != kRegexInvalidState);
- const State& s = GetState(index);
+ const State& s = regex_.GetState(index);
if (s.out1 != kRegexInvalidState) { // Split
bool matched = AddState(l, s.out);
return AddState(l, s.out1) || matched;
@@ -659,33 +696,26 @@ private:
}
bool MatchRange(SizeType rangeIndex, unsigned codepoint) const {
- bool yes = (GetRange(rangeIndex).start & kRangeNegationFlag) == 0;
+ bool yes = (regex_.GetRange(rangeIndex).start & RegexType::kRangeNegationFlag) == 0;
while (rangeIndex != kRegexInvalidRange) {
- const Range& r = GetRange(rangeIndex);
- if (codepoint >= (r.start & ~kRangeNegationFlag) && codepoint <= r.end)
+ const Range& r = regex_.GetRange(rangeIndex);
+ if (codepoint >= (r.start & ~RegexType::kRangeNegationFlag) && codepoint <= r.end)
return yes;
rangeIndex = r.next;
}
return !yes;
}
- Stack<Allocator> states_;
- Stack<Allocator> ranges_;
- SizeType root_;
- SizeType stateCount_;
- SizeType rangeCount_;
-
- static const unsigned kInfinityQuantifier = ~0u;
-
- // For SearchWithAnchoring()
- uint32_t* stateSet_; // allocated by states_.GetAllocator()
- mutable Stack<Allocator> state0_;
- mutable Stack<Allocator> state1_;
- bool anchorBegin_;
- bool anchorEnd_;
+ const RegexType& regex_;
+ Allocator* allocator_;
+ Allocator* ownAllocator_;
+ Stack<Allocator> state0_;
+ Stack<Allocator> state1_;
+ uint32_t* stateSet_;
};
typedef GenericRegex<UTF8<> > Regex;
+typedef GenericRegexSearch<Regex> RegexSearch;
} // namespace internal
RAPIDJSON_NAMESPACE_END