Dre4m Shell
Server IP : 127.0.0.2  /  Your IP : 3.16.50.164
Web Server : Apache/2.4.18 (Ubuntu)
System :
User : www-data ( )
PHP Version : 7.0.33-0ubuntu0.16.04.16
Disable Function : disk_free_space,disk_total_space,diskfreespace,dl,exec,fpaththru,getmyuid,getmypid,highlight_file,ignore_user_abord,leak,listen,link,opcache_get_configuration,opcache_get_status,passthru,pcntl_alarm,pcntl_fork,pcntl_waitpid,pcntl_wait,pcntl_wifexited,pcntl_wifstopped,pcntl_wifsignaled,pcntl_wexitstatus,pcntl_wtermsig,pcntl_wstopsig,pcntl_signal,pcntl_signal_dispatch,pcntl_get_last_error,pcntl_strerror,pcntl_sigprocmask,pcntl_sigwaitinfo,pcntl_sigtimedwait,pcntl_exec,pcntl_getpriority,pcntl_setpriority,php_uname,phpinfo,posix_ctermid,posix_getcwd,posix_getegid,posix_geteuid,posix_getgid,posix_getgrgid,posix_getgrnam,posix_getgroups,posix_getlogin,posix_getpgid,posix_getpgrp,posix_getpid,posix,_getppid,posix_getpwnam,posix_getpwuid,posix_getrlimit,posix_getsid,posix_getuid,posix_isatty,posix_kill,posix_mkfifo,posix_setegid,posix_seteuid,posix_setgid,posix_setpgid,posix_setsid,posix_setuid,posix_times,posix_ttyname,posix_uname,pclose,popen,proc_open,proc_close,proc_get_status,proc_nice,proc_terminate,shell_exec,source,show_source,system,virtual
MySQL : OFF  |  cURL : ON  |  WGET : ON  |  Perl : ON  |  Python : ON  |  Sudo : ON  |  Pkexec : ON
Directory :  /usr/include/nodejs/src/

Upload File :
current_dir [ Writeable ] document_root [ Writeable ]

 

Command :


[ HOME SHELL ]     

Current File : /usr/include/nodejs/src/string_search.h
// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef SRC_STRING_SEARCH_H_
#define SRC_STRING_SEARCH_H_

#include "node.h"
#include <string.h>

namespace node {
namespace stringsearch {


// Yields the larger of |a| and |b|. When neither argument compares less than
// the other, |a| is the one returned.
template <typename T>
T Max(T a, T b) {
  if (a < b) {
    return b;
  }
  return a;
}


// Largest code unit value that still fits into a single byte.
static const uint32_t kMaxOneByteCharCodeU = 0xff;


// Scans the first |length| code units of |chars| and reports the index of the
// first one whose value is above kMaxOneByteCharCodeU. When every unit fits
// into one byte the result is |length|.
static inline size_t NonOneByteStart(const uint16_t* chars, size_t length) {
  for (size_t offset = 0; offset < length; ++offset) {
    if (chars[offset] > kMaxOneByteCharCodeU)
      return offset;
  }
  return length;
}


// True when none of the first |length| code units of |chars| is reported by
// NonOneByteStart as exceeding one byte.
static inline bool IsOneByte(const uint16_t* chars, size_t length) {
  const size_t first_wide = NonOneByteStart(chars, length);
  return first_wide >= length;
}


// Non-owning view over |length| contiguous elements of type T. The view only
// stores the pointer and the element count; it never allocates or frees.
template <typename T>
class Vector {
 public:
  // The constructor ASSERTs that the view is non-empty and non-null.
  Vector(T* data, size_t length) : start_(data), length_(length) {
    ASSERT(length > 0 && data != nullptr);
  }

  // Number of elements covered by the view.
  size_t length() const { return length_; }

  // Pointer to the first element of the view.
  T* start() const { return start_; }

  // Element access. The index is validated through ASSERT before use.
  T& operator[](size_t index) const {
    ASSERT(0 <= index && index < length_);
    return *(start_ + index);
  }

  // Read-only element access; forwards to operator[].
  const T& at(size_t index) const { return (*this)[index]; }

  // Two views are equal when they have the same length and either alias the
  // same storage or hold pairwise-equal elements.
  bool operator==(const Vector<T>& other) const {
    if (length_ != other.length_)
      return false;
    if (start_ == other.start_)
      return true;
    size_t i = 0;
    while (i < length_ && !(start_[i] != other.start_[i])) {
      ++i;
    }
    return i == length_;
  }

 private:
  T* start_;
  size_t length_;
};


//---------------------------------------------------------------------
// String Search object.
//---------------------------------------------------------------------

// Common base of every StringSearch instantiation. It owns the tuning
// constants and the scratch tables, none of which depend on the character
// width of the pattern or the subject.
class StringSearchBase {
 protected:
  // Patterns shorter than this never use Boyer-Moore: the achievable skip
  // distance is too small to pay for the algorithmic overhead, so simple
  // brute force is used instead.
  static const int kBMMinPatternLength = 8;

  // Upper bound on a single Boyer-Moore shift. Capping the shift fixes the
  // size of the tables below. For longer needles only a suffix of the needle
  // is preprocessed, which is less optimal but remains a safe approximation.
  static const int kBMMaxShift = 250;

  // Size the alphabet is reduced to. The bad-character table has one entry
  // per alphabet symbol, so characters are equated modulo this size. That can
  // make searching less efficient but is still a safe approximation; needles
  // whose characters all live in one 256-code-point Unicode page lose nothing.
  static const int kLatin1AlphabetSize = 256;
  static const int kUC16AlphabetSize = 256;

  // Bad-character shift table used by Boyer-Moore(-Horspool); it has one slot
  // per alphabet symbol.
  static int kBadCharShiftTable[kUC16AlphabetSize];
  // Good-suffix shift table used by Boyer-Moore.
  static int kGoodSuffixShiftTable[kBMMaxShift + 1];
  // Scratch table needed only while the good-suffix shift table is built.
  static int kSuffixTable[kBMMaxShift + 1];

  // A two-byte string counts as one-byte only if IsOneByte says so.
  static inline bool IsOneByteString(Vector<const uint16_t> string) {
    const uint16_t* chars = string.start();
    return IsOneByte(chars, string.length());
  }

  // A string of single-byte characters is one-byte by construction.
  static inline bool IsOneByteString(Vector<const uint8_t> string) {
    return true;
  }
};

template <typename PatternChar, typename SubjectChar>
class StringSearch : private StringSearchBase {
 public:
  explicit StringSearch(Vector<const PatternChar> pattern)
      : pattern_(pattern), start_(0) {
    if (pattern.length() >= kBMMaxShift) {
      start_ = pattern.length() - kBMMaxShift;
    }

    if (sizeof(PatternChar) > sizeof(SubjectChar)) {
      if (!IsOneByteString(pattern_)) {
        strategy_ = &FailSearch;
        return;
      }
    }
    size_t pattern_length = pattern_.length();
    CHECK_GT(pattern_length, 0);
    if (pattern_length < kBMMinPatternLength) {
      if (pattern_length == 1) {
        strategy_ = &SingleCharSearch;
        return;
      }
      strategy_ = &LinearSearch;
      return;
    }
    strategy_ = &InitialSearch;
  }

  size_t Search(Vector<const SubjectChar> subject, size_t index) {
    return strategy_(this, subject, index);
  }

  static inline int AlphabetSize() {
    if (sizeof(PatternChar) == 1) {
      // Latin1 needle.
      return kLatin1AlphabetSize;
    } else {
      // UC16 needle.
      return kUC16AlphabetSize;
    }

    static_assert(sizeof(PatternChar) == sizeof(uint8_t) ||
                      sizeof(PatternChar) == sizeof(uint16_t),
                  "sizeof(PatternChar) == sizeof(uint16_t) || sizeof(uint8_t)");
  }

 private:
  typedef size_t (*SearchFunction)(  // NOLINT - it's not a cast!
      StringSearch<PatternChar, SubjectChar>*,
      Vector<const SubjectChar>,
      size_t);

  static size_t FailSearch(StringSearch<PatternChar, SubjectChar>*,
                           Vector<const SubjectChar> subject,
                           size_t) {
    return subject.length();
  }

  static size_t SingleCharSearch(StringSearch<PatternChar, SubjectChar>* search,
                                 Vector<const SubjectChar> subject,
                                 size_t start_index);

  static size_t LinearSearch(StringSearch<PatternChar, SubjectChar>* search,
                             Vector<const SubjectChar> subject,
                             size_t start_index);

  static size_t InitialSearch(StringSearch<PatternChar, SubjectChar>* search,
                              Vector<const SubjectChar> subject,
                              size_t start_index);

  static size_t BoyerMooreHorspoolSearch(
      StringSearch<PatternChar, SubjectChar>* search,
      Vector<const SubjectChar> subject,
      size_t start_index);

  static size_t BoyerMooreSearch(StringSearch<PatternChar, SubjectChar>* search,
                                 Vector<const SubjectChar> subject,
                                 size_t start_index);

  void PopulateBoyerMooreHorspoolTable();

  void PopulateBoyerMooreTable();

  static inline bool exceedsOneByte(uint8_t c) { return false; }

  static inline bool exceedsOneByte(uint16_t c) {
    return c > kMaxOneByteCharCodeU;
  }

  static inline int CharOccurrence(int* bad_char_occurrence,
                                   SubjectChar char_code) {
    if (sizeof(SubjectChar) == 1) {
      return bad_char_occurrence[static_cast<int>(char_code)];
    }
    if (sizeof(PatternChar) == 1) {
      if (exceedsOneByte(char_code)) {
        return -1;
      }
      return bad_char_occurrence[static_cast<unsigned int>(char_code)];
    }
    // Both pattern and subject are UC16. Reduce character to equivalence class.
    int equiv_class = char_code % kUC16AlphabetSize;
    return bad_char_occurrence[equiv_class];
  }

  // Store for the BoyerMoore(Horspool) bad char shift table.
  // Return a table covering the last kBMMaxShift+1 positions of
  // pattern.
  int* bad_char_table() { return kBadCharShiftTable; }

  // Store for the BoyerMoore good suffix shift table.
  int* good_suffix_shift_table() {
    // Return biased pointer that maps the range  [start_..pattern_.length()
    // to the kGoodSuffixShiftTable array.
    return kGoodSuffixShiftTable - start_;
  }

  // Table used temporarily while building the BoyerMoore good suffix
  // shift table.
  int* suffix_table() {
    // Return biased pointer that maps the range  [start_..pattern_.length()
    // to the kSuffixTable array.
    return kSuffixTable - start_;
  }

  // The pattern to search for.
  Vector<const PatternChar> pattern_;
  // Pointer to implementation of the search.
  SearchFunction strategy_;
  // Cache value of Max(0, pattern_length() - kBMMaxShift)
  size_t start_;
};


// Rounds the pointer |value| down to the nearest multiple of |alignment|,
// which must be a power of two.
//
// The mask is computed in uintptr_t rather than in U. Otherwise an unsigned
// |alignment| narrower than a pointer (e.g. 2u on a 64-bit target) would
// produce a zero-extended mask that also clears the high bits of the address.
template <typename T, typename U>
inline T AlignDown(T value, U alignment) {
  const uintptr_t mask = ~(static_cast<uintptr_t>(alignment) - 1);
  return reinterpret_cast<T>(reinterpret_cast<uintptr_t>(value) & mask);
}


// Splits |character| into its low and high byte and returns whichever of the
// two has the larger value.
inline uint8_t GetHighestValueByte(uint16_t character) {
  const uint8_t low_byte = static_cast<uint8_t>(character & 0xFF);
  const uint8_t high_byte = static_cast<uint8_t>(character >> 8);
  return Max(low_byte, high_byte);
}


// A single-byte character is its own highest-value byte.
inline uint8_t GetHighestValueByte(uint8_t character) {
  return character;
}


// Finds the first position at or after |index| where |subject| holds the
// first character of |pattern|, looking only at positions where the whole
// pattern could still fit. Returns subject.length() when there is none.
//
// memchr is used to locate a candidate byte (the highest-value byte of the
// pattern's first character). The hit is then aligned down to a SubjectChar
// boundary and verified as a full character.
template <typename PatternChar, typename SubjectChar>
inline size_t FindFirstCharacter(Vector<const PatternChar> pattern,
                              Vector<const SubjectChar> subject, size_t index) {
  const size_t max_n = subject.length() - pattern.length() + 1;
  const PatternChar first = pattern[0];
  const uint8_t needle_byte = GetHighestValueByte(first);
  const SubjectChar needle_char = static_cast<SubjectChar>(first);

  size_t pos = index;
  for (;;) {
    const void* hit = memchr(subject.start() + pos, needle_byte,
                             (max_n - pos) * sizeof(SubjectChar));
    if (hit == nullptr)
      break;
    // The byte may be either half of a character; step back to its start.
    const SubjectChar* aligned = AlignDown(
        reinterpret_cast<const SubjectChar*>(hit), sizeof(SubjectChar));
    pos = static_cast<size_t>(aligned - subject.start());
    if (subject[pos] == needle_char)
      return pos;
    if (++pos >= max_n)
      break;
  }

  return subject.length();
}


// Specialization for a one-byte pattern searched in a one-byte subject: a
// memchr hit is already a full character match, so no verification loop is
// needed. Returns subject.length() when the character is not found.
template <>
inline size_t FindFirstCharacter(Vector<const uint8_t> pattern,
                                 Vector<const uint8_t> subject,
                                 size_t index) {
  const size_t max_n = subject.length() - pattern.length() + 1;
  const uint8_t* const haystack = subject.start();

  const void* hit = memchr(haystack + index, pattern[0], max_n - index);
  if (hit == nullptr)
    return subject.length();
  return static_cast<size_t>(static_cast<const uint8_t*>(hit) - haystack);
}

//---------------------------------------------------------------------
// Single Character Pattern Search Strategy
//---------------------------------------------------------------------

// Strategy for patterns that consist of exactly one character.
//
// Returns the index of the first occurrence of that character at or after
// |index|, or subject.length() when there is none. A pattern character that
// cannot be represented in the (narrower) subject character type can never
// match, so that case also yields subject.length(), the same "not found"
// value every other strategy uses.
template <typename PatternChar, typename SubjectChar>
size_t StringSearch<PatternChar, SubjectChar>::SingleCharSearch(
    StringSearch<PatternChar, SubjectChar>* search,
    Vector<const SubjectChar> subject,
    size_t index) {
  CHECK_EQ(1, search->pattern_.length());

  if (sizeof(PatternChar) > sizeof(SubjectChar)) {
    if (exceedsOneByte(search->pattern_[0])) {
      return subject.length();
    }
  }
  return FindFirstCharacter(search->pattern_, subject, index);
}

//---------------------------------------------------------------------
// Linear Search Strategy
//---------------------------------------------------------------------

// Compares the first |length| characters of |pattern| and |subject| and
// returns true when all of them are equal. The first pair is always
// compared, hence the ASSERT on a non-zero |length|.
template <typename PatternChar, typename SubjectChar>
inline bool CharCompare(const PatternChar* pattern,
                        const SubjectChar* subject,
                        size_t length) {
  ASSERT_GT(length, 0);
  for (size_t offset = 0;;) {
    if (pattern[offset] != subject[offset])
      return false;
    if (++offset >= length)
      return true;
  }
}

// Straightforward search for short patterns: locate the pattern's first
// character, then compare the remainder. This strategy never switches to a
// different one. Returns subject.length() when the pattern is not found.
template <typename PatternChar, typename SubjectChar>
size_t StringSearch<PatternChar, SubjectChar>::LinearSearch(
    StringSearch<PatternChar, SubjectChar>* search,
    Vector<const SubjectChar> subject,
    size_t index) {
  Vector<const PatternChar> pattern = search->pattern_;
  CHECK_GT(pattern.length(), 1);
  const size_t pattern_length = pattern.length();
  const size_t tail_length = pattern_length - 1;
  // Last subject position at which the pattern can still start.
  const size_t last_start = subject.length() - pattern_length;

  size_t pos = index;
  while (pos <= last_start) {
    const size_t hit = FindFirstCharacter(pattern, subject, pos);
    if (hit == subject.length())
      return subject.length();
    ASSERT_LE(hit, last_start);

    // The first character matched at |hit|; the comparison of the remaining
    // characters lives in its own function so it can bail out with a return.
    if (CharCompare(pattern.start() + 1, subject.start() + hit + 1,
                    tail_length)) {
      return hit;
    }
    pos = hit + 1;
  }
  return subject.length();
}

//---------------------------------------------------------------------
// Boyer-Moore string search
//---------------------------------------------------------------------

// Boyer-Moore strategy. Reads the bad-character table and the good-suffix
// shift table obtained from |search| and scans |subject| from |start_index|.
// Returns the index of the first match found, or subject.length() when the
// pattern does not occur.
template <typename PatternChar, typename SubjectChar>
size_t StringSearch<PatternChar, SubjectChar>::BoyerMooreSearch(
    StringSearch<PatternChar, SubjectChar>* search,
    Vector<const SubjectChar> subject,
    size_t start_index) {
  Vector<const PatternChar> pattern = search->pattern_;
  const size_t subject_length = subject.length();
  const size_t pattern_length = pattern.length();
  // Only the last kBMMaxShift characters of the pattern are preprocessed;
  // |start| is the pattern offset where that covered part begins.
  size_t start = search->start_;

  int* bad_char_occurence = search->bad_char_table();
  int* good_suffix_shift = search->good_suffix_shift_table();

  PatternChar last_char = pattern[pattern_length - 1];
  size_t index = start_index;
  // Continue search from index.
  while (index <= subject_length - pattern_length) {
    size_t j = pattern_length - 1;
    int c;
    // Skip ahead with the bad-character table until the subject character
    // under the pattern's last position equals the pattern's last character.
    while (last_char != (c = subject[index + j])) {
      int shift = j - CharOccurrence(bad_char_occurence, c);
      index += shift;
      if (index > subject_length - pattern_length) {
        return subject.length();
      }
    }
    // Compare right to left. |j| is unsigned, so `j >= 0` always holds; the
    // loop ends on a mismatch or through the return at j == 0 (full match).
    while (j >= 0 && pattern[j] == (c = subject[index + j])) {
      if (j == 0) {
        return index;
      }
      j--;
    }
    if (j < start) {
      // we have matched more than our tables allow us to be smart about.
      // Fall back on BMH shift.
      index += pattern_length - 1 -
               CharOccurrence(bad_char_occurence,
                              static_cast<SubjectChar>(last_char));
    } else {
      // Advance by the larger of the good-suffix shift and the bad-character
      // shift for the mismatching subject character |c|.
      int gs_shift = good_suffix_shift[j + 1];
      int bc_occ = CharOccurrence(bad_char_occurence, c);
      int shift = j - bc_occ;
      if (gs_shift > shift) {
        shift = gs_shift;
      }
      index += shift;
    }
  }

  return subject.length();
}

// Fills the good-suffix shift table for the part of the pattern between
// |start_| and the pattern's end, using the suffix table as scratch space.
// Both tables are accessed through biased pointers so that pattern offsets
// can be used directly as table indices.
template <typename PatternChar, typename SubjectChar>
void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreTable() {
  const size_t pattern_length = pattern_.length();
  const PatternChar* pattern = pattern_.start();
  // Only look at the last kBMMaxShift characters of pattern (from start_
  // to pattern_length).
  const size_t start = start_;
  const size_t length = pattern_length - start;

  // Biased tables so that we can use pattern indices as table indices,
  // even if we only cover the part of the pattern from offset start.
  int* shift_table = good_suffix_shift_table();
  int* suffix_table = this->suffix_table();

  // Initialize table. |length| doubles as the "not yet assigned" marker that
  // the loops below test for.
  for (size_t i = start; i < pattern_length; i++) {
    shift_table[i] = length;
  }
  shift_table[pattern_length] = 1;
  suffix_table[pattern_length] = pattern_length + 1;

  // Nothing to preprocess when the covered range is empty.
  if (pattern_length <= start) {
    return;
  }

  // Find suffixes, walking the pattern from its end towards |start|.
  PatternChar last_char = pattern[pattern_length - 1];
  size_t suffix = pattern_length + 1;
  {
    size_t i = pattern_length;
    while (i > start) {
      PatternChar c = pattern[i - 1];
      // Follow the suffix chain until a position whose preceding character
      // equals |c| is found, assigning still-unset shifts on the way.
      while (suffix <= pattern_length && c != pattern[suffix - 1]) {
        if (static_cast<size_t>(shift_table[suffix]) == length) {
          shift_table[suffix] = suffix - i;
        }
        suffix = suffix_table[suffix];
      }
      suffix_table[--i] = --suffix;
      if (suffix == pattern_length) {
        // No suffix to extend, so we check against last_char only.
        while ((i > start) && (pattern[i - 1] != last_char)) {
          if (static_cast<size_t>(shift_table[pattern_length]) == length) {
            shift_table[pattern_length] = pattern_length - i;
          }
          suffix_table[--i] = pattern_length;
        }
        if (i > start) {
          suffix_table[--i] = --suffix;
        }
      }
    }
  }
  // Build shift table using suffixes: every entry that still carries the
  // |length| marker receives a shift derived from the current |suffix|.
  if (suffix < pattern_length) {
    for (size_t i = start; i <= pattern_length; i++) {
      if (static_cast<size_t>(shift_table[i]) == length) {
        shift_table[i] = suffix - start;
      }
      if (i == suffix) {
        suffix = suffix_table[suffix];
      }
    }
  }
}

//---------------------------------------------------------------------
// Boyer-Moore-Horspool string search.
//---------------------------------------------------------------------

// Boyer-Moore-Horspool strategy: uses only the bad-character table. While
// searching it keeps a |badness| score; once that score becomes positive the
// good-suffix table is built, |search->strategy_| is switched to
// BoyerMooreSearch and the search continues there.
// Returns the index of the first match found, or the subject length when the
// pattern does not occur.
template <typename PatternChar, typename SubjectChar>
size_t StringSearch<PatternChar, SubjectChar>::BoyerMooreHorspoolSearch(
    StringSearch<PatternChar, SubjectChar>* search,
    Vector<const SubjectChar> subject,
    size_t start_index) {
  Vector<const PatternChar> pattern = search->pattern_;
  const size_t subject_length = subject.length();
  const size_t pattern_length = pattern.length();
  int* char_occurrences = search->bad_char_table();
  // How bad we are doing without a good-suffix table. Starts at minus the
  // pattern length.
  int64_t badness = -pattern_length;

  // Shift applied after a failed comparison, taken from the table entry of
  // the pattern's last character.
  PatternChar last_char = pattern[pattern_length - 1];
  int last_char_shift =
      pattern_length - 1 -
      CharOccurrence(char_occurrences, static_cast<SubjectChar>(last_char));

  // Perform search
  size_t index = start_index;  // No matches found prior to this index.
  while (index <= subject_length - pattern_length) {
    size_t j = pattern_length - 1;
    int subject_char;
    // Skip ahead until the subject character under the pattern's last
    // position equals the pattern's last character.
    while (last_char != (subject_char = subject[index + j])) {
      int bc_occ = CharOccurrence(char_occurrences, subject_char);
      int shift = j - bc_occ;
      index += shift;
      badness += 1 - shift;  // at most zero, so badness cannot increase.
      if (index > subject_length - pattern_length) {
        return subject_length;
      }
    }
    j--;
    // Compare the rest right to left. |j| is unsigned, so `j >= 0` always
    // holds; the loop ends on a mismatch or through the return at j == 0.
    while (j >= 0 && pattern[j] == (subject[index + j])) {
      if (j == 0) {
        return index;
      }
      j--;
    }
    index += last_char_shift;
    // Badness increases by the number of characters we have
    // checked, and decreases by the number of characters we
    // can skip by shifting. It's a measure of how we are doing
    // compared to reading each character exactly once.
    badness += (pattern_length - j) - last_char_shift;
    if (badness > 0) {
      // Doing badly enough: upgrade to full Boyer-Moore for this and all
      // later searches with this object.
      search->PopulateBoyerMooreTable();
      search->strategy_ = &BoyerMooreSearch;
      return BoyerMooreSearch(search, subject, index);
    }
  }
  return subject.length();
}

// Fills the bad-character table for the preprocessed part of the pattern
// (at most its last kBMMaxShift characters, starting at |start_|).
template <typename PatternChar, typename SubjectChar>
void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreHorspoolTable() {
  int* const occurrences = bad_char_table();
  const size_t alphabet_size = AlphabetSize();
  const size_t first = start_;
  const size_t pattern_length = pattern_.length();

  // Default entry for characters that do not occur in the preprocessed part.
  if (first != 0) {
    for (size_t slot = 0; slot < alphabet_size; slot++) {
      occurrences[slot] = first - 1;
    }
  } else {
    // The whole pattern is preprocessed (it is shorter than kBMMaxShift).
    memset(occurrences, -1, alphabet_size * sizeof(*occurrences));
  }

  // Walk forwards so that the *last* occurrence of each character equivalence
  // class is the one that remains in the table. The pattern's final character
  // is deliberately left out.
  for (size_t pos = first; pos < pattern_length - 1; pos++) {
    const PatternChar c = pattern_[pos];
    const int bucket = (sizeof(PatternChar) == 1) ? c : c % AlphabetSize();
    occurrences[bucket] = pos;
  }
}

//---------------------------------------------------------------------
// Linear string search with bailout to BMH.
//---------------------------------------------------------------------

// Starting strategy for longer patterns: a plain linear search that keeps
// track of how much work it has done. If the pattern is not found early in
// the subject, it populates the bad-character table, switches the searcher to
// BoyerMooreHorspoolSearch and continues there.
// Returns subject.length() when the pattern is not found.
template <typename PatternChar, typename SubjectChar>
size_t StringSearch<PatternChar, SubjectChar>::InitialSearch(
    StringSearch<PatternChar, SubjectChar>* search,
    Vector<const SubjectChar> subject,
    size_t index) {
  Vector<const PatternChar> pattern = search->pattern_;
  const size_t pattern_length = pattern.length();
  // Last subject position at which the pattern can still start.
  const size_t n = subject.length() - pattern_length;
  // Work counter. It starts negative; once it turns positive enough work has
  // been done that a better algorithm is probably worth its set-up cost.
  int64_t badness = -10 - (pattern_length << 2);

  // The pattern has at least two characters. Its first character is located
  // separately so the common case of it not matching stays fast.
  size_t i = index;
  while (i <= n) {
    if (++badness > 0) {
      search->PopulateBoyerMooreHorspoolTable();
      search->strategy_ = &BoyerMooreHorspoolSearch;
      return BoyerMooreHorspoolSearch(search, subject, i);
    }

    i = FindFirstCharacter(pattern, subject, i);
    if (i == subject.length())
      return subject.length();
    ASSERT_LE(i, n);

    // Count how many leading characters match at position |i|.
    size_t matched = 1;
    while (pattern[matched] == subject[i + matched]) {
      if (++matched == pattern_length) {
        return i;
      }
    }
    badness += matched;
    ++i;
  }
  return subject.length();
}

// Performs a single stand-alone search for |pattern| in |subject|, starting
// at |start_index|.
// When the same pattern is searched for repeatedly, construct one
// StringSearch object instead and call its Search function for each search.
template <typename SubjectChar, typename PatternChar>
size_t SearchString(Vector<const SubjectChar> subject,
                    Vector<const PatternChar> pattern,
                    size_t start_index) {
  typedef StringSearch<PatternChar, SubjectChar> Searcher;
  return Searcher(pattern).Search(subject, start_index);
}
}
}  // namespace node::stringsearch

namespace node {
using node::stringsearch::Vector;

template <typename SubjectChar, typename PatternChar>
size_t SearchString(const SubjectChar* haystack,
                    size_t haystack_length,
                    const PatternChar* needle,
                    size_t needle_length,
                    size_t start_index) {
  return node::stringsearch::SearchString(
      Vector<const SubjectChar>(haystack, haystack_length),
      Vector<const PatternChar>(needle, needle_length),
      start_index);
}
}  // namespace node

#endif  // SRC_STRING_SEARCH_H_

Anon7 - 2022
AnonSec Team