diff --git a/sandbox/Makefile b/sandbox/Makefile new file mode 100644 index 0000000..f72d56e --- /dev/null +++ b/sandbox/Makefile @@ -0,0 +1,2 @@ +all: + g++ -O2 -g -pthread -std=c++11 optimized-parse.cc diff --git a/sandbox/optimized-parse.cc b/sandbox/optimized-parse.cc new file mode 100644 index 0000000..686ff11 --- /dev/null +++ b/sandbox/optimized-parse.cc @@ -0,0 +1,870 @@ +#ifdef _WIN64 +#define atoll(S) _atoi64(S) +#include +#else +#include +#include +#include +#include +#include +#include +#endif + +#include +#include +#include +#include +#include +#include +#include + +#include // C++11 +#include // C++11 +#include // C++11 + +// ---------------------------------------------------------------------------- +// Small vector class useful for multi-threaded environment. +// +// stack_container.h +// +// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// This allocator can be used with STL containers to provide a stack buffer +// from which to allocate memory and overflows onto the heap. This stack buffer +// would be allocated on the stack and allows us to avoid heap operations in +// some situations. +// +// STL likes to make copies of allocators, so the allocator itself can't hold +// the data. Instead, we make the creator responsible for creating a +// StackAllocator::Source which contains the data. Copying the allocator +// merely copies the pointer to this shared source, so all allocators created +// based on our allocator will share the same stack buffer. +// +// This stack buffer implementation is very simple. The first allocation that +// fits in the stack buffer will use the stack buffer. Any subsequent +// allocations will not use the stack buffer, even if there is unused room. +// This makes it appropriate for array-like containers, but the caller should +// be sure to reserve() in the container up to the stack buffer size. Otherwise +// the container will allocate a small array which will "use up" the stack +// buffer. +template +class StackAllocator : public std::allocator { + public: + typedef typename std::allocator::pointer pointer; + typedef typename std::allocator::size_type size_type; + + // Backing store for the allocator. The container owner is responsible for + // maintaining this for as long as any containers using this allocator are + // live. + struct Source { + Source() : used_stack_buffer_(false) {} + + // Casts the buffer in its right type. + T *stack_buffer() { return reinterpret_cast(stack_buffer_); } + const T *stack_buffer() const { + return reinterpret_cast(stack_buffer_); + } + + // + // IMPORTANT: Take care to ensure that stack_buffer_ is aligned + // since it is used to mimic an array of T. + // Be careful while declaring any unaligned types (like bool) + // before stack_buffer_. + // + + // The buffer itself. It is not of type T because we don't want the + // constructors and destructors to be automatically called. Define a POD + // buffer of the right size instead. + char stack_buffer_[sizeof(T[stack_capacity])]; + + // Set when the stack buffer is used for an allocation. We do not track + // how much of the buffer is used, only that somebody is using it. + bool used_stack_buffer_; + }; + + // Used by containers when they want to refer to an allocator of type U. + template + struct rebind { + typedef StackAllocator other; + }; + + // For the straight up copy c-tor, we can share storage. + StackAllocator(const StackAllocator &rhs) + : source_(rhs.source_) {} + + // ISO C++ requires the following constructor to be defined, + // and std::vector in VC++2008SP1 Release fails with an error + // in the class _Container_base_aux_alloc_real (from ) + // if the constructor does not exist. + // For this constructor, we cannot share storage; there's + // no guarantee that the Source buffer of Ts is large enough + // for Us. + // TODO(Google): If we were fancy pants, perhaps we could share storage + // iff sizeof(T) == sizeof(U). + template + StackAllocator(const StackAllocator &other) + : source_(NULL) { + (void)other; + } + + explicit StackAllocator(Source *source) : source_(source) {} + + // Actually do the allocation. Use the stack buffer if nobody has used it yet + // and the size requested fits. Otherwise, fall through to the standard + // allocator. + pointer allocate(size_type n, void *hint = 0) { + if (source_ != NULL && !source_->used_stack_buffer_ && + n <= stack_capacity) { + source_->used_stack_buffer_ = true; + return source_->stack_buffer(); + } else { + return std::allocator::allocate(n, hint); + } + } + + // Free: when trying to free the stack buffer, just mark it as free. For + // non-stack-buffer pointers, just fall though to the standard allocator. + void deallocate(pointer p, size_type n) { + if (source_ != NULL && p == source_->stack_buffer()) + source_->used_stack_buffer_ = false; + else + std::allocator::deallocate(p, n); + } + + private: + Source *source_; +}; + +// A wrapper around STL containers that maintains a stack-sized buffer that the +// initial capacity of the vector is based on. Growing the container beyond the +// stack capacity will transparently overflow onto the heap. The container must +// support reserve(). +// +// WATCH OUT: the ContainerType MUST use the proper StackAllocator for this +// type. This object is really intended to be used only internally. You'll want +// to use the wrappers below for different types. +template +class StackContainer { + public: + typedef TContainerType ContainerType; + typedef typename ContainerType::value_type ContainedType; + typedef StackAllocator Allocator; + + // Allocator must be constructed before the container! + StackContainer() : allocator_(&stack_data_), container_(allocator_) { + // Make the container use the stack allocation by reserving our buffer size + // before doing anything else. + container_.reserve(stack_capacity); + } + + // Getters for the actual container. + // + // Danger: any copies of this made using the copy constructor must have + // shorter lifetimes than the source. The copy will share the same allocator + // and therefore the same stack buffer as the original. Use std::copy to + // copy into a "real" container for longer-lived objects. + ContainerType &container() { return container_; } + const ContainerType &container() const { return container_; } + + // Support operator-> to get to the container. This allows nicer syntax like: + // StackContainer<...> foo; + // std::sort(foo->begin(), foo->end()); + ContainerType *operator->() { return &container_; } + const ContainerType *operator->() const { return &container_; } + +#ifdef UNIT_TEST + // Retrieves the stack source so that that unit tests can verify that the + // buffer is being used properly. + const typename Allocator::Source &stack_data() const { return stack_data_; } +#endif + + protected: + typename Allocator::Source stack_data_; + unsigned char pad_[7]; + Allocator allocator_; + ContainerType container_; + + // DISALLOW_EVIL_CONSTRUCTORS(StackContainer); + StackContainer(const StackContainer &); + void operator=(const StackContainer &); +}; + +// StackVector +// +// Example: +// StackVector foo; +// foo->push_back(22); // we have overloaded operator-> +// foo[0] = 10; // as well as operator[] +template +class StackVector + : public StackContainer >, + stack_capacity> { + public: + StackVector() + : StackContainer >, + stack_capacity>() {} + + // We need to put this in STL containers sometimes, which requires a copy + // constructor. We can't call the regular copy constructor because that will + // take the stack buffer from the original. Here, we create an empty object + // and make a stack buffer of its own. + StackVector(const StackVector &other) + : StackContainer >, + stack_capacity>() { + this->container().assign(other->begin(), other->end()); + } + + StackVector &operator=( + const StackVector &other) { + this->container().assign(other->begin(), other->end()); + return *this; + } + + // Vectors are commonly indexed, which isn't very convenient even with + // operator-> (using "->at()" does exception stuff we don't want). + T &operator[](size_t i) { return this->container().operator[](i); } + const T &operator[](size_t i) const { + return this->container().operator[](i); + } +}; + +// ---------------------------------------------------------------------------- + +typedef StackVector ShortString; + +#define IS_SPACE(x) (((x) == ' ') || ((x) == '\t')) +#define IS_DIGIT(x) \ + (static_cast((x) - '0') < static_cast(10)) +#define IS_NEW_LINE(x) (((x) == '\r') || ((x) == '\n') || ((x) == '\0')) + +struct vertex_index { + int v_idx, vt_idx, vn_idx; + vertex_index() : v_idx(-1), vt_idx(-1), vn_idx(-1) {} + explicit vertex_index(int idx) : v_idx(idx), vt_idx(idx), vn_idx(idx) {} + vertex_index(int vidx, int vtidx, int vnidx) + : v_idx(vidx), vt_idx(vtidx), vn_idx(vnidx) {} +}; + +// Make index zero-base, and also support relative index. +static inline int fixIndex(int idx, int n) { + if (idx > 0) return idx - 1; + if (idx == 0) return 0; + return n + idx; // negative value = relative +} + +// Parse triples with index offsets: i, i/j/k, i//k, i/j +static vertex_index parseTriple(const char **token, int vsize, int vnsize, + int vtsize) { + vertex_index vi(-1); + + vi.v_idx = fixIndex(atoi((*token)), vsize); + (*token) += strcspn((*token), "/ \t\r"); + if ((*token)[0] != '/') { + return vi; + } + (*token)++; + + // i//k + if ((*token)[0] == '/') { + (*token)++; + vi.vn_idx = fixIndex(atoi((*token)), vnsize); + (*token) += strcspn((*token), "/ \t\r"); + return vi; + } + + // i/j/k or i/j + vi.vt_idx = fixIndex(atoi((*token)), vtsize); + (*token) += strcspn((*token), "/ \t\r"); + if ((*token)[0] != '/') { + return vi; + } + + // i/j/k + (*token)++; // skip '/' + vi.vn_idx = fixIndex(atoi((*token)), vnsize); + (*token) += strcspn((*token), "/ \t\r"); + return vi; +} + +// Parse raw triples: i, i/j/k, i//k, i/j +static vertex_index parseRawTriple(const char **token) { + vertex_index vi( + static_cast(0x80000000)); // 0x80000000 = -2147483648 = invalid + + vi.v_idx = atoi((*token)); + (*token) += strcspn((*token), "/ \t\r"); + if ((*token)[0] != '/') { + return vi; + } + (*token)++; + + // i//k + if ((*token)[0] == '/') { + (*token)++; + vi.vn_idx = atoi((*token)); + (*token) += strcspn((*token), "/ \t\r"); + return vi; + } + + // i/j/k or i/j + vi.vt_idx = atoi((*token)); + (*token) += strcspn((*token), "/ \t\r"); + if ((*token)[0] != '/') { + return vi; + } + + // i/j/k + (*token)++; // skip '/' + vi.vn_idx = atoi((*token)); + (*token) += strcspn((*token), "/ \t\r"); + return vi; +} + +static inline bool parseString(ShortString *s, const char **token) { + (*token) += strspn((*token), " \t"); + size_t e = strcspn((*token), " \t\r"); + (*s)->insert((*s)->end(), (*token), (*token) + e); + (*token) += e; + return true; +} + +static inline int parseInt(const char **token) { + (*token) += strspn((*token), " \t"); + int i = atoi((*token)); + (*token) += strcspn((*token), " \t\r"); + return i; +} + +// Tries to parse a floating point number located at s. +// +// s_end should be a location in the string where reading should absolutely +// stop. For example at the end of the string, to prevent buffer overflows. +// +// Parses the following EBNF grammar: +// sign = "+" | "-" ; +// END = ? anything not in digit ? +// digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; +// integer = [sign] , digit , {digit} ; +// decimal = integer , ["." , integer] ; +// float = ( decimal , END ) | ( decimal , ("E" | "e") , integer , END ) ; +// +// Valid strings are for example: +// -0 +3.1417e+2 -0.0E-3 1.0324 -1.41 11e2 +// +// If the parsing is a success, result is set to the parsed value and true +// is returned. +// +// The function is greedy and will parse until any of the following happens: +// - a non-conforming character is encountered. +// - s_end is reached. +// +// The following situations triggers a failure: +// - s >= s_end. +// - parse failure. +// +static bool tryParseDouble(const char *s, const char *s_end, double *result) { + if (s >= s_end) { + return false; + } + + double mantissa = 0.0; + // This exponent is base 2 rather than 10. + // However the exponent we parse is supposed to be one of ten, + // thus we must take care to convert the exponent/and or the + // mantissa to a * 2^E, where a is the mantissa and E is the + // exponent. + // To get the final double we will use ldexp, it requires the + // exponent to be in base 2. + int exponent = 0; + + // NOTE: THESE MUST BE DECLARED HERE SINCE WE ARE NOT ALLOWED + // TO JUMP OVER DEFINITIONS. + char sign = '+'; + char exp_sign = '+'; + char const *curr = s; + + // How many characters were read in a loop. + int read = 0; + // Tells whether a loop terminated due to reaching s_end. + bool end_not_reached = false; + + /* + BEGIN PARSING. + */ + + // Find out what sign we've got. + if (*curr == '+' || *curr == '-') { + sign = *curr; + curr++; + } else if (IS_DIGIT(*curr)) { /* Pass through. */ + } else { + goto fail; + } + + // Read the integer part. + end_not_reached = (curr != s_end); + while (end_not_reached && IS_DIGIT(*curr)) { + mantissa *= 10; + mantissa += static_cast(*curr - 0x30); + curr++; + read++; + end_not_reached = (curr != s_end); + } + + // We must make sure we actually got something. + if (read == 0) goto fail; + // We allow numbers of form "#", "###" etc. + if (!end_not_reached) goto assemble; + + // Read the decimal part. + if (*curr == '.') { + curr++; + read = 1; + end_not_reached = (curr != s_end); + while (end_not_reached && IS_DIGIT(*curr)) { + // NOTE: Don't use powf here, it will absolutely murder precision. + mantissa += static_cast(*curr - 0x30) * pow(10.0, -read); + read++; + curr++; + end_not_reached = (curr != s_end); + } + } else if (*curr == 'e' || *curr == 'E') { + } else { + goto assemble; + } + + if (!end_not_reached) goto assemble; + + // Read the exponent part. + if (*curr == 'e' || *curr == 'E') { + curr++; + // Figure out if a sign is present and if it is. + end_not_reached = (curr != s_end); + if (end_not_reached && (*curr == '+' || *curr == '-')) { + exp_sign = *curr; + curr++; + } else if (IS_DIGIT(*curr)) { /* Pass through. */ + } else { + // Empty E is not allowed. + goto fail; + } + + read = 0; + end_not_reached = (curr != s_end); + while (end_not_reached && IS_DIGIT(*curr)) { + exponent *= 10; + exponent += static_cast(*curr - 0x30); + curr++; + read++; + end_not_reached = (curr != s_end); + } + exponent *= (exp_sign == '+' ? 1 : -1); + if (read == 0) goto fail; + } + +assemble: + *result = + (sign == '+' ? 1 : -1) * ldexp(mantissa * pow(5.0, exponent), exponent); + return true; +fail: + return false; +} + +static inline float parseFloat(const char **token) { + (*token) += strspn((*token), " \t"); +#ifdef TINY_OBJ_LOADER_OLD_FLOAT_PARSER + float f = static_cast(atof(*token)); + (*token) += strcspn((*token), " \t\r"); +#else + const char *end = (*token) + strcspn((*token), " \t\r"); + double val = 0.0; + tryParseDouble((*token), end, &val); + float f = static_cast(val); + (*token) = end; +#endif + return f; +} + +static inline void parseFloat2(float *x, float *y, const char **token) { + (*x) = parseFloat(token); + (*y) = parseFloat(token); +} + +static inline void parseFloat3(float *x, float *y, float *z, + const char **token) { + (*x) = parseFloat(token); + (*y) = parseFloat(token); + (*z) = parseFloat(token); +} + +typedef enum +{ + COMMAND_EMPTY, + COMMAND_V, + COMMAND_VN, + COMMAND_VT, + COMMAND_F, + COMMAND_G, + COMMAND_O, + COMMAND_USEMTL, + +} CommandType; + +typedef struct +{ + float vx, vy, vz; + float nx, ny, nz; + float tx, ty; + + StackVector f; + ShortString group_name; + ShortString object_name; + ShortString material_name; + + CommandType type; +} Command; + +bool test(Command *command, const char *p, size_t p_len) +{ + std::vector linebuf(p_len + 1, '\0'); + memcpy(&linebuf.at(0), p, p_len); + + const char *token = &linebuf.at(0); + + // Skip leading space. + token += strspn(token, " \t"); + + assert(token); + if (token[0] == '\0') { // empty line + return false; + } + + if (token[0] == '#') { // comment line + return false; + } + + // vertex + if (token[0] == 'v' && IS_SPACE((token[1]))) { + token += 2; + float x, y, z; + parseFloat3(&x, &y, &z, &token); + command->vx = x; + command->vy = y; + command->vz = z; + command->type = COMMAND_V; + return true; + } + + // normal + if (token[0] == 'v' && token[1] == 'n' && IS_SPACE((token[2]))) { + token += 3; + float x, y, z; + parseFloat3(&x, &y, &z, &token); + command->nx = x; + command->ny = y; + command->nz = z; + command->type = COMMAND_VN; + return true; + } + + // texcoord + if (token[0] == 'v' && token[1] == 't' && IS_SPACE((token[2]))) { + token += 3; + float x, y; + parseFloat2(&x, &y, &token); + command->tx = x; + command->ty = y; + command->type = COMMAND_VT; + return true; + } + + // face + if (token[0] == 'f' && IS_SPACE((token[1]))) { + token += 2; + token += strspn(token, " \t"); + + while (!IS_NEW_LINE(token[0])) { + vertex_index vi = parseRawTriple(&token); + //if (callback.index_cb) { + // callback.index_cb(user_data, vi.v_idx, vi.vn_idx, vi.vt_idx); + //} + size_t n = strspn(token, " \t\r"); + token += n; + + command->f->push_back(vi); + } + + command->type = COMMAND_F; + + return true; + } + + // use mtl + if ((0 == strncmp(token, "usemtl", 6)) && IS_SPACE((token[6]))) { + char namebuf[8192]; + token += 7; +#ifdef _MSC_VER + sscanf_s(token, "%s", namebuf, (unsigned)_countof(namebuf)); +#else + sscanf(token, "%s", namebuf); +#endif + + //int newMaterialId = -1; + //if (material_map.find(namebuf) != material_map.end()) { + // newMaterialId = material_map[namebuf]; + //} else { + // // { error!! material not found } + //} + + //if (newMaterialId != materialId) { + // materialId = newMaterialId; + //} + + command->material_name->insert(command->material_name->end(), namebuf, namebuf + strlen(namebuf)); + command->material_name->push_back('\0'); + command->type = COMMAND_USEMTL; + + return true; + } + +#if 0 + // load mtl + if ((0 == strncmp(token, "mtllib", 6)) && IS_SPACE((token[6]))) { + char namebuf[TINYOBJ_SSCANF_BUFFER_SIZE]; + token += 7; +#ifdef _MSC_VER + sscanf_s(token, "%s", namebuf, (unsigned)_countof(namebuf)); +#else + sscanf(token, "%s", namebuf); +#endif + + std::string err_mtl; + std::vector materials; + bool ok = (*readMatFn)(namebuf, &materials, &material_map, &err_mtl); + if (err) { + (*err) += err_mtl; + } + + if (!ok) { + return false; + } + + if (callback.mtllib_cb) { + callback.mtllib_cb(user_data, &materials.at(0), + static_cast(materials.size())); + } + + continue; + } +#endif + + // group name + if (token[0] == 'g' && IS_SPACE((token[1]))) { + ShortString names[16]; + int num_names = 0; + + while (!IS_NEW_LINE(token[0])) { + bool ret = parseString(&(names[num_names]), &token); + assert(ret); + token += strspn(token, " \t\r"); // skip tag + num_names++; + } + + assert(num_names > 0); + + int name_idx = 0; + // names[0] must be 'g', so skip the 0th element. + if (num_names > 1) { + name_idx = 1; + } + + //command->group_name->assign(names[name_idx]); + command->type = COMMAND_G; + + //if (callback.group_cb) { + // if (names.size() > 1) { + // // create const char* array. + // std::vector tmp(names.size() - 1); + // for (size_t j = 0; j < tmp.size(); j++) { + // tmp[j] = names[j + 1].c_str(); + // } + // callback.group_cb(user_data, &tmp.at(0), + // static_cast(tmp.size())); + + // } else { + // callback.group_cb(user_data, NULL, 0); + // } + + //} + return true; + } + + // object name + if (token[0] == 'o' && IS_SPACE((token[1]))) { + // @todo { multiple object name? } + char namebuf[8192]; + token += 2; +#ifdef _MSC_VER + sscanf_s(token, "%s", namebuf, (unsigned)_countof(namebuf)); +#else + sscanf(token, "%s", namebuf); +#endif + + //if (callback.object_cb) { + // callback.object_cb(user_data, name.c_str()); + //} + + command->object_name->insert(command->object_name->end(), namebuf, namebuf + strlen(namebuf)); + command->object_name->push_back('\0'); + command->type = COMMAND_O; + + return true; + } + + + return false; +} + +typedef struct +{ + size_t pos; + size_t len; +} LineInfo; + +// Idea come from https://github.com/antonmks/nvParse +// 1. mmap file +// 2. find newline(\n) and list of line data. +// 3. Do parallel parsing for each line. +// 4. Reconstruct final mesh data structure. + +void parse(const char* buf, size_t len) +{ + std::vector newline_marker(len, 0); + + auto num_threads = std::thread::hardware_concurrency(); + std::cout << "# of threads = " << num_threads << std::endl; + + auto t1 = std::chrono::high_resolution_clock::now(); + + std::atomic newline_counter(0); + std::vector workers; + + std::vector line_infos; + line_infos.reserve(len / 1024); // len / 1024 = heuristics + + // @todo { parallelize lineinfo construction ? } + size_t prev_pos = 0; + for (size_t i = 0; i < len - 1; i++) { + if (buf[i] == '\n') { + LineInfo info; + info.pos = prev_pos; + info.len = i - prev_pos; + + //printf("p = %d, len = %d\n", prev_pos, info.len); + + if (info.len > 0) { + line_infos.push_back(info); + } + + prev_pos = i+1; + } + } + + auto chunk = line_infos.size() / num_threads; + + for (auto t = 0; t < num_threads; t++) { + workers.push_back(std::thread([&, t]() { + auto start_idx = (t + 0) * chunk; + auto end_idx = std::min((t + 1) * chunk, line_infos.size()); + + for (auto i = start_idx; i < end_idx; i++) { + Command cmd; + bool ret = test(&cmd, &buf[line_infos[i].pos], line_infos[i].len); + if (ret) { + } + } + + })); + } + + for (auto &t : workers) { + t.join(); + } + + auto t2 = std::chrono::high_resolution_clock::now(); + + std::chrono::duration ms = t2 - t1; + + std::cout << ms.count() << " ms\n"; + + std::cout << "# of newlines = " << newline_counter << std::endl; + std::cout << "# of lines = " << line_infos.size() << std::endl; +} + +int +main(int argc, char **argv) +{ + + if (argc < 2) { + printf("Needs .obj\n"); + return EXIT_FAILURE; + } + +#ifdef _WIN64 + HANDLE file = CreateFileA("lineitem.tbl", GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_SEQUENTIAL_SCAN, NULL); + assert(file != INVALID_HANDLE_VALUE); + + HANDLE fileMapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL); + assert(fileMapping != INVALID_HANDLE_VALUE); + + LPVOID fileMapView = MapViewOfFile(fileMapping, FILE_MAP_READ, 0, 0, 0); + auto fileMapViewChar = (const char*)fileMapView; + assert(fileMapView != NULL); +#else + + FILE* f = fopen(argv[1], "r" ); + fseek(f, 0, SEEK_END); + long fileSize = ftell(f); + fclose(f); + + struct stat sb; + char *p; + int fd; + + fd = open (argv[1], O_RDONLY); + if (fd == -1) { + perror ("open"); + return 1; + } + + if (fstat (fd, &sb) == -1) { + perror ("fstat"); + return 1; + } + + if (!S_ISREG (sb.st_mode)) { + fprintf (stderr, "%s is not a file\n", "lineitem.tbl"); + return 1; + } + + p = (char*)mmap (0, fileSize, PROT_READ, MAP_SHARED, fd, 0); + + if (p == MAP_FAILED) { + perror ("mmap"); + return 1; + } + + if (close (fd) == -1) { + perror ("close"); + return 1; + } + + printf("fsize: %lu\n", fileSize); +#endif + + parse(p, fileSize); + + return EXIT_SUCCESS; +}