Init

slaviczavik · Jul 21, 2018 · e7cc895 · e7cc895
commit e7cc895
Show file tree

Hide file tree

Showing 15 changed files with 1,037 additions and 0 deletions.
diff --git a/.eslintrc b/.eslintrc
@@ -0,0 +1,10 @@
+extends:
+- standard
+parser: babel-eslint
+rules:
+  brace-style:
+  - error
+  - stroustrup
+env:
+  node: true
+  mocha: true
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,75 @@
+.DS_Store
+
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+
+# Runtime data
+pids
+*.pid
+*.seed
+*.pid.lock
+
+# Directory for instrumented libs generated by jscoverage/JSCover
+lib-cov
+
+# Coverage directory used by tools like istanbul
+coverage
+
+# nyc test coverage
+.nyc_output
+
+# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
+.grunt
+
+# Bower dependency directory (https://bower.io/)
+bower_components
+
+# node-waf configuration
+.lock-wscript
+
+# Compiled binary addons (https://nodejs.org/api/addons.html)
+build/Release
+
+# Dependency directories
+node_modules/
+jspm_packages/
+
+# TypeScript v1 declaration files
+typings/
+
+# Optional npm cache directory
+.npm
+
+# Optional eslint cache
+.eslintcache
+
+# Optional REPL history
+.node_repl_history
+
+# Output of 'npm pack'
+*.tgz
+
+# Yarn Integrity file
+.yarn-integrity
+
+# dotenv environment variables file
+.env
+
+# parcel-bundler cache (https://parceljs.org/)
+.cache
+
+# next.js build output
+.next
+
+# nuxt.js build output
+.nuxt
+
+# vuepress build output
+.vuepress/dist
+
+# Serverless directories
+.serverless
diff --git a/.travis.yml b/.travis.yml
@@ -0,0 +1,6 @@
+language: node_js
+
+node_js:
+  - "6.14.3"
+  - "7"
+  - "8"
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2018 Jaroslav Závodný
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -0,0 +1,50 @@
+[![Build Status](https://travis-ci.com/slaviczavik/http-data-parser.svg?branch=master)](https://travis-ci.com/slaviczavik/http-data-parser)
+
+# Description
+An extremely fast Node.js module for parsing form data, primarily file uploads.
+
+# Requirements
+Node.js 6.14.3 or higher is required.
+
+# Installation
+```
+npm i @slaviczavik/http-data-parser
+```
+
+# Example
+
+## Code
+
+```JS
+const HttpDataParser = require('@slaviczavik/http-data-parser')
+
+// The boundary must be extracted from the 'content-type' request header.
+const boundary = '--------------------------034172598905589540726558'
+const parser = new HttpDataParser(boundary)
+
+parser.on('header', function (buffer) {
+  // Save headers somewhere...
+})
+
+parser.on('data', function (buffer) {
+  // Save body content somewhere...
+})
+
+parser.on('part', function () {
+  // We reached the end of one body part.
+  // Here we can concatenate headers and/or body content together.
+})
+
+parser.on('end', function () {
+  // We reached the end of whole body.
+  // Or here we can concatenate headers and/or body content together.
+})
+
+req.on('data', function (data) {
+  parser.add(data)
+})
+
+req.on('end', function () {
+  parser.end()
+})
+```
diff --git a/benchmark/index.js b/benchmark/index.js
@@ -0,0 +1,28 @@
+const Parser = require('../lib/')
+
+const boundary = '--------------------------034172598905589540726558'
+const parser = new Parser(boundary)
+
+const MB = 100
+const buffer = createMultipartBuffer(boundary, MB * 1024 * 1024)
+
+let start = +new Date()
+
+parser.on('end', function () {
+  let duration = +new Date() - start
+  let ratio = (MB / (duration / 1000)).toFixed(2)
+
+  console.log(`${ratio} MB/s`)
+})
+
+parser.add(buffer)
+parser.end()
+
+function createMultipartBuffer (boundary, size) {
+  let head = Buffer.from('--' + boundary + '\r\n')
+  let body = Buffer.from('Content-Disposition: form-data; name="field"\r\n\r\n')
+  let tail = Buffer.from('\r\n--' + boundary + '--\r\n')
+  let buff = Buffer.allocUnsafe(size)
+
+  return Buffer.concat([ head, body, buff, tail ])
+}
diff --git a/lib/index.js b/lib/index.js
@@ -0,0 +1,108 @@
+const EventEmitter = require('events')
+const StreamSearch = require('@slaviczavik/stream-search')
+
+// States used by the parser for both `bodyState` and `headState`.
+const STATE = {
+  UNINITIALIZED: 0, // initial value of both state fields
+  AFTER_STARTING_BOUNDARY: 1, // body: the first boundary has been matched
+  AFTER_HEADERS: 2, // head: the blank line after the headers has been matched
+  AFTER_ENCLOSING_BOUNDARY: 3, // NOTE(review): not referenced by the parser below
+  REQUEST_END: 4 // body: the tail after the last boundary has been seen
+}
+
+// Chunks compared against the data that follows a boundary to detect the
+// tail after the last boundary.
+const DASH = Buffer.from('-')
+const TAIL = Buffer.from('--')
+
+class Parser extends EventEmitter {
+  constructor (boundary) {
+    super()
+
+    this.boundary = boundary
+    this.bodyState = STATE.UNINITIALIZED
+    this.headState = STATE.UNINITIALIZED
+
+    this.bodySearch = new StreamSearch('\r\n--' + this.boundary)
+    this.headSearch = new StreamSearch('\r\n\r\n', 1)
+
+    this._initParser()
+  }
+
+  add (buffer) {
+    this.bodySearch.add(buffer)
+  }
+
+  end () {
+    this.bodySearch.end()
+  }
+
+  _initParser () {
+    const self = this
+
+    this.bodySearch.on('part', function (result) {
+      const { isMatch, data, start } = result
+      self._bodyParser(isMatch, data, start)
+    })
+
+    this.headSearch.on('part', function (result) {
+      const { isMatch, data, start } = result
+      self._headParser(isMatch, data, start)
+    })
+
+    // We have to insert EOF before the starting boundary,
+    // because only so we can easily search for any boundary.
+    this.bodySearch.add('\r\n')
+  }
+
+  _bodyParser (isMatch, data, start) {
+    if (this.bodyState === STATE.UNINITIALIZED) {
+      if (isMatch) {
+        // The starting boundary.
+        this.bodyState = STATE.AFTER_STARTING_BOUNDARY
+      }
+    }
+    else if (this.bodyState === STATE.AFTER_STARTING_BOUNDARY) {
+      if (isMatch) {
+        // An enclosing boundary starts at position `start`,
+        // before that are content data and/or headers.
+        this.headSearch.add(data.slice(0, start))
+        this.headSearch.end()
+
+        this.emit('part')
+
+        // Reset head state.
+        this.headState = STATE.UNINITIALIZED
+      }
+      else {
+        if (data.equals(DASH) || data.equals(TAIL)) {
+          // Tail after last boundary.
+          this.bodyState = STATE.REQUEST_END
+          this.emit('end')
+        }
+        else {
+          // Data before enclosing boundary.
+          this.headSearch.add(data)
+        }
+      }
+    }
+  }
+
+  _headParser (isMatch, data, start) {
+    if (this.headState === STATE.UNINITIALIZED) {
+      if (isMatch) {
+        // Double EOF between headers and content,
+        // before that are headers.
+        this.headState = STATE.AFTER_HEADERS
+        this.emit('header', data.slice(0, start))
+      }
+      else {
+        // Headers before double EOF.
+        this.emit('header', data)
+      }
+    }
+    else if (this.headState === STATE.AFTER_HEADERS) {
+      // Content data.
+      this.emit('data', data)
+    }
+  }
+}
+
+module.exports = Parser