Fix gzipped utf-8 .js files getting corrupted (#4984)

* add failing test for utf8 word boundary corruption

* use UTF-8 aware stream replacer

* cleanup

* cleanup test server

* add link to issue

* add replace_stream tests

* add license

* perf improvements - tsification

* emit the maximum chars possible

* add better types

* move license, update test
This commit is contained in:
Zach Bloomquist
2019-09-28 22:26:25 -04:00
committed by Brian Mann
parent b6996bd93c
commit e7ecfcf518
6 changed files with 764 additions and 20 deletions
@@ -0,0 +1,84 @@
import through from 'through'
const GraphemeSplitter = require('grapheme-splitter')
// Minimal local typing for the single `grapheme-splitter` method this file calls;
// the package is pulled in via `require`, so it carries no types of its own here.
interface IGraphemeSplitter {
// takes a string and an index into it and returns an index to break at
// (exact semantics are defined by grapheme-splitter — see its docs)
nextBreak: (string: string, at: number) => number
}
// single shared splitter instance; replaceStream uses it to pick where to cut its buffered tail
const splitter : IGraphemeSplitter = new GraphemeSplitter()
/**
 * UTF-8 grapheme aware stream replacer
 * https://github.com/cypress-io/cypress/pull/4984
 *
 * Returns a `through` stream that applies each pattern/replacement pair to the
 * text flowing through it. Incoming chunks are appended to an internal `tail`
 * buffer, every pattern is run over that buffer in order, and then:
 *  - everything up to the end of the last replaced text is emitted, so text
 *    that was already replaced is never scanned again on a later write
 *  - if the buffer is still longer than `options.maxTailLength`, the overflow
 *    at the start of the buffer is emitted, cut at an index returned by
 *    `splitter.nextBreak`
 * Whatever is left in the buffer is flushed when the stream ends.
 *
 * Because only the tail is retained between writes, a match that spans chunks
 * can only be found if it fits inside the retained tail.
 *
 * @param patterns a pattern, or a list of patterns, applied in order
 * @param replacements replacement for each pattern; `replacements[i]` pairs with `patterns[i]`
 * @param options `maxTailLength` is the buffer length above which overflow is emitted
 * @returns the stream created by `through`
 */
function replaceStream (patterns: RegExp | RegExp[], replacements: string | string[], options = { maxTailLength: 100 }) {
  const patternList: RegExp[] = Array.isArray(patterns) ? patterns : [patterns]
  const replacementList: string[] = Array.isArray(replacements) ? replacements : [replacements]

  // text received so far that has not been emitted downstream yet
  let tail = ''

  return through(function write (this: InternalStream, chunk) {
    // emit the first `index` chars of the buffer and keep the rest
    const emitTailUpTo = (index: number) => {
      this.queue(tail.slice(0, index))
      tail = tail.slice(index)
    }

    tail = tail + chunk.toString('utf8')

    // index in the *current* `tail` just past the last replaced text (0 = nothing replaced)
    let replacementEndIndex = 0

    patternList.forEach((pattern, i) => {
      const replacement = replacementList[i]
      // end index left by earlier patterns, in this pass's pre-replacement coordinates
      const endBeforePass = replacementEndIndex
      // net length change caused by the matches of this pass seen so far
      let shift = 0
      // part of `shift` that comes from matches lying entirely before `endBeforePass`
      let shiftBeforeEnd = 0
      // end of this pass's latest replacement, in post-replacement coordinates
      let lastEnd = 0

      tail = tail.replace(pattern, (match: string, ...rest: unknown[]) => {
        // String#replace calls back with (match, ...captures, offset, input[, groups]);
        // the trailing `groups` object is only present for named capture groups
        const hasGroups = typeof rest[rest.length - 1] === 'object'
        const offset = rest[rest.length - (hasGroups ? 3 : 2)] as number

        // expand `$1`-style references (or call a replacer function) for this match
        const replaced = match.replace(pattern, replacement)
        const growth = replaced.length - match.length

        if (offset + match.length <= endBeforePass) {
          shiftBeforeEnd += growth
        }

        // `offset` refers to the string before this pass; earlier replacements
        // in the same pass have already moved this match by `shift`
        lastEnd = offset + shift + replaced.length
        shift += growth

        return replaced
      })

      // matches are visited left to right, so `lastEnd` is the furthest end of this pass
      replacementEndIndex = Math.max(endBeforePass + shiftBeforeEnd, lastEnd)
    })

    // if a replacement did occur, we should emit up to the end of what was replaced
    if (replacementEndIndex) {
      emitTailUpTo(replacementEndIndex)
    }

    // if we're overflowing max chars, emit the overflow at the beginning
    if (tail.length > options.maxTailLength) {
      // start looking a few chars before the cut-off and let grapheme-splitter
      // pick the break, so the cut does not land inside a multi-unit character
      const breakableAt = splitter.nextBreak(tail, Math.max(0, tail.length - options.maxTailLength - 4))

      emitTailUpTo(breakableAt)
    }
  }, function end (this: InternalStream) {
    // flush whatever is still buffered, then queue `null` to end the stream
    if (tail.length) {
      this.queue(tail)
    }

    this.queue(null)
  })
}
// CommonJS export; consumers destructure it,
// e.g. `const { replaceStream } = require('./replace_stream')`
module.exports = {
replaceStream,
}
+17 -5
View File
@@ -1,7 +1,8 @@
// Tests located in packages/server/test/unit/security_spec
const pumpify = require('pumpify')
const replacestream = require('replacestream')
const { replaceStream } = require('./replace_stream')
const utf8Stream = require('utf8-stream')
const topOrParentEqualityBeforeRe = /((?:window|self)(?:\.|\[['"](?:top|self)['"]\])?\s*[!=]==?\s*(?:(?:window|self)(?:\.|\[['"]))?)(top|parent)(?![\w])/g
const topOrParentEqualityAfterRe = /(top|parent)((?:["']\])?\s*[!=]==?\s*(?:window|self))/g
@@ -18,10 +19,21 @@ const strip = (html) => {
const stripStream = () => {
return pumpify(
replacestream(topOrParentEqualityBeforeRe, '$1self'),
replacestream(topOrParentEqualityAfterRe, 'self$2'),
replacestream(topOrParentLocationOrFramesRe, '$1self$3$4'),
replacestream(jiraTopWindowGetterRe, '$1 || $2.parent.__Cypress__$3')
utf8Stream(),
replaceStream(
[
topOrParentEqualityBeforeRe,
topOrParentEqualityAfterRe,
topOrParentLocationOrFramesRe,
jiraTopWindowGetterRe,
],
[
'$1self',
'self$2',
'$1self$3$4',
'$1 || $2.parent.__Cypress__$3',
]
)
)
}
+2 -1
View File
@@ -77,6 +77,7 @@
"getos": "3.1.1",
"glob": "7.1.3",
"graceful-fs": "4.2.0",
"grapheme-splitter": "1.0.4",
"http-accept": "0.1.6",
"http-proxy": "1.17.0",
"http-status-codes": "1.3.2",
@@ -110,7 +111,6 @@
"pumpify": "1.5.1",
"ramda": "0.24.1",
"randomstring": "1.1.5",
"replacestream": "4.0.3",
"request": "2.88.0",
"request-promise": "4.2.4",
"return-deep-diff": "0.3.0",
@@ -129,6 +129,7 @@
"underscore": "1.9.1",
"underscore.string": "3.3.5",
"url-parse": "1.4.7",
"utf8-stream": "0.0.0",
"uuid": "3.3.2",
"widest-line": "3.1.0",
"winston": "2.4.4"
@@ -3,7 +3,9 @@ require("../spec_helper")
_ = require("lodash")
r = require("request")
rp = require("request-promise")
compression = require("compression")
dns = require("dns")
express = require("express")
http = require("http")
path = require("path")
url = require("url")
@@ -1007,6 +1009,44 @@ describe "Routes", ->
expect(res.body).not.to.include("document.domain = 'github.com'")
expect(res.body).to.include("</html>")
## https://github.com/cypress-io/cypress/issues/1746
it "can ungzip utf-8 javascript and inject without corrupting it", ->
js = ""
app = express()
app.use compression({ chunkSize: 64, threshold: 1 })
app.get "/", (req, res) =>
res.setHeader('content-type', 'application/javascript; charset=UTF-8')
res.setHeader('transfer-encoding', 'chunked')
write = (chunk) =>
js += chunk
res.write(chunk)
write("function ")
_.times 100, =>
write("😡😈".repeat(10))
write(" () { }")
res.end()
server = http.createServer(app)
Promise.fromCallback (cb) =>
server.listen(12345, cb)
.then =>
@rp({
url: "http://localhost:12345"
gzip: true
})
.then (res) ->
expect(res.statusCode).to.eq(200)
expect(res.body).to.deep.eq(js)
.finally =>
Promise.fromCallback (cb) =>
server.close(cb)
context "accept-encoding", ->
beforeEach ->
@setup("http://www.github.com")
@@ -2619,20 +2659,6 @@ describe "Routes", ->
## shouldn't be more than 500ms
expect(reqTime).to.be.lt(500)
# b = res.body
#
# console.time("1")
# b.replace(topOrParentEqualityBeforeRe, "$self")
# console.timeEnd("1")
#
# console.time("2")
# b.replace(topOrParentEqualityAfterRe, "self$2")
# console.timeEnd("2")
#
# console.time("3")
# b.replace(topOrParentLocationOrFramesRe, "$1self$3$4")
# console.timeEnd("3")
describe "off with config", ->
beforeEach ->
@setup("http://www.google.com", {
@@ -0,0 +1,617 @@
/**
* This file contains test code from the `replacestream` library
* (https://github.com/eugeneware/replacestream), to which the following license applies:
*
* Copyright (c) 2014, Eugene Ware
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of Eugene Ware nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY EUGENE WARE ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL EUGENE WARE BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
require('../spec_helper')
const concatStream = require('concat-stream')
const { passthruStream } = require(`${root}lib/util/passthru_stream`)
const { replaceStream } = require(`${root}lib/util/replace_stream`)
const script = [
'<script type="text/javascript">',
'console.log(\'hello\');',
'document.addEventListener("DOMContentLoaded", function () {',
' document.body.style.backgroundColor = "red";',
'});',
'</script>',
].join('\n')
describe('lib/util/replace_stream', function () {
it('replaces across chunk boundaries', function (done) {
  const source = passthruStream()
  const replacer = replaceStream(/foobar/, 'replaced')
  const sink = concatStream((body) => {
    // 'foo' + 'bar' only match once both writes have been buffered together
    expect(body).to.eq('replaced')

    return done()
  })

  source.pipe(replacer).pipe(sink)

  // split the needle over two separate writes
  ;['foo', 'bar'].forEach((piece) => {
    source.write(piece)
  })

  return source.end()
})
// test suite from the library this was meant to replace
// minus tests for extra features that Cypress's implementation doesn't need
// https://github.com/eugeneware/replacestream/blob/master/test/replace.js
context('original `replacestream` tests', function () {
it('should be able to replace within a chunk', function (done) {
let replace = replaceStream('</head>', `${script}</head>`)
replace.pipe(concatStream({ encoding: 'string' }, function (data) {
expect(data).to.include(script)
done()
}))
replace.end([
'<!DOCTYPE html>',
'<html>',
' <head>',
' <title>Test</title>',
' </head>',
' <body>',
' <h1>Head</h1>',
' </body>',
'</html>',
].join('\n'))
})
it('should be able to replace between chunks', function (done) {
let haystacks = [
['<!DOCTYPE html>',
'<html>',
' <head>',
' <title>Test</title>',
' </he'].join('\n'),
['ad>',
' <body>',
' <h1>Head</h1>',
' </body>',
'</html>'].join('\n'),
]
let replace = replaceStream('</head>', `${script}</head>`)
replace.pipe(concatStream({ encoding: 'string' }, function (data) {
expect(data).to.include(script)
done()
}))
haystacks.forEach(function (haystack) {
replace.write(haystack)
})
replace.end()
})
it('should be able to handle no matches', function (done) {
let haystacks = [
['<!DOCTYPE html>',
'<html>',
' <head>',
' <title>Test</title>',
' </de'].join('\n'),
['ad>',
' <body>',
' <h1>Head</h1>',
' </body>',
'</html>'].join('\n'),
]
let replace = replaceStream('</head>', `${script}</head>`)
replace.pipe(concatStream({ encoding: 'string' }, function (data) {
expect(data).to.not.include(script)
done()
}))
haystacks.forEach(function (haystack) {
replace.write(haystack)
})
replace.end()
})
it('should be able to handle dangling tails', function (done) {
let replace = replaceStream('</head>', `${script}</head>`)
replace.pipe(concatStream({ encoding: 'string' }, function (data) {
expect(data).to.include('</he')
done()
}))
replace.end([
'<!DOCTYPE html>',
'<html>',
' <head>',
' <title>Test</title>',
' </he',
].join('\n'))
})
it('should replace characters specified and not modify partial matches', function (done) {
let replace = replaceStream('ab', 'Z')
replace.pipe(concatStream({ encoding: 'string' }, function (data) {
let expected = [
'Z',
'a',
'a',
'b',
].join('\n')
expect(data).to.equal(expected)
done()
}))
replace.end([
'ab',
'a',
'a',
'b',
].join('\n'))
})
it('should handle partial matches between complete matches', function (done) {
let replace = replaceStream(/ab/g, 'Z')
replace.pipe(concatStream({ encoding: 'string' }, function (data) {
let expected = [
'Z',
'a',
'Z',
'b',
].join('\n')
expect(data).to.equal(expected)
done()
}))
replace.end([
'ab',
'a',
'ab',
'b',
].join('\n'))
})
it('should only replace characters specified', function (done) {
let replace = replaceStream('ab', 'Z')
replace.pipe(concatStream({ encoding: 'string' }, function (data) {
let expected = [
'Z',
'a',
'b',
].join('\n')
expect(data).to.equal(expected)
done()
}))
replace.end([
'ab',
'a',
'b',
].join('\n'))
})
it('should be able to use a replace function', function (done) {
let haystacks = [
['<!DOCTYPE html>',
'<html>',
' <head>',
' <title>Test</title>',
' </he'].join('\n'),
['ad>',
' <body>',
' <h1>Head</h1>',
' </body>',
'</html>'].join('\n'),
]
let replace = replaceStream('</head>', function (match) {
expect(match).to.equal('</head>')
return `${script}</head>`
})
replace.pipe(concatStream({ encoding: 'string' }, function (data) {
expect(data).to.include(script)
done()
}))
haystacks.forEach(function (haystack) {
replace.write(haystack)
})
replace.end()
})
it('should be able to replace within a chunk using regex', function (done) {
let replace = replaceStream(/<\/head>/, `${script}</head>`)
replace.pipe(concatStream({ encoding: 'string' }, function (data) {
expect(data).to.include(script)
done()
}))
replace.end([
'<!DOCTYPE html>',
'<html>',
' <head>',
' <title>Test</title>',
' </head>',
' <body>',
' <h1>Head</h1>',
' </body>',
'</html>',
].join('\n'))
})
it('should be able to replace between chunks using regex', function (done) {
let haystacks = [
['<!DOCTYPE html>',
'<html>',
' <head>',
' <title>Test</title>',
' </head>',
' <body>',
' <h1>I love feeeee'].join('\n'),
['eeeeeeeeeed</h1>',
' </body>',
'</html>'].join('\n'),
]
let replace = replaceStream(/fe+d/, 'foooooooood')
replace.pipe(concatStream({ encoding: 'string' }, function (data) {
expect(data).to.include('foooooooood')
done()
}))
haystacks.forEach(function (haystack) {
replace.write(haystack)
})
replace.end()
})
it('should be able to handle no matches using regex', function (done) {
let haystacks = [
['<!DOCTYPE html>',
'<html>',
' <head>',
' <title>Test</title>',
' </de'].join('\n'),
['ad>',
' <body>',
' <h1>Head</h1>',
' </body>',
'</html>'].join('\n'),
]
let replace = replaceStream(/<\/head>/, `${script}</head>`)
replace.pipe(concatStream({ encoding: 'string' }, function (data) {
expect(data).to.not.include(script)
done()
}))
haystacks.forEach(function (haystack) {
replace.write(haystack)
})
replace.end()
})
it('should be able to handle dangling tails using regex', function (done) {
let replace = replaceStream(/<\/head>/, `${script}</head>`)
replace.pipe(concatStream({ encoding: 'string' }, function (data) {
expect(data).to.include('</he')
done()
}))
replace.end([
'<!DOCTYPE html>',
'<html>',
' <head>',
' <title>Test</title>',
' </he',
].join('\n'))
})
it('should be able to handle multiple searches and replaces using regex',
function (done) {
let haystacks = [
['<!DOCTYPE html>',
'<html>',
' <head>',
' <title>Test</title>',
' </head>',
' <body>',
' <p> Hello 1</p>',
' <p> Hello 2</'].join('\n'),
['p>',
' <p> Hello 3</p>',
' <p> Hello 4</p>',
' <p> Hello 5</p>',
' </body>',
'</html>'].join('\n'),
]
let replace = replaceStream(/<\/p>/g, ', world</p>')
replace.pipe(concatStream({ encoding: 'string' }, function (data) {
let expected = [
'<!DOCTYPE html>',
'<html>',
' <head>',
' <title>Test</title>',
' </head>',
' <body>',
' <p> Hello 1, world</p>',
' <p> Hello 2, world</p>',
' <p> Hello 3, world</p>',
' <p> Hello 4, world</p>',
' <p> Hello 5, world</p>',
' </body>',
'</html>',
].join('\n')
expect(data).to.equal(expected)
done()
}))
haystacks.forEach(function (haystack) {
replace.write(haystack)
})
replace.end()
})
it('should be possible to specify the regexp flags when using a regex',
function (done) {
let haystacks = [
['<!DOCTYPE html>',
'<html>',
' <head>',
' <title>Test</title>',
' </head>',
' <body>',
' <P> Hello 1</P>',
' <P> Hello 2</'].join('\n'),
['P>',
' <P> Hello 3</P>',
' <p> Hello 4</p>',
' <p> Hello 5</p>',
' </body>',
'</html>'].join('\n'),
]
let replace = replaceStream(/<\/P>/gm, ', world</P>')
replace.pipe(concatStream({ encoding: 'string' }, function (data) {
let expected = [
'<!DOCTYPE html>',
'<html>',
' <head>',
' <title>Test</title>',
' </head>',
' <body>',
' <P> Hello 1, world</P>',
' <P> Hello 2, world</P>',
' <P> Hello 3, world</P>',
' <p> Hello 4</p>',
' <p> Hello 5</p>',
' </body>',
'</html>',
].join('\n')
expect(data).to.equal(expected)
done()
}))
haystacks.forEach(function (haystack) {
replace.write(haystack)
})
replace.end()
})
it('should replace characters specified and not modify partial matches using regex', function (done) {
  // use an actual (non-global) RegExp here: with the string 'ab' this case was an
  // exact duplicate of the non-regex test of the same name and never exercised a regex
  let replace = replaceStream(/ab/, 'Z')

  replace.pipe(concatStream({ encoding: 'string' }, function (data) {
    // only the first full 'ab' is replaced; the lone 'a's and 'b' stay untouched
    let expected = [
      'Z',
      'a',
      'a',
      'b',
    ].join('\n')

    expect(data).to.equal(expected)
    done()
  }))

  replace.end([
    'ab',
    'a',
    'a',
    'b',
  ].join('\n'))
})
it('should handle partial matches between complete matches using regex', function (done) {
let replace = replaceStream(/ab/g, 'Z')
replace.pipe(concatStream({ encoding: 'string' }, function (data) {
let expected = [
'Z',
'a',
'Z',
'b',
].join('\n')
expect(data).to.equal(expected)
done()
}))
replace.end([
'ab',
'a',
'ab',
'b',
].join('\n'))
})
it('should only replace characters specified using regex', function (done) {
let replace = replaceStream(/ab/, 'Z')
replace.pipe(concatStream({ encoding: 'string' }, function (data) {
let expected = [
'Z',
'a',
'b',
].join('\n')
expect(data).to.equal(expected)
done()
}))
replace.end([
'ab',
'a',
'b',
].join('\n'))
})
it('should be able to change each replacement value with a function using regex',
function (done) {
let haystacks = [
['<!DOCTYPE html>',
'<html>',
' <head>',
' <title>Test</title>',
' </head>',
' <body>',
' <p> Hello 1</p>',
' <p> Hello 2</'].join('\n'),
['p>',
' <p> Hello 3</p>',
' <p> Hello 4</p>',
' <p> Hello 5</p>',
' </body>',
'</html>'].join('\n'),
]
let greetings = ['Hi', 'Hey', 'Gday', 'Bonjour', 'Greetings']
let replace = replaceStream(/Hello/g, greetings.shift.bind(greetings))
replace.pipe(concatStream({ encoding: 'string' }, function (data) {
let expected = [
'<!DOCTYPE html>',
'<html>',
' <head>',
' <title>Test</title>',
' </head>',
' <body>',
' <p> Hi 1</p>',
' <p> Hey 2</p>',
' <p> Gday 3</p>',
' <p> Bonjour 4</p>',
' <p> Greetings 5</p>',
' </body>',
'</html>',
].join('\n')
expect(data).to.equal(expected)
done()
}))
haystacks.forEach(function (haystack) {
replace.write(haystack)
})
replace.end()
})
it('should be able to replace captures using $1 notation', function (done) {
let replace = replaceStream(/(a)(b)/g, 'this is $1 and this is $2 and this is again $1')
replace.pipe(concatStream({ encoding: 'string' }, function (data) {
let expected = [
'this is a and this is b and this is again a',
'a',
'this is a and this is b and this is again a',
'b',
].join('\n')
expect(data).to.equal(expected)
done()
}))
replace.end([
'ab',
'a',
'ab',
'b',
].join('\n'))
})
it('should be able to replace when the match is a tail using a regex', function (done) {
let replace = replaceStream(/<\/html>/g, `${script}</html>`)
replace.pipe(concatStream({ encoding: 'string' }, function (data) {
expect(data).to.include(script)
done()
}))
replace.end([
'<!DOCTYPE html>',
'<html>',
' <head>',
' <title>Test</title>',
' </head>',
' <body>',
' <h1>Head</h1>',
' </body>',
'</html>',
].join('\n'))
})
})
})
+4
View File
@@ -89,3 +89,7 @@ declare module 'proxy-from-env' {
declare interface SymbolConstructor {
for(str: string): SymbolConstructor
}
/**
 * Type used for `this` inside the `write`/`end` callbacks that replace_stream
 * passes to `through`. Only the `queue` method used there is declared.
 */
declare interface InternalStream {
// pushes a string downstream; replace_stream passes `null` as its final call when the stream ends
queue(str: string | null): void
}