Files
cypress/packages/proxy/lib/http/response-middleware.ts
Zach Bloomquist 6960f7cd78 Rewrite JS/HTML using AST-based approach (#5273)
* Add winPropAccessor to security.js, remove other replacers

* Add start of Cypress.resolveWindowReference

* Add regexes for dot and bracket access

* Some security_spec tests pass with new injection

* Add resolveWindowReference unit tests

* Old security_spec now passes with resolveWindowReference

* Inject stub resolveWindowReference so proxy still works outside of Cypress

* wip: rewrite HTML + JS with tokenizer

* Move to using esprima + hyntax to rewrite JS + HTML

* remove comment; oneLine makes the whole thing commented

* Fix tests, apple.com edge case

* wip: add getOrSet

* Revert "wip: add getOrSet"

This reverts commit a5c647c00f.

* release 3.5.0 [skip ci]

* use recast to replace window property accesses

* replace assignments to top properly

* fix yarn.lock

* bump deps

* update integration tests

* remove old security ts?

* fix integration spec

* always ignore js interception failure

* use globalThis instead of window

* add experimentalSourceRewriting flag

* restore regex-writer spec

* fix types

* update config_spec

* add source rewriting spec

* cleanup

* simplify rewriting logic, move rules into rewriter package

* create threaded rewriting tool for non-streaming use

* update @packages/rewriter to use threads for async

* use async rewriting where convenient

* add worker-shim.js

* add performance info to debug logs

* properly handle +=, -=, ...

* add proxy, rewriter to unit-tests stage

* cleanup

* use parse5 to rewrite HTML, strip SRI

* update tests

* reorganization, cleanup

* rewrite ALL parent, top identifiers except in a few cases

* handle many JS edge cases

* ensure parse5@5.1.1 is installed

* update yarn.lock

* update tests

* add debugging, add tests

* add attempted repro for .href issue

* implement source maps + extending inline source maps

* update opts passing in proxy layer

* fix sourcemap naming structure

* update tests to account for sourcemaps

* sourcemap tests

* remote source maps work

* comment

* update rewriter tests

* clean up TODOs in resolveWindowReference

* remove @types/nock

* clean up todos in deferred-source-map-cache

* fix rewriter build script

* fix concatStream import

* bump expectedresultcount

* clean up js-rules

* threading improvements, workaround for Electron segfault

* no visit_spec for now

* fix 6_visit_spec

* update MAX_WORKER_THREADS

* add repro for #3975

* cleanup

* cleanup

* make better use of namedTypes and builders

* get rid of the horrific closureDetectionTernary

ast-types keeps track of scope, so it is unneeded

* fix #3975, #3994

* add x-sourcemap, sourcemap header support

* snap-shot-it 7.9.3

* add deferred-source-map-cache-spec

* add tests

* Throw error in driver if AST rewriting fails

* Fix "location = 'relative-url'"

* fix max recursion depth

* slim down some fixtures

* fix window.location usage

* don't mess with `frames` at all

* no integration tests

* skip testing apple.com for now

* update wording: regex-based vs. ast-based

* skip real-world tests for now

* add some padding to process.exit workaround

* fix resolvers_spec

* fix html-spec

* cleanup

* Update packages/rewriter/lib/js-rules.ts

* Update packages/driver/src/cypress/resolvers.ts

* just import find by itself

* privatize typedefs for Cypress.state, remove .gitignore, remove dead code

Co-authored-by: Ben Kucera <14625260+Bkucera@users.noreply.github.com>
2020-05-11 12:54:14 -04:00

443 lines
12 KiB
TypeScript

import _ from 'lodash'
import charset from 'charset'
import { CookieOptions } from 'express'
import { cors, concatStream } from '@packages/network'
import { CypressRequest, CypressResponse, HttpMiddleware } from '.'
import debugModule from 'debug'
import iconv from 'iconv-lite'
import { IncomingMessage, IncomingHttpHeaders } from 'http'
import { PassThrough, Readable } from 'stream'
import * as rewriter from './util/rewriter'
import zlib from 'zlib'
/**
 * Type of every middleware in this file. On top of whatever `HttpMiddleware`
 * itself supplies, `this` carries:
 *  - `incomingRes`: the response whose headers and status code are copied onto `res`
 *  - `incomingResStream`: the body stream that is eventually piped into `res`;
 *    middlewares replace it as they gunzip, rewrite, or re-gzip the body
 */
export type ResponseMiddleware = HttpMiddleware<{
incomingRes: IncomingMessage
incomingResStream: Readable
}>
// debug logger shared by every middleware in this module
const debug = debugModule('cypress:proxy:http:response-middleware')
// https://github.com/cypress-io/cypress/issues/1756
// Options handed to both zlib.createGunzip (MaybeGunzipBody) and zlib.createGzip (GzipBody).
// Both the regular flush and the final flush are set to Z_SYNC_FLUSH.
// NOTE(review): Node's zlib docs describe `finishFlush: Z_SYNC_FLUSH` as the way to
// accept truncated input without raising an error - presumably the reason for this
// setting; confirm against the linked issue.
const zlibOptions = {
flush: zlib.Z_SYNC_FLUSH,
finishFlush: zlib.Z_SYNC_FLUSH,
}
// https://github.com/cypress-io/cypress/issues/1543
/**
 * Chooses the encoding name used to decode (and later re-encode) a response body.
 *
 * The `charset` helper is asked to infer a charset from the headers and the body
 * (called with 1024 as its third argument). The lower-cased result is returned
 * only when iconv reports it as a known encoding; otherwise 'latin1' is used.
 *
 * @param headers headers of the proxied response
 * @param body the full response body
 * @returns an encoding name that iconv recognises
 */
function getNodeCharsetFromResponse (headers: IncomingHttpHeaders, body: Buffer) {
  const inferred = charset(headers, body, 1024) || ''
  const httpCharset = inferred.toLowerCase()

  debug('inferred charset from response %o', { httpCharset })

  // browsers default to latin1
  return iconv.encodingExists(httpCharset) ? httpCharset : 'latin1'
}
/**
 * Decides whether the proxied URL of `req` belongs to the current remote state.
 *
 *  - strategy 'http': delegated to `cors.urlMatchesOriginPolicyProps` with `remoteState.props`
 *  - strategy 'file': the proxied URL must start with `remoteState.origin`
 *  - any other strategy: never matches
 */
function reqMatchesOriginPolicy (req: CypressRequest, remoteState) {
  switch (remoteState.strategy) {
    case 'http':
      return cors.urlMatchesOriginPolicyProps(req.proxiedUrl, remoteState.props)
    case 'file':
      return req.proxiedUrl.startsWith(remoteState.origin)
    default:
      return false
  }
}
/**
 * Will this request be rendered in the browser, necessitating injection?
 * https://github.com/cypress-io/cypress/issues/288
 *
 * @returns a falsy value when the request carries an `x-requested-with` header
 * (e.g. an XHR from jquery) or has no `accept` header; otherwise whether the
 * `accept` header lists both text/html and application/xhtml+xml
 */
function reqWillRenderHtml (req: CypressRequest) {
  const { headers } = req

  // requests flagged with x-requested-with are never injected
  if (headers['x-requested-with']) {
    return
  }

  const accept = headers['accept']

  if (!accept) {
    return accept
  }

  // both media types have to be present for the response to count as a page render
  return ['text/html', 'application/xhtml+xml'].every((mediaType) => accept.includes(mediaType))
}
/**
 * Checks whether the response's content-type header mentions `contentType`.
 *
 * Media types are case-insensitive, so both sides are lower-cased before the
 * substring check - a header of `Text/HTML; charset=UTF-8` matches 'text/html'.
 * A response without a content-type header never matches.
 *
 * @param res the proxied response
 * @param contentType media type to look for, e.g. 'text/html'
 * @returns true when the header contains the media type
 */
function resContentTypeIs (res: IncomingMessage, contentType: string) {
  const header = (res.headers['content-type'] || '').toLowerCase()

  return header.includes(contentType.toLowerCase())
}
/**
 * True when the response's content-type is one of the JavaScript media types:
 * application/javascript, application/x-javascript or text/javascript.
 */
function resContentTypeIsJavaScript (res: IncomingMessage) {
  const javaScriptTypes = ['application/javascript', 'application/x-javascript', 'text/javascript']

  return javaScriptTypes.some((mediaType) => resContentTypeIs(res, mediaType))
}
/**
 * Flag handed to the rewriter: every response that does not have a JavaScript
 * content-type is treated as HTML. No check for text/html is made here.
 */
function isHtml (res: IncomingMessage) {
  const isJavaScript = resContentTypeIsJavaScript(res)

  return !isJavaScript
}
/**
 * Checks whether the response body is gzip-compressed according to its
 * content-encoding header.
 *
 * Content-coding names are case-insensitive, so the header is lower-cased
 * before looking for 'gzip' (this also matches the legacy 'x-gzip' alias).
 * A response without the header is treated as not gzipped.
 */
function resIsGzipped (res: IncomingMessage) {
  const encoding = (res.headers['content-encoding'] || '').toLowerCase()

  return encoding.includes('gzip')
}
// https://github.com/cypress-io/cypress/issues/4298
// https://tools.ietf.org/html/rfc7230#section-3.3.3
// HEAD, 1xx, 204, and 304 responses should never contain anything after headers
// (only 204, 304 and HEAD are actually checked below)
const NO_BODY_STATUS_CODES = [204, 304]

/**
 * True when the response must be sent without a body: either its status code
 * is listed in NO_BODY_STATUS_CODES or the request method is HEAD (compared
 * case-insensitively; a missing method never matches).
 */
function responseMustHaveEmptyBody (req: CypressRequest, res: IncomingMessage) {
  if (_.includes(NO_BODY_STATUS_CODES, res.statusCode)) {
    return true
  }

  return _.invoke(req.method, 'toLowerCase') === 'head'
}
/**
 * Sets cookie `k` on the response, scoped to `domain`.
 *
 * A falsy `v` is sent as an empty string together with an expiry of the epoch
 * (`new Date(0)`).
 *
 * @returns whatever `res.cookie` returns
 */
function setCookie (res: CypressResponse, k: string, v: string, domain: string) {
  if (v) {
    const opts: CookieOptions = { domain }

    return res.cookie(k, v, opts)
  }

  // nothing to store: send an empty value with an expiry in the past
  const expiredOpts: CookieOptions = { domain, expires: new Date(0) }

  return res.cookie(k, '', expiredOpts)
}
/**
 * Sets (truthy `value`) or clears (falsy `value`) the `__cypress.initial`
 * cookie on the remote state's domain.
 *
 * Nothing happens when this is not the initial request, or when the cookie
 * would be cleared while nothing is being injected into the response.
 */
function setInitialCookie (res: CypressResponse, remoteState: any, value) {
  const isClearingWithoutInjection = !value && !res.wantsInjection

  if (!res.isInitial || isClearingWithoutInjection) {
    return
  }

  return setCookie(res, '__cypress.initial', value, remoteState.domainName)
}
// "autoplay *; document-domain 'none'" => { autoplay: "*", "document-domain": "'none'" }
/**
 * Parses a Feature-Policy header value into a map of directive name => allowlist.
 *
 * Directives are separated by ';' with any amount of surrounding whitespace.
 * Within a directive, the first token is the feature name and everything after
 * it is kept as the allowlist, so multi-entry allowlists such as
 * `geolocation 'self' https://example.com` survive a parse/stringify round trip.
 * Empty segments (e.g. from a trailing ';') are ignored, and a directive
 * without an allowlist maps to an empty string.
 *
 * @param policy raw header value
 * @returns plain object keyed by feature name
 */
const parseFeaturePolicy = (policy: string): any => {
  const directives: { [name: string]: string } = {}

  policy.split(';').forEach((directive) => {
    const [name, ...allowlist] = directive.trim().split(/\s+/)

    // splitting an empty segment yields a single empty token - skip it
    if (name) {
      directives[name] = allowlist.join(' ')
    }
  })

  return directives
}
// { autoplay: "*", "document-domain": "'none'" } => "autoplay *; document-domain 'none'"
/**
 * Serialises a directive map back into a Feature-Policy header value: each
 * entry becomes "name value" and the entries are joined with "; ".
 * An empty map produces an empty string.
 */
const stringifyFeaturePolicy = (policy: any): string => {
  const directives = _.toPairs(policy).map(([name, value]) => {
    return [name, value].join(' ')
  })

  return directives.join('; ')
}
/**
 * Writes a debug entry with the request's method, proxied URL and headers and
 * the incoming response's headers and status code, then continues.
 */
const LogResponse: ResponseMiddleware = function () {
  const { req, incomingRes } = this

  const summary = {
    req: _.pick(req, 'method', 'proxiedUrl', 'headers'),
    incomingRes: _.pick(incomingRes, 'headers', 'statusCode'),
  }

  debug('received response %o', summary)

  this.next()
}
// Replaces `this.res.setHeader` with a wrapper that:
//  - for a header named exactly 'content-type' (lower-case), uses the value sent
//    by the incoming response whenever there is one, ignoring the value passed in
//  - delegates to the original setHeader
//  - when that throws ERR_INVALID_CHAR, stores the header anyway by writing it
//    directly into the response's internal kOutHeaders map (the wrapper then
//    returns undefined); any other error is rethrown unchanged
const PatchExpressSetHeader: ResponseMiddleware = function () {
const { incomingRes } = this
// captured before patching so the wrapper can still reach the real implementation
const originalSetHeader = this.res.setHeader
// Node uses their own Symbol object, so use this to get the internal kOutHeaders
// symbol - Symbol.for('kOutHeaders') will not work
const getKOutHeadersSymbol = () => {
// looks through the response's own symbols for the one that prints as 'Symbol(kOutHeaders)'
const findKOutHeadersSymbol = (): symbol => {
return _.find(Object.getOwnPropertySymbols(this.res), (sym) => {
return sym.toString() === 'Symbol(kOutHeaders)'
})!
}
let sym = findKOutHeadersSymbol()
if (sym) {
return sym
}
// force creation of a new header field so the kOutHeaders key is available
this.res.setHeader('X-Cypress-HTTP-Response', 'X')
this.res.removeHeader('X-Cypress-HTTP-Response')
sym = findKOutHeadersSymbol()
if (!sym) {
throw new Error('unable to find kOutHeaders symbol')
}
return sym
}
// looked up lazily on the first ERR_INVALID_CHAR and then reused by later calls
let kOutHeaders
this.res.setHeader = function (name, value) {
// express.Response.setHeader does all kinds of silly/nasty stuff to the content-type...
// but we don't want to change it at all!
if (name === 'content-type') {
value = incomingRes.headers['content-type'] || value
}
// run the original function - if an "invalid header char" error is raised,
// set the header manually. this way we can retain Node's original error behavior
try {
return originalSetHeader.call(this, name, value)
} catch (err) {
if (err.code !== 'ERR_INVALID_CHAR') {
throw err
}
debug('setHeader error ignored %o', { name, value, code: err.code, err })
if (!kOutHeaders) {
kOutHeaders = getKOutHeadersSymbol()
}
// https://github.com/nodejs/node/blob/42cce5a9d0fd905bf4ad7a2528c36572dfb8b5ad/lib/_http_outgoing.js#L483-L495
let headers = this[kOutHeaders]
if (!headers) {
this[kOutHeaders] = headers = Object.create(null)
}
// entries are keyed by lower-cased name and hold the [name, value] pair as given
headers[name.toLowerCase()] = [name, value]
}
}
this.next()
}
/**
 * Computes the flags that later middlewares consult:
 *  - `res.isInitial`: the request carried the cookie `__cypress.initial=true`
 *  - `res.wantsInjection`: 'full', 'partial' or false; only computed when it is
 *    not already set to a truthy value
 *  - `res.wantsSecurityRemoved`: requires `config.modifyObstructiveCode` and
 *    either a full injection or a JavaScript response
 */
const SetInjectionLevel: ResponseMiddleware = function () {
  const { req, res, incomingRes } = this

  res.isInitial = req.cookies['__cypress.initial'] === 'true'

  const resolveInjectionLevel = () => {
    const isFileServerError = Boolean(incomingRes.headers['x-cypress-file-server-error'])

    // file server errors on non-initial requests always get the partial injection
    if (isFileServerError && !res.isInitial) {
      return 'partial'
    }

    // only HTML that matches the remote state's origin policy is eligible
    const isEligibleHtml = resContentTypeIs(incomingRes, 'text/html')
      && reqMatchesOriginPolicy(req, this.getRemoteState())

    if (!isEligibleHtml) {
      return false
    }

    if (res.isInitial) {
      return 'full'
    }

    return reqWillRenderHtml(req) ? 'partial' : false
  }

  if (!res.wantsInjection) {
    res.wantsInjection = resolveInjectionLevel()
  }

  res.wantsSecurityRemoved = this.config.modifyObstructiveCode && (
    res.wantsInjection === 'full' || resContentTypeIsJavaScript(incomingRes)
  )

  debug('injection levels: %o', _.pick(res, 'isInitial', 'wantsInjection', 'wantsSecurityRemoved'))

  this.next()
}
// https://github.com/cypress-io/cypress/issues/6480
/**
 * Removes the `document-domain` directive from the incoming feature-policy
 * header. The remaining directives are set on the outgoing response; when none
 * are left, the header is removed altogether. Responses without the header or
 * without that directive pass through untouched.
 */
const MaybeStripDocumentDomainFeaturePolicy: ResponseMiddleware = function () {
  const featurePolicy = this.incomingRes.headers['feature-policy']

  if (!featurePolicy) {
    return this.next()
  }

  const directives = parseFeaturePolicy(featurePolicy as string)

  if (!directives['document-domain']) {
    return this.next()
  }

  delete directives['document-domain']

  const remainingPolicy = stringifyFeaturePolicy(directives)

  if (remainingPolicy) {
    this.res.set('feature-policy', remainingPolicy)
  } else {
    this.res.removeHeader('feature-policy')
  }

  this.next()
}
/**
 * Copies the incoming response headers onto the outgoing response, leaving out
 * set-cookie, x-frame-options, content-length, content-security-policy and
 * connection.
 */
const OmitProblematicHeaders: ResponseMiddleware = function () {
  const { incomingRes, res } = this

  const forwardedHeaders = _.omit(incomingRes.headers, [
    'set-cookie',
    'x-frame-options',
    'content-length',
    'content-security-policy',
    'connection',
  ])

  res.set(forwardedHeaders)

  this.next()
}
/**
 * Marks responses that will be injected as uncacheable by setting the
 * cache-control header; other responses are left alone.
 */
const MaybePreventCaching: ResponseMiddleware = function () {
  const { res } = this

  // do not cache injected responses
  // TODO: consider implementing etag system so even injected content can be cached
  const willBeInjected = Boolean(res.wantsInjection)

  if (willBeInjected) {
    res.setHeader('cache-control', 'no-cache, no-store, must-revalidate')
  }

  this.next()
}
/**
 * Appends every set-cookie value of the incoming response to the outgoing
 * response. A cookie that fails to append is logged and skipped so the
 * remaining cookies are still copied.
 */
const CopyCookiesFromIncomingRes: ResponseMiddleware = function () {
  const setCookieHeader: string | string[] | undefined = this.incomingRes.headers['set-cookie']

  if (setCookieHeader) {
    // the header may hold a single value or a list of values
    const cookies = Array.isArray(setCookieHeader) ? setCookieHeader : [setCookieHeader]

    for (const cookie of cookies) {
      try {
        this.res.append('Set-Cookie', cookie)
      } catch (err) {
        debug('failed to Set-Cookie, continuing %o', { err, cookie })
      }
    }
  }

  this.next()
}
// status codes that are answered with a redirect when a location header is present
const REDIRECT_STATUS_CODES: any[] = [301, 302, 303, 307, 308]

// TODO: this shouldn't really even be necessary?
/**
 * Answers redirect responses directly: when the incoming status code is one of
 * REDIRECT_STATUS_CODES and a location header is present, the initial cookie
 * is set (subject to setInitialCookie's own conditions), the client is
 * redirected to that location and the middleware chain is ended. Everything
 * else continues down the chain.
 */
const MaybeSendRedirectToClient: ResponseMiddleware = function () {
  const { statusCode, headers: { location: newUrl } } = this.incomingRes

  if (!newUrl || !REDIRECT_STATUS_CODES.includes(statusCode)) {
    return this.next()
  }

  const remoteState = this.getRemoteState()

  setInitialCookie(this.res, remoteState, true)

  debug('redirecting to new url %o', { statusCode, newUrl })

  this.res.redirect(Number(statusCode), newUrl)

  return this.end()
}
/**
 * Gives the outgoing response the same status code as the incoming response.
 */
const CopyResponseStatusCode: ResponseMiddleware = function () {
  const { statusCode } = this.incomingRes

  this.res.status(Number(statusCode))

  this.next()
}
/**
 * Asks setInitialCookie to clear the `__cypress.initial` cookie by passing
 * `false` as the value; setInitialCookie decides whether anything is sent.
 */
const ClearCyInitialCookie: ResponseMiddleware = function () {
  const { res } = this
  const remoteState = this.getRemoteState()

  setInitialCookie(res, remoteState, false)

  this.next()
}
/**
 * Finishes the response without a body, and ends the middleware chain, when
 * responseMustHaveEmptyBody says no body is allowed; otherwise continues.
 */
const MaybeEndWithEmptyBody: ResponseMiddleware = function () {
  const mustBeEmpty = responseMustHaveEmptyBody(this.req, this.incomingRes)

  if (!mustBeEmpty) {
    return this.next()
  }

  this.res.end()

  return this.end()
}
/**
 * Decompresses the body stream when the response is gzipped and its body will
 * be modified (injection or security removal). In every other case the later
 * 'GzipBody' middleware is skipped, since there is nothing to re-compress.
 */
const MaybeGunzipBody: ResponseMiddleware = function () {
  const { res } = this
  const bodyWillBeModified = res.wantsInjection || res.wantsSecurityRemoved

  if (!resIsGzipped(this.incomingRes) || !bodyWillBeModified) {
    this.skipMiddleware('GzipBody') // not needed anymore

    return this.next()
  }

  debug('ungzipping response body')

  const gunzip = zlib.createGunzip(zlibOptions)

  this.incomingResStream = this.incomingResStream.pipe(gunzip).on('error', this.onError)

  this.next()
}
/**
 * Buffers the whole body, runs it through `rewriter.html` and replaces the body
 * stream with the rewritten result. Only runs when `res.wantsInjection` is set;
 * in that case 'MaybeRemoveSecurity' is skipped because the HTML rewriter is
 * given `wantsSecurityRemoved` itself.
 *
 * The body is decoded with the charset inferred from the response and encoded
 * back with the same charset after rewriting.
 *
 * Failures while decoding, rewriting or encoding are reported through
 * `this.onError`. Without that, a rejection inside the async callback would go
 * unhandled and the chain would never continue.
 */
const MaybeInjectHtml: ResponseMiddleware = function () {
  if (!this.res.wantsInjection) {
    return this.next()
  }

  this.skipMiddleware('MaybeRemoveSecurity') // we only want to do one or the other

  debug('injecting into HTML')

  this.incomingResStream.pipe(concatStream(async (body) => {
    let encodedBody: Buffer

    try {
      const nodeCharset = getNodeCharsetFromResponse(this.incomingRes.headers, body)
      const decodedBody = iconv.decode(body, nodeCharset)
      const injectedBody = await rewriter.html(decodedBody, {
        domainName: this.getRemoteState().domainName,
        wantsInjection: this.res.wantsInjection,
        wantsSecurityRemoved: this.res.wantsSecurityRemoved,
        isHtml: isHtml(this.incomingRes),
        useAstSourceRewriting: this.config.experimentalSourceRewriting,
        url: this.req.proxiedUrl,
        deferSourceMapRewrite: this.deferSourceMapRewrite,
      })

      encodedBody = iconv.encode(injectedBody, nodeCharset)
    } catch (err) {
      // route the failure to the same handler the stream errors use
      debug('error injecting into HTML %o', { err })
      this.onError(err)

      return
    }

    // hand the rewritten body to the next middleware as a fresh stream
    const pt = new PassThrough()

    pt.write(encodedBody)
    pt.end()

    this.incomingResStream = pt
    this.next()
  })).on('error', this.onError)
}
/**
 * Pipes the body through the streaming `rewriter.security` transform when
 * `res.wantsSecurityRemoved` is set. The body stream is switched to utf8
 * before it is piped.
 */
const MaybeRemoveSecurity: ResponseMiddleware = function () {
  if (!this.res.wantsSecurityRemoved) {
    return this.next()
  }

  debug('removing JS framebusting code')

  const bodyStream = this.incomingResStream

  bodyStream.setEncoding('utf8')

  const securityRewriter = rewriter.security({
    isHtml: isHtml(this.incomingRes),
    useAstSourceRewriting: this.config.experimentalSourceRewriting,
    url: this.req.proxiedUrl,
    deferSourceMapRewrite: this.deferSourceMapRewrite,
  })

  this.incomingResStream = bodyStream.pipe(securityRewriter).on('error', this.onError)

  this.next()
}
/**
 * Compresses the body stream with gzip again. MaybeGunzipBody skips this
 * middleware whenever the body was not decompressed in the first place.
 */
const GzipBody: ResponseMiddleware = function () {
  debug('regzipping response body')

  const gzip = zlib.createGzip(zlibOptions)

  this.incomingResStream = this.incomingResStream.pipe(gzip).on('error', this.onError)

  this.next()
}
/**
 * Last step of the chain: streams the (possibly rewritten) body into the
 * outgoing response and ends the middleware chain once `res` emits 'end'.
 *
 * NOTE(review): `res` is the writable side of this pipe, and Node writable
 * streams announce completion with 'finish' - confirm that 'end' is really
 * emitted on `res` here.
 */
const SendResponseBodyToClient: ResponseMiddleware = function () {
  const { res, incomingResStream } = this

  incomingResStream.pipe(res).on('error', this.onError)

  res.on('end', () => {
    return this.end()
  })
}
// All response middlewares, keyed by name. The names matter: MaybeGunzipBody and
// MaybeInjectHtml pass 'GzipBody' / 'MaybeRemoveSecurity' to this.skipMiddleware.
// NOTE(review): presumably the consumer runs these in the order listed here
// (several of them rely on flags set by an earlier one) - confirm in the runner.
export default {
LogResponse,
PatchExpressSetHeader,
SetInjectionLevel,
OmitProblematicHeaders,
MaybePreventCaching,
MaybeStripDocumentDomainFeaturePolicy,
CopyCookiesFromIncomingRes,
MaybeSendRedirectToClient,
CopyResponseStatusCode,
ClearCyInitialCookie,
MaybeEndWithEmptyBody,
MaybeGunzipBody,
MaybeInjectHtml,
MaybeRemoveSecurity,
GzipBody,
SendResponseBodyToClient,
}