fix(compiler): support full range of entity decoding in browser builds
BREAKING CHANGE: compiler options have been adjusted.
- new option `decodeEntities` is added.
- `namedCharacterReferences` option has been removed.
- `maxCRNameLength` option has been removed.
This commit is contained in:
@@ -5,12 +5,10 @@ import {
|
||||
TextNode,
|
||||
ErrorCodes,
|
||||
ElementTypes,
|
||||
InterpolationNode
|
||||
InterpolationNode,
|
||||
AttributeNode
|
||||
} from '@vue/compiler-core'
|
||||
import {
|
||||
parserOptionsStandard as parserOptions,
|
||||
DOMNamespaces
|
||||
} from '../src/parserOptionsStandard'
|
||||
import { parserOptions, DOMNamespaces } from '../src/parserOptions'
|
||||
|
||||
describe('DOM parser', () => {
|
||||
describe('Text', () => {
|
||||
@@ -170,6 +168,77 @@ describe('DOM parser', () => {
|
||||
content: `foo${nbsp}${nbsp}bar`
|
||||
})
|
||||
})
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
// In text content the legacy reference `&amp` (no trailing semicolon) is
// decoded even when alphanumerics follow it, so `&ampersand;` becomes
// `&ersand;`. The location still spans the full 11-character source.
test('HTML entities compatibility in text', () => {
  const ast = parse('&ampersand;', parserOptions)
  const text = ast.children[0] as TextNode

  expect(text).toStrictEqual({
    type: NodeTypes.TEXT,
    content: '&ersand;',
    loc: {
      start: { offset: 0, line: 1, column: 1 },
      end: { offset: 11, line: 1, column: 12 },
      source: '&ampersand;'
    }
  })
})
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
// Inside attribute values a semicolon-less legacy reference is left alone when
// the next character is `=` or alphanumeric:
//   a: `&ampersand;`  -> untouched (`&amp` is followed by `e`)
//   b: `&amp;ersand;` -> `&ersand;` (properly terminated reference)
//   c: `&amp!`        -> `&!`       (`!` does not block decoding)
test('HTML entities compatibility in attribute', () => {
  const ast = parse(
    '<div a="&ampersand;" b="&amp;ersand;" c="&amp!"></div>',
    parserOptions
  )
  const element = ast.children[0] as ElementNode
  const text1 = (element.props[0] as AttributeNode).value
  const text2 = (element.props[1] as AttributeNode).value
  const text3 = (element.props[2] as AttributeNode).value

  expect(text1).toStrictEqual({
    type: NodeTypes.TEXT,
    content: '&ampersand;',
    loc: {
      start: { offset: 7, line: 1, column: 8 },
      end: { offset: 20, line: 1, column: 21 },
      source: '"&ampersand;"'
    }
  })
  expect(text2).toStrictEqual({
    type: NodeTypes.TEXT,
    content: '&ersand;',
    loc: {
      start: { offset: 23, line: 1, column: 24 },
      end: { offset: 37, line: 1, column: 38 },
      source: '"&amp;ersand;"'
    }
  })
  expect(text3).toStrictEqual({
    type: NodeTypes.TEXT,
    content: '&!',
    loc: {
      start: { offset: 40, line: 1, column: 41 },
      end: { offset: 47, line: 1, column: 48 },
      source: '"&amp!"'
    }
  })
})
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
// A numeric reference to the C1 control 0x86 must be remapped to U+2020
// (DAGGER). The source span is the 6-character reference itself.
test('Some control character reference should be replaced.', () => {
  const ast = parse('&#x86;', parserOptions)
  const text = ast.children[0] as TextNode

  expect(text).toStrictEqual({
    type: NodeTypes.TEXT,
    content: '†',
    loc: {
      start: { offset: 0, line: 1, column: 1 },
      end: { offset: 6, line: 1, column: 7 },
      source: '&#x86;'
    }
  })
})
|
||||
})
|
||||
|
||||
describe('Interpolation', () => {
|
||||
|
||||
133
packages/compiler-dom/src/decodeHtml.ts
Normal file
133
packages/compiler-dom/src/decodeHtml.ts
Normal file
@@ -0,0 +1,133 @@
|
||||
import { ParserOptions } from '@vue/compiler-core'
|
||||
import namedCharacterReferences from './namedChars.json'
|
||||
|
||||
// Length of the longest key in `namedCharacterReferences`. Computed lazily on
// first use (instead of at module load) to keep this file tree-shakable for
// browser builds.
let maxCRNameLength: number

// Cached so lookups only ever see the table's own keys, never inherited ones.
const hasOwn = Object.prototype.hasOwnProperty

/**
 * Decodes HTML character references (`&name;`, `&#123;`, `&#x7b;`) in a piece
 * of raw template text.
 *
 * @param rawText - The text to decode.
 * @param asAttr - `true` when `rawText` is an attribute value. In that mode a
 *   named reference that lacks its trailing `;` and is immediately followed by
 *   `=` or an alphanumeric character is kept verbatim.
 * @returns The text with every recognised character reference replaced.
 *   Anything that is not a valid reference is copied through unchanged.
 */
export const decodeHtml: ParserOptions['decodeEntities'] = (
  rawText,
  asAttr
) => {
  let offset = 0
  const end = rawText.length
  let decodedText = ''

  // Consumes `length` characters. `rawText` always holds the unread remainder.
  function advance(length: number) {
    offset += length
    rawText = rawText.slice(length)
  }

  while (offset < end) {
    const head = /&(?:#x?)?/i.exec(rawText)
    if (!head || offset + head.index >= end) {
      // No further references: copy the rest through as-is.
      const remaining = end - offset
      decodedText += rawText.slice(0, remaining)
      advance(remaining)
      break
    }

    // Advance to the "&".
    decodedText += rawText.slice(0, head.index)
    advance(head.index)

    if (head[0] === '&') {
      // Named character reference.
      let name = ''
      let value: string | undefined = undefined
      // `|| ''` guards a trailing "&": without it `undefined` would be
      // coerced to the string "undefined" and pass the alphanumeric test.
      if (/[0-9a-z]/i.test(rawText[1] || '')) {
        if (!maxCRNameLength) {
          maxCRNameLength = Object.keys(namedCharacterReferences).reduce(
            (max, key) => Math.max(max, key.length),
            0
          )
        }
        // Longest match wins: try the longest candidate name first.
        for (let length = maxCRNameLength; !value && length > 0; --length) {
          name = rawText.slice(1, 1 + length)
          // Own-property check so that text like "&constructor" or
          // "&toString" never resolves to a member of Object.prototype.
          if (hasOwn.call(namedCharacterReferences, name)) {
            value = (namedCharacterReferences as Record<string, string>)[name]
          }
        }
        // Attribute values keep a semicolon-less reference verbatim when it
        // is followed by "=" or an alphanumeric character.
        const keepVerbatimInAttr =
          asAttr &&
          !name.endsWith(';') &&
          /[=a-z0-9]/i.test(rawText[name.length + 1] || '')
        if (value && !keepVerbatimInAttr) {
          decodedText += value
        } else {
          decodedText += '&' + name
        }
        advance(1 + name.length)
      } else {
        // A bare "&" that does not start a reference.
        decodedText += '&'
        advance(1)
      }
    } else {
      // Numeric character reference. The head regex is case-insensitive, so
      // compare case-insensitively here too: "&#X41;" is valid hexadecimal.
      const hex = head[0].toLowerCase() === '&#x'
      const pattern = hex ? /^&#x([0-9a-f]+);?/i : /^&#([0-9]+);?/
      const body = pattern.exec(rawText)
      if (!body) {
        // "&#" / "&#x" without any digits is plain text.
        decodedText += head[0]
        advance(head[0].length)
      } else {
        // https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
        let cp = Number.parseInt(body[1], hex ? 16 : 10)
        if (cp === 0) {
          cp = 0xfffd
        } else if (cp > 0x10ffff) {
          cp = 0xfffd
        } else if (cp >= 0xd800 && cp <= 0xdfff) {
          // Surrogates are not valid scalar values.
          cp = 0xfffd
        } else if ((cp >= 0xfdd0 && cp <= 0xfdef) || (cp & 0xfffe) === 0xfffe) {
          // Noncharacters are passed through unchanged.
        } else if (
          (cp >= 0x01 && cp <= 0x08) ||
          cp === 0x0b ||
          (cp >= 0x0d && cp <= 0x1f) ||
          (cp >= 0x7f && cp <= 0x9f)
        ) {
          // Control characters: remap the ones listed in the table.
          cp = CCR_REPLACEMENTS[cp] || cp
        }
        decodedText += String.fromCodePoint(cp)
        advance(body[0].length)
      }
    }
  }
  return decodedText
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
// Maps a numeric character reference in the 0x80-0x9f control range to the
// code point it is replaced with. Code points without an entry are absent
// from the table and yield `undefined`.
const CCR_REPLACEMENTS: Record<number, number | undefined> = {
  0x80: 0x20ac, // EURO SIGN
  0x82: 0x201a, // SINGLE LOW-9 QUOTATION MARK
  0x83: 0x0192, // LATIN SMALL LETTER F WITH HOOK
  0x84: 0x201e, // DOUBLE LOW-9 QUOTATION MARK
  0x85: 0x2026, // HORIZONTAL ELLIPSIS
  0x86: 0x2020, // DAGGER
  0x87: 0x2021, // DOUBLE DAGGER
  0x88: 0x02c6, // MODIFIER LETTER CIRCUMFLEX ACCENT
  0x89: 0x2030, // PER MILLE SIGN
  0x8a: 0x0160, // LATIN CAPITAL LETTER S WITH CARON
  0x8b: 0x2039, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
  0x8c: 0x0152, // LATIN CAPITAL LIGATURE OE
  0x8e: 0x017d, // LATIN CAPITAL LETTER Z WITH CARON
  0x91: 0x2018, // LEFT SINGLE QUOTATION MARK
  0x92: 0x2019, // RIGHT SINGLE QUOTATION MARK
  0x93: 0x201c, // LEFT DOUBLE QUOTATION MARK
  0x94: 0x201d, // RIGHT DOUBLE QUOTATION MARK
  0x95: 0x2022, // BULLET
  0x96: 0x2013, // EN DASH
  0x97: 0x2014, // EM DASH
  0x98: 0x02dc, // SMALL TILDE
  0x99: 0x2122, // TRADE MARK SIGN
  0x9a: 0x0161, // LATIN SMALL LETTER S WITH CARON
  0x9b: 0x203a, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
  0x9c: 0x0153, // LATIN SMALL LIGATURE OE
  0x9e: 0x017e, // LATIN SMALL LETTER Z WITH CARON
  0x9f: 0x0178 // LATIN CAPITAL LETTER Y WITH DIAERESIS
}
|
||||
6
packages/compiler-dom/src/decodeHtmlBrowser.ts
Normal file
6
packages/compiler-dom/src/decodeHtmlBrowser.ts
Normal file
@@ -0,0 +1,6 @@
|
||||
// Detached <div> created on first use and reused as a scratch element.
let decoder: HTMLDivElement

/**
 * Decodes HTML character references by handing the text to the browser's own
 * HTML parser, so no entity table has to be shipped in browser builds.
 *
 * @param raw - The raw text to decode.
 * @param asAttr - When `true`, `raw` is decoded as an attribute value rather
 *   than as text content. The two contexts differ for semicolon-less named
 *   references (e.g. `&ampersand` stays untouched inside an attribute), so
 *   the text is round-tripped through a real attribute in that case.
 *   Defaults to `false`, which preserves the previous single-argument
 *   behavior.
 * @returns The decoded text.
 */
export function decodeHtmlBrowser(raw: string, asAttr = false): string {
  if (!decoder) {
    decoder = document.createElement('div')
  }
  if (asAttr) {
    // Escape double quotes so the value cannot terminate the attribute early.
    decoder.innerHTML = `<div foo="${raw.replace(/"/g, '&quot;')}">`
    return decoder.children[0].getAttribute('foo') as string
  }
  decoder.innerHTML = raw
  return decoder.textContent as string
}
|
||||
@@ -9,8 +9,7 @@ import {
|
||||
NodeTransform,
|
||||
DirectiveTransform
|
||||
} from '@vue/compiler-core'
|
||||
import { parserOptionsMinimal } from './parserOptionsMinimal'
|
||||
import { parserOptionsStandard } from './parserOptionsStandard'
|
||||
import { parserOptions } from './parserOptions'
|
||||
import { transformStyle } from './transforms/transformStyle'
|
||||
import { transformVHtml } from './transforms/vHtml'
|
||||
import { transformVText } from './transforms/vText'
|
||||
@@ -20,9 +19,7 @@ import { transformShow } from './transforms/vShow'
|
||||
import { warnTransitionChildren } from './transforms/warnTransitionChildren'
|
||||
import { stringifyStatic } from './transforms/stringifyStatic'
|
||||
|
||||
export const parserOptions = __BROWSER__
|
||||
? parserOptionsMinimal
|
||||
: parserOptionsStandard
|
||||
export { parserOptions }
|
||||
|
||||
export const DOMNodeTransforms: NodeTransform[] = [
|
||||
transformStyle,
|
||||
|
||||
@@ -8,6 +8,8 @@ import {
|
||||
} from '@vue/compiler-core'
|
||||
import { makeMap, isVoidTag, isHTMLTag, isSVGTag } from '@vue/shared'
|
||||
import { TRANSITION, TRANSITION_GROUP } from './runtimeHelpers'
|
||||
import { decodeHtml } from './decodeHtml'
|
||||
import { decodeHtmlBrowser } from './decodeHtmlBrowser'
|
||||
|
||||
const isRawTextContainer = /*#__PURE__*/ makeMap(
|
||||
'style,iframe,script,noscript',
|
||||
@@ -20,10 +22,11 @@ export const enum DOMNamespaces {
|
||||
MATH_ML
|
||||
}
|
||||
|
||||
export const parserOptionsMinimal: ParserOptions = {
|
||||
export const parserOptions: ParserOptions = {
|
||||
isVoidTag,
|
||||
isNativeTag: tag => isHTMLTag(tag) || isSVGTag(tag),
|
||||
isPreTag: tag => tag === 'pre',
|
||||
decodeEntities: __BROWSER__ ? decodeHtmlBrowser : decodeHtml,
|
||||
|
||||
isBuiltInComponent: (tag: string): symbol | undefined => {
|
||||
if (isBuiltInType(tag, `Transition`)) {
|
||||
@@ -1,17 +0,0 @@
|
||||
import { ParserOptions } from '@vue/compiler-core'
|
||||
import { parserOptionsMinimal } from './parserOptionsMinimal'
|
||||
import namedCharacterReferences from './namedChars.json'
|
||||
|
||||
export { DOMNamespaces } from './parserOptionsMinimal'
|
||||
|
||||
/**
 * Parser options that layer spec-compliant named character reference data on
 * top of `parserOptionsMinimal`.
 */
export const parserOptionsStandard: ParserOptions = {
  // start from the minimal options and override with spec-compliant pieces
  ...parserOptionsMinimal,

  // https://html.spec.whatwg.org/multipage/named-characters.html#named-character-references
  namedCharacterReferences,
  // length of the longest reference name in the table above
  maxCRNameLength: /*#__PURE__*/ Object.keys(namedCharacterReferences).reduce(
    (longest, key) => (key.length > longest ? key.length : longest),
    0
  )
}
|
||||
Reference in New Issue
Block a user