fix(compiler): support full range of entity decoding in browser builds

BREAKING CHANGE: compiler options have been adjusted.
    - new option `decodeEntities` is added.
    - `namedCharacterReferences` option has been removed.
    - `maxCRNameLength` option has been removed.
This commit is contained in:
Evan You 2020-04-08 18:51:25 -04:00
parent 8c17535a47
commit 1f6e72b110
11 changed files with 245 additions and 1809 deletions

View File

@ -9,7 +9,6 @@ import {
NodeTypes,
Position,
TextNode,
AttributeNode,
InterpolationNode
} from '../src/ast'
@ -163,114 +162,6 @@ describe('compiler: parse', () => {
}
})
})
test('HTML entities compatibility in text (https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state).', () => {
const spy = jest.fn()
const ast = baseParse('&ampersand;', {
namedCharacterReferences: { amp: '&' },
onError: spy
})
const text = ast.children[0] as TextNode
expect(text).toStrictEqual({
type: NodeTypes.TEXT,
content: '&ersand;',
loc: {
start: { offset: 0, line: 1, column: 1 },
end: { offset: 11, line: 1, column: 12 },
source: '&ampersand;'
}
})
expect(spy.mock.calls).toMatchObject([
[
{
code: ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
loc: {
start: { offset: 4, line: 1, column: 5 }
}
}
]
])
})
test('HTML entities compatibility in attribute (https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state).', () => {
const spy = jest.fn()
const ast = baseParse(
'<div a="&ampersand;" b="&amp;ersand;" c="&amp!"></div>',
{
namedCharacterReferences: { amp: '&', 'amp;': '&' },
onError: spy
}
)
const element = ast.children[0] as ElementNode
const text1 = (element.props[0] as AttributeNode).value
const text2 = (element.props[1] as AttributeNode).value
const text3 = (element.props[2] as AttributeNode).value
expect(text1).toStrictEqual({
type: NodeTypes.TEXT,
content: '&ampersand;',
loc: {
start: { offset: 7, line: 1, column: 8 },
end: { offset: 20, line: 1, column: 21 },
source: '"&ampersand;"'
}
})
expect(text2).toStrictEqual({
type: NodeTypes.TEXT,
content: '&ersand;',
loc: {
start: { offset: 23, line: 1, column: 24 },
end: { offset: 37, line: 1, column: 38 },
source: '"&amp;ersand;"'
}
})
expect(text3).toStrictEqual({
type: NodeTypes.TEXT,
content: '&!',
loc: {
start: { offset: 40, line: 1, column: 41 },
end: { offset: 47, line: 1, column: 48 },
source: '"&amp!"'
}
})
expect(spy.mock.calls).toMatchObject([
[
{
code: ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
loc: {
start: { offset: 45, line: 1, column: 46 }
}
}
]
])
})
test('Some control character reference should be replaced.', () => {
const spy = jest.fn()
const ast = baseParse('&#x86;', { onError: spy })
const text = ast.children[0] as TextNode
expect(text).toStrictEqual({
type: NodeTypes.TEXT,
content: '†',
loc: {
start: { offset: 0, line: 1, column: 1 },
end: { offset: 6, line: 1, column: 7 },
source: '&#x86;'
}
})
expect(spy.mock.calls).toMatchObject([
[
{
code: ErrorCodes.CONTROL_CHARACTER_REFERENCE,
loc: {
start: { offset: 0, line: 1, column: 1 }
}
}
]
])
})
})
describe('Interpolation', () => {
@ -1652,12 +1543,10 @@ foo
expect(baz.loc.end).toEqual({ line: 2, column: 28, offset })
})
describe('namedCharacterReferences option', () => {
describe('decodeEntities option', () => {
test('use the given map', () => {
const ast: any = baseParse('&amp;&cups;', {
namedCharacterReferences: {
'cups;': '\u222A\uFE00' // UNION with serifs
},
decodeEntities: text => text.replace('&cups;', '\u222A\uFE00'),
onError: () => {} // Ignore errors
})
@ -1756,60 +1645,6 @@ foo
errors: []
}
],
ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE: [
{
code: '<template>&#a;</template>',
errors: [
{
type: ErrorCodes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
loc: { offset: 10, line: 1, column: 11 }
}
]
},
{
code: '<template>&#xg;</template>',
errors: [
{
type: ErrorCodes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
loc: { offset: 10, line: 1, column: 11 }
}
]
},
{
code: '<template>&#99;</template>',
errors: []
},
{
code: '<template>&#xff;</template>',
errors: []
},
{
code: '<template attr="&#a;"></template>',
errors: [
{
type: ErrorCodes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
loc: { offset: 16, line: 1, column: 17 }
}
]
},
{
code: '<template attr="&#xg;"></template>',
errors: [
{
type: ErrorCodes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
loc: { offset: 16, line: 1, column: 17 }
}
]
},
{
code: '<template attr="&#99;"></template>',
errors: []
},
{
code: '<template attr="&#xff;"></template>',
errors: []
}
],
CDATA_IN_HTML_CONTENT: [
{
code: '<template><![CDATA[cdata]]></template>',
@ -1825,37 +1660,6 @@ foo
errors: []
}
],
CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE: [
{
code: '<template>&#1234567;</template>',
errors: [
{
type: ErrorCodes.CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE,
loc: { offset: 10, line: 1, column: 11 }
}
]
}
],
CONTROL_CHARACTER_REFERENCE: [
{
code: '<template>&#0003;</template>',
errors: [
{
type: ErrorCodes.CONTROL_CHARACTER_REFERENCE,
loc: { offset: 10, line: 1, column: 11 }
}
]
},
{
code: '<template>&#x7F;</template>',
errors: [
{
type: ErrorCodes.CONTROL_CHARACTER_REFERENCE,
loc: { offset: 10, line: 1, column: 11 }
}
]
}
],
DUPLICATE_ATTRIBUTE: [
{
code: '<template><div id="" id=""></div></template>',
@ -2412,36 +2216,6 @@ foo
]
}
],
MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE: [
{
code: '<template>&amp</template>',
options: { namedCharacterReferences: { amp: '&' } },
errors: [
{
type: ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
loc: { offset: 14, line: 1, column: 15 }
}
]
},
{
code: '<template>&#40</template>',
errors: [
{
type: ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
loc: { offset: 14, line: 1, column: 15 }
}
]
},
{
code: '<template>&#x40</template>',
errors: [
{
type: ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
loc: { offset: 15, line: 1, column: 16 }
}
]
}
],
MISSING_WHITESPACE_BETWEEN_ATTRIBUTES: [
{
code: '<template><div id="foo"class="bar"></div></template>',
@ -2500,48 +2274,6 @@ foo
]
}
],
NONCHARACTER_CHARACTER_REFERENCE: [
{
code: '<template>&#xFFFE;</template>',
errors: [
{
type: ErrorCodes.NONCHARACTER_CHARACTER_REFERENCE,
loc: { offset: 10, line: 1, column: 11 }
}
]
},
{
code: '<template>&#x1FFFF;</template>',
errors: [
{
type: ErrorCodes.NONCHARACTER_CHARACTER_REFERENCE,
loc: { offset: 10, line: 1, column: 11 }
}
]
}
],
NULL_CHARACTER_REFERENCE: [
{
code: '<template>&#0000;</template>',
errors: [
{
type: ErrorCodes.NULL_CHARACTER_REFERENCE,
loc: { offset: 10, line: 1, column: 11 }
}
]
}
],
SURROGATE_CHARACTER_REFERENCE: [
{
code: '<template>&#xD800;</template>',
errors: [
{
type: ErrorCodes.SURROGATE_CHARACTER_REFERENCE,
loc: { offset: 10, line: 1, column: 11 }
}
]
}
],
UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME: [
{
code: "<template><div a\"bc=''></div></template>",

View File

@ -32,10 +32,7 @@ export function createCompilerError<T extends number>(
export const enum ErrorCodes {
// parse errors
ABRUPT_CLOSING_OF_EMPTY_COMMENT,
ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
CDATA_IN_HTML_CONTENT,
CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE,
CONTROL_CHARACTER_REFERENCE,
DUPLICATE_ATTRIBUTE,
END_TAG_WITH_ATTRIBUTES,
END_TAG_WITH_TRAILING_SOLIDUS,
@ -49,12 +46,8 @@ export const enum ErrorCodes {
INVALID_FIRST_CHARACTER_OF_TAG_NAME,
MISSING_ATTRIBUTE_VALUE,
MISSING_END_TAG_NAME,
MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
MISSING_WHITESPACE_BETWEEN_ATTRIBUTES,
NESTED_COMMENT,
NONCHARACTER_CHARACTER_REFERENCE,
NULL_CHARACTER_REFERENCE,
SURROGATE_CHARACTER_REFERENCE,
UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME,
UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE,
UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME,
@ -101,14 +94,8 @@ export const enum ErrorCodes {
export const errorMessages: { [code: number]: string } = {
// parse errors
[ErrorCodes.ABRUPT_CLOSING_OF_EMPTY_COMMENT]: 'Illegal comment.',
[ErrorCodes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE]:
'Illegal numeric character reference: invalid character.',
[ErrorCodes.CDATA_IN_HTML_CONTENT]:
'CDATA section is allowed only in XML context.',
[ErrorCodes.CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE]:
'Illegal numeric character reference: too big.',
[ErrorCodes.CONTROL_CHARACTER_REFERENCE]:
'Illegal numeric character reference: control character.',
[ErrorCodes.DUPLICATE_ATTRIBUTE]: 'Duplicate attribute.',
[ErrorCodes.END_TAG_WITH_ATTRIBUTES]: 'End tag cannot have attributes.',
[ErrorCodes.END_TAG_WITH_TRAILING_SOLIDUS]: "Illegal '/' in tags.",
@ -124,17 +111,9 @@ export const errorMessages: { [code: number]: string } = {
"Illegal tag name. Use '&lt;' to print '<'.",
[ErrorCodes.MISSING_ATTRIBUTE_VALUE]: 'Attribute value was expected.',
[ErrorCodes.MISSING_END_TAG_NAME]: 'End tag name was expected.',
[ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE]:
'Semicolon was expected.',
[ErrorCodes.MISSING_WHITESPACE_BETWEEN_ATTRIBUTES]:
'Whitespace was expected.',
[ErrorCodes.NESTED_COMMENT]: "Unexpected '<!--' in comment.",
[ErrorCodes.NONCHARACTER_CHARACTER_REFERENCE]:
'Illegal numeric character reference: non character.',
[ErrorCodes.NULL_CHARACTER_REFERENCE]:
'Illegal numeric character reference: null character.',
[ErrorCodes.SURROGATE_CHARACTER_REFERENCE]:
'Illegal numeric character reference: non-pair surrogate.',
[ErrorCodes.UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME]:
'Attribute name cannot contain U+0022 ("), U+0027 (\'), and U+003C (<).',
[ErrorCodes.UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE]:

View File

@ -26,13 +26,7 @@ export interface ParserOptions {
parent: ElementNode | undefined
) => TextModes
delimiters?: [string, string] // ['{{', '}}']
// Map to HTML entities. E.g., `{ "amp;": "&" }`
// The full set is https://html.spec.whatwg.org/multipage/named-characters.html#named-character-references
namedCharacterReferences?: Record<string, string>
// this number is based on the map above, but it should be pre-computed
// to avoid the cost on every parse() call.
maxCRNameLength?: number
decodeEntities?: (rawText: string, asAttr: boolean) => string
onError?: (error: CompilerError) => void
}

View File

@ -30,6 +30,18 @@ type OptionalOptions = 'isNativeTag' | 'isBuiltInComponent'
type MergedParserOptions = Omit<Required<ParserOptions>, OptionalOptions> &
Pick<ParserOptions, OptionalOptions>
// Fallback entity table for the default `decodeEntities` option. It only
// covers the characters reserved as part of the template syntax, and is only
// used when the custom renderer did not provide a platform-specific decoder.
const decodeMap: Record<string, string> = {
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  apos: "'"
}
// Matches exactly the semicolon-terminated references listed in `decodeMap`.
const decodeRE = /&(gt|lt|amp|apos|quot);/g
export const defaultParserOptions: MergedParserOptions = {
delimiters: [`{{`, `}}`],
getNamespace: () => Namespaces.HTML,
@ -37,14 +49,8 @@ export const defaultParserOptions: MergedParserOptions = {
isVoidTag: NO,
isPreTag: NO,
isCustomElement: NO,
namedCharacterReferences: {
'gt;': '>',
'lt;': '<',
'amp;': '&',
'apos;': "'",
'quot;': '"'
},
maxCRNameLength: 5,
decodeEntities: (rawText: string): string =>
rawText.replace(decodeRE, (_, p1) => decodeMap[p1]),
onError: defaultOnError
}
@ -57,7 +63,7 @@ export const enum TextModes {
ATTRIBUTE_VALUE
}
interface ParserContext {
export interface ParserContext {
options: MergedParserOptions
readonly originalSource: string
source: string
@ -812,128 +818,21 @@ function parseTextData(
length: number,
mode: TextModes
): string {
let rawText = context.source.slice(0, length)
const rawText = context.source.slice(0, length)
advanceBy(context, length)
if (
mode === TextModes.RAWTEXT ||
mode === TextModes.CDATA ||
rawText.indexOf('&') === -1
) {
advanceBy(context, length)
return rawText
} else {
// DATA or RCDATA containing "&". Entity decoding required.
return context.options.decodeEntities(
rawText,
mode === TextModes.ATTRIBUTE_VALUE
)
}
// DATA or RCDATA containing "&"". Entity decoding required.
const end = context.offset + length
let decodedText = ''
function advance(length: number) {
advanceBy(context, length)
rawText = rawText.slice(length)
}
while (context.offset < end) {
const head = /&(?:#x?)?/i.exec(rawText)
if (!head || context.offset + head.index >= end) {
const remaining = end - context.offset
decodedText += rawText.slice(0, remaining)
advance(remaining)
break
}
// Advance to the "&".
decodedText += rawText.slice(0, head.index)
advance(head.index)
if (head[0] === '&') {
// Named character reference.
let name = ''
let value: string | undefined = undefined
if (/[0-9a-z]/i.test(rawText[1])) {
for (
let length = context.options.maxCRNameLength;
!value && length > 0;
--length
) {
name = rawText.substr(1, length)
value = context.options.namedCharacterReferences[name]
}
if (value) {
const semi = name.endsWith(';')
if (
mode === TextModes.ATTRIBUTE_VALUE &&
!semi &&
/[=a-z0-9]/i.test(rawText[name.length + 1] || '')
) {
decodedText += '&' + name
advance(1 + name.length)
} else {
decodedText += value
advance(1 + name.length)
if (!semi) {
emitError(
context,
ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE
)
}
}
} else {
decodedText += '&' + name
advance(1 + name.length)
}
} else {
decodedText += '&'
advance(1)
}
} else {
// Numeric character reference.
const hex = head[0] === '&#x'
const pattern = hex ? /^&#x([0-9a-f]+);?/i : /^&#([0-9]+);?/
const body = pattern.exec(rawText)
if (!body) {
decodedText += head[0]
emitError(
context,
ErrorCodes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE
)
advance(head[0].length)
} else {
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
let cp = Number.parseInt(body[1], hex ? 16 : 10)
if (cp === 0) {
emitError(context, ErrorCodes.NULL_CHARACTER_REFERENCE)
cp = 0xfffd
} else if (cp > 0x10ffff) {
emitError(
context,
ErrorCodes.CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE
)
cp = 0xfffd
} else if (cp >= 0xd800 && cp <= 0xdfff) {
emitError(context, ErrorCodes.SURROGATE_CHARACTER_REFERENCE)
cp = 0xfffd
} else if ((cp >= 0xfdd0 && cp <= 0xfdef) || (cp & 0xfffe) === 0xfffe) {
emitError(context, ErrorCodes.NONCHARACTER_CHARACTER_REFERENCE)
} else if (
(cp >= 0x01 && cp <= 0x08) ||
cp === 0x0b ||
(cp >= 0x0d && cp <= 0x1f) ||
(cp >= 0x7f && cp <= 0x9f)
) {
emitError(context, ErrorCodes.CONTROL_CHARACTER_REFERENCE)
cp = CCR_REPLACEMENTS[cp] || cp
}
decodedText += String.fromCodePoint(cp)
advance(body[0].length)
if (!body![0].endsWith(';')) {
emitError(
context,
ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE
)
}
}
}
}
return decodedText
}
function getCursor(context: ParserContext): Position {
@ -1052,34 +951,3 @@ function startsWithEndTagOpen(source: string, tag: string): boolean {
/[\t\n\f />]/.test(source[2 + tag.length] || '>')
)
}
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
const CCR_REPLACEMENTS: { [key: number]: number | undefined } = {
0x80: 0x20ac,
0x82: 0x201a,
0x83: 0x0192,
0x84: 0x201e,
0x85: 0x2026,
0x86: 0x2020,
0x87: 0x2021,
0x88: 0x02c6,
0x89: 0x2030,
0x8a: 0x0160,
0x8b: 0x2039,
0x8c: 0x0152,
0x8e: 0x017d,
0x91: 0x2018,
0x92: 0x2019,
0x93: 0x201c,
0x94: 0x201d,
0x95: 0x2022,
0x96: 0x2013,
0x97: 0x2014,
0x98: 0x02dc,
0x99: 0x2122,
0x9a: 0x0161,
0x9b: 0x203a,
0x9c: 0x0153,
0x9e: 0x017e,
0x9f: 0x0178
}

View File

@ -5,12 +5,10 @@ import {
TextNode,
ErrorCodes,
ElementTypes,
InterpolationNode
InterpolationNode,
AttributeNode
} from '@vue/compiler-core'
import {
parserOptionsStandard as parserOptions,
DOMNamespaces
} from '../src/parserOptionsStandard'
import { parserOptions, DOMNamespaces } from '../src/parserOptions'
describe('DOM parser', () => {
describe('Text', () => {
@ -170,6 +168,77 @@ describe('DOM parser', () => {
content: `foo${nbsp}${nbsp}bar`
})
})
// https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
test('HTML entities compatibility in text', () => {
const ast = parse('&ampersand;', parserOptions)
const text = ast.children[0] as TextNode
expect(text).toStrictEqual({
type: NodeTypes.TEXT,
content: '&ersand;',
loc: {
start: { offset: 0, line: 1, column: 1 },
end: { offset: 11, line: 1, column: 12 },
source: '&ampersand;'
}
})
})
// https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
test('HTML entities compatibility in attribute', () => {
const ast = parse(
'<div a="&ampersand;" b="&amp;ersand;" c="&amp!"></div>',
parserOptions
)
const element = ast.children[0] as ElementNode
const text1 = (element.props[0] as AttributeNode).value
const text2 = (element.props[1] as AttributeNode).value
const text3 = (element.props[2] as AttributeNode).value
expect(text1).toStrictEqual({
type: NodeTypes.TEXT,
content: '&ampersand;',
loc: {
start: { offset: 7, line: 1, column: 8 },
end: { offset: 20, line: 1, column: 21 },
source: '"&ampersand;"'
}
})
expect(text2).toStrictEqual({
type: NodeTypes.TEXT,
content: '&ersand;',
loc: {
start: { offset: 23, line: 1, column: 24 },
end: { offset: 37, line: 1, column: 38 },
source: '"&amp;ersand;"'
}
})
expect(text3).toStrictEqual({
type: NodeTypes.TEXT,
content: '&!',
loc: {
start: { offset: 40, line: 1, column: 41 },
end: { offset: 47, line: 1, column: 48 },
source: '"&amp!"'
}
})
})
test('Some control character reference should be replaced.', () => {
const ast = parse('&#x86;', parserOptions)
const text = ast.children[0] as TextNode
expect(text).toStrictEqual({
type: NodeTypes.TEXT,
content: '†',
loc: {
start: { offset: 0, line: 1, column: 1 },
end: { offset: 6, line: 1, column: 7 },
source: '&#x86;'
}
})
})
})
describe('Interpolation', () => {

View File

@ -0,0 +1,133 @@
import { ParserOptions } from '@vue/compiler-core'
import namedCharacterReferences from './namedChars.json'
// Length of the longest key in `namedCharacterReferences`. Computed lazily on
// the first named-reference lookup to keep this file tree-shakable for browser
// builds.
let maxCRNameLength: number

// Hoisted out of the decode loop so they are not re-created on every
// iteration. None of them is global or sticky, so no `lastIndex` state is
// carried between calls.
const crHeadRE = /&(?:#x?)?/i
const crHexBodyRE = /^&#x([0-9a-f]+);?/i
const crDecBodyRE = /^&#([0-9]+);?/
const crNameStartRE = /[0-9a-z]/i
const crAttrNextRE = /[=a-z0-9]/i

/**
 * Decodes named and numeric HTML character references in `rawText`.
 *
 * - Named references are resolved against `namedCharacterReferences`, trying
 *   the longest candidate name first. Only own keys of the map are considered,
 *   so text such as `&constructor` is left untouched instead of picking up an
 *   inherited `Object.prototype` member.
 * - When `asAttr` is true, a named reference that lacks the trailing `;` and
 *   is followed by `=` or an alphanumeric character is left undecoded.
 * - Numeric references (`&#NN;`, `&#xHH;`, `&#XHH;`) are converted to the
 *   corresponding code point. Zero, values above 0x10FFFF and surrogates
 *   become U+FFFD; code points listed in `CCR_REPLACEMENTS` are replaced by
 *   their mapped value.
 *
 * @param rawText - text that may contain character references
 * @param asAttr - whether `rawText` is an attribute value
 * @returns the decoded text
 */
export const decodeHtml: ParserOptions['decodeEntities'] = (
  rawText,
  asAttr
) => {
  let offset = 0
  const end = rawText.length
  let decodedText = ''

  // Consumes `length` characters from the front of `rawText`.
  function advance(length: number) {
    offset += length
    rawText = rawText.slice(length)
  }

  while (offset < end) {
    const head = crHeadRE.exec(rawText)
    if (!head || offset + head.index >= end) {
      // No further reference: copy the rest verbatim.
      const remaining = end - offset
      decodedText += rawText.slice(0, remaining)
      advance(remaining)
      break
    }

    // Advance to the "&".
    decodedText += rawText.slice(0, head.index)
    advance(head.index)

    if (head[0] === '&') {
      // Named character reference.
      let name = ''
      let value: string | undefined = undefined
      if (crNameStartRE.test(rawText[1] || '')) {
        if (!maxCRNameLength) {
          maxCRNameLength = Object.keys(namedCharacterReferences).reduce(
            (max, name) => Math.max(max, name.length),
            0
          )
        }
        // Longest match wins: shrink the candidate until a known name is hit.
        for (let length = maxCRNameLength; !value && length > 0; --length) {
          name = rawText.slice(1, 1 + length)
          // Own-property check: a bare index would also find inherited
          // members such as `constructor` or `toString`.
          if (
            Object.prototype.hasOwnProperty.call(namedCharacterReferences, name)
          ) {
            value = (namedCharacterReferences as Record<string, string>)[name]
          }
        }
        if (value) {
          const semi = name.endsWith(';')
          if (
            asAttr &&
            !semi &&
            crAttrNextRE.test(rawText[name.length + 1] || '')
          ) {
            // Attribute-value exception: keep the text as written.
            decodedText += '&' + name
            advance(1 + name.length)
          } else {
            decodedText += value
            advance(1 + name.length)
          }
        } else {
          // Unknown name: `name` is the last (single character) candidate.
          decodedText += '&' + name
          advance(1 + name.length)
        }
      } else {
        decodedText += '&'
        advance(1)
      }
    } else {
      // Numeric character reference.
      // The head pattern is case-insensitive, so "&#X" must count as hex too.
      const hex = head[0].toLowerCase() === '&#x'
      const body = (hex ? crHexBodyRE : crDecBodyRE).exec(rawText)
      if (!body) {
        // No digits follow: emit the head as plain text.
        decodedText += head[0]
        advance(head[0].length)
      } else {
        // https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
        let cp = Number.parseInt(body[1], hex ? 16 : 10)
        if (cp === 0) {
          cp = 0xfffd
        } else if (cp > 0x10ffff) {
          cp = 0xfffd
        } else if (cp >= 0xd800 && cp <= 0xdfff) {
          cp = 0xfffd
        } else if ((cp >= 0xfdd0 && cp <= 0xfdef) || (cp & 0xfffe) === 0xfffe) {
          // noop: these code points are emitted unchanged
        } else if (
          (cp >= 0x01 && cp <= 0x08) ||
          cp === 0x0b ||
          (cp >= 0x0d && cp <= 0x1f) ||
          (cp >= 0x7f && cp <= 0x9f)
        ) {
          cp = CCR_REPLACEMENTS[cp] || cp
        }
        decodedText += String.fromCodePoint(cp)
        advance(body[0].length)
      }
    }
  }
  return decodedText
}
// Replacement code points for numeric character references in the 0x80-0x9f
// range, keyed by the referenced code point. See
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
// 0x81, 0x8d, 0x8f, 0x90 and 0x9d have no entry, so lookups for them yield
// `undefined`.
const CCR_REPLACEMENTS: { [key: number]: number | undefined } = {
  // 0x80 - 0x8f
  0x80: 0x20ac, 0x82: 0x201a, 0x83: 0x0192, 0x84: 0x201e,
  0x85: 0x2026, 0x86: 0x2020, 0x87: 0x2021, 0x88: 0x02c6,
  0x89: 0x2030, 0x8a: 0x0160, 0x8b: 0x2039, 0x8c: 0x0152,
  0x8e: 0x017d,
  // 0x90 - 0x9f
  0x91: 0x2018, 0x92: 0x2019, 0x93: 0x201c, 0x94: 0x201d,
  0x95: 0x2022, 0x96: 0x2013, 0x97: 0x2014, 0x98: 0x02dc,
  0x99: 0x2122, 0x9a: 0x0161, 0x9b: 0x203a, 0x9c: 0x0153,
  0x9e: 0x017e, 0x9f: 0x0178
}

View File

@ -0,0 +1,6 @@
// Detached element, created on first use and reused across calls, whose HTML
// parser performs the actual entity decoding.
let decoder: HTMLDivElement

/**
 * Decodes HTML entities in `raw` by letting the browser parse it.
 *
 * Text content is assigned as markup and read back through `textContent`.
 * Attribute values are instead placed inside a real attribute and read back
 * with `getAttribute`, so that the browser applies attribute-value decoding
 * rules and any `<`/`>` in the value is not interpreted as markup. Double
 * quotes in the value are escaped first so they cannot terminate the
 * attribute.
 *
 * NOTE(review): `raw` is assigned to `innerHTML`; this assumes the template
 * source is trusted — confirm against callers.
 *
 * @param raw - the raw text or attribute value to decode
 * @param asAttr - whether `raw` is an attribute value (defaults to `false`)
 * @returns the decoded string
 */
export function decodeHtmlBrowser(raw: string, asAttr = false): string {
  if (!decoder) {
    decoder = document.createElement('div')
  }
  if (asAttr) {
    decoder.innerHTML = `<div foo="${raw.replace(/"/g, '&quot;')}">`
    return decoder.children[0].getAttribute('foo') as string
  }
  decoder.innerHTML = raw
  return decoder.textContent as string
}

View File

@ -9,8 +9,7 @@ import {
NodeTransform,
DirectiveTransform
} from '@vue/compiler-core'
import { parserOptionsMinimal } from './parserOptionsMinimal'
import { parserOptionsStandard } from './parserOptionsStandard'
import { parserOptions } from './parserOptions'
import { transformStyle } from './transforms/transformStyle'
import { transformVHtml } from './transforms/vHtml'
import { transformVText } from './transforms/vText'
@ -20,9 +19,7 @@ import { transformShow } from './transforms/vShow'
import { warnTransitionChildren } from './transforms/warnTransitionChildren'
import { stringifyStatic } from './transforms/stringifyStatic'
export const parserOptions = __BROWSER__
? parserOptionsMinimal
: parserOptionsStandard
export { parserOptions }
export const DOMNodeTransforms: NodeTransform[] = [
transformStyle,

View File

@ -8,6 +8,8 @@ import {
} from '@vue/compiler-core'
import { makeMap, isVoidTag, isHTMLTag, isSVGTag } from '@vue/shared'
import { TRANSITION, TRANSITION_GROUP } from './runtimeHelpers'
import { decodeHtml } from './decodeHtml'
import { decodeHtmlBrowser } from './decodeHtmlBrowser'
const isRawTextContainer = /*#__PURE__*/ makeMap(
'style,iframe,script,noscript',
@ -20,10 +22,11 @@ export const enum DOMNamespaces {
MATH_ML
}
export const parserOptionsMinimal: ParserOptions = {
export const parserOptions: ParserOptions = {
isVoidTag,
isNativeTag: tag => isHTMLTag(tag) || isSVGTag(tag),
isPreTag: tag => tag === 'pre',
decodeEntities: __BROWSER__ ? decodeHtmlBrowser : decodeHtml,
isBuiltInComponent: (tag: string): symbol | undefined => {
if (isBuiltInType(tag, `Transition`)) {

View File

@ -1,17 +0,0 @@
import { ParserOptions } from '@vue/compiler-core'
import { parserOptionsMinimal } from './parserOptionsMinimal'
import namedCharacterReferences from './namedChars.json'
export { DOMNamespaces } from './parserOptionsMinimal'
export const parserOptionsStandard: ParserOptions = {
// extends the minimal options with more spec-compliant overrides
...parserOptionsMinimal,
// https://html.spec.whatwg.org/multipage/named-characters.html#named-character-references
namedCharacterReferences,
maxCRNameLength: /*#__PURE__*/ Object.keys(namedCharacterReferences).reduce(
(max, name) => Math.max(max, name.length),
0
)
}