fix(compiler): support full range of entity decoding in browser builds
BREAKING CHANGE: compiler options have been adjusted.
- new option `decodeEntities` is added.
- `namedCharacterReferences` option has been removed.
- `maxCRNameLength` option has been removed.
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -9,7 +9,6 @@ import {
|
||||
NodeTypes,
|
||||
Position,
|
||||
TextNode,
|
||||
AttributeNode,
|
||||
InterpolationNode
|
||||
} from '../src/ast'
|
||||
|
||||
@@ -163,114 +162,6 @@ describe('compiler: parse', () => {
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
test('HTML entities compatibility in text (https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state).', () => {
|
||||
const spy = jest.fn()
|
||||
const ast = baseParse('&ersand;', {
|
||||
namedCharacterReferences: { amp: '&' },
|
||||
onError: spy
|
||||
})
|
||||
const text = ast.children[0] as TextNode
|
||||
|
||||
expect(text).toStrictEqual({
|
||||
type: NodeTypes.TEXT,
|
||||
content: '&ersand;',
|
||||
loc: {
|
||||
start: { offset: 0, line: 1, column: 1 },
|
||||
end: { offset: 11, line: 1, column: 12 },
|
||||
source: '&ersand;'
|
||||
}
|
||||
})
|
||||
expect(spy.mock.calls).toMatchObject([
|
||||
[
|
||||
{
|
||||
code: ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
|
||||
loc: {
|
||||
start: { offset: 4, line: 1, column: 5 }
|
||||
}
|
||||
}
|
||||
]
|
||||
])
|
||||
})
|
||||
|
||||
test('HTML entities compatibility in attribute (https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state).', () => {
|
||||
const spy = jest.fn()
|
||||
const ast = baseParse(
|
||||
'<div a="&ersand;" b="&ersand;" c="&!"></div>',
|
||||
{
|
||||
namedCharacterReferences: { amp: '&', 'amp;': '&' },
|
||||
onError: spy
|
||||
}
|
||||
)
|
||||
const element = ast.children[0] as ElementNode
|
||||
const text1 = (element.props[0] as AttributeNode).value
|
||||
const text2 = (element.props[1] as AttributeNode).value
|
||||
const text3 = (element.props[2] as AttributeNode).value
|
||||
|
||||
expect(text1).toStrictEqual({
|
||||
type: NodeTypes.TEXT,
|
||||
content: '&ersand;',
|
||||
loc: {
|
||||
start: { offset: 7, line: 1, column: 8 },
|
||||
end: { offset: 20, line: 1, column: 21 },
|
||||
source: '"&ersand;"'
|
||||
}
|
||||
})
|
||||
expect(text2).toStrictEqual({
|
||||
type: NodeTypes.TEXT,
|
||||
content: '&ersand;',
|
||||
loc: {
|
||||
start: { offset: 23, line: 1, column: 24 },
|
||||
end: { offset: 37, line: 1, column: 38 },
|
||||
source: '"&ersand;"'
|
||||
}
|
||||
})
|
||||
expect(text3).toStrictEqual({
|
||||
type: NodeTypes.TEXT,
|
||||
content: '&!',
|
||||
loc: {
|
||||
start: { offset: 40, line: 1, column: 41 },
|
||||
end: { offset: 47, line: 1, column: 48 },
|
||||
source: '"&!"'
|
||||
}
|
||||
})
|
||||
expect(spy.mock.calls).toMatchObject([
|
||||
[
|
||||
{
|
||||
code: ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
|
||||
loc: {
|
||||
start: { offset: 45, line: 1, column: 46 }
|
||||
}
|
||||
}
|
||||
]
|
||||
])
|
||||
})
|
||||
|
||||
test('Some control character reference should be replaced.', () => {
|
||||
const spy = jest.fn()
|
||||
const ast = baseParse('†', { onError: spy })
|
||||
const text = ast.children[0] as TextNode
|
||||
|
||||
expect(text).toStrictEqual({
|
||||
type: NodeTypes.TEXT,
|
||||
content: '†',
|
||||
loc: {
|
||||
start: { offset: 0, line: 1, column: 1 },
|
||||
end: { offset: 6, line: 1, column: 7 },
|
||||
source: '†'
|
||||
}
|
||||
})
|
||||
expect(spy.mock.calls).toMatchObject([
|
||||
[
|
||||
{
|
||||
code: ErrorCodes.CONTROL_CHARACTER_REFERENCE,
|
||||
loc: {
|
||||
start: { offset: 0, line: 1, column: 1 }
|
||||
}
|
||||
}
|
||||
]
|
||||
])
|
||||
})
|
||||
})
|
||||
|
||||
describe('Interpolation', () => {
|
||||
@@ -1652,12 +1543,10 @@ foo
|
||||
expect(baz.loc.end).toEqual({ line: 2, column: 28, offset })
|
||||
})
|
||||
|
||||
describe('namedCharacterReferences option', () => {
|
||||
describe('decodeEntities option', () => {
|
||||
test('use the given map', () => {
|
||||
const ast: any = baseParse('&∪︀', {
|
||||
namedCharacterReferences: {
|
||||
'cups;': '\u222A\uFE00' // UNION with serifs
|
||||
},
|
||||
decodeEntities: text => text.replace('∪︀', '\u222A\uFE00'),
|
||||
onError: () => {} // Ignore errors
|
||||
})
|
||||
|
||||
@@ -1756,60 +1645,6 @@ foo
|
||||
errors: []
|
||||
}
|
||||
],
|
||||
ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE: [
|
||||
{
|
||||
code: '<template>&#a;</template>',
|
||||
errors: [
|
||||
{
|
||||
type: ErrorCodes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
|
||||
loc: { offset: 10, line: 1, column: 11 }
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
code: '<template>&#xg;</template>',
|
||||
errors: [
|
||||
{
|
||||
type: ErrorCodes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
|
||||
loc: { offset: 10, line: 1, column: 11 }
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
code: '<template>c</template>',
|
||||
errors: []
|
||||
},
|
||||
{
|
||||
code: '<template>ÿ</template>',
|
||||
errors: []
|
||||
},
|
||||
{
|
||||
code: '<template attr="&#a;"></template>',
|
||||
errors: [
|
||||
{
|
||||
type: ErrorCodes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
|
||||
loc: { offset: 16, line: 1, column: 17 }
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
code: '<template attr="&#xg;"></template>',
|
||||
errors: [
|
||||
{
|
||||
type: ErrorCodes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
|
||||
loc: { offset: 16, line: 1, column: 17 }
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
code: '<template attr="c"></template>',
|
||||
errors: []
|
||||
},
|
||||
{
|
||||
code: '<template attr="ÿ"></template>',
|
||||
errors: []
|
||||
}
|
||||
],
|
||||
CDATA_IN_HTML_CONTENT: [
|
||||
{
|
||||
code: '<template><![CDATA[cdata]]></template>',
|
||||
@@ -1825,37 +1660,6 @@ foo
|
||||
errors: []
|
||||
}
|
||||
],
|
||||
CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE: [
|
||||
{
|
||||
code: '<template>�</template>',
|
||||
errors: [
|
||||
{
|
||||
type: ErrorCodes.CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE,
|
||||
loc: { offset: 10, line: 1, column: 11 }
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
CONTROL_CHARACTER_REFERENCE: [
|
||||
{
|
||||
code: '<template></template>',
|
||||
errors: [
|
||||
{
|
||||
type: ErrorCodes.CONTROL_CHARACTER_REFERENCE,
|
||||
loc: { offset: 10, line: 1, column: 11 }
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
code: '<template></template>',
|
||||
errors: [
|
||||
{
|
||||
type: ErrorCodes.CONTROL_CHARACTER_REFERENCE,
|
||||
loc: { offset: 10, line: 1, column: 11 }
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
DUPLICATE_ATTRIBUTE: [
|
||||
{
|
||||
code: '<template><div id="" id=""></div></template>',
|
||||
@@ -2412,36 +2216,6 @@ foo
|
||||
]
|
||||
}
|
||||
],
|
||||
MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE: [
|
||||
{
|
||||
code: '<template>&</template>',
|
||||
options: { namedCharacterReferences: { amp: '&' } },
|
||||
errors: [
|
||||
{
|
||||
type: ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
|
||||
loc: { offset: 14, line: 1, column: 15 }
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
code: '<template>(</template>',
|
||||
errors: [
|
||||
{
|
||||
type: ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
|
||||
loc: { offset: 14, line: 1, column: 15 }
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
code: '<template>@</template>',
|
||||
errors: [
|
||||
{
|
||||
type: ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
|
||||
loc: { offset: 15, line: 1, column: 16 }
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
MISSING_WHITESPACE_BETWEEN_ATTRIBUTES: [
|
||||
{
|
||||
code: '<template><div id="foo"class="bar"></div></template>',
|
||||
@@ -2500,48 +2274,6 @@ foo
|
||||
]
|
||||
}
|
||||
],
|
||||
NONCHARACTER_CHARACTER_REFERENCE: [
|
||||
{
|
||||
code: '<template></template>',
|
||||
errors: [
|
||||
{
|
||||
type: ErrorCodes.NONCHARACTER_CHARACTER_REFERENCE,
|
||||
loc: { offset: 10, line: 1, column: 11 }
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
code: '<template></template>',
|
||||
errors: [
|
||||
{
|
||||
type: ErrorCodes.NONCHARACTER_CHARACTER_REFERENCE,
|
||||
loc: { offset: 10, line: 1, column: 11 }
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
NULL_CHARACTER_REFERENCE: [
|
||||
{
|
||||
code: '<template>�</template>',
|
||||
errors: [
|
||||
{
|
||||
type: ErrorCodes.NULL_CHARACTER_REFERENCE,
|
||||
loc: { offset: 10, line: 1, column: 11 }
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
SURROGATE_CHARACTER_REFERENCE: [
|
||||
{
|
||||
code: '<template>�</template>',
|
||||
errors: [
|
||||
{
|
||||
type: ErrorCodes.SURROGATE_CHARACTER_REFERENCE,
|
||||
loc: { offset: 10, line: 1, column: 11 }
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME: [
|
||||
{
|
||||
code: "<template><div a\"bc=''></div></template>",
|
||||
|
||||
@@ -32,10 +32,7 @@ export function createCompilerError<T extends number>(
|
||||
export const enum ErrorCodes {
|
||||
// parse errors
|
||||
ABRUPT_CLOSING_OF_EMPTY_COMMENT,
|
||||
ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
|
||||
CDATA_IN_HTML_CONTENT,
|
||||
CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE,
|
||||
CONTROL_CHARACTER_REFERENCE,
|
||||
DUPLICATE_ATTRIBUTE,
|
||||
END_TAG_WITH_ATTRIBUTES,
|
||||
END_TAG_WITH_TRAILING_SOLIDUS,
|
||||
@@ -49,12 +46,8 @@ export const enum ErrorCodes {
|
||||
INVALID_FIRST_CHARACTER_OF_TAG_NAME,
|
||||
MISSING_ATTRIBUTE_VALUE,
|
||||
MISSING_END_TAG_NAME,
|
||||
MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
|
||||
MISSING_WHITESPACE_BETWEEN_ATTRIBUTES,
|
||||
NESTED_COMMENT,
|
||||
NONCHARACTER_CHARACTER_REFERENCE,
|
||||
NULL_CHARACTER_REFERENCE,
|
||||
SURROGATE_CHARACTER_REFERENCE,
|
||||
UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME,
|
||||
UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE,
|
||||
UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME,
|
||||
@@ -101,14 +94,8 @@ export const enum ErrorCodes {
|
||||
export const errorMessages: { [code: number]: string } = {
|
||||
// parse errors
|
||||
[ErrorCodes.ABRUPT_CLOSING_OF_EMPTY_COMMENT]: 'Illegal comment.',
|
||||
[ErrorCodes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE]:
|
||||
'Illegal numeric character reference: invalid character.',
|
||||
[ErrorCodes.CDATA_IN_HTML_CONTENT]:
|
||||
'CDATA section is allowed only in XML context.',
|
||||
[ErrorCodes.CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE]:
|
||||
'Illegal numeric character reference: too big.',
|
||||
[ErrorCodes.CONTROL_CHARACTER_REFERENCE]:
|
||||
'Illegal numeric character reference: control character.',
|
||||
[ErrorCodes.DUPLICATE_ATTRIBUTE]: 'Duplicate attribute.',
|
||||
[ErrorCodes.END_TAG_WITH_ATTRIBUTES]: 'End tag cannot have attributes.',
|
||||
[ErrorCodes.END_TAG_WITH_TRAILING_SOLIDUS]: "Illegal '/' in tags.",
|
||||
@@ -124,17 +111,9 @@ export const errorMessages: { [code: number]: string } = {
|
||||
"Illegal tag name. Use '<' to print '<'.",
|
||||
[ErrorCodes.MISSING_ATTRIBUTE_VALUE]: 'Attribute value was expected.',
|
||||
[ErrorCodes.MISSING_END_TAG_NAME]: 'End tag name was expected.',
|
||||
[ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE]:
|
||||
'Semicolon was expected.',
|
||||
[ErrorCodes.MISSING_WHITESPACE_BETWEEN_ATTRIBUTES]:
|
||||
'Whitespace was expected.',
|
||||
[ErrorCodes.NESTED_COMMENT]: "Unexpected '<!--' in comment.",
|
||||
[ErrorCodes.NONCHARACTER_CHARACTER_REFERENCE]:
|
||||
'Illegal numeric character reference: non character.',
|
||||
[ErrorCodes.NULL_CHARACTER_REFERENCE]:
|
||||
'Illegal numeric character reference: null character.',
|
||||
[ErrorCodes.SURROGATE_CHARACTER_REFERENCE]:
|
||||
'Illegal numeric character reference: non-pair surrogate.',
|
||||
[ErrorCodes.UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME]:
|
||||
'Attribute name cannot contain U+0022 ("), U+0027 (\'), and U+003C (<).',
|
||||
[ErrorCodes.UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE]:
|
||||
|
||||
@@ -26,13 +26,7 @@ export interface ParserOptions {
|
||||
parent: ElementNode | undefined
|
||||
) => TextModes
|
||||
delimiters?: [string, string] // ['{{', '}}']
|
||||
|
||||
// Map to HTML entities. E.g., `{ "amp;": "&" }`
|
||||
// The full set is https://html.spec.whatwg.org/multipage/named-characters.html#named-character-references
|
||||
namedCharacterReferences?: Record<string, string>
|
||||
// this number is based on the map above, but it should be pre-computed
|
||||
// to avoid the cost on every parse() call.
|
||||
maxCRNameLength?: number
|
||||
decodeEntities?: (rawText: string, asAttr: boolean) => string
|
||||
onError?: (error: CompilerError) => void
|
||||
}
|
||||
|
||||
|
||||
@@ -30,6 +30,18 @@ type OptionalOptions = 'isNativeTag' | 'isBuiltInComponent'
|
||||
type MergedParserOptions = Omit<Required<ParserOptions>, OptionalOptions> &
|
||||
Pick<ParserOptions, OptionalOptions>
|
||||
|
||||
// The default decoder only provides escapes for characters reserved as part of
|
||||
// the template syntax, and is only used if the custom renderer did not provide
|
||||
// a platform-specific decoder.
|
||||
const decodeRE = /&(gt|lt|amp|apos|quot);/g
|
||||
const decodeMap: Record<string, string> = {
|
||||
gt: '>',
|
||||
lt: '<',
|
||||
amp: '&',
|
||||
apos: "'",
|
||||
quot: '"'
|
||||
}
|
||||
|
||||
export const defaultParserOptions: MergedParserOptions = {
|
||||
delimiters: [`{{`, `}}`],
|
||||
getNamespace: () => Namespaces.HTML,
|
||||
@@ -37,14 +49,8 @@ export const defaultParserOptions: MergedParserOptions = {
|
||||
isVoidTag: NO,
|
||||
isPreTag: NO,
|
||||
isCustomElement: NO,
|
||||
namedCharacterReferences: {
|
||||
'gt;': '>',
|
||||
'lt;': '<',
|
||||
'amp;': '&',
|
||||
'apos;': "'",
|
||||
'quot;': '"'
|
||||
},
|
||||
maxCRNameLength: 5,
|
||||
decodeEntities: (rawText: string): string =>
|
||||
rawText.replace(decodeRE, (_, p1) => decodeMap[p1]),
|
||||
onError: defaultOnError
|
||||
}
|
||||
|
||||
@@ -57,7 +63,7 @@ export const enum TextModes {
|
||||
ATTRIBUTE_VALUE
|
||||
}
|
||||
|
||||
interface ParserContext {
|
||||
export interface ParserContext {
|
||||
options: MergedParserOptions
|
||||
readonly originalSource: string
|
||||
source: string
|
||||
@@ -812,128 +818,21 @@ function parseTextData(
|
||||
length: number,
|
||||
mode: TextModes
|
||||
): string {
|
||||
let rawText = context.source.slice(0, length)
|
||||
const rawText = context.source.slice(0, length)
|
||||
advanceBy(context, length)
|
||||
if (
|
||||
mode === TextModes.RAWTEXT ||
|
||||
mode === TextModes.CDATA ||
|
||||
rawText.indexOf('&') === -1
|
||||
) {
|
||||
advanceBy(context, length)
|
||||
return rawText
|
||||
} else {
|
||||
// DATA, RCDATA or ATTRIBUTE_VALUE containing "&". Entity decoding required.
|
||||
return context.options.decodeEntities(
|
||||
rawText,
|
||||
mode === TextModes.ATTRIBUTE_VALUE
|
||||
)
|
||||
}
|
||||
|
||||
// DATA or RCDATA containing "&". Entity decoding required.
|
||||
const end = context.offset + length
|
||||
let decodedText = ''
|
||||
|
||||
function advance(length: number) {
|
||||
advanceBy(context, length)
|
||||
rawText = rawText.slice(length)
|
||||
}
|
||||
|
||||
while (context.offset < end) {
|
||||
const head = /&(?:#x?)?/i.exec(rawText)
|
||||
if (!head || context.offset + head.index >= end) {
|
||||
const remaining = end - context.offset
|
||||
decodedText += rawText.slice(0, remaining)
|
||||
advance(remaining)
|
||||
break
|
||||
}
|
||||
|
||||
// Advance to the "&".
|
||||
decodedText += rawText.slice(0, head.index)
|
||||
advance(head.index)
|
||||
|
||||
if (head[0] === '&') {
|
||||
// Named character reference.
|
||||
let name = ''
|
||||
let value: string | undefined = undefined
|
||||
if (/[0-9a-z]/i.test(rawText[1])) {
|
||||
for (
|
||||
let length = context.options.maxCRNameLength;
|
||||
!value && length > 0;
|
||||
--length
|
||||
) {
|
||||
name = rawText.substr(1, length)
|
||||
value = context.options.namedCharacterReferences[name]
|
||||
}
|
||||
if (value) {
|
||||
const semi = name.endsWith(';')
|
||||
if (
|
||||
mode === TextModes.ATTRIBUTE_VALUE &&
|
||||
!semi &&
|
||||
/[=a-z0-9]/i.test(rawText[name.length + 1] || '')
|
||||
) {
|
||||
decodedText += '&' + name
|
||||
advance(1 + name.length)
|
||||
} else {
|
||||
decodedText += value
|
||||
advance(1 + name.length)
|
||||
if (!semi) {
|
||||
emitError(
|
||||
context,
|
||||
ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE
|
||||
)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
decodedText += '&' + name
|
||||
advance(1 + name.length)
|
||||
}
|
||||
} else {
|
||||
decodedText += '&'
|
||||
advance(1)
|
||||
}
|
||||
} else {
|
||||
// Numeric character reference.
|
||||
const hex = head[0] === '&#x'
|
||||
const pattern = hex ? /^&#x([0-9a-f]+);?/i : /^&#([0-9]+);?/
|
||||
const body = pattern.exec(rawText)
|
||||
if (!body) {
|
||||
decodedText += head[0]
|
||||
emitError(
|
||||
context,
|
||||
ErrorCodes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE
|
||||
)
|
||||
advance(head[0].length)
|
||||
} else {
|
||||
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
|
||||
let cp = Number.parseInt(body[1], hex ? 16 : 10)
|
||||
if (cp === 0) {
|
||||
emitError(context, ErrorCodes.NULL_CHARACTER_REFERENCE)
|
||||
cp = 0xfffd
|
||||
} else if (cp > 0x10ffff) {
|
||||
emitError(
|
||||
context,
|
||||
ErrorCodes.CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE
|
||||
)
|
||||
cp = 0xfffd
|
||||
} else if (cp >= 0xd800 && cp <= 0xdfff) {
|
||||
emitError(context, ErrorCodes.SURROGATE_CHARACTER_REFERENCE)
|
||||
cp = 0xfffd
|
||||
} else if ((cp >= 0xfdd0 && cp <= 0xfdef) || (cp & 0xfffe) === 0xfffe) {
|
||||
emitError(context, ErrorCodes.NONCHARACTER_CHARACTER_REFERENCE)
|
||||
} else if (
|
||||
(cp >= 0x01 && cp <= 0x08) ||
|
||||
cp === 0x0b ||
|
||||
(cp >= 0x0d && cp <= 0x1f) ||
|
||||
(cp >= 0x7f && cp <= 0x9f)
|
||||
) {
|
||||
emitError(context, ErrorCodes.CONTROL_CHARACTER_REFERENCE)
|
||||
cp = CCR_REPLACEMENTS[cp] || cp
|
||||
}
|
||||
decodedText += String.fromCodePoint(cp)
|
||||
advance(body[0].length)
|
||||
if (!body![0].endsWith(';')) {
|
||||
emitError(
|
||||
context,
|
||||
ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return decodedText
|
||||
}
|
||||
|
||||
function getCursor(context: ParserContext): Position {
|
||||
@@ -1052,34 +951,3 @@ function startsWithEndTagOpen(source: string, tag: string): boolean {
|
||||
/[\t\n\f />]/.test(source[2 + tag.length] || '>')
|
||||
)
|
||||
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
|
||||
const CCR_REPLACEMENTS: { [key: number]: number | undefined } = {
|
||||
0x80: 0x20ac,
|
||||
0x82: 0x201a,
|
||||
0x83: 0x0192,
|
||||
0x84: 0x201e,
|
||||
0x85: 0x2026,
|
||||
0x86: 0x2020,
|
||||
0x87: 0x2021,
|
||||
0x88: 0x02c6,
|
||||
0x89: 0x2030,
|
||||
0x8a: 0x0160,
|
||||
0x8b: 0x2039,
|
||||
0x8c: 0x0152,
|
||||
0x8e: 0x017d,
|
||||
0x91: 0x2018,
|
||||
0x92: 0x2019,
|
||||
0x93: 0x201c,
|
||||
0x94: 0x201d,
|
||||
0x95: 0x2022,
|
||||
0x96: 0x2013,
|
||||
0x97: 0x2014,
|
||||
0x98: 0x02dc,
|
||||
0x99: 0x2122,
|
||||
0x9a: 0x0161,
|
||||
0x9b: 0x203a,
|
||||
0x9c: 0x0153,
|
||||
0x9e: 0x017e,
|
||||
0x9f: 0x0178
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user