perf(compiler): improve perf of parseTextData

This commit is contained in:
Evan You 2019-11-15 16:17:52 -05:00
parent 96623d0d52
commit 4fef8f342a

View File

@ -798,40 +798,49 @@ function parseTextData(
length: number, length: number,
mode: TextModes mode: TextModes
): string { ): string {
if (mode === TextModes.RAWTEXT || mode === TextModes.CDATA) { let rawText = context.source.slice(0, length)
const text = context.source.slice(0, length) if (
mode === TextModes.RAWTEXT ||
mode === TextModes.CDATA ||
rawText.indexOf('&') === -1
) {
advanceBy(context, length) advanceBy(context, length)
return text return rawText
} }
// DATA or RCDATA. Entity decoding required. // DATA or RCDATA containing "&". Entity decoding required.
const end = context.offset + length const end = context.offset + length
let text: string = '' let decodedText = ''
function advance(length: number) {
advanceBy(context, length)
rawText = rawText.slice(length)
}
while (context.offset < end) { while (context.offset < end) {
const head = /&(?:#x?)?/i.exec(context.source) const head = /&(?:#x?)?/i.exec(rawText)
if (!head || context.offset + head.index >= end) { if (!head || context.offset + head.index >= end) {
const remaining = end - context.offset const remaining = end - context.offset
text += context.source.slice(0, remaining) decodedText += rawText.slice(0, remaining)
advanceBy(context, remaining) advance(remaining)
break break
} }
// Advance to the "&". // Advance to the "&".
text += context.source.slice(0, head.index) decodedText += rawText.slice(0, head.index)
advanceBy(context, head.index) advance(head.index)
if (head[0] === '&') { if (head[0] === '&') {
// Named character reference. // Named character reference.
let name = '', let name = '',
value: string | undefined = undefined value: string | undefined = undefined
if (/[0-9a-z]/i.test(context.source[1])) { if (/[0-9a-z]/i.test(rawText[1])) {
for ( for (
let length = context.maxCRNameLength; let length = context.maxCRNameLength;
!value && length > 0; !value && length > 0;
--length --length
) { ) {
name = context.source.substr(1, length) name = rawText.substr(1, length)
value = context.options.namedCharacterReferences[name] value = context.options.namedCharacterReferences[name]
} }
if (value) { if (value) {
@ -839,14 +848,13 @@ function parseTextData(
if ( if (
mode === TextModes.ATTRIBUTE_VALUE && mode === TextModes.ATTRIBUTE_VALUE &&
!semi && !semi &&
/[=a-z0-9]/i.test(context.source[1 + name.length] || '') /[=a-z0-9]/i.test(rawText[1 + name.length] || '')
) { ) {
text += '&' decodedText += '&' + name
text += name advance(1 + name.length)
advanceBy(context, 1 + name.length)
} else { } else {
text += value decodedText += value
advanceBy(context, 1 + name.length) advance(1 + name.length)
if (!semi) { if (!semi) {
emitError( emitError(
context, context,
@ -856,26 +864,25 @@ function parseTextData(
} }
} else { } else {
emitError(context, ErrorCodes.UNKNOWN_NAMED_CHARACTER_REFERENCE) emitError(context, ErrorCodes.UNKNOWN_NAMED_CHARACTER_REFERENCE)
text += '&' decodedText += '&' + name
text += name advance(1 + name.length)
advanceBy(context, 1 + name.length)
} }
} else { } else {
text += '&' decodedText += '&'
advanceBy(context, 1) advance(1)
} }
} else { } else {
// Numeric character reference. // Numeric character reference.
const hex = head[0] === '&#x' const hex = head[0] === '&#x'
const pattern = hex ? /^&#x([0-9a-f]+);?/i : /^&#([0-9]+);?/ const pattern = hex ? /^&#x([0-9a-f]+);?/i : /^&#([0-9]+);?/
const body = pattern.exec(context.source) const body = pattern.exec(rawText)
if (!body) { if (!body) {
text += head[0] decodedText += head[0]
emitError( emitError(
context, context,
ErrorCodes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE ErrorCodes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE
) )
advanceBy(context, head[0].length) advance(head[0].length)
} else { } else {
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state // https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
let cp = Number.parseInt(body[1], hex ? 16 : 10) let cp = Number.parseInt(body[1], hex ? 16 : 10)
@ -902,8 +909,8 @@ function parseTextData(
emitError(context, ErrorCodes.CONTROL_CHARACTER_REFERENCE) emitError(context, ErrorCodes.CONTROL_CHARACTER_REFERENCE)
cp = CCR_REPLACEMENTS[cp] || cp cp = CCR_REPLACEMENTS[cp] || cp
} }
text += String.fromCodePoint(cp) decodedText += String.fromCodePoint(cp)
advanceBy(context, body[0].length) advance(body[0].length)
if (!body![0].endsWith(';')) { if (!body![0].endsWith(';')) {
emitError( emitError(
context, context,
@ -913,7 +920,7 @@ function parseTextData(
} }
} }
} }
return text return decodedText
} }
function getCursor(context: ParserContext): Position { function getCursor(context: ParserContext): Position {