vue3-yuanma/packages/compiler-dom/src/decodeHtml.ts
Evan You 1f6e72b110 fix(compiler): support full range of entity decoding in browser builds
BREAKING CHANGE: compiler options have been adjusted.
    - new option `decodeEntities` is added.
    - `namedCharacterReferences` option has been removed.
    - `maxCRNameLength` option has been removed.
2020-04-08 18:51:25 -04:00

134 lines
3.5 KiB
TypeScript

import { ParserOptions } from '@vue/compiler-core'
import namedCharacterReferences from './namedChars.json'
// Length of the longest key in `namedCharacterReferences`. Computed lazily on
// first use rather than at module load, to keep this file tree-shakable for
// browser builds.
let maxCRNameLength: number

/**
 * Decodes HTML character references in a piece of raw text.
 *
 * Three forms are handled:
 * - named references (`&amp;`, `&lt;`, ...), looked up in
 *   `namedCharacterReferences` using the longest matching name;
 * - decimal numeric references (`&#65;`);
 * - hexadecimal numeric references (`&#x41;` or `&#X41;`).
 *
 * Anything that does not form a recognised reference is copied to the output
 * unchanged.
 *
 * @param rawText - Text that may contain character references.
 * @param asAttr - When truthy, a named reference that lacks its trailing `;`
 *   and is immediately followed by `=` or an ASCII alphanumeric is left
 *   undecoded.
 * @returns The decoded text.
 */
export const decodeHtml: ParserOptions['decodeEntities'] = (
  rawText,
  asAttr
) => {
  // Hoisted out of the loop: none of these use the `g`/`y` flag, so they
  // carry no state between `exec` calls.
  const headPattern = /&(?:#x?)?/i
  const hexBodyPattern = /^&#x([0-9a-f]+);?/i
  const decimalBodyPattern = /^&#([0-9]+);?/

  const end = rawText.length
  let offset = 0
  let decodedText = ''

  // Consumes `length` characters from the front of the remaining input.
  function advance(length: number) {
    offset += length
    rawText = rawText.slice(length)
  }

  while (offset < end) {
    const head = headPattern.exec(rawText)
    if (!head || offset + head.index >= end) {
      // No further "&": the rest of the input is plain text.
      const remaining = end - offset
      decodedText += rawText.slice(0, remaining)
      advance(remaining)
      break
    }

    // Advance to the "&", copying the text before it verbatim.
    decodedText += rawText.slice(0, head.index)
    advance(head.index)

    if (head[0] === '&') {
      // Named character reference.
      // `|| ''` keeps a trailing "&" from being tested against the string
      // "undefined", which would needlessly trigger the table lookup below.
      if (/[0-9a-z]/i.test(rawText[1] || '')) {
        if (!maxCRNameLength) {
          maxCRNameLength = Object.keys(namedCharacterReferences).reduce(
            (max, name) => Math.max(max, name.length),
            0
          )
        }

        // Try the longest candidate name first and shrink until one matches.
        // If nothing matches, `name` ends up as the single character that
        // follows the "&".
        let name = ''
        let value: string | undefined = undefined
        for (let length = maxCRNameLength; !value && length > 0; --length) {
          name = rawText.slice(1, 1 + length)
          value = (namedCharacterReferences as Record<string, string>)[name]
        }

        // In attribute values, a reference without ";" that is followed by
        // "=" or an alphanumeric is kept as literal text.
        const keepLiteralInAttr =
          asAttr &&
          !name.endsWith(';') &&
          /[=a-z0-9]/i.test(rawText[name.length + 1] || '')

        if (value && !keepLiteralInAttr) {
          decodedText += value
        } else {
          decodedText += '&' + name
        }
        advance(1 + name.length)
      } else {
        // A bare "&" that does not start a reference.
        decodedText += '&'
        advance(1)
      }
    } else {
      // Numeric character reference.
      // The head pattern is case-insensitive, so the hex marker may be
      // "&#x" or "&#X"; both must select the hexadecimal body pattern.
      const hex = head[0].toLowerCase() === '&#x'
      const body = (hex ? hexBodyPattern : decimalBodyPattern).exec(rawText)
      if (!body) {
        // "&#" / "&#x" without digits: emit as literal text.
        decodedText += head[0]
        advance(head[0].length)
      } else {
        // https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
        let cp = Number.parseInt(body[1], hex ? 16 : 10)
        if (cp === 0 || cp > 0x10ffff || (cp >= 0xd800 && cp <= 0xdfff)) {
          // Null, out-of-range and surrogate code points become U+FFFD.
          cp = 0xfffd
        } else if (
          (cp >= 0x01 && cp <= 0x08) ||
          cp === 0x0b ||
          (cp >= 0x0d && cp <= 0x1f) ||
          (cp >= 0x7f && cp <= 0x9f)
        ) {
          // Control characters: remap the ones listed in CCR_REPLACEMENTS and
          // keep the others as-is.
          cp = CCR_REPLACEMENTS[cp] || cp
        }
        // Every other code point, noncharacters included, is emitted as-is.
        decodedText += String.fromCodePoint(cp)
        advance(body[0].length)
      }
    }
  }
  return decodedText
}
// Replacement code points for numeric character references in the
// 0x80-0x9F range. Code points in that range that are absent here have no
// replacement.
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
const CCR_REPLACEMENTS: Record<number, number | undefined> = {
  0x80: 0x20ac, // EURO SIGN
  0x82: 0x201a, // SINGLE LOW-9 QUOTATION MARK
  0x83: 0x0192, // LATIN SMALL LETTER F WITH HOOK
  0x84: 0x201e, // DOUBLE LOW-9 QUOTATION MARK
  0x85: 0x2026, // HORIZONTAL ELLIPSIS
  0x86: 0x2020, // DAGGER
  0x87: 0x2021, // DOUBLE DAGGER
  0x88: 0x02c6, // MODIFIER LETTER CIRCUMFLEX ACCENT
  0x89: 0x2030, // PER MILLE SIGN
  0x8a: 0x0160, // LATIN CAPITAL LETTER S WITH CARON
  0x8b: 0x2039, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
  0x8c: 0x0152, // LATIN CAPITAL LIGATURE OE
  0x8e: 0x017d, // LATIN CAPITAL LETTER Z WITH CARON

  0x91: 0x2018, // LEFT SINGLE QUOTATION MARK
  0x92: 0x2019, // RIGHT SINGLE QUOTATION MARK
  0x93: 0x201c, // LEFT DOUBLE QUOTATION MARK
  0x94: 0x201d, // RIGHT DOUBLE QUOTATION MARK
  0x95: 0x2022, // BULLET
  0x96: 0x2013, // EN DASH
  0x97: 0x2014, // EM DASH
  0x98: 0x02dc, // SMALL TILDE
  0x99: 0x2122, // TRADE MARK SIGN
  0x9a: 0x0161, // LATIN SMALL LETTER S WITH CARON
  0x9b: 0x203a, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
  0x9c: 0x0153, // LATIN SMALL LIGATURE OE
  0x9e: 0x017e, // LATIN SMALL LETTER Z WITH CARON
  0x9f: 0x0178 // LATIN CAPITAL LETTER Y WITH DIAERESIS
}