2020-04-09 06:51:25 +08:00
|
|
|
import { ParserOptions } from '@vue/compiler-core'
|
|
|
|
import namedCharacterReferences from './namedChars.json'
|
|
|
|
|
|
|
|
// Computed lazily so that this file stays tree-shakable in browser builds.
|
|
|
|
// Length of the longest name in namedChars.json. Stays unset until decodeHtml
// first meets a named character reference and fills it in.
let maxCRNameLength: number
|
|
|
|
|
|
|
|
/**
 * Decodes the HTML character references (`&amp;`, `&#65;`, `&#x41;`, ...)
 * found in `rawText` and returns the decoded string.
 *
 * Named references are looked up in `namedChars.json`, trying the longest
 * candidate name first. Numeric references are mapped following the WHATWG
 * "numeric character reference end state" rules.
 *
 * @param rawText - Text that may contain character references.
 * @param asAttr - When truthy, a named reference that lacks the trailing `;`
 *   and is directly followed by `=` or an ASCII alphanumeric is left as is.
 * @returns `rawText` with every recognized character reference replaced.
 */
export const decodeHtml: ParserOptions['decodeEntities'] = (
  rawText,
  asAttr
) => {
  let offset = 0
  const end = rawText.length
  let decodedText = ''

  // Drops `length` characters from the front of the not-yet-decoded input.
  function advance(length: number) {
    offset += length
    rawText = rawText.slice(length)
  }

  while (offset < end) {
    const head = /&(?:#x?)?/i.exec(rawText)
    if (!head || offset + head.index >= end) {
      // No further "&": the rest of the input is plain text.
      const remaining = end - offset
      decodedText += rawText.slice(0, remaining)
      advance(remaining)
      break
    }

    // Advance to the "&".
    decodedText += rawText.slice(0, head.index)
    advance(head.index)

    if (head[0] === '&') {
      // Named character reference.
      let name = ''
      let value: string | undefined = undefined
      // `|| ''` keeps a trailing "&" (no next character) out of this branch;
      // `test(undefined)` would otherwise match against the text "undefined".
      if (/[0-9a-z]/i.test(rawText[1] || '')) {
        if (!maxCRNameLength) {
          maxCRNameLength = Object.keys(namedCharacterReferences).reduce(
            (max, name) => Math.max(max, name.length),
            0
          )
        }
        const references = namedCharacterReferences as Record<string, string>
        // Try the longest possible name first and shrink until one matches.
        for (let length = maxCRNameLength; !value && length > 0; --length) {
          name = rawText.slice(1, 1 + length)
          // Own-property check: names such as "constructor" or "toString"
          // must not resolve to members inherited from Object.prototype.
          value = Object.prototype.hasOwnProperty.call(references, name)
            ? references[name]
            : undefined
        }
        if (value) {
          const semi = name.endsWith(';')
          if (
            asAttr &&
            !semi &&
            /[=a-z0-9]/i.test(rawText[name.length + 1] || '')
          ) {
            // In attribute mode an unterminated reference followed by "=" or
            // an alphanumeric is kept as literal text.
            decodedText += '&' + name
          } else {
            decodedText += value
          }
          advance(1 + name.length)
        } else {
          // Nothing matched; `name` holds the last (shortest) candidate tried,
          // which is emitted verbatim together with the "&".
          decodedText += '&' + name
          advance(1 + name.length)
        }
      } else {
        decodedText += '&'
        advance(1)
      }
    } else {
      // Numeric character reference.
      // `head` is matched case-insensitively, so "&#X" must count as hex too.
      const hex = head[0].toLowerCase() === '&#x'
      const pattern = hex ? /^&#x([0-9a-f]+);?/i : /^&#([0-9]+);?/
      const body = pattern.exec(rawText)
      if (!body) {
        // "&#" / "&#x" without digits is plain text.
        decodedText += head[0]
        advance(head[0].length)
      } else {
        // https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
        let cp = Number.parseInt(body[1], hex ? 16 : 10)
        if (cp === 0) {
          cp = 0xfffd
        } else if (cp > 0x10ffff) {
          cp = 0xfffd
        } else if (cp >= 0xd800 && cp <= 0xdfff) {
          cp = 0xfffd
        } else if ((cp >= 0xfdd0 && cp <= 0xfdef) || (cp & 0xfffe) === 0xfffe) {
          // noop: these code points are emitted unchanged
        } else if (
          (cp >= 0x01 && cp <= 0x08) ||
          cp === 0x0b ||
          (cp >= 0x0d && cp <= 0x1f) ||
          (cp >= 0x7f && cp <= 0x9f)
        ) {
          // Use the table's replacement when it has one, else keep the code.
          cp = CCR_REPLACEMENTS[cp] || cp
        }
        decodedText += String.fromCodePoint(cp)
        advance(body[0].length)
      }
    }
  }
  return decodedText
}
|
|
|
|
|
|
|
|
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
// Replacement code points for numeric character references in the 0x80-0x9f
// range. Codes in that range without an entry here (0x81, 0x8d, 0x8f, 0x90,
// 0x9d) yield `undefined`, which lets the caller fall back to the code itself.
const CCR_REPLACEMENTS: Record<number, number | undefined> = {
  0x80: 0x20ac,
  0x82: 0x201a,
  0x83: 0x0192,
  0x84: 0x201e,
  0x85: 0x2026,
  0x86: 0x2020,
  0x87: 0x2021,
  0x88: 0x02c6,
  0x89: 0x2030,
  0x8a: 0x0160,
  0x8b: 0x2039,
  0x8c: 0x0152,
  0x8e: 0x017d,
  0x91: 0x2018,
  0x92: 0x2019,
  0x93: 0x201c,
  0x94: 0x201d,
  0x95: 0x2022,
  0x96: 0x2013,
  0x97: 0x2014,
  0x98: 0x02dc,
  0x99: 0x2122,
  0x9a: 0x0161,
  0x9b: 0x203a,
  0x9c: 0x0153,
  0x9e: 0x017e,
  0x9f: 0x0178
}
|