// Source: vue3-yuanma/packages/compiler-core/src/parse.ts
// (932 lines, 24 KiB, TypeScript)

import {
ErrorCodes,
CompilerError,
createCompilerError,
defaultOnError
} from './errors'
import {
assert,
advancePositionWithMutation,
advancePositionWithClone
} from './utils'
import {
Namespace,
Namespaces,
AttributeNode,
CommentNode,
DirectiveNode,
ElementNode,
ElementTypes,
ExpressionNode,
NodeTypes,
Position,
RootNode,
SourceLocation,
TextNode,
ChildNode
} from './ast'
/**
 * Options accepted by `parse`. Every field is optional; fields that are
 * omitted fall back to the matching entry of `defaultParserOptions`.
 */
export interface ParserOptions {
// Predicate for void tags: when it returns true the element is returned right
// after its start tag, without parsing children or an end tag.
isVoidTag?: (tag: string) => boolean // e.g. img, br, hr
// Chooses the namespace of an element from its tag name and its parent.
getNamespace?: (tag: string, parent: ElementNode | undefined) => Namespace
// Chooses the text mode used to parse the children of an element.
getTextMode?: (tag: string, ns: Namespace) => TextModes
// Opening and closing interpolation delimiters.
delimiters?: [string, string] // ['{{', '}}']
// When true, text nodes whose content is whitespace-only are not added to
// the children list.
ignoreSpaces?: boolean
// Map to HTML entities. E.g., `{ "amp;": "&" }`
// Keys are the text that follows the `&`, including the trailing `;` when
// the reference requires one.
// The full set is https://html.spec.whatwg.org/multipage/named-characters.html#named-character-references
namedCharacterReferences?: { [name: string]: string | undefined }
// Called once for every error the parser reports.
onError?: (error: CompilerError) => void
}
/**
 * Value used for each parser option the caller does not supply.
 */
export const defaultParserOptions: Required<ParserOptions> = {
delimiters: [`{{`, `}}`],
ignoreSpaces: true,
// Every element is treated as HTML unless the caller says otherwise.
getNamespace: () => Namespaces.HTML,
// Every element's children are parsed in DATA mode unless the caller says otherwise.
getTextMode: () => TextModes.DATA,
// No tag is considered void by default.
isVoidTag: () => false,
// Minimal entity table; keys omit the leading `&` and keep the trailing `;`.
namedCharacterReferences: {
'gt;': '>',
'lt;': '<',
'amp;': '&',
'apos;': "'",
'quot;': '"'
},
onError: defaultOnError
}
/**
 * How a run of content is tokenized. The table columns are: whether element
 * tags are recognized, whether character references are decoded, what ends
 * the content, and where the mode is used.
 */
export const enum TextModes {
// | Elements | Entities | End sign | Inside of
DATA, // | ✔ | ✔ | End tags of ancestors |
RCDATA, // | ✘ | ✔ | End tag of the parent | <textarea>
RAWTEXT, // | ✘ | ✘ | End tag of the parent | <style>,<script>
CDATA, // | ✘ | ✘ | `]]>` | <![CDATA[ ... ]]>
ATTRIBUTE_VALUE // | ✘ | ✔ | Length supplied by the caller | attribute values
}
/**
 * Mutable state threaded through every parse function.
 */
interface ParserContext {
// Caller options merged over `defaultParserOptions`.
options: Required<ParserOptions>
// The complete input; used to cut out the `source` text of locations.
readonly originalSource: string
// The part of the input that has not been consumed yet.
source: string
// Current cursor position. Starts at offset 0, line 1, column 1.
offset: number
line: number
column: number
// Length of the longest key in `namedCharacterReferences`; bounds the
// lookup window when decoding a named character reference.
maxCRNameLength: number
}
/**
 * Parse a template string into a root AST node.
 *
 * @param content - the template source
 * @param options - parser options; missing entries use the defaults
 * @returns a ROOT node whose `loc` spans everything that was consumed
 */
export function parse(content: string, options: ParserOptions = {}): RootNode {
  const context = createParserContext(content, options)
  const begin = getCursor(context)

  // The root has no ancestors and is always parsed in DATA mode.
  const children = parseChildren(context, TextModes.DATA, [])
  const loc = getSelection(context, begin)

  return {
    type: NodeTypes.ROOT,
    children,
    imports: [],
    statements: [],
    loc
  }
}
/**
 * Build the initial parser state for `content`.
 *
 * Options are resolved by starting from `defaultParserOptions` and copying
 * over only those caller-supplied entries whose value is not `undefined`.
 * A plain object spread would let `{ onError: undefined }` (or any other
 * explicitly-undefined entry) replace the default with `undefined`, which
 * breaks the `Required<ParserOptions>` contract the rest of the parser
 * relies on.
 *
 * @param content - the template source
 * @param options - caller-supplied parser options
 * @returns a context positioned at offset 0, line 1, column 1
 */
function createParserContext(
  content: string,
  options: ParserOptions
): ParserContext {
  const resolved: Required<ParserOptions> = { ...defaultParserOptions }
  for (const key of Object.keys(options) as Array<keyof ParserOptions>) {
    const value = options[key]
    if (value !== undefined) {
      Object.assign(resolved, { [key]: value })
    }
  }

  // Derive the lookup window from the very table that will be consulted,
  // so the two can never disagree.
  const maxCRNameLength = Object.keys(resolved.namedCharacterReferences).reduce(
    (max, name) => Math.max(max, name.length),
    0
  )

  return {
    options: resolved,
    column: 1,
    line: 1,
    offset: 0,
    originalSource: content,
    source: content,
    maxCRNameLength
  }
}
/**
 * Parse sibling nodes until `isEnd` reports that the current run of content
 * is over for the given mode and ancestor stack.
 *
 * @param context - parser state; its cursor is advanced past what is parsed
 * @param mode - text mode of the content being parsed
 * @param ancestors - stack of open elements, innermost last
 * @returns the parsed nodes, after filtering/merging by `pushNode`
 */
function parseChildren(
context: ParserContext,
mode: TextModes,
ancestors: ElementNode[]
): ChildNode[] {
const parent = last(ancestors)
// Content with no parent element is treated as HTML.
const ns = parent ? parent.ns : Namespaces.HTML
const nodes: ChildNode[] = []
while (!isEnd(context, mode, ancestors)) {
__DEV__ && assert(context.source.length > 0)
const s = context.source
// Stays null/undefined when no branch below produces a node; the input is
// then consumed as plain text.
let node: any = null
// The interpolation check comes first and is not restricted to DATA mode.
if (startsWith(s, context.options.delimiters[0])) {
// '{{'
node = parseInterpolation(context, mode)
} else if (mode === TextModes.DATA && s[0] === '<') {
// Markup is only recognized in DATA mode.
// https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
if (s.length === 1) {
// A lone '<' at the very end of the input.
emitError(context, ErrorCodes.EOF_BEFORE_TAG_NAME, 1)
} else if (s[1] === '!') {
// https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
if (startsWith(s, '<!--')) {
node = parseComment(context)
} else if (startsWith(s, '<!DOCTYPE')) {
// Ignore DOCTYPE by a limitation.
node = parseBogusComment(context)
} else if (startsWith(s, '<![CDATA[')) {
// CDATA sections are only parsed as such outside the HTML namespace.
if (ns !== Namespaces.HTML) {
node = parseCDATA(context, ancestors)
} else {
emitError(context, ErrorCodes.CDATA_IN_HTML_CONTENT)
node = parseBogusComment(context)
}
} else {
emitError(context, ErrorCodes.INCORRECTLY_OPENED_COMMENT)
node = parseBogusComment(context)
}
} else if (s[1] === '/') {
// An end tag that `isEnd` did not accept as closing any open ancestor.
// https://html.spec.whatwg.org/multipage/parsing.html#end-tag-open-state
if (s.length === 2) {
emitError(context, ErrorCodes.EOF_BEFORE_TAG_NAME, 2)
} else if (s[2] === '>') {
// `</>`: report it and skip the three characters without making a node.
emitError(context, ErrorCodes.MISSING_END_TAG_NAME, 2)
advanceBy(context, 3)
continue
} else if (/[a-z]/i.test(s[2])) {
// Stray end tag: report it, consume it, and discard the result.
emitError(context, ErrorCodes.X_INVALID_END_TAG)
parseTag(context, TagType.End, parent)
continue
} else {
emitError(context, ErrorCodes.INVALID_FIRST_CHARACTER_OF_TAG_NAME, 2)
node = parseBogusComment(context)
}
} else if (/[a-z]/i.test(s[1])) {
node = parseElement(context, ancestors)
} else if (s[1] === '?') {
emitError(
context,
ErrorCodes.UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME,
1
)
node = parseBogusComment(context)
} else {
// '<' followed by something that cannot start a tag; falls through to text.
emitError(context, ErrorCodes.INVALID_FIRST_CHARACTER_OF_TAG_NAME, 1)
}
}
if (!node) {
node = parseText(context, mode)
}
// `parseCDATA` returns an array of nodes; every other parser returns one node.
if (Array.isArray(node)) {
for (let i = 0; i < node.length; i++) {
pushNode(context, nodes, node[i])
}
} else {
pushNode(context, nodes, node)
}
}
return nodes
}
/**
 * Append `node` to `nodes`, applying the parser's filtering rules.
 *
 * - Comment nodes are dropped outside of development builds.
 * - Empty text nodes are dropped when the `ignoreSpaces` option is on.
 * - A text node that starts exactly where the previous text node ends is
 *   folded into that previous node (this happens for input like "a < b").
 *
 * @param context - parser state, read for its options
 * @param nodes - the list being built; may be mutated
 * @param node - the freshly parsed node
 */
function pushNode(
  context: ParserContext,
  nodes: ChildNode[],
  node: ChildNode
): void {
  if (node.type === NodeTypes.COMMENT && !__DEV__) {
    return
  }
  if (
    node.type === NodeTypes.TEXT &&
    node.isEmpty &&
    context.options.ignoreSpaces
  ) {
    return
  }

  const tail = last(nodes)
  if (
    !tail ||
    tail.type !== NodeTypes.TEXT ||
    node.type !== NodeTypes.TEXT ||
    tail.loc.end.offset !== node.loc.start.offset
  ) {
    // Nothing to merge with: keep the node as its own entry.
    nodes.push(node)
    return
  }

  // Two consecutive text nodes: extend the earlier one in place.
  tail.content += node.content
  tail.isEmpty = tail.content.trim().length === 0
  tail.loc.end = node.loc.end
  tail.loc.source += node.loc.source
}
/**
 * Parse a `<![CDATA[ ... ]]>` section and return the nodes found inside it.
 * Reports EOF_IN_CDATA when the input ends before the closing `]]>`.
 *
 * @param context - parser state positioned at `<![CDATA[`
 * @param ancestors - stack of open elements, innermost last
 */
function parseCDATA(
  context: ParserContext,
  ancestors: ElementNode[]
): ChildNode[] {
  if (__DEV__) {
    assert(last(ancestors) == null || last(ancestors)!.ns !== Namespaces.HTML)
    assert(startsWith(context.source, '<![CDATA['))
  }

  // Skip the 9-character opener.
  advanceBy(context, 9)
  const nodes = parseChildren(context, TextModes.CDATA, ancestors)

  if (context.source.length !== 0) {
    __DEV__ && assert(startsWith(context.source, ']]>'))
    // Skip the 3-character closer.
    advanceBy(context, 3)
  } else {
    emitError(context, ErrorCodes.EOF_IN_CDATA)
  }
  return nodes
}
/**
 * Parse an HTML comment starting at `<!--`.
 *
 * Consumes up to and including the first `-->` or `--!>`, or to the end of
 * the input when no closer exists, and reports the comment-related errors
 * along the way.
 *
 * @param context - parser state positioned at `<!--`
 * @returns a COMMENT node whose content is the text between opener and closer
 */
function parseComment(context: ParserContext): CommentNode {
__DEV__ && assert(startsWith(context.source, '<!--'))
const start = getCursor(context)
let content: string
// Regular comment.
// Finds the first `-->` or `--!>`; group 1 captures the stray `!`.
const match = /--(\!)?>/.exec(context.source)
if (!match) {
// Unterminated: everything after the opener is content. The error is emitted
// after advancing, so it is reported at the end of the input.
content = context.source.slice(4)
advanceBy(context, context.source.length)
emitError(context, ErrorCodes.EOF_IN_COMMENT)
} else {
// The closer overlaps the opener, as in `<!-->` or `<!--->`.
if (match.index <= 3) {
emitError(context, ErrorCodes.ABRUPT_CLOSING_OF_EMPTY_COMMENT)
}
if (match[1]) {
emitError(context, ErrorCodes.INCORRECTLY_CLOSED_COMMENT)
}
content = context.source.slice(4, match.index)
// Advancing with reporting nested comments.
// `s` is the text before the closer, indexed relative to the comment start.
// `prevIndex` is always (characters already consumed + 1), so each search
// starts one character past the cursor and each `advanceBy` moves the cursor
// exactly onto the nested `<!--` before the error is emitted.
const s = context.source.slice(0, match.index)
let prevIndex = 1,
nestedIndex = 0
while ((nestedIndex = s.indexOf('<!--', prevIndex)) !== -1) {
advanceBy(context, nestedIndex - prevIndex + 1)
// Not reported when the nested opener runs right up to the closer.
if (nestedIndex + 4 < s.length) {
emitError(context, ErrorCodes.NESTED_COMMENT)
}
prevIndex = nestedIndex + 1
}
// Consume whatever remains, including the closer itself.
advanceBy(context, match.index + match[0].length - prevIndex + 1)
}
return {
type: NodeTypes.COMMENT,
content,
loc: getSelection(context, start)
}
}
/**
 * Parse malformed markup (`<!...`, `<?...`, `</` followed by a non-letter)
 * as a comment that runs up to the next `>` or the end of the input.
 *
 * The content starts after `<` for the `<?` form and after the first two
 * characters otherwise.
 *
 * @param context - parser state positioned at the `<`
 * @returns a COMMENT node covering the consumed text
 */
function parseBogusComment(context: ParserContext): CommentNode | undefined {
  __DEV__ && assert(/^<(?:[\!\?]|\/[^a-z>])/i.test(context.source))

  const start = getCursor(context)
  const { source } = context
  const contentStart = source[1] === '?' ? 1 : 2
  const closeIndex = source.indexOf('>')
  const unterminated = closeIndex === -1

  const content = unterminated
    ? source.slice(contentStart)
    : source.slice(contentStart, closeIndex)
  // Either swallow the rest of the input, or everything through the '>'.
  advanceBy(context, unterminated ? source.length : closeIndex + 1)

  return {
    type: NodeTypes.COMMENT,
    content,
    loc: getSelection(context, start)
  }
}
/**
 * Parse a complete element: start tag, children, and end tag.
 *
 * Self-closing tags and tags the `isVoidTag` option accepts are returned
 * straight after the start tag. Otherwise the children are parsed in the
 * text mode chosen by the `getTextMode` option, and a missing end tag is
 * reported as X_MISSING_END_TAG.
 *
 * @param context - parser state positioned at `<` followed by a letter
 * @param ancestors - stack of open elements; pushed/popped around the children
 * @returns the element, with `loc` widened to cover children and end tag
 */
function parseElement(
  context: ParserContext,
  ancestors: ElementNode[]
): ElementNode | undefined {
  __DEV__ && assert(/^<[a-z]/i.test(context.source))

  const parent = last(ancestors)
  const element = parseTag(context, TagType.Start, parent)
  const { tag } = element

  if (element.isSelfClosing || context.options.isVoidTag(tag)) {
    return element
  }

  // Children are parsed with this element as the innermost ancestor.
  const mode: TextModes = context.options.getTextMode(tag, element.ns)
  ancestors.push(element)
  const children = parseChildren(context, mode, ancestors)
  ancestors.pop()
  element.children = children

  if (startsWithEndTagOpen(context.source, tag)) {
    parseTag(context, TagType.End, parent)
  } else {
    emitError(context, ErrorCodes.X_MISSING_END_TAG)
    // An unterminated <script> whose first child begins with `<!--`
    // gets an additional, more specific error.
    const atEndOfInput = context.source.length === 0
    if (atEndOfInput && tag.toLowerCase() === 'script') {
      const firstChild = children[0]
      if (firstChild && startsWith(firstChild.loc.source, '<!--')) {
        emitError(context, ErrorCodes.EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT)
      }
    }
  }

  element.loc = getSelection(context, element.loc.start)
  return element
}
// Tells `parseTag` which kind of tag it is expected to find at the cursor.
const enum TagType {
Start, // a tag that begins with `<`
End // a tag that begins with `</`
}
/**
 * Parse a single tag (e.g. `<div id=a>`) of the given kind.
 *
 * Attributes found on an end tag are parsed and reported as
 * END_TAG_WITH_ATTRIBUTES but are not stored on the result.
 *
 * @param context - parser state positioned at `<` or `</` followed by a letter
 * @param type - whether a start tag or an end tag is expected
 * @param parent - the enclosing element, passed on to the `getNamespace` option
 * @returns an ELEMENT node with no children; `loc` covers just this tag
 */
function parseTag(
  context: ParserContext,
  type: TagType,
  parent: ElementNode | undefined
): ElementNode {
  if (__DEV__) {
    assert(/^<\/?[a-z]/i.test(context.source))
    assert(
      type === (startsWith(context.source, '</') ? TagType.End : TagType.Start)
    )
  }

  // Tag name.
  const start = getCursor(context)
  const opening = /^<\/?([a-z][^\t\r\n\f />]*)/i.exec(context.source)!
  const tag = opening[1]
  const ns = context.options.getNamespace(tag, parent)
  advanceBy(context, opening[0].length)
  advanceSpaces(context)

  // Attribute list: runs until '>', '/>' or the end of the input.
  const props: Array<AttributeNode | DirectiveNode> = []
  const attributeNames = new Set<string>()
  for (;;) {
    const rest = context.source
    if (rest.length === 0 || startsWith(rest, '>') || startsWith(rest, '/>')) {
      break
    }

    if (startsWith(rest, '/')) {
      // A '/' that is not part of '/>' is reported and skipped.
      emitError(context, ErrorCodes.UNEXPECTED_SOLIDUS_IN_TAG)
      advanceBy(context, 1)
      advanceSpaces(context)
      continue
    }

    if (type === TagType.End) {
      emitError(context, ErrorCodes.END_TAG_WITH_ATTRIBUTES)
    }
    const attr = parseAttribute(context, attributeNames)
    if (type === TagType.Start) {
      props.push(attr)
    }

    if (/^[^\t\r\n\f />]/.test(context.source)) {
      emitError(context, ErrorCodes.MISSING_WHITESPACE_BETWEEN_ATTRIBUTES)
    }
    advanceSpaces(context)
  }

  // Closing '>' or '/>'.
  let isSelfClosing = false
  if (context.source.length > 0) {
    isSelfClosing = startsWith(context.source, '/>')
    if (isSelfClosing && type === TagType.End) {
      emitError(context, ErrorCodes.END_TAG_WITH_TRAILING_SOLIDUS)
    }
    advanceBy(context, isSelfClosing ? 2 : 1)
  } else {
    emitError(context, ErrorCodes.EOF_IN_TAG)
  }

  // `slot` and `template` are special; any other name containing an
  // uppercase letter or a hyphen is classified as a component.
  const tagType =
    tag === 'slot'
      ? ElementTypes.SLOT
      : tag === 'template'
        ? ElementTypes.TEMPLATE
        : /[A-Z-]/.test(tag)
          ? ElementTypes.COMPONENT
          : ElementTypes.ELEMENT

  return {
    type: NodeTypes.ELEMENT,
    ns,
    tag,
    tagType,
    props,
    isSelfClosing,
    children: [],
    loc: getSelection(context, start),
    codegenNode: undefined // to be created during transform phase
  }
}
/**
 * Parse one attribute, or one directive, inside a tag.
 *
 * Names starting with `v-`, `:`, `@` or `#` produce a DIRECTIVE node; the
 * shorthands map to the directive names `bind`, `on` and `slot`. All other
 * names produce an ATTRIBUTE node.
 *
 * Fixes relative to the previous implementation:
 * - The directive argument's offset is now computed from the match layout
 *   (the argument sits directly before the modifiers, which run to the end
 *   of the name). Searching for the argument text found the first
 *   occurrence anywhere in the name, so `v-bind:b` or `v-on:on` were
 *   located inside the directive name itself.
 * - A dynamic argument that lacks its closing `]` no longer loses its last
 *   character; only the opening `[` is stripped in that case.
 *
 * @param context - parser state positioned at the first character of the name
 * @param nameSet - names already seen on this tag, used to report duplicates;
 *   the parsed name is added to it
 * @returns the parsed attribute or directive node
 */
function parseAttribute(
  context: ParserContext,
  nameSet: Set<string>
): AttributeNode | DirectiveNode {
  __DEV__ && assert(/^[^\t\r\n\f />]/.test(context.source))

  // Name.
  const start = getCursor(context)
  const nameMatch = /^[^\t\r\n\f />][^\t\r\n\f />=]*/.exec(context.source)!
  const name = nameMatch[0]

  if (nameSet.has(name)) {
    emitError(context, ErrorCodes.DUPLICATE_ATTRIBUTE)
  }
  nameSet.add(name)

  if (name[0] === '=') {
    emitError(context, ErrorCodes.UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME)
  }
  {
    // Report every quote or '<' inside the name at its own position.
    const pattern = /["'<]/g
    let m: RegExpExecArray | null
    while ((m = pattern.exec(name)) !== null) {
      emitError(
        context,
        ErrorCodes.UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME,
        m.index
      )
    }
  }

  advanceBy(context, name.length)

  // Value: optional whitespace, '=', optional whitespace, then the value.
  let value: { content: string; loc: SourceLocation } | undefined = undefined
  if (/^[\t\r\n\f ]*=/.test(context.source)) {
    advanceSpaces(context)
    advanceBy(context, 1)
    advanceSpaces(context)
    value = parseAttributeValue(context)
    if (!value) {
      emitError(context, ErrorCodes.MISSING_ATTRIBUTE_VALUE)
    }
  }
  const loc = getSelection(context, start)

  if (/^(v-|:|@|#)/.test(name)) {
    // Groups: 1 = directive name after `v-`, 2 = argument, 3 = modifiers
    // (including their leading '.').
    const directive = /(?:^v-([a-z0-9-]+))?(?:(?::|^@|^#)([^\.]+))?(.+)?$/i.exec(
      name
    )!
    const rawArg = directive[2]
    const rawModifiers = directive[3]

    let arg: ExpressionNode | undefined
    if (rawArg) {
      // The pattern is anchored at the end of the name, so the argument ends
      // exactly where the modifiers (if any) begin.
      const startOffset =
        name.length - (rawModifiers ? rawModifiers.length : 0) - rawArg.length
      const argLoc = getSelection(
        context,
        getNewPosition(context, start, startOffset),
        getNewPosition(context, start, startOffset + rawArg.length)
      )

      let content = rawArg
      let isStatic = true
      if (content.startsWith('[')) {
        // Dynamic argument: `[expr]`.
        isStatic = false
        if (content.endsWith(']')) {
          content = content.slice(1, content.length - 1)
        } else {
          emitError(
            context,
            ErrorCodes.X_MISSING_DYNAMIC_DIRECTIVE_ARGUMENT_END
          )
          // No closing bracket to strip; keep everything after the '['.
          content = content.slice(1)
        }
      }

      arg = {
        type: NodeTypes.EXPRESSION,
        content,
        isStatic,
        loc: argLoc
      }
    }

    return {
      type: NodeTypes.DIRECTIVE,
      name:
        directive[1] ||
        (startsWith(name, ':')
          ? 'bind'
          : startsWith(name, '@')
            ? 'on'
            : 'slot'),
      exp: value && {
        type: NodeTypes.EXPRESSION,
        content: value.content,
        isStatic: false,
        loc: value.loc
      },
      arg,
      modifiers: rawModifiers ? rawModifiers.slice(1).split('.') : [],
      loc
    }
  }

  return {
    type: NodeTypes.ATTRIBUTE,
    name,
    value: value && {
      type: NodeTypes.TEXT,
      content: value.content,
      isEmpty: value.content.trim().length === 0,
      loc: value.loc
    },
    loc
  }
}
/**
 * Parse an attribute value at the cursor.
 *
 * A value that opens with `"` or `'` runs to the matching quote (or to the
 * end of the input when the quote is never closed). Any other value runs to
 * the next whitespace or `>`, and each of the characters `"`, `'`, `<`, `=`
 * and `` ` `` inside it is reported.
 *
 * @param context - parser state positioned at the start of the value
 * @returns the decoded value and its location, or `undefined` when there is
 *   no value at the cursor
 */
function parseAttributeValue(
  context: ParserContext
): { content: string; loc: SourceLocation } | undefined {
  const start = getCursor(context)
  const opener = context.source[0]
  let content: string

  if (opener === '"' || opener === "'") {
    // Quoted value.
    advanceBy(context, 1)
    const closeIndex = context.source.indexOf(opener)
    const unterminated = closeIndex === -1
    content = parseTextData(
      context,
      unterminated ? context.source.length : closeIndex,
      TextModes.ATTRIBUTE_VALUE
    )
    if (!unterminated) {
      // Step over the closing quote.
      advanceBy(context, 1)
    }
  } else {
    // Unquoted value.
    const unquoted = /^[^\t\r\n\f >]+/.exec(context.source)
    if (!unquoted) {
      return undefined
    }
    const raw = unquoted[0]
    const unexpected = /["'<=`]/
    for (let i = 0; i < raw.length; i++) {
      if (unexpected.test(raw[i])) {
        emitError(
          context,
          ErrorCodes.UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE,
          i
        )
      }
    }
    content = parseTextData(context, raw.length, TextModes.ATTRIBUTE_VALUE)
  }

  return { content, loc: getSelection(context, start) }
}
/**
 * Parse an interpolation such as `{{ expr }}`.
 *
 * When the closing delimiter is missing, X_MISSING_INTERPOLATION_END is
 * reported, nothing is consumed, and `undefined` is returned.
 *
 * @param context - parser state positioned at the opening delimiter
 * @param mode - text mode used to decode the text between the delimiters
 * @returns an EXPRESSION node holding the trimmed inner text
 */
function parseInterpolation(
  context: ParserContext,
  mode: TextModes
): ExpressionNode | undefined {
  const { delimiters } = context.options
  const open = delimiters[0]
  const close = delimiters[1]
  __DEV__ && assert(startsWith(context.source, open))

  const closeIndex = context.source.indexOf(close, open.length)
  if (closeIndex === -1) {
    emitError(context, ErrorCodes.X_MISSING_INTERPOLATION_END)
    return undefined
  }

  const start = getCursor(context)
  advanceBy(context, open.length)
  const innerLength = closeIndex - open.length
  const raw = parseTextData(context, innerLength, mode)
  advanceBy(context, close.length)

  const content = raw.trim()
  return {
    type: NodeTypes.EXPRESSION,
    content,
    loc: getSelection(context, start),
    // Only an empty interpolation is flagged static.
    isStatic: content === ''
  }
}
/**
 * Parse a run of text starting at the cursor.
 *
 * The run ends at the earliest of: the next `<` or opening interpolation
 * delimiter found after the first character, the next `]]>` (CDATA mode
 * only), or the end of the input.
 *
 * @param context - parser state; must have input remaining
 * @param mode - text mode used to decode the text
 * @returns a TEXT node for the consumed run
 */
function parseText(context: ParserContext, mode: TextModes): TextNode {
  __DEV__ && assert(context.source.length > 0)

  const { source } = context
  const open = context.options.delimiters[0]

  // Searches start at index 1 so that at least one character is consumed.
  const stops = [source.indexOf('<', 1), source.indexOf(open, 1)]
  if (mode === TextModes.CDATA) {
    stops.push(source.indexOf(']]>'))
  }

  let endIndex = source.length
  for (const stop of stops) {
    if (stop !== -1 && stop < endIndex) {
      endIndex = stop
    }
  }
  __DEV__ && assert(endIndex > 0)

  const start = getCursor(context)
  const content = parseTextData(context, endIndex, mode)
  const loc = getSelection(context, start)

  return {
    type: NodeTypes.TEXT,
    content,
    loc,
    isEmpty: content.trim().length === 0
  }
}
/**
 * Get text data with a given length from the current location.
 * This translates HTML entities in the text data.
 *
 * In RAWTEXT and CDATA modes the text is returned verbatim. In every other
 * mode, named and numeric character references are decoded, and malformed
 * references are reported through `emitError`.
 *
 * @param context - parser state; the cursor is advanced past the text
 * @param length - number of source characters to consume
 * @param mode - text mode that decides whether references are decoded
 * @returns the (possibly decoded) text
 */
function parseTextData(
context: ParserContext,
length: number,
mode: TextModes
): string {
// No decoding in these modes: take the slice as-is.
if (mode === TextModes.RAWTEXT || mode === TextModes.CDATA) {
const text = context.source.slice(0, length)
advanceBy(context, length)
return text
}
// DATA or RCDATA. Entity decoding required.
// (ATTRIBUTE_VALUE also takes this path.)
// Absolute offset at which this run of text ends.
const end = context.offset + length
let text: string = ''
while (context.offset < end) {
// Next `&`, `&#` or `&#x` in the remaining input.
const head = /&(?:#x?)?/i.exec(context.source)
if (!head || context.offset + head.index >= end) {
// No reference starts inside the run: copy the rest verbatim and stop.
const remaining = end - context.offset
text += context.source.slice(0, remaining)
advanceBy(context, remaining)
break
}
// Advance to the "&".
text += context.source.slice(0, head.index)
advanceBy(context, head.index)
if (head[0] === '&') {
// Named character reference.
let name = '',
value: string | undefined = undefined
if (/[0-9a-z]/i.test(context.source[1])) {
// Try the longest candidate first and shrink until the table has a hit.
// If nothing matches, `name` is left as the single character after `&`.
for (
let length = context.maxCRNameLength;
!value && length > 0;
--length
) {
name = context.source.substr(1, length)
value = context.options.namedCharacterReferences[name]
}
if (value) {
const semi = name.endsWith(';')
// Inside an attribute value, a reference without `;` that is followed by
// `=` or an alphanumeric character is kept as literal text.
if (
mode === TextModes.ATTRIBUTE_VALUE &&
!semi &&
/[=a-z0-9]/i.test(context.source[1 + name.length] || '')
) {
text += '&'
text += name
advanceBy(context, 1 + name.length)
} else {
text += value
advanceBy(context, 1 + name.length)
if (!semi) {
emitError(
context,
ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE
)
}
}
} else {
// Unknown name: report it and keep `&` plus that one character as text.
emitError(context, ErrorCodes.UNKNOWN_NAMED_CHARACTER_REFERENCE)
text += '&'
text += name
advanceBy(context, 1 + name.length)
}
} else {
// `&` not followed by an alphanumeric character is a literal ampersand.
text += '&'
advanceBy(context, 1)
}
} else {
// Numeric character reference.
const hex = head[0] === '&#x'
const pattern = hex ? /^&#x([0-9a-f]+);?/i : /^&#([0-9]+);?/
const body = pattern.exec(context.source)
if (!body) {
// `&#` / `&#x` with no digits: keep the prefix as text and report it.
text += head[0]
emitError(
context,
ErrorCodes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE
)
advanceBy(context, head[0].length)
} else {
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
let cp = Number.parseInt(body[1], hex ? 16 : 10)
if (cp === 0) {
emitError(context, ErrorCodes.NULL_CHARACTER_REFERENCE)
cp = 0xfffd
} else if (cp > 0x10ffff) {
emitError(
context,
ErrorCodes.CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE
)
cp = 0xfffd
} else if (cp >= 0xd800 && cp <= 0xdfff) {
emitError(context, ErrorCodes.SURROGATE_CHARACTER_REFERENCE)
cp = 0xfffd
} else if ((cp >= 0xfdd0 && cp <= 0xfdef) || (cp & 0xfffe) === 0xfffe) {
// Noncharacters are reported but emitted unchanged.
emitError(context, ErrorCodes.NONCHARACTER_CHARACTER_REFERENCE)
} else if (
(cp >= 0x01 && cp <= 0x08) ||
cp === 0x0b ||
(cp >= 0x0d && cp <= 0x1f) ||
(cp >= 0x7f && cp <= 0x9f)
) {
// Control characters are reported, then remapped when the replacement
// table has an entry for them and emitted unchanged otherwise.
emitError(context, ErrorCodes.CONTROL_CHARACTER_REFERENCE)
cp = CCR_REPLACEMENTS[cp] || cp
}
text += String.fromCodePoint(cp)
advanceBy(context, body[0].length)
if (!body![0].endsWith(';')) {
emitError(
context,
ErrorCodes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE
)
}
}
}
}
return text
}
/**
 * Snapshot the parser's current position as a new, independent object.
 */
function getCursor(context: ParserContext): Position {
  return {
    column: context.column,
    line: context.line,
    offset: context.offset
  }
}
/**
 * Build a source location running from `start` to `end`.
 *
 * @param context - parser state; supplies the original input and, when
 *   `end` is omitted, the current cursor
 * @param start - where the location begins
 * @param end - where the location ends; defaults to the current cursor
 * @returns the two positions plus the slice of the original input between them
 */
function getSelection(
  context: ParserContext,
  start: Position,
  end?: Position
): SourceLocation {
  const finish = end || getCursor(context)
  const source = context.originalSource.slice(start.offset, finish.offset)
  return { start, end: finish, source }
}
/**
 * Return the final element of `xs`, or `undefined` for an empty array.
 */
function last<T>(xs: T[]): T | undefined {
  const count = xs.length
  return count > 0 ? xs[count - 1] : undefined
}
/**
 * Tell whether `source` begins with `searchString`.
 */
function startsWith(source: string, searchString: string): boolean {
  const prefix = source.slice(0, searchString.length)
  return prefix === searchString
}
/**
 * Consume `numberOfCharacters` characters from the front of the remaining
 * input, updating the cursor position to match.
 */
function advanceBy(context: ParserContext, numberOfCharacters: number): void {
  const remaining = context.source
  if (__DEV__) {
    assert(numberOfCharacters <= remaining.length)
  }
  // Position first (it needs the text being skipped), then drop that text.
  advancePositionWithMutation(context, remaining, numberOfCharacters)
  context.source = remaining.slice(numberOfCharacters)
}
/**
 * Skip any run of tab, CR, LF, form feed or space characters at the cursor.
 * Does nothing when the remaining input does not start with one of them.
 */
function advanceSpaces(context: ParserContext): void {
  const { source } = context
  let count = 0
  while (count < source.length && '\t\r\n\f '.includes(source[count])) {
    count++
  }
  if (count > 0) {
    advanceBy(context, count)
  }
}
/**
 * Compute the position `numberOfCharacters` characters after `start`,
 * without touching the parser's own cursor.
 *
 * The text handed to `advancePositionWithClone` is the slice of the original
 * input that begins at `start.offset` and is `numberOfCharacters` long.
 * `String.prototype.slice` takes an end index, not a length, so the end must
 * be `start.offset + numberOfCharacters`; passing the bare length (as the
 * previous code did) yields a truncated or empty slice whenever
 * `start.offset` is greater than zero.
 *
 * @param context - parser state; only `originalSource` is read
 * @param start - the position to measure from
 * @param numberOfCharacters - how many characters to move forward
 * @returns the new position
 */
function getNewPosition(
  context: ParserContext,
  start: Position,
  numberOfCharacters: number
): Position {
  const endOffset = start.offset + numberOfCharacters
  return advancePositionWithClone(
    start,
    context.originalSource.slice(start.offset, endOffset),
    numberOfCharacters
  )
}
/**
 * Report a parse error through the `onError` option.
 *
 * The error is located at the current cursor, shifted right by `offset`
 * characters when one is given. Only `offset` and `column` are shifted; the
 * line is left as-is. The error location is zero-width: `start` and `end`
 * are the same position object and `source` is empty.
 *
 * @param context - parser state; supplies the cursor and the error handler
 * @param code - which error to report
 * @param offset - optional distance from the cursor to the error position
 */
function emitError(
  context: ParserContext,
  code: ErrorCodes,
  offset?: number
): void {
  const shift = offset || 0
  const loc: Position = {
    column: context.column + shift,
    line: context.line,
    offset: context.offset + shift
  }
  context.options.onError(
    createCompilerError(code, { start: loc, end: loc, source: '' })
  )
}
/**
 * Decide whether the current run of children is finished.
 *
 * - DATA: ends at an end tag that matches any open ancestor.
 * - RCDATA / RAWTEXT: ends at an end tag that matches the innermost ancestor.
 * - CDATA: ends at `]]>`.
 * - Every mode also ends when no input is left.
 *
 * @param context - parser state; only the remaining input is read
 * @param mode - text mode of the content being parsed
 * @param ancestors - stack of open elements, innermost last
 */
function isEnd(
  context: ParserContext,
  mode: TextModes,
  ancestors: ElementNode[]
): boolean {
  const s = context.source

  if (mode === TextModes.DATA) {
    if (
      startsWith(s, '</') &&
      ancestors.some(ancestor => startsWithEndTagOpen(s, ancestor.tag))
    ) {
      return true
    }
  } else if (mode === TextModes.RCDATA || mode === TextModes.RAWTEXT) {
    const parent = last(ancestors)
    if (parent && startsWithEndTagOpen(s, parent.tag)) {
      return true
    }
  } else if (mode === TextModes.CDATA) {
    if (startsWith(s, ']]>')) {
      return true
    }
  }

  // Out of input.
  return !s
}
/**
 * Tell whether `source` begins with an end tag for `tag`, compared
 * case-insensitively.
 *
 * The tag name must be followed by whitespace, `/`, `>`, or the end of the
 * input, so that `</divx>` is not mistaken for `</div>`. Carriage return is
 * part of the accepted whitespace, matching the `[\t\r\n\f ]` set used when
 * tag names and attributes are tokenized; without it `</div\r\n>` was not
 * recognized as closing `<div>`.
 *
 * @param source - the remaining input
 * @param tag - the tag name to look for
 */
function startsWithEndTagOpen(source: string, tag: string): boolean {
  if (!source.startsWith('</')) {
    return false
  }
  const nameEnd = 2 + tag.length
  if (source.slice(2, nameEnd).toLowerCase() !== tag.toLowerCase()) {
    return false
  }
  // Running out of input right after the name counts as a terminator.
  return /[\t\r\n\f />]/.test(source[nameEnd] || '>')
}
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
// Replacement code points for numeric character references that name a
// control character. Keys are the referenced code point, values the code
// point emitted in its place; a code point with no entry here is emitted
// unchanged by the caller.
const CCR_REPLACEMENTS: { [key: number]: number | undefined } = {
0x80: 0x20ac,
0x82: 0x201a,
0x83: 0x0192,
0x84: 0x201e,
0x85: 0x2026,
0x86: 0x2020,
0x87: 0x2021,
0x88: 0x02c6,
0x89: 0x2030,
0x8a: 0x0160,
0x8b: 0x2039,
0x8c: 0x0152,
0x8e: 0x017d,
0x91: 0x2018,
0x92: 0x2019,
0x93: 0x201c,
0x94: 0x201d,
0x95: 0x2022,
0x96: 0x2013,
0x97: 0x2014,
0x98: 0x02dc,
0x99: 0x2122,
0x9a: 0x0161,
0x9b: 0x203a,
0x9c: 0x0153,
0x9e: 0x017e,
0x9f: 0x0178
}