feat(compiler): port parser implementation based on work by @znck and @mysticatea
This commit is contained in:
parent
54a78f3856
commit
86ae92303d
7392
packages/compiler-core/__tests__/__snapshots__/parse.spec.ts.snap
Normal file
7392
packages/compiler-core/__tests__/__snapshots__/parse.spec.ts.snap
Normal file
File diff suppressed because it is too large
Load Diff
2517
packages/compiler-core/__tests__/parse.spec.ts
Normal file
2517
packages/compiler-core/__tests__/parse.spec.ts
Normal file
File diff suppressed because it is too large
Load Diff
87
packages/compiler-core/src/ast.ts
Normal file
87
packages/compiler-core/src/ast.ts
Normal file
@ -0,0 +1,87 @@
|
||||
/**
 * Discriminant stored in the `type` field of every AST node.
 * The numeric values are spelled out so that reordering members cannot
 * silently change them.
 */
export const enum NodeTypes {
  TEXT = 0,
  COMMENT = 1,
  ELEMENT = 2,
  ATTRIBUTE = 3,
  EXPRESSION = 4,
  DIRECTIVE = 5,
  ROOT = 6
}
|
||||
|
||||
/**
 * Finer classification of an element node, stored in `ElementNode.tagType`.
 */
export const enum ElementTypes {
  ELEMENT = 0,
  COMPONENT = 1,
  SLOT = 2, // slot
  TEMPLATE = 3 // template, component
}
|
||||
|
||||
/**
 * Markup namespace an element belongs to.
 */
export const enum Namespaces {
  HTML = 0,
  SVG = 1, // allows CDATA section and forbids end tag omission.
  MATH_ML = 2 // allows CDATA section and forbids end tag omission.
}
|
||||
|
||||
/** Fields shared by every AST node. */
export interface Node {
  /** Discriminant identifying the concrete node kind. */
  type: NodeTypes
  /** Region of the template source this node was parsed from. */
  loc: SourceLocation
}
|
||||
|
||||
/** Top-level node returned by the parser; holds the template's top-level children. */
export interface RootNode extends Node {
  type: NodeTypes.ROOT
  /** Top-level nodes in source order. */
  children: (ElementNode | ExpressionNode | TextNode | CommentNode)[]
}
|
||||
|
||||
/** An element (tag) together with its attributes/directives and child nodes. */
export interface ElementNode extends Node {
  type: NodeTypes.ELEMENT
  /** Namespace the element lives in. */
  ns: Namespaces
  /** Tag name as written in the source. */
  tag: string
  /** Finer classification (plain element, component, slot, template). */
  tagType: ElementTypes
  /** True when the start tag was closed with `/>`. */
  isSelfClosing: boolean
  /** Attributes and directives found on the start tag, in source order. */
  props: (AttributeNode | DirectiveNode)[]
  /** Child nodes in source order. */
  children: (ElementNode | ExpressionNode | TextNode | CommentNode)[]
}
|
||||
|
||||
/** A run of text (also used for plain attribute values). */
export interface TextNode extends Node {
  type: NodeTypes.TEXT
  /** Text content. */
  content: string
  /** True when `content` is empty or consists only of whitespace. */
  isEmpty: boolean
}
|
||||
|
||||
/** A comment, or markup the parser chose to treat as a comment. */
export interface CommentNode extends Node {
  type: NodeTypes.COMMENT
  /** Comment text without the surrounding delimiters. */
  content: string
}
|
||||
|
||||
/** A plain (non-directive) attribute on an element's start tag. */
export interface AttributeNode extends Node {
  type: NodeTypes.ATTRIBUTE
  /** Attribute name as written in the source. */
  name: string
  /** Attribute value, or `undefined` when no value was parsed. */
  value: TextNode | undefined
}
|
||||
|
||||
/** A directive attribute (`v-*`, `:`, `@` or `#` prefixed) on a start tag. */
export interface DirectiveNode extends Node {
  type: NodeTypes.DIRECTIVE
  /** Directive name without the `v-` prefix (shorthands are expanded). */
  name: string
  /** Expression taken from the attribute value, if one was given. */
  exp: ExpressionNode | undefined
  /** Directive argument (the part after `:`, `@` or `#`), if present. */
  arg: ExpressionNode | undefined
  /** Dot-separated modifiers following the argument. */
  modifiers: string[]
}
|
||||
|
||||
/** An expression: interpolation content, directive value or directive argument. */
export interface ExpressionNode extends Node {
  type: NodeTypes.EXPRESSION
  /** Expression source text. */
  content: string
  /** Flag set by the parser; e.g. `false` for a bracketed (dynamic) directive argument. */
  isStatic: boolean
}
|
||||
|
||||
/** A single point in the template source. */
export interface Position {
  offset: number // from start of file
  /** Line number; the parser starts counting at 1. */
  line: number
  /** Column number; the parser starts counting at 1. */
  column: number
}
|
||||
|
||||
/**
 * The node's range in the source: half-open, `[start, end)`.
 * `start` is inclusive and `end` is exclusive.
 */
export interface SourceLocation {
  /** First position covered by the node. */
  start: Position
  /** Position immediately after the node. */
  end: Position
  /** The source text between `start` and `end`. */
  source: string
}
|
37
packages/compiler-core/src/errorTypes.ts
Normal file
37
packages/compiler-core/src/errorTypes.ts
Normal file
@ -0,0 +1,37 @@
|
||||
/**
 * Codes passed to `ParserOptions.onError`.
 *
 * Values are positional (starting at 0), so the member order must be
 * preserved. Members prefixed with `X_` appear to be template-specific
 * additions rather than HTML parse errors.
 */
export const enum ParserErrorTypes {
  ABRUPT_CLOSING_OF_EMPTY_COMMENT = 0,
  ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
  CDATA_IN_HTML_CONTENT,
  CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE,
  CONTROL_CHARACTER_REFERENCE,
  DUPLICATE_ATTRIBUTE,
  END_TAG_WITH_ATTRIBUTES,
  END_TAG_WITH_TRAILING_SOLIDUS,
  EOF_BEFORE_TAG_NAME,
  EOF_IN_CDATA,
  EOF_IN_COMMENT,
  EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT,
  EOF_IN_TAG,
  INCORRECTLY_CLOSED_COMMENT,
  INCORRECTLY_OPENED_COMMENT,
  INVALID_FIRST_CHARACTER_OF_TAG_NAME,
  MISSING_ATTRIBUTE_VALUE,
  MISSING_END_TAG_NAME,
  MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
  MISSING_WHITESPACE_BETWEEN_ATTRIBUTES,
  NESTED_COMMENT,
  NONCHARACTER_CHARACTER_REFERENCE,
  NULL_CHARACTER_REFERENCE,
  SURROGATE_CHARACTER_REFERENCE,
  UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME,
  UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE,
  UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME,
  UNEXPECTED_NULL_CHARACTER,
  UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME,
  UNEXPECTED_SOLIDUS_IN_TAG,
  UNKNOWN_NAMED_CHARACTER_REFERENCE,

  // Template-level errors.
  X_INVALID_END_TAG,
  X_MISSING_END_TAG,
  X_MISSING_INTERPOLATION_END,
  X_MISSING_DYNAMIC_DIRECTIVE_ARGUMENT_END
}
|
@ -1 +1,3 @@
|
||||
// TODO
|
||||
export { parse } from './parser'
|
||||
export * from './ast'
|
||||
export * from './errorTypes'
|
||||
|
918
packages/compiler-core/src/parser.ts
Normal file
918
packages/compiler-core/src/parser.ts
Normal file
@ -0,0 +1,918 @@
|
||||
import assert from 'assert'
|
||||
import { ParserErrorTypes } from './errorTypes'
|
||||
import {
|
||||
Node,
|
||||
AttributeNode,
|
||||
CommentNode,
|
||||
DirectiveNode,
|
||||
ElementNode,
|
||||
ElementTypes,
|
||||
ExpressionNode,
|
||||
Namespaces,
|
||||
NodeTypes,
|
||||
Position,
|
||||
RootNode,
|
||||
SourceLocation,
|
||||
TextNode
|
||||
} from './ast'
|
||||
|
||||
/** Host-supplied configuration and callbacks that drive the parser. */
export interface ParserOptions {
  /** Returns true for tags that never have children or an end tag. */
  isVoidTag: (tag: string) => boolean // e.g. img, br, hr
  /** Decides the namespace of `tag` given its parent element (if any). */
  getNamespace: (tag: string, parent: ElementNode | undefined) => Namespaces
  /** Decides how the content of `tag` is tokenized. */
  getTextMode: (tag: string, ns: Namespaces) => TextModes
  /** Opening and closing interpolation delimiters. */
  delimiters: [string, string] // ['{{', '}}']
  // NOTE(review): not referenced by the parser code in this file — confirm intended use.
  transform: (node: Node) => Node // --
  /** When true, text nodes flagged `isEmpty` are dropped from the output. */
  ignoreSpaces: boolean

  // Map to HTML entities. E.g., `{ "amp;": "&" }`
  // The full set is https://html.spec.whatwg.org/multipage/named-characters.html#named-character-references
  namedCharacterReferences: { [name: string]: string | undefined }

  /** Invoked for every parse error with its type and source position. */
  onError: (type: ParserErrorTypes, loc: Position) => void
}
|
||||
|
||||
/**
 * Tokenization mode for a stretch of content. It controls whether child
 * elements and character references are recognized and what ends the content.
 */
export const enum TextModes {
  //               | Elements | Entities | End sign              | Inside of
  DATA = 0, //     | ✔       | ✔       | End tags of ancestors |
  RCDATA = 1, //   | ✘       | ✔       | End tag of the parent | <textarea>
  RAWTEXT = 2, //  | ✘       | ✘       | End tag of the parent | <style>,<script>
  CDATA = 3,
  ATTRIBUTE_VALUE = 4
}
|
||||
|
||||
/** Mutable parser state: the options plus a cursor over the input. */
interface ParserContext extends ParserOptions {
  /** The complete input; never modified. */
  readonly originalSource: string
  /** The part of the input that has not been consumed yet. */
  source: string
  /** Number of characters consumed so far. */
  offset: number
  /** Line of the cursor (starts at 1). */
  line: number
  /** Column of the cursor (starts at 1). */
  column: number
  /** Length of the longest key in `namedCharacterReferences`. */
  maxCRNameLength: number
}
|
||||
|
||||
/**
 * Parse a template string into an AST.
 *
 * @param content - template source
 * @param options - parser configuration and callbacks
 * @returns the root node whose location spans everything that was consumed
 */
export function parse(content: string, options: ParserOptions): RootNode {
  const context = createParserContext(content, options)
  const start = getCursor(context)

  // Children must be parsed before the location is taken: parsing moves the cursor.
  const children = parseChildren(context, TextModes.DATA, [])
  const loc = getSelection(context, start)

  return { type: NodeTypes.ROOT, children, loc }
}
|
||||
|
||||
/** Return the final element of `xs`, or `undefined` when it is empty. */
function last<T>(xs: T[]): T | undefined {
  const { length } = xs
  return xs[length - 1]
}
|
||||
|
||||
/** Whether `source` begins with `searchString`. */
function startsWith(source: string, searchString: string): boolean {
  const prefix = source.slice(0, searchString.length)
  return prefix === searchString
}
|
||||
|
||||
/**
 * Consume `numberOfCharacters` characters from the front of `context.source`
 * and move the cursor (`offset`, `line`, `column`) accordingly.
 *
 * Lines are separated by `\n` or `\r\n`. Columns are 1-based, so after one or
 * more line breaks the column is the number of characters consumed on the
 * final line plus one.
 */
function advanceBy(context: ParserContext, numberOfCharacters: number): void {
  assert(numberOfCharacters <= context.source.length)

  const { column, source } = context
  const consumed = source.slice(0, numberOfCharacters)
  const lines = consumed.split(/\r?\n/)
  const lastLine = lines[lines.length - 1]

  context.source = source.slice(numberOfCharacters)
  context.offset += numberOfCharacters
  context.line += lines.length - 1
  context.column =
    lines.length === 1
      ? column + numberOfCharacters
      : // A new line restarts at column 1; every character on it moves one further.
        lastLine.length + 1
}
|
||||
|
||||
/** Skip any run of tab, CR, LF, form feed or space at the cursor. */
function advanceSpaces(context: ParserContext): void {
  const leading = /^[\t\r\n\f ]+/.exec(context.source)
  if (leading === null) {
    return
  }
  advanceBy(context, leading[0].length)
}
|
||||
|
||||
/** Snapshot the current cursor as a fresh `Position` object. */
function getCursor(context: ParserContext): Position {
  return {
    column: context.column,
    line: context.line,
    offset: context.offset
  }
}
|
||||
|
||||
/**
 * Compute the position reached by moving `numberOfCharacters` characters
 * forward from `start` in the original source. Neither `start` nor the
 * parser cursor is modified.
 *
 * @param context - supplies `originalSource`
 * @param start - position to measure from
 * @param numberOfCharacters - how far to move
 * @returns a new `Position`
 */
function getNewPosition(
  context: ParserContext,
  start: Position,
  numberOfCharacters: number
): Position {
  const { originalSource } = context
  // The second argument of `slice` is an absolute end index, not a length.
  const covered = originalSource.slice(
    start.offset,
    start.offset + numberOfCharacters
  )
  const lines = covered.split(/\r?\n/)
  const lastLine = lines[lines.length - 1]

  return {
    // Columns are 1-based: after a line break, add one to the characters
    // consumed on the final line.
    column:
      lines.length === 1
        ? start.column + numberOfCharacters
        : lastLine.length + 1,
    line: start.line + lines.length - 1,
    offset: start.offset + numberOfCharacters
  }
}
|
||||
|
||||
/**
 * Build a `SourceLocation` running from `start` to `end`.
 * When `end` is omitted, the current cursor is used.
 */
function getSelection(
  context: ParserContext,
  start: Position,
  end?: Position
): SourceLocation {
  const stop = end || getCursor(context)
  const source = context.originalSource.slice(start.offset, stop.offset)
  return { start, end: stop, source }
}
|
||||
|
||||
/**
 * Report a parse error through `context.onError`.
 *
 * @param type - error code
 * @param offset - optional number of characters past the cursor at which the
 *   error occurred; applied to both `offset` and `column` (the line is not
 *   adjusted)
 */
function emitError(
  context: ParserContext,
  type: ParserErrorTypes,
  offset?: number
): void {
  const shift = offset || 0
  const loc = getCursor(context)
  loc.offset += shift
  loc.column += shift
  context.onError(type, loc)
}
|
||||
|
||||
/**
 * Create the initial parser state for `content`: cursor at offset 0,
 * line 1, column 1, with the longest named-character-reference key
 * length precomputed.
 */
function createParserContext(
  content: string,
  options: ParserOptions
): ParserContext {
  let maxCRNameLength = 0
  for (const name of Object.keys(options.namedCharacterReferences)) {
    if (name.length > maxCRNameLength) {
      maxCRNameLength = name.length
    }
  }

  return {
    ...options,
    column: 1,
    line: 1,
    offset: 0,
    originalSource: content,
    source: content,
    maxCRNameLength
  }
}
|
||||
|
||||
/**
 * Parse nodes until `isEnd` reports the end of the current content.
 *
 * @param context - parser state; its cursor is advanced
 * @param mode - text mode of the content being parsed
 * @param ancestors - stack of open elements, innermost last
 * @returns the parsed nodes (adjacent text nodes are merged by `pushNode`)
 */
function parseChildren(
  context: ParserContext,
  mode: TextModes,
  ancestors: ElementNode[]
): RootNode['children'] {
  type Child = RootNode['children'][0]

  const parent = last(ancestors)
  const ns = parent ? parent.ns : Namespaces.HTML
  const nodes: RootNode['children'] = []

  while (!isEnd(context, mode, ancestors)) {
    assert(context.source.length > 0)
    const s = context.source
    // Stays undefined when no structured node is recognized (or a sub-parser
    // declines); the input is then consumed as text below.
    let node: Child | Child[] | undefined = undefined

    if (startsWith(s, context.delimiters[0])) {
      // '{{'
      node = parseInterpolation(context, mode)
    } else if (mode === TextModes.DATA && s[0] === '<') {
      // https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
      if (s.length === 1) {
        emitError(context, ParserErrorTypes.EOF_BEFORE_TAG_NAME, 1)
      } else if (s[1] === '!') {
        // https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
        if (startsWith(s, '<!--')) {
          node = parseComment(context)
        } else if (startsWith(s, '<!DOCTYPE')) {
          // Ignore DOCTYPE by a limitation.
          node = parseBogusComment(context)
        } else if (startsWith(s, '<![CDATA[')) {
          if (ns !== Namespaces.HTML) {
            node = parseCDATA(context, ancestors)
          } else {
            emitError(context, ParserErrorTypes.CDATA_IN_HTML_CONTENT)
            node = parseBogusComment(context)
          }
        } else {
          emitError(context, ParserErrorTypes.INCORRECTLY_OPENED_COMMENT)
          node = parseBogusComment(context)
        }
      } else if (s[1] === '/') {
        // https://html.spec.whatwg.org/multipage/parsing.html#end-tag-open-state
        if (s.length === 2) {
          emitError(context, ParserErrorTypes.EOF_BEFORE_TAG_NAME, 2)
        } else if (s[2] === '>') {
          emitError(context, ParserErrorTypes.MISSING_END_TAG_NAME, 2)
          advanceBy(context, 3)
          continue
        } else if (/[a-z]/i.test(s[2])) {
          // An end tag reaching this point matched no open ancestor
          // (otherwise `isEnd` would have stopped the loop): report and skip it.
          emitError(context, ParserErrorTypes.X_INVALID_END_TAG)
          parseTag(context, TagType.End, parent)
          continue
        } else {
          emitError(
            context,
            ParserErrorTypes.INVALID_FIRST_CHARACTER_OF_TAG_NAME,
            2
          )
          node = parseBogusComment(context)
        }
      } else if (/[a-z]/i.test(s[1])) {
        node = parseElement(context, ancestors)
      } else if (s[1] === '?') {
        emitError(
          context,
          ParserErrorTypes.UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME,
          1
        )
        node = parseBogusComment(context)
      } else {
        emitError(
          context,
          ParserErrorTypes.INVALID_FIRST_CHARACTER_OF_TAG_NAME,
          1
        )
      }
    }

    if (!node) {
      node = parseText(context, mode)
    }

    if (Array.isArray(node)) {
      for (const child of node) {
        pushNode(context, nodes, child)
      }
    } else {
      pushNode(context, nodes, node)
    }
  }

  return nodes
}
|
||||
|
||||
/**
 * Decide whether the content currently being parsed ends at the cursor.
 *
 * - DATA: an end tag of any ancestor is next.
 * - RCDATA / RAWTEXT: the end tag of the direct parent is next.
 * - CDATA: `]]>` is next.
 *
 * In every mode, running out of input also ends the content.
 */
function isEnd(
  context: ParserContext,
  mode: TextModes,
  ancestors: ElementNode[]
): boolean {
  const s = context.source

  if (mode === TextModes.DATA) {
    //TODO: probably bad performance
    if (
      startsWith(s, '</') &&
      ancestors.some(ancestor => startsWithEndTagOpen(s, ancestor.tag))
    ) {
      return true
    }
  } else if (mode === TextModes.RCDATA || mode === TextModes.RAWTEXT) {
    const parent = last(ancestors)
    if (parent && startsWithEndTagOpen(s, parent.tag)) {
      return true
    }
  } else if (mode === TextModes.CDATA && startsWith(s, ']]>')) {
    return true
  }

  return !s
}
|
||||
|
||||
/**
 * Whether `source` begins with an end tag for `tag`: `</`, then the tag name
 * (compared case-insensitively), then a character that terminates a tag name
 * (tab, CR, LF, form feed, space, `/`, `>`) or the end of input.
 *
 * CR is accepted as a terminator so that this check agrees with the tag-name
 * pattern used when the tag itself is parsed.
 */
function startsWithEndTagOpen(source: string, tag: string): boolean {
  if (!source.startsWith('</')) {
    return false
  }
  const nameEnd = 2 + tag.length
  if (source.slice(2, nameEnd).toLowerCase() !== tag.toLowerCase()) {
    return false
  }
  // End of input right after the name counts as a terminator.
  return /[\t\r\n\f />]/.test(source[nameEnd] || '>')
}
|
||||
|
||||
/**
 * Append `node` to `nodes`.
 *
 * Empty text nodes are dropped when `context.ignoreSpaces` is set. A text
 * node that begins exactly where the previous text node ends is merged into
 * it instead of being appended.
 */
function pushNode(
  context: ParserContext,
  nodes: RootNode['children'],
  node: RootNode['children'][0]
): void {
  if (node.type === NodeTypes.TEXT) {
    if (context.ignoreSpaces && node.isEmpty) {
      return
    }

    // Merge if both this and the previous node are text and those are consecutive.
    // This happens on "a < b" or something like.
    const prev = last(nodes)
    if (
      prev &&
      prev.type === NodeTypes.TEXT &&
      prev.loc.end.offset === node.loc.start.offset
    ) {
      prev.content += node.content
      prev.isEmpty = prev.content.trim().length === 0
      prev.loc.end = node.loc.end
      prev.loc.source += node.loc.source
      return
    }
  }

  nodes.push(node)
}
|
||||
|
||||
/**
 * Parse a `<![CDATA[ ... ]]>` section and return the nodes found inside it.
 * Reports `EOF_IN_CDATA` when the input ends before `]]>`.
 */
function parseCDATA(
  context: ParserContext,
  ancestors: ElementNode[]
): RootNode['children'] {
  const parent = last(ancestors)
  assert(parent == null || parent.ns !== Namespaces.HTML)
  assert(startsWith(context.source, '<![CDATA['))

  // Skip the opening `<![CDATA[`.
  advanceBy(context, 9)
  const nodes = parseChildren(context, TextModes.CDATA, ancestors)

  if (context.source.length > 0) {
    // Skip the closing `]]>`.
    assert(startsWith(context.source, ']]>'))
    advanceBy(context, 3)
  } else {
    emitError(context, ParserErrorTypes.EOF_IN_CDATA)
  }

  return nodes
}
|
||||
|
||||
/**
 * Parse a comment starting at `<!--`.
 *
 * The comment ends at the first `-->` or `--!>`; if neither is present the
 * rest of the input becomes the comment. While consuming the comment, the
 * cursor is stepped to each nested `<!--` so that `NESTED_COMMENT` errors
 * are reported at the right position.
 */
function parseComment(context: ParserContext): CommentNode {
  assert(startsWith(context.source, '<!--'))

  const start = getCursor(context)
  const source = context.source
  let content: string

  // Regular comment.
  const closer = /--(\!)?>/.exec(source)
  if (closer === null) {
    content = source.slice(4)
    advanceBy(context, source.length)
    emitError(context, ParserErrorTypes.EOF_IN_COMMENT)
  } else {
    if (closer.index <= 3) {
      emitError(context, ParserErrorTypes.ABRUPT_CLOSING_OF_EMPTY_COMMENT)
    }
    if (closer[1]) {
      emitError(context, ParserErrorTypes.INCORRECTLY_CLOSED_COMMENT)
    }
    content = source.slice(4, closer.index)

    // Advancing with reporting nested comments.
    const body = source.slice(0, closer.index)
    // Number of characters of `source` already consumed.
    let consumed = 0
    let nested = body.indexOf('<!--', 1)
    while (nested !== -1) {
      advanceBy(context, nested - consumed)
      consumed = nested
      if (nested + 4 < body.length) {
        emitError(context, ParserErrorTypes.NESTED_COMMENT)
      }
      nested = body.indexOf('<!--', nested + 1)
    }
    advanceBy(context, closer.index + closer[0].length - consumed)
  }

  return {
    type: NodeTypes.COMMENT,
    content,
    loc: getSelection(context, start)
  }
}
|
||||
|
||||
/**
 * Consume markup that is treated as a comment (`<!...>`, `<?...>`, or `</`
 * followed by an invalid character) up to and including the next `>`, or to
 * the end of input when there is none.
 *
 * The content starts after `<` for `<?` and after the first two characters
 * otherwise.
 */
function parseBogusComment(context: ParserContext): CommentNode | undefined {
  assert(/^<(?:[\!\?]|\/[^a-z>])/i.test(context.source))

  const start = getCursor(context)
  const source = context.source
  const contentStart = source[1] === '?' ? 1 : 2

  const closeIndex = source.indexOf('>')
  const isClosed = closeIndex !== -1
  const content = isClosed
    ? source.slice(contentStart, closeIndex)
    : source.slice(contentStart)
  advanceBy(context, isClosed ? closeIndex + 1 : source.length)

  return {
    type: NodeTypes.COMMENT,
    content,
    loc: getSelection(context, start)
  }
}
|
||||
|
||||
/**
 * Parse an element: its start tag, its children and its end tag.
 *
 * Self-closing and void elements are returned right after the start tag.
 * A missing end tag is reported as `X_MISSING_END_TAG`.
 *
 * @param context - parser state; its cursor is advanced
 * @param ancestors - stack of open elements; the new element is pushed while
 *   its children are parsed and popped afterwards
 */
function parseElement(
  context: ParserContext,
  ancestors: ElementNode[]
): ElementNode | undefined {
  assert(/^<[a-z]/i.test(context.source))

  // Start tag.
  const parent = last(ancestors)
  const element = parseTag(context, TagType.Start, parent)

  if (element.isSelfClosing || context.isVoidTag(element.tag)) {
    return element
  }

  // Children.
  ancestors.push(element)
  const mode = context.getTextMode(element.tag, element.ns)
  const children = parseChildren(context, mode, ancestors)
  ancestors.pop()

  element.children = children

  // End tag.
  if (startsWithEndTagOpen(context.source, element.tag)) {
    parseTag(context, TagType.End, parent)
  } else {
    emitError(context, ParserErrorTypes.X_MISSING_END_TAG)
    if (context.source.length === 0 && element.tag.toLowerCase() === 'script') {
      const first = children[0]
      if (first && startsWith(first.loc.source, '<!--')) {
        emitError(
          context,
          ParserErrorTypes.EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT
        )
      }
    }
  }

  // Widen the location from the start tag to the whole element.
  element.loc = getSelection(context, element.loc.start)
  return element
}
|
||||
|
||||
/** Which kind of tag `parseTag` is expected to read. */
const enum TagType {
  Start = 0,
  End = 1
}
|
||||
|
||||
/**
 * Parse a tag (E.g. `<div id=a>`) with that type (start tag or end tag).
 *
 * Attributes are always parsed (so that errors in them are reported) but are
 * only kept for start tags. The returned element has no children yet.
 *
 * @param context - parser state; its cursor is advanced past the tag
 * @param type - whether a start tag or an end tag is expected at the cursor
 * @param parent - enclosing element, passed to `getNamespace`
 */
function parseTag(
  context: ParserContext,
  type: TagType,
  parent: ElementNode | undefined
): ElementNode {
  assert(/^<\/?[a-z]/i.test(context.source))
  assert(
    type === (startsWith(context.source, '</') ? TagType.End : TagType.Start)
  )

  // Tag open.
  const start = getCursor(context)
  const opener = /^<\/?([a-z][^\t\r\n\f />]*)/i.exec(context.source)!
  const tag = opener[1]
  const props: ElementNode['props'] = []
  const ns = context.getNamespace(tag, parent)

  advanceBy(context, opener[0].length)
  advanceSpaces(context)

  // Attributes.
  const attributeNames = new Set<string>()
  const atTagClose = (): boolean =>
    context.source.length === 0 ||
    startsWith(context.source, '>') ||
    startsWith(context.source, '/>')

  while (!atTagClose()) {
    if (startsWith(context.source, '/')) {
      emitError(context, ParserErrorTypes.UNEXPECTED_SOLIDUS_IN_TAG)
      advanceBy(context, 1)
      advanceSpaces(context)
      continue
    }
    if (type === TagType.End) {
      emitError(context, ParserErrorTypes.END_TAG_WITH_ATTRIBUTES)
    }

    const attr = parseAttribute(context, attributeNames)
    if (type === TagType.Start) {
      props.push(attr)
    }

    if (/^[^\t\r\n\f />]/.test(context.source)) {
      emitError(context, ParserErrorTypes.MISSING_WHITESPACE_BETWEEN_ATTRIBUTES)
    }
    advanceSpaces(context)
  }

  // Tag close.
  let isSelfClosing = false
  if (context.source.length === 0) {
    emitError(context, ParserErrorTypes.EOF_IN_TAG)
  } else {
    isSelfClosing = startsWith(context.source, '/>')
    if (type === TagType.End && isSelfClosing) {
      emitError(context, ParserErrorTypes.END_TAG_WITH_TRAILING_SOLIDUS)
    }
    advanceBy(context, isSelfClosing ? 2 : 1)
  }

  // `slot` and `template` are special; any other tag containing an upper-case
  // letter or a hyphen is classified as a component.
  const tagType =
    tag === 'slot'
      ? ElementTypes.SLOT
      : tag === 'template'
        ? ElementTypes.TEMPLATE
        : /[A-Z-]/.test(tag)
          ? ElementTypes.COMPONENT
          : ElementTypes.ELEMENT

  return {
    type: NodeTypes.ELEMENT,
    ns,
    tag,
    tagType,
    props,
    isSelfClosing,
    children: [],
    loc: getSelection(context, start)
  }
}
|
||||
|
||||
/**
 * Parse one attribute at the cursor and classify it as a plain attribute or
 * a directive (name starting with `v-`, `:`, `@` or `#`).
 *
 * @param context - parser state; its cursor is advanced past the attribute
 * @param nameSet - names already seen on this tag; used to report duplicates
 *   and updated with the parsed name
 */
function parseAttribute(
  context: ParserContext,
  nameSet: Set<string>
): AttributeNode | DirectiveNode {
  assert(/^[^\t\r\n\f />]/.test(context.source))

  // Name.
  const start = getCursor(context)
  const match = /^[^\t\r\n\f />][^\t\r\n\f />=]*/.exec(context.source)!
  const name = match[0]

  if (nameSet.has(name)) {
    emitError(context, ParserErrorTypes.DUPLICATE_ATTRIBUTE)
  }
  nameSet.add(name)

  if (name[0] === '=') {
    emitError(
      context,
      ParserErrorTypes.UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME
    )
  }
  {
    const pattern = /["'<]/g
    let m: RegExpExecArray | null
    while ((m = pattern.exec(name)) !== null) {
      emitError(
        context,
        ParserErrorTypes.UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME,
        m.index
      )
    }
  }

  advanceBy(context, name.length)

  // Value
  let value: { content: string; loc: SourceLocation } | undefined = undefined
  if (/^[\t\r\n\f ]*=/.test(context.source)) {
    advanceSpaces(context)
    advanceBy(context, 1)
    advanceSpaces(context)
    value = parseAttributeValue(context)
    if (!value) {
      emitError(context, ParserErrorTypes.MISSING_ATTRIBUTE_VALUE)
    }
  }
  const loc = getSelection(context, start)

  if (/^(v-|:|@|#)/.test(name)) {
    // Groups: [1] directive name after `v-`, [2] argument, [3] modifiers.
    const match = /(?:^v-([a-z0-9-]+))?(?:(?::|^@|^#)([^\.]+))?(.+)?$/i.exec(
      name
    )!

    let arg: ExpressionNode | undefined

    if (match[2]) {
      // The pattern is anchored at the end, so the argument is immediately
      // followed by the modifiers (if any) and then the end of the name.
      // Deriving the offset from that layout avoids locating the wrong text
      // when the argument also occurs earlier in the name (e.g. `v-on:on`).
      const modifiersLength = match[3] ? match[3].length : 0
      const startOffset = name.length - modifiersLength - match[2].length
      const loc = getSelection(
        context,
        getNewPosition(context, start, startOffset),
        getNewPosition(context, start, startOffset + match[2].length)
      )
      let content = match[2]
      let isStatic = true

      if (content.startsWith('[')) {
        isStatic = false

        if (!content.endsWith(']')) {
          emitError(
            context,
            ParserErrorTypes.X_MISSING_DYNAMIC_DIRECTIVE_ARGUMENT_END
          )
        }

        // Drop the first and last character (the brackets).
        content = content.substr(1, content.length - 2)
      }

      arg = {
        type: NodeTypes.EXPRESSION,
        content,
        isStatic,
        loc
      }
    }

    return {
      type: NodeTypes.DIRECTIVE,
      // Shorthands: `:` is `bind`, `@` is `on`, otherwise `slot`.
      name:
        match[1] ||
        (startsWith(name, ':')
          ? 'bind'
          : startsWith(name, '@')
            ? 'on'
            : 'slot'),
      exp: value && {
        type: NodeTypes.EXPRESSION,
        content: value.content,
        isStatic: false,
        loc: value.loc
      },
      arg,
      modifiers: match[3] ? match[3].substr(1).split('.') : [],
      loc
    }
  }

  return {
    type: NodeTypes.ATTRIBUTE,
    name,
    value: value && {
      type: NodeTypes.TEXT,
      content: value.content,
      isEmpty: value.content.trim().length === 0,
      loc: value.loc
    },
    loc
  }
}
|
||||
|
||||
/**
 * Parse an attribute value at the cursor, quoted or unquoted.
 *
 * A quoted value runs to the matching quote, or to the end of input when the
 * quote is never closed. An unquoted value runs to the next whitespace or
 * `>`.
 *
 * @returns the decoded content and its location (including any quotes), or
 *   `undefined` when there is no value at the cursor
 */
function parseAttributeValue(
  context: ParserContext
): { content: string; loc: SourceLocation } | undefined {
  const start = getCursor(context)
  let content: string

  if (/^["']/.test(context.source)) {
    // Quoted value.
    const quote = context.source[0]
    advanceBy(context, 1)

    const endIndex = context.source.indexOf(quote)
    const isClosed = endIndex !== -1
    content = parseTextData(
      context,
      isClosed ? endIndex : context.source.length,
      TextModes.ATTRIBUTE_VALUE
    )
    if (isClosed) {
      // Skip the closing quote.
      advanceBy(context, 1)
    }
  } else {
    // Unquoted
    const unquoted = /^[^\t\r\n\f >]+/.exec(context.source)
    if (unquoted === null) {
      return undefined
    }
    const raw = unquoted[0]

    const unexpectedChars = /["'<=`]/g
    for (
      let m = unexpectedChars.exec(raw);
      m !== null;
      m = unexpectedChars.exec(raw)
    ) {
      emitError(
        context,
        ParserErrorTypes.UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE,
        m.index
      )
    }
    content = parseTextData(context, raw.length, TextModes.ATTRIBUTE_VALUE)
  }

  return { content, loc: getSelection(context, start) }
}
|
||||
|
||||
/**
 * Parse an interpolation (by default `{{ ... }}`) at the cursor.
 *
 * @returns the expression node with trimmed content, or `undefined` (after
 *   reporting `X_MISSING_INTERPOLATION_END`, without consuming input) when
 *   the closing delimiter is missing
 */
function parseInterpolation(
  context: ParserContext,
  mode: TextModes
): ExpressionNode | undefined {
  const [open, close] = context.delimiters
  assert(startsWith(context.source, open))

  const closeIndex = context.source.indexOf(close, open.length)
  if (closeIndex === -1) {
    emitError(context, ParserErrorTypes.X_MISSING_INTERPOLATION_END)
    return undefined
  }

  const start = getCursor(context)
  advanceBy(context, open.length)
  const innerLength = closeIndex - open.length
  const content = parseTextData(context, innerLength, mode).trim()
  advanceBy(context, close.length)

  const loc = getSelection(context, start)
  return {
    type: NodeTypes.EXPRESSION,
    content,
    loc,
    isStatic: content === ''
  }
}
|
||||
|
||||
/**
 * Parse a text node starting at the cursor.
 *
 * At least one character is always consumed. The text ends at the next `<`
 * or opening interpolation delimiter found after the first character, at
 * `]]>` when in CDATA mode, or at the end of input — whichever comes first.
 */
function parseText(context: ParserContext, mode: TextModes): TextNode {
  assert(context.source.length > 0)

  const [open] = context.delimiters
  const source = context.source

  const candidates = [
    source.indexOf('<', 1),
    source.indexOf(open, 1),
    mode === TextModes.CDATA ? source.indexOf(']]>') : -1
  ]
  let endIndex = source.length
  for (const candidate of candidates) {
    if (candidate !== -1 && candidate < endIndex) {
      endIndex = candidate
    }
  }
  assert(endIndex > 0)

  const start = getCursor(context)
  const content = parseTextData(context, endIndex, mode)
  const loc = getSelection(context, start)

  return {
    type: NodeTypes.TEXT,
    content,
    loc,
    isEmpty: !content.trim()
  }
}
|
||||
|
||||
/**
 * Get text data with a given length from the current location.
 * This translates HTML entities in the text data.
 *
 * In RAWTEXT and CDATA modes the text is returned verbatim. In the other
 * modes named (`&amp;`) and numeric (`&#38;`, `&#x26;`, `&#X26;`) character
 * references are decoded, and malformed references are reported through
 * `emitError`.
 *
 * @param context - parser state; its cursor is advanced past the text
 * @param length - number of source characters to consume
 * @param mode - text mode; ATTRIBUTE_VALUE applies the stricter rule for
 *   named references that lack a trailing `;`
 * @returns the decoded text
 */
function parseTextData(
  context: ParserContext,
  length: number,
  mode: TextModes
): string {
  if (mode === TextModes.RAWTEXT || mode === TextModes.CDATA) {
    const text = context.source.slice(0, length)
    advanceBy(context, length)
    return text
  }

  // DATA or RCDATA.
  const end = context.offset + length
  let text: string = ''

  while (context.offset < end) {
    const head = /&(?:#x?)?/i.exec(context.source)
    if (!head || context.offset + head.index >= end) {
      // No further reference inside the requested range: copy the rest.
      const remaining = end - context.offset
      text += context.source.slice(0, remaining)
      advanceBy(context, remaining)
      break
    }

    // Advance to the "&".
    text += context.source.slice(0, head.index)
    advanceBy(context, head.index)

    if (head[0] === '&') {
      // Named character reference.
      let name = '',
        value: string | undefined = undefined
      if (/[0-9a-z]/i.test(context.source[1])) {
        // Try the longest possible name first and shrink until one is known.
        for (
          let nameLength = context.maxCRNameLength;
          !value && nameLength > 0;
          --nameLength
        ) {
          name = context.source.substr(1, nameLength)
          value = context.namedCharacterReferences[name]
        }
        if (value) {
          const semi = name.endsWith(';')
          if (
            mode === TextModes.ATTRIBUTE_VALUE &&
            !semi &&
            /[=a-z0-9]/i.test(context.source[1 + name.length] || '')
          ) {
            // In an attribute value, a reference without `;` that is followed
            // by `=` or an alphanumeric character is kept as literal text.
            text += '&'
            text += name
            advanceBy(context, 1 + name.length)
          } else {
            text += value
            advanceBy(context, 1 + name.length)
            if (!semi) {
              emitError(
                context,
                ParserErrorTypes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE
              )
            }
          }
        } else {
          emitError(context, ParserErrorTypes.UNKNOWN_NAMED_CHARACTER_REFERENCE)
          text += '&'
          text += name
          advanceBy(context, 1 + name.length)
        }
      } else {
        text += '&'
        advanceBy(context, 1)
      }
    } else {
      // Numeric character reference.
      // The head is matched case-insensitively, so accept `&#X` as well as `&#x`.
      const hex = head[0].toLowerCase() === '&#x'
      const pattern = hex ? /^&#x([0-9a-f]+);?/i : /^&#([0-9]+);?/
      const body = pattern.exec(context.source)
      if (!body) {
        text += head[0]
        emitError(
          context,
          ParserErrorTypes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE
        )
        advanceBy(context, head[0].length)
      } else {
        // https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
        let cp = Number.parseInt(body[1], hex ? 16 : 10)
        if (cp === 0) {
          emitError(context, ParserErrorTypes.NULL_CHARACTER_REFERENCE)
          cp = 0xfffd
        } else if (cp > 0x10ffff) {
          emitError(
            context,
            ParserErrorTypes.CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE
          )
          cp = 0xfffd
        } else if (cp >= 0xd800 && cp <= 0xdfff) {
          emitError(context, ParserErrorTypes.SURROGATE_CHARACTER_REFERENCE)
          cp = 0xfffd
        } else if ((cp >= 0xfdd0 && cp <= 0xfdef) || (cp & 0xfffe) === 0xfffe) {
          emitError(context, ParserErrorTypes.NONCHARACTER_CHARACTER_REFERENCE)
        } else if (
          (cp >= 0x01 && cp <= 0x08) ||
          cp === 0x0b ||
          (cp >= 0x0d && cp <= 0x1f) ||
          (cp >= 0x7f && cp <= 0x9f)
        ) {
          emitError(context, ParserErrorTypes.CONTROL_CHARACTER_REFERENCE)
          cp = CCR_REPLACEMENTS[cp] || cp
        }
        text += String.fromCodePoint(cp)
        advanceBy(context, body[0].length)
        if (!body[0].endsWith(';')) {
          emitError(
            context,
            ParserErrorTypes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE
          )
        }
      }
    }
  }

  return text
}
|
||||
|
||||
/**
 * Replacement code points for numeric character references in the
 * 0x80-0x9f range. Code points without an entry are left unchanged by
 * the caller.
 *
 * https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
 */
const CCR_REPLACEMENTS: { [key: number]: number | undefined } = {
  0x80: 0x20ac, // €
  0x82: 0x201a, // ‚
  0x83: 0x0192, // ƒ
  0x84: 0x201e, // „
  0x85: 0x2026, // …
  0x86: 0x2020, // †
  0x87: 0x2021, // ‡
  0x88: 0x02c6, // ˆ
  0x89: 0x2030, // ‰
  0x8a: 0x0160, // Š
  0x8b: 0x2039, // ‹
  0x8c: 0x0152, // Œ
  0x8e: 0x017d, // Ž
  0x91: 0x2018, // ‘
  0x92: 0x2019, // ’
  0x93: 0x201c, // “
  0x94: 0x201d, // ”
  0x95: 0x2022, // •
  0x96: 0x2013, // –
  0x97: 0x2014, // —
  0x98: 0x02dc, // ˜
  0x99: 0x2122, // ™
  0x9a: 0x0161, // š
  0x9b: 0x203a, // ›
  0x9c: 0x0153, // œ
  0x9e: 0x017e, // ž
  0x9f: 0x0178 // Ÿ
}
|
112
packages/compiler-core/src/parserOptionsMinimal.ts
Normal file
112
packages/compiler-core/src/parserOptionsMinimal.ts
Normal file
@ -0,0 +1,112 @@
|
||||
import { TextModes, ParserOptions } from './parser'
|
||||
import { ElementNode, Namespaces, Position, Node } from './ast'
|
||||
import { ParserErrorTypes } from './errorTypes'
|
||||
|
||||
/**
 * A small, self-contained `ParserOptions` preset.
 *
 * It recognises the `svg` / `math` namespace switches, the `textarea`,
 * `style` and `script` text modes, the HTML void tags, five named character
 * references, and reports every parse error by throwing.
 */
export const parserOptionsMinimal: ParserOptions = {
  // NOTE(review): presumably the opening/closing interpolation markers —
  // confirm against the parser that consumes this option.
  delimiters: [`{{`, `}}`],
  ignoreSpaces: true,

  /**
   * Resolves the namespace of `tag`.
   *
   * The parent's namespace is inherited (HTML when there is no parent).
   * Only while in HTML do `svg` and `math` switch to their own namespace.
   */
  getNamespace(tag: string, parent: ElementNode | undefined): Namespaces {
    const inherited = parent ? parent.ns : Namespaces.HTML
    if (inherited !== Namespaces.HTML) {
      return inherited
    }
    switch (tag) {
      case 'svg':
        return Namespaces.SVG
      case 'math':
        return Namespaces.MATH_ML
      default:
        return inherited
    }
  },

  /**
   * Picks the text mode for the content of `tag`.
   *
   * Outside the HTML namespace everything is DATA. Inside it, `textarea` is
   * RCDATA and `style` / `script` are RAWTEXT (matched case-insensitively).
   */
  getTextMode(tag: string, ns: Namespaces): TextModes {
    if (ns !== Namespaces.HTML) {
      return TextModes.DATA
    }
    if (/^textarea$/i.test(tag)) {
      return TextModes.RCDATA
    }
    return /^(?:style|script)$/i.test(tag) ? TextModes.RAWTEXT : TextModes.DATA
  },

  /**
   * Returns true when `tag` is one of the listed void tag names
   * (case-insensitive, whole-name match).
   */
  isVoidTag(tag: string): boolean {
    const voidTagPattern = /^(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i
    return voidTagPattern.test(tag)
  },

  // Named character references known to this preset. Keys omit the leading
  // '&' and include the trailing ';'.
  namedCharacterReferences: {
    'gt;': '>',
    'lt;': '<',
    'amp;': '&',
    'apos;': "'",
    'quot;': '"'
  },

  /**
   * Reports a parse error by throwing.
   *
   * @throws SyntaxError whose message is the text registered for `code`
   * followed by `(line:column)`, with `code` and `loc` attached as
   * properties.
   */
  onError(code: ParserErrorTypes, loc: Position): void {
    const description = `${messages[code]} (${loc.line}:${loc.column})`
    throw Object.assign(new SyntaxError(description), { code, loc })
  },

  /**
   * Identity transform: hands back the node it was given.
   */
  transform(node: Node): Node {
    return node
  }
}
|
||||
|
||||
/**
 * Human-readable text for each parser error code.
 *
 * `parserOptionsMinimal.onError` looks a code up here and uses the result as
 * the leading part of the thrown `SyntaxError` message.
 */
const messages: { [code: number]: string } = {
  [ParserErrorTypes.ABRUPT_CLOSING_OF_EMPTY_COMMENT]: 'Illegal comment.',
  [ParserErrorTypes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE]:
    'Illegal numeric character reference: invalid character.',
  [ParserErrorTypes.CDATA_IN_HTML_CONTENT]:
    'CDATA section is allowed only in XML context.',
  [ParserErrorTypes.CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE]:
    'Illegal numeric character reference: too big.',
  [ParserErrorTypes.CONTROL_CHARACTER_REFERENCE]:
    'Illegal numeric character reference: control character.',
  [ParserErrorTypes.DUPLICATE_ATTRIBUTE]: 'Duplicate attribute.',
  [ParserErrorTypes.END_TAG_WITH_ATTRIBUTES]: 'End tag cannot have attributes.',
  [ParserErrorTypes.END_TAG_WITH_TRAILING_SOLIDUS]: "Illegal '/' in tags.",
  [ParserErrorTypes.EOF_BEFORE_TAG_NAME]: 'Unexpected EOF in tag.',
  [ParserErrorTypes.EOF_IN_CDATA]: 'Unexpected EOF in CDATA section.',
  [ParserErrorTypes.EOF_IN_COMMENT]: 'Unexpected EOF in comment.',
  [ParserErrorTypes.EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT]:
    'Unexpected EOF in script.',
  [ParserErrorTypes.EOF_IN_TAG]: 'Unexpected EOF in tag.',
  [ParserErrorTypes.INCORRECTLY_CLOSED_COMMENT]: 'Incorrectly closed comment.',
  [ParserErrorTypes.INCORRECTLY_OPENED_COMMENT]: 'Incorrectly opened comment.',
  // The hint must spell out the escaped entity: telling the author to write a
  // literal '<' would reproduce the very error being reported.
  [ParserErrorTypes.INVALID_FIRST_CHARACTER_OF_TAG_NAME]:
    "Illegal tag name. Use '&lt;' to print '<'.",
  [ParserErrorTypes.MISSING_ATTRIBUTE_VALUE]: 'Attribute value was expected.',
  [ParserErrorTypes.MISSING_END_TAG_NAME]: 'End tag name was expected.',
  [ParserErrorTypes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE]:
    'Semicolon was expected.',
  [ParserErrorTypes.MISSING_WHITESPACE_BETWEEN_ATTRIBUTES]:
    'Whitespace was expected.',
  [ParserErrorTypes.NESTED_COMMENT]: "Unexpected '<!--' in comment.",
  [ParserErrorTypes.NONCHARACTER_CHARACTER_REFERENCE]:
    'Illegal numeric character reference: non character.',
  [ParserErrorTypes.NULL_CHARACTER_REFERENCE]:
    'Illegal numeric character reference: null character.',
  [ParserErrorTypes.SURROGATE_CHARACTER_REFERENCE]:
    'Illegal numeric character reference: non-pair surrogate.',
  [ParserErrorTypes.UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME]:
    'Attribute name cannot contain U+0022 ("), U+0027 (\'), and U+003C (<).',
  [ParserErrorTypes.UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE]:
    'Unquoted attribute value cannot contain U+0022 ("), U+0027 (\'), U+003C (<), U+003D (=), and U+0060 (`).',
  [ParserErrorTypes.UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME]:
    "Attribute name cannot start with '='.",
  [ParserErrorTypes.UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME]:
    "'<?' is allowed only in XML context.",
  [ParserErrorTypes.UNEXPECTED_SOLIDUS_IN_TAG]: "Illegal '/' in tags.",
  [ParserErrorTypes.UNKNOWN_NAMED_CHARACTER_REFERENCE]: 'Unknown entity name.',
  [ParserErrorTypes.X_INVALID_END_TAG]: 'Invalid end tag.',
  [ParserErrorTypes.X_MISSING_END_TAG]: 'End tag was not found.',
  [ParserErrorTypes.X_MISSING_INTERPOLATION_END]:
    'Interpolation end sign was not found.'
}
|
2307
packages/compiler-core/src/parserOptionsStandard.ts
Normal file
2307
packages/compiler-core/src/parserOptionsStandard.ts
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user