feat(compiler): port parser implementation based on work by @znck and @mysticatea

2019-09-16 14:43:29 -04:00
parent 54a78f3856
commit 86ae92303d
8 changed files with 13373 additions and 1 deletions
--- a/packages/compiler-core/tests/snapshots/parse.spec.ts.snap
+++ b/packages/compiler-core/tests/snapshots/parse.spec.ts.snap
--- a/packages/compiler-core/tests/parse.spec.ts
+++ b/packages/compiler-core/tests/parse.spec.ts
--- a/packages/compiler-core/src/ast.ts
+++ b/packages/compiler-core/src/ast.ts
@@ -0,0 +1,87 @@
+export const enum NodeTypes { // Numeric tag stored in every AST node's `type` field.
+  TEXT, // TextNode
+  COMMENT, // CommentNode
+  ELEMENT, // ElementNode
+  ATTRIBUTE, // AttributeNode
+  EXPRESSION, // ExpressionNode
+  DIRECTIVE, // DirectiveNode
+  ROOT // RootNode
+}
+
+export const enum ElementTypes { // Classification stored in `ElementNode.tagType`.
+  ELEMENT, // default when none of the cases below applies
+  COMPONENT, // the parser assigns this when the tag contains an uppercase letter or '-'
+  SLOT, // tag named exactly 'slot'
+  TEMPLATE // tag named exactly 'template' (NOTE(review): the original comment also mentioned 'component' — confirm intent)
+}
+
+export const enum Namespaces { // Markup namespace stored in `ElementNode.ns`.
+  HTML, // used by the parser when there is no open parent element
+  SVG, // allows CDATA section and forbids end tag omission.
+  MATH_ML // allows CDATA section and forbids end tag omission.
+}
+
+export interface Node { // Base shape shared by every AST node interface in this file.
+  type: NodeTypes // discriminant; each sub-interface pins it to a single member
+  loc: SourceLocation // range of the original source covered by the node
+}
+
+export interface RootNode extends Node { // Top-level node returned by `parse`.
+  type: NodeTypes.ROOT
+  children: Array<ElementNode | ExpressionNode | TextNode | CommentNode> // top-level nodes in source order
+}
+
+export interface ElementNode extends Node { // An element: its start tag, its children, and (via `loc`) its end tag when present.
+  type: NodeTypes.ELEMENT
+  ns: Namespaces // result of `ParserOptions.getNamespace` for this tag
+  tag: string // tag name exactly as written in the source (case is not normalised)
+  tagType: ElementTypes
+  isSelfClosing: boolean // true when the start tag ended with '/>'
+  props: Array<AttributeNode | DirectiveNode> // attributes and directives in source order
+  children: Array<ElementNode | ExpressionNode | TextNode | CommentNode>
+}
+
+export interface TextNode extends Node { // A run of text (also used for plain attribute values).
+  type: NodeTypes.TEXT
+  content: string // text after character-reference decoding by the parser
+  isEmpty: boolean // true when `content` is empty or whitespace-only (computed with trim())
+}
+
+export interface CommentNode extends Node { // A comment, or markup the parser treats as a "bogus comment".
+  type: NodeTypes.COMMENT
+  content: string // text between the opening and closing markers
+}
+
+export interface AttributeNode extends Node { // A plain (non-directive) attribute.
+  type: NodeTypes.ATTRIBUTE
+  name: string // attribute name as written
+  value: TextNode | undefined // undefined when no value was parsed for the attribute
+}
+
+export interface DirectiveNode extends Node { // An attribute whose name starts with 'v-', ':', '@' or '#'.
+  type: NodeTypes.DIRECTIVE
+  name: string // name after 'v-'; the shorthands map to 'bind' (':'), 'on' ('@') and 'slot' ('#')
+  exp: ExpressionNode | undefined // the attribute value, if any
+  arg: ExpressionNode | undefined // the directive argument, if any
+  modifiers: string[] // '.'-separated suffixes of the name
+}
+
+export interface ExpressionNode extends Node { // Interpolation content, directive value or directive argument.
+  type: NodeTypes.EXPRESSION
+  content: string // expression text
+  isStatic: boolean // the parser sets false for directive values and `[...]` arguments, true for plain arguments
+}
+
+export interface Position { // A cursor location in the source text.
+  offset: number // from start of file (the parser starts counting at 0)
+  line: number // the parser starts counting at 1
+  column: number // the parser starts counting at 1
+}
+
+// The node's range. `start` is inclusive and `end` is exclusive:
+// [start, end)
+export interface SourceLocation {
+  start: Position // position of the first character of the range
+  end: Position // position just past the last character of the range
+  source: string // the parser fills this with the original source sliced from start.offset to end.offset
+}
--- a/packages/compiler-core/src/errorTypes.ts
+++ b/packages/compiler-core/src/errorTypes.ts
@@ -0,0 +1,37 @@
+export const enum ParserErrorTypes { // Error codes passed as the first argument of `ParserOptions.onError`.
+  ABRUPT_CLOSING_OF_EMPTY_COMMENT,
+  ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
+  CDATA_IN_HTML_CONTENT,
+  CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE,
+  CONTROL_CHARACTER_REFERENCE,
+  DUPLICATE_ATTRIBUTE,
+  END_TAG_WITH_ATTRIBUTES,
+  END_TAG_WITH_TRAILING_SOLIDUS,
+  EOF_BEFORE_TAG_NAME,
+  EOF_IN_CDATA,
+  EOF_IN_COMMENT,
+  EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT,
+  EOF_IN_TAG,
+  INCORRECTLY_CLOSED_COMMENT,
+  INCORRECTLY_OPENED_COMMENT,
+  INVALID_FIRST_CHARACTER_OF_TAG_NAME,
+  MISSING_ATTRIBUTE_VALUE,
+  MISSING_END_TAG_NAME,
+  MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
+  MISSING_WHITESPACE_BETWEEN_ATTRIBUTES,
+  NESTED_COMMENT,
+  NONCHARACTER_CHARACTER_REFERENCE,
+  NULL_CHARACTER_REFERENCE,
+  SURROGATE_CHARACTER_REFERENCE,
+  UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME,
+  UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE,
+  UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME,
+  UNEXPECTED_NULL_CHARACTER, // NOTE(review): not emitted anywhere in the visible parser code
+  UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME,
+  UNEXPECTED_SOLIDUS_IN_TAG,
+  UNKNOWN_NAMED_CHARACTER_REFERENCE,
+  X_INVALID_END_TAG, // NOTE(review): the X_ entries look like parser-specific additions rather than HTML-spec error names — confirm
+  X_MISSING_END_TAG,
+  X_MISSING_INTERPOLATION_END,
+  X_MISSING_DYNAMIC_DIRECTIVE_ARGUMENT_END
+}
--- a/packages/compiler-core/src/index.ts
+++ b/packages/compiler-core/src/index.ts
@@ -1 +1,3 @@
-// TODO
+export { parse } from './parser'
+export * from './ast'
+export * from './errorTypes'
--- a/packages/compiler-core/src/parser.ts
+++ b/packages/compiler-core/src/parser.ts
@@ -0,0 +1,918 @@
+import assert from 'assert'
+import { ParserErrorTypes } from './errorTypes'
+import {
+  Node,
+  AttributeNode,
+  CommentNode,
+  DirectiveNode,
+  ElementNode,
+  ElementTypes,
+  ExpressionNode,
+  Namespaces,
+  NodeTypes,
+  Position,
+  RootNode,
+  SourceLocation,
+  TextNode
+} from './ast'
+
+export interface ParserOptions { // Caller-supplied hooks and settings consumed by `parse`.
+  isVoidTag: (tag: string) => boolean // e.g. img, br, hr; such elements are returned without parsing children
+  getNamespace: (tag: string, parent: ElementNode | undefined) => Namespaces // called once per parsed tag
+  getTextMode: (tag: string, ns: Namespaces) => TextModes // selects how an element's children are tokenised
+  delimiters: [string, string] // ['{{', '}}'] — interpolation open and close markers
+  transform: (node: Node) => Node // NOTE(review): not called anywhere in the visible parser code
+  ignoreSpaces: boolean // when true, text nodes whose `isEmpty` is true are dropped from child lists
+
+  // Map to HTML entities. E.g., `{ "amp;": "&" }`
+  // The full set is https://html.spec.whatwg.org/multipage/named-characters.html#named-character-references
+  namedCharacterReferences: { [name: string]: string | undefined } // keys exclude the leading '&'
+
+  onError: (type: ParserErrorTypes, loc: Position) => void // invoked for every reported parse error
+}
+
+export const enum TextModes { // Tokenisation mode for a child list or a piece of text data.
+  // | Elements | Entities | End sign              | Inside of
+  DATA, // | ✔       | ✔       | End tags of ancestors |
+  RCDATA, // | ✘       | ✔       | End tag of the parent | <textarea>
+  RAWTEXT, // | ✘       | ✘       | End tag of the parent | <style>,<script>
+  CDATA, // child list ends at ']]>'; text is copied without entity decoding
+  ATTRIBUTE_VALUE // passed to parseTextData when decoding attribute values
+}
+
+interface ParserContext extends ParserOptions { // Mutable parser state: the options plus the current cursor.
+  readonly originalSource: string // full input; never modified
+  source: string // remaining unparsed input; shrinks as the cursor advances
+  offset: number // number of characters consumed so far
+  line: number // current line, starting at 1
+  column: number // current column, starting at 1
+  maxCRNameLength: number // length of the longest key in `namedCharacterReferences`
+}
+
+/**
+ * Parses `content` into an AST.
+ *
+ * @param content Template source text.
+ * @param options Hooks and settings; copied into the internal parser context.
+ * @returns A root node whose `loc` spans from the initial cursor position to
+ *   wherever the cursor stopped after parsing the children.
+ */
+export function parse(content: string, options: ParserOptions): RootNode {
+  const context = createParserContext(content, options)
+  const documentStart = getCursor(context)
+
+  // Children are parsed first: the selection below ends at the current cursor.
+  const children = parseChildren(context, TextModes.DATA, [])
+  const loc = getSelection(context, documentStart)
+
+  return { type: NodeTypes.ROOT, children, loc }
+}
+
+/** Returns the final element of `xs`, or `undefined` when `xs` is empty. */
+function last<T>(xs: T[]): T | undefined {
+  const finalIndex = xs.length - 1
+  return xs[finalIndex]
+}
+
+/** Reports whether `source` begins with `searchString`. */
+function startsWith(source: string, searchString: string): boolean {
+  const prefix = source.slice(0, searchString.length)
+  return prefix === searchString
+}
+
+/**
+ * Consumes `numberOfCharacters` characters from the front of
+ * `context.source` and moves the cursor (`offset`, `line`, `column`)
+ * accordingly.
+ *
+ * Columns are 1-based, so after crossing a line break the column is the
+ * number of characters consumed on the new line plus one.
+ *
+ * @param context Parser state; mutated in place.
+ * @param numberOfCharacters Must not exceed the remaining source length.
+ */
+function advanceBy(context: ParserContext, numberOfCharacters: number): void {
+  assert(numberOfCharacters <= context.source.length)
+
+  const { column, source } = context
+  const consumed = source.slice(0, numberOfCharacters)
+  const lines = consumed.split(/\r?\n/)
+  const lastLine = lines[lines.length - 1]
+
+  context.source = source.slice(numberOfCharacters)
+  context.offset += numberOfCharacters
+  context.line += lines.length - 1
+  context.column =
+    lines.length === 1
+      ? column + numberOfCharacters // still on the same line
+      : lastLine.length + 1 // restart counting on the new line
+}
+
+/** Skips any run of tab, CR, LF, FF or space characters at the cursor. */
+function advanceSpaces(context: ParserContext): void {
+  const leading = /^[\t\r\n\f ]+/.exec(context.source)
+  if (leading === null) {
+    return
+  }
+  advanceBy(context, leading[0].length)
+}
+
+/** Returns a snapshot (a fresh object) of the context's current position. */
+function getCursor(context: ParserContext): Position {
+  return {
+    column: context.column,
+    line: context.line,
+    offset: context.offset
+  }
+}
+
+/**
+ * Computes the position reached by moving `numberOfCharacters` forward from
+ * `start` in the original source, without touching the parser cursor.
+ *
+ * @param context Parser state; only `originalSource` is read.
+ * @param start Position to measure from; not mutated.
+ * @param numberOfCharacters Distance to move, in characters.
+ * @returns A new `Position` object.
+ */
+function getNewPosition(
+  context: ParserContext,
+  start: Position,
+  numberOfCharacters: number
+): Position {
+  const { originalSource } = context
+  // The slice end is an absolute index, so it must be offset by the start.
+  const covered = originalSource.slice(
+    start.offset,
+    start.offset + numberOfCharacters
+  )
+  const lines = covered.split(/\r?\n/)
+  const lastLine = lines[lines.length - 1]
+
+  return {
+    column:
+      lines.length === 1
+        ? start.column + numberOfCharacters // same line
+        : lastLine.length + 1, // columns are 1-based on the new line
+    line: start.line + lines.length - 1,
+    offset: start.offset + numberOfCharacters
+  }
+}
+
+/**
+ * Builds a `SourceLocation` from `start` to `end`.
+ *
+ * @param end Defaults to the current cursor when omitted.
+ * @returns The range plus the matching slice of the original source.
+ */
+function getSelection(
+  context: ParserContext,
+  start: Position,
+  end?: Position
+): SourceLocation {
+  const stop = end || getCursor(context)
+  const source = context.originalSource.slice(start.offset, stop.offset)
+  return { start, end: stop, source }
+}
+
+/**
+ * Reports a parse error through `context.onError`.
+ *
+ * @param type Error code.
+ * @param offset Optional number of characters past the cursor at which the
+ *   error is located; it is added to both `offset` and `column`.
+ */
+function emitError(
+  context: ParserContext,
+  type: ParserErrorTypes,
+  offset?: number
+): void {
+  const shift = offset || 0
+  const cursor = getCursor(context)
+  context.onError(type, {
+    column: cursor.column + shift,
+    line: cursor.line,
+    offset: cursor.offset + shift
+  })
+}
+
+/**
+ * Creates the mutable parser state for one `parse` call.
+ *
+ * The result is a shallow copy of `options` with the cursor placed at
+ * offset 0, line 1, column 1, and with `maxCRNameLength` set to the length
+ * of the longest named-character-reference key (0 when there are none).
+ */
+function createParserContext(
+  content: string,
+  options: ParserOptions
+): ParserContext {
+  let maxCRNameLength = 0
+  for (const name of Object.keys(options.namedCharacterReferences)) {
+    if (name.length > maxCRNameLength) {
+      maxCRNameLength = name.length
+    }
+  }
+
+  return {
+    ...options,
+    column: 1,
+    line: 1,
+    offset: 0,
+    originalSource: content,
+    source: content,
+    maxCRNameLength
+  }
+}
+
+function parseChildren( // Parses sibling nodes from the cursor until isEnd() reports the end of this child list.
+  context: ParserContext,
+  mode: TextModes,
+  ancestors: ElementNode[]
+): RootNode['children'] {
+  const parent = last(ancestors)
+  const ns = parent ? parent.ns : Namespaces.HTML // no open element: default to the HTML namespace
+  const nodes: RootNode['children'] = []
+
+  while (!isEnd(context, mode, ancestors)) {
+    assert(context.source.length > 0)
+    const s = context.source
+    let node: any = null // stays null when no branch below produces a node; text parsing is then the fallback
+
+    if (startsWith(s, context.delimiters[0])) {
+      // '{{'
+      node = parseInterpolation(context, mode) // undefined when the closing delimiter is missing
+    } else if (mode === TextModes.DATA && s[0] === '<') {
+      // https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
+      if (s.length === 1) {
+        emitError(context, ParserErrorTypes.EOF_BEFORE_TAG_NAME, 1)
+      } else if (s[1] === '!') {
+        // https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
+        if (startsWith(s, '<!--')) {
+          node = parseComment(context)
+        } else if (startsWith(s, '<!DOCTYPE')) {
+          // DOCTYPE is not modelled in the AST; it is kept as a bogus comment.
+          node = parseBogusComment(context)
+        } else if (startsWith(s, '<![CDATA[')) {
+          if (ns !== Namespaces.HTML) {
+            node = parseCDATA(context, ancestors) // returns an array of nodes
+          } else {
+            emitError(context, ParserErrorTypes.CDATA_IN_HTML_CONTENT)
+            node = parseBogusComment(context)
+          }
+        } else {
+          emitError(context, ParserErrorTypes.INCORRECTLY_OPENED_COMMENT)
+          node = parseBogusComment(context)
+        }
+      } else if (s[1] === '/') {
+        // https://html.spec.whatwg.org/multipage/parsing.html#end-tag-open-state
+        if (s.length === 2) {
+          emitError(context, ParserErrorTypes.EOF_BEFORE_TAG_NAME, 2)
+        } else if (s[2] === '>') {
+          emitError(context, ParserErrorTypes.MISSING_END_TAG_NAME, 2)
+          advanceBy(context, 3) // skip '</>' without producing a node
+          continue
+        } else if (/[a-z]/i.test(s[2])) {
+          emitError(context, ParserErrorTypes.X_INVALID_END_TAG) // isEnd() did not match it against any open ancestor
+          parseTag(context, TagType.End, parent) // consume the stray end tag; its result is discarded
+          continue
+        } else {
+          emitError(
+            context,
+            ParserErrorTypes.INVALID_FIRST_CHARACTER_OF_TAG_NAME,
+            2
+          )
+          node = parseBogusComment(context)
+        }
+      } else if (/[a-z]/i.test(s[1])) {
+        node = parseElement(context, ancestors)
+      } else if (s[1] === '?') {
+        emitError(
+          context,
+          ParserErrorTypes.UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME,
+          1
+        )
+        node = parseBogusComment(context)
+      } else {
+        emitError(
+          context,
+          ParserErrorTypes.INVALID_FIRST_CHARACTER_OF_TAG_NAME,
+          1
+        )
+      }
+    }
+    if (!node) {
+      node = parseText(context, mode) // fallback: treat the input at the cursor as text
+    }
+
+    if (Array.isArray(node)) {
+      node.forEach(pushNode.bind(null, context, nodes)) // array results are pushed one by one
+    } else {
+      pushNode(context, nodes, node)
+    }
+  }
+
+  return nodes
+}
+
+/**
+ * Decides whether the current child list ends at the cursor.
+ *
+ * - DATA: the input starts with the end tag of any open ancestor.
+ * - RCDATA / RAWTEXT: the input starts with the end tag of the parent.
+ * - CDATA: the input starts with `]]>`.
+ *
+ * In every mode the list also ends when no input remains.
+ */
+function isEnd(
+  context: ParserContext,
+  mode: TextModes,
+  ancestors: ElementNode[]
+): boolean {
+  const s = context.source
+
+  if (mode === TextModes.DATA) {
+    // TODO: probably bad performance (checks every ancestor on each '</')
+    if (
+      startsWith(s, '</') &&
+      ancestors.some(ancestor => startsWithEndTagOpen(s, ancestor.tag))
+    ) {
+      return true
+    }
+  } else if (mode === TextModes.RCDATA || mode === TextModes.RAWTEXT) {
+    const parent = last(ancestors)
+    if (parent && startsWithEndTagOpen(s, parent.tag)) {
+      return true
+    }
+  } else if (mode === TextModes.CDATA) {
+    if (startsWith(s, ']]>')) {
+      return true
+    }
+  }
+
+  return !s
+}
+
+/**
+ * Reports whether `source` begins with an end tag for `tag`, compared
+ * case-insensitively: `</` + tag name + a character that terminates a tag
+ * name (whitespace, `/`, `>`), or end of input.
+ *
+ * Carriage return counts as whitespace here, matching the whitespace
+ * classes used by the rest of the parser (`[\t\r\n\f ]`).
+ */
+function startsWithEndTagOpen(source: string, tag: string): boolean {
+  return (
+    startsWith(source, '</') &&
+    source.substr(2, tag.length).toLowerCase() === tag.toLowerCase() &&
+    /[\t\r\n\f />]/.test(source[2 + tag.length] || '>')
+  )
+}
+
+/**
+ * Appends `node` to `nodes`.
+ *
+ * Empty text nodes are dropped when `context.ignoreSpaces` is set. A text
+ * node that starts exactly where the previous text node ends is merged into
+ * it instead of being appended (this happens on "a < b" or similar).
+ */
+function pushNode(
+  context: ParserContext,
+  nodes: RootNode['children'],
+  node: RootNode['children'][0]
+): void {
+  if (context.ignoreSpaces && node.type === NodeTypes.TEXT && node.isEmpty) {
+    return
+  }
+
+  const prev = last(nodes)
+  if (
+    !prev ||
+    prev.type !== NodeTypes.TEXT ||
+    node.type !== NodeTypes.TEXT ||
+    prev.loc.end.offset !== node.loc.start.offset
+  ) {
+    nodes.push(node)
+    return
+  }
+
+  // Adjacent text: extend the previous node in place.
+  prev.content += node.content
+  prev.isEmpty = prev.content.trim() === ''
+  prev.loc.end = node.loc.end
+  prev.loc.source += node.loc.source
+}
+
+/**
+ * Parses a `<![CDATA[ ... ]]>` section at the cursor and returns the nodes
+ * found inside it. Emits EOF_IN_CDATA when the input ends before `]]>`.
+ */
+function parseCDATA(
+  context: ParserContext,
+  ancestors: ElementNode[]
+): RootNode['children'] {
+  const parent = last(ancestors)
+  assert(parent == null || parent.ns !== Namespaces.HTML)
+  assert(startsWith(context.source, '<![CDATA['))
+
+  advanceBy(context, '<![CDATA['.length)
+  const nodes = parseChildren(context, TextModes.CDATA, ancestors)
+
+  if (context.source.length > 0) {
+    // parseChildren stopped before the end of input, so it stopped at ']]>'.
+    assert(startsWith(context.source, ']]>'))
+    advanceBy(context, ']]>'.length)
+  } else {
+    emitError(context, ParserErrorTypes.EOF_IN_CDATA)
+  }
+
+  return nodes
+}
+
+function parseComment(context: ParserContext): CommentNode { // Parses a '<!--' comment at the cursor, reporting malformed forms.
+  assert(startsWith(context.source, '<!--'))
+
+  const start = getCursor(context)
+  let content: string
+
+  // Regular comment.
+  const match = /--(\!)?>/.exec(context.source) // first '-->' or '--!>' in the remaining input
+  if (!match) {
+    content = context.source.slice(4) // unterminated: everything after '<!--' is the content
+    advanceBy(context, context.source.length)
+    emitError(context, ParserErrorTypes.EOF_IN_COMMENT)
+  } else {
+    if (match.index <= 3) { // the closer overlaps the opening '<!--', i.e. '<!-->' or '<!--->'
+      emitError(context, ParserErrorTypes.ABRUPT_CLOSING_OF_EMPTY_COMMENT)
+    }
+    if (match[1]) { // closed with '--!>'
+      emitError(context, ParserErrorTypes.INCORRECTLY_CLOSED_COMMENT)
+    }
+    content = context.source.slice(4, match.index)
+
+    // Advance in steps so each nested-comment error is reported at the nested '<!--'.
+    const s = context.source.slice(0, match.index)
+    let prevIndex = 1, // search starts at 1 so the comment's own '<!--' is not matched
+      nestedIndex = 0
+    while ((nestedIndex = s.indexOf('<!--', prevIndex)) !== -1) {
+      advanceBy(context, nestedIndex - prevIndex + 1) // move the cursor onto the nested '<!--'
+      if (nestedIndex + 4 < s.length) { // not reported when the nested '<!--' is the last thing before the closer
+        emitError(context, ParserErrorTypes.NESTED_COMMENT)
+      }
+      prevIndex = nestedIndex + 1
+    }
+    advanceBy(context, match.index + match[0].length - prevIndex + 1) // consume the rest, including the closer
+  }
+
+  return {
+    type: NodeTypes.COMMENT,
+    content,
+    loc: getSelection(context, start)
+  }
+}
+
+/**
+ * Parses markup that is not a real comment but is recorded as one: input
+ * starting with `<!`, `<?`, or `</` followed by a character that is neither
+ * a letter nor `>`. The comment runs to the next `>` or to end of input.
+ *
+ * Content starts after the first character for `<?...` and after the first
+ * two characters otherwise.
+ */
+function parseBogusComment(context: ParserContext): CommentNode | undefined {
+  assert(/^<(?:[\!\?]|\/[^a-z>])/i.test(context.source))
+
+  const start = getCursor(context)
+  const { source } = context
+  const contentStart = source[1] === '?' ? 1 : 2
+  const closeIndex = source.indexOf('>')
+  const isClosed = closeIndex !== -1
+
+  const content = isClosed
+    ? source.slice(contentStart, closeIndex)
+    : source.slice(contentStart)
+  advanceBy(context, isClosed ? closeIndex + 1 : source.length)
+
+  return {
+    type: NodeTypes.COMMENT,
+    content,
+    loc: getSelection(context, start)
+  }
+}
+
+/**
+ * Parses an element at the cursor: start tag, children, and end tag.
+ *
+ * Self-closing and void elements are returned right after the start tag.
+ * When the matching end tag is missing, X_MISSING_END_TAG is emitted (plus
+ * EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT for a `script` element at end of
+ * input whose first child starts with `<!--`).
+ *
+ * @param ancestors Stack of open elements; pushed to and popped from while
+ *   the children are parsed.
+ */
+function parseElement(
+  context: ParserContext,
+  ancestors: ElementNode[]
+): ElementNode | undefined {
+  assert(/^<[a-z]/i.test(context.source))
+
+  // Start tag.
+  const parent = last(ancestors)
+  const element = parseTag(context, TagType.Start, parent)
+
+  const hasNoBody = element.isSelfClosing || context.isVoidTag(element.tag)
+  if (hasNoBody) {
+    return element
+  }
+
+  // Children, parsed with this element as the innermost ancestor.
+  ancestors.push(element)
+  const mode = (context.getTextMode(
+    element.tag,
+    element.ns
+  ) as unknown) as TextModes
+  const children = parseChildren(context, mode, ancestors)
+  ancestors.pop()
+  element.children = children
+
+  // End tag.
+  if (!startsWithEndTagOpen(context.source, element.tag)) {
+    emitError(context, ParserErrorTypes.X_MISSING_END_TAG)
+    const isScriptAtEof =
+      context.source.length === 0 && element.tag.toLowerCase() === 'script'
+    if (isScriptAtEof) {
+      const first = children[0]
+      if (first && startsWith(first.loc.source, '<!--')) {
+        emitError(
+          context,
+          ParserErrorTypes.EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT
+        )
+      }
+    }
+  } else {
+    parseTag(context, TagType.End, parent)
+  }
+
+  // Widen the location from the start tag to everything consumed so far.
+  element.loc = getSelection(context, element.loc.start)
+  return element
+}
+
+const enum TagType { // Which kind of tag parseTag is expected to find at the cursor.
+  Start, // '<name ...>'
+  End // '</name ...>'
+}
+
+/**
+ * Parse a tag (E.g. `<div id=a>`) with that type (start tag or end tag).
+ * The returned node always has empty `children`, and `loc` covers only the tag itself.
+ */
+function parseTag(
+  context: ParserContext,
+  type: TagType,
+  parent: ElementNode | undefined
+): ElementNode {
+  assert(/^<\/?[a-z]/i.test(context.source))
+  assert(
+    type === (startsWith(context.source, '</') ? TagType.End : TagType.Start)
+  )
+
+  // Tag open.
+  const start = getCursor(context)
+  const match = /^<\/?([a-z][^\t\r\n\f />]*)/i.exec(context.source)! // group 1 is the tag name
+  const tag = match[1]
+  const props = []
+  const ns = context.getNamespace(tag, parent)
+
+  advanceBy(context, match[0].length)
+  advanceSpaces(context)
+
+  // Attributes.
+  const attributeNames = new Set<string>() // names seen so far, for duplicate detection in parseAttribute
+  while (
+    context.source.length > 0 &&
+    !startsWith(context.source, '>') &&
+    !startsWith(context.source, '/>')
+  ) {
+    if (startsWith(context.source, '/')) { // a '/' that is not part of '/>'
+      emitError(context, ParserErrorTypes.UNEXPECTED_SOLIDUS_IN_TAG)
+      advanceBy(context, 1)
+      advanceSpaces(context)
+      continue
+    }
+    if (type === TagType.End) {
+      emitError(context, ParserErrorTypes.END_TAG_WITH_ATTRIBUTES)
+    }
+
+    const attr = parseAttribute(context, attributeNames)
+    if (type === TagType.Start) { // attributes on end tags are parsed but not kept
+      props.push(attr)
+    }
+
+    if (/^[^\t\r\n\f />]/.test(context.source)) {
+      emitError(context, ParserErrorTypes.MISSING_WHITESPACE_BETWEEN_ATTRIBUTES)
+    }
+    advanceSpaces(context)
+  }
+
+  // Tag close.
+  let isSelfClosing = false
+  if (context.source.length === 0) {
+    emitError(context, ParserErrorTypes.EOF_IN_TAG)
+  } else {
+    isSelfClosing = startsWith(context.source, '/>')
+    if (type === TagType.End && isSelfClosing) {
+      emitError(context, ParserErrorTypes.END_TAG_WITH_TRAILING_SOLIDUS)
+    }
+    advanceBy(context, isSelfClosing ? 2 : 1) // consume '/>' or '>'
+  }
+
+  let tagType = ElementTypes.ELEMENT
+
+  if (tag === 'slot') tagType = ElementTypes.SLOT
+  else if (tag === 'template') tagType = ElementTypes.TEMPLATE
+  else if (/[A-Z-]/.test(tag)) tagType = ElementTypes.COMPONENT // any uppercase letter or '-' in the name
+
+  return {
+    type: NodeTypes.ELEMENT,
+    ns,
+    tag,
+    tagType,
+    props,
+    isSelfClosing,
+    children: [],
+    loc: getSelection(context, start)
+  }
+}
+
+/**
+ * Parses one attribute (name and optional value) at the cursor.
+ *
+ * Names starting with `v-`, `:`, `@` or `#` produce a `DirectiveNode`;
+ * everything else produces an `AttributeNode`.
+ *
+ * @param context Parser state; the cursor is advanced past the attribute.
+ * @param nameSet Names already seen on the current tag. Used to report
+ *   DUPLICATE_ATTRIBUTE; the parsed name is added to it.
+ */
+function parseAttribute(
+  context: ParserContext,
+  nameSet: Set<string>
+): AttributeNode | DirectiveNode {
+  assert(/^[^\t\r\n\f />]/.test(context.source))
+
+  // Name.
+  const start = getCursor(context)
+  const nameMatch = /^[^\t\r\n\f />][^\t\r\n\f />=]*/.exec(context.source)!
+  const name = nameMatch[0]
+
+  if (nameSet.has(name)) {
+    emitError(context, ParserErrorTypes.DUPLICATE_ATTRIBUTE)
+  }
+  nameSet.add(name)
+
+  if (name[0] === '=') {
+    emitError(
+      context,
+      ParserErrorTypes.UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME
+    )
+  }
+  {
+    // Report each forbidden character at its own position in the name.
+    const pattern = /["'<]/g
+    let m: RegExpExecArray | null
+    while ((m = pattern.exec(name)) !== null) {
+      emitError(
+        context,
+        ParserErrorTypes.UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME,
+        m.index
+      )
+    }
+  }
+
+  advanceBy(context, name.length)
+
+  // Value
+  let value: { content: string; loc: SourceLocation } | undefined = undefined
+  if (/^[\t\r\n\f ]*=/.test(context.source)) {
+    advanceSpaces(context)
+    advanceBy(context, 1) // the '='
+    advanceSpaces(context)
+    value = parseAttributeValue(context)
+    if (!value) {
+      emitError(context, ParserErrorTypes.MISSING_ATTRIBUTE_VALUE)
+    }
+  }
+  const loc = getSelection(context, start)
+
+  if (/^(v-|:|@|#)/.test(name)) {
+    // Groups: 1 = directive name after 'v-', 2 = argument, 3 = modifiers.
+    const directive = /(?:^v-([a-z0-9-]+))?(?:(?::|^@|^#)([^\.]+))?(.+)?$/i.exec(
+      name
+    )!
+
+    let arg: ExpressionNode | undefined
+
+    if (directive[2]) {
+      // The argument sits right after `v-<name>` plus one separator
+      // character, or right after a single shorthand character. Deriving
+      // the offset from the prefix length (instead of searching for the
+      // argument text) stays correct when the same text also occurs inside
+      // the directive name, e.g. `v-slot:s` or `v-on:on`.
+      const prefixLength = directive[1] ? 2 + directive[1].length : 0
+      const startOffset = prefixLength + 1
+      const argLoc = getSelection(
+        context,
+        getNewPosition(context, start, startOffset),
+        getNewPosition(context, start, startOffset + directive[2].length)
+      )
+      let content = directive[2]
+      let isStatic = true
+
+      if (content.startsWith('[')) {
+        // Dynamic argument: `[expr]`.
+        isStatic = false
+
+        if (!content.endsWith(']')) {
+          emitError(
+            context,
+            ParserErrorTypes.X_MISSING_DYNAMIC_DIRECTIVE_ARGUMENT_END
+          )
+          // No closing bracket to strip: drop only the leading '['.
+          content = content.substr(1)
+        } else {
+          content = content.substr(1, content.length - 2)
+        }
+      }
+
+      arg = {
+        type: NodeTypes.EXPRESSION,
+        content,
+        isStatic,
+        loc: argLoc
+      }
+    }
+
+    return {
+      type: NodeTypes.DIRECTIVE,
+      name:
+        directive[1] ||
+        (startsWith(name, ':')
+          ? 'bind'
+          : startsWith(name, '@')
+            ? 'on'
+            : 'slot'),
+      exp: value && {
+        type: NodeTypes.EXPRESSION,
+        content: value.content,
+        isStatic: false,
+        loc: value.loc
+      },
+      arg,
+      modifiers: directive[3] ? directive[3].substr(1).split('.') : [],
+      loc
+    }
+  }
+
+  return {
+    type: NodeTypes.ATTRIBUTE,
+    name,
+    value: value && {
+      type: NodeTypes.TEXT,
+      content: value.content,
+      isEmpty: value.content.trim().length === 0,
+      loc: value.loc
+    },
+    loc
+  }
+}
+
+/**
+ * Parses an attribute value at the cursor.
+ *
+ * A quoted value runs to the matching quote, or to end of input when the
+ * quote is never closed. An unquoted value runs to the next whitespace or
+ * `>`; each `"`, `'`, `<`, `=` or `` ` `` inside it is reported.
+ *
+ * @returns The decoded content and its location (quotes included), or
+ *   `undefined` when there is no unquoted value at the cursor.
+ */
+function parseAttributeValue(
+  context: ParserContext
+): { content: string; loc: SourceLocation } | undefined {
+  const start = getCursor(context)
+  let content: string
+
+  const quote = context.source[0]
+  if (quote === '"' || quote === "'") {
+    // Quoted value.
+    advanceBy(context, 1)
+
+    const endIndex = context.source.indexOf(quote)
+    const isClosed = endIndex !== -1
+    content = parseTextData(
+      context,
+      isClosed ? endIndex : context.source.length,
+      TextModes.ATTRIBUTE_VALUE
+    )
+    if (isClosed) {
+      advanceBy(context, 1) // closing quote
+    }
+  } else {
+    // Unquoted
+    const raw = /^[^\t\r\n\f >]+/.exec(context.source)
+    if (raw === null) {
+      return undefined
+    }
+    const unexpectedChars = /["'<=`]/g
+    for (
+      let hit = unexpectedChars.exec(raw[0]);
+      hit !== null;
+      hit = unexpectedChars.exec(raw[0])
+    ) {
+      emitError(
+        context,
+        ParserErrorTypes.UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE,
+        hit.index
+      )
+    }
+    content = parseTextData(context, raw[0].length, TextModes.ATTRIBUTE_VALUE)
+  }
+
+  return { content, loc: getSelection(context, start) }
+}
+
+/**
+ * Parses an interpolation (`{{ ... }}` with the default delimiters) at the
+ * cursor.
+ *
+ * @returns An expression node whose content is trimmed, or `undefined`
+ *   (after emitting X_MISSING_INTERPOLATION_END, without moving the cursor)
+ *   when the closing delimiter is not found.
+ */
+function parseInterpolation(
+  context: ParserContext,
+  mode: TextModes
+): ExpressionNode | undefined {
+  const [open, close] = context.delimiters
+  assert(startsWith(context.source, open))
+
+  const closeIndex = context.source.indexOf(close, open.length)
+  if (closeIndex === -1) {
+    emitError(context, ParserErrorTypes.X_MISSING_INTERPOLATION_END)
+    return undefined
+  }
+
+  const start = getCursor(context)
+  const innerLength = closeIndex - open.length
+
+  advanceBy(context, open.length)
+  const raw = parseTextData(context, innerLength, mode)
+  advanceBy(context, close.length)
+
+  const content = raw.trim()
+  return {
+    type: NodeTypes.EXPRESSION,
+    content,
+    loc: getSelection(context, start),
+    isStatic: content === ''
+  }
+}
+
+/**
+ * Parses a text node starting at the cursor.
+ *
+ * The text always takes at least one character and stops at the earliest
+ * of: the next `<`, the next interpolation opener, `]]>` (CDATA mode only),
+ * or end of input.
+ */
+function parseText(context: ParserContext, mode: TextModes): TextNode {
+  assert(context.source.length > 0)
+
+  const [open] = context.delimiters
+  const { source } = context
+
+  // Searches start at index 1 so the character at the cursor is always
+  // taken as text.
+  const stops = [source.indexOf('<', 1), source.indexOf(open, 1)]
+  if (mode === TextModes.CDATA) {
+    stops.push(source.indexOf(']]>'))
+  }
+
+  let endIndex = source.length
+  for (const stop of stops) {
+    if (stop !== -1 && stop < endIndex) {
+      endIndex = stop
+    }
+  }
+  assert(endIndex > 0)
+
+  const start = getCursor(context)
+  const content = parseTextData(context, endIndex, mode)
+
+  return {
+    type: NodeTypes.TEXT,
+    content,
+    loc: getSelection(context, start),
+    isEmpty: !content.trim()
+  }
+}
+
+/**
+ * Get text data with a given length from the current location.
+ * This translates HTML entities in the text data.
+ *
+ * In RAWTEXT and CDATA modes the text is returned verbatim. In all other
+ * modes named (`&amp;`) and numeric (`&#38;`, `&#x26;`, `&#X26;`) character
+ * references are decoded, and malformed references are reported through
+ * `emitError`.
+ *
+ * @param context Parser state; the cursor is advanced past the text.
+ * @param length Number of source characters to consume.
+ * @param mode Text mode that decides whether and how references are decoded.
+ * @returns The decoded text.
+ */
+function parseTextData(
+  context: ParserContext,
+  length: number,
+  mode: TextModes
+): string {
+  if (mode === TextModes.RAWTEXT || mode === TextModes.CDATA) {
+    const text = context.source.slice(0, length)
+    advanceBy(context, length)
+    return text
+  }
+
+  // DATA or RCDATA.
+  const end = context.offset + length
+  const references = context.namedCharacterReferences
+  const hasOwn = Object.prototype.hasOwnProperty
+  let text: string = ''
+
+  while (context.offset < end) {
+    const head = /&(?:#x?)?/i.exec(context.source)
+    if (!head || context.offset + head.index >= end) {
+      // No further reference inside the requested range: copy the rest.
+      const remaining = end - context.offset
+      text += context.source.slice(0, remaining)
+      advanceBy(context, remaining)
+      break
+    }
+
+    // Advance to the "&".
+    text += context.source.slice(0, head.index)
+    advanceBy(context, head.index)
+
+    if (head[0] === '&') {
+      // Named character reference.
+      let name = '',
+        value: string | undefined = undefined
+      // `|| ''` keeps a trailing "&" at end of input from being tested
+      // against the string "undefined".
+      if (/[0-9a-z]/i.test(context.source[1] || '')) {
+        // Try the longest possible name first, then shorter ones.
+        for (
+          let nameLength = context.maxCRNameLength;
+          !value && nameLength > 0;
+          --nameLength
+        ) {
+          name = context.source.substr(1, nameLength)
+          // Own keys only, so names such as "constructor" are not resolved
+          // through Object.prototype.
+          value = hasOwn.call(references, name) ? references[name] : undefined
+        }
+        if (value) {
+          const semi = name.endsWith(';')
+          if (
+            mode === TextModes.ATTRIBUTE_VALUE &&
+            !semi &&
+            /[=a-z0-9]/i.test(context.source[1 + name.length] || '')
+          ) {
+            // In attribute values a reference without ';' that is followed
+            // by '=' or an alphanumeric is kept as literal text.
+            text += '&'
+            text += name
+            advanceBy(context, 1 + name.length)
+          } else {
+            text += value
+            advanceBy(context, 1 + name.length)
+            if (!semi) {
+              emitError(
+                context,
+                ParserErrorTypes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE
+              )
+            }
+          }
+        } else {
+          emitError(context, ParserErrorTypes.UNKNOWN_NAMED_CHARACTER_REFERENCE)
+          text += '&'
+          text += name
+          advanceBy(context, 1 + name.length)
+        }
+      } else {
+        text += '&'
+        advanceBy(context, 1)
+      }
+    } else {
+      // Numeric character reference.
+      // The head pattern is case-insensitive, so "&#X" is hexadecimal too.
+      const hex = head[0].toLowerCase() === '&#x'
+      const pattern = hex ? /^&#x([0-9a-f]+);?/i : /^&#([0-9]+);?/
+      const body = pattern.exec(context.source)
+      if (!body) {
+        text += head[0]
+        emitError(
+          context,
+          ParserErrorTypes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE
+        )
+        advanceBy(context, head[0].length)
+      } else {
+        // https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
+        let cp = Number.parseInt(body[1], hex ? 16 : 10)
+        if (cp === 0) {
+          emitError(context, ParserErrorTypes.NULL_CHARACTER_REFERENCE)
+          cp = 0xfffd
+        } else if (cp > 0x10ffff) {
+          emitError(
+            context,
+            ParserErrorTypes.CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE
+          )
+          cp = 0xfffd
+        } else if (cp >= 0xd800 && cp <= 0xdfff) {
+          emitError(context, ParserErrorTypes.SURROGATE_CHARACTER_REFERENCE)
+          cp = 0xfffd
+        } else if ((cp >= 0xfdd0 && cp <= 0xfdef) || (cp & 0xfffe) === 0xfffe) {
+          // Reported, but the code point is kept as written.
+          emitError(context, ParserErrorTypes.NONCHARACTER_CHARACTER_REFERENCE)
+        } else if (
+          (cp >= 0x01 && cp <= 0x08) ||
+          cp === 0x0b ||
+          (cp >= 0x0d && cp <= 0x1f) ||
+          (cp >= 0x7f && cp <= 0x9f)
+        ) {
+          emitError(context, ParserErrorTypes.CONTROL_CHARACTER_REFERENCE)
+          cp = CCR_REPLACEMENTS[cp] || cp
+        }
+        text += String.fromCodePoint(cp)
+        advanceBy(context, body[0].length)
+        if (!body[0].endsWith(';')) {
+          emitError(
+            context,
+            ParserErrorTypes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE
+          )
+        }
+      }
+    }
+  }
+
+  return text
+}
+
+// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
+const CCR_REPLACEMENTS: { [key: number]: number | undefined } = { // control code point -> code point substituted for it in parseTextData; code points without an entry are kept as-is
+  0x80: 0x20ac,
+  0x82: 0x201a,
+  0x83: 0x0192,
+  0x84: 0x201e,
+  0x85: 0x2026,
+  0x86: 0x2020,
+  0x87: 0x2021,
+  0x88: 0x02c6,
+  0x89: 0x2030,
+  0x8a: 0x0160,
+  0x8b: 0x2039,
+  0x8c: 0x0152,
+  0x8e: 0x017d,
+  0x91: 0x2018,
+  0x92: 0x2019,
+  0x93: 0x201c,
+  0x94: 0x201d,
+  0x95: 0x2022,
+  0x96: 0x2013,
+  0x97: 0x2014,
+  0x98: 0x02dc,
+  0x99: 0x2122,
+  0x9a: 0x0161,
+  0x9b: 0x203a,
+  0x9c: 0x0153,
+  0x9e: 0x017e,
+  0x9f: 0x0178
+}
--- a/packages/compiler-core/src/parserOptionsMinimal.ts
+++ b/packages/compiler-core/src/parserOptionsMinimal.ts
@@ -0,0 +1,112 @@
+import { TextModes, ParserOptions } from './parser'
+import { ElementNode, Namespaces, Position, Node } from './ast'
+import { ParserErrorTypes } from './errorTypes'
+
+export const parserOptionsMinimal: ParserOptions = { // A small ParserOptions implementation: five named entities, throwing error handler.
+  delimiters: [`{{`, `}}`],
+  ignoreSpaces: true,
+
+  getNamespace(tag: string, parent: ElementNode | undefined): Namespaces { // inherits the parent's namespace, except that 'svg' / 'math' switch out of HTML
+    const ns = parent ? parent.ns : Namespaces.HTML
+    if (ns === Namespaces.HTML) {
+      if (tag === 'svg') {
+        return Namespaces.SVG
+      }
+      if (tag === 'math') {
+        return Namespaces.MATH_ML
+      }
+    }
+    return ns
+  },
+
+  getTextMode(tag: string, ns: Namespaces): TextModes { // special modes apply only in the HTML namespace; tag match is case-insensitive
+    if (ns === Namespaces.HTML) {
+      if (/^textarea$/i.test(tag)) {
+        return TextModes.RCDATA
+      }
+      if (/^(?:style|script)$/i.test(tag)) {
+        return TextModes.RAWTEXT
+      }
+    }
+    return TextModes.DATA
+  },
+
+  isVoidTag(tag: string): boolean { // case-insensitive match against a fixed list of tag names
+    return /^(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i.test(
+      tag
+    )
+  },
+
+  namedCharacterReferences: { // keys omit the leading '&' and include the trailing ';'
+    'gt;': '>',
+    'lt;': '<',
+    'amp;': '&',
+    'apos;': "'",
+    'quot;': '"'
+  },
+
+  onError(code: ParserErrorTypes, loc: Position): void { // throws a SyntaxError carrying `code` and `loc`, so parsing stops at the first error
+    const error: any = new SyntaxError(
+      `${messages[code]} (${loc.line}:${loc.column})`
+    )
+    error.code = code
+    error.loc = loc
+    throw error
+  },
+
+  transform(node: Node): Node { // identity: returns the node unchanged
+    return node
+  }
+}
+
+// Human-readable message for every parser error code. `onError` of the
+// minimal options interpolates `messages[code]` into the thrown error, so a
+// code without an entry would surface as the text "undefined".
+const messages: { [code: number]: string } = {
+  [ParserErrorTypes.ABRUPT_CLOSING_OF_EMPTY_COMMENT]: 'Illegal comment.',
+  [ParserErrorTypes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE]:
+    'Illegal numeric character reference: invalid character.',
+  [ParserErrorTypes.CDATA_IN_HTML_CONTENT]:
+    'CDATA section is allowed only in XML context.',
+  [ParserErrorTypes.CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE]:
+    'Illegal numeric character reference: too big.',
+  [ParserErrorTypes.CONTROL_CHARACTER_REFERENCE]:
+    'Illegal numeric character reference: control character.',
+  [ParserErrorTypes.DUPLICATE_ATTRIBUTE]: 'Duplicate attribute.',
+  [ParserErrorTypes.END_TAG_WITH_ATTRIBUTES]: 'End tag cannot have attributes.',
+  [ParserErrorTypes.END_TAG_WITH_TRAILING_SOLIDUS]: "Illegal '/' in tags.",
+  [ParserErrorTypes.EOF_BEFORE_TAG_NAME]: 'Unexpected EOF in tag.',
+  [ParserErrorTypes.EOF_IN_CDATA]: 'Unexpected EOF in CDATA section.',
+  [ParserErrorTypes.EOF_IN_COMMENT]: 'Unexpected EOF in comment.',
+  [ParserErrorTypes.EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT]:
+    'Unexpected EOF in script.',
+  [ParserErrorTypes.EOF_IN_TAG]: 'Unexpected EOF in tag.',
+  [ParserErrorTypes.INCORRECTLY_CLOSED_COMMENT]: 'Incorrectly closed comment.',
+  [ParserErrorTypes.INCORRECTLY_OPENED_COMMENT]: 'Incorrectly opened comment.',
+  [ParserErrorTypes.INVALID_FIRST_CHARACTER_OF_TAG_NAME]:
+    "Illegal tag name. Use '&lt;' to print '<'.",
+  [ParserErrorTypes.MISSING_ATTRIBUTE_VALUE]: 'Attribute value was expected.',
+  [ParserErrorTypes.MISSING_END_TAG_NAME]: 'End tag name was expected.',
+  [ParserErrorTypes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE]:
+    'Semicolon was expected.',
+  [ParserErrorTypes.MISSING_WHITESPACE_BETWEEN_ATTRIBUTES]:
+    'Whitespace was expected.',
+  [ParserErrorTypes.NESTED_COMMENT]: "Unexpected '<!--' in comment.",
+  [ParserErrorTypes.NONCHARACTER_CHARACTER_REFERENCE]:
+    'Illegal numeric character reference: non character.',
+  [ParserErrorTypes.NULL_CHARACTER_REFERENCE]:
+    'Illegal numeric character reference: null character.',
+  [ParserErrorTypes.SURROGATE_CHARACTER_REFERENCE]:
+    'Illegal numeric character reference: non-pair surrogate.',
+  [ParserErrorTypes.UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME]:
+    'Attribute name cannot contain U+0022 ("), U+0027 (\'), and U+003C (<).',
+  [ParserErrorTypes.UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE]:
+    'Unquoted attribute value cannot contain U+0022 ("), U+0027 (\'), U+003C (<), U+003D (=), and U+0060 (`).',
+  [ParserErrorTypes.UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME]:
+    "Attribute name cannot start with '='.",
+  [ParserErrorTypes.UNEXPECTED_NULL_CHARACTER]: 'Unexpected null character.',
+  [ParserErrorTypes.UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME]:
+    "'<?' is allowed only in XML context.",
+  [ParserErrorTypes.UNEXPECTED_SOLIDUS_IN_TAG]: "Illegal '/' in tags.",
+  [ParserErrorTypes.UNKNOWN_NAMED_CHARACTER_REFERENCE]: 'Unknown entity name.',
+  [ParserErrorTypes.X_INVALID_END_TAG]: 'Invalid end tag.',
+  [ParserErrorTypes.X_MISSING_END_TAG]: 'End tag was not found.',
+  [ParserErrorTypes.X_MISSING_INTERPOLATION_END]:
+    'Interpolation end sign was not found.',
+  [ParserErrorTypes.X_MISSING_DYNAMIC_DIRECTIVE_ARGUMENT_END]:
+    'End bracket for dynamic directive argument was not found.'
+}
--- a/packages/compiler-core/src/parserOptionsStandard.ts
+++ b/packages/compiler-core/src/parserOptionsStandard.ts