feat(compiler): port parser implementation based on work by @znck and @mysticatea

2019-09-16 14:43:29 -04:00
parent 54a78f3856
commit 86ae92303d
8 changed files with 13373 additions and 1 deletions
--- a/packages/compiler-core/tests/snapshots/parse.spec.ts.snap
+++ b/packages/compiler-core/tests/snapshots/parse.spec.ts.snap
--- a/packages/compiler-core/tests/parse.spec.ts
+++ b/packages/compiler-core/tests/parse.spec.ts
--- a/packages/compiler-core/src/ast.ts
+++ b/packages/compiler-core/src/ast.ts
@@ -0,0 +1,87 @@
 /**
 * Discriminator stored in the `type` field of every AST node.
 * Members take implicit numeric values in declaration order.
 */
 export const enum NodeTypes {
  TEXT, // TextNode
  COMMENT, // CommentNode
  ELEMENT, // ElementNode
  ATTRIBUTE, // AttributeNode
  EXPRESSION, // ExpressionNode
  DIRECTIVE, // DirectiveNode
  ROOT // RootNode
 }
 /**
 * Classification of an element, stored in `ElementNode.tagType`.
 */
 export const enum ElementTypes {
  ELEMENT, // default when no other category applies
  COMPONENT, // tag name contains an uppercase letter or a hyphen
  SLOT, // tag name is exactly `slot`
  TEMPLATE // tag name is exactly `template`
 }
 /**
 * Markup namespace an element belongs to, stored in `ElementNode.ns`.
 */
 export const enum Namespaces {
  HTML, // CDATA sections are reported as an error and parsed as bogus comments.
  SVG, // allows CDATA section and forbids end tag omission.
  MATH_ML // allows CDATA section and forbids end tag omission.
 }
 /**
 * Fields shared by every AST node.
 */
 export interface Node {
  // Discriminator identifying the concrete node interface.
  type: NodeTypes
  // Range of the original source this node was parsed from.
  loc: SourceLocation
 }
 /**
 * Top-level node returned by the parser; holds the template's top-level
 * children.
 */
 export interface RootNode extends Node {
  type: NodeTypes.ROOT
  children: Array<ElementNode | ExpressionNode | TextNode | CommentNode>
 }
 /**
 * An element together with its attributes/directives and child nodes.
 */
 export interface ElementNode extends Node {
  type: NodeTypes.ELEMENT
  // Namespace the element was parsed in.
  ns: Namespaces
  // Tag name as written in the source (case is preserved).
  tag: string
  tagType: ElementTypes
  // True when the start tag ended with `/>`.
  isSelfClosing: boolean
  props: Array<AttributeNode | DirectiveNode>
  children: Array<ElementNode | ExpressionNode | TextNode | CommentNode>
 }
 /**
 * A run of text (also used for plain attribute values).
 */
 export interface TextNode extends Node {
  type: NodeTypes.TEXT
  // Text after character-reference decoding (decoding depends on text mode).
  content: string
  // True when `content` consists of whitespace only (or is empty).
  isEmpty: boolean
 }
 /**
 * A comment, or markup the parser treats as a comment (e.g. DOCTYPE, `<?...>`).
 */
 export interface CommentNode extends Node {
  type: NodeTypes.COMMENT
  // Text between the opening and closing markers, without the markers.
  content: string
 }
 /**
 * A plain (non-directive) attribute.
 */
 export interface AttributeNode extends Node {
  type: NodeTypes.ATTRIBUTE
  name: string
  // `undefined` when the attribute has no `=value` part or the value is missing.
  value: TextNode | undefined
 }
 /**
 * A directive attribute: a name starting with `v-`, `:`, `@` or `#`.
 */
 export interface DirectiveNode extends Node {
  type: NodeTypes.DIRECTIVE
  // Directive name without the `v-` prefix; the shorthands `:`, `@` and `#`
  // are recorded as `bind`, `on` and `slot`.
  name: string
  // Expression taken from the attribute value, if one was given.
  exp: ExpressionNode | undefined
  // Directive argument (e.g. `click` in `@click`), if present.
  arg: ExpressionNode | undefined
  // Dot-separated modifiers that follow the name/argument.
  modifiers: string[]
 }
 /**
 * An expression: an interpolation, a directive value, or a directive argument.
 */
 export interface ExpressionNode extends Node {
  type: NodeTypes.EXPRESSION
  // Expression text; interpolation content is trimmed by the parser.
  content: string
  // The parser sets this to false for directive values and for `[dynamic]`
  // arguments, and to true for plain arguments. For interpolations it is true
  // only when the content is empty.
  isStatic: boolean
 }
 /**
 * A single point in the source text.
 */
 export interface Position {
  offset: number // from start of file; the parser starts counting at 0
  line: number // the parser starts counting at 1
  column: number // the parser starts counting at 1
 }
 // The range of source text covered by a node, as the half-open interval
 // [start, end): `start` is inclusive and `end` is exclusive.
 export interface SourceLocation {
  start: Position
  end: Position
  // The slice of the original source between `start.offset` and `end.offset`.
  source: string
 }
--- a/packages/compiler-core/src/errorTypes.ts
+++ b/packages/compiler-core/src/errorTypes.ts
@@ -0,0 +1,37 @@
 /**
 * Codes passed to `ParserOptions.onError`.
 *
 * Members take implicit numeric values in declaration order, and those
 * numbers are used as lookup keys for error messages, so inserting or
 * reordering members changes the codes.
 *
 * The `X_`-prefixed members are emitted by the parser for template-level
 * problems: an end tag that matches no open element, a missing end tag, an
 * unterminated interpolation, and an unterminated dynamic directive argument.
 */
 export const enum ParserErrorTypes {
  ABRUPT_CLOSING_OF_EMPTY_COMMENT,
  ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
  CDATA_IN_HTML_CONTENT,
  CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE,
  CONTROL_CHARACTER_REFERENCE,
  DUPLICATE_ATTRIBUTE,
  END_TAG_WITH_ATTRIBUTES,
  END_TAG_WITH_TRAILING_SOLIDUS,
  EOF_BEFORE_TAG_NAME,
  EOF_IN_CDATA,
  EOF_IN_COMMENT,
  EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT,
  EOF_IN_TAG,
  INCORRECTLY_CLOSED_COMMENT,
  INCORRECTLY_OPENED_COMMENT,
  INVALID_FIRST_CHARACTER_OF_TAG_NAME,
  MISSING_ATTRIBUTE_VALUE,
  MISSING_END_TAG_NAME,
  MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
  MISSING_WHITESPACE_BETWEEN_ATTRIBUTES,
  NESTED_COMMENT,
  NONCHARACTER_CHARACTER_REFERENCE,
  NULL_CHARACTER_REFERENCE,
  SURROGATE_CHARACTER_REFERENCE,
  UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME,
  UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE,
  UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME,
  // NOTE(review): no emitter for this code is visible in parser.ts.
  UNEXPECTED_NULL_CHARACTER,
  UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME,
  UNEXPECTED_SOLIDUS_IN_TAG,
  UNKNOWN_NAMED_CHARACTER_REFERENCE,
  X_INVALID_END_TAG,
  X_MISSING_END_TAG,
  X_MISSING_INTERPOLATION_END,
  X_MISSING_DYNAMIC_DIRECTIVE_ARGUMENT_END
 }
--- a/packages/compiler-core/src/index.ts
+++ b/packages/compiler-core/src/index.ts
@@ -1 +1,3 @@
-// TODO
+export { parse } from './parser'
 export * from './ast'
 export * from './errorTypes'
--- a/packages/compiler-core/src/parser.ts
+++ b/packages/compiler-core/src/parser.ts
@@ -0,0 +1,918 @@
 import assert from 'assert'
 import { ParserErrorTypes } from './errorTypes'
 import {
  Node,
  AttributeNode,
  CommentNode,
  DirectiveNode,
  ElementNode,
  ElementTypes,
  ExpressionNode,
  Namespaces,
  NodeTypes,
  Position,
  RootNode,
  SourceLocation,
  TextNode
 } from './ast'
 /**
 * Host-supplied configuration and callbacks consumed by `parse`.
 */
 export interface ParserOptions {
  // Returns true for elements that have no children and no end tag,
  // e.g. img, br, hr
  isVoidTag: (tag: string) => boolean
  // Decides the namespace of an element from its tag name and parent element.
  getNamespace: (tag: string, parent: ElementNode | undefined) => Namespaces
  // Decides how the content of an element is tokenized.
  getTextMode: (tag: string, ns: Namespaces) => TextModes
  // Opening and closing interpolation markers, e.g. ['{{', '}}']
  delimiters: [string, string]
  // NOTE(review): not invoked by the parser code visible in this file.
  transform: (node: Node) => Node
  // When true, whitespace-only text nodes are dropped from children lists.
  ignoreSpaces: boolean
  // Map to HTML entities. Keys omit the leading `&`. E.g., `{ "amp;": "&" }`
  // The full set is https://html.spec.whatwg.org/multipage/named-characters.html#named-character-references
  namedCharacterReferences: { [name: string]: string | undefined }
  // Called for every parse error with the error code and its position.
  onError: (type: ParserErrorTypes, loc: Position) => void
 }
 /**
 * Tokenization mode for a stretch of content. It controls whether elements
 * are recognized, whether character references are decoded, and what ends
 * the content.
 */
 export const enum TextModes {
  //          | Elements | Entities | End sign              | Inside of
  DATA, //    | ✔        | ✔        | End tags of ancestors |
  RCDATA, //  | ✘        | ✔        | End tag of the parent | <textarea>
  RAWTEXT, // | ✘        | ✘        | End tag of the parent | <style>,<script>
  CDATA, //   | ✘        | ✘        | `]]>`                 | <![CDATA[ ... ]]>
  // Used only for attribute values: entities are decoded, except that a
  // semicolon-less named reference followed by `=` or an alphanumeric
  // character is kept as literal text.
  ATTRIBUTE_VALUE
 }
 /**
 * Mutable parsing state: the options plus the cursor into the source.
 */
 interface ParserContext extends ParserOptions {
  // The complete input; never modified.
  readonly originalSource: string
  // The not-yet-consumed remainder of the input.
  source: string
  // Cursor position of the first character of `source`.
  offset: number
  line: number
  column: number
  // Length of the longest key in `namedCharacterReferences`; upper bound for
  // the named-reference lookup.
  maxCRNameLength: number
 }
 /**
 * Parse a template string into a root AST node.
 *
 * @param content - The template source text.
 * @param options - Host callbacks and settings; errors are reported through
 *   `options.onError`.
 * @returns A root node whose location spans from the start of the input to
 *   the position where parsing stopped.
 */
 export function parse(content: string, options: ParserOptions): RootNode {
  const ctx = createParserContext(content, options)
  const begin = getCursor(ctx)
  // The children are parsed first so that the location is measured against
  // the final cursor position.
  const children = parseChildren(ctx, TextModes.DATA, [])
  const loc = getSelection(ctx, begin)
  return { type: NodeTypes.ROOT, children, loc }
 }
 /**
 * Return the final element of an array, or `undefined` when it is empty.
 */
 function last<T>(xs: T[]): T | undefined {
  const tailIndex = xs.length - 1
  return xs[tailIndex]
 }
 /**
 * Tell whether `source` begins with `searchString`.
 */
 function startsWith(source: string, searchString: string): boolean {
  // Searching backwards from index 0 can only ever match at index 0.
  return source.lastIndexOf(searchString, 0) === 0
 }
 /**
 * Consume `numberOfCharacters` characters from the front of `context.source`
 * and move the cursor (`offset`, `line`, `column`) past them.
 *
 * Lines are separated by `\n` or `\r\n`. Columns are 1-based and name the
 * position of the next unread character, so after crossing a line break the
 * column is the number of characters consumed on the new line plus one.
 * (The previous `Math.max(1, lastLine.length)` was one too small whenever
 * anything followed the line break, and gave "\n" and "\nx" the same column.)
 *
 * @param context - Parser state; mutated in place.
 * @param numberOfCharacters - How many characters to consume; must not
 *   exceed the remaining source length (asserted).
 */
 function advanceBy(context: ParserContext, numberOfCharacters: number): void {
  assert(numberOfCharacters <= context.source.length)
  const { column, source } = context
  const consumed = source.slice(0, numberOfCharacters)
  const lines = consumed.split(/\r?\n/)
  context.source = source.slice(numberOfCharacters)
  context.offset += numberOfCharacters
  context.line += lines.length - 1
  context.column =
    lines.length === 1
      ? column + numberOfCharacters
      : lines[lines.length - 1].length + 1
 }
 /**
 * Skip any run of tab, CR, LF, form feed or space characters at the cursor.
 * Does nothing when the remaining source does not start with one of them.
 */
 function advanceSpaces(context: ParserContext): void {
  const leading = /^[\t\r\n\f ]+/.exec(context.source)
  if (leading === null) {
    return
  }
  advanceBy(context, leading[0].length)
 }
 /**
 * Snapshot the current cursor as a new `Position` object, so later cursor
 * movement does not affect the returned value.
 */
 function getCursor(context: ParserContext): Position {
  return {
    column: context.column,
    line: context.line,
    offset: context.offset
  }
 }
 /**
 * Compute the position `numberOfCharacters` characters after `start`, reading
 * the skipped text from the original source. `start` is not modified.
 *
 * Two defects of the earlier version are corrected here:
 * - the text was sliced with `numberOfCharacters` as the END index instead of
 *   `start.offset + numberOfCharacters`, so the wrong text (usually an empty
 *   string) was inspected for line breaks;
 * - after a line break the column was one too small; columns are 1-based and
 *   name the position of the next character.
 *
 * @param context - Parser state; only `originalSource` is read.
 * @param start - Position to measure from.
 * @param numberOfCharacters - Distance to move forward, in characters.
 * @returns A new position object.
 */
 function getNewPosition(
  context: ParserContext,
  start: Position,
  numberOfCharacters: number
 ): Position {
  const endOffset = start.offset + numberOfCharacters
  const skipped = context.originalSource.slice(start.offset, endOffset)
  const lines = skipped.split(/\r?\n/)
  return {
    column:
      lines.length === 1
        ? start.column + numberOfCharacters
        : lines[lines.length - 1].length + 1,
    line: start.line + lines.length - 1,
    offset: endOffset
  }
 }
 /**
 * Build the source location from `start` up to `end`.
 *
 * @param context - Parser state; supplies the original source and, when
 *   `end` is omitted, the current cursor.
 * @param start - Inclusive start of the range.
 * @param end - Exclusive end of the range; defaults to the current cursor.
 * @returns The range together with the matching slice of the original source.
 */
 function getSelection(
  context: ParserContext,
  start: Position,
  end?: Position
 ): SourceLocation {
  const stop = end || getCursor(context)
  const source = context.originalSource.slice(start.offset, stop.offset)
  return { start, end: stop, source }
 }
 /**
 * Report a parse error through `context.onError`.
 *
 * @param context - Parser state; the error position is based on its cursor.
 * @param type - Error code to report.
 * @param offset - Optional number of characters to shift the reported
 *   position to the right of the cursor (both offset and column; the line is
 *   left as is).
 */
 function emitError(
  context: ParserContext,
  type: ParserErrorTypes,
  offset?: number
 ): void {
  const position = getCursor(context)
  // A missing (or otherwise falsy) offset leaves the position untouched.
  const shift = offset || 0
  position.offset += shift
  position.column += shift
  context.onError(type, position)
 }
 /**
 * Create the mutable parser state for one `parse` call.
 *
 * The result carries every option, a cursor placed at offset 0 / line 1 /
 * column 1, the source text (both the untouched original and the remainder
 * still to be consumed), and the length of the longest named character
 * reference key.
 */
 function createParserContext(
  content: string,
  options: ParserOptions
 ): ParserContext {
  let longestName = 0
  for (const name of Object.keys(options.namedCharacterReferences)) {
    if (name.length > longestName) {
      longestName = name.length
    }
  }
  return {
    ...options,
    column: 1,
    line: 1,
    offset: 0,
    originalSource: content,
    source: content,
    maxCRNameLength: longestName
  }
 }
 /**
 * Parse a sequence of sibling nodes starting at the cursor and stopping where
 * `isEnd` reports the end of the current content (or at end of input).
 *
 * @param context - Parser state; the cursor is advanced past everything parsed.
 * @param mode - Text mode of the content; markup starting with `<` is only
 *   recognized in DATA mode.
 * @param ancestors - Stack of currently open elements, innermost last.
 * @returns The parsed nodes, with adjacent text merged and (when
 *   `ignoreSpaces` is set) whitespace-only text dropped by `pushNode`.
 */
 function parseChildren(
  context: ParserContext,
  mode: TextModes,
  ancestors: ElementNode[]
 ): RootNode['children'] {
  const parent = last(ancestors)
  const ns = parent ? parent.ns : Namespaces.HTML
  const nodes: RootNode['children'] = []
  while (!isEnd(context, mode, ancestors)) {
    assert(context.source.length > 0)
    const s = context.source
    // Stays null when nothing below produces a node; the input is then
    // consumed as text further down.
    let node: any = null
    if (startsWith(s, context.delimiters[0])) {
      // '{{'
      // Yields undefined when the closing delimiter is missing.
      node = parseInterpolation(context, mode)
    } else if (mode === TextModes.DATA && s[0] === '<') {
      // https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
      if (s.length === 1) {
        // A lone '<' at the end of input: reported, then parsed as text.
        emitError(context, ParserErrorTypes.EOF_BEFORE_TAG_NAME, 1)
      } else if (s[1] === '!') {
        // https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
        if (startsWith(s, '<!--')) {
          node = parseComment(context)
        } else if (startsWith(s, '<!DOCTYPE')) {
          // Ignore DOCTYPE by a limitation.
          node = parseBogusComment(context)
        } else if (startsWith(s, '<![CDATA[')) {
          if (ns !== Namespaces.HTML) {
            // Returns an array of nodes rather than a single node.
            node = parseCDATA(context, ancestors)
          } else {
            emitError(context, ParserErrorTypes.CDATA_IN_HTML_CONTENT)
            node = parseBogusComment(context)
          }
        } else {
          emitError(context, ParserErrorTypes.INCORRECTLY_OPENED_COMMENT)
          node = parseBogusComment(context)
        }
      } else if (s[1] === '/') {
        // https://html.spec.whatwg.org/multipage/parsing.html#end-tag-open-state
        if (s.length === 2) {
          // '</' at the end of input: reported, then parsed as text.
          emitError(context, ParserErrorTypes.EOF_BEFORE_TAG_NAME, 2)
        } else if (s[2] === '>') {
          // '</>' is reported and skipped without producing a node.
          emitError(context, ParserErrorTypes.MISSING_END_TAG_NAME, 2)
          advanceBy(context, 3)
          continue
        } else if (/[a-z]/i.test(s[2])) {
          // An end tag that `isEnd` did not accept as closing an open
          // element: report it, then consume and discard the tag.
          emitError(context, ParserErrorTypes.X_INVALID_END_TAG)
          parseTag(context, TagType.End, parent)
          continue
        } else {
          emitError(
            context,
            ParserErrorTypes.INVALID_FIRST_CHARACTER_OF_TAG_NAME,
            2
          )
          node = parseBogusComment(context)
        }
      } else if (/[a-z]/i.test(s[1])) {
        node = parseElement(context, ancestors)
      } else if (s[1] === '?') {
        emitError(
          context,
          ParserErrorTypes.UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME,
          1
        )
        node = parseBogusComment(context)
      } else {
        // '<' followed by something that cannot start a tag: reported, then
        // parsed as text.
        emitError(
          context,
          ParserErrorTypes.INVALID_FIRST_CHARACTER_OF_TAG_NAME,
          1
        )
      }
    }
    // Fallback: everything that did not produce a node is consumed as text.
    if (!node) {
      node = parseText(context, mode)
    }
    if (Array.isArray(node)) {
      node.forEach(pushNode.bind(null, context, nodes))
    } else {
      pushNode(context, nodes, node)
    }
  }
  return nodes
 }
 /**
 * Decide whether the content currently being parsed ends at the cursor.
 *
 * - DATA: at an end tag matching any open ancestor.
 * - RCDATA / RAWTEXT: at an end tag matching the innermost open element.
 * - CDATA: at `]]>`.
 * - Every mode: when no source remains.
 */
 function isEnd(
  context: ParserContext,
  mode: TextModes,
  ancestors: ElementNode[]
 ): boolean {
  const rest = context.source
  if (mode === TextModes.DATA) {
    // The cheap prefix check avoids scanning the ancestor stack for
    // ordinary content.
    if (
      startsWith(rest, '</') &&
      ancestors.some(ancestor => startsWithEndTagOpen(rest, ancestor.tag))
    ) {
      return true
    }
  } else if (mode === TextModes.RCDATA || mode === TextModes.RAWTEXT) {
    const parent = last(ancestors)
    if (parent && startsWithEndTagOpen(rest, parent.tag)) {
      return true
    }
  } else if (mode === TextModes.CDATA) {
    if (startsWith(rest, ']]>')) {
      return true
    }
  }
  return !rest
 }
 /**
 * Tell whether `source` begins with an end tag for `tag`, i.e. `</` + the tag
 * name (compared case-insensitively) followed by a character that terminates
 * a tag name, or by the end of input.
 *
 * The terminator set includes `\r` so that it matches the set the tag parser
 * uses to delimit tag names; without it, `</div\r\n>` was not recognized as
 * closing `div`.
 */
 function startsWithEndTagOpen(source: string, tag: string): boolean {
  if (!startsWith(source, '</')) {
    return false
  }
  const nameEnd = 2 + tag.length
  if (source.slice(2, nameEnd).toLowerCase() !== tag.toLowerCase()) {
    return false
  }
  // End of input counts as a terminator, hence the '>' stand-in.
  return /[\t\r\n\f />]/.test(source[nameEnd] || '>')
 }
 /**
 * Append a node to a children list.
 *
 * Whitespace-only text is discarded when `context.ignoreSpaces` is set. A text
 * node that starts exactly where the previous text node ended is merged into
 * that previous node instead of being appended; this happens for input such
 * as "a < b", which is parsed as several text pieces.
 */
 function pushNode(
  context: ParserContext,
  nodes: RootNode['children'],
  node: RootNode['children'][0]
 ): void {
  if (node.type === NodeTypes.TEXT && node.isEmpty && context.ignoreSpaces) {
    return
  }
  const prev = last(nodes)
  if (
    !prev ||
    prev.type !== NodeTypes.TEXT ||
    node.type !== NodeTypes.TEXT ||
    prev.loc.end.offset !== node.loc.start.offset
  ) {
    nodes.push(node)
    return
  }
  // Consecutive text: extend the previous node over the new one.
  prev.content += node.content
  prev.isEmpty = prev.content.trim().length === 0
  prev.loc.end = node.loc.end
  prev.loc.source += node.loc.source
 }
 /**
 * Parse a `<![CDATA[ ... ]]>` section at the cursor and return the nodes
 * found inside it. Reports EOF_IN_CDATA when the input ends before `]]>`.
 *
 * Must only be called outside the HTML namespace with the cursor on
 * `<![CDATA[` (both asserted).
 */
 function parseCDATA(
  context: ParserContext,
  ancestors: ElementNode[]
 ): RootNode['children'] {
  const parent = last(ancestors)
  assert(parent == null || parent.ns !== Namespaces.HTML)
  assert(startsWith(context.source, '<![CDATA['))
  advanceBy(context, '<![CDATA['.length)
  const nodes = parseChildren(context, TextModes.CDATA, ancestors)
  if (context.source.length > 0) {
    // parseChildren stopped before the end of input, so it stopped at `]]>`.
    assert(startsWith(context.source, ']]>'))
    advanceBy(context, 3)
  } else {
    emitError(context, ParserErrorTypes.EOF_IN_CDATA)
  }
  return nodes
 }
 /**
 * Parse a comment that starts with `<!--` at the cursor.
 *
 * The comment ends at the first `-->` or `--!>`; when neither exists, the
 * rest of the input becomes the comment and EOF_IN_COMMENT is reported.
 * Nested `<!--` sequences are reported as NESTED_COMMENT at their own
 * position.
 *
 * @returns A comment node whose content excludes the opening and closing
 *   markers.
 */
 function parseComment(context: ParserContext): CommentNode {
  assert(startsWith(context.source, '<!--'))
  const start = getCursor(context)
  let content: string
  // Regular comment.
  const match = /--(\!)?>/.exec(context.source)
  if (!match) {
    content = context.source.slice(4)
    advanceBy(context, context.source.length)
    emitError(context, ParserErrorTypes.EOF_IN_COMMENT)
  } else {
    // The closer overlaps the opener: `<!-->` (index 2) or `<!--->` (index 3).
    if (match.index <= 3) {
      emitError(context, ParserErrorTypes.ABRUPT_CLOSING_OF_EMPTY_COMMENT)
    }
    // Closed with `--!>` instead of `-->`.
    if (match[1]) {
      emitError(context, ParserErrorTypes.INCORRECTLY_CLOSED_COMMENT)
    }
    // Empty when the closer overlaps the opener (slice end before start).
    content = context.source.slice(4, match.index)
    // Advancing with reporting nested comments.
    // Invariant: the cursor sits at index `prevIndex - 1` of `s`. Starting
    // the search at 1 skips the opening `<!--` itself.
    const s = context.source.slice(0, match.index)
    let prevIndex = 1,
      nestedIndex = 0
    while ((nestedIndex = s.indexOf('<!--', prevIndex)) !== -1) {
      // Move the cursor onto the nested `<!--` so the error points at it.
      advanceBy(context, nestedIndex - prevIndex + 1)
      // A `<!--` sitting directly in front of the closer is not reported.
      if (nestedIndex + 4 < s.length) {
        emitError(context, ParserErrorTypes.NESTED_COMMENT)
      }
      prevIndex = nestedIndex + 1
    }
    // Consume the remainder of the comment including the closer.
    advanceBy(context, match.index + match[0].length - prevIndex + 1)
  }
  return {
    type: NodeTypes.COMMENT,
    content,
    loc: getSelection(context, start)
  }
 }
 /**
 * Parse markup that is treated as a comment: `<!...>`, `<?...>` or `</`
 * followed by a character that cannot start a tag name.
 *
 * The comment runs up to the first `>` (or to the end of input). Its content
 * starts after `<!` / `</`; for `<?` the question mark is part of the content.
 */
 function parseBogusComment(context: ParserContext): CommentNode | undefined {
  assert(/^<(?:[\!\?]|\/[^a-z>])/i.test(context.source))
  const start = getCursor(context)
  const text = context.source
  const contentStart = text[1] === '?' ? 1 : 2
  const closeIndex = text.indexOf('>')
  const isClosed = closeIndex !== -1
  const content = isClosed
    ? text.slice(contentStart, closeIndex)
    : text.slice(contentStart)
  advanceBy(context, isClosed ? closeIndex + 1 : text.length)
  const loc = getSelection(context, start)
  return { type: NodeTypes.COMMENT, content, loc }
 }
 /**
 * Parse a complete element at the cursor: start tag, children, end tag.
 *
 * Self-closing and void elements are returned right after their start tag.
 * Otherwise the element is pushed onto `ancestors` while its children are
 * parsed, and a missing end tag is reported as X_MISSING_END_TAG.
 *
 * @param context - Parser state; the cursor is advanced past the element.
 * @param ancestors - Stack of open elements; restored before returning.
 * @returns The element, with its location extended over children and end tag.
 */
 function parseElement(
  context: ParserContext,
  ancestors: ElementNode[]
 ): ElementNode | undefined {
  assert(/^<[a-z]/i.test(context.source))
  const parent = last(ancestors)
  const element = parseTag(context, TagType.Start, parent)
  if (!element.isSelfClosing && !context.isVoidTag(element.tag)) {
    // Children are parsed with this element as the innermost ancestor.
    ancestors.push(element)
    const mode: TextModes = context.getTextMode(element.tag, element.ns)
    element.children = parseChildren(context, mode, ancestors)
    ancestors.pop()
    if (startsWithEndTagOpen(context.source, element.tag)) {
      parseTag(context, TagType.End, parent)
    } else {
      emitError(context, ParserErrorTypes.X_MISSING_END_TAG)
      const reachedEof = context.source.length === 0
      if (reachedEof && element.tag.toLowerCase() === 'script') {
        // An unterminated <script> whose content opens with `<!--` gets an
        // additional, more specific error.
        const first = element.children[0]
        if (first && startsWith(first.loc.source, '<!--')) {
          emitError(
            context,
            ParserErrorTypes.EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT
          )
        }
      }
    }
    // Widen the location from the start tag to everything consumed so far.
    element.loc = getSelection(context, element.loc.start)
  }
  return element
 }
 /**
 * Tells `parseTag` whether the cursor is on a start tag or an end tag.
 */
 const enum TagType {
  Start, // `<name ...>`
  End // `</name ...>`
 }
 /**
 * Parse a tag (E.g. `<div id=a>`) with that type (start tag or end tag).
 *
 * The cursor must be on `<name` or `</name`, and `type` must agree with the
 * presence of the slash (both asserted). The tag is consumed up to and
 * including its closing `>` or `/>`, or to the end of input.
 *
 * @param context - Parser state; the cursor is advanced past the tag.
 * @param type - Whether a start tag or an end tag is expected.
 * @param parent - Enclosing element, passed on to `getNamespace`.
 * @returns An element node with empty `children`. Attributes are collected
 *   into `props` for start tags only; on end tags they are parsed, reported
 *   as an error and dropped.
 */
 function parseTag(
  context: ParserContext,
  type: TagType,
  parent: ElementNode | undefined
 ): ElementNode {
  assert(/^<\/?[a-z]/i.test(context.source))
  assert(
    type === (startsWith(context.source, '</') ? TagType.End : TagType.Start)
  )
  // Tag open.
  const start = getCursor(context)
  const match = /^<\/?([a-z][^\t\r\n\f />]*)/i.exec(context.source)!
  const tag = match[1]
  const props = []
  const ns = context.getNamespace(tag, parent)
  advanceBy(context, match[0].length)
  advanceSpaces(context)
  // Attributes.
  // Names seen so far in this tag, used for duplicate detection.
  const attributeNames = new Set<string>()
  while (
    context.source.length > 0 &&
    !startsWith(context.source, '>') &&
    !startsWith(context.source, '/>')
  ) {
    // A '/' that is not part of '/>' is reported and skipped.
    if (startsWith(context.source, '/')) {
      emitError(context, ParserErrorTypes.UNEXPECTED_SOLIDUS_IN_TAG)
      advanceBy(context, 1)
      advanceSpaces(context)
      continue
    }
    if (type === TagType.End) {
      emitError(context, ParserErrorTypes.END_TAG_WITH_ATTRIBUTES)
    }
    const attr = parseAttribute(context, attributeNames)
    if (type === TagType.Start) {
      props.push(attr)
    }
    // Another attribute begins right after this one without whitespace.
    if (/^[^\t\r\n\f />]/.test(context.source)) {
      emitError(context, ParserErrorTypes.MISSING_WHITESPACE_BETWEEN_ATTRIBUTES)
    }
    advanceSpaces(context)
  }
  // Tag close.
  let isSelfClosing = false
  if (context.source.length === 0) {
    emitError(context, ParserErrorTypes.EOF_IN_TAG)
  } else {
    isSelfClosing = startsWith(context.source, '/>')
    if (type === TagType.End && isSelfClosing) {
      emitError(context, ParserErrorTypes.END_TAG_WITH_TRAILING_SOLIDUS)
    }
    advanceBy(context, isSelfClosing ? 2 : 1)
  }
  // Classify by tag name; the `slot` and `template` comparisons are
  // case-sensitive, and any uppercase letter or hyphen marks a component.
  let tagType = ElementTypes.ELEMENT
  if (tag === 'slot') tagType = ElementTypes.SLOT
  else if (tag === 'template') tagType = ElementTypes.TEMPLATE
  else if (/[A-Z-]/.test(tag)) tagType = ElementTypes.COMPONENT
  return {
    type: NodeTypes.ELEMENT,
    ns,
    tag,
    tagType,
    props,
    isSelfClosing,
    children: [],
    loc: getSelection(context, start)
  }
 }
 /**
 * Parse one attribute (name and optional `=value`) at the cursor.
 *
 * Names starting with `v-`, `:`, `@` or `#` produce a directive node; every
 * other name produces a plain attribute node.
 *
 * Changes compared with the earlier version:
 * - The location of a directive argument is computed from the length of the
 *   prefix in front of it. It used to be found by searching the name for the
 *   argument text, which hit the wrong place whenever that text also occurred
 *   earlier in the name (e.g. `v-bind:b`).
 * - An unterminated dynamic argument such as `[foo` keeps all of its
 *   characters; previously the last one was cut off.
 *
 * @param context - Parser state; the cursor is advanced past the attribute.
 * @param nameSet - Attribute names already seen in the current tag; the new
 *   name is added, and a repeated name is reported as DUPLICATE_ATTRIBUTE.
 * @returns The attribute or directive node.
 */
 function parseAttribute(
  context: ParserContext,
  nameSet: Set<string>
 ): AttributeNode | DirectiveNode {
  assert(/^[^\t\r\n\f />]/.test(context.source))
  // Name.
  const start = getCursor(context)
  const match = /^[^\t\r\n\f />][^\t\r\n\f />=]*/.exec(context.source)!
  const name = match[0]
  if (nameSet.has(name)) {
    emitError(context, ParserErrorTypes.DUPLICATE_ATTRIBUTE)
  }
  nameSet.add(name)
  if (name[0] === '=') {
    emitError(
      context,
      ParserErrorTypes.UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME
    )
  }
  {
    // Report every quote or '<' inside the name at its own position.
    const pattern = /["'<]/g
    let m: RegExpExecArray | null
    while ((m = pattern.exec(name)) !== null) {
      emitError(
        context,
        ParserErrorTypes.UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME,
        m.index
      )
    }
  }
  advanceBy(context, name.length)
  // Value
  let value: { content: string; loc: SourceLocation } | undefined = undefined
  if (/^[\t\r\n\f ]*=/.test(context.source)) {
    advanceSpaces(context)
    advanceBy(context, 1)
    advanceSpaces(context)
    value = parseAttributeValue(context)
    if (!value) {
      emitError(context, ParserErrorTypes.MISSING_ATTRIBUTE_VALUE)
    }
  }
  const loc = getSelection(context, start)
  if (/^(v-|:|@|#)/.test(name)) {
    // Groups: 1 = directive name after `v-`, 2 = argument, 3 = the rest
    // (modifiers including their leading dot).
    const directive = /(?:^v-([a-z0-9-]+))?(?:(?::|^@|^#)([^\.]+))?(.+)?$/i.exec(
      name
    )!
    let arg: ExpressionNode | undefined
    if (directive[2]) {
      // The argument directly follows `v-<name>:` or a one-character
      // shorthand (`:`, `@`, `#`).
      const prefixLength = directive[1] ? 'v-'.length + directive[1].length : 0
      const startOffset = directive.index + prefixLength + 1
      const argLoc = getSelection(
        context,
        getNewPosition(context, start, startOffset),
        getNewPosition(context, start, startOffset + directive[2].length)
      )
      let content = directive[2]
      let isStatic = true
      if (content.startsWith('[')) {
        // `[expr]` is a dynamic argument; strip the brackets.
        isStatic = false
        if (content.endsWith(']')) {
          content = content.slice(1, -1)
        } else {
          emitError(
            context,
            ParserErrorTypes.X_MISSING_DYNAMIC_DIRECTIVE_ARGUMENT_END
          )
          // No closing bracket to strip.
          content = content.slice(1)
        }
      }
      arg = {
        type: NodeTypes.EXPRESSION,
        content,
        isStatic,
        loc: argLoc
      }
    }
    return {
      type: NodeTypes.DIRECTIVE,
      // Shorthand prefixes map to their directive names.
      name:
        directive[1] ||
        (startsWith(name, ':')
          ? 'bind'
          : startsWith(name, '@')
            ? 'on'
            : 'slot'),
      exp: value && {
        type: NodeTypes.EXPRESSION,
        content: value.content,
        isStatic: false,
        loc: value.loc
      },
      arg,
      modifiers: directive[3] ? directive[3].substr(1).split('.') : [],
      loc
    }
  }
  return {
    type: NodeTypes.ATTRIBUTE,
    name,
    value: value && {
      type: NodeTypes.TEXT,
      content: value.content,
      isEmpty: value.content.trim().length === 0,
      loc: value.loc
    },
    loc
  }
 }
 /**
 * Parse an attribute value at the cursor, either quoted or unquoted.
 *
 * A quoted value runs to the matching quote, or to the end of input when the
 * quote is never closed. An unquoted value runs to the next whitespace or
 * `>`; quotes, `<`, `=` and backticks inside it are reported.
 *
 * @returns The decoded value and its location (including any quotes), or
 *   `undefined` when no value is present.
 */
 function parseAttributeValue(
  context: ParserContext
 ): { content: string; loc: SourceLocation } | undefined {
  const start = getCursor(context)
  if (!/^["']/.test(context.source)) {
    // Unquoted
    const match = /^[^\t\r\n\f >]+/.exec(context.source)
    if (!match) {
      return undefined
    }
    const raw = match[0]
    const unexpectedChars = /["'<=`]/g
    for (
      let m = unexpectedChars.exec(raw);
      m !== null;
      m = unexpectedChars.exec(raw)
    ) {
      emitError(
        context,
        ParserErrorTypes.UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE,
        m.index
      )
    }
    const unquoted = parseTextData(
      context,
      raw.length,
      TextModes.ATTRIBUTE_VALUE
    )
    return { content: unquoted, loc: getSelection(context, start) }
  }
  // Quoted value.
  const quote = context.source[0]
  advanceBy(context, 1)
  const endIndex = context.source.indexOf(quote)
  const isTerminated = endIndex !== -1
  const quoted = parseTextData(
    context,
    isTerminated ? endIndex : context.source.length,
    TextModes.ATTRIBUTE_VALUE
  )
  if (isTerminated) {
    // Step over the closing quote.
    advanceBy(context, 1)
  }
  return { content: quoted, loc: getSelection(context, start) }
 }
 /**
 * Parse an interpolation (e.g. `{{ expr }}`) at the cursor.
 *
 * @param context - Parser state; advanced past the closing delimiter on
 *   success and left untouched on failure.
 * @param mode - Text mode used to decode the text between the delimiters.
 * @returns The expression node with trimmed content, or `undefined` (after
 *   reporting X_MISSING_INTERPOLATION_END) when the closing delimiter is
 *   missing.
 */
 function parseInterpolation(
  context: ParserContext,
  mode: TextModes
 ): ExpressionNode | undefined {
  const open = context.delimiters[0]
  const close = context.delimiters[1]
  assert(startsWith(context.source, open))
  const closeIndex = context.source.indexOf(close, open.length)
  if (closeIndex < 0) {
    emitError(context, ParserErrorTypes.X_MISSING_INTERPOLATION_END)
    return undefined
  }
  const start = getCursor(context)
  advanceBy(context, open.length)
  const innerLength = closeIndex - open.length
  const content = parseTextData(context, innerLength, mode).trim()
  advanceBy(context, close.length)
  // The location covers both delimiters.
  const loc = getSelection(context, start)
  return {
    type: NodeTypes.EXPRESSION,
    content,
    loc,
    isStatic: content === ''
  }
 }
 /**
 * Parse a run of text at the cursor.
 *
 * The text always contains at least one character and stops before the next
 * `<`, the next opening interpolation delimiter, `]]>` (in CDATA mode only),
 * or at the end of input, whichever comes first.
 */
 function parseText(context: ParserContext, mode: TextModes): TextNode {
  assert(context.source.length > 0)
  const { source, delimiters } = context
  // Searches for '<' and the delimiter start at index 1 so that the
  // character under the cursor is always consumed.
  const stops = [
    source.indexOf('<', 1),
    source.indexOf(delimiters[0], 1),
    mode === TextModes.CDATA ? source.indexOf(']]>') : -1
  ]
  let endIndex = source.length
  for (const stop of stops) {
    if (stop !== -1 && stop < endIndex) {
      endIndex = stop
    }
  }
  assert(endIndex > 0)
  const start = getCursor(context)
  const content = parseTextData(context, endIndex, mode)
  const loc = getSelection(context, start)
  return {
    type: NodeTypes.TEXT,
    content,
    loc,
    isEmpty: !content.trim()
  }
 }
 /**
 * Get text data with a given length from the current location.
 * This translates HTML entities in the text data, except in RAWTEXT and CDATA
 * modes, where the text is returned verbatim.
 *
 * Changes compared with the earlier version:
 * - A `&` that is the very last character of the input is kept as literal
 *   text. Previously the missing next character was coerced to the string
 *   "undefined", which passed the alphanumeric check and caused a spurious
 *   UNKNOWN_NAMED_CHARACTER_REFERENCE.
 * - Hexadecimal references written with an uppercase `X` (`&#X41;`) are
 *   decoded; they used to be reported as having no digits.
 *
 * @param context - Parser state; the cursor is advanced past the text.
 * @param length - Number of source characters that make up the text.
 * @param mode - Text mode; decides whether and how references are decoded.
 * @returns The (possibly decoded) text.
 */
 function parseTextData(
  context: ParserContext,
  length: number,
  mode: TextModes
 ): string {
  if (mode === TextModes.RAWTEXT || mode === TextModes.CDATA) {
    const text = context.source.slice(0, length)
    advanceBy(context, length)
    return text
  }
  // DATA, RCDATA or ATTRIBUTE_VALUE: decode character references.
  const end = context.offset + length
  let text: string = ''
  while (context.offset < end) {
    // Next candidate reference: `&`, `&#` or `&#x`.
    const head = /&(?:#x?)?/i.exec(context.source)
    if (!head || context.offset + head.index >= end) {
      // No further reference inside the requested range: take the rest as is.
      const remaining = end - context.offset
      text += context.source.slice(0, remaining)
      advanceBy(context, remaining)
      break
    }
    // Advance to the "&".
    text += context.source.slice(0, head.index)
    advanceBy(context, head.index)
    if (head[0] === '&') {
      // Named character reference.
      let name = '',
        value: string | undefined = undefined
      if (/[0-9a-z]/i.test(context.source[1] || '')) {
        // Try the longest possible name first, then shorter ones.
        for (
          let length = context.maxCRNameLength;
          !value && length > 0;
          --length
        ) {
          name = context.source.substr(1, length)
          value = context.namedCharacterReferences[name]
        }
        if (value) {
          const semi = name.endsWith(';')
          if (
            mode === TextModes.ATTRIBUTE_VALUE &&
            !semi &&
            /[=a-z0-9]/i.test(context.source[1 + name.length] || '')
          ) {
            // Inside attribute values, a semicolon-less reference followed by
            // `=` or an alphanumeric character is kept as literal text.
            text += '&'
            text += name
            advanceBy(context, 1 + name.length)
          } else {
            text += value
            advanceBy(context, 1 + name.length)
            if (!semi) {
              emitError(
                context,
                ParserErrorTypes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE
              )
            }
          }
        } else {
          emitError(context, ParserErrorTypes.UNKNOWN_NAMED_CHARACTER_REFERENCE)
          text += '&'
          text += name
          advanceBy(context, 1 + name.length)
        }
      } else {
        // A bare `&` that does not start a reference.
        text += '&'
        advanceBy(context, 1)
      }
    } else {
      // Numeric character reference.
      // `head` was matched case-insensitively, so compare case-insensitively.
      const hex = head[0].toLowerCase() === '&#x'
      const pattern = hex ? /^&#x([0-9a-f]+);?/i : /^&#([0-9]+);?/
      const body = pattern.exec(context.source)
      if (!body) {
        text += head[0]
        emitError(
          context,
          ParserErrorTypes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE
        )
        advanceBy(context, head[0].length)
      } else {
        // https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
        let cp = Number.parseInt(body[1], hex ? 16 : 10)
        if (cp === 0) {
          emitError(context, ParserErrorTypes.NULL_CHARACTER_REFERENCE)
          cp = 0xfffd
        } else if (cp > 0x10ffff) {
          emitError(
            context,
            ParserErrorTypes.CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE
          )
          cp = 0xfffd
        } else if (cp >= 0xd800 && cp <= 0xdfff) {
          emitError(context, ParserErrorTypes.SURROGATE_CHARACTER_REFERENCE)
          cp = 0xfffd
        } else if ((cp >= 0xfdd0 && cp <= 0xfdef) || (cp & 0xfffe) === 0xfffe) {
          // Reported, but the code point is kept as is.
          emitError(context, ParserErrorTypes.NONCHARACTER_CHARACTER_REFERENCE)
        } else if (
          (cp >= 0x01 && cp <= 0x08) ||
          cp === 0x0b ||
          (cp >= 0x0d && cp <= 0x1f) ||
          (cp >= 0x7f && cp <= 0x9f)
        ) {
          emitError(context, ParserErrorTypes.CONTROL_CHARACTER_REFERENCE)
          // Some control code points have a replacement; the others are kept.
          cp = CCR_REPLACEMENTS[cp] || cp
        }
        text += String.fromCodePoint(cp)
        advanceBy(context, body[0].length)
        if (!body![0].endsWith(';')) {
          emitError(
            context,
            ParserErrorTypes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE
          )
        }
      }
    }
  }
  return text
 }
 // Replacement code points for numeric character references in the
 // 0x80-0x9f range: key = referenced code point, value = code point emitted
 // instead. Code points without an entry are kept unchanged by the caller.
 // https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
 const CCR_REPLACEMENTS: { [key: number]: number | undefined } = {
  0x80: 0x20ac,
  0x82: 0x201a,
  0x83: 0x0192,
  0x84: 0x201e,
  0x85: 0x2026,
  0x86: 0x2020,
  0x87: 0x2021,
  0x88: 0x02c6,
  0x89: 0x2030,
  0x8a: 0x0160,
  0x8b: 0x2039,
  0x8c: 0x0152,
  0x8e: 0x017d,
  0x91: 0x2018,
  0x92: 0x2019,
  0x93: 0x201c,
  0x94: 0x201d,
  0x95: 0x2022,
  0x96: 0x2013,
  0x97: 0x2014,
  0x98: 0x02dc,
  0x99: 0x2122,
  0x9a: 0x0161,
  0x9b: 0x203a,
  0x9c: 0x0153,
  0x9e: 0x017e,
  0x9f: 0x0178
 }
--- a/packages/compiler-core/src/parserOptionsMinimal.ts
+++ b/packages/compiler-core/src/parserOptionsMinimal.ts
@@ -0,0 +1,112 @@
 import { TextModes, ParserOptions } from './parser'
 import { ElementNode, Namespaces, Position, Node } from './ast'
 import { ParserErrorTypes } from './errorTypes'
 /**
 * A small, self-contained set of parser options: `{{ }}` delimiters,
 * whitespace-only text dropped, the five basic named character references,
 * and an `onError` that throws a `SyntaxError` carrying `code` and `loc`.
 */
 export const parserOptionsMinimal: ParserOptions = {
  delimiters: [`{{`, `}}`],
  ignoreSpaces: true,
  // Children inherit the parent's namespace; only inside HTML do `svg` and
  // `math` switch to their own namespaces.
  getNamespace(tag: string, parent: ElementNode | undefined): Namespaces {
    const inherited = parent ? parent.ns : Namespaces.HTML
    if (inherited !== Namespaces.HTML) {
      return inherited
    }
    switch (tag) {
      case 'svg':
        return Namespaces.SVG
      case 'math':
        return Namespaces.MATH_ML
      default:
        return inherited
    }
  },
  // Only HTML elements get special text modes.
  getTextMode(tag: string, ns: Namespaces): TextModes {
    if (ns !== Namespaces.HTML) {
      return TextModes.DATA
    }
    if (/^textarea$/i.test(tag)) {
      return TextModes.RCDATA
    }
    return /^(?:style|script)$/i.test(tag) ? TextModes.RAWTEXT : TextModes.DATA
  },
  isVoidTag(tag: string): boolean {
    const voidTags = /^(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i
    return voidTags.test(tag)
  },
  namedCharacterReferences: {
    'gt;': '>',
    'lt;': '<',
    'amp;': '&',
    'apos;': "'",
    'quot;': '"'
  },
  // Every parse error aborts parsing by throwing.
  onError(code: ParserErrorTypes, loc: Position): void {
    const text = `${messages[code]} (${loc.line}:${loc.column})`
    throw Object.assign(new SyntaxError(text), { code, loc })
  },
  // Identity: nodes are passed through unchanged.
  transform(node: Node): Node {
    return node
  }
 }
 /**
 * Human-readable message for every parser error code, used to build the
 * text of the thrown `SyntaxError`.
 *
 * Every member of `ParserErrorTypes` has an entry. The entries for
 * UNEXPECTED_NULL_CHARACTER and X_MISSING_DYNAMIC_DIRECTIVE_ARGUMENT_END were
 * missing before, which produced messages starting with "undefined".
 */
 const messages: { [code: number]: string } = {
  [ParserErrorTypes.ABRUPT_CLOSING_OF_EMPTY_COMMENT]: 'Illegal comment.',
  [ParserErrorTypes.ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE]:
    'Illegal numeric character reference: invalid character.',
  [ParserErrorTypes.CDATA_IN_HTML_CONTENT]:
    'CDATA section is allowed only in XML context.',
  [ParserErrorTypes.CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE]:
    'Illegal numeric character reference: too big.',
  [ParserErrorTypes.CONTROL_CHARACTER_REFERENCE]:
    'Illegal numeric character reference: control character.',
  [ParserErrorTypes.DUPLICATE_ATTRIBUTE]: 'Duplicate attribute.',
  [ParserErrorTypes.END_TAG_WITH_ATTRIBUTES]: 'End tag cannot have attributes.',
  [ParserErrorTypes.END_TAG_WITH_TRAILING_SOLIDUS]: "Illegal '/' in tags.",
  [ParserErrorTypes.EOF_BEFORE_TAG_NAME]: 'Unexpected EOF in tag.',
  [ParserErrorTypes.EOF_IN_CDATA]: 'Unexpected EOF in CDATA section.',
  [ParserErrorTypes.EOF_IN_COMMENT]: 'Unexpected EOF in comment.',
  [ParserErrorTypes.EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT]:
    'Unexpected EOF in script.',
  [ParserErrorTypes.EOF_IN_TAG]: 'Unexpected EOF in tag.',
  [ParserErrorTypes.INCORRECTLY_CLOSED_COMMENT]: 'Incorrectly closed comment.',
  [ParserErrorTypes.INCORRECTLY_OPENED_COMMENT]: 'Incorrectly opened comment.',
  [ParserErrorTypes.INVALID_FIRST_CHARACTER_OF_TAG_NAME]:
    "Illegal tag name. Use '&lt;' to print '<'.",
  [ParserErrorTypes.MISSING_ATTRIBUTE_VALUE]: 'Attribute value was expected.',
  [ParserErrorTypes.MISSING_END_TAG_NAME]: 'End tag name was expected.',
  [ParserErrorTypes.MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE]:
    'Semicolon was expected.',
  [ParserErrorTypes.MISSING_WHITESPACE_BETWEEN_ATTRIBUTES]:
    'Whitespace was expected.',
  [ParserErrorTypes.NESTED_COMMENT]: "Unexpected '<!--' in comment.",
  [ParserErrorTypes.NONCHARACTER_CHARACTER_REFERENCE]:
    'Illegal numeric character reference: non character.',
  [ParserErrorTypes.NULL_CHARACTER_REFERENCE]:
    'Illegal numeric character reference: null character.',
  [ParserErrorTypes.SURROGATE_CHARACTER_REFERENCE]:
    'Illegal numeric character reference: non-pair surrogate.',
  [ParserErrorTypes.UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME]:
    'Attribute name cannot contain U+0022 ("), U+0027 (\'), and U+003C (<).',
  [ParserErrorTypes.UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE]:
    'Unquoted attribute value cannot contain U+0022 ("), U+0027 (\'), U+003C (<), U+003D (=), and U+0060 (`).',
  [ParserErrorTypes.UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME]:
    "Attribute name cannot start with '='.",
  [ParserErrorTypes.UNEXPECTED_NULL_CHARACTER]: 'Unexpected null character.',
  [ParserErrorTypes.UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME]:
    "'<?' is allowed only in XML context.",
  [ParserErrorTypes.UNEXPECTED_SOLIDUS_IN_TAG]: "Illegal '/' in tags.",
  [ParserErrorTypes.UNKNOWN_NAMED_CHARACTER_REFERENCE]: 'Unknown entity name.',
  [ParserErrorTypes.X_INVALID_END_TAG]: 'Invalid end tag.',
  [ParserErrorTypes.X_MISSING_END_TAG]: 'End tag was not found.',
  [ParserErrorTypes.X_MISSING_INTERPOLATION_END]:
    'Interpolation end sign was not found.',
  [ParserErrorTypes.X_MISSING_DYNAMIC_DIRECTIVE_ARGUMENT_END]:
    'End bracket for dynamic directive argument was not found.'
 }
--- a/packages/compiler-core/src/parserOptionsStandard.ts
+++ b/packages/compiler-core/src/parserOptionsStandard.ts