1
0
mirror of https://git.suyu.dev/suyu/suyu synced 2025-11-23 06:22:08 -06:00

Initial commit

This commit is contained in:
Crimson-Hawk
2024-03-05 16:42:40 +08:00
commit f1e4595ebf
39576 changed files with 7006612 additions and 0 deletions

View File

View File

View File

@@ -0,0 +1,100 @@
#==============================================================================
# Copyright (c) 2016-2020 Nikita Kniazev
#
# Use, modification and distribution is subject to the Boost Software
# License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt)
#==============================================================================
# Limit the repository clone to the most recent 50 commits.
clone_depth: 50
environment:
# Variables shared by every job of the matrix below.
global:
PROJECT: libs\spirit
APPVEYOR_SAVE_CACHE_ON_ERROR: true
CLCACHE_HARDLINK: 1
APPVEYOR_CACHE_ENTRY_ZIP_ARGS: "-t7z -m0=lzma2 -mx=3 -md=32m -ms=on"
CLCACHE_CACHESIZE: 2123123123 # AppVeyor allows us to store 1 GB of 7-zipped cache, but
# the compression ratio of object files is about 20:1 in our case
# One job per entry: worker image, address model (ADDRMDL) and b2 toolset (TOOLSET).
matrix:
- { APPVEYOR_BUILD_WORKER_IMAGE: 'Visual Studio 2019', ADDRMDL: 64, TOOLSET: 'msvc-14.2' }
- { APPVEYOR_BUILD_WORKER_IMAGE: 'Visual Studio 2017', ADDRMDL: 64, TOOLSET: 'msvc-14.1' }
- { APPVEYOR_BUILD_WORKER_IMAGE: 'Visual Studio 2015', ADDRMDL: 64, TOOLSET: 'msvc-14.0' }
- { APPVEYOR_BUILD_WORKER_IMAGE: 'Visual Studio 2015', ADDRMDL: 64, TOOLSET: 'msvc-12.0' }
- { APPVEYOR_BUILD_WORKER_IMAGE: 'Visual Studio 2015', ADDRMDL: 32, TOOLSET: 'msvc-11.0' }
- { APPVEYOR_BUILD_WORKER_IMAGE: 'Visual Studio 2015', ADDRMDL: 32, TOOLSET: 'msvc-10.0' }
# Directories preserved between builds (clcache's cache and pip's cache).
cache:
- '%USERPROFILE%\clcache'
- '%LOCALAPPDATA%\pip\Cache'
init:
# Boost is checked out next to the build folder (see the clone step in before_build).
- set BOOST_ROOT=%APPVEYOR_BUILD_FOLDER%\..\boost
- set BOOST_BUILD_PATH=%BOOST_ROOT%\..\boost-build
# b2 options reused by test_script; ADDRMDL and TOOLSET come from the job matrix.
# The entry below is a single command written across several lines.
- set B2_ARGS=link=shared threading=multi variant=release
address-model=%ADDRMDL% toolset=%TOOLSET%
define=BOOST_ALL_NO_LIB
define=BOOST_SPIRIT_X3_HIDE_CXX17_WARNING
before_build:
# Put %BOOST_ROOT% and the Python 3.6 Scripts directory on PATH
# (presumably where b2 and pip/clcache are found — confirm).
- set PATH=%BOOST_ROOT%;C:\Python36-x64\Scripts;%PATH%
# Write the b2 user configuration from a PowerShell here-string.
- ps: |
# Creating %USERPROFILE%/user-config.jam file
@'
import feature os regex toolset pch ;
# clcache
local toolset = [ regex.split [ os.environ TOOLSET ] "-" ] ;
using $(toolset[1]) : $(toolset[2-]:J="-") : : <compiler>clcache ;
# A subfeature that tells Spirit tests to use PCH
feature.subfeature pch on : version : spirit : optional propagated incidental ;
'@ | sc "$env:USERPROFILE/user-config.jam"
- set BRANCH=%APPVEYOR_REPO_BRANCH%
# TODO: Determine the real root branch when a PR targets, or a build runs from,
# a branch other than master/develop. Until then such builds fall back to develop.
- if not "%BRANCH%" == "master"
if not "%BRANCH%" == "develop"
set BRANCH=develop
- echo Root branch is %BRANCH%
# Sadly git's --shallow-submodules has hardcoded depth of 1 commit
# Patch the git binary with a little more depth to deal with boost-commitbot's lag
- ps: |
$git = Get-Command git | Select-Object -ExpandProperty Definition
$git = Split-Path -Parent $git | Split-Path -Parent
Get-ChildItem -Path "$git\mingw64\*" -Include *.exe -Recurse |
ForEach-Object -Process {(Get-Content -Raw $_).Replace("--depth=1","--depth=9") | Set-Content $_}
# Checkout Boost
# Shallow clone of the root branch with all submodules except %PROJECT% itself.
- git clone -j10 --branch=%BRANCH% --depth=1 --quiet
--recurse-submodules=":(exclude)%PROJECT%" --shallow-submodules
https://github.com/boostorg/boost.git %BOOST_ROOT%
# The remaining steps run from inside the Boost checkout.
- pushd %BOOST_ROOT%
# Remove empty folder
- rmdir /S /Q %PROJECT%
# Move the repository to boost/libs and make a link to previous place
- move %APPVEYOR_BUILD_FOLDER% %PROJECT%
- mklink /J %APPVEYOR_BUILD_FOLDER% %PROJECT%
# Install clcache
- pip install clcache
# Apply the cache size limit configured in CLCACHE_CACHESIZE.
- clcache -M %CLCACHE_CACHESIZE%
# Run clcache-server
- set CLCACHE_SERVER=1
- ps: Start-Process clcache-server.exe -PassThru
build_script:
# Bootstrap Boost.Build with the MSVC toolset.
- bootstrap.bat --with-toolset=msvc
# Let's have less noise (Appveyor cannot collapse command output)
# stdout of `b2 headers` is appended to deps_build.log; the log is printed only
# when the command fails.
# NOTE(review): `2>&1` is written before `>>`, so stderr presumably still goes
# to the console instead of the log — confirm this is intended.
- b2 headers 2>&1 >> deps_build.log
|| ( echo === deps_build.log === && cat deps_build.log )
test_script:
# Run the classic, repository and main test directories with the shared B2_ARGS,
# treating warnings as errors and enabling the Spirit pch subfeature.
- b2 %B2_ARGS% %PROJECT%\classic\test %PROJECT%\repository\test %PROJECT%\test
warnings=extra warnings-as-errors=on pch=on-spirit

View File

@@ -0,0 +1,50 @@
# Use, modification, and distribution are
# subject to the Boost Software License, Version 1.0. (See accompanying
# file LICENSE.txt)
#
# Copyright Rene Rivera 2020.
# For Drone CI we use the Starlark scripting language to reduce duplication,
# as the YAML syntax for Drone CI is rather limited.
#
#
# Environment passed as `globalenv` to every job created in main().
globalenv={'PROJECT': 'libs/spirit'}
# Container images; the jobs visible in this file only use the Linux one.
linuxglobalimage="cppalliance/droneubuntu1804:1"
windowsglobalimage="cppalliance/dronevs2019"
def _clang10_job(index, std, job, uuid):
    """Build one clang-10 pipeline.

    index is the job number shown in the pipeline name, std the C++ standard
    ('03', '11', '14'), job the test directory, uuid the DRONE_JOB_UUID value.
    """
    return linux_cxx(
        "STD=%s JOB=%s Job %d" % (std, job, index),
        "clang-10",
        packages="clang-10 libc++-10-dev libc++abi-10-dev jq",
        llvm_os="bionic",
        llvm_ver="10",
        buildtype="boost",
        buildscript="drone",
        image=linuxglobalimage,
        environment={'STD': std, 'JOB': job, 'TRAVIS_COMPILER': 'clang-10', 'DRONE_JOB_UUID': uuid},
        globalenv=globalenv,
    )

def _gcc10_job(index, std, job, uuid):
    """Build one gcc-10 pipeline; parameters as for _clang10_job."""
    return linux_cxx(
        "STD=%s JOB=%s Job %d" % (std, job, index),
        "gcc-10",
        packages="g++-10 jq",
        buildtype="boost",
        buildscript="drone",
        image=linuxglobalimage,
        environment={'STD': std, 'JOB': job, 'TRAVIS_COMPILER': 'gcc-10', 'DRONE_JOB_UUID': uuid},
        globalenv=globalenv,
    )

def main(ctx):
    """Return the Drone pipelines in job-number order.

    Jobs marked "Not building" are kept as comments so their job numbers and
    UUIDs stay documented.
    """
    return [
        # Spirit X3, C++14
        _clang10_job(0, '14', 'test/x3', 'b6589fc6ab'),
        _gcc10_job(1, '14', 'test/x3', '356a192b79'),
        # clang-10, C++11
        _clang10_job(2, '11', 'test/qi', 'da4b9237ba'),
        _clang10_job(3, '11', 'test/karma', '77de68daec'),
        _clang10_job(4, '11', 'test/lex', '1b64538924'),
        _clang10_job(5, '11', 'test/support', 'ac3478d69a'),
        _clang10_job(6, '11', 'repository/test', 'c1dfd96eea'),
        # clang-10, C++03
        # Not building #
        # _clang10_job(7, '03', 'test/qi', '902ba3cda1'),
        # Not building #
        # _clang10_job(8, '03', 'test/karma', 'fe5dbbcea5'),
        _clang10_job(9, '03', 'test/lex', '0ade7c2cf9'),
        _clang10_job(10, '03', 'test/support', 'b1d5781111'),
        _clang10_job(11, '03', 'repository/test', '17ba079149'),
        # gcc-10, C++11
        _gcc10_job(12, '11', 'test/qi', '7b52009b64'),
        _gcc10_job(13, '11', 'test/karma', 'bd307a3ec3'),
        _gcc10_job(14, '11', 'test/lex', 'fa35e19212'),
        _gcc10_job(15, '11', 'test/support', 'f1abd67035'),
        _gcc10_job(16, '11', 'repository/test', '1574bddb75'),
        # gcc-10, C++03
        # Not building #
        # _gcc10_job(17, '03', 'test/qi', '0716d9708d'),
        # Not building #
        # _gcc10_job(18, '03', 'test/karma', '9e6a55b6b4'),
        _gcc10_job(19, '03', 'test/lex', 'b3f0c7f6bb'),
        _gcc10_job(20, '03', 'test/support', '91032ad7bb'),
        _gcc10_job(21, '03', 'repository/test', '472b07b9fc'),
        # Spirit Classic, C++03
        # Not building #
        # _clang10_job(22, '03', 'classic/test', '12c6fc06c9'),
        # Not building #
        # _gcc10_job(23, '03', 'classic/test', 'd435a6cdd7'),
    ]
# from https://github.com/boostorg/boost-ci
load("@boost_ci//ci/drone/:functions.star", "linux_cxx","windows_cxx","osx_cxx","freebsd_cxx")

View File

@@ -0,0 +1,88 @@
#!/bin/bash
# Copyright 2020 Rene Rivera, Sam Darwin
# Distributed under the Boost Software License, Version 1.0.
# (See accompanying file LICENSE.txt or copy at http://boost.org/LICENSE_1_0.txt)
# Stop at the first failing command.
set -e

# Travis-style names for the values Drone provides, so the Travis-derived
# steps further down can read them.
export TRAVIS_BUILD_DIR="$(pwd)" DRONE_BUILD_DIR="$(pwd)"
export TRAVIS_BRANCH="$DRONE_BRANCH"
export VCS_COMMIT_ID="$DRONE_COMMIT" GIT_COMMIT="$DRONE_COMMIT"
export REPO_NAME="$DRONE_REPO" TRAVIS_REPO_SLUG="$DRONE_REPO"
# "false" when Drone reports no pull request number.
export TRAVIS_PULL_REQUEST="${DRONE_PULL_REQUEST:-false}"

# Tool defaults and locations.
export USER="$(whoami)"
export CC="${CC:-gcc}"
export PATH=~/.local/bin:/usr/local/bin:$PATH
export BOOST_ROOT="$HOME/boost" BOOST_BUILD_PATH="$HOME/build-boost"
# Build and test section: runs only when DRONE_JOB_BUILDTYPE is "boost".
# Steps: write the b2 user config, pick the Boost root branch, clone Boost,
# place this repository at $PROJECT inside it, then run b2 in $JOB.
if [ "$DRONE_JOB_BUILDTYPE" == "boost" ]; then
echo '==================================> INSTALL'
export CACHE_NAME=$TRAVIS_OS_NAME-$TOOLSET-$STD-$JOB
export PATH=$BOOST_ROOT:$PATH
# clang jobs additionally pass stdlib=libc++ to b2
if [[ "$TRAVIS_COMPILER" =~ ^clang- ]]; then export STDLIB=stdlib=libc++ ; fi
# Creating ~/user-config.jam file
# The quoted 'EOF' delimiter keeps the jam $(...) references literal. The
# here-document MUST be closed by the EOF line below; without it bash feeds the
# remainder of this script into user-config.jam and nothing else is executed.
sed 's/^ //' > ~/user-config.jam << 'EOF'
import feature ;
import os ;
import regex ;
import toolset ;
local TOOLSET = [ os.environ TRAVIS_COMPILER ] ;
local toolset-parts = [ regex.split $(TOOLSET) "-" ] ;
local toolset-name = $(toolset-parts[1]) ;
local toolset-feature = $(toolset-parts[2-]:J="-") ;
local cxx ;
switch $(toolset-name) {
case gcc : cxx ?= [ regex.replace $(TOOLSET) "gcc" "g++" ] ;
case clang : cxx ?= [ regex.replace $(TOOLSET) "clang" "clang++" ] ;
case * : EXIT "user-config: Unsupported toolset $(toolset-name)" ;
}
using $(toolset-name) : $(toolset-feature) : ccache $(cxx) ;
# Release variant with enabled asserts
variant sanitize : <optimization>speed <debug-symbols>off <inlining>full
<runtime-debugging>off ;
EOF
# Determining the root branch
if [[ "$TRAVIS_PULL_REQUEST" == "false" ]]; then
export BRANCH=$TRAVIS_BRANCH
else
# It is a pull request. Retrieve the base branch from GitHub
# NOTE(review): in the GitHub API `.head.ref` is the PR's source branch and
# `.base.ref` is the branch it targets — confirm which one is intended here.
GH_PR_API=https://api.github.com/repos/$TRAVIS_REPO_SLUG/pulls/$TRAVIS_PULL_REQUEST
export BRANCH=`curl -s $GH_PR_API | jq -r .head.ref`;
fi
if [[ ! "$BRANCH" =~ ^(master|develop)$ ]]; then
# The build was not triggered from one of our main branches.
# Find out the base branch from the git history
# TODO: Not implemented yet, but in most cases it will be develop branch
export BRANCH=develop
fi
echo Root branch is $BRANCH
# Dump environment variables
env
# git's --shallow-submodules uses a fixed depth of 1 commit; make a patched copy
# of git with depth 9 and use that copy for the clone.
sed 's/--depth=1/--depth=9/g' `which git` > ~/git && chmod +x ~/git
# Checkout Boost with every submodule except $PROJECT itself
~/git clone -j10 --branch=$BRANCH --depth=1 --quiet --recurse-submodules=":(exclude)$PROJECT" --shallow-submodules https://github.com/boostorg/boost.git $BOOST_ROOT
pushd $BOOST_ROOT
# Remove the empty submodule folder
rm -rf $PROJECT
./bootstrap.sh --with-toolset=clang
./b2 headers
# Copy the repository to $PROJECT inside Boost and link it from the previous place
cp -rp $TRAVIS_BUILD_DIR $PROJECT
ln -s $PROJECT $TRAVIS_BUILD_DIR
cd $PROJECT
cd $JOB
echo '==================================> SCRIPT'
b2 link=shared threading=multi variant=release,sanitize toolset=$TRAVIS_COMPILER cxxstd=$STD $STDLIB warnings=extra warnings-as-errors=on
fi

View File

@@ -0,0 +1,96 @@
* text=auto !eol svneol=native#text/plain
*.gitattributes text svneol=native#text/plain
# Scriptish formats
*.bat text svneol=native#text/plain
*.bsh text svneol=native#text/x-beanshell
*.cgi text svneol=native#text/plain
*.cmd text svneol=native#text/plain
*.js text svneol=native#text/javascript
*.php text svneol=native#text/x-php
*.pl text svneol=native#text/x-perl
*.pm text svneol=native#text/x-perl
*.py text svneol=native#text/x-python
*.sh eol=lf svneol=LF#text/x-sh
configure eol=lf svneol=LF#text/x-sh
# Image formats
*.bmp binary svneol=unset#image/bmp
*.gif binary svneol=unset#image/gif
*.ico binary svneol=unset#image/ico
*.jpeg binary svneol=unset#image/jpeg
*.jpg binary svneol=unset#image/jpeg
*.png binary svneol=unset#image/png
*.tif binary svneol=unset#image/tiff
*.tiff binary svneol=unset#image/tiff
*.svg text svneol=native#image/svg%2Bxml
# Data formats
*.pdf binary svneol=unset#application/pdf
*.avi binary svneol=unset#video/avi
*.doc binary svneol=unset#application/msword
*.dsp text svneol=crlf#text/plain
*.dsw text svneol=crlf#text/plain
*.eps binary svneol=unset#application/postscript
*.gz binary svneol=unset#application/gzip
*.mov binary svneol=unset#video/quicktime
*.mp3 binary svneol=unset#audio/mpeg
*.ppt binary svneol=unset#application/vnd.ms-powerpoint
*.ps binary svneol=unset#application/postscript
*.psd binary svneol=unset#application/photoshop
*.rdf binary svneol=unset#text/rdf
*.rss text svneol=unset#text/xml
*.rtf binary svneol=unset#text/rtf
*.sln text svneol=native#text/plain
*.swf binary svneol=unset#application/x-shockwave-flash
*.tgz binary svneol=unset#application/gzip
*.vcproj text svneol=native#text/xml
*.vcxproj text svneol=native#text/xml
*.vsprops text svneol=native#text/xml
*.wav binary svneol=unset#audio/wav
*.xls binary svneol=unset#application/vnd.ms-excel
*.zip binary svneol=unset#application/zip
# Text formats
.htaccess text svneol=native#text/plain
*.bbk text svneol=native#text/xml
*.cmake text svneol=native#text/plain
*.css text svneol=native#text/css
*.dtd text svneol=native#text/xml
*.htm text svneol=native#text/html
*.html text svneol=native#text/html
*.ini text svneol=native#text/plain
*.log text svneol=native#text/plain
*.mak text svneol=native#text/plain
*.qbk text svneol=native#text/plain
*.rst text svneol=native#text/plain
*.sql text svneol=native#text/x-sql
*.txt text svneol=native#text/plain
*.xhtml text svneol=native#text/xhtml%2Bxml
*.xml text svneol=native#text/xml
*.xsd text svneol=native#text/xml
*.xsl text svneol=native#text/xml
*.xslt text svneol=native#text/xml
*.xul text svneol=native#text/xul
*.yml text svneol=native#text/plain
boost-no-inspect text svneol=native#text/plain
CHANGES text svneol=native#text/plain
COPYING text svneol=native#text/plain
INSTALL text svneol=native#text/plain
Jamfile text svneol=native#text/plain
Jamroot text svneol=native#text/plain
Jamfile.v2 text svneol=native#text/plain
Jamrules text svneol=native#text/plain
Makefile* text svneol=native#text/plain
README text svneol=native#text/plain
TODO text svneol=native#text/plain
# Code formats
*.c text svneol=native#text/plain
*.cpp text svneol=native#text/plain
*.h text svneol=native#text/plain
*.hpp text svneol=native#text/plain
*.ipp text svneol=native#text/plain
*.tpp text svneol=native#text/plain
*.jam text svneol=native#text/plain
*.java text svneol=native#text/plain

View File

@@ -0,0 +1,442 @@
name: GitHub Actions CI
on:
pull_request:
push:
env:
PROJECT: libs/spirit
jobs:
posix:
strategy:
fail-fast: false
matrix:
include:
- name: "STD=14 JOB=test/x3 Job 0"
buildtype: "boost"
packages: "clang-12 libc++-12-dev libc++abi-12-dev libunwind-12-dev jq ccache"
packages_to_remove: ""
os: "ubuntu-18.04"
cxx: "clang-12"
sources: ""
llvm_os: "bionic"
llvm_ver: "12"
std: "14"
job: "test/x3"
travis_compiler: "clang-12"
- name: "STD=14 JOB=test/x3 Job 1"
buildtype: "boost"
packages: "g++-11 jq ccache"
packages_to_remove: ""
os: "ubuntu-18.04"
cxx: "gcc-11"
sources: ""
llvm_os: ""
llvm_ver: ""
std: "14"
job: "test/x3"
travis_compiler: "gcc-11"
- name: "STD=11 JOB=test/qi Job 2"
buildtype: "boost"
packages: "clang-12 libc++-12-dev libc++abi-12-dev libunwind-12-dev jq ccache"
packages_to_remove: ""
os: "ubuntu-18.04"
cxx: "clang-12"
sources: ""
llvm_os: "bionic"
llvm_ver: "12"
std: "11"
job: "test/qi"
travis_compiler: "clang-12"
- name: "STD=11 JOB=test/karma Job 3"
buildtype: "boost"
packages: "clang-12 libc++-12-dev libc++abi-12-dev libunwind-12-dev jq ccache"
packages_to_remove: ""
os: "ubuntu-18.04"
cxx: "clang-12"
sources: ""
llvm_os: "bionic"
llvm_ver: "12"
std: "11"
job: "test/karma"
travis_compiler: "clang-12"
- name: "STD=11 JOB=test/lex Job 4"
buildtype: "boost"
packages: "clang-12 libc++-12-dev libc++abi-12-dev libunwind-12-dev jq ccache"
packages_to_remove: ""
os: "ubuntu-18.04"
cxx: "clang-12"
sources: ""
llvm_os: "bionic"
llvm_ver: "12"
std: "11"
job: "test/lex"
travis_compiler: "clang-12"
- name: "STD=11 JOB=test/support Job 5"
buildtype: "boost"
packages: "clang-12 libc++-12-dev libc++abi-12-dev libunwind-12-dev jq ccache"
packages_to_remove: ""
os: "ubuntu-18.04"
cxx: "clang-12"
sources: ""
llvm_os: "bionic"
llvm_ver: "12"
std: "11"
job: "test/support"
travis_compiler: "clang-12"
- name: "STD=11 JOB=repository/test Job 6"
buildtype: "boost"
packages: "clang-12 libc++-12-dev libc++abi-12-dev libunwind-12-dev jq ccache"
packages_to_remove: ""
os: "ubuntu-18.04"
cxx: "clang-12"
sources: ""
llvm_os: "bionic"
llvm_ver: "12"
std: "11"
job: "repository/test"
travis_compiler: "clang-12"
- name: "STD=03 JOB=test/qi Job 7"
buildtype: "boost"
packages: "clang-12 libc++-12-dev libc++abi-12-dev libunwind-12-dev jq ccache"
packages_to_remove: ""
os: "ubuntu-18.04"
cxx: "clang-12"
sources: ""
llvm_os: "bionic"
llvm_ver: "12"
std: "03"
job: "test/qi"
travis_compiler: "clang-12"
- name: "STD=03 JOB=test/karma Job 8"
buildtype: "boost"
packages: "clang-12 libc++-12-dev libc++abi-12-dev libunwind-12-dev jq ccache"
packages_to_remove: ""
os: "ubuntu-18.04"
cxx: "clang-12"
sources: ""
llvm_os: "bionic"
llvm_ver: "12"
std: "03"
job: "test/karma"
travis_compiler: "clang-12"
- name: "STD=03 JOB=test/lex Job 9"
buildtype: "boost"
packages: "clang-12 libc++-12-dev libc++abi-12-dev libunwind-12-dev jq ccache"
packages_to_remove: ""
os: "ubuntu-18.04"
cxx: "clang-12"
sources: ""
llvm_os: "bionic"
llvm_ver: "12"
std: "03"
job: "test/lex"
travis_compiler: "clang-12"
- name: "STD=03 JOB=test/support Job 10"
buildtype: "boost"
packages: "clang-12 libc++-12-dev libc++abi-12-dev libunwind-12-dev jq ccache"
packages_to_remove: ""
os: "ubuntu-18.04"
cxx: "clang-12"
sources: ""
llvm_os: "bionic"
llvm_ver: "12"
std: "03"
job: "test/support"
travis_compiler: "clang-12"
- name: "STD=03 JOB=repository/test Job 11"
buildtype: "boost"
packages: "clang-12 libc++-12-dev libc++abi-12-dev libunwind-12-dev jq ccache"
packages_to_remove: ""
os: "ubuntu-18.04"
cxx: "clang-12"
sources: ""
llvm_os: "bionic"
llvm_ver: "12"
std: "03"
job: "repository/test"
travis_compiler: "clang-12"
- name: "STD=11 JOB=test/qi Job 12"
buildtype: "boost"
packages: "g++-11 jq ccache"
packages_to_remove: ""
os: "ubuntu-18.04"
cxx: "gcc-11"
sources: ""
llvm_os: ""
llvm_ver: ""
std: "11"
job: "test/qi"
travis_compiler: "gcc-11"
- name: "STD=11 JOB=test/karma Job 13"
buildtype: "boost"
packages: "g++-11 jq ccache"
packages_to_remove: ""
os: "ubuntu-18.04"
cxx: "gcc-11"
sources: ""
llvm_os: ""
llvm_ver: ""
std: "11"
job: "test/karma"
travis_compiler: "gcc-11"
- name: "STD=11 JOB=test/lex Job 14"
buildtype: "boost"
packages: "g++-11 jq ccache"
packages_to_remove: ""
os: "ubuntu-18.04"
cxx: "gcc-11"
sources: ""
llvm_os: ""
llvm_ver: ""
std: "11"
job: "test/lex"
travis_compiler: "gcc-11"
- name: "STD=11 JOB=test/support Job 15"
buildtype: "boost"
packages: "g++-11 jq ccache"
packages_to_remove: ""
os: "ubuntu-18.04"
cxx: "gcc-11"
sources: ""
llvm_os: ""
llvm_ver: ""
std: "11"
job: "test/support"
travis_compiler: "gcc-11"
- name: "STD=11 JOB=repository/test Job 16"
buildtype: "boost"
packages: "g++-11 jq ccache"
packages_to_remove: ""
os: "ubuntu-18.04"
cxx: "gcc-11"
sources: ""
llvm_os: ""
llvm_ver: ""
std: "11"
job: "repository/test"
travis_compiler: "gcc-11"
- name: "STD=03 JOB=test/qi Job 17"
buildtype: "boost"
packages: "g++-11 jq ccache"
packages_to_remove: ""
os: "ubuntu-18.04"
cxx: "gcc-11"
sources: ""
llvm_os: ""
llvm_ver: ""
std: "03"
job: "test/qi"
travis_compiler: "gcc-11"
- name: "STD=03 JOB=test/karma Job 18"
buildtype: "boost"
packages: "g++-11 jq ccache"
packages_to_remove: ""
os: "ubuntu-18.04"
cxx: "gcc-11"
sources: ""
llvm_os: ""
llvm_ver: ""
std: "03"
job: "test/karma"
travis_compiler: "gcc-11"
- name: "STD=03 JOB=test/lex Job 19"
buildtype: "boost"
packages: "g++-11 jq ccache"
packages_to_remove: ""
os: "ubuntu-18.04"
cxx: "gcc-11"
sources: ""
llvm_os: ""
llvm_ver: ""
std: "03"
job: "test/lex"
travis_compiler: "gcc-11"
- name: "STD=03 JOB=test/support Job 20"
buildtype: "boost"
packages: "g++-11 jq ccache"
packages_to_remove: ""
os: "ubuntu-18.04"
cxx: "gcc-11"
sources: ""
llvm_os: ""
llvm_ver: ""
std: "03"
job: "test/support"
travis_compiler: "gcc-11"
- name: "STD=03 JOB=repository/test Job 21"
buildtype: "boost"
packages: "g++-11 jq ccache"
packages_to_remove: ""
os: "ubuntu-18.04"
cxx: "gcc-11"
sources: ""
llvm_os: ""
llvm_ver: ""
std: "03"
job: "repository/test"
travis_compiler: "gcc-11"
- name: "STD=03 JOB=classic/test Job 22"
buildtype: "boost"
packages: "clang-12 libc++-12-dev libc++abi-12-dev libunwind-12-dev jq ccache"
packages_to_remove: ""
os: "ubuntu-18.04"
cxx: "clang-12"
sources: ""
llvm_os: "bionic"
llvm_ver: "12"
std: "03"
job: "classic/test"
travis_compiler: "clang-12"
- name: "STD=03 JOB=classic/test Job 23"
buildtype: "boost"
packages: "g++-11 jq ccache"
packages_to_remove: ""
os: "ubuntu-18.04"
cxx: "gcc-11"
sources: ""
llvm_os: ""
llvm_ver: ""
std: "03"
job: "classic/test"
travis_compiler: "gcc-11"
runs-on: ${{ matrix.os }}
# NOTE(review): none of the matrix entries visible in this file define
# `container`, so this presumably evaluates to empty — confirm.
container: ${{ matrix.container }}
steps:
- name: Check if running in container
if: matrix.container != ''
run: echo "GHA_CONTAINER=${{ matrix.container }}" >> $GITHUB_ENV
- uses: actions/checkout@v2
- name: linux
shell: bash
# Hand the fields of the current matrix entry to the script as environment variables.
env:
CXX: ${{ matrix.cxx }}
SOURCES: ${{ matrix.sources }}
LLVM_OS: ${{ matrix.llvm_os }}
LLVM_VER: ${{ matrix.llvm_ver }}
PACKAGES: ${{ matrix.packages }}
PACKAGES_TO_REMOVE: ${{ matrix.packages_to_remove }}
JOB_BUILDTYPE: ${{ matrix.buildtype }}
STD: ${{ matrix.std }}
JOB: ${{ matrix.job }}
TRAVIS_COMPILER: ${{ matrix.travis_compiler }}
# When this is empty the script derives the branch from GITHUB_REF instead.
TRAVIS_BRANCH: ${{ github.base_ref }}
TRAVIS_OS_NAME: "linux"
run: |
# Package setup: configure apt repositories, then install the packages the
# matrix entry asks for.
echo '==================================> SETUP'
echo '==================================> PACKAGES'
set -e
# Optionally remove packages first
if [ -n "$PACKAGES_TO_REMOVE" ]; then sudo apt-get purge -y $PACKAGES_TO_REMOVE; fi
echo ">>>>> APT: REPO.."
# Try up to three times to add the toolchain PPA, sleeping 2 s after a failure
for i in {1..3}; do sudo -E apt-add-repository -y "ppa:ubuntu-toolchain-r/test" && break || sleep 2; done
# LLVM_OS is only set for clang jobs: add the apt.llvm.org key and repository
if test -n "${LLVM_OS}" ; then
wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
if test -n "${LLVM_VER}" ; then
sudo -E apt-add-repository "deb http://apt.llvm.org/${LLVM_OS}/ llvm-toolchain-${LLVM_OS}-${LLVM_VER} main"
else
# Snapshot (i.e. trunk) build of clang
sudo -E apt-add-repository "deb http://apt.llvm.org/${LLVM_OS}/ llvm-toolchain-${LLVM_OS} main"
fi
fi
echo ">>>>> APT: UPDATE.."
sudo -E apt-get -o Acquire::Retries=3 update
# Each word of SOURCES is added as an extra PPA
if test -n "${SOURCES}" ; then
echo ">>>>> APT: INSTALL SOURCES.."
for SOURCE in $SOURCES; do
sudo -E apt-add-repository ppa:$SOURCE
done
fi
echo ">>>>> APT: INSTALL ${PACKAGES}.."
sudo -E DEBIAN_FRONTEND=noninteractive apt-get -o Acquire::Retries=3 -y --no-install-suggests --no-install-recommends install ${PACKAGES}
echo '==================================> INSTALL AND COMPILE'
set -e
# Travis-style names for the values GitHub Actions provides, so the
# Travis-derived steps below can read them.
export TRAVIS_BUILD_DIR=$(pwd)
# Keep TRAVIS_BRANCH when the workflow set it; otherwise take the third
# "/"-separated field of GITHUB_REF.
export TRAVIS_BRANCH=${TRAVIS_BRANCH:-$(echo $GITHUB_REF | awk 'BEGIN { FS = "/" } ; { print $3 }')}
export VCS_COMMIT_ID=$GITHUB_SHA
export GIT_COMMIT=$GITHUB_SHA
export REPO_NAME=$(basename $GITHUB_REPOSITORY)
export USER=$(whoami)
export CC=${CC:-gcc}
export PATH=~/.local/bin:/usr/local/bin:$PATH
export BOOST_ROOT="$HOME/boost"
export BOOST_BUILD_PATH="$HOME/build-boost"
export GITHUB_EVENT_NUMBER=${{ github.event.number }}
# "false" when the event carries no pull request number.
export TRAVIS_PULL_REQUEST=${GITHUB_EVENT_NUMBER:-false}
# The slug is interpolated into https://api.github.com/repos/<slug>/pulls/<n>,
# which needs the full "owner/repo" form; REPO_NAME holds only the repo part.
export TRAVIS_REPO_SLUG=$GITHUB_REPOSITORY
# Build and test section: runs only when JOB_BUILDTYPE is "boost".
# Steps: write the b2 user config, pick the Boost root branch, clone Boost,
# place this repository at $PROJECT inside it, then run b2 in $JOB.
if [ "$JOB_BUILDTYPE" == "boost" ]; then
echo '==================================> INSTALL'
export CACHE_NAME=$TRAVIS_OS_NAME-$TOOLSET-$STD-$JOB
export PATH=$BOOST_ROOT:$PATH
# clang jobs additionally pass stdlib=libc++ to b2
if [[ "$TRAVIS_COMPILER" =~ ^clang- ]]; then export STDLIB=stdlib=libc++ ; fi
# Creating ~/user-config.jam file
# The quoted 'EOF' delimiter keeps the jam $(...) references literal.
sed 's/^ //' > ~/user-config.jam << 'EOF'
import feature ;
import os ;
import regex ;
import toolset ;
local TOOLSET = [ os.environ TRAVIS_COMPILER ] ;
local toolset-parts = [ regex.split $(TOOLSET) "-" ] ;
local toolset-name = $(toolset-parts[1]) ;
local toolset-feature = $(toolset-parts[2-]:J="-") ;
local cxx ;
switch $(toolset-name) {
case gcc : cxx ?= [ regex.replace $(TOOLSET) "gcc" "g++" ] ;
case clang : cxx ?= [ regex.replace $(TOOLSET) "clang" "clang++" ] ;
case * : EXIT "user-config: Unsupported toolset $(toolset-name)" ;
}
using $(toolset-name) : $(toolset-feature) : ccache $(cxx) ;
# Release variant with enabled asserts
variant sanitize : <optimization>speed <debug-symbols>off <inlining>full
<runtime-debugging>off ;
EOF
# Determining the root branch
if [[ "$TRAVIS_PULL_REQUEST" == "false" ]]; then
export BRANCH=$TRAVIS_BRANCH
else
# It is a pull request. Retrieve the base branch from GitHub
# NOTE(review): in the GitHub API `.head.ref` is the PR's source branch and
# `.base.ref` is the branch it targets — confirm which one is intended here.
GH_PR_API=https://api.github.com/repos/$TRAVIS_REPO_SLUG/pulls/$TRAVIS_PULL_REQUEST
export BRANCH=`curl -s $GH_PR_API | jq -r .head.ref`;
fi
if [[ ! "$BRANCH" =~ ^(master|develop)$ ]]; then
# The build was not triggered from one of our main branches.
# Find out the base branch from the git history
# TODO: Not implemented yet, but in most cases it will be develop branch
export BRANCH=develop
fi
echo Root branch is $BRANCH
# Dump environment variables
env
# git's --shallow-submodules uses a fixed depth of 1 commit; make a patched copy
# of git with depth 9 and use that copy for the clone.
sed 's/--depth=1/--depth=9/g' `which git` > ~/git && chmod +x ~/git
# Checkout Boost with every submodule except $PROJECT itself
~/git clone -j10 --branch=$BRANCH --depth=1 --quiet --recurse-submodules=":(exclude)$PROJECT" --shallow-submodules https://github.com/boostorg/boost.git $BOOST_ROOT
pushd $BOOST_ROOT
# Remove the empty submodule folder
rm -rf $PROJECT
./bootstrap.sh --with-toolset=clang
./b2 headers
# Copy the repository to $PROJECT inside Boost and link it from the previous place
cp -rp $TRAVIS_BUILD_DIR $PROJECT
ln -s $PROJECT $TRAVIS_BUILD_DIR
cd $PROJECT
cd $JOB
echo '==================================> SCRIPT'
b2 link=shared threading=multi variant=release,sanitize toolset=$TRAVIS_COMPILER cxxstd=$STD $STDLIB warnings=extra warnings-as-errors=on define=BOOST_SPIRIT_X3_HIDE_CXX17_WARNING
fi

View File

@@ -0,0 +1,6 @@
.DS_Store
test/lex/matlib_static_switch.h
test/lex/matlib_static.h

View File

@@ -0,0 +1,155 @@
#==============================================================================
# Copyright (c) 2016-2021 Nikita Kniazev
#
# Use, modification and distribution is subject to the Boost Software
# License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt)
#==============================================================================
language: cpp
sudo: false
addon_shortcuts:
clang-12: &clang-12
apt:
sources:
- sourceline: 'deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-12 main'
key_url: 'https://apt.llvm.org/llvm-snapshot.gpg.key'
packages:
- clang-12
- libc++-12-dev
- libc++abi-12-dev
gcc-11: &gcc-11
apt:
sources:
- sourceline: 'ppa:ubuntu-toolchain-r/test'
packages:
- g++-11
os: linux
dist: bionic
env:
global:
- PROJECT=libs/spirit
- BOOST_ROOT=$HOME/boost
- BOOST_BUILD_PATH=$HOME/build-boost
matrix:
include:
### Spirit 3
- { env: 'STD=14 JOB=test/x3', compiler: clang-12, addons: *clang-12 }
- { env: 'STD=14 JOB=test/x3', compiler: gcc-11, addons: *gcc-11 }
### Spirit 2
## Clang
# 11
- { env: 'STD=11 JOB=test/qi', compiler: clang-12, addons: *clang-12 }
- { env: 'STD=11 JOB=test/karma', compiler: clang-12, addons: *clang-12 }
- { env: 'STD=11 JOB=test/lex', compiler: clang-12, addons: *clang-12 }
- { env: 'STD=11 JOB=test/support', compiler: clang-12, addons: *clang-12 }
- { env: 'STD=11 JOB=repository/test', compiler: clang-12, addons: *clang-12 }
# 03
- { env: 'STD=03 JOB=test/qi', compiler: clang-12, addons: *clang-12 }
- { env: 'STD=03 JOB=test/karma', compiler: clang-12, addons: *clang-12 }
- { env: 'STD=03 JOB=test/lex', compiler: clang-12, addons: *clang-12 }
- { env: 'STD=03 JOB=test/support', compiler: clang-12, addons: *clang-12 }
- { env: 'STD=03 JOB=repository/test', compiler: clang-12, addons: *clang-12 }
## GCC
# 11
- { env: 'STD=11 JOB=test/qi', compiler: gcc-11, addons: *gcc-11 }
- { env: 'STD=11 JOB=test/karma', compiler: gcc-11, addons: *gcc-11 }
- { env: 'STD=11 JOB=test/lex', compiler: gcc-11, addons: *gcc-11 }
- { env: 'STD=11 JOB=test/support', compiler: gcc-11, addons: *gcc-11 }
- { env: 'STD=11 JOB=repository/test', compiler: gcc-11, addons: *gcc-11 }
# 03
- { env: 'STD=03 JOB=test/qi', compiler: gcc-11, addons: *gcc-11 }
- { env: 'STD=03 JOB=test/karma', compiler: gcc-11, addons: *gcc-11 }
- { env: 'STD=03 JOB=test/lex', compiler: gcc-11, addons: *gcc-11 }
- { env: 'STD=03 JOB=test/support', compiler: gcc-11, addons: *gcc-11 }
- { env: 'STD=03 JOB=repository/test', compiler: gcc-11, addons: *gcc-11 }
### Spirit 1
- { env: 'STD=03 JOB=classic/test', compiler: clang-12, addons: *clang-12 }
- { env: 'STD=03 JOB=classic/test', compiler: gcc-11, addons: *gcc-11 }
cache: ccache
before_install:
- export CACHE_NAME=$TRAVIS_OS_NAME-$TOOLSET-$STD-$JOB
- export PATH=$BOOST_ROOT:$PATH
- if [[ "$TRAVIS_COMPILER" =~ ^clang- ]]; then export STDLIB=stdlib=libc++ ; fi
- |
# Creating ~/user-config.jam file
# The quoted 'EOF' delimiter keeps the jam $(...) references literal. The
# here-document is closed explicitly so bash does not have to fall back to
# end-of-input (which it only accepts with a warning).
sed 's/^ //' > ~/user-config.jam << 'EOF'
import feature ;
import os ;
import regex ;
import toolset ;
local TOOLSET = [ os.environ TRAVIS_COMPILER ] ;
local toolset-parts = [ regex.split $(TOOLSET) "-" ] ;
local toolset-name = $(toolset-parts[1]) ;
local toolset-feature = $(toolset-parts[2-]:J="-") ;
local cxx ;
switch $(toolset-name) {
case gcc : cxx ?= [ regex.replace $(TOOLSET) "gcc" "g++" ] ;
case clang : cxx ?= [ regex.replace $(TOOLSET) "clang" "clang++" ] ;
case * : EXIT "user-config: Unsupported toolset $(toolset-name)" ;
}
using $(toolset-name) : $(toolset-feature) : ccache $(cxx) ;
# Release variant with enabled asserts
variant sanitize : <optimization>speed <debug-symbols>off <inlining>full
<runtime-debugging>off ;
EOF
- |
  # Determining the root branch, i.e. the Boost superproject branch
  # (master or develop) that is cloned later in this phase
  if [[ "$TRAVIS_PULL_REQUEST" == "false" ]]; then
    # Not a pull request: use the branch being built
    export BRANCH=$TRAVIS_BRANCH
  else
    # It is a pull request. Retrieve the base branch from GitHub.
    # `.base.ref` is the branch the PR targets. `.head.ref` is the
    # contributor's source branch, so reading it made a PR from e.g.
    # `feature` into `master` fall through to `develop` below.
    # If the API call fails, jq prints `null` and the fallback below applies.
    GH_PR_API=https://api.github.com/repos/$TRAVIS_REPO_SLUG/pulls/$TRAVIS_PULL_REQUEST
    export BRANCH=$(curl -s "$GH_PR_API" | jq -r .base.ref)
  fi
  if [[ ! "$BRANCH" =~ ^(master|develop)$ ]]; then
    # Travis has been triggered not from our main branches.
    # Find out the base branch from the git history
    # TODO: Not implemented yet, but in most cases it will be develop branch
    export BRANCH=develop
  fi
  echo Root branch is $BRANCH
# Dump environment variables
- env
# Sadly git's --shallow-submodules has hardcoded depth of 1 commit
# Patch the git binary with a little more depth to deal with boost-commitbot's lag
# (the patched copy is written to ~/git and used for the clone below)
- sed 's/--depth=1/--depth=9/g' `which git` > ~/git && chmod +x ~/git
# Checkout Boost
# (superproject branch $BRANCH with every submodule except $PROJECT, into $BOOST_ROOT)
- ~/git clone -j10 --branch=$BRANCH --depth=1 --quiet
--recurse-submodules=":(exclude)$PROJECT" --shallow-submodules
https://github.com/boostorg/boost.git $BOOST_ROOT
- pushd $BOOST_ROOT
# Remove the empty folder
- rm -rf $PROJECT
# Bootstrap the b2 executable, then run its `headers` target
- ./bootstrap.sh --with-toolset=clang
- ./b2 headers
# Move the repository to boost/libs and make a link to previous place
- mv $TRAVIS_BUILD_DIR $PROJECT
- ln -s $PROJECT $TRAVIS_BUILD_DIR
# Enter the library, then the directory selected by $JOB from the job matrix
- cd $PROJECT
- cd $JOB
# Invoke b2 in the job directory entered at the end of before_install,
# for both the `release` variant and the `sanitize` variant declared in
# ~/user-config.jam, with extra warnings turned into errors
script:
- b2 link=shared threading=multi variant=release,sanitize
toolset=$TRAVIS_COMPILER cxxstd=$STD $STDLIB
warnings=extra warnings-as-errors=on
define=BOOST_SPIRIT_X3_HIDE_CXX17_WARNING

View File

@@ -0,0 +1,53 @@
# Generated by `boostdep --cmake spirit`
# Copyright 2020 Peter Dimov
# Distributed under the Boost Software License, Version 1.0.
# https://www.boost.org/LICENSE_1_0.txt
cmake_minimum_required( VERSION 3.5...3.16 )

project( boost_spirit
  VERSION "${BOOST_SUPERPROJECT_VERSION}"
  LANGUAGES CXX
)

# INTERFACE target (nothing is compiled) plus its namespaced alias
add_library( boost_spirit INTERFACE )
add_library( Boost::spirit ALIAS boost_spirit )

# Consumers get the library's `include` directory on their include path
target_include_directories( boost_spirit INTERFACE include )

# Boost libraries propagated to everything that links Boost::spirit
target_link_libraries( boost_spirit INTERFACE
  Boost::array
  Boost::assert
  Boost::config
  Boost::core
  Boost::endian
  Boost::function
  Boost::function_types
  Boost::fusion
  Boost::integer
  Boost::io
  Boost::iterator
  Boost::move
  Boost::mpl
  Boost::optional
  Boost::phoenix
  Boost::pool
  Boost::preprocessor
  Boost::proto
  Boost::range
  Boost::regex
  Boost::smart_ptr
  Boost::static_assert
  Boost::thread
  Boost::throw_exception
  Boost::type_traits
  Boost::typeof
  Boost::unordered
  Boost::utility
  Boost::variant
)

# Add the tests only when testing is enabled and test/CMakeLists.txt exists
if( BUILD_TESTING AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/test/CMakeLists.txt" )
  add_subdirectory( test )
endif()

View File

@@ -0,0 +1,36 @@
#==============================================================================
# Copyright (c) 2021 Nikita Kniazev
#
# Use, modification and distribution is subject to the Boost Software
# License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt)
#==============================================================================
import feature ;

# Declare the `known-warnings` feature unless it is already a valid feature.
if ! [ feature.valid known-warnings ]
{
    feature.feature known-warnings : hide show : incidental propagated ;
}

# MSVC versions that receive the extra suppressions at the end of the list;
# `msvc-$(old-msvc)` expands to one requirement per listed version.
local old-msvc = 9.0 10.0 11.0 12.0 14.0 ;

project spirit
    : requirements
      # Warnings that we have not managed to fix yet
      <known-warnings>hide,<toolset>msvc:<cxxflags>-wd4244 # conversion from 'T' to 'U', possible loss of data
      <known-warnings>hide,<toolset>msvc:<cxxflags>-wd4365 # conversion from 'T' to 'U', signed/unsigned mismatch
      <known-warnings>hide,<toolset>msvc:<cxxflags>-wd4459 # declaration of 'varname' hides global declaration

      # Enable additional useful warnings
      <warnings>extra,<toolset>gcc:<cxxflags>-Wshadow-local
      <warnings>extra,<toolset>clang-linux:<cxxflags>-Wshadow-field-in-constructor
      <warnings>extra,<toolset>clang-linux:<cxxflags>-Wshadow-uncaptured-local
      <warnings>extra,<toolset>clang-linux:<cxxflags>-Wundefined-reinterpret-cast
      <warnings>extra,<toolset>msvc:<cxxflags>-w44555 # result of expression not used

      # Broken or very high false-positive rate
      <known-warnings>hide,<toolset>gcc:<cxxflags>-Wno-maybe-uninitialized # Too many pesky false-positives
      <known-warnings>hide,<toolset>msvc-$(old-msvc):<cxxflags>-wd4100 # unreferenced formal parameter
      <known-warnings>hide,<toolset>msvc-$(old-msvc):<cxxflags>-wd4512 # assignment operator could not be generated
      <known-warnings>hide,<toolset>msvc-$(old-msvc):<cxxflags>-wd4714 # function 'x' marked as __forceinline not inlined
    ;

View File

@@ -0,0 +1,71 @@
Spirit
======
Spirit is a set of C++ libraries for parsing and output generation implemented as
Domain Specific Embedded Languages (DSEL) using Expression templates and Template
Meta-Programming. The Spirit libraries enable a target grammar to be written
exclusively in C++. Inline grammar specifications can mix freely with other
C++ code and, thanks to the generative power of C++ templates, are immediately
executable.
### Spirit.X3 (3rd generation)
[Documentation](http://www.boost.org/doc/libs/develop/libs/spirit/doc/x3/html/index.html)
The newest Spirit shines with faster compile times. Currently only a parser framework.
*WARNING*: Support for C++14 compilers will be dropped soon.
Spirit X3 in Boost 1.81 (scheduled for November 2022) will use C++17 features.
Supported compilers will be:
* Clang 4 (currently 3.6)
* GCC 7 (currently 5)
* VS 2017 v15.8 (currently 2015 U3)
### Spirit V2 (2nd generation)
[Documentation](http://www.boost.org/doc/libs/develop/libs/spirit/doc/html/index.html)
The latest Long Term Support version of Spirit. A Swiss Army knife for data
manipulation on any kind of input.
Consists of:
- [Qi]: Parser framework.
- [Karma]: Generator framework.
- [Lex]: Lexical analyzer framework.
Runs on most C++03 compilers (GCC 4.1, Clang 3.0, VS 2005).
[Spirit V2]: http://www.boost.org/doc/libs/develop/libs/spirit/doc/html/index.html
[Qi]: http://www.boost.org/doc/libs/develop/libs/spirit/doc/html/spirit/qi.html
[Karma]: http://www.boost.org/doc/libs/develop/libs/spirit/doc/html/spirit/karma.html
[Lex]: http://www.boost.org/doc/libs/develop/libs/spirit/doc/html/spirit/lex.html
### Spirit.Classic (1st generation)
[Documentation](http://www.boost.org/doc/libs/develop/libs/spirit/classic/index.html)
An elderly member of Spirit. It receives only limited maintenance, but
it is still used even inside Boost by the [Boost.Serialization] and [Boost.Wave]
libraries. It also contains Phoenix V1.
Spirit.Classic should support even ancient compilers.
[Boost.Serialization]: http://boost.org/libs/serialization
[Boost.Wave]: http://boost.org/libs/wave
## Brief History
Date | Boost | Commit | Event
---------- | ----- | -------- | -----------------------------------------------
2014-03-18 | 1.56 | 8a353328 | Spirit.X3 is added
2013-12-14 | 1.56 | c0537c82 | Phoenix V2 is retired
2011-03-28 | 1.47 | 400a764d | [Phoenix V3] support added to Spirit V2
2009-04-30 | 1.41 | 5963a395 | [Spirit.Repository] appeared
2008-04-13 | 1.36 | ffd0cc10 | Spirit V2 (Qi, Karma, Lex, Phoenix V2) is added
2006-08-23 | 1.35 | 2dc892b4 | Fusion V1 is retired
2003-01-31 | 1.30 | 81907916 | Spirit becomes part of Boost
[Phoenix V3]: http://boost.org/libs/phoenix
[Spirit.Repository]: http://www.boost.org/doc/libs/develop/libs/spirit/doc/html/spirit/repository.html

View File

@@ -0,0 +1,294 @@
<html>
<head>
<title>Spirit Change Log</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="doc/theme/style.css" type="text/css">
</head>
<body>
<h2>Spirit Change Log</h2>
<h2>1.8.11</h2>
<ul>
<li>Fixed <tt>position_iterator</tt> forming reference to local when the
underlying iterator dereference operator returns a non-reference type.
<a href="https://github.com/boostorg/spirit/pull/422">PR#422</a>
<a href="https://svn.boost.org/trac10/ticket/9737">TRAC#9737</a></li>
<li>Removed use of deprecated <tt>boost/detail/iterator.hpp</tt> header.
<a href="https://github.com/boostorg/spirit/pull/432">GH#432</a></li>
</ul>
<h2>1.8.10</h2>
<ul>
<li>Missing visibility mark on exception types.
<a href="https://github.com/boostorg/spirit/pull/409">GH#409</a></li>
</ul>
<h2>1.8.9</h2>
<ul>
<li>Fixed a regression introduced in
<a href="https://github.com/boostorg/spirit/pull/336">GH#336</a>.
<a href="https://github.com/boostorg/spirit/pull/386">GH#386</a></li>
<li>Minor code improvements.
<a href="https://github.com/boostorg/spirit/pull/367">GH#367</a></li>
</ul>
<h2>1.8.8</h2>
<ul>
<li>Fixed <tt>remove_reference</tt> usage without a namespace in Phoenix.
<a href="https://github.com/boostorg/spirit/pull/274">GH#274</a></li>
<li>Fixed <tt>std::complex</tt> usage without the include.
<a href="https://github.com/boostorg/spirit/pull/273">GH#273</a></li>
<li>Fixed compilation of <tt>match&lt;T&amp;&gt;</tt>.
<a href="https://github.com/boostorg/spirit/pull/275">GH#275</a></li>
<li>Fixed compilation with <tt>BOOST_DISABLE_THREADS</tt> defined.
<a href="https://github.com/boostorg/spirit/pull/323">GH#323</a>
<a href="https://svn.boost.org/trac10/ticket/12639">#12639</a></li>
<li>Increment scanner through iterator policy.
<a href="https://github.com/boostorg/spirit/pull/336">GH#336</a>
<a href="https://svn.boost.org/trac10/ticket/7371">TRAC#7371</a></li>
<li>Removed deprecated in C++17 <tt>std::iterator</tt> usage.
<a href="https://github.com/boostorg/spirit/pull/345">GH#345</a></li>
</ul>
<h2>1.8.7</h2>
<ul>
<li>Integrated the Spirit V1.8.x code base with Spirit V2. Spirit V1.8.x is
now called
<strong>Spirit Classic.</strong> Even if the directory
structure has changed (the
<strong>Spirit Classic</strong> headers are now moved to the
'$BOOST_ROOT/boost/spirit/home/classic' directory), we created forwarding
headers allowing to compile existing applications without any change.
These forwarding headers are deprecated, though, which will result in
corresponding warnings generated for each of the headers. The forwarding
headers are expected to be removed in the future.
<br />
The recommended way of using Spirit Classic is now to include header
files from the directory '$BOOST_ROOT/boost/spirit/include'. All files of
<strong>Spirit Classic</strong>
have now a 'classic_' prefixed to their name. For example the include
<br/>
<br><code>&nbsp;&nbsp;#include &lt;boost/spirit/core/core.hpp&gt;</code><br/>
<br/>
now should be written as:
<br/>
<br/><code>&nbsp;&nbsp;#include &lt;boost/spirit/include/classic_core.hpp&gt;</code><br/>
<br/>
To avoid namespace conflicts with the new Spirit V2 library we moved <strong>Spirit
Classic</strong> into the <tt>namespace boost::spirit::classic</tt>. This change will be automatically&nbsp;deactivated whenever the deprecated include files are
being used. This ensures full backwards compatibility for existing applications.
<br />
For more details about this change please consult the documentation.</li>
</ul>
<h2>1.8.6</h2>
<ul>
<li>Fixed an integer overflow bug that prevented parsing from failing on certain large integers. This bug was reported and fixed by Michael Andersen Nex&oslash;.</li>
</ul>
<h2>1.8.5</h2>
<ul>
<li>For performance reasons, leaf_node_d/token_node_d have been changed to implicit lexemes that create leaf nodes in one shot. The old token_node_d is still available and called reduced_node_d, now.</li>
<li>It's now possible to phoenix::bind (version 1) symbols::add.</li>
</ul>
<h2>1.8.4</h2>
<ul>
<li>Fixed no_actions bug where no_action is applied recursively.</li>
<li>Fixed the regex_p parser for Boost &gt;= V1.33.0 </li>
<li>Implemented a workaround for namespace issues VC++ has with Spirit's file_iterators</li>
<li>Fixed bug in tree match policies that prevented using gen_pt/ast_node_d,
<a href="http://article.gmane.org/gmane.comp.parsers.spirit.general/9013">reported
by Jascha Wetzel</a>.</li>
<li>Made position_iterator usable with wchar_t based strings. </li>
</ul>
<h2>1.8.3</h2>
<ul>
<li>Config correction for Sun C++ by
Steve Clamage (see <a href="https://sourceforge.net/tracker/?func=detail&atid=107586&aid=1220782&group_id=7586">this link</a>). </li>
<li>Fixed multi_pass_iterator for 64-bit platforms, where sizeof(int) != sizeof(ptr_type). Fixed bug that prevents the use of closures with grammars with multiple entry points, <a href="http://article.gmane.org/gmane.comp.parsers.spirit.general/8868">reported by David Pierre</a></li>
<li>Fixed bug that prevented embedding of grammars with multiple entry points, <a href="http://article.gmane.org/gmane.comp.parsers.spirit.general/8860">reported by David Pierre</a></li>
<li>Added '\0' to the set of valid escaped characters for escape_ch_p.</li>
<li>Fixed a switch_p bug when used with a phoenix::actor as the conditional expression.</li>
<li>__LINE__ macro now gets expanded in BOOST_SPIRIT_ASSERT_EXCEPTION</li>
<li>Fixed a bug in the intersection parser <a href="http://article.gmane.org/gmane.comp.parsers.spirit.general/8544">reported by Yusaku Sugai</a></li>
<li>The symbol parser uses the null character internally. Checks were added so that:
<ul>
<li>tst.add asserts if string contains the null character</li>
<li>tst.find doesn't match null characters in the input</li>
</ul></li>
<li>Fixed match_attr_traits.ipp to allow non-PODs to pass through. The previous version taking in the ellipsis &quot;...&quot; does not allow non-PODs to pass through.</li>
<li>Allow evaluation to int as condition to if_p parser.</li>
<li>Applied performance improvement changes to the PT/AST code as suggested by Stefan Slapeta. </li>
<li>Fixed several problems with AST tree node directives (inner_node_d[], discard_first_node[], discard_last_node[] and infix_node_d[]). </li>
</ul>
<h2>1.8.2</h2>
<p>Maintenance release (almost the same as 1.8.1 plus a few fixes here and there)</p>
<ul>
<li>Added specializations to str_p and ch_p to allow str_p('c') and ch_p("c") thus fixing some non-bugs</li>
<li>Fixed bug where a match&lt;T&gt; is a variant.</li>
<li>added Jamfile/Jamrules from CVS to spirit-1.8.1/</li>
<li>added boost-build.jam from boost to spirit-1.8.1/</li>
<li>disabled template multi-threading in libs/spirit/test/Jamfile</li>
<li>added a boost-header-include rule (from spirit-header-include) pointing to miniboost in libs/spirit/test/Jamfile</li>
<li>Fixed if_p inconsistency</li>
</ul>
<h2>1.6.2</h2>
<p>The Spirit 1.6.2 release is a bug-fix release only, no new features were introduced.</p>
<ul>
<li>wchar_t friendly implementation of graph_p</li>
<li>Modified escape_char_parser::parse() to use a static parser instead of a rule. This will make it more friendly to use in trees. It should also be a little more efficient.</li>
<li>Moved to
Boost Software license 1.0. </li>
<li> workaround for Error 322 name lookup in base class specialization finds type</li>
<li> fixed limit_d bug</li>
<li> [numerics] Workaround for aC++</li>
<li> Fixed a bug in the switch_p parser.</li>
<li> Fixed a EOI problem in multi_pass</li>
<li>added Jamfile/Jamrules from CVS to spirit-1.6.1/</li>
<li>added boost-build.jam from boost to spirit-1.6.1/</li>
<li>disabled template multi-threading in libs/spirit/test/Jamfile</li>
<li>added a boost-header-include rule (from spirit-header-include) pointing to miniboost in libs/spirit/test/Jamfile</li>
</ul>
<h2>1.8.1 (Released with Boost 1.32.0)</h2>
<p>The Spirit 1.8.1 release is a bug-fix release only, no new features were introduced.</p>
<ul>
<li>Spirit now requires at least Boost 1.32.0 to compile correctly</li>
<li>Removed the support for the older iterator adaptor library</li>
<li>Moved to use the new MPL library</li>
<li>Spirit was moved to use the Boost Software License 1.0.</li>
<li>Fixed several parsers to support post-skips more correctly.</li>
<li>Fixed a no_node_d[] bug.</li>
<li>Fixed a bug in shortest_d[].</li>
<li>Fixed a bug in limit_d[].</li>
<li>Fixed parser traversal meta code.</li>
<li>Fixed several bugs in switch_p.</li>
<li>Fixed AST generating problems, in particular with the loops related parsers.</li>
<li>Fixed several bugs in the multi_pass iterator.
<ul>
<li>Fixed a problem, when the used base iterator returned a value_type and not a reference from its dereferencing operator.</li>
<li>Fixed iterator_traits problem</li>
<li>Fixed an EOI problem</li>
<li>Fixed a bug, when used with std::cin</li>
</ul>
</li>
<li>Found a bug in grammar.ipp when BOOST_SPIRIT_SINGLE_GRAMMAR_INSTANCE is defined</li>
<li>Rewritten safe_bool to use CRTP - now works also on MWCW, fixed several bugs with the implementation.</li>
<li>Fixed and extended the debug diagnostics printed by the parse tree code.</li>
</ul>
<h2>1.8.0 (Released with Boost 1.31.0; Includes unreleased 1.7.1)</h2>
<ul>
<li>Fixed a wchar_t problem in the regex_p parser.</li>
<li>removed code and workarounds for old compilers (VC6/7 and Borland)</li>
<li> Changed license to the new boost license.</li>
<li> Modified escape_char_parser::parse() to use a static parser instead of a rule. This will make it more friendly to use in trees. It should also be a little more efficient.</li>
</ul>
<h2>1.7.1 (Unreleased; becomes 1.8.0)</h2>
<ul>
<li>Added a full suite of predefined actors.</li>
<li>Moved rule_alias and stored_rule from core/non-terminal to dynamic.<br>
Made as_parser a public API in meta/as_parser.hpp</li>
<li>Separated Core.Meta into its own module</li>
<li>Refactored Utility module<br>
Moved some files into Utility.Parsers</li>
</ul>
<blockquote>
<ul>
<li>utilities
<ul>
<li>parsers
<ul>
<li>chset, regex, escape_char<br>
confix, list, distinct<br>
functor_parser</li>
</ul>
</li>
</ul>
</li>
<li> support
<ul>
<li>scoped_lock<br>
flush_multi_pass<br>
grammar_def</li>
</ul>
</li>
<li> actors
<ul>
<li>assign</li>
</ul>
</li>
</ul>
</blockquote>
<ul>
<li>Stored rules</li>
<li>Added the switch_p and select_p dynamic parsers.</li>
<li>Multiple scanner support for rules.</li>
<li>The Rule's Scanner, Context and Tag template parameters can be specified in any order now. If a template parameter is missing, it will assume the defaults. See test/rule_tests.cpp.</li>
<li>Introduced the possibility to specify more than one start rule from a grammar.</li>
<li>Added an implementation of the file_iterator iterator based on the new Boost iterator_adaptors (submitted originally by Thomas Witt).</li>
</ul>
<p><em> [The transition to the new iterator_adaptors should be complete now.]</em></p>
<ul>
<li>Added an implementation of the fixed_size_queue iterator based on the new Boost iterator_adaptors.</li>
<li> wchar_t friendly implementation of graph_p</li>
<li>made the copy-constructor and assignment-operator of parser_error_base public to clear VC7.1 C4673 warning. Added copy-constructor and assignment operator to parser_error for clarity of intent.</li>
</ul>
<h2>1.7.0</h2>
<ul>
<li> assign(string) semantic action now works in VC6</li>
<li>parsers need not be default constructible </li>
<li>simplified aggregation of binary and unary parsers (more compiler friendly)</li>
<li>epsilon workarounds for VC++</li>
<li>match's attribute now uses boost.optional</li>
<li>subrules can now have closures</li>
<li>project wide 64 bit compatibility</li>
<li>dynamic_parser_tag, reissue of rule.set_id(id);</li>
<li>numerous primitives improvements and workarounds for problematic compilers</li>
<li>proper complement (~) of single char parser primitives and chsets</li>
<li>intuitive handling of lexeme_d </li>
<li>wide_phrase_scanner_t typedef</li>
<li>dynamic parser improvements (better support for more compilers)</li>
<li>complete rewrite of the file_iterator (using boost.iterator_adapters). Supports
memory maps wherever available</li>
<li>multi_pass updates (compatibility with more compilers (e.g VC7) and more)</li>
<li>position_iterator improvements</li>
<li>better phoenix support for more compilers</li>
<li>phoenix new_(...) construct</li>
<li>new lazy_p parser</li>
<li>utility.distinct parser (undocumented)</li>
<li>chset operators improvements </li>
<li>confix_p streamlining and improvements</li>
<li>numerous Boost integration improvements</li>
</ul>
<h2>Bug fixes (1.7.0 and 1.6.0)</h2>
<ul>
<li> Fixed. Using MSVC++6 (SP5), calling the assign action with a string value
on parsers using the file_iterator will not work. </li>
<li> Fixed: using assign semantic action in a grammar with a multi_pass iterator
adaptor applied to an std::istream_iterator resulted in a failure to compile
under msvc 7.0. </li>
<li> Fixed: There is a bug in the &quot;range_run&lt;CharT&gt;::set (range&lt;CharT&gt;
const&amp; r)&quot; function in the &quot;boost\spirit\utility\impl\chset\range_run.ipp&quot;.
</li>
<li> Fixed: handling of trailing whitespace bug (ast_parse/pt_parse related)</li>
<li> Fixed: comment_p and end of data bug</li>
<li> Fixed: <a href="http://article.gmane.org/gmane.comp.parsers.spirit.general/4029">Most
trailing space bug</a>:</li>
<li> Fixed:<br>
chset&lt;&gt;::operator~(range&lt;&gt;) bug<br>
operator&amp;(chset&lt;&gt;, range&lt;&gt;) bug<br>
operator&amp;(range&lt;&gt;, chset&lt;&gt;) bug</li>
<li> Fixed: <a href="http://sourceforge.net/mailarchive/forum.php?thread_id=2008510&forum_id=25901">impl::detach_clear
bug</a></li>
<li> Fixed: <a href="http://article.gmane.org/gmane.comp.parsers.spirit.general/3678">mismatch
closure return type bug</a></li>
<li> Fixed: <a href="http://sf.net/mailarchive/forum.php?thread_id=1963157&forum_id=1595">access_node_d[]</a>
and <a href="http://sf.net/mailarchive/forum.php?thread_id=1966224&forum_id=1595">access_match_d[]</a>
iterator bugs</li>
<li> Fixed a bug regarding threadsafety of Phoenix/Spirit closures.</li>
<li> Added missing include files to miniboost</li>
</ul>
<p> <font size="2" color="#666666">Copyright &copy; 1998-2005 Joel de Guzman, Hartmut Kaiser</font><br>
<font size="2"><font color="#666666">Use, modification and distribution is subject to the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) </font> </font></p>
<p>&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,146 @@
<html>
<head>
<title>Acknowledgments</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%">
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Acknowledgments</b></font>
</td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="rationale.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="references.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>Special thanks to </p>
<p><b>Dan Nuffer</b> for his work on lexers, parse trees, ASTs, XML parsers, the
multi-pass iterator as well as administering Spirit's site, editing, maintaining
the CVS and doing the releases plus a zillion of other chores that were almost
taken for granted. </p>
<p><b>Hartmut Kaiser</b> for his work on the C parser, the work on the C/C++ preprocessor,
utility parsers, the original port to Intel 5.0, various work on Phoenix, porting
to v1.5, the meta-parsers, the grouping-parsers, extensive testing and painstaking
attention to details.</p>
<p><b>Martin Wille</b> who improved grammar multi thread safety, contributed the
eol_p parser, the dynamic parsers, documentation and for taking an active role
in almost every aspect from brainstorming and design to coding. And, as always, helps keep the regression tests for g++ on Linux as green as ever :-). </p>
<p><b>Martijn W. Van Der Lee</b> our Web site administrator and for contributing
the RFC821 parser<b>.</b></p>
<p><b>Giovanni Bajo</b> for last minute tweaks of Spirit 1.8.0 for CodeWarrior
8.3. Actually, I'm ashamed Giovanni was not in this list already. He's done
a lot since Spirit 1.5, the first Boost.Spirit release. He's instrumental in
the porting of the Spirit iterators stuff to the new Boost Iterators Library
(version 2). He also did various bug fixes and wrote some tests here and there.
</p>
<p><b>Juan Carlos Arevalo-Baeza (JCAB) </b>for his work on the C++ parser, the
position iterator, ports to v1.5 and keeping the mailing list discussions alive
and kicking.</p>
<p><strong>Vaclav Vesely, </strong>lots of stuff, the no_actions directive, various patches and fixes, the distinct parsers, the lazy parser, some phoenix tweaks and add-ons (e.g. <tt>new_</tt>). Also, <strong>Stefan&nbsp;Slapeta</strong> and <strong>wife</strong> for editing Vaclav's distinct parser doc. </p>
<p><b>Raghavendra Satish </b>for doing the original v1.3 port to VC++ and his
work on Phoenix.</p>
<p><b>Noah Stein</b> for following up and helping Ragav on the VC++ ports.</p>
<p><b>Hakki Dogusan</b>, for his original v1.0 Pascal parser.</p>
<p><b>John (EBo) David</b> for his work on the VM and watching over my shoulder
as I code giving the impression of distance eXtreme programming.</p>
<p><b>Chris Uzdavinis</b> for feeding in comments and valuable suggestions as
well as editing the documentation.</p>
<p><b>Carsten Stoll</b>, for his work on dynamic parsers.</p>
<p><b>Andy Elvey</b> and his conifer parser.</p>
<p><b>Bruce Florman</b>, who did the original v1.0 port to VC++.</p>
<p><b>Jeff Westfahl </b>for porting the loop parsers to v1.5 and contributing
the file iterator.</p>
<p><b>Peter Simons</b> for the RFC date parser example and tutorial plus helping
out with some nitty gritty details.</p>
<p><b>Markus Sch&ouml;pflin</b> for suggesting the end_p parser and lots of other
nifty things and his active presence in the mailing list.</p>
<p><b>Doug Gregor</b> for mentoring and his ability to see things that others
don't. </p>
<p><strong>David Abrahams</strong> for giving me a job that allows me to still
work on Spirit, plus countless advice and help on C++ and specifically template
metaprogramming.</p>
<p><strong>Aleksey Gurtovoy</strong> for his MPL library from which I stole many
metaprogramming tricks especially for less conforming compilers such as Borland
and VC6/7.</p>
<p><strong>Gustavo Guerra</strong> for his last minute review of Spirit and constant
feedback, plus patches here and there (e.g. proposing the new dot behavior of
the real numerics parsers).</p>
<p><strong>Nicola Musatti, Paul Snively, Alisdair Meredith </strong>and<strong>
Hugo Duncan </strong> for testing and sending in various patches.</p>
<p><strong>Steve Rowe</strong> for his splendid work on the TSTs that will soon
be taken into Spirit.</p>
<p><strong>Jonathan de Halleux</strong> for his work on actors.</p>
<p><strong>Angus Leeming</strong> for last minute editing work on the 1.8.0 release documentation, his work on Phoenix and his active presence in the Spirit mailing list.</p>
<p> <strong>Joao Abecasis</strong> for his active presence in the Spirit mailing list, providing user support, participating in the discussions and so on. </p>
<p> <strong>Guillaume Melquiond</strong> for a last minute patch to <tt>multi_pass</tt> for 1.8.1. </p>
<p> <strong>Peder Holt</strong> for his porting work on Phoenix, Fusion and Spirit to VC6. </p>
<p>To my wife <b>Mariel</b> who did the graphics in this document.</p>
<p>My, there's a lot in this list! And it's a continuing list. I add people to this list every time. I hope I did not forget anyone. If I missed<br>
someone you know who has helped in any way, please inform me.</p>
<p> Special thanks also to people who gave feedback and valuable comments, particularly
members of Boost and Spirit mailing lists. This includes all those who participated
in the review:<br>
<br>
<strong>John Maddock</strong>, our review manager<br>
<strong>Aleksey Gurtovoy<br>
Andre Hentz<br>
Beman Dawes<br>
Carl Daniel<br>
Christopher Currie<br>
Dan Gohman<br>
Dan Nuffer<br>
Daryle Walker<br>
David Abrahams<br>
David B. Held<br>
Dirk Gerrits<br>
Douglas Gregor<br>
Hartmut Kaiser<br>
Iain K.Hanson<br>
Juan Carlos Arevalo-Baeza<br>
Larry Evans<br>
Martin Wille<br>
Mattias Flodin<br>
Noah Stein<br>
Nuno Lucas<br>
Peter Dimov<br>
Peter Simons<br>
Petr Kocmid<br>
Ross Smith<br>
Scott Kirkwood<br>
Steve Cleary<br>
Thorsten Ottosen<br>
Tom Wenisch<br>
Vladimir Prus</strong></p>
<p>Finally thanks to <a href="http://sourceforge.net">SourceForge</a> for hosting
the Spirit project and <a href="http://www.boost.org/">Boost</a>: a C++ community
comprised of extremely talented library authors who participate in the discussion
and peer review of well crafted C++ libraries.</p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="rationale.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="references.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
</font> </p>
</body>
</html>

View File

@@ -0,0 +1,354 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta content=
"HTML Tidy for Windows (vers 1st February 2003), see www.w3.org"
name="generator">
<title>
Basic Concepts
</title>
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10"></td>
<td width="85%">
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Basic
Concepts</b></font>
</td>
<td width="112">
<a href="http://spirit.sf.net"><img src="theme/spirit.gif"
width="112" height="48" align="right" border="0"></a>
</td>
</tr>
</table><br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30">
<a href="../index.html"><img src="theme/u_arr.gif" border="0"></a>
</td>
<td width="30">
<a href="quick_start.html"><img src="theme/l_arr.gif" border="0"></a>
</td>
<td width="30">
<a href="organization.html"><img src="theme/r_arr.gif" border="0">
</a>
</td>
</tr>
</table>
<p>
There are a few fundamental concepts that need to be understood well: 1)
The <strong>Parser</strong>, 2) <strong>Match</strong>, 3) The
<strong>Scanner</strong>, and 4) <strong>Semantic Actions</strong>. These
basic concepts interact with one another, and the functionalities of each
interweave throughout the framework to make it one coherent whole.
</p>
<table width="48%" border="0" align="center">
<tr>
<td height="211">
<img src="theme/intro1.png">
</td>
</tr>
</table>
<h2>
The Parser
</h2>
<p>
Central to the framework is the parser. The parser does the actual work
of recognizing a linear input stream of data read sequentially from start
to end by the scanner. The parser attempts to match the input following a
well-defined set of specifications known as grammar rules. The parser
reports the success or failure to its client through a match object. When
successful, the parser calls a client-supplied semantic action. Finally,
the semantic action extracts structural information depending on the data
passed by the parser and the hierarchical context of the parser it is
attached to.
</p>
<p>
Parsers come in different flavors. The Spirit framework comes bundled
with an extensive set of pre-defined parsers that perform various parsing
tasks from the trivial to the complex. The parser, as a concept, has a
public conceptual interface contract. Following the contract, anyone can
write a conforming parser that will play along well with the framework's
predefined components. We shall provide a blueprint detailing the
conceptual interface of the parser later.
</p>
<p>
Clients of the framework generally do not need to write their own
hand-coded parsers at all. Spirit has an immense repertoire of
pre-defined parsers covering all aspects of syntax and semantic analysis.
We shall examine this repertoire of parsers in the following sections. In
the rare case where a specific functionality is not available, it is
extremely easy to write a user-defined parser. The ease in writing a
parser entity is the main reason for Spirit's extensibility.
</p>
<h2>
Primitives and Composites
</h2>
<p>
Spirit parsers fall into two categories: <b>primitives</b> and
<b>composites</b>. These two categories are more or less synonymous with
terminals and non-terminals in parsing lingo. Primitives are
non-decomposable atomic units. Composites, on the other hand, are parsers
that are composed of other parsers, each of which can in turn be a primitive
or another composite. To illustrate, consider the Spirit expression:
</p>
<pre><code><font color="#000000"> </font></code><code><span class="identifier">real_p</span> <span class=
"special">&gt;&gt;</span> <span class="special">*(</span><span class=
"literal">','</span> <span class="special">&gt;&gt;</span> <span class=
"identifier">real_p</span><span class="special">)</span></code>
</pre>
<p>
<tt>real_p</tt> is a primitive parser that can parse real
numbers. The quoted comma <tt class="quotes">','</tt> in the expression
is a shortcut and is equivalent to <tt>ch_p<span class=
"operators">(</span><span class="quotes">','</span><span class=
"operators">)</span></tt>, which is another primitive parser that
recognizes single characters.
</p>
<p>
The expression above corresponds to the following parse tree:
</p>
<table width="29%" border="0" align="center">
<tr>
<td>
<img src="theme/intro7.png">
</td>
</tr>
</table>
<p>
The expression:
</p>
<pre><code><font color="#000000"> </font></code><span class=
"literal">','</span> <span class="special">&gt;&gt;</span> <span class=
"identifier">real_p</span>
</pre>
<p>
composes a <b>sequence</b> parser. The <tt>sequence</tt> parser is a
composite parser comprising two parsers: the one on its left hand side
(lhs), <tt>ch_p<span class="operators">(</span><span class=
"quotes">','</span><span class="operators">)</span></tt> ; and the other
on its right hand side (rhs), <tt>real_p</tt>. This composite parser,
when called, calls its lhs and rhs in sequence and reports a successful
match only if both are successful.
</p>
<table width="14%" border="0" align="center">
<tr>
<td>
<img src="theme/intro2.png">
</td>
</tr>
</table>
<p>
The <tt>sequence</tt> parser is a binary composite. It is composed of two
parsers. There are unary composites as well. Unary composites hold only a
single subject. Like the binary composite, the unary composite may change
the behavior of its embedded subject. One particular example is the
<b>Kleene star</b>. The Kleene star, when called to parse, calls its sole
subject zero or more times. "Zero or more" implies that the Kleene star
always returns a successful match, possibly matching the null string: "".
</p>
<p>
The expression:
</p>
<pre><code><font color="#000000"> </font></code><code><span class=
"special">*(</span><span class="literal">','</span> <span class=
"special">&gt;&gt;</span> <span class=
"identifier">real_p</span><span class="special">)</span></code>
</pre>
<p>
wraps the whole sequence composite above inside a <tt>kleene_star</tt>.
</p>
<table width="17%" border="0" align="center">
<tr>
<td>
<img src="theme/intro3.png">
</td>
</tr>
</table>
<p>
Finally, the full expression composes a <tt>real_p</tt> primitive parser
and the <tt>kleene_star</tt> we have above into another higher level
<tt>sequence</tt> parser composite.
</p>
<table width="34%" border="0" align="center">
<tr>
<td>
<img src="theme/intro4.png">
</td>
</tr>
</table>
<p>
A few simple classes, when composed and structured in a hierarchy, form a
very powerful object-oriented recursive-descent parsing engine. These
classes provide the infrastructure needed for the construction of
more-complex parsers. The final parser composite is a non-deterministic
recursive-descent parser with infinite look-ahead.
</p>
<p>
Top-down descent traverses the hierarchy. The outer <tt>sequence</tt>
calls the leftmost <tt>real_p</tt> parser. If successful, the
<tt>kleene_star</tt> is called next. The <tt>kleene_star</tt> calls the
inner <tt>sequence</tt> repeatedly in a loop until it fails to match, or
the input is exhausted. Inside, <tt>ch_p(',')</tt> and then
<tt>real_p</tt> are called in sequence. The following diagram illustrates
what is happening, somewhat reminiscent of Pascal syntax diagrams.
</p>
<table width="37%" border="0" align="center">
<tr>
<td>
<img src="theme/intro5.png">
</td>
</tr>
</table>
<p>
The flexibility of object embedding and composition combined with
recursion opens up a unique approach to parsing. Subclasses are free to
form aggregates and algorithms of arbitrary complexity. Complex parsers
can be created with the composition of only a few primitive classes.
</p>
<p>
The framework is designed to be fully open-ended and extensible. New
primitives or composites, from the trivial to the complex, may be added
any time. Composition happens (statically) at compile time. This is
possible through the expressive flexibility of C++ expression templates
and template meta-programming.
</p>
<p>
The result is a composite composed of primitives and smaller composites.
This embedding strategy gives us the ability to build hierarchical
structures that fully model EBNF expressions of arbitrary complexity.
Later on, we shall see more primitive and composite building blocks.
</p>
<h2>
The Scanner
</h2>
<p>
Like the parser, the scanner is also an abstract concept. The task of the
scanner is to feed the sequential input data stream to the parser. The
scanner is composed of two STL conforming forward iterators, first and
last, where first is held by reference and last, by value. The first
iterator is held by reference to allow re-positioning by the parser. A
set of policies governs how the scanner behaves. Parsers extract data
from the scanner and position the iterator appropriately through its
member functions.
</p>
<p>
Knowledge of the intricacies of these policies is not required at all in
most cases. However, knowledge of the scanner's basic API is required to
write fully-conforming Spirit parsers. The scanner's API will be outlined
in a separate section. In addition, for the power users and the
adventurous among us, a full section will be devoted to covering the
scanner policies. The scanner policies make Spirit very flexible and
extensible. For instance, some of the policies may be modified to filter
data. A practical example is a scanner policy that does not distinguish
upper and lower case, thereby making it useful for parsing
case-insensitive input. Another example is a scanner policy that strips white
spaces from the input.
</p>
<h2>
The Match
</h2>
<p>
The parser has a conceptual parse member function taking in a scanner and
returning a match object. The primary function of the match object is to
report parsing success (or failure) back to the parser's caller; i.e., it
evaluates to true if the parse function is successful, false otherwise.
If the parse is successful, the match object may also be queried to
report the number of characters matched (using <tt>match.length()</tt>).
The length is non-negative if the match is successful, and the typical
length of a parse failure is -1. A zero length is perfectly valid and
still represents a successful match.
</p>
<p>
A parser may have attribute data associated with it. For example, the
real_p parser has a numeric datum associated with it. This attribute is
the parsed number. This attribute is passed on to the returned match
object. The match object may be queried to get this attribute. This datum
is valid only when the match is successful.
</p>
<h2>
Semantic Actions
</h2>
<p>
A composite parser forms a hierarchy. Parsing proceeds from the topmost
parent parser, which delegates and apportions the parsing task to its
children, recursively to its children's children, and so on until a
primitive is reached. By attaching semantic actions to various points in
this hierarchy, in effect we can transform the flat linear input stream
into a structured representation. This is essentially what parsers do.
</p>
<p>
Recall our example above:
</p>
<pre><code><font color="#000000"> </font></code><code><span class=
"identifier">real_p</span> <span class=
"special">&gt;&gt;</span> <span class="special">*(</span><span class=
"literal">','</span> <span class="special">&gt;&gt;</span> <span class=
"identifier">real_p</span><span class="special">)</span></code>
</pre>
<p>
By hooking a function (or functor) into the real_p parsers, we can
extract the numbers from the input:
</p>
<pre><code><font color="#000000"> </font></code><span class=
"identifier">real_p</span><span class="special">[&amp;</span><span class=
"identifier">f</span><span class="special">]</span> <span class=
"special">&gt;&gt;</span> <span class="special">*(</span><span class=
"literal">','</span> <span class="special">&gt;&gt;</span> <span class=
"identifier">real_p</span><span class="special">[&amp;</span><span class=
"identifier">f</span><span class="special">])</span>
</pre>
<table width="41%" border="0" align="center">
<tr>
<td>
<img src="theme/intro6.png">
</td>
</tr>
</table>
<p> where <tt>f</tt> is a function that takes in a single argument. The <tt><span class="operators">[&amp;</span>f<span class=
"operators">]</span></tt> hooks the parser with the function such that when
<tt>real_p</tt> recognizes a valid number, the function <tt>f</tt> is called.
It is up to the function then to do what is appropriate. For example, it can
stuff the numbers in a vector. Or perhaps, if the grammar is changed slightly
by replacing <tt class="quotes">','</tt> with <tt class="quotes">'+'</tt>, then
we have a primitive calculator that computes sums. The function <tt>f</tt>
can then be made to add all incoming numbers.<br>
</p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30">
<a href="../index.html"><img src="theme/u_arr.gif" border="0"></a>
</td>
<td width="30">
<a href="quick_start.html"><img src="theme/l_arr.gif" border="0"></a>
</td>
<td width="30">
<a href="organization.html"><img src="theme/r_arr.gif" border="0">
</a>
</td>
</tr>
</table><br>
<hr size="1">
<p class="copyright">
Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost
Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or
copy at http://www.boost.org/LICENSE_1_0.txt)</font>
</p>
<p>&nbsp;
</p>
</body>
</html>

View File

@@ -0,0 +1,158 @@
<html>
<head>
<title>Character Sets</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%">
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Character Sets</b></font>
</td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="loops.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="confix.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>The character set <tt>chset</tt> matches a set of characters over a finite
range bounded by the limits of its template parameter <tt>CharT</tt>. This class
is an optimization of a parser that acts on a set of single characters. The
template class is parameterized by the character type <tt>CharT</tt> and can
work efficiently with 8, 16, 32 and even 64 bit characters.</p>
<pre><span class=identifier> </span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>CharT </span><span class=special>= </span><span class=keyword>char</span><span class=special>&gt;
</span><span class=keyword>class </span><span class=identifier>chset</span><span class=special>;</span></pre>
<p>The <tt>chset</tt> is constructed from literals (e.g. <tt>'x'</tt>), <tt>ch_p</tt>
or <tt>chlit&lt;&gt;</tt>, <tt>range_p</tt> or <tt>range&lt;&gt;</tt>, <tt>anychar_p</tt>
and <tt>nothing_p</tt> (see <a href="primitives.html">primitives</a>) or copy-constructed
from another <tt>chset</tt>. The <tt>chset</tt> class uses a copy-on-write scheme
that enables instances to be passed along easily by value.</p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><img src="theme/lens.gif" width="15" height="16"> <b>Sparse
bit vectors</b><br>
<br>
To accommodate 16/32 and 64 bit characters, the <tt>chset</tt> class
statically switches from a <tt>std::bitset</tt> implementation when the
character type is not greater than 8 bits, to a sparse bit/boolean set which
uses a sorted vector of disjoint ranges (<tt>range_run</tt>). The set is
constructed from ranges such that adjacent or overlapping ranges are coalesced.<br>
<br>
range_runs are very space-economical in situations where there are lots
of ranges and a few individual disjoint values. Searching is O(log n) where
n is the number of ranges.</td>
</tr>
</table>
<p> Examples:<br>
</p>
<pre><span class=identifier> </span><span class=identifier>chset</span><span class=special>&lt;&gt; </span><span class=identifier>s1</span><span class=special>(</span><span class=literal>'x'</span><span class=special>);
</span><span class=identifier>chset</span><span class=special>&lt;&gt; </span><span class=identifier>s2</span><span class=special>(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=identifier>s1</span><span class=special>);</span></pre>
<p>Optionally, character sets may also be constructed using a definition string
following a syntax that resembles POSIX-style regular expression character sets,
except that double quotes delimit the set elements instead of square brackets
and there is no special negation <tt>^</tt> character.</p>
<pre> <span class=identifier>range </span><span class=special>= </span><span class=identifier>anychar_p </span><span class=special>&gt;&gt; </span><span class=literal>'-' </span><span class=special>&gt;&gt; </span><span class=identifier>anychar_p</span><span class=special>;
</span><span class=identifier>set </span><span class=special>= *(</span><span class=identifier>range_p </span><span class=special>| </span><span class=identifier>anychar_p</span><span class=special>);</span></pre>
<p>Since we are defining the set using a C string, the usual C/C++ literal string
syntax rules apply. Examples:<br>
</p>
<pre> <span class=identifier>chset</span><span class=special>&lt;&gt; </span><span class=identifier>s1</span><span class=special>(</span><span class=string>&quot;a-zA-Z&quot;</span><span class=special>); </span><span class=comment>// alphabetic characters
</span><span class=identifier>chset</span><span class=special>&lt;&gt; </span><span class=identifier>s2</span><span class=special>(</span><span class=string>&quot;0-9a-fA-F&quot;</span><span class=special>); </span><span class=comment>// hexadecimal characters
</span><span class=identifier>chset</span><span class=special>&lt;&gt; </span><span class=identifier>s3</span><span class=special>(</span><span class=string>&quot;actgACTG&quot;</span><span class=special>); </span><span class=comment>// DNA identifiers
</span><span class=identifier>chset</span><span class=special>&lt;&gt; </span><span class=identifier>s4</span><span class=special>(</span><span class=string>&quot;\x7f\x7e&quot;</span><span class=special>); </span><span class=comment>// Hexadecimal 0x7F and 0x7E</span></pre>
<p>The standard Spirit set operators apply (see <a href="operators.html">operators</a>)
plus an additional character-set-specific inverse (negation <tt>~</tt>) operator:<span class=comment></span></p>
<table width="90%" border="0" align="center">
<tr>
<td class="table_title" colspan="2">Character set operators</td>
</tr>
<tr>
<td class="table_cells" width="28%"><b>~a</b></td>
<td class="table_cells" width="72%">Set inverse</td>
</tr>
<tr>
<td class="table_cells" width="28%"><b>a | b</b></td>
<td class="table_cells" width="72%">Set union</td>
</tr>
<tr>
<td class="table_cells" width="28%"><b>a &amp; b</b></td>
<td class="table_cells" width="72%">Set intersection</td>
</tr>
<tr>
<td class="table_cells" width="28%"><b>a - b</b></td>
<td class="table_cells" width="72%">Set difference</td>
</tr>
<tr>
<td class="table_cells" width="28%"><b>a ^ b</b></td>
<td class="table_cells" width="72%">Set xor</td>
</tr>
</table>
<p></p>
<p></p>
<p></p>
<p></p>
<p></p>
<p></p>
<p></p>
<p></p>
<p>where operands a and b are both <tt>chsets</tt> or one of the operands is either
a literal character, <tt>ch_p</tt> or <tt>chlit</tt>, <tt>range_p</tt> or <tt>range</tt>,
<tt>anychar_p</tt> or <tt>nothing_p</tt>. Special optimized overloads are provided
for <tt>anychar_p</tt> and <tt>nothing_p</tt> operands. A <tt>nothing_p</tt>
operand is converted to an empty set, while an <tt>anychar_p</tt> operand is
converted to a set having elements of the full range of the character type used
(e.g. 0-255 for unsigned 8 bit chars).</p>
<p>A special case is <tt>~anychar_p</tt> which yields <tt>nothing_p</tt>, but
<tt>~nothing_p</tt> is illegal. Inversion of <tt>anychar_p</tt> is asymmetrical,
a one-way trip comparable to converting <tt>T*</tt> to a <tt>void*</tt>.</p>
<table width="90%" border="0" align="center">
<tr>
<td class="table_title" colspan="2">Special conversions</td>
</tr>
<tr>
<td class="table_cells" width="28%"><b>chset&lt;CharT&gt;(nothing_p)</b></td>
<td class="table_cells" width="72%">empty set</td>
</tr>
<tr>
<td class="table_cells" width="28%"><b>chset&lt;CharT&gt;(anychar_p)</b></td>
<td class="table_cells" width="72%">full range of CharT (e.g. 0-255 for unsigned
8 bit chars)</td>
</tr>
<tr>
<td class="table_cells" width="28%"><b>~anychar_p</b></td>
<td class="table_cells" width="72%">nothing_p</td>
</tr>
<tr>
<td class="table_cells" width="28%"><b>~nothing_p</b></td>
<td class="table_cells" width="72%">illegal</td>
</tr>
</table>
<p></p><table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="loops.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="confix.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
</body>
</html>

View File

@@ -0,0 +1,338 @@
<html>
<head>
<title>Closures</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
<style type="text/css">
<!--
.style1 {font-family: "Courier New", Courier, mono}
-->
</style>
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%">
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Closures</b></font>
</td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="phoenix.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="dynamic_parsers.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<h2>Overview</h2>
<p>Using phoenix, in the previous chapter, we've seen how we can get data from our parsers using <tt>var</tt>:</p>
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>int </span><span class=identifier>i</span><span class=special>;
</span><span class=identifier> integer </span><span class=special>= </span><span class=identifier>int_p</span><span class=special>[</span><span class="identifier">var</span><span class=special>(</span><span class=identifier>i</span><span class=special>) = </span><span class="identifier">arg1</span><span class=special>];</span></font></code></pre>
<p>Nifty! Our rule <tt>integer</tt>, if successful, passes the parsed integer
to the variable <tt>i</tt>. Every time we need to parse an integer, we can call
our rule <tt>integer</tt> and simply extract the parsed number from the variable
<tt>i</tt>. There's something you should be aware of though. In the viewpoint
of the grammar, the variable <tt>i</tt> is global. When the grammar gets more
complex, it's hard to keep track of the current state of <tt>i</tt>. And, with
recursive rules, global variables simply won't be adequate. </p>
<p>Closures are needed if you need your rules (or grammars) to be reentrant. For example, a rule (or grammar) might be called recursively indirectly or directly by itself. The calculator is a good example. The expression rule recursively calls itself indirectly when it invokes the factor rule. </p>
<p>Closures provide named (lazy) variables associated with each parse rule invocation. A closure variable is addressed using member syntax:</p>
<pre><code><font color="#000000"><span class=identifier> </span>rulename<span class="special">.</span>varname</font></code></pre>
<p>A closure variable <tt>R.x</tt> may be addressed in the semantic action of any other rule invoked by <tt>R</tt>; it refers to the innermost enclosing invocation of <tt>R</tt>. If no such invocation exists, an assertion fails at runtime. </p>
<p>Closures provide an environment, a stack frame, for local variables.
Most importantly, the closure variables are accessible from the EBNF grammar
specification and can be used to pass parser information upstream or downstream
from the topmost rule down to the terminals in a top-down recursive descent.
Closures facilitate dynamic scoping in C++.
Spirit's closure implementation is based on <em>Todd Veldhuizen</em>'s <strong>Dynamic
scoping in C++</strong> technique that he presented in his paper <a href="ftp://ftp.cs.indiana.edu/pub/techreports/TR542.pdf">Techniques
for Scientific C++</a>. </p>
<p>When a rule is given a closure, the closure's local variables are created prior
to entering the parse function and destructed after exiting the parse function.
These local variables are true local variables that exist on the hardware stack.</p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><img src="theme/alert.gif" width="16" height="16"> <strong>Closures</strong>
<strong>and Phoenix</strong><br> <br>
Spirit v1.8 closure support requires <a href="../phoenix/index.html">Phoenix</a>.
In the future, Spirit will fully support <a href="../../../../libs/lambda/index.html">BLL</a>.
Currently, work is underway to merge the features of both libraries.</td>
</tr>
</table>
<h2>Example</h2>
<p>Let's go back to the calculator grammar introduced in the <a href="functional.html">Functional</a> chapter. Here's the full grammar again, plus the closure declarations:</p>
<pre><span class=special> </span><span class=keyword>struct </span><span class=identifier>calc_closure </span><span class=special>: </span><span class=identifier>boost</span><span class=special>::</span><span class=identifier>spirit</span><span class=special>::</span><span class=identifier>closure</span><span class=special>&lt;</span><span class=identifier>calc_closure</span><span class=special>, </span><span class=keyword>double</span><span class=special>&gt;
</span><span class=special>{
</span><span class=identifier>member1 </span><span class=identifier>val</span><span class=special>;
</span><span class=special>};
</span><span class=keyword>struct </span><span class=identifier>calculator </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>grammar</span><span class=special>&lt;</span><span class=identifier>calculator</span><span class=special>, </span><span class=identifier>calc_closure</span><span class=special>::</span><span class=identifier>context_t</span><span class=special>&gt;
</span><span class=special>{
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>definition
</span><span class=special>{
</span><span class=identifier>definition</span><span class=special>(</span><span class=identifier>calculator </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>self</span><span class=special>)
</span><span class=special>{
</span><span class=identifier>top </span><span class=special>= </span><span class=identifier>expression</span><span class=special>[</span><span class=identifier>self</span><span class=special>.</span><span class=identifier>val </span><span class=special>= </span><span class=identifier>arg1</span><span class=special>];
</span><span class=identifier>expression
</span><span class=special>= </span><span class=identifier>term</span><span class=special>[</span><span class=identifier>expression</span><span class=special>.</span><span class=identifier>val </span><span class=special>= </span><span class=identifier>arg1</span><span class=special>]
</span><span class=special>&gt;&gt; </span><span class=special>*( </span><span class=special>(</span><span class=literal>'+' </span><span class=special>&gt;&gt; </span><span class=identifier>term</span><span class=special>[</span><span class=identifier>expression</span><span class=special>.</span><span class=identifier>val </span><span class=special>+= </span><span class=identifier>arg1</span><span class=special>])
</span><span class=special>| </span><span class=special>(</span><span class=literal>'-' </span><span class=special>&gt;&gt; </span><span class=identifier>term</span><span class=special>[</span><span class=identifier>expression</span><span class=special>.</span><span class=identifier>val </span><span class=special>-= </span><span class=identifier>arg1</span><span class=special>])
</span><span class=special>)
</span><span class=special>;
</span><span class=identifier>term
</span><span class=special>= </span><span class=identifier>factor</span><span class=special>[</span><span class=identifier>term</span><span class=special>.</span><span class=identifier>val </span><span class=special>= </span><span class=identifier>arg1</span><span class=special>]
</span><span class=special>&gt;&gt; </span><span class=special>*( </span><span class=special>(</span><span class=literal>'*' </span><span class=special>&gt;&gt; </span><span class=identifier>factor</span><span class=special>[</span><span class=identifier>term</span><span class=special>.</span><span class=identifier>val </span><span class=special>*= </span><span class=identifier>arg1</span><span class=special>])
</span><span class=special>| </span><span class=special>(</span><span class=literal>'/' </span><span class=special>&gt;&gt; </span><span class=identifier>factor</span><span class=special>[</span><span class=identifier>term</span><span class=special>.</span><span class=identifier>val </span><span class=special>/= </span><span class=identifier>arg1</span><span class=special>])
</span><span class=special>)
</span><span class=special>;
</span><span class=identifier>factor
</span><span class=special>= </span><span class=identifier>ureal_p</span><span class=special>[</span><span class=identifier>factor</span><span class=special>.</span><span class=identifier>val </span><span class=special>= </span><span class=identifier>arg1</span><span class=special>]
</span><span class=special>| </span><span class=literal>'(' </span><span class=special>&gt;&gt; </span><span class=identifier>expression</span><span class=special>[</span><span class=identifier>factor</span><span class=special>.</span><span class=identifier>val </span><span class=special>= </span><span class=identifier>arg1</span><span class=special>] </span><span class=special>&gt;&gt; </span><span class=literal>')'
</span><span class=special>| </span><span class=special>(</span><span class=literal>'-' </span><span class=special>&gt;&gt; </span><span class=identifier>factor</span><span class=special>[</span><span class=identifier>factor</span><span class=special>.</span><span class=identifier>val </span><span class=special>= </span><span class=special>-</span><span class=identifier>arg1</span><span class=special>])
</span><span class=special>| </span><span class=special>(</span><span class=literal>'+' </span><span class=special>&gt;&gt; </span><span class=identifier>factor</span><span class=special>[</span><span class=identifier>factor</span><span class=special>.</span><span class=identifier>val </span><span class=special>= </span><span class=identifier>arg1</span><span class=special>])
</span><span class=special>;
</span><span class=special>}
</span><span class=keyword>typedef </span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>, </span><span class=identifier>calc_closure</span><span class=special>::</span><span class=identifier>context_t</span><span class=special>&gt; </span><span class=identifier>rule_t</span><span class=special>;
</span><span class=identifier>rule_t </span><span class=identifier>expression</span><span class=special>, </span><span class=identifier>term</span><span class=special>, </span><span class=identifier>factor</span><span class=special>;
</span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>&gt; </span><span class=identifier>top</span><span class=special>;
</span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>&gt; </span><span class=keyword>const</span><span class=special>&amp;
</span><span class=identifier>start</span><span class=special>() </span><span class=keyword>const </span><span class=special>{ </span><span class=keyword>return </span><span class=identifier>top</span><span class=special>; </span><span class=special>}
</span><span class=special>};
</span><span class=special>};</span></pre>
<p> <img height="16" width="15" src="theme/lens.gif"> The full source code can be <a href="../example/fundamental/phoenix_calc.cpp">viewed here</a>. This is part of the Spirit distribution.</p>
<p>Surely, we've come a long way from the original version of this calculator. With inline <a href="phoenix.html#lambda">lambda expressions</a>, we were able to write self-contained grammars complete with semantic actions. </p>
<p>The first thing to notice is the declaration of <tt>calc_closure</tt>. </p>
<p> <strong>Declaring closures</strong></p>
<p> The general closure declaration syntax is:</p>
<pre><code> <span class=keyword>struct </span><span class=identifier>name</span><span class=special></span> <span class=special>: </span><span class=identifier>spirit</span><span class=special>::</span><span class=identifier>closure</span><span class=special>&lt;</span><span class=identifier>name</span><span class=special>, </span><span class=keyword>type1, type2, type3,... typeN</span><span class=special>&gt;
{
</span><span class=identifier>member1 m_name1</span><span class=special>;
</span><span class=identifier>member2 m_name2</span><span class=special>;
</span><span class=identifier>member3 m_name3</span><span class=special>;
...
</span><span class=identifier>memberN m_nameN</span><span class=special>;
};</span></code></pre>
<p> <tt>member1</tt>... <tt>memberN</tt> are indirect links to the actual closure variables. Their indirect types correspond to <code><tt>type1</tt></code>... <code><tt>typeN</tt></code>. In our example, we declared <tt>calc_closure</tt>:</p>
<pre><span class=number> </span><span class=keyword>struct </span><span class=identifier>calc_closure </span><span class=special>: </span><span class=identifier>boost</span><span class=special>::</span><span class=identifier>spirit</span><span class=special>::</span><span class=identifier>closure</span><span class=special>&lt;</span><span class=identifier>calc_closure</span><span class=special>, </span><span class=keyword>double</span><span class=special>&gt;
</span><span class=special>{
</span><span class=identifier>member1 </span><span class=identifier>val</span><span class=special>;
</span><span class=special>};</span></pre>
<p><tt>calc_closure</tt> has a single variable <tt>val</tt> of type <span class=keyword>double</span><span class=special></span>.</p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><p><img src="theme/alert.gif" width="16" height="16"> <tt>BOOST_SPIRIT_CLOSURE_LIMIT</tt><br>
<br>
Spirit predefined maximum closure limit. This limit defines the maximum number of elements a closure can hold. This number defaults to 3. The actual maximum is rounded up in multiples of 3. Thus, if this value is 4, the actual limit is 6. The ultimate maximum limit in this implementation is 15. It should <strong>NOT</strong> be greater than <tt>PHOENIX_LIMIT</tt> (see <a href="../phoenix/index.html">phoenix</a>). Example:<br>
<br>
<span class="comment style1">// Define these before including anything else <br>
</span><span class="preprocessor style1">#define</span><span class="style1"> PHOENIX_LIMIT 10<br>
</span><span class="preprocessor">#define</span><span class="style1"> BOOST_SPIRIT_CLOSURE_LIMIT 10</span></p> </td>
</tr>
</table>
<p><strong>Attaching closures</strong></p>
<p>Closures can be applied to rules, subrules and grammars (non-terminals). The closure has a
special <a href="indepth_the_parser_context.html">parser context</a> that can be used with these non-terminals. The closure's
context is its means to hook into the non-terminal. The context of the closure <tt>C</tt> is <tt>C::context_t</tt>. </p>
<p>We can see in the example that we attached <tt>calc_closure</tt> to the <tt>expression</tt>, <tt>term</tt> and <tt>factor</tt> rules in our grammar:</p>
<pre><span class=special> </span><span class=keyword>typedef </span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>, </span><span class=identifier>calc_closure</span><span class=special>::</span><span class=identifier>context_t</span><span class=special>&gt; </span><span class=identifier>rule_t</span><span class=special>;
</span><span class=identifier>rule_t </span><span class=identifier>expression</span><span class=special>, </span><span class=identifier>term</span><span class=special>, </span><span class=identifier>factor</span><span class=special>;</span> </pre>
<p>as well as the grammar itself:</p>
<pre><span class=special> </span><span class=keyword>struct </span><span class=identifier>calculator </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>grammar</span><span class=special>&lt;</span><span class=identifier>calculator</span><span class=special>, </span><span class=identifier>calc_closure</span><span class=special>::</span><span class=identifier>context_t</span><span class=special>&gt;</span></pre>
<p><strong>Closure return value</strong></p>
<p>The closure <tt>member1</tt> is the closure's return value. This return value, like the one returned by <tt>anychar_p</tt>, for example, can be used to propagate data up the parser hierarchy or passed to semantic actions. Thus, <tt>expression</tt>, <tt>term</tt> and <tt>factor</tt>, as well as the <tt>calculator</tt> grammar itself, all return a <tt>double</tt>. </p>
<p><strong>Accessing closure variables</strong></p>
<p>Closure variables can be accessed from within semantic actions just like you
would struct members: by qualifying the member name with its owner rule, subrule
or grammar. In our example above, notice how we referred to the closure member val. Example:</p>
<pre class="identifier"><code> expression<span class=special>.</span>val <span class="comment">// refer to expression's closure member val</span></code></pre>
<p><strong>Initializing closure variables </strong></p>
<p>We didn't use this feature in the example, yet, for completeness... </p>
<p>Sometimes, we need to initialize our closure variables upon entering a non-terminal (rule, subrule or grammar). Closure enabled non-terminals, by default, default-construct variables upon entering the parse member function.
If this is not desirable, we can pass constructor arguments to the non-terminal. The syntax mimics a
function call. </p>
<p>For (<em>a contrived</em>) example, if we wish to initialize <tt>calc_closure</tt>'s variable
to <tt>3.6</tt> when we invoke the rule <tt>expression</tt>, we write:</p>
<pre class="identifier"><code> expression<span class="special">(</span><span class="keyword">3.6</span><span class="special">) </span><span class="comment">// invoke rule expression and set its closure variable to 3.6</span></code></pre>
<p>The constructor arguments are actually Phoenix lambda expressions, so you can
use arbitrarily complex expressions. Here's another <em>contrived example<strong>: </strong></em></p>
<pre class="identifier"><code> <span class="comment">// call rule factor and set its closure variable to (expression.x / 8) * term.y
</span> <code>factor</code><span class="special">((</span>expression<span class="special">.</span>x<span class="keyword"> </span><span class="special">/</span><span class="keyword"> 8</span><span class="special">) *</span><span class="keyword"> </span>term<span class="special">.</span>y<span class="special">)</span></code></pre>
<p><img src="theme/lens.gif" width="15" height="16"> We can pass fewer arguments than the actual number of variables in the closure.
The variables at the right with no corresponding constructor arguments are default
constructed. Passing more arguments than there are closure variables is an error.</p>
<p><img src="theme/lens.gif" width="15" height="16"> See <a href="../example/intermediate/parameters.cpp">parameters.cpp</a> for a compilable example. This is part of the Spirit distribution.</p>
<h2>Closures and Dynamic parsing</h2>
<p>Let's write a very simple parser for an XML/HTML like language with arbitrarily nested tags. The typical approach to this type of nested tag parsing is to delegate the actual tag matching to semantic actions, perhaps using a symbol table. For example, the semantic actions are responsible for ensuring that the tags are nested (e.g. this code: <tt>&lt;p&gt;&lt;table&gt;&lt;/p&gt;&lt;/table&gt;</tt> is erroneous).</p>
<p>Spirit allows us to dynamically modify the parser at runtime. The ability to guide parser behavior through semantic actions makes it possible to ensure the nesting of tags directly in the parser. We shall see how this is possible. Here's the grammar in its simplest form:</p>
<pre><span class=identifier> element </span><span class=special>= </span><span class=identifier>start_tag </span><span class=special>&gt;&gt; </span><span class=special>*</span><span class=identifier>element </span><span class=special>&gt;&gt; </span><span class=identifier>end_tag</span><span class=special>;</span>
</pre>
<p>An element is a <tt>start_tag</tt> (e.g. <tt>&lt;font&gt;</tt>) followed by zero or more elements, and ended by an <tt>end_tag</tt> (e.g. <tt>&lt;/font&gt;</tt>). Now, here's a first shot at our <tt>start_tag</tt>:</p>
<pre><span class=special> </span><span class=identifier>start_tag </span><span class=special>= </span><span class=literal>'&lt;' </span><span class=special>&gt;&gt; </span><span class=identifier>lexeme_d</span><span class=special>[</span><span class=special>(+</span><span class=identifier>alpha_p</span><span class=special>)</span><span class=special>] </span><span class=special>&gt;&gt; </span><span class=literal>'&gt;'</span><span class=special>;</span></pre>
<p>Notice that the <tt>end_tag</tt> is just the same as <tt>start_tag</tt> with the addition of a slash:</p>
<pre><span class=special> </span><span class=identifier>end_tag </span><span class=special>= </span><span class=literal>&quot;&lt;/&quot; </span><span class=special>&gt;&gt; </span>what_we_got_in_the_start_tag <span class=special></span><span class=special>&gt;&gt; </span><span class=literal>'&gt;'</span><span class=special>;</span>
</pre>
<p>What we need to do is to temporarily store what we got in our <tt>start_tag</tt> and use that later to parse our <tt>end_tag</tt>. Nifty, we can use the <a href="parametric_parsers.html">parametric parser</a> primitives to parse our <tt>end_tag</tt>: </p>
<pre><span class=special> </span><span class=identifier>end_tag </span><span class=special>= </span><span class=string>&quot;&lt;/&quot; </span><span class=special>&gt;&gt; </span><span class=identifier>f_str_p</span><span class=special>(</span>tag<span class=special>) </span><span class=special>&gt;&gt; </span><span class=literal>'&gt;'</span><span class=special>;</span></pre>
<p>where we parameterize <tt>f_str_p</tt> with what we stored (tag). </p>
<p>Be reminded though that our grammar is recursive. The element rule calls itself. Hence, we can't just use a variable and use <tt>phoenix::var</tt> or <tt>boost::ref</tt>. Nested recursion will simply gobble up the variable. Each invocation of element must have a closure variable <tt>tag</tt>. Here now is the complete grammar:</p>
<pre><span class=number> </span><span class=keyword>struct </span><span class=identifier>tags_closure </span><span class=special>: </span><span class=identifier>boost</span><span class=special>::</span><span class=identifier>spirit</span><span class=special>::</span><span class=identifier>closure</span><span class=special>&lt;</span><span class=identifier>tags_closure</span><span class=special>, </span><span class=identifier>string</span><span class=special>&gt; </span><span class=special>
{
</span><span class=identifier>member1 </span><span class=identifier>tag</span><span class=special>;
</span><span class=special>};
</span><span class=keyword>struct </span><span class=identifier>tags </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>grammar</span><span class=special>&lt;</span><span class=identifier>tags</span><span class=special>&gt;
</span><span class=special>{
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>definition </span><span class=special>{
</span><span class=identifier>definition</span><span class=special>(</span><span class=identifier>tags </span><span class=keyword>const</span><span class=special>&amp; </span><span class=comment>/*self*/</span><span class=special>)
</span><span class=special>{
</span><span class=identifier>element </span><span class=special>= </span><span class=identifier>start_tag </span><span class=special>&gt;&gt; </span><span class=special>*</span><span class=identifier>element </span><span class=special>&gt;&gt; </span><span class=identifier>end_tag</span><span class=special>;
</span><span class=identifier>start_tag </span><span class=special>=
</span><span class=literal>'&lt;'
</span><span class=special>&gt;&gt; </span><span class=identifier>lexeme_d
</span><span class=special>[
</span><span class=special>(+</span><span class=identifier>alpha_p</span><span class=special>)
</span><span class=special>[
</span><span class=comment>// construct string from arg1 and arg2 lazily
</span><span class=comment>// and assign to element.tag
</span><span class=identifier>element</span><span class=special>.</span><span class=identifier>tag </span><span class=special>= </span><span class=identifier>construct_</span><span class=special>&lt;</span><span class=identifier>string</span><span class=special>&gt;(</span><span class=identifier>arg1</span><span class=special>, </span><span class=identifier>arg2</span><span class=special>)
</span><span class=special>]
</span><span class=special>]
</span><span class=special>&gt;&gt; </span><span class=literal>'&gt;'</span><span class=special>;
</span><span class=identifier>end_tag </span><span class=special>= </span><span class=string>&quot;&lt;/&quot; </span><span class=special>&gt;&gt; </span><span class=identifier>f_str_p</span><span class=special>(</span><span class=identifier>element</span><span class=special>.</span><span class=identifier>tag</span><span class=special>) </span><span class=special>&gt;&gt; </span><span class=literal>'&gt;'</span><span class=special>;
</span><span class=special>}
</span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>, </span><span class=identifier>tags_closure</span><span class=special>::</span><span class=identifier>context_t</span><span class=special>&gt; </span><span class=identifier>element</span><span class=special>;
</span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>&gt; </span><span class=identifier>start_tag</span><span class=special>, </span><span class=identifier>end_tag</span><span class=special>;
</span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>, </span><span class=identifier>tags_closure</span><span class=special>::</span><span class=identifier>context_t</span><span class=special>&gt; </span><span class=keyword>const</span><span class=special>&amp;
</span><span class=identifier>start</span><span class=special>() </span><span class=keyword>const </span><span class=special>{ </span><span class=keyword>return </span><span class=identifier>element</span><span class=special>; </span><span class=special>}
</span><span class=special>};
</span><span class=special>};</span></pre>
<p>We attached a semantic action to the <tt>(+alpha_p)</tt> part of the <tt>start_tag</tt>. There, we stored the parsed tag in the <tt>element</tt>'s closure variable <tt>tag</tt>. Later, in the <tt>end_tag</tt>, we simply used the <tt>element</tt>'s closure variable <tt>tag</tt> to parameterize our <tt>f_str_p</tt> parser. Simple and elegant. If some of the details begin to look like Greek (e.g. what is <tt>construct_</tt>?), please consult the <a href="phoenix.html">Phoenix</a> chapter. </p>
<p><img height="16" width="15" src="theme/lens.gif"> The full source code can be <a href="../example/fundamental/matching_tags.cpp">viewed here</a>. This is part of the Spirit distribution.</p>
<h2><img src="theme/lens.gif" width="15" height="16"> Closures in-depth</h2>
<p><strong>What are Closures?</strong></p>
<p>The closure is an object that <span class="quotes">&quot;closes&quot;</span>
over the local variables of a function making them visible and accessible outside
the function. What is more interesting is that the closure actually packages
a local context (stack frame where some variables reside) and makes it available
outside the scope in which they actually exist. The information is essentially
<span class="quotes">&quot;captured&quot;</span> by the closure allowing it
to be referred to anywhere and anytime, even prior to the actual creation of
the variables. </p>
<p>The following diagram depicts the situation where a function <tt>A</tt> (or
rule) exposes its closure and another function <tt>B</tt> references <tt>A</tt>'s
variables through its closure.</p>
<table width="40%" border="0" align="center">
<tr>
<td><img src="theme/closure1.png"></td>
</tr>
<tr>
<td> <div align="center"><b><font face="Geneva, Arial, Helvetica, sans-serif" size="+1" color="#003399">The
closure as an object that <i>&quot;closes&quot;</i> over the local variables
of a function making them visible and accessible outside the function</font></b></div></td>
</tr>
</table>
<p>Of course, function <tt>A</tt> should be active when <tt>A.x</tt> is referenced.
What this means is that function <tt>B</tt> is reliant on function <tt>A</tt>
(If <tt>B</tt> is a nested function of <tt>A</tt>, this will always be the case).
The free form nature of Spirit rules allows access to a closure variable anytime,
anywhere. Accessing <tt>A.x</tt> is equivalent to referring to the topmost stack
variable <tt>x</tt> of function <tt>A</tt>. If function <tt>A</tt> is not active
when <tt>A.x</tt> is referenced, a runtime exception will be thrown.</p>
<p><strong>Nested Functions</strong></p>
<p>To fully understand the importance of closures, it is best to look at a language
such as Pascal which allows nested functions. Since we are dealing with C++,
let us assume for the moment that C++ allows nested functions. Consider the
following <b><i>pseudo</i></b> C++ code:</p>
<pre><span class=identifier> </span><span class=keyword>void </span><span class=identifier>a</span><span class=special>()
</span><span class=special>{
</span><span class=keyword>int </span><span class=identifier>va</span><span class=special>;
</span><span class=keyword>void </span><span class=identifier>b</span><span class=special>()
</span><span class=special>{
</span><span class=keyword>int </span><span class=identifier>vb</span><span class=special>;
</span> <span class=keyword>void </span><span class=identifier>c</span><span class=special>()
</span><span class=special>{
</span><span class=keyword>int </span><span class=identifier>vc</span><span class=special>;
</span><span class=special>}
</span><span class=identifier>c</span><span class=special>()</span><span class=special>;
</span><span class=special>}
</span><span class=identifier>b</span><span class=special>();
</span><span class=special>}</span></pre>
<p>We have three functions <tt>a</tt>, <tt>b</tt> and <tt>c</tt> where <tt>c</tt>
is nested in <tt>b</tt> and <tt>b</tt> is nested in <tt>a</tt>. We also have
three variables <tt>va</tt>, <tt>vb</tt> and <tt>vc</tt>. The lifetime of each
of these local variables starts when the function where it is declared is entered
and ends when the function exits. The scope of a local variable spans all nested
functions inside the enclosing function where the variable is declared.</p>
<p>Going downstream from function <tt>a</tt> to function <tt>c</tt>, when function
<tt>a</tt> is entered, the variable <tt>va</tt> will be created on the stack. When function
<tt>b</tt> is entered (called by <tt>a</tt>), <tt>va</tt> is very well in scope
and is visible in <tt>b</tt>, at which point a fresh variable, <tt>vb</tt>, is
created on the stack. When function <tt>c</tt> is entered, both <tt>va</tt>
and <tt>vb</tt> are visibly in scope, and a fresh local variable <tt>vc</tt>
is created. </p>
<p>Going upstream, <tt>vc</tt> is not and cannot be visible outside the function
<tt>c</tt>. <tt>vc</tt>'s life has already expired once <tt>c</tt> exits. The
same is true with <tt>vb</tt>; vb is accessible in function <tt>c</tt> but not
in function <tt>a</tt>. </p>
<p><strong>Nested Mutually Recursive Rules</strong></p>
<p>Now consider that <tt>a</tt>, <tt>b</tt> and <tt>c</tt> are rules:</p>
<pre><span class=identifier> </span><span class=identifier>a </span><span class=special>= </span><span class=identifier>b </span><span class=special>&gt;&gt; </span><span class=special>*((</span><span class=literal>'+' </span><span class=special>&gt;&gt; </span><span class=identifier>b</span><span class=special>) </span><span class=special>| </span><span class=special>(</span><span class=literal>'-' </span><span class=special>&gt;&gt; </span><span class=identifier>b</span><span class=special>));
</span><span class=identifier>b </span><span class=special>= </span><span class=identifier>c </span><span class=special>&gt;&gt; </span><span class=special>*((</span><span class=literal>'*' </span><span class=special>&gt;&gt; </span><span class=identifier>c</span><span class=special>) </span><span class=special>| </span><span class=special>(</span><span class=literal>'/' </span><span class=special>&gt;&gt; </span><span class=identifier>c</span><span class=special>));
</span><span class=identifier>c </span><span class=special>= </span><span class=identifier>int_p </span><span class=special>| </span><span class=literal>'(' </span><span class=special>&gt;&gt; </span><span class=identifier>a </span><span class=special>&gt;&gt; </span><span class=literal>')' </span><span class=special>| </span><span class=special>(</span><span class=literal>'-' </span><span class=special>&gt;&gt; </span><span class=identifier>c</span><span class=special>) </span><span class=special>| </span><span class=special>(</span><span class=literal>'+' </span><span class=special>&gt;&gt; </span><span class=identifier>c</span><span class=special>);</span></pre>
<p>We can visualize <tt>a</tt>, <tt>b</tt> and <tt>c</tt> as mutually recursive
functions where <tt>a</tt> calls <tt>b</tt>, <tt>b</tt> calls <tt>c</tt> and
<tt>c</tt> recursively calls <tt>a</tt>. Now, imagine if <tt>a</tt>, <tt>b</tt>
and <tt>c</tt> each has a local variable named <tt>value</tt> that can be referred
to in our grammar by explicit qualification:</p>
<pre><span class=special> </span><span class=identifier>a</span><span class=special>.</span><span class=identifier>value </span><span class=comment>// refer to a's value local variable
</span><span class=identifier>b</span><span class=special>.</span><span class=identifier>value </span><span class=comment>// refer to b's value local variable
</span><span class=identifier>c</span><span class=special>.</span><span class=identifier>value </span><span class=comment>// refer to c's value local variable</span>
</pre>
<p>Like above, when <tt>a</tt> is entered, a local variable <tt>value</tt> is
created on the stack. This variable can be referred to by both <tt>b</tt> and
<tt>c</tt>. Again, when <tt>b</tt> is called by <tt>a</tt>, <tt>b</tt> creates
a local variable <tt>value</tt>. This variable is accessible by <tt>c</tt> but
not by <tt>a</tt>. </p>
<p>Here now is where the analogy with nested functions ends: when <tt>c</tt> is
called, a fresh variable <tt>value</tt> is created which, as usual, lasts the
whole lifetime of <tt>c</tt>. Pay close attention however that <tt>c</tt> may
call <tt>a</tt> recursively. When this happens, <tt>a</tt> may now refer to
the local variable of <tt>c</tt><code><span class=special>.</span></code></p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="phoenix.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="dynamic_parsers.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
</body>
</html>

View File

@@ -0,0 +1,185 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<title>Confix Parsers</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>&nbsp;</b></font></td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Confix Parsers</b></font></td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="character_sets.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="list_parsers.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p><a name="confix_parser"></a><b>Confix Parsers</b></p>
<p>Confix Parsers recognize a sequence of three independent elements: an
opening, an expression and a closing. A simple example is a C comment:
</p>
<pre><code class="comment"> /* This is a C comment */</code></pre>
<p>which could be parsed through the following rule definition:<code><font color="#000000">
</font></code> </p>
<pre><span class=identifier> </span><span class=identifier>rule</span><span class=special>&lt;&gt; </span><span class=identifier>c_comment_rule
</span><span class=special>= </span><span class=identifier>confix_p</span><span class=special>(</span><span class=literal>"/*"</span><span class=special>, </span><span class=special>*</span><span class=identifier>anychar_p</span><span class=special>, </span><span class=literal>"*/"</span><span class=special>)
</span><span class=special>;</span></pre>
<p>The <tt>confix_p</tt> parser generator
should be used for generating the required Confix Parser. The
three parameters to <tt>confix_p</tt> can be single
characters, strings (as above) or, if more complex parsing logic is required,
auxiliary parsers, each of which is automatically converted to the corresponding
parser type needed for successful parsing.</p>
<p>The generated parser is equivalent to the following rule: </p>
<pre><code> <span class=identifier>open </span><span class=special>&gt;&gt; (</span><span class=identifier>expr </span><span class=special>- </span><span class=identifier>close</span><span class=special>) &gt;&gt; </span><span class=identifier>close</span></code></pre>
<p>If the expr parser is an <tt>action_parser_category</tt> type parser (a parser
with an attached semantic action) we have to do something special. This happens,
if the user wrote something like:</p>
<pre><code><span class=identifier> confix_p</span><span class=special>(</span><span class=identifier>open</span><span class=special>, </span><span class=identifier>expr</span><span class=special>[</span><span class=identifier>func</span><span class=special>], </span><span class=identifier>close</span><span class=special>)</span></code></pre>
<p>where <code>expr</code> is the parser matching the expr of the confix sequence
and <code>func</code> is a functor to be called after matching the <code>expr</code>.
If we would do nothing, the resulting code would parse the sequence as follows:</p>
<pre><code> <span class=identifier>open </span><span class=special>&gt;&gt; (</span><span class=identifier>expr</span><span class=special>[</span><span class=identifier>func</span><span class=special>] - </span><span class=identifier>close</span><span class=special>) &gt;&gt; </span><span class=identifier>close</span></code></pre>
<p>which in most cases is not what the user expects. (If this <u>is</u> what you've
expected, then please use the <tt>confix_p</tt> generator
function <tt>direct()</tt>, which will inhibit the parser refactoring). To make
the confix parser behave as expected:</p>
<pre><code><span class=identifier> open </span><span class=special>&gt;&gt; (</span><span class=identifier>expr </span><span class=special>- </span><span class=identifier>close</span><span class=special>)[</span><span class=identifier>func</span><span class=special>] &gt;&gt; </span><span class=identifier>close</span></code></pre>
<p>the actor attached to the <code>expr</code> parser has to be re-attached to
the <code>(expr - close)</code> parser construct, which will make the resulting
confix parser 'do the right thing'. This refactoring is done with the help of
the <a href="refactoring.html">Refactoring Parsers</a>. Additionally special
care must be taken, if the expr parser is a <tt>unary_parser_category</tt> type
parser as </p>
<pre><code><span class=identifier> confix_p</span><span class=special>(</span><span class=identifier>open</span><span class=special>, *</span><span class=identifier>anychar_p</span><span class=special>, </span><span class=identifier>close</span><span class=special>)</span></code></pre>
<p>which without any refactoring would result in </p>
<pre><code> <span class=identifier>open</span> <span class=special>&gt;&gt; (*</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=identifier>close</span><span class=special>) &gt;&gt; </span><span class=identifier>close</span></code></pre>
<p>and will not give the expected result (<tt>*anychar_p</tt> will eat up all the input up
to the end of the input stream). So we have to refactor this into:</p>
<pre><code><span class=identifier> open </span><span class=special>&gt;&gt; *(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=identifier>close</span><span class=special>) &gt;&gt; </span><span class=identifier>close</span></code></pre>
<p>which will give the correct result. </p>
<p>The case, where the expr parser is a combination of the two mentioned problems
(i.e. the expr parser is a unary parser with an attached action), is handled
accordingly too, so: </p>
<pre><code><span class=identifier> confix_p</span><span class=special>(</span><span class=identifier>open</span><span class=special>, (*</span><span class=identifier>anychar_p</span><span class=special>)[</span><span class=identifier>func</span><span class=special>], </span>close<span class=special>)</span></code></pre>
<p>will be parsed as expected: </p>
<pre><code> <span class=identifier>open</span> <span class=special>&gt;&gt; (*(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=identifier>close</span><span class=special>))[</span><span class=identifier>func</span><span class=special>] &gt;&gt; </span>close</code></pre>
<p>The required refactoring is implemented here with the help of the <a href="refactoring.html">Refactoring
Parsers</a> too.</p>
<table width="90%" border="0" align="center">
<tr>
<td colspan="2" class="table_title"><b>Summary of Confix Parser refactorings</b></td>
</tr>
<tr class="table_title">
<td width="40%"><b>You write it as:</b></td>
<td width="60%"><code><font face="Verdana, Arial, Helvetica, sans-serif">It
is refactored to:</font></code></td>
</tr>
<tr>
<td width="40%" class="table_cells"><code>confix_p<span class="special">(</span><span class=identifier>open</span><span class="special">,</span>
expr<span class="special">,</span> close<span class="special">)</span></code></td>
<td width="60%" class="table_cells"> <p><code>open <span class=special>&gt;&gt;
(</span>expr <span class=special>-</span> close<span class=special>)</span><font color="#0000FF">
</font><span class=special>&gt;&gt;</span> close</code></p>
</td>
</tr>
<tr>
<td width="40%" class="table_cells"><code>confix_p<span class="special">(</span><span class=identifier>open</span><span class="special">,</span>
expr<span class="special">[</span>func<span class="special">],</span> close<span class="special">)</span></code></td>
<td width="60%" class="table_cells"> <p><code>open <span class=special>&gt;&gt;
(</span>expr <span class=special>-</span> close<span class="special">)[</span>func<span class="special">]
<font color="#0000FF" class="special">&gt;&gt;</font></span> close</code></p>
</td>
</tr>
<tr>
<td width="40%" class="table_cells" height="9"><code>confix_p<span class="special">(</span><span class=identifier>open</span><span class="special">,
*</span>expr<span class="special">,</span> close<span class="special">)</span></code></td>
<td width="60%" class="table_cells" height="9"> <p><code>open <font color="#0000FF"><span class="special">&gt;&gt;</span></font>
<span class="special"><font color="#0000FF" class="special">*</font>(</span>expr
<font color="#0000FF" class="special">-</font> close<span class="special">)
<font color="#0000FF" class="special">&gt;&gt;</font></span> close</code></p>
</td>
</tr>
<tr>
<td width="40%" class="table_cells"><code>confix_p<span class="special">(</span><span class=identifier>open</span><span class="special">,
(*</span>expr<span class="special">)[</span>func<span class="special">],
close</span><span class="special">)</span></code></td>
<td width="60%" class="table_cells"> <p><code>open <font color="#0000FF"><span class="special">&gt;&gt;</span></font><span class="special">
(<font color="#0000FF" class="special">*</font>(</span>expr <font color="#0000FF" class="special">-</font>
close<span class="special">))[</span>func<span class="special">] <font color="#0000FF" class="special">&gt;&gt;</font></span>
close</code></p>
</td>
</tr>
</table>
<p><a name="comment_parsers"></a><b>Comment Parsers</b></p>
<p>The Comment Parser generator template <tt>comment_p</tt>
is a helper for generating a correct <a href="#confix_parser">Confix Parser</a>
from auxiliary parameters, which is able to parse comment constructs as follows:
</p>
<pre><code> StartCommentToken <span class="special">&gt;&gt;</span> Comment text <span class="special">&gt;&gt;</span> EndCommentToken</code></pre>
<p>There are the following types supported as parameters: parsers, single
characters and strings (see as_parser). If it
is used with one parameter, a comment starting with the given first parser
parameter up to the end of the line is matched. So for instance the following
parser matches C++ style comments:</p>
<pre><code><span class=identifier> comment_p</span><span class=special>(</span><span class=string>"//"</span><span class=special>)</span></code></pre>
<p>If it is used with two parameters, a comment starting with the first parser
parameter up to the second parser parameter is matched. For instance a C style
comment parser could be constructed as:</p>
<pre><code> <span class=identifier>comment_p</span><span class=special>(</span><span class=string>"/*"</span><span class=special>, </span><span class=string>"*/"</span><span class=special>)</span></code></pre>
<p>The <tt>comment_p</tt> parser generator allows to generate parsers for matching
non-nested comments (as for C/C++ comments). Sometimes it is necessary to parse
nested comments as for instance allowed in Pascal.</p>
<pre><code class="comment"> { This is a { nested } PASCAL-comment }</code></pre>
<p>Such nested comments are
parseable through parsers generated by the <tt>comment_nest_p</tt> generator
template functor. The following example shows a parser, which can be used for
parsing the two different (nestable) Pascal comment styles:</p>
<pre><code> <span class=identifier>rule</span><span class=special>&lt;&gt; </span><span class=identifier>pascal_comment
</span><span class=special>= </span><span class=identifier>comment_nest_p</span><span class=special>(</span><span class=string>"(*"</span><span class=special>, </span><span class=string>"*)"</span><span class=special>)
| </span><span class=identifier>comment_nest_p</span><span class=special>(</span><span class=literal>'{'</span><span class=special>, </span><span class=literal>'}'</span><span class=special>)
;</span></code></pre>
<p>Please note, that a comment is parsed implicitly as if the whole <tt>comment_p(...)</tt>
statement were embedded into a <tt>lexeme_d[]</tt> directive, i.e. during parsing
of a comment no token skipping will occur, even if you've defined a skip parser
for your whole parsing process.</p>
<p> <img height="16" width="15" src="theme/lens.gif"> <a href="../example/fundamental/comments.cpp">comments.cpp</a> demonstrates various comment parsing schemes: </p>
<ol>
<li>Parsing of different comment styles </li>
<ul>
<li>parsing C/C++-style comment</li>
<li>parsing C++-style comment</li>
<li>parsing PASCAL-style comment</li>
</ul>
<li>Parsing tagged data with the help of the confix_parser</li>
<li>Parsing tagged data with the help of the confix_parser but the semantic<br>
action is directly attached to the body sequence parser</li>
</ol>
<p>This is part of the Spirit distribution.</p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="character_sets.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="list_parsers.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 2001-2002 Hartmut Kaiser<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
</body>
</html>

View File

@@ -0,0 +1,270 @@
<html>
<head>
<title>Debugging</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Debugging</b></font>
</td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="position_iterator.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="error_handling.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>The top-down nature of Spirit makes the generated parser easy to micro-debug
using the standard debugger bundled with the C++ compiler we are using. With
recursive-descent, the parse traversal utilizes the hardware stack through C++
function call mechanisms. There are no difficult to debug tables or state machines
that obscure the parsing logic flow. The stack trace we see in the debugger
follows faithfully the hierarchical grammar structure.</p>
<p> Since any production rule can initiate a parse traversal, it is a lot easier
to pinpoint the bugs by focusing on one or a few rules. For relatively complex
parsing tasks, the same way we write robust C++ programs, it is advisable to
develop a grammar iteratively on a per-module basis where each module is a small
subset of the complete grammar. That way, we can stress-test individual modules
piecemeal until we reach the top-most module. For instance, when developing
a scripting language, we can start with expressions, then move on to statements,
then functions, upwards until we have a complete grammar. </p>
<p> At some point when the grammar gets quite complicated, it is desirable to
visualize the parse traversal and see what's happening. There are some facilities
in the framework that aid in the visualisation of the parse traversal for the
purpose of debugging. The following macros enable these features.</p>
<a name="debugging_macros"></a>
<h2>Debugging Macros</h2>
<a name="spirit_assert_exception"></a>
<h3>BOOST_SPIRIT_ASSERT_EXCEPTION</h3>
<p> Spirit contains assertions that may activate when spirit is used incorrectly.
By default these assertions use the assert macro from the standard library.
If you want spirit to throw an exception instead, define <tt>BOOST_SPIRIT_ASSERT_EXCEPTION</tt>
to the name of the class that you want to be thrown. This class's constructor
will be passed a <tt>const char*</tt> stringified version of the file, line,
and assertion condition, when it is thrown. If you want to totally disable the
assertion, <tt>#define NDEBUG</tt>.</p>
<a name="spirit_debug"></a>
<h3>BOOST_SPIRIT_DEBUG</h3>
<p>Define this to enable debugging.</p>
<p>With debugging enabled, special output is generated at key points of the
parse process, using the standard output operator (<tt><span class="keyword">operator</span><span class="special">&lt;&lt;</span></tt>)
with <tt>BOOST_SPIRIT_DEBUG_OUT</tt> (default is <tt><span class="identifier">std</span><span class="special">::</span><span class="identifier">cout</span></tt>,
see below) as its left operand.</p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><img src="theme/note.gif"> In order to use spirit's
debugging support you must ensure that appropriate overloads of
<tt><span class="identifier">operator</span><span class="special">&lt;&lt;</span></tt>
taking <tt>BOOST_SPIRIT_DEBUG_OUT</tt> as its left operand are available.
The expected semantics are those of the standard output operator.<br>
<br>
These overloads may be provided either within the namespace where the
corresponding class is declared (will be found through Argument Dependent Lookup) or [within an
anonymous namespace] within <tt><span class="keyword">namespace</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">spirit</span></tt>,
so it is visible where it is called.<br>
<br>
<img src="theme/alert.gif"> Note in particular that when <tt>BOOST_SPIRIT_DEBUG_FLAGS_CLOSURES</tt>
is set, overloads of <tt><span class="identifier">operator</span><span class="special">&lt;&lt;</span></tt>
taking instances of the types used in closures as their right operands are required.<br>
<br>
You may find an example of overloading the output operator for
<tt><span class="identifier">std</span><span class="special">::</span><span class="identifier">pair</span></tt>
in a <a href="faq.html#output_operator">related FAQ entry</a>.</td>
</tr>
</table>
<p>By default, if the <tt>BOOST_SPIRIT_DEBUG</tt> macro is defined, all available
debug output is generated. To fine tune the amount of generated text you can
define the <tt>BOOST_SPIRIT_DEBUG_FLAGS</tt> constant to be equal to a combination
of the following flags:</p>
<table width="90%" border="0" align="center">
<tr>
<td colspan="2" class="table_title"><b>Available flags to fine tune debug
output </b></td>
</tr>
<tr>
<td width="29%" height="27" class="table_cells"><tt>BOOST_SPIRIT_DEBUG_FLAGS_NODES</tt></td>
<td width="71%" class="table_cells"><p>print information about nodes (general
for all parsers)</p></td>
</tr>
<tr>
<td height="27" class="table_cells"><tt>BOOST_SPIRIT_DEBUG_FLAGS_TREES</tt></td>
<td class="table_cells"><p>print information about parse trees and AST's (general
for all tree parsers)</p></td>
</tr>
<tr>
<td height="27" class="table_cells"><tt>BOOST_SPIRIT_DEBUG_FLAGS_CLOSURES</tt></td>
<td class="table_cells">print information about closures (general for all
parsers with closures)</td>
</tr>
<tr>
<td height="27" class="table_cells"><tt>BOOST_SPIRIT_DEBUG_FLAGS_ESCAPE_CHAR</tt></td>
<td class="table_cells"><p>print information out of the <tt>esc_char_parser</tt></p></td>
</tr>
<tr>
<td height="27" class="table_cells"><tt>BOOST_SPIRIT_DEBUG_FLAGS_SLEX</tt></td>
<td class="table_cells">print information out of the <tt>SLEX</tt> parser</td>
</tr>
</table>
<p><a name="spirit_debug_out"></a> </p>
<h3>BOOST_SPIRIT_DEBUG_OUT</h3>
<p> Define this to redirect the debugging diagnostics printout to somewhere else
(e.g. a file or stream). Defaults to <tt>std::cout</tt>.</p>
<a name="spirit_debug_token printer"></a>
<h3>BOOST_SPIRIT_DEBUG_TOKEN_PRINTER</h3>
<p> The <tt>BOOST_SPIRIT_DEBUG_TOKEN_PRINTER</tt> macro allows you to redefine the way characters are printed on the stream. </p>
<p>If <tt>BOOST_SPIRIT_DEBUG_OUT</tt> is of type <tt>StreamT</tt>, the character type is <tt>CharT</tt> and <tt>BOOST_SPIRIT_DEBUG_TOKEN_PRINTER</tt> is
defined to <tt>foo</tt>, it must be compatible with this usage:</p>
<pre><code><span class=identifier> foo</span><span class=special>(</span><span class=identifier>StreamT</span><span class=special>, </span><span class=identifier>CharT</span><span class=special>)</span></code></pre>
<p>The default printer requires <tt>operator&lt;&lt;(StreamT, CharT)</tt> to
be defined. Additionally, if <tt>CharT</tt> is convertible to a normal character
type (<tt>char</tt>, <tt>wchar_t</tt> or <tt>int</tt>), it prints control
characters in a friendly manner (e.g., when it receives <span class=special>'\n'</span> it
actually prints the <span class=special>\</span> and <span class=special>n</span> characters,
instead of a newline).</p>
<a name="spirit_debug_print_some"></a>
<h3>BOOST_SPIRIT_DEBUG_PRINT_SOME</h3>
<p> The <tt>BOOST_SPIRIT_DEBUG_PRINT_SOME</tt> constant defines the number of
characters from the stream to be printed for diagnosis. This defaults to the
first 20 characters.</p>
<p><a name="spirit_debug_tracenode"></a> </p>
<h3>BOOST_SPIRIT_DEBUG_TRACENODE</h3>
<p> By default all parser nodes are traced. This constant may be used to redefine
this default. If this is <tt>1</tt> (<tt>true</tt>), then tracing is enabled
by default, if this constant is <tt>0</tt> (<tt>false</tt>), the tracing is
disabled by default. This preprocessor constant is set to <tt>1 </tt>(<tt>true</tt>)
by default.</p>
<p>Please note, that the following <tt>BOOST_SPIRIT_DEBUG_...() </tt>macros are
to be used at function scope only.</p>
<a name="spirit_debug_node_p_"></a>
<h3>BOOST_SPIRIT_DEBUG_NODE(p)</h3>
<p> Define this to print some debugging diagnostics for parser p. This macro</p>
<ul>
<li>Registers the parser name for debugging</li>
<li>Enables/disables the tracing for parser depending on <tt>BOOST_SPIRIT_DEBUG_TRACENODE</tt></li>
</ul>
<p> <b>Pre-parse</b>: Before entering the rule, the rule name followed by a peek
into the data at the current iterator position is printed.</p>
<p> <b>Post-parse</b>: After parsing the rule, the rule name followed by a peek
into the data at the current iterator position is printed. Here, <tt>'/'</tt>
before the rule name flags a successful match while <tt>'#'</tt> before the rule
name flags an unsuccessful match.</p>
<p> The following are synonyms for <tt>BOOST_SPIRIT_DEBUG_NODE</tt></p>
<ol>
<li>BOOST_SPIRIT_DEBUG_RULE</li>
<li>BOOST_SPIRIT_DEBUG_GRAMMAR</li>
</ol>
<a name="spirit_trace_node_p__flag_"></a>
<h3>BOOST_SPIRIT_DEBUG_TRACE_NODE(p, flag)</h3>
<p> Similar to <tt>BOOST_SPIRIT_DEBUG_NODE</tt>. Additionally allows selective debugging.
This is useful in situations where we want to debug just a hand picked set of
nodes.</p>
<p> The following are synonyms for <tt>BOOST_SPIRIT_DEBUG_TRACE_NODE</tt></p>
<ol>
<li>BOOST_SPIRIT_DEBUG_TRACE_RULE</li>
<li>BOOST_SPIRIT_DEBUG_TRACE_GRAMMAR</li>
</ol>
<p><a name="spirit_trace_node_p__flag__name_"></a> </p>
<h3>BOOST_SPIRIT_DEBUG_TRACE_NODE_NAME(p, name, flag)</h3>
<p> Similar to <tt>BOOST_SPIRIT_DEBUG_NODE</tt>. Additionally allows selective
debugging and allows to specify the name used during debug printout. This is
useful in situations where we want to debug just a hand picked set of nodes.
The <tt>name</tt> may be redefined in situations, where the parser parameter does not reflect the name of the parser to debug.</p>
<p> The following are synonyms for <tt>BOOST_SPIRIT_DEBUG_TRACE_NODE_NAME</tt></p>
<ol>
<li>BOOST_SPIRIT_DEBUG_TRACE_RULE_NAME</li>
<li>BOOST_SPIRIT_DEBUG_TRACE_GRAMMAR_NAME</li>
</ol>
<hr>
<p>Here's the original calculator with debugging features enabled:</p>
<pre>
<code><span class=preprocessor>#define </span><span class=identifier>BOOST_SPIRIT_DEBUG </span><span class=comment>///$$$ DEFINE THIS BEFORE ANYTHING ELSE $$$///
</span><span class=preprocessor>#include </span><span class=string>&quot;boost/spirit/include/classic.hpp&quot;
</span><span class=comment>/***/
/*** CALCULATOR GRAMMAR DEFINITIONS HERE ***/
</span><span class=identifier>BOOST_SPIRIT_DEBUG_RULE</span><span class=special>(</span><span class=identifier>integer</span><span class=special>);
</span><span class=identifier>BOOST_SPIRIT_DEBUG_RULE</span><span class=special>(</span><span class=identifier>group</span><span class=special>);
</span><span class=identifier>BOOST_SPIRIT_DEBUG_RULE</span><span class=special>(</span><span class=identifier>factor</span><span class=special>);
</span><span class=identifier>BOOST_SPIRIT_DEBUG_RULE</span><span class=special>(</span><span class=identifier>term</span><span class=special>);
</span><span class=identifier>BOOST_SPIRIT_DEBUG_RULE</span><span class=special>(</span><span class=identifier>expr</span><span class=special>);
</span></code></pre>
<p> <img src="theme/note.gif" width="16" height="16"> Be sure to add the macros <strong>inside</strong> the grammar definition's constructor. Now here's a sample session with the calculator.</p>
<pre><code> <span class="preprocessor">Type an expression...or [q or Q] to quit</span>
<span class="preprocessor">1 + 2</span>
grammar(calc): "1 + 2"
rule(expression): "1 + 2"
rule(term): "1 + 2"
rule(factor): "1 + 2"
rule(integer): "1 + 2"
<span class="preprocessor">push 1</span>
/rule(integer): " + 2"
/rule(factor): " + 2"
/rule(term): " + 2"
rule(term): "2"
rule(factor): "2"
rule(integer): "2"
<span class="preprocessor">push 2</span>
/rule(integer): ""
/rule(factor): ""
/rule(term): ""
<span class="preprocessor">popped 1 and 2 from the stack. pushing 3 onto the stack.</span>
/rule(expression): ""
/grammar(calc): ""
<span class="preprocessor">-------------------------
Parsing succeeded
result = 3
-------------------------</span></code></pre>
<p> We typed in &quot;1 + 2&quot;. Notice that there are two successful branches
from the top rule <tt>expr</tt>. The text in red is generated by the parser's
semantic actions while the others are generated by the debug-diagnostics of
our rules. Notice how the first <tt>integer</tt> rule took &quot;1&quot;, the
first <tt>term</tt> rule took &quot;+&quot; and finally the second <tt>integer</tt>
rule took &quot;2&quot;.</p>
<p>Please note the special meaning of the first characters appearing on the printed
lines:</p>
<ul>
<li>a single <span class="literal">'/'</span> starts a line containing the information
about a successfully matched parser node (<tt>rule&lt;&gt;</tt>, <tt>grammar&lt;&gt;</tt>
or <tt>subrule&lt;&gt;</tt>)</li>
<li>a single <span class="literal">'#'</span> starts a line containing the information
about a failed parser node</li>
<li>a single <span class="literal">'^'</span> starts a line containing the first member (return value/synthesised
attribute) of the closure of a successfully matched parser node.</li>
</ul>
<p>Check out <a href="../example/fundamental/calc_debug.cpp">calc_debug.cpp</a> to see debugging in action. </p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="position_iterator.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="error_handling.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
Copyright &copy; 2003 Hartmut Kaiser<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)</font></p>
<p class="copyright">&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,202 @@
<html>
<head>
<title>Directives</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%">
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Directives</b></font>
</td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="epsilon.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="scanner.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>Parser directives have the form: <b>directive[expression]</b></p>
<p>A directive modifies the behavior of its enclosed expression, essentially <em>decorating</em>
it. The framework pre-defines a few directives. Clients of the framework are
free to define their own directives as needed. Information on how this is done
will be provided later. For now, we shall deal only with predefined directives.</p>
<h2>lexeme_d</h2>
<p>Turns off white space skipping. At the phrase level, the parser ignores white
spaces, possibly including comments. Use <tt>lexeme_d</tt> in situations where
we want to work at the character level instead of the phrase level. Parsers
can be made to work at the character level by enclosing the pertinent parts
inside the lexeme_d directive. For example, let us complete the example presented
in the <a href="introduction.html">Introduction</a>. There, we skipped the definition
of the <tt>integer</tt> rule. Here's how it is actually defined:</p>
<pre><code><font color="#000000"><span class=identifier> </span><span class=identifier>integer </span><span class=special>= </span><span class=identifier>lexeme_d</span><span class=special>[ </span><span class=special>!(</span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'+'</span><span class=special>) </span><span class=special>| </span><span class=literal>'-'</span><span class=special>) </span><span class=special>&gt;&gt; </span><span class=special>+</span><span class=identifier>digit </span><span class=special>];</span></font></code></pre>
<p>The <tt>lexeme_d</tt> directive instructs the parser to work on the character
level. Without it, the <tt>integer</tt> rule would have allowed erroneous embedded
white spaces in inputs such as <span class="quotes">&quot;1 2 345&quot;</span>
which will be parsed as <span class="quotes">&quot;12345&quot;</span>.</p>
<h2>as_lower_d</h2>
<p>There are times when we want to inhibit case sensitivity. The <tt>as_lower_d</tt>
directive converts all characters from the input to lower-case.</p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><img src="theme/alert.gif" width="16" height="16"><b>
as_lower_d behavior</b> <br>
<br>
It is important to note that only the input is converted to lower case.
Parsers enclosed inside the <tt>as_lower_d</tt> expecting upper case characters
will fail to parse. Example: <tt>as_lower_d[<span class="quotes">'X'</span>]</tt>
will never succeed because it expects an upper case <tt class="quotes">'X'</tt>
that the <tt>as_lower_d</tt> directive will never supply.</td>
</tr>
</table>
<p>For example, in Pascal, keywords and identifiers are case insensitive. Pascal
ignores the case of letters in identifiers and keywords. Identifiers Id, ID
and id are indistinguishable in Pascal. Without the as_lower_d directive, it
would be awkward to define a rule that recognizes this. Here's a possibility:</p>
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>r </span><span class=special>= </span><span class=identifier>str_p</span><span class=special>(</span><span class=string>"id"</span><span class=special>) </span><span class=special>| </span><span class=string>"Id" </span><span class=special>| </span><span class=string>"iD" </span><span class=special>| </span><span class=string>"ID"</span><span class=special>;</span></font></code></pre>
<p>Now, try doing that with the case insensitive Pascal keyword <span class="quotes">&quot;BEGIN&quot;</span>.
The <tt>as_lower_d</tt> directive makes this simple:</p>
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>r </span><span class=special>= </span><span class=identifier>as_lower_d</span><span class=special>[</span><span class=string>"begin"</span><span class=special>];</span></font></code></pre>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><div align="justify"><img src="theme/note.gif" width="16" height="16">
<b>Primitive arguments</b> <br>
<br>
The astute reader will notice that we did not explicitly wrap <span class="quotes">&quot;begin&quot;</span>
inside an <tt>str_p</tt>. Whenever appropriate, directives should be able
to allow primitive types such as <tt>char</tt>, <tt>int</tt>, <tt>wchar_t</tt>,
<tt>char const<span class="operators">*</span></tt>, <tt>wchar_t const<span class="operators">*</span></tt>
and so on. Examples: <tt><br>
<br>
</tt><code><span class=identifier>as_lower_d</span><tt><span class=special>[</span><span class=string>"hello"</span><span class=special>]
</span><span class=comment>// same as as_lower_d[str_p("hello")]</span></tt><code></code><span class=identifier><br>
as_lower_d</span><span class=special>[</span><span class=literal>'x'</span><span class=special>]
</span><span class=comment>// same as as_lower_d[ch_p('x')]</span></code></div></td>
</tr>
</table>
<h3>no_actions_d</h3>
<p>There are cases where you want <a href="semantic_actions.html">semantic actions</a>
not to be triggered. By enclosing a parser in the <tt>no_actions_d</tt> directive,
all semantic actions directly or indirectly attached to the parser will not
fire. </p>
<pre><code><font color="#000000"><span class=special> </span>no_actions_d<span class=special>[</span><span class=identifier>expression</span><span class=special>]</span></font></code><code><font color="#000000"><span class=special></span></font></code></pre>
<h3>Tweaking the Scanner Type</h3>
<p><img src="theme/note.gif" width="16" height="16"> How does <tt>lexeme_d, as_lower_d</tt>
and <font color="#000000"><tt>no_actions_d</tt></font> work? These directives
do their magic by tweaking the scanner policies. Well, you don't need to know
what that means for now. Scanner policies are discussed <a href="indepth_the_scanner.html">later</a>.
However, it is important to note that when the scanner policy is tweaked, the
result is a different scanner. Why is this important to note? The <a href="rule.html">rule</a>
is tied to a particular scanner (one or more scanners, to be precise). If you
wrap a rule inside a <tt>lexeme_d, as_lower_d</tt> or <font color="#000000"><tt>no_actions_d</tt>, the
compiler will complain about <a href="faq.html#scanner_business">scanner mismatch</a>
unless you associate the required scanner with the rule. </font></p>
<p><tt>lexeme_scanner</tt>, <tt>as_lower_scanner</tt> and <tt>no_actions_scanner</tt>
are your friends if the need to wrap a rule inside these directives arises. Learn
about these beasts in the next chapter on <a href="scanner.html#lexeme_scanner">The
Scanner and Parsing</a>.</p>
<h2>longest_d</h2>
<p>Alternatives in the Spirit parser compiler are short-circuited (see <a href="operators.html">Operators</a>).
Sometimes, this is not what is desired. The <tt>longest_d</tt> directive instructs
the parser not to short-circuit alternatives enclosed inside this directive,
but instead makes the parser try all possible alternatives and choose the one
matching the longest portion of the input stream.</p>
<p>Consider the parsing of integers and real numbers:</p>
<pre><code><font color="#000000"><span class=comment> </span><span class=identifier>number </span><span class=special>= </span><span class=identifier>real </span><span class=special>| </span><span class=identifier>integer</span><span class=special>;</span></font></code></pre>
<p>A number can be a real or an integer. This grammar is ambiguous. An input <span class="quotes">&quot;1234&quot;</span>
should potentially match both real and integer. Recall though that alternatives
are short-circuited. Thus, for inputs such as above, the real alternative always
wins. However, if we swap the alternatives:</p>
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>number </span><span class=special>= </span><span class=identifier>integer </span><span class=special>| </span><span class=identifier>real</span><span class=special>;</span></font></code></pre>
<p>we still have a problem. Now, an input <span class="quotes">&quot;123.456&quot;</span>
will be partially matched by integer until the decimal point. This is not what
we want. The solution here is either to fix the ambiguity by factoring out the
common prefixes of real and integer or, if that is not possible nor desired,
use the <tt>longest_d</tt> directive:</p>
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>number </span><span class=special>= </span><span class=identifier>longest_d</span><span class=special>[ </span><span class=identifier>integer </span><span class=special>| </span><span class=identifier>real </span><span class=special>];</span></font></code></pre>
<h2>shortest_d</h2>
<p>Opposite of the <tt>longest_d</tt> directive.</p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><img src="theme/note.gif" width="16" height="16"> <b>Multiple
alternatives</b> <br>
<br>
The <tt>longest_d</tt> and <tt>shortest_d</tt> directives can accept two
or more alternatives. Examples:<br>
<br>
<font color="#000000"><span class=identifier><code>longest_d</code></span><code><span class=special>[
</span><span class=identifier>a </span><span class=special>| </span><span class=identifier>b
</span><span class=special>| </span><span class=identifier>c </span><span class=special>];
</span><span class=identifier><br>
shortest_d</span><span class=special>[ </span><span class=identifier>a </span><span class=special>|
</span><span class=identifier>b </span><span class=special>| </span><span class=identifier>c
</span><span class=special>| </span><span class=identifier>d </span><span class=special>];</span></code></font></td>
</tr>
</table>
<h2>limit_d</h2>
<p>Ensures that the result of a parser is constrained to a given min..max range
(inclusive). If not, then the parser fails and returns a no-match.</p>
<p><b>Usage:</b></p>
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>limit_d</span><span class=special>(</span><span class=identifier>min</span><span class=special>, </span><span class=identifier>max</span><span class=special>)[</span><span class=identifier>expression</span><span class=special>]</span></font></code></pre>
<p>This directive is particularly useful in conjunction with parsers that parse
specific scalar ranges (for example, <a href="numerics.html">numeric parsers</a>).
Here's a practical example. Although the numeric parsers can be configured to
accept only a limited number of digits (say, 0..2), there is no way to limit
the result to a range (say -1.0..1.0). This design is deliberate. Doing so would
have undermined Spirit's design rule that <i><span class="quotes">&quot;the
client should not pay for features that she does not use&quot;</span></i>. We
would have stored the min, max values in the numeric parser itself, used or
unused. Well, we could get by by using static constants configured by a non-type
template parameter, but that is not acceptable because that way, we can only
accommodate integers. What about real numbers or user defined numbers such as
big-ints?</p>
<p><b>Example</b>, parse time of the form <b>HH:MM:SS</b>:</p>
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>uint_parser</span><span class=special>&lt;</span><span class=keyword>int</span><span class=special>, </span><span class=number>10</span><span class=special>, </span><span class=number>2</span><span class=special>, </span><span class=number>2</span><span class=special>&gt; </span><span class=identifier>uint2_p</span><span class=special>;
</span><span class=identifier>r </span><span class=special>= </span><span class=identifier>lexeme_d
</span><span class=special>[
</span><span class=identifier>limit_d</span><span class=special>(</span><span class=number>0u</span><span class=special>, </span><span class=number>23u</span><span class=special>)[</span><span class=identifier>uint2_p</span><span class=special>] </span><span class=special>&gt;&gt; </span><span class=literal>':' </span><span class=comment>// Hours 00..23
</span><span class=special>&gt;&gt; </span><span class=identifier>limit_d</span><span class=special>(</span><span class=number>0u</span><span class=special>, </span><span class=number>59u</span><span class=special>)[</span><span class=identifier>uint2_p</span><span class=special>] </span><span class=special>&gt;&gt; </span><span class=literal>':' </span><span class=comment>// Minutes 00..59
</span><span class=special>&gt;&gt; </span><span class=identifier>limit_d</span><span class=special>(</span><span class=number>0u</span><span class=special>, </span><span class=number>59u</span><span class=special>)[</span><span class=identifier>uint2_p</span><span class=special>] </span><span class=comment>// Seconds 00..59
</span><span class=special>];</span></font></code>
</pre>
<h2>min_limit_d</h2>
<p>Sometimes, it is useful to unconstrain just the maximum limit. This will allow
for an interval that's unbounded in one direction. The directive min_limit_d
ensures that the result of a parser is not less than minimum. If not, then the
parser fails and returns a no-match.</p>
<p><b>Usage:</b></p>
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>min_limit_d</span><span class=special>(</span><span class=identifier>min</span><span class=special>)[</span><span class=identifier>expression</span><span class=special>]</span></font></code></pre>
<p><b>Example</b>, ensure that a date is not less than 1900</p>
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>min_limit_d</span><span class=special>(</span><span class=number>1900u</span><span class=special>)[</span><span class=identifier>uint_p</span><span class=special>]</span></font></code></pre>
<h2>max_limit_d</h2>
<p>Opposite of <tt>min_limit_d</tt>. Take note that <tt>limit_d(min, max)[p]</tt> is equivalent
to:</p>
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>min_limit_d</span><span class=special>(</span><span class=identifier>min</span><span class=special>)[</span><span class=identifier>max_limit_d</span><span class=special>(</span><span class=identifier>max</span><span class=special>)[</span><span class=identifier>p</span><span class=special>]]</span></font></code><code><font color="#000000"><span class=special></span></font></code></pre>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="epsilon.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="scanner.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
<p>&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,122 @@
<html>
<head>
<!-- Generated by the Spirit (http://spirit.sf.net) QuickDoc -->
<title>Distinct Parser</title>
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" height="48" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%">
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Distinct Parser </b></font></td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="scoped_lock.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="symbols.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<h3>Distinct Parsers</h3><p>
The distinct parsers are utility parsers which ensure that matched input is
not immediately followed by a forbidden pattern. Their typical usage is to
distinguish keywords from identifiers.</p>
<h3>distinct_parser</h3>
<p>
The basic usage of the <tt>distinct_parser</tt> is to replace the <tt>str_p</tt> parser. For
example the <tt>declaration_rule</tt> in the following example:</p>
<pre>
<code><span class=identifier>rule</span><span class=special>&lt;</span><span class="identifier">ScannerT</span><span class=special>&gt; </span><span class=identifier>declaration_rule </span><span class=special>= </span><span class=identifier>str_p</span><span class=special>(</span><span class=string>&quot;declare&quot;</span><span class=special>) &gt;&gt; </span><span class=identifier>lexeme_d</span><span class=special>[+</span><span class=identifier>alpha_p</span><span class=special>];
</span></code></pre>
<p>
would correctly match the input &quot;declare abc&quot;, but it would also match the input &quot;declareabc&quot;, which is usually not intended. In order to avoid this, we can
use <tt>distinct_parser</tt>:</p>
<code>
<pre>
<span class=comment>// keyword_p may be defined in the global scope
</span><span class=identifier>distinct_parser</span><span class=special>&lt;&gt; </span><span class=identifier>keyword_p</span><span class=special>(</span><span class=string>&quot;a-zA-Z0-9_&quot;</span><span class=special>);
</span><span class=identifier>rule</span><span class=special>&lt;</span><span class="identifier">ScannerT</span><span class=special>&gt; </span><span class=identifier>declaration_rule </span><span class=special>= </span><span class=identifier>keyword_p</span><span class=special>(</span><span class=string>&quot;declare&quot;</span><span class=special>) &gt;&gt; </span><span class=identifier>lexeme_d</span><span class=special>[+</span><span class=identifier>alpha_p</span><span class=special>];
</span></pre>
</code>
<p>
The <tt>keyword_p</tt> works in the same way as the <tt>str_p</tt> parser but matches only
when the matched input is not immediately followed by one of the characters
from the set passed to the constructor of <tt>keyword_p</tt>. In the example the
&quot;declare&quot; can't be immediately followed by any alphabetic character, any
number or an underscore.</p>
<p>
See the full <a href="../example/fundamental/distinct/distinct_parser.cpp">example here </a>.</p>
<h3>distinct_directive</h3><p>
For more sophisticated cases, for example when keywords are stored in a
symbol table, we can use <tt>distinct_directive</tt>.</p>
<pre>
<code><span class=identifier>distinct_directive</span><span class=special>&lt;&gt; </span><span class=identifier>keyword_d</span><span class=special>(</span><span class=string>&quot;a-zA-Z0-9_&quot;</span><span class=special>);
</span><span class=identifier>symbols</span><span class=special>&lt;&gt; </span><span class=identifier>keywords </span><span class=special>= </span><span class=string>&quot;declare&quot;</span><span class=special>, </span><span class=string>&quot;begin&quot;</span><span class=special>, </span><span class=string>&quot;end&quot;</span><span class=special>;
</span><span class=identifier>rule</span><span class=special>&lt;</span><span class="identifier">ScannerT</span><span class=special>&gt; </span><span class=identifier>keyword </span><span class=special>= </span><span class=identifier>keyword_d</span><span class=special>[</span><span class=identifier>keywords</span><span class=special>];
</span></code></pre>
<h3>dynamic_distinct_parser and dynamic_distinct_directive</h3><p>
In some cases a set of forbidden follow-up characters is not sufficient.
For example, ASN.1 naming conventions allow identifiers to contain dashes,
but not double dashes (which mark the beginning of a comment).
Furthermore, identifiers can't end with a dash. So, a matched keyword can't
be followed by any alphanumeric character or exactly one dash, but can be
followed by two dashes.</p>
<p>
This is when <tt>dynamic_distinct_parser</tt> and the <tt>dynamic_distinct_directive </tt>come into play. The constructor of the <tt>dynamic_distinct_parser</tt> accepts a
parser which matches any input that <strong>must NOT</strong> follow the keyword.</p>
<pre>
<code><span class=comment>// Alphanumeric characters and a dash followed by a non-dash
// may not follow an ASN.1 identifier.
</span><span class=identifier>dynamic_distinct_parser</span><span class=special>&lt;&gt; </span><span class=identifier>keyword_p</span><span class=special>(</span><span class=identifier>alnum_p </span><span class=special>| (</span><span class=literal>'-' </span><span class=special>&gt;&gt; ~</span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'-'</span><span class=special>)));
</span><span class=identifier>rule</span><span class=special>&lt;</span><span class="identifier">ScannerT</span><span class=special>&gt; </span><span class=identifier>declaration_rule </span><span class=special>= </span><span class=identifier>keyword_p</span><span class=special>(</span><span class=string>&quot;declare&quot;</span><span class=special>) &gt;&gt; </span><span class=identifier>lexeme_d</span><span class=special>[+</span><span class=identifier>alpha_p</span><span class=special>];
</span></code></pre>
<p>
Since the <tt>dynamic_distinct_parser</tt> internally uses a rule, its type is
dependent on the scanner type. So, the <tt>keyword_p</tt> shouldn't be defined
globally, but rather within the grammar.</p>
<p>
See the full <a href="../example/fundamental/distinct/distinct_parser_dynamic.cpp">example here</a>.</p>
<h3>How it works</h3><p>
When the <tt>keyword_p_1</tt> and the <tt>keyword_p_2</tt> are defined as</p>
<code><pre>
<span class=identifier>distinct_parser</span><span class=special>&lt;&gt; </span><span class=identifier>keyword_p_1</span><span class=special>(</span><span class=identifier>forbidden_chars</span><span class=special>);
</span><span class=identifier>dynamic_distinct_parser</span><span class=special>&lt;&gt; </span><span class=identifier>keyword_p_2</span><span class=special>(</span><span class=identifier>forbidden_tail_parser</span><span class=special>);
</span></pre></code>
<p>
the parsers</p>
<code><pre>
<span class=identifier>keyword_p_1</span><span class=special>(</span><span class=identifier>str</span><span class=special>)
</span><span class=identifier>keyword_p_2</span><span class=special>(</span><span class=identifier>str</span><span class=special>)
</span></pre></code>
<p>
are equivalent to the rules</p>
<code><pre>
<span class=identifier>lexeme_d</span><span class=special>[</span><span class=identifier>chseq_p</span><span class=special>(</span><span class=identifier>str</span><span class=special>) &gt;&gt; ~</span><span class=identifier>epsilon_p</span><span class=special>(</span><span class=identifier>chset_p</span><span class=special>(</span><span class=identifier>forbidden_chars</span><span class=special>))]
</span><span class=identifier>lexeme_d</span><span class=special>[</span><span class=identifier>chseq_p</span><span class=special>(</span><span class=identifier>str</span><span class=special>) &gt;&gt; ~</span><span class=identifier>epsilon_p</span><span class=special>(</span><span class=identifier>forbidden_tail_parser</span><span class=special>)]
</span></pre></code>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="scoped_lock.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="symbols.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 2003-2004
Vaclav Vesely<br><br>
<font size="2">Use, modification and distribution is subject to the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) </font> </p>
</body>
</html>

View File

@@ -0,0 +1,99 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html><head>
<title>Dynamic Parsers</title><meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css"></head>
<body>
<table background="theme/bkd2.gif" border="0" cellspacing="2" width="100%">
<tbody><tr>
<td width="10">
</td>
<td width="85%"> <font face="Verdana, Arial, Helvetica, sans-serif" size="6"><b>Dynamic
Parsers </b></font></td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" align="right" border="0" height="48" width="112"></a></td>
</tr>
</tbody></table>
<br>
<table border="0">
<tbody><tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="closures.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="stored_rule.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</tbody></table>
<p>We see dynamic parsing everywhere in Spirit. A special group of
parsers, aptly named dynamic parsers, form the most basic building
blocks to dynamic parsing. This chapter focuses on these critters.
You'll notice the similarity of these parsers with C++'s control
structures. The similarity is not a coincidence. These parsers give an
imperative flavor to parsing, and, since imperative constructs are not
native to declarative EBNF, mimicking the host language, C++, should
make their use immediately familiar. </p>
<p>Dynamic parsers modify the parsing behavior according to conditions. Constructing
dynamic parsers requires a condition argument and a body parser argument. Additional
arguments are required by some parsers.</p>
<h2>Conditions</h2>
<p>Functions or functors returning values convertible to bool can be used as conditions.
When the evaluation of the function/functor yields true it will be considered
as meeting the condition.</p>
<p>Parsers can be used as conditions, as well. When the parser matches the condition
is met. Parsers used as conditions work in an all-or-nothing manner: the scanner
will not be advanced when they don't match.</p>
<p>A failure to meet the condition will not result in a parse error.</p>
<h2>if_p</h2>
<p><tt>if_p</tt> can be used with or without an else-part. The syntax is:</p>
<pre> <span class="identifier">if_p</span><span class="special">(</span><span class="identifier">condition</span><span class="special">)[</span><span class="identifier">then</span><span class="special">-</span><span class="identifier">parser</span><span class="special">]</span></pre>
<p><span class="special"></span>or</p>
<pre><span class="identifier"> if_p</span><span class="special">(</span><span class="identifier">condition</span><span class="special">)[</span><span class="identifier">then</span><span class="special">-</span><span class="identifier">parser</span><span class="special">].</span><span class="identifier">else_p</span><span class="special">[</span><span class="identifier">else</span><span class="special">-</span><span class="identifier">parser</span><span class="special">]</span></pre>
<p>When the condition is met the then-parser is used next in the parsing process.
When the condition is not met and an else-parser is available the else-parser
is used next. When the condition isn't met and no else-parser is available then
the whole parser matches the empty sequence. (<img src="theme/alert.gif" height="16" width="16">
Note: older versions of <tt>if_p</tt> report a failure when the condition isn't
met and no else-parser is available.)</p>
<p>Example:</p>
<pre> <span class="special"></span><span class="identifier">if_p</span><span class="special">(</span><span class="string">"0x"</span><span class="special">)[</span><span class="identifier">hex_p</span><span class="special">].</span><span class="identifier">else_p</span><span class="special">[</span><span class="identifier">uint_p</span><span class="special">]</span></pre>
<h2>while_p, do_p</h2>
<p><tt>while_p</tt>/<tt>do_p</tt> syntax is:</p>
<pre> <span class="identifier">while_p</span><span class="special">(</span><span class="identifier">condition</span><span class="special">)[</span><span class="identifier">body</span><span class="special">-</span><span class="identifier">parser</span><span class="special">]<br> </span><span class="identifier">do_p</span><span class="special">[</span><span class="identifier">body</span><span class="special">-</span><span class="identifier">parser</span><span class="special">].</span><span class="identifier">while_p</span><span class="special">(</span><span class="identifier">condition</span><span class="special">)</span></pre>
<p>As long as the condition is met the dynamic parser constructed by <tt>while_p</tt>
will try to match the body-parser. <tt>do_p</tt> returns a parser that tries
to match the body-parser and then behaves just like the parser returned by <tt>while_p</tt>.
A failure to match the body-parser will cause a failure to be reported by the
while/do-parser.</p>
<p>Example:</p>
<pre><span class="special"> </span><span class="identifier">uint_p</span><span class="special">[</span><span class="identifier">assign_a</span><span class="special">(</span><span class="identifier">sum</span><span class="special">)] &gt;&gt; </span><span class="identifier">while_p</span><span class="special">(</span><span class="literal">'+'</span><span class="special">)[</span><span class="identifier">uint_p</span><span class="special">[</span><span class="identifier">add</span><span class="special">(</span><span class="identifier">sum</span><span class="special">)]]<br> </span><span class="literal">'"' </span><span class="special">&gt;&gt; </span><span class="identifier">while_p</span><span class="special">(~</span><span class="identifier">eps_p</span><span class="special">(</span><span class="literal">'"'</span><span class="special">))[</span><span class="identifier">c_escape_ch_p</span><span class="special">[</span><span class="identifier">push_back_a</span><span class="special">(</span><span class="identifier">result</span><span class="special">)]] &gt;&gt; </span><span class="literal">'"'</span>
</pre>
<p>Assuming <span style="font-family: monospace;">add</span> is a user defined function object.<br></p><h2>for_p</h2>
<p><tt>for_p</tt> requires four arguments. The syntax is:</p>
<pre> <span class="literal"></span><span class="identifier">for_p</span><span class="special">(</span><span class="identifier">init</span><span class="special">, </span><span class="identifier">condition</span><span class="special">, </span><span class="identifier">step</span><span class="special">)[</span><span class="identifier">body</span><span class="special">-</span><span class="identifier">parser</span><span class="special">]</span></pre>
<p>init and step have to be 0-ary functions/functors. for_p returns a parser that
will:</p>
<ol>
<li> call init</li>
<li>check the condition, if the
condition isn't met then a match is returned. The match will cover
everything that has been matched successfully up to this point.</li>
<li> try to match the body-parser. A failure to match the body-parser will cause a failure to be reported by the for-parser</li>
<li> call step</li>
<li> go to 2.</li>
</ol>
<table border="0">
<tbody><tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="closures.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="stored_rule.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</tbody></table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 2002-2003 Joel de Guzman<br>
Copyright &copy; 2002-2003 Martin Wille<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)</font></p>
<p class="copyright">&nbsp;</p>
</body></html>

View File

@@ -0,0 +1,121 @@
<html>
<head>
<title>Epsilon</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Epsilon</b></font>
</td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="rule.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="directives.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>The <strong>Epsilon</strong> (<tt>epsilon_p</tt> and <tt>eps_p</tt>) is a multi-purpose
parser that returns a zero length match. </p>
<h3>Simple Form</h3>
<p>In its simplest form, epsilon_p matches the null string and always returns
a match of zero length:</p>
<pre><code><span class=special> </span><span class="identifier">epsilon_p </span><span class="comment">// always returns a zero-length match</span></code></pre>
<p>This form is usually used to trigger a <a href="semantic_actions.html">semantic
action</a> unconditionally. For example, it is useful in triggering error messages
when a set of alternatives fail:</p>
<pre><code><span class=special> </span><span class="identifier">r</span><span class="special"> = </span><span class="identifier">A</span><span class="special"> | </span><span class="identifier">B</span><span class="special"> | </span><span class="identifier">C</span><span class="special"> | </span><span class="identifier">eps_p</span><span class="special">[</span><span class="identifier">error</span><span class="special">];</span><span class="identifier"></span><span class="comment"> // error if A, B, or C fails to match</span></code></pre>
<h3>Semantic Predicate</h3>
<p>Semantic predicates allow you to attach a function anywhere in the grammar.
In this role, the epsilon takes a 0-ary (nullary) function/functor. The run-time
function/functor is typically a test that is called upon to resolve ambiguity
in the grammar. A parse failure will be reported when the function/functor result
evaluates to false. Otherwise an empty match will be reported. The general form
is:</p>
<pre> eps_p<span class="special">(</span>f<span class="special">) &gt;&gt;</span> rest<span class="special">;</span>
</pre>
<p>The nullary function <tt>f</tt> is called to do a semantic test (say, checking
if a symbol is in the <a href="symbols.html">symbol table</a>). If test returns
<tt>true</tt>, <tt>rest</tt> will be evaluated. Otherwise, the production will
return early with a no-match without ever touching <tt>rest</tt>.</p>
<h3>Syntactic Predicate</h3>
<p>Similar to Semantic predicates, Syntactic predicates assert a certain conditional
syntax to be satisfied before evaluating another production. This time, epsilon_p
accepts a (conditional) parser. The general form is:</p>
<pre> eps_p<span class="special">(</span>p<span class="special">) &gt;&gt;</span> rest<span class="special">;</span>
</pre>
<p>If <tt>p</tt> is matched on the input stream then attempt to recognize <tt>rest</tt>.
The parser <tt>p</tt> is called to do a syntax check. When <tt>p</tt> matches,
<tt>eps_p(p)</tt> returns a zero length match (i.e. the input is not consumed),
no matter how much input <tt>p</tt> itself matched, and <tt>rest</tt> will be
evaluated. Otherwise, the production will return early with a no-match without
ever touching <tt>rest</tt>.</p>
<p>Example:</p>
<pre><code><span class=special> </span><span class="identifier">eps_p</span><span class="special">(</span><span class="literal">'0'</span><span class="special">) &gt;&gt; </span><span class="identifier">oct_p </span><span class="comment">// note that '0' is actually a ch_p('0')</span><span class="identifier"> </span></code></pre>
<p>Epsilon here is used as a syntactic predicate. <tt>oct_p</tt> (see <a href="numerics.html">numerics</a>)
is parsed only if we see a leading <tt>'0'</tt>. Wrapping the leading <tt>'0'</tt>
inside an epsilon makes the parser not consume anything from the input. If a
<tt>'0'</tt> is seen, <tt>epsilon_p</tt> reports a successful match with zero
length. </p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><div align="justify"><img src="theme/note.gif" width="16" height="16">
<b>Primitive arguments</b> <br>
<br>
Epsilon allows primitive type arguments such as <tt>char</tt>, <tt>int</tt>,
<tt>wchar_t</tt>, <tt>char const<span class="operators">*</span></tt>,
<tt>wchar_t const<span class="operators">*</span></tt> and so on. Examples:
<tt><br>
<br>
</tt><code><span class="identifier">eps_p</span><tt><span class=special>(</span><span class=string>"hello"</span><span class=special>)</span><span class=comment>
// same as eps_p(str_p("hello"))</span></tt><span class=identifier><br>
eps_p</span><span class=special>(</span><span class=literal>'x'</span><span class="special">)
</span><span class=comment>// same as eps_p(ch_p('x'))</span></code></div></td>
</tr>
</table>
<h3><img src="theme/alert.gif" width="16" height="16"> Inhibiting Semantic Actions</h3>
<p>In a syntactic predicate <tt>eps_p(p)</tt>, any semantic action directly or
indirectly attached to the conditional parser <tt>p</tt> will not be called.
However, semantic actions attached to epsilon itself will always be called.
The following code snippets illustrate the behavior:</p>
<pre> eps_p<span class="special">(</span>c<span class="special">[</span>f<span class="special">])</span> <span class="comment">// f not called</span><br> eps_p<span class="special">(</span>c<span class="special">)[</span>f<span class="special">]</span> <span class="comment">// f is called</span><br> eps_p<span class="special">[</span>f<span class="special">]</span> <span class="comment">// f is called</span></pre>
<p>Actually, the conditional parser <tt>p</tt> is implicitly wrapped in a <tt><a href="scanner.html#no_actions_scanner">no_actions_d</a></tt>
directive:</p>
<pre><code><span class=special> </span>no_actions_d<span class="special">[</span>p<span class="special">]</span></code></pre>
<p>The conditional parser is required to be free from side-effects (semantic actions).
<code></code>The conditional parser's purpose is to resolve ambiguity by looking
ahead in the input stream for a certain pattern. Ambiguity and semantic actions
do not mix well. On an ambiguous grammar, backtracking happens. And when it
happens, we cannot undo the effects of triggered semantic actions. </p>
<h3>Negation</h3>
<p>Operator <tt>~</tt> is defined for parsers constructed by <tt>epsilon_p</tt>/<tt>eps_p</tt>.
It performs negation by complementing the results reported. <tt>~~eps_p(x)</tt>
is identical to <tt>eps_p(x)</tt>.</p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="rule.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="directives.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
Copyright &copy; 2003 Martin Wille<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
<p>&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,212 @@
<html>
<head>
<title>Error Handling</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Error
Handling </b></font></td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="debugging.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="quickref.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>C++'s exception handling mechanism is a perfect match for error handling in
the framework. Imagine a complete parser as a maze. At each branch, the input
dictates where we will turn. Given an erroneous input, we may reach a dead end.
If we ever reach one, it would be a waste of time to backtrack from where we
came from. Instead, we supply guards in strategic points. Beyond a certain point,
we put parser assertions in places where one is not allowed to go. </p>
<p>The assertions are like springs that catapult us back to the guard. If we ever
reach a brick wall given a specific input pattern, everything unwinds quickly
and we are thrown right back to the guard. This can be a very effective optimization
when used wisely. Right back at the guard, we have a chance to correct the situation,
if possible. The following illustration depicts the scenario.</p>
<table border="0" align="center">
<tr>
<td><img src="theme/error_handling.png" width="313" height="238"></td>
</tr>
</table>
<a name="the_parser_exception"></a>
<h2>Parser Errors</h2>
<p> The <tt>parser_error</tt> class is the generic parser exception class used
by Spirit. This is the base class for all parser exceptions.</p>
<pre> <code><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ErrorDescrT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>IteratorT </span><span class=special>= </span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*&gt;
</span><span class=keyword>class </span><span class=identifier>parser_error </span><span class=special>
{
</span><span class=keyword>public</span><span class=special>:
</span><span class=identifier>parser_error</span><span class=special>(</span><span class=identifier>IteratorT </span><span class=identifier>where</span><span class=special>, </span><span class=identifier>ErrorDescrT </span><span class=identifier>descriptor</span><span class=special>);
</span><span class=identifier>IteratorT </span><span class=identifier>where</span><span class=special>;
</span><span class=identifier>ErrorDescrT</span><span class=identifier> descriptor</span><span class=special>;
</span><span class=special>};
</span></code></pre>
<p> The exception holds the iterator position where the error was encountered
in its <tt>where</tt> member variable. In addition to the iterator, <tt>parser_error</tt>
also holds information regarding the error (error descriptor) in its <tt>descriptor
</tt> member variable.</p>
<p> Semantic actions are free to throw parser exceptions when necessary. A utility
function <tt>throw_</tt> may be called. This function creates and throws a <tt>parser_error</tt>
given an iterator and an error descriptor:</p>
<pre>
<code><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ErrorDescrT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>IteratorT</span><span class=special>&gt;
</span><span class=keyword>void </span><span class=identifier>throw_</span><span class=special>(</span><span class=identifier>IteratorT where</span><span class=special>, </span><span class=identifier>ErrorDescrT descriptor</span><span class=special>);
</span></code></pre>
<a name="the_parser_assertion"></a>
<h2>Parser Assertions</h2>
<p> Assertions may be put in places where we have no option other than to
expect parsing to succeed. If parsing fails, a specific type of exception
is thrown.</p>
<p> Before declaring the grammar, we declare some assertion objects. <tt>assertion</tt>
is a template class parameterized by the type of error that will be thrown once
the assertion fails. The following assertions are parameterized by a user defined
Error enumeration.</p>
<a name="examples"></a>
<h3>Examples</h3>
<pre>
<code><span class=keyword>enum </span><span class=identifier>Errors
</span><span class=special>{
</span><span class=identifier>program_expected</span><span class=special>,
</span><span class=identifier>begin_expected</span><span class=special>,
</span><span class=identifier>end_expected
</span><span class=special>};
</span><span class=identifier>assertion</span><span class=special>&lt;</span><span class=identifier>Errors</span><span class=special>&gt; </span><span class=identifier>expect_program</span><span class=special>(</span><span class=identifier>program_expected</span><span class=special>);
</span><span class=identifier>assertion</span><span class=special>&lt;</span><span class=identifier>Errors</span><span class=special>&gt; </span><span class=identifier>expect_begin</span><span class=special>(</span><span class=identifier>begin_expected</span><span class=special>);
</span><span class=identifier>assertion</span><span class=special>&lt;</span><span class=identifier>Errors</span><span class=special>&gt; </span><span class=identifier>expect_end</span><span class=special>(</span><span class=identifier>end_expected</span><span class=special>);
</span></code></pre>
<p> The example above uses enums to hold the information regarding the error, but
we are free to use other types such as integers and strings. For example, <tt>assertion&lt;string&gt;</tt>
accepts a string as its info. It is advisable to use light-weight objects though,
after all, error descriptors are usually static. Enums are convenient for error
handlers to detect and easily catch since C++ treats enums as unique types.</p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"> <b><img src="theme/lens.gif" width="15" height="16">
The assertive_parser</b><br>
<br>
Actually, the expression <tt>expect_end(str_p(&quot;end&quot;))</tt> creates
an assertive_parser object. An assertive_parser is a parser that throws
an exception in response to a parsing failure. The assertive_parser throws
a parser_error exception rather than returning an unsuccessful match to
signal that the parser failed to match the input. During parsing, parsers
are given an iterator of type <tt>IteratorT</tt>. This is combined with
the error descriptor type <tt>ErrorDescrT</tt> of the assertion (in this
case enum <tt>Errors</tt>). Both are used to create a <tt>parser_error&lt;Errors,
IteratorT&gt;</tt> which is then thrown to signal the exception. </td>
</tr>
</table>
<p> The predeclared <tt>expect_end</tt> assertion object may now be used in the
grammar as a wrapper around parsers. For example:</p>
grammar as wrappers around parsers. For example:</p>
<pre>
<code><span class=identifier>expect_end</span><span class=special>(</span><span class=identifier>str_p</span><span class=special>(</span><span class=string>&quot;end&quot;</span><span class=special>))
</span></code></pre>
<p> This will throw an exception if it fails to see &quot;end&quot; from the input.</p>
<a name="the_guard"></a>
<h2>The Guard</h2>
<p> The <tt>guard</tt> is used to catch a specific type of <tt>parser_error</tt>.
Guards are typically predeclared just like assertions. Extending our previous
example:</p>
<pre>
<code><span class=identifier>guard</span><span class=special>&lt;</span><span class=identifier>Errors</span><span class=special>&gt; </span><span class=identifier>my_guard</span><span class=special>;
</span></code></pre>
<p> <tt>Errors</tt>, in this example is the error descriptor type we want to detect.
This is the same enum as above. <tt>my_guard</tt> may now be used in a grammar
declaration:</p>
<pre> <code><span class=identifier>my_guard</span><span class=special>(</span><span class=identifier>p</span><span class=special>)[</span><span class=identifier>error_handler</span><span class=special>]</span></code></pre>
<p> where <tt>p</tt> is an expression that evaluates to a parser. Somewhere inside
<tt>p</tt>, a parser may throw a parser exception. <tt>error_handler</tt> is
the error handler which may be a function or functor compatible with the interface:</p>
<pre> <code>error_status<span class=special>&lt;</span>T<span class=special>&gt;</span><span class=identifier>
f</span><span class=special>(</span>ScannerT const&amp; scan, ErrorT error<span class=special>);
</span></code></pre>
<p> Where scan points to the scanner state prior to parsing and error is the error
that arose. The handler is allowed to move the scanner position as it sees fit,
possibly in an attempt to perform error correction. The handler must then return
an <tt>error_status&lt;T&gt;</tt> object. </p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"> <b><img src="theme/lens.gif" width="15" height="16">
The fallback_parser </b><br>
<br>
The expression <tt>my_guard(p)[error_handler]</tt> creates a fallback_parser
object. The fallback_parser handles parser_error exceptions of a specific
type. Since <tt>my_guard</tt> is declared as <tt>guard&lt;Errors&gt;</tt>,
the fallback_parser catches <tt>Errors</tt> specific parser errors: <tt>parser_error&lt;Errors,
IteratorT&gt;</tt>. The class sets up a try block. When an exception is
caught, the catch block then calls the error_handler. </td>
</tr>
</table>
<h2>error_status&lt;T&gt;</h2>
<pre>
<code><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>T </span><span class=special>= </span><span class=identifier>nil_t</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>error_status
</span><span class=special>{
</span><span class=keyword>enum </span><span class=identifier>result_t </span><span class=special>{ </span><span class=identifier>fail</span><span class=special>, </span><span class=identifier>retry</span><span class=special>, </span><span class=identifier>accept</span><span class=special>, </span><span class=identifier>rethrow </span><span class=special>};
</span><span class=identifier>error_status</span><span class=special>(</span><span class=identifier>
result_t result </span><span class=special>= </span><span class=identifier>fail</span><span class=special>,
</span><span class=keyword>int </span><span class=identifier>length </span><span class=special>= -</span><span class=number>1</span><span class=special>,
</span><span class=identifier>T </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>value </span><span class=special>= </span><span class=identifier>T</span><span class=special>());
</span>
<span class=identifier>result_t result</span><span class=special>;
</span><span class=keyword>int </span><span class=identifier>length</span><span class=special>;
</span><span class=identifier>T value</span><span class=special>;
};</span></code></pre>
<p>Where <tt>T</tt> is an attribute type compatible with the match attribute of
the <tt>fallback_parser</tt>'s subject (defaults to <tt>nil_t</tt>). The class
<tt>error_status</tt> reports the result of an error handler. This result can
be one of: </p>
<table width="90%" border="0" align="center">
<tr>
<td class="table_title" colspan="8"> error_status result </td>
</tr>
<tr>
<td class="table_cells"><b>fail</b></td>
<td class="table_cells">quit and fail. Return a <tt>no_match</tt></td>
</tr>
<tr>
<td class="table_cells"><b>retry</b></td>
<td class="table_cells">attempt error recovery, possibly moving the scanner</td>
</tr>
<tr>
<td class="table_cells"><b>accept</b></td>
<td class="table_cells">force success returning a matching length, moving the
scanner appropriately and returning an attribute value</td>
</tr>
<tr>
<td class="table_cells"><b>rethrow</b></td>
<td class="table_cells">rethrows the error</td>
</tr>
</table>
<p><img src="theme/lens.gif" width="15" height="16"> See <a href="../example/fundamental/error_handling.cpp">error_handling.cpp</a> for a compilable example. This is part of the Spirit distribution.</p>
<table width="80%" border="0" align="center">
</table>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="debugging.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="quickref.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)</font></p>
<p class="copyright">&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,88 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<title>Escape Character Parser</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link href="theme/style.css" rel="stylesheet" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10" height="49"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>&nbsp;</b></font></td>
<td width="85%" height="49"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Escape Character Parser</b></font></td>
<td width="112" height="49"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="switch_parser.html"><img src="theme/l_arr.gif" width="20" height="19" border="0"></a></td>
<td width="30"><a href="loops.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p><a name="escape_char_parser"></a>The Escape Character Parser is a utility
parser, which parses escaped character sequences used in C/C++,
LEX or Perl regular expressions. Combined with the confix_p utility parser, it is useful for parsing C/C++ strings containing double quotes and other escaped
characters:</p>
<pre> confix_p<span class="special">(</span><em class="literal">'"'</em><span class="special">,</span> <span class="special">*</span>c_escape_ch_p<span class="special">,</span> <em><span class="literal">'"</span></em><span class="literal"><em>'</em></span><span class="special">)</span></pre>
<p>There are two different types of the Escape Character Parser:
<tt>c_escape_ch_p</tt>, which parses C/C++ escaped character sequences and
<tt>lex_escape_ch_p</tt>, which parses LEX style escaped character sequences.
The following table shows the valid character sequences understood by these
utility parsers.</p>
<table width="90%" border="0" align="center">
<tr>
<td colspan="2" class="table_title"><b>Summary of valid escaped character
sequences</b></td>
</tr>
<tr>
<td width="29%" height="27" class="table_cells"><b>c_escape_ch_p</b></td>
<td width="71%" class="table_cells"><p><code>\b, \t, \n, \f, \r, \\, \&quot;,
\', \xHH, \OOO</code><br>
where: H is some hexadecimal digit (0..9, a..f, A..F) and O is some octal
digit (0..7)</p></td>
</tr>
<tr>
<td height="27" class="table_cells"><strong>lex_escape_ch_p</strong></td>
<td class="table_cells">
<p>all C/C++ escaped character sequences as described above and additionally
any other character, which follows a backslash</p>
</td>
</tr>
</table>
<p>If there is a semantic action attached directly to the Escape Character Parser,
all valid escaped characters are converted to their character equivalent
(i.e. a backslash followed by a 'r' is converted to '\r'), which is
fed to the attached actor. The number of hexadecimal
or octal digits parsed depends on the size of one input character. An
overflow will be detected and will generate a non-match. lex_escape_ch_p
will strip the leading backslash for all character
sequences which are not listed as valid C/C++ escape sequences when passing
the unescaped character to an attached action.</p>
<p>Please note though, that if there is a semantic action attached to an
outermost parser (for instance as in <tt>(*c_escape_ch_p)[some_actor]</tt>,
where the action is attached to the kleene star generated parser) no conversion
takes place at the moment, but nevertheless the escaped characters are parsed
correctly. This limitation will be removed in a future version of the library.</p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="switch_parser.html"><img src="theme/l_arr.gif" width="20" height="19" border="0"></a></td>
<td width="30"><a href="loops.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 2001-2002 Daniel C. Nuffer<br>
Copyright &copy; 2003 Hartmut Kaiser <br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
</body>
</html>

View File

@@ -0,0 +1,506 @@
<html>
<head>
<title>FAQ</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>FAQ</b></font></td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="techniques.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="rationale.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<ul>
<li><a href="#scanner_business">The Scanner Business</a></li>
<li><a href="#left_recursion">Eliminating Left Recursion</a> </li>
<li><a href="#right_associativity">Implementing Right Associativity</a></li>
<li><a href="#lexeme_and_rules">The lexeme_d directive and rules</a></li>
<li><a href="#kleene_star">Kleene Star infinite loop</a></li>
<li><a href="#CVS">Boost CVS and Spirit CVS</a></li>
<li><a href="#compilation_times">How to reduce compilation times with complex
Spirit grammars</a></li>
<li><strong><a href="#frame_assertion">Closure frame assertion</a></strong></li>
<li><strong><a href="#greedy_rd">Greedy RD</a></strong></li>
<li><strong><a href="#referencing_a_rule_at_construction">Referencing a rule
at construction time</a></strong></li>
<li><strong><a href="#storing_rules">Storing Rules</a></strong></li>
<li><strong><a href="#parsing_ints_and_reals">Parsing ints and reals</a> </strong></li>
<li><strong><a href="#output_operator">BOOST_SPIRIT_DEBUG and missing <tt>operator&lt;&lt;</tt></a></strong></li>
<li><strong><a href="#repository">Applications that used to be part of spirit</a></strong></li>
</ul>
<p><b> <a name="scanner_business" id="scanner_business"></a> The Scanner Business</b></p>
<p><font color="#FF0000">Question:</font> Why doesn't this compile?</p>
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>rule</span><span class=special>&lt;&gt; </span><span class=identifier>r </span><span class=special>= /*...*/;
</span> <span class=identifier>parse</span><span class=special>(</span><span class=string>"hello world"</span><span class=special>, </span><span class=identifier>r</span><span class=special>, </span><span class=identifier>space_p</span><span class=special>); </span><span class=comment>// BAD [attempts phrase level parsing]</span></font></code></pre>
<p>But if I <font color="#000000">remove the skip-parser, everything goes back
to normal again:<code></code></font></p>
<pre><code><font color="#000000"> <span class=identifier>rule</span><span class=special>&lt;&gt; </span><span class=identifier>r </span><span class=special>= *</span><span class=identifier>anychar_p</span><span class=special>;
</span><span class=identifier>parse</span><span class=special>(</span><span class=string>"hello world"</span><span class=special>, </span><span class=identifier>r</span><span class=special>); </span><span class=comment>// OK [character level parsing]</span></font></code></pre>
<p>Sometimes you'll want to pass in a rule to one of the parse functions
that Spirit provides. The problem is that the rule is a template class that
is parameterized by the scanner type. This is rather awkward but unavoidable:
<strong>the rule is tied to a scanner</strong>. What's not obvious is that this
scanner must be compatible with the scanner that is ultimately passed to the
rule's parse member function. Otherwise, the compiler will complain. </p>
<p>Why does the first call to parse not compile? Because of scanner incompatibility.
Behind the scenes, the free parse function creates a scanner from the iterators
passed in. In the second call to parse (the one without a skip parser), the scanner
created is a plain vanilla <tt>scanner&lt;&gt;</tt>. This is compatible with the default scanner type of
<tt>rule&lt;&gt;</tt> [see default template parameters of <a href="rule.html">the
rule</a>]. The first call (the one with the skip parser) creates a scanner of type <tt><a href="scanner.html#phrase_scanner_t">phrase_scanner_t</a></tt>.
Thus, in order for the first call to succeed, the rule must be parameterized
as <tt>rule&lt;phrase_scanner_t&gt;</tt>:</p>
<pre><code><font color="#000000"><span class=comment> </span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>phrase_scanner_t</span><span class=special>&gt; </span><span class=identifier>r </span><span class=special>= </span><span class=special>*</span><span class=identifier>anychar_p</span><span class=special>;
</span><span class=identifier>parse</span><span class=special>(</span><span class=string>"hello world"</span><span class=special>, </span><span class=identifier>r</span><span class=special>, </span><span class=identifier>space_p</span><span class=special>); </span><span class=comment>// OK [phrase level parsing]</span></font></code></pre>
<p>Take note however that <tt>phrase_scanner_t</tt> is compatible only when you
are using <tt>char const*</tt> iterators and <tt>space_p</tt> as the skip parser.
Other than that, you'll have to find the right type of scanner. This is tedious
to do correctly. In light of this issue, <strong>it is best to avoid rules as
arguments to the parse functions</strong>. Keep in mind that this happens only
with rules. The rule is the only parser that has to be tied to a particular
scanner type. For instance:</p>
<pre><span class=comment> </span><span class=identifier>parse</span><span class=special>(</span><span class=string>"hello world"</span><span class=special>, *</span><span class=identifier>anychar_p</span><span class=special>); </span><span class=comment><code><font color="#000000"><span class=comment>// OK [character level parsing]</span></font></code>
</span><span class=identifier>parse</span><span class=special>(</span><span class=string>"hello world"</span><span class=special>, *</span><span class=identifier>anychar_p</span><span class=special>, </span><span class=identifier>space_p</span><span class=special>); </span><span class="comment">// OK [phrase level parsing]</span></pre>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"> <strong><img src="theme/note.gif" width="16" height="16">
Multiple Scanner Support</strong><br>
<br>
As of v1.8.0, rules can use one or more scanner types. There are cases,
for instance, where we need a rule that can work on the phrase and character
levels. Rule/scanner mismatch has been a source of confusion and is the
no. 1 <a href="faq.html#scanner_business">FAQ</a>. To address this issue,
we now have <a href="rule.html#multiple_scanner_support">multiple scanner
support</a>. <br>
<br>
<img src="theme/bulb.gif" width="13" height="18"> See the techniques section
for an <a href="techniques.html#multiple_scanner_support">example</a> of
a <a href="grammar.html">grammar</a> using a multiple scanner enabled rule,
<a href="scanner.html#lexeme_scanner">lexeme_scanner</a> and <a href="scanner.html#as_lower_scanner">as_lower_scanner.</a></td>
</tr>
</table>
<p><b> <a name="left_recursion"></a> Eliminating Left Recursion </b></p>
<p><font color="#FF0000">Question:</font> I ported a grammar from YACC. It's &quot;kinda&quot;
working - the parser itself compiles with no errors. But when I try to parse,
it gives me an &quot;invalid page fault&quot;. I tracked down the problem to
this grammar snippet:</p>
<pre> <span class=identifier>or_expr </span><span class=special>= </span><span class=identifier>xor_expr </span><span class=special>| (</span><span class=identifier>or_expr </span><span class=special>&gt;&gt; </span><span class=identifier>VBAR </span><span class=special>&gt;&gt; </span><span class=identifier>xor_expr</span><span class=special>);</span></pre>
<p>What you should do is to eliminate direct and indirect left-recursion. This
causes the invalid page fault because the program enters an infinite loop. The
code above is good for bottom up parsers such as YACC but not for LL parsers
such as Spirit.</p>
<p>This is similar to a rule in Hartmut Kaiser's C
parser (this should be available for download from <a href="http://spirit.sf.net">Spirit's site</a> as soon as you read this).</p>
<pre>
<span class=identifier>inclusive_or_expression
</span><span class=special>= </span><span class=identifier>exclusive_or_expression
</span><span class=special>| </span><span class=identifier>inclusive_or_expression </span><span class=special>&gt;&gt; </span><span class=identifier>OR </span><span class=special>&gt;&gt; </span><span class=identifier>exclusive_or_expression
</span><span class=special>;</span></pre>
<p><span class=special></span>Transforming left recursion to right recursion,
we have:</p>
<pre> <span class=identifier>inclusive_or_expression
</span><span class=special>= </span><span class=identifier>exclusive_or_expression </span><span class=special>&gt;&gt; </span><span class=identifier>inclusive_or_expression_helper
</span><span class=special>;
</span><span class=identifier>inclusive_or_expression_helper
</span><span class=special>= </span><span class=identifier>OR </span><span class=special>&gt;&gt; </span><span class=identifier>exclusive_or_expression </span><span class=special>&gt;&gt; </span><span class=identifier>inclusive_or_expression_helper
</span><span class=special>| </span><span class=identifier>epsilon_p
</span><span class=special>;</span></pre>
<p><span class=special></span>I'd go further. Since:</p>
<pre> <span class=identifier>r </span><span class=special>= </span><span class=identifier>a </span><span class=special>| </span><span class=identifier>epsilon_p</span><span class=special>;</span></pre>
<p><span class=special></span>is equivalent to:<span class=special><br>
</span></p>
<pre> <span class=identifier>r </span><span class=special>= !</span><span class=identifier>a</span><span class=special>;</span></pre>
<p>we can simplify <tt>inclusive_or_expression_helper</tt> thus:</p>
<pre> <span class=identifier>inclusive_or_expression_helper
</span><span class=special>= !(</span><span class=identifier>OR </span><span class=special>&gt;&gt; </span><span class=identifier>exclusive_or_expression </span><span class=special>&gt;&gt; </span><span class=identifier>inclusive_or_expression_helper</span><span class=special>)
;</span></pre>
<p><span class=special></span>Now, since:</p>
<pre> <span class=identifier>r </span><span class=special>= !(</span><span class=identifier>a </span><span class=special>&gt;&gt; </span><span class=identifier>r</span><span class=special>);</span></pre>
<p><span class=special></span>is equivalent to:</p>
<pre> <span class=identifier>r </span><span class=special>= *</span><span class=identifier>a</span><span class=special>;</span></pre>
<p><span class=special></span>we have:</p>
<pre> <span class=identifier>inclusive_or_expression_helper
</span><span class=special>= *(</span><span class=identifier>OR </span><span class=special>&gt;&gt; </span><span class=identifier>exclusive_or_expression</span><span class=special>)
;</span></pre>
<p><span class=special></span>Now simplifying <tt>inclusive_or_expression</tt>
fully, we have:</p>
<pre> <span class=identifier>inclusive_or_expression
</span><span class=special>= </span><span class=identifier>exclusive_or_expression </span><span class=special>&gt;&gt; *(</span><span class=identifier>OR </span><span class=special>&gt;&gt; </span><span class=identifier>exclusive_or_expression</span><span class=special>)
;</span></pre>
<p><span class=special></span>Reminds me of the calculators. So in short:</p>
<pre> <span class=identifier>a </span><span class=special>= </span><span class=identifier>b </span><span class=special>| </span><span class=identifier>a </span><span class=special>&gt;&gt; </span><span class=identifier>op </span><span class=special>&gt;&gt; </span><span class=identifier>b</span><span class=special>;</span></pre>
<p><span class=special></span><span class=identifier>in </span><span class=identifier>pseudo-YACC
</span><span class=identifier>is</span><span class=special>:</span></p>
<pre> <span class=identifier>a </span><span class=special>= </span><span class=identifier>b </span><span class=special>&gt;&gt; *(</span><span class=identifier>op </span><span class=special>&gt;&gt; </span><span class=identifier>b</span><span class=special>);</span></pre>
<p><span class=special></span>in Spirit. What could be simpler? Look Ma, no recursion,
just iteration.</p>
<p><b> <a name="right_associativity" id="right_associativity"></a> Implementing Right Associativity </b></p>
<p> <font color="#FF0000">Question:</font> I tried adding <tt>'^'</tt> as an operator to compute the power to a calculator grammar. The following code
</p>
<pre> <span class=identifier>pow_expression
</span><span class=special>= </span><span class=identifier>pow_operand </span><span class=special>&gt;&gt; </span><span class=special>*( </span><span class=literal>'^' </span><span class=special>&gt;&gt; </span><span class=identifier>pow_operand </span><span class=special>[ </span><span class=special>&amp; </span><span class=identifier>do_pow </span><span class=special>]
</span><span class=special>)
</span><span class=special>;</span>
</pre>
<p>parses the input correctly, but I want the operator to be evaluated from right to left. In other words, the expression <tt>2^3^4</tt> is supposed to have the same semantics as <tt>2^(3^4)</tt> instead of <tt>(2^3)^4</tt>. How do I do it?
</p>
<p> The "textbook recipe" for Right Associativity is Right Recursion. In BNF that means:
<pre> &lt;pow_expression&gt; ::= &lt;pow_operand&gt; '^' &lt;pow_expression&gt; | &lt;pow_operand&gt;
</pre>
<p>But we had better not take the theory too literally here, because if the first alternative fails, the semantic actions within <tt>pow_operand</tt> might have been executed already and will then be executed again when trying the second alternative. So let's apply Left Factorization to factor out <tt>pow_operand</tt>:
<pre> &lt;pow_expression&gt; ::= &lt;pow_operand&gt; &lt;pow_expression_helper&gt;
&lt;pow_expression_helper&gt; ::= '^' &lt;pow_expression&gt; | <i>&#949;</i>
</pre>
<p>The production <tt>pow_expression_helper</tt> matches the empty string <i>&#949;</i>, so we can replace the alternative with the optional operator in Spirit code.
</p>
<pre> <span class=identifier>pow_expression
</span><span class=special>= </span><span class=identifier>pow_operand </span><span class=special>&gt;&gt; </span><span class=special>!( </span><span class=literal>'^' </span><span class=special>&gt;&gt; </span><span class=identifier>pow_expression </span><span class=special>[ </span><span class=special>&amp; </span><span class=identifier>do_pow </span><span class=special>]
</span><span class=special>)
</span><span class=special>;</span>
</pre>
<p>Now any semantic actions within <tt>pow_operand</tt> can safely be executed. For stack-based evaluation that means that each match of <tt>pow_operand</tt> will leave one value on the stack and the recursion makes sure there are (at least) two values on the stack when <tt>do_pow</tt> is fired to reduce these two values to their power.
</p>
<p>In cases where this technique isn't applicable, such as C-style assignment
<pre> <span class=identifier>assignment
</span><span class=special>= </span><span class=identifier>lvalue </span><span class=special>&gt;&gt; </span><span class=literal>'=' </span><span class=special>&gt;&gt; </span><span class=identifier>assignment
</span><span class=special>| </span><span class=identifier>ternary_conditional
</span><span class=special>;</span>
</pre>
<p>you can append <tt>| epsilon_p [ <i>action</i> ] &gt;&gt; nothing_p</tt> to a parser to correct the semantic context when backtracking occurs (in the example case that would be dropping the address pushed by <tt>lvalue</tt> off the evaluation stack):
</p>
<pre> <span class=identifier>assignment
</span><span class=special>= </span><span class=identifier>lvalue </span><span class=special>&gt;&gt; </span><span class=special>( </span><span class=literal>'=' </span><span class=special>&gt;&gt; </span><span class=identifier>assignment </span><span class=special>[ </span><span class=special>&amp; </span><span class=identifier>do_store </span><span class=special>]
</span><span class=special>| </span><span class=identifier>epsilon_p </span><span class=special>[ </span><span class=special>&amp; </span><span class=identifier>do_drop </span><span class=special>]
</span><span class=special>&gt;&gt; </span><span class=identifier>nothing_p
</span><span class=special>)
</span><span class=special>| </span><span class=identifier>ternary_conditional
</span><span class=special>;</span>
</pre>
<p>However, this trick compromises the clear separation of syntax and semantics, so you also might want to consider using an <a href="trees.html">AST</a> instead of semantic actions so you can just go with the first definition of <tt>assignment</tt>.
</p>
<p><b> <a name="lexeme_and_rules" id="lexeme_and_rules"></a> The lexeme_d directive
and rules</b></p>
<p> <font color="#FF0000">Question:</font> Does lexeme_d not support expressions
which include rules? In the example below, the definition of atomicRule compiles,
</p>
<pre> <span class=identifier></span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>phrase_scanner_t</span><span class=special>&gt; </span><span class=identifier>atomicRule</span>
<span class=special>= </span><span class=identifier>lexeme_d</span><span class=special>[(</span><span class=identifier>alpha_p </span><span class=special>| </span><span class=literal>'_'</span><span class=special>) &gt;&gt; *(</span><span class=identifier>alnum_p </span><span class=special>| </span><span class=literal>'.' </span><span class=special>| </span><span class=literal>'-' </span><span class=special>| </span><span class=literal>'_'</span><span class=special>)];</span></pre>
<p>but if I move <tt>alnum_p | '.' | '-' | '_'</tt> into its own rule, the compiler
complains about conversion from <tt>const scanner&lt;...&gt;</tt> to <tt>const
phrase_scanner_t&amp;</tt>. </p>
<pre> <span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>phrase_scanner_t</span><span class=special>&gt; </span><span class=identifier>ch </span><span class=special>
= </span><span class=identifier>alnum_p </span><span class=special>| </span><span class=literal>'.' </span><span class=special>| </span><span class=literal>'-' </span><span class=special>| </span><span class=literal>'_'</span><span class=special>;</span>
<span class=identifier> rule</span><span class=special>&lt;</span><span class=identifier>phrase_scanner_t</span><span class=special>&gt; </span><span class=identifier>compositeRule</span>
<span class=special>= </span><span class=identifier>lexeme_d</span><span class=special>[(</span><span class=identifier>alpha_p </span><span class=special>| </span><span class=literal>'_'</span><span class=special>) &gt;&gt; *(</span><span class=identifier>ch</span><span class=special>)]; </span><span class="comment">// &lt;- error source</span></pre>
<p>You might get the impression that the <tt>lexeme_d</tt> directive and rules
do not mix. Actually, this problem is related to the first FAQ entry: The Scanner
Business. More precisely, the <tt>lexeme_d</tt> directive and rules with incompatible
scanner types do not mix. This problem is more subtle. What's causing the scanner
incompatibility is the directive itself. The <tt>lexeme_d</tt> directive transforms
the scanner it receives into something that disables the skip parser. This non-skipping
scanner, unfortunately, is incompatible with the original scanner before transformation
took place.</p>
<p>The simplest solution is not to use rules in the <tt>lexeme_d</tt>. Instead,
you can definitely apply <tt>lexeme_d</tt> to subrules and grammars if you really
need more complex parsers inside the <tt>lexeme_d</tt>. If you really must use
a rule, you need to know the exact scanner used by the directive. The <tt>lexeme_scanner</tt>
metafunction is your friend here. The example above will work as expected once
we give the <tt>ch</tt> rule a correct scanner type:</p>
<pre> <span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>lexeme_scanner</span><span class="special">&lt;</span><span class=identifier>phrase_scanner_t</span><span class=special>&gt;::</span><span class="identifier">type</span><span class=special>&gt; </span><span class=identifier>ch </span><span class=special>
= </span><span class=identifier>alnum_p </span><span class=special>| </span><span class=literal>'.' </span><span class=special>| </span><span class=literal>'-' </span><span class=special>| </span><span class=literal>'_'</span><span class=special>;</span></pre>
<p>Note: make sure to add &quot;<tt>typename</tt>&quot; before <tt>lexeme_scanner</tt>
when this is used inside a template class or function.</p>
<p>The same thing happens when rules are used inside the <tt>as_lower_d</tt> directive.
In such cases, you can use the <tt>as_lower_scanner</tt>. See the <span class=identifier><tt><a href="scanner.html#lexeme_scanner">lexeme_scanner</a></tt></span>
and <tt><a href="scanner.html#as_lower_scanner">as_lower_scanner</a></tt>.</p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><img src="theme/bulb.gif" width="13" height="18"> See
the techniques section for an <a href="techniques.html#multiple_scanner_support">example</a>
of a <a href="grammar.html">grammar</a> using a <a href="rule.html#multiple_scanner_support">multiple
scanner enabled rule,</a> <a href="scanner.html#lexeme_scanner">lexeme_scanner</a>
and <a href="scanner.html#as_lower_scanner">as_lower_scanner.</a></td>
</tr>
</table>
<p><strong><a name="kleene_star"></a>Kleene Star infinite loop</strong></p>
<p><font color="#FF0000">Question</font>: Why Does This Loop Forever?</p>
<pre> <span class=identifier>rule</span><span class=special>&lt;&gt; </span><span class=identifier>optional </span><span class=special>= !(</span>str_p<span class="special">(</span><span class="string">&quot;optional&quot;</span><span class="special">));
</span><span class=identifier>rule</span><span class=special>&lt;&gt; </span><span class="identifier">list_of_optional </span><span class=special>= *</span><span class=identifier>optional</span><span class="special">;</span></pre>
<p>The problem with this is that the kleene star will continue looping until it
gets a no-match from its enclosed parser. Because the <tt>optional</tt> rule
is optional, it will always return a match. Even if the input doesn't match
&quot;optional&quot; it will return a zero length match. <tt>list_of_optional</tt>
will keep calling optional forever since optional will never return a no-match.
So in general, any rule that can be &quot;nullable&quot; (meaning it can return
a zero length match) must not be put inside a kleene star.</p>
<p><strong><a name="CVS"></a>Boost CVS and Spirit CVS</strong></p>
<p><font color="#FF0000">Question:</font> There is Boost CVS and Spirit CVS. Which
is used for further development of Spirit?</p>
<p> Generally, development takes place in Spirit's CVS. However, from time to
time a new version of Spirit will be integrated in Boost. When this happens
development takes place in the Boost CVS. There will be announcements on the
Spirit mailing lists whenever the status of the Spirit CVS changes.<br>
</p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><img src="theme/alert.gif" width="16" height="16">
During development of Spirit v1.8.1 (released as part of boost-1.32.0) and
v1.6.2, Spirit's developers decided to stop maintaining Spirit CVS for
BRANCH_1_8 and BRANCH_1_6. This was necessary to reduce the added work of
maintaining and synch'ing two repositories. The maintenance of these branches
will take place on Boost CVS. At this time, new developments towards Spirit
v2 and other experimental developments are expected to happen in Spirit
CVS.</td>
</tr>
</table>
<p><strong><a name="compilation_times"></a>How to reduce compilation times with
complex Spirit grammars </strong></p>
<p><font color="#FF0000">Question:</font> Are there any techniques to minimize
compile times using spirit? For simple parsers compile time doesn't seem to
be a big issue, but recently I created a parser with about 78 rules
and it took about 2 hours to compile. I would like to break the grammar up into
smaller chunks, but it is not as easy as I thought it would be because rules
in two grammar capsules are defined in terms of each other. Any thoughts?</p>
<p> The only way to reduce compile times is </p>
<ul>
<li> to split up your grammars into smaller chunks</li>
<li> prevent the compiler from seeing all grammar definitions at the same time
(in the same compilation unit)</li>
</ul>
<p>The first task is merely logistical, the second is rather a technical one. </p>
<p>A good example of solving the first task is given in the Spirit cpp_lexer example
written by JCAB (you may find it on the <a href="http://spirit.sourceforge.net/repository/applications/show_contents.php">applications' repository</a>).
</p>
<p>The cross referencing problems may be solved by some kind of forward declaration,
or, if this does not work, by introducing some dummy template argument to the
non-templated grammars. This allows the instantiation time to be deferred until the
compiler has seen all the definitions:</p>
<pre> <span class="keyword">template</span> &lt;<span class="keyword">typename</span> T = <span class="keyword">int</span>&gt;<br> <span class="keyword">struct</span> grammar2;
<span class="keyword">template</span> &lt;<span class="keyword">typename</span> T = <span class="keyword">int</span>&gt;<br> <span class="keyword">struct</span> grammar1 : <span class="keyword">public</span> grammar&lt;grammar1&gt;<br> {
<span class="comment">// refers to grammar2&lt;&gt;</span>
};
<span class="keyword">template</span> &lt;typename T&gt;
<span class="keyword">struct</span> grammar2 : <span class="keyword">public</span> grammar&lt;grammar2&gt;
{
<span class="comment">// refers to grammar1&lt;&gt;</span>
};
//...
grammar1&lt;&gt; g; <span class="comment">// both grammars instantiated here</span>
</pre>
<p>The second task is slightly more complex. You must ensure that in the first
compilation unit the compiler sees only some function/template <strong>declaration</strong>
and in the second compilation unit the function/template <strong>definition</strong>.
Still no problem, if no templates are involved. If templates are involved,
you need to manually (explicitly) instantiate these templates with the correct
template parameters inside a separate compilation unit. This way the compilation
time is split between several compilation units, reducing the overall
required time drastically too. </p>
<p>For a sample, showing how to achieve this, you may want to look at the <tt>Wave</tt>
preprocessor library, where this technique is used extensively. (this should be available for download from <a href="http://spirit.sf.net">Spirit's site</a> as soon as you read this).</p>
<p><strong><a name="frame_assertion" id="frame_assertion"></a>Closure frame assertion</strong></p>
<p><font color="#FF0000">Question:</font> When I run the parser I get an assertion
<span class="string">&quot;frame.get() != 0 in file closures.hpp&quot;</span>.
What am I doing wrong?</p>
<p>Basically, the assertion fires when you are accessing a closure variable that
is not constructed yet. Here's an example. We have three rules <tt>a</tt>, <tt>b</tt>
and <tt>c</tt>. Consider that the rule <tt>a</tt> has a closure member <tt>m</tt>.
Now:</p>
<pre> <span class="identifier">a</span> <span class="special">=</span> <span class="identifier">b</span><span class="special">;</span>
<span class="identifier">b</span> <span class="special">=</span> <span class="identifier">int_p</span><span class="special">[</span><span class="identifier">a</span><span class="special">.</span><span class="identifier">m</span> <span class="special">=</span> 123<span class="special">];</span>
<span class="identifier">c</span> <span class="special">=</span> <span class="identifier">b</span><span class="special">;</span></pre>
<p>When the rule <tt>a</tt> is invoked, its frame is set, along with its member
<tt>m</tt>. So, when <tt>b</tt> is called from <tt>a</tt>, the semantic action
<tt>[a.m = 123]</tt> will store <tt>123</tt> into <tt>a</tt>'s closure member
<tt>m</tt>. On the other hand, when <tt>c</tt> is invoked, and <tt>c</tt> attempts
to call <tt>b</tt>, no frame for <tt>a</tt> is set. Thus, when <tt>b</tt> is
called from <tt>c</tt>, the semantic action <tt>[a.m = 123]</tt> will fire the
<span class="string">&quot;frame.get() != 0 in file closures.hpp&quot;</span>
assertion.</p>
<p><strong><a name="greedy_rd" id="greedy_rd"></a>Greedy RD</strong></p>
<p><font color="#FF0000">Question:</font> I'm wondering why this won't work
when parsed:</p>
<pre>
<span class="identifier"> a</span> <span class="special">= +</span><span class="identifier">anychar_p</span><span class="special">;</span>
<span class="identifier">b</span> = <span class="string">'('</span> <span class="special">&gt;&gt;</span> <span class="identifier">a</span> <span class="special">&gt;&gt;</span> <span class="string">')'</span><span class="special">;</span></pre>
<p>Try this:</p>
<pre>
<span class="identifier"> a</span> <span class="special">= +(</span><span class="identifier">anychar_p - </span><span class="string">')'</span><span class="special">);</span>
<span class="identifier">b</span> <span class="special">=</span> <span class="string">'('</span> <span class="special">&gt;&gt;</span> <span class="identifier">a</span> <span class="special">&gt;&gt;</span> <span class="string">')'</span><span class="special">;</span></pre>
<p>David Held writes: That's because it's like the langoliers--it eats everything
up. You usually want to say what it shouldn't eat up by subtracting the terminating
character from the parser. The moral being: Using <tt>*anychar_p</tt> or <tt>+anychar_p</tt>
all by itself is usually a <em>Bad Thing</em>&#8482;.</p>
<p>In other words: Recursive Descent is inherently greedy (however, see <a href="rationale.html#exhaustive_rd">Exhaustive
backtracking and greedy RD</a>).</p>
<p><span class="special"></span><strong><a name="referencing_a_rule_at_construction" id="referencing_a_rule_at_construction"></a>Referencing
a rule at construction time</strong></p>
<p><font color="#FF0000">Question:</font> The code below terminates with a segmentation
fault, but I'm (obviously) confused about what I'm doing wrong.</p>
<pre> rule<span class="special">&lt;</span>ScannerT<span class="special">,</span> clos<span class="special">::</span>context_t<span class="special">&gt;</span> id <span class="special">=</span> int_p<span class="special">[</span>id<span class="special">.</span>i <span class="special">=</span> arg1<span class="special">];</span></pre>
<p>You have a rule <tt>id</tt> being constructed. Before it is constructed, you
reference <tt>id.i</tt> in the RHS of the constructor. It's a chicken and egg
thing. The closure member <tt>id.i</tt> is not yet constructed at that point.
Using assignment will solve the problem. Try this instead:</p>
<pre> rule<span class="special">&lt;</span>ScannerT<span class="special">,</span> clos<span class="special">::</span>context_t<span class="special">&gt;</span> id<span class="special">;</span>
id <span class="special">=</span> int_p<span class="special">[</span>id<span class="special">.</span>i <span class="special">=</span> arg1<span class="special">];</span></pre>
<p><span class="special"></span><strong><a name="storing_rules" id="storing_rules"></a>Storing
Rules </strong></p>
<p><font color="#FF0000">Question:</font> Why can't I store rules in STL containers
for later use and why can't I pass and return rules to and from functions by
value? </p>
<p>EBNF is primarily declarative. Like in functional programming, it's a static
recipe and there's no notion of do this then that. However, in Spirit, we managed
to coax imperative C++ to take in declarative EBNF. Hah! Fun!... We did that
by masquerading the C++ assignment operator to mimic EBNF's <tt>::=</tt>, among
other things (e.g. <tt>&gt;&gt;</tt>, <tt>|</tt>, <tt>&amp;</tt> etc.). We used
the rule class to let us do that by giving its assignment operator (and copy
constructor) a different meaning and semantics. Doing so made the rule unlike
any other C++ object. You can't copy it. You can't assign it. You can't place
it in a container (vector, stack, etc.). Heck, you can't even return it from a
function *by value*.</p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><img src="theme/alert.gif" width="16" height="16"> The
rule is a weird object, unlike any other C++ object. It does not have the
proper copy and assignment semantics and cannot be stored and passed around
by value.</td>
</tr>
</table>
<p>However nice declarative EBNF is, the dynamic nature of C++ can be an advantage.
We've seen this in action here and there. There are indeed some interesting
applications of dynamic parsers using Spirit. Yet, we haven't fully utilized
the power of dynamic parsing, unless(!), we have a rule that's not so alien
to C++ (i.e. behaves as a good C++ object). With such a beast, we can write
parsers that are defined at run time, as opposed to at compile time.</p>
<p>Now that I started focusing on rules (hey, check out the hunky new rule features),
it might be a good time to implement the rule-holder. It is basically just a
rule, but with C++ object semantics. Yet it's not as simple. Without true garbage
collection, the implementation will be a bit tricky. We can't simply use reference
counting because a rule-holder (hey, does anyone here have a better name?) *is-a*
rule, and rules are typically recursive and thus cyclic. The problem is which
will own which.</p>
<p>Ok... this will do for now. You'll definitely see more of the rule-holder in
the coming days.</p>
<p><strong><a name="parsing_ints_and_reals"></a>Parsing Ints and Reals</strong></p>
<p> <font color="#FF0000">Question:</font> I was trying to parse an int or float value with the <tt>longest_d</tt> directive and put some actors on the alternatives to visualize the results. When I parse &quot;123.456&quot;, the output reports:</p>
<ol>
<li>(int) has been matched: full match = false</li>
<li> (double) has been matched: full match = true</li>
</ol>
<p>That is not what I expected. What am I missing? </p>
<p> Actually, the problem is that both semantic actions of the int and real branch will be triggered because both branches will be tried. This doesn't buy us much. What actually wins in the end is what you expected. But there's no easy way to know which one wins. The problem stems from the ambiguity. </p>
<blockquote>
<p>Case1: Consider this input: &quot;2&quot;. Is it an int or a real? They are both (strictly following the grammar of a real). </p>
<p>Case2: Now how about &quot;1.0&quot;? Is it an int or a real? They are both, albeit the int part gets a partial match: &quot;1&quot;. That is why you are getting a (partial) match for your <em>int</em> rule (full match = false). </p>
</blockquote>
<p> Instead of using the <tt>longest_d</tt> to parse ints and reals, what I suggest is to remove the ambiguity and use the plain short-circuiting alternatives. The first step is to use <tt><a href="numerics.html#strict_reals">strict_real_p</a></tt> to make the first case unambiguous. Unlike
<tt>real_p</tt>, <tt>strict_real_p</tt> requires a dot to be present for a number to be considered a successful match.
Your grammar can be written unambiguously as:</p>
<pre> strict_real_p<span class="special"> | </span>int_p</pre>
<p> Note that because ambiguity is resolved, attaching actions to both branches is safe. Only one will be triggered:</p>
<pre> strict_real_p<span class="special">[</span>R<span class="special">] | </span>int_p<span class="special">[</span>I<span class="special">]</span></pre>
<blockquote>
<p> &quot;1.0&quot; ---&gt; triggers R<br>
&quot;2&quot; ---&gt; triggers I</p>
</blockquote>
<p> Again, as a rule of thumb, it is always best to resolve as much ambiguity as possible. The best grammars are those which involve no backtracking at all: an LL(1) grammar. Backtracking and semantic actions do not mix well.</p>
<p><b><a name="output_operator" id="output_operator"></a>BOOST_SPIRIT_DEBUG and missing <tt>operator&lt;&lt;</tt></b></p>
<p><font color="#FF0000">Question:</font> My code compiles fine in release mode but when I try to define <tt>BOOST_SPIRIT_DEBUG</tt> the compiler complains about a missing <tt><span class="keyword">operator</span><span class="special">&lt;&lt;</span></tt>.</p>
<p>When <tt>BOOST_SPIRIT_DEBUG</tt> is defined, debug output is generated for
spirit parsers. To this end it is expected that each closure member has the
default output operator defined.</p>
<p>You may provide the operator overload either in the namespace where the
class is declared (will be found through Argument Dependent Lookup) or make it visible where it is
used, that is <tt><span class="keyword">namespace</span> <span
class="identifier">boost</span><span class="special">::</span><span
class="identifier">spirit</span></tt>. Here's an example for <tt><span
class="identifier">std</span><span class="special">::</span><span
class="identifier">pair</span></tt>:</p>
<pre><code>
<span class="preprocessor">#include</span> <span class="string">&lt;iosfwd&gt;</span>
<span class="preprocessor">#include</span> <span class="string">&lt;utility&gt;</span>
<span class="keyword">namespace</span> <span class="identifier">std</span> <span class="special">{</span>
<span class="keyword">template</span> <span class="special">&lt;</span>
<span class="keyword">typename</span> <span class="identifier">C</span><span class="special">,</span>
<span class="keyword">typename</span> <span class="identifier">E</span><span class="special">,</span>
<span class="keyword">typename</span> <span class="identifier">T1</span><span class="special">,</span>
<span class="keyword">typename</span> <span class="identifier">T2</span>
<span class="special">&gt;</span>
<span class="identifier">basic_ostream</span><span class="special">&lt;</span><span class="identifier">C</span><span class="special">,</span> <span class="identifier">E</span><span class="special">&gt;</span> <span class="special">&amp;</span> <span class="keyword">operator</span><span class="special">&lt;&lt;(</span>
<span class="identifier">basic_ostream</span><span class="special">&lt;</span><span class="identifier">C</span><span class="special">,</span> <span class="identifier">E</span><span class="special">&gt;</span> <span class="special">&amp;</span> <span class="identifier">out</span><span class="special">,</span>
<span class="identifier">pair</span><span class="special">&lt;</span><span class="identifier">T1</span><span class="special">,</span> <span class="identifier">T2</span><span class="special">&gt;</span> <span class="keyword">const</span> <span class="special">&amp;</span> <span class="identifier">what</span><span class="special">)</span>
<span class="special">{</span>
<span class="keyword">return</span> <span class="identifier">out</span> <span class="special">&lt;&lt;</span> <span class="string">'('</span> <span class="special">&lt;&lt;</span> <span class="identifier">what</span><span class="special">.</span><span class="identifier">first</span> <span class="special">&lt;&lt;</span> <span class="string">", "</span>
<span class="special">&lt;&lt;</span> <span class="identifier">what</span><span class="special">.</span><span class="identifier">second</span> <span class="special">&lt;&lt;</span> <span class="string">')'</span><span class="special">;</span>
<span class="special">}</span>
<span class="special">}</span>
</code></pre>
<p><b><a name="repository" id="repository"></a>Applications that used to be part of spirit</b></p>
<p><font color="#FF0000">Question:</font> Where can I find <i>&lt;insert great application&gt;</i>, that used to be part of the Spirit distribution?</p>
<p>Old versions of Spirit used to include applications built with it.
In order to streamline the distribution they were moved to a separate
<a href="http://spirit.sourceforge.net/repository/applications/show_contents.php">applications repository</a>.
In that page you'll find links to full applications that use the Spirit
parser framework. We encourage you to send in your own applications for
inclusion (see the page for instructions).</p>
<p>You may also check out the <a href="http://spirit.sourceforge.net/repository/grammars/show_contents.php">grammars' repository</a>.</p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box">
<img src="theme/note.gif" width="16" height="16"> You'll still find the
example applications that complement (actually are part of) the
documentation in the usual place: <code>libs/spirit/example</code>.<br>
<br>
<img src="theme/alert.gif" width="16" height="16"> The applications and
grammars listed in the repositories are works of the respective authors.
It is the author's responsibility to provide support and maintenance.
Should you have any questions, please send the author an email.
</td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="techniques.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="rationale.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
<span class="copyright">Copyright &copy; 2002-2003 Hartmut Kaiser </span><br>
<span class="copyright">Copyright &copy; 2006-2007 Tobias Schwinger </span><br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)</font></p>
<p class="copyright">&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,97 @@
<html>
<head>
<title>File Iterator</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%">
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>File Iterator</b></font>
</td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="multi_pass.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="position_iterator.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>Since Spirit is a back-tracking parser, it requires at least a forward iterator.
In particular, an input iterator is not sufficient. Many times it is convenient
to read the input to a parser from a file, but the STL file iterators are input
iterators. To get around this limitation, Spirit has a utility class <tt>file_iterator</tt>,
which is a read-only random-access iterator for files.</p>
<p>To use the Spirit file iterator, simply create a file iterator with the path
to the file you wish to parse, and then create an EOF iterator for the file:</p>
<pre><span class=identifier> </span><span class=preprocessor>#include </span><span class=special>&lt;</span><span class=identifier>boost</span><span class=special>/</span><span class=identifier>spirit</span><span class=special>/</span><span class=identifier>iterator</span><span class=special>/</span><span class=identifier>file_iterator</span><span class=special>.</span><span class=identifier>hpp</span><span class=special>&gt; </span><span class=comment>// the header file</span></pre>
<pre> <span class=identifier>file_iterator</span><span class=special>&lt;&gt; </span><span class=identifier>first</span><span class=special>(</span><span class=string>&quot;input.dat&quot;</span><span class=special>);
</span><span class=keyword>if </span><span class=special>(!</span><span class=identifier>first</span><span class=special>)
{
</span><span class=identifier>std</span><span class=special>::</span><span class=identifier>cout </span><span class=special>&lt;&lt; </span><span class=string>&quot;Unable to open file!\n&quot;</span><span class=special>;
</span><span class=comment>// Clean up, throw an exception, whatever
</span><span class=keyword>return </span><span class=special>-</span><span class=number>1</span><span class=special>;
}
</span><span class=identifier>file_iterator</span><span class=special>&lt;&gt; </span><span class=identifier>last </span><span class=special>= </span><span class=identifier>first</span><span class=special>.</span><span class=identifier>make_end</span><span class=special>();</span></pre>
<p>You now have a pair of iterators to use with Spirit. If your parser is fully
parametrized (no hard-coded <tt>&lt;char const *&gt;</tt>), it is a simple matter
of redefining the iterator type to <tt>file_iterator</tt>:<br>
</p>
<pre> <span class=keyword>typedef char </span><span class="identifier">char_t</span><span class=special>;
</span><span class=keyword>typedef </span><span class=identifier>file_iterator </span><span class=special>&lt;</span><span class=identifier>char_t</span><span class=special>&gt; </span><span class=identifier>iterator_t</span><span class=special>;
</span><span class=keyword>typedef </span><span class=identifier>scanner</span><span class=special>&lt;</span><span class=identifier>iterator_t</span><span class=special>&gt; </span><span class=identifier>scanner_t</span><span class=special>;
</span><span class=keyword>typedef </span><span class=identifier>rule </span><span class=special>&lt;</span><span class=identifier>scanner_t</span><span class=special>&gt; </span><span class=identifier>rule_t</span><span class=special>;
</span><span class=identifier>rule_t my_rule</span><span class=special>;
</span><span class=comment>// Define your rule
</span><span class=identifier>parse_info</span><span class=special>&lt;</span><span class=identifier>iterator_t</span><span class=special>&gt; </span><span class=identifier>info </span><span class=special>= </span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>first</span><span class=special>, </span><span class=identifier>last</span><span class=special>, </span><span class=identifier>my_rule</span><span class=special>);</span></pre>
<p>Of course, you don't have to deal with the <a href="faq.html#scanner_business">scanner-business</a>
at all if you use grammars rather than rules as arguments to the parse functions.
You simply pass the iterator pairs and the grammar as is:<span class=special><br>
</span></p>
<pre> <span class=identifier>my_grammar </span><span class=identifier>g</span><span class=special>;
</span><span class=identifier>parse_info</span><span class=special>&lt;</span><span class=identifier>iterator_t</span><span class=special>&gt; </span><span class=identifier>info </span><span class=special>= </span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>first</span><span class=special>, </span><span class=identifier>last</span><span class=special>, </span><span class=identifier>g</span><span class=special>);</span></pre>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><img src="theme/bulb.gif" width="13" height="18"><b>
Generic iterator</b><br>
<br>
The Spirit file iterator can be parameterized with any type that is default
constructible and assignable. It transparently supports large files (greater
than 2GB) on systems that provide an appropriate interface. The file iterator
can be useful outside of Spirit as well. For instance, the Boost.Tokenizer
package requires a bidirectional iterator, which is provided by file_iterator.</td>
</tr>
</table>
<p><img src="theme/lens.gif" width="15" height="16"> See <a href="../example/fundamental/file_parser.cpp">file_parser.cpp</a> for a compilable example. This is part of the Spirit distribution.</p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="multi_pass.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="position_iterator.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 2002 Jeff Westfahl</p>
<p class="copyright"><font size="2"> Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
</font> </p>
<p class="copyright">&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,265 @@
<html>
<head>
<title>Functional</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Functional</b></font>
</td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="parametric_parsers.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="phoenix.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>If you look more closely, you'll notice that Spirit is all about composition
of <i>parser functions</i>. A parser is just a function that accepts a scanner
and returns a match. Parser <i>functions</i> are composed to form increasingly
complex <i>higher order forms</i>. Notice too that the parser, albeit an object,
is immutable and constant. All primitive and composite parser objects are <tt>const</tt>.
The parse member function is even declared as <tt>const</tt>:</p>
<pre>
<code><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>typename </span><span class=identifier>parser_result</span><span class=special>&lt;</span><span class=identifier>self_t</span><span class=special>, </span><span class=identifier>ScannerT</span><span class=special>&gt;::</span><span class=identifier>type
</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>ScannerT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>scan</span><span class=special>) </span><span class=keyword>const</span><span class=special>;</span></code></pre>
<p> By all accounts, this looks and feels a lot like <b>Functional Programming</b>.
And indeed it is. Spirit is by all means an application of Functional Programming
in the imperative C++ domain. In Haskell, for example, there are what are called
<a href="references.html#combinators">parser combinators</a>, which are strikingly
similar to the approach taken by Spirit: parser functions which are composed
using various operators to create higher order parser functions that model a
top-down recursive descent parser. Those smart Haskell folks have been doing
this way before Spirit.</p>
<p> Functional style programming (or FP) libraries are gaining momentum in the
C++ community. Certainly, we'll see more of FP in Spirit now and in the future.
Actually, if one looks more closely, even the C++ standard library has an FP
flavor. Stealthily beneath the core of the standard C++ library, a closer look
into STL gives us a glimpse of a truly FP paradigm already in place. It is obvious
that the authors of STL know and practice FP.</p>
<h2>Semantic Actions in the FP Perspective</h2>
<h3>STL style FP</h3>
<p> A more obvious application of STL-style FP in Spirit is the semantic action.
What is STL-style FP? It is primarily the use of functors that can be composed
to form higher order functors.</p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"> <img src="theme/note.gif" width="16" height="16"> <strong>Functors</strong><br>
<br>
A Function Object, or Functor, is simply any object that can be called as
if it were a function. An ordinary function is a function object, and so is
a function pointer; more generally, so is an object of a class that defines
operator(). </td>
</tr>
</table>
<p> This STL-style FP can be seen everywhere these days. The following example
is taken from <a href="https://www.boost.org/sgi/stl/">SGI's Standard Template
Library Programmer's Guide</a>:</p>
<pre>
<code><span class=comment>// Computes sin(x)/(x + DBL_MIN) for each element of a range.
</span><span class=identifier>transform</span><span class=special>(</span><span class=identifier>first</span><span class=special>, </span><span class=identifier>last</span><span class=special>, </span><span class=identifier>first</span><span class=special>,
</span><span class=identifier>compose2</span><span class=special>(</span><span class=identifier>divides</span><span class=special>&lt;</span><span class=keyword>double</span><span class=special>&gt;(),
</span><span class=identifier>ptr_fun</span><span class=special>(</span><span class=identifier>sin</span><span class=special>),
</span><span class=identifier>bind2nd</span><span class=special>(</span><span class=identifier>plus</span><span class=special>&lt;</span><span class=keyword>double</span><span class=special>&gt;(), </span><span class=identifier>DBL_MIN</span><span class=special>)));</span></code></pre>
<p align="left"> Really, this is just <i>currying</i> in FP terminology.</p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"> <img src="theme/lens.gif" width="15" height="16"> <strong>Currying</strong><br>
<br>
What is &quot;currying&quot;, and where does it come from?<br>
<br>
Currying has its origins in the mathematical study of functions. It was
observed by Frege in 1893 that it suffices to restrict attention to functions
of a single argument. For example, for any two parameter function <tt>f(x,y)</tt>,
there is a one parameter function <tt>f'</tt> such that <tt>f'(x)</tt> is
a function that can be applied to y to give <tt>(f'(x))(y) = f (x,y)</tt>.
This corresponds to the well known fact that the sets <tt>(AxB -&gt; C)</tt>
and <tt>(A -&gt; (B -&gt; C))</tt> are isomorphic, where <tt>&quot;x&quot;</tt>
is cartesian product and <tt>&quot;-&gt;&quot;</tt> is function space. In
functional programming, function application is denoted by juxtaposition,
and assumed to associate to the left, so that the equation above becomes
<tt>f' x y = f(x,y)</tt>. </td>
</tr>
</table>
<p> In the context of Spirit, the same FP style functor composition may be applied
to semantic actions. <a href="../example/fundamental/full_calc.cpp">full_calc.cpp</a> is a good example. Here's a snippet from that sample:</p>
<pre>
<code><span class=identifier>expression </span><span class=special>=
</span><span class=identifier>term
</span><span class=special>&gt;&gt; </span><span class=special>*( </span><span class=special>(</span><span class=literal>'+' </span><span class=special>&gt;&gt; </span><span class=identifier>term</span><span class=special>)[</span><span class=identifier>make_op</span><span class=special>(</span><span class=identifier>plus</span><span class=special>&lt;</span><span class=keyword>long</span><span class=special>&gt;(), </span><span class=identifier>self</span><span class=special>.</span><span class=identifier>eval</span><span class=special>)]
</span><span class=special>| </span><span class=special>(</span><span class=literal>'-' </span><span class=special>&gt;&gt; </span><span class=identifier>term</span><span class=special>)[</span><span class=identifier>make_op</span><span class=special>(</span><span class=identifier>minus</span><span class=special>&lt;</span><span class=keyword>long</span><span class=special>&gt;(), </span><span class=identifier>self</span><span class=special>.</span><span class=identifier>eval</span><span class=special>)]
</span><span class=special>)
</span><span class=special>;</span></code></pre>
<p> <img height="16" width="15" src="theme/lens.gif"> The full source code can be <a href="../example/fundamental/full_calc.cpp">viewed here</a>. This is part of the Spirit distribution.</p>
<h3>Boost style FP</h3>
<p> Boost takes the FP paradigm further. There are libraries in boost that focus
specifically on Function objects and higher-order programming.</p>
<table width="90%" border="0" align="center">
<tr>
<td class="table_title" colspan="14"> Boost FP libraries </td>
</tr>
<tr>
<td class="table_cells"><a href="http://www.boost.org/libs/bind/bind.html">bind</a>
and <a href="http://www.boost.org/libs/bind/mem_fn.html">mem_fn</a></td>
<td class="table_cells">Generalized binders for function/object/pointers and
member functions, from Peter Dimov</td>
</tr>
<tr>
<td class="table_cells"><a href="http://www.boost.org/libs/function/index.html">function</a></td>
<td class="table_cells">Function object wrappers for deferred calls or callbacks,
from Doug Gregor</td>
</tr>
<tr>
<td class="table_cells"><a href="http://www.boost.org/libs/functional/index.html">functional</a></td>
<td class="table_cells">Enhanced function object adaptors, from Mark Rodgers</td>
</tr>
<tr>
<td class="table_cells"><a href="http://www.boost.org/libs/lambda/index.html">lambda</a></td>
<td class="table_cells">Define small unnamed function objects at the actual
call site, and more, from Jaakko J&auml;rvi and Gary Powell</td>
</tr>
<tr>
<td class="table_cells"><a href="http://www.boost.org/libs/bind/ref.html">ref</a></td>
<td class="table_cells">A utility library for passing references to generic
functions, from Jaakko J&auml;rvi, Peter Dimov, Doug Gregor, and Dave Abrahams</td>
</tr>
</table>
<p> The following is an example that uses boost <strong>Bind</strong> to use a
member function as a Spirit semantic action. You can see this example in full
in the file <a href="../example/fundamental/bind.cpp">bind.cpp</a>.</p>
<pre>
<code><span class=keyword>class </span><span class=identifier>list_parser
</span><span class=special>{
</span><span class=keyword>public</span><span class=special>:
</span><span class=keyword>typedef </span><span class=identifier>list_parser </span><span class=identifier>self_t</span><span class=special>;
</span><span class=keyword>bool
</span><span class=identifier>parse</span><span class=special>(</span><span class=keyword>char </span><span class=keyword>const</span><span class=special>* </span><span class=identifier>str</span><span class=special>)
</span><span class=special>{
</span><span class=keyword>return </span><span class=identifier>spirit</span><span class=special>::</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>str</span><span class=special>,
</span><span class=comment>// Begin grammar
</span><span class=special>(
</span><span class=identifier>real_p
</span><span class=special>[
</span><span class=identifier>bind</span><span class=special>(&amp;</span><span class=identifier>self_t</span><span class=special>::</span><span class=identifier>add</span><span class=special>, </span><span class=keyword>this</span><span class=special>, </span><span class=identifier>_1</span><span class=special>)
</span><span class=special>]
</span><span class=special>&gt;&gt; </span><span class=special>*( </span><span class=literal>','
</span><span class=special>&gt;&gt; </span><span class=identifier>real_p
</span><span class=special>[
</span><span class=identifier>bind</span><span class=special>(&amp;</span><span class=identifier>self_t</span><span class=special>::</span><span class=identifier>add</span><span class=special>, </span><span class=keyword>this</span><span class=special>, </span><span class=identifier>_1</span><span class=special>)
</span><span class=special>]
</span><span class=special>)
</span><span class=special>)
</span><span class=special>,
</span><span class=comment>// End grammar
</span><span class=identifier>space_p</span><span class=special>).</span><span class=identifier>full</span><span class=special>;
</span><span class=special>}
</span><span class=keyword>void
</span><span class=identifier>add</span><span class=special>(</span><span class=keyword>double </span><span class=identifier>n</span><span class=special>)
</span><span class=special>{
</span><span class=identifier>v</span><span class=special>.</span><span class=identifier>push_back</span><span class=special>(</span><span class=identifier>n</span><span class=special>);
</span><span class=special>}
</span><span class=identifier>vector</span><span class=special>&lt;</span><span class=keyword>double</span><span class=special>&gt; </span><span class=identifier>v</span><span class=special>;
</span><span class=special>};
</span></code></pre>
<p> <img height="16" width="15" src="theme/lens.gif"> The full source code can be <a href="../example/fundamental/bind.cpp">viewed here</a>. This is part of the Spirit distribution.</p>
<p>This parser parses a comma separated list of real numbers and stores them
in a vector&lt;double&gt;. Boost.bind creates a Spirit conforming semantic action
from the <tt>list_parser</tt>'s member function <tt>add</tt>.</p>
<h3>Lambda and Phoenix</h3>
<p> There's a library, authored by yours truly, named <a href="../phoenix/index.html">Phoenix</a>.
While this is not officially part of the Spirit distribution, this library has
been used extensively to experiment on advanced FP techniques in C++. This library
is highly influenced by <a href="https://people.cs.umass.edu/~yannis/fc++/">FC++</a>
and boost Lambda (<a href="http://www.boost.org/libs/lambda/index.html">BLL</a>).</p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"> <b><img src="theme/lens.gif" width="15" height="16">
BLL</b><br>
<br>
Inasmuch as Phoenix is influenced by boost Lambda (<a href="http://www.boost.org/libs/lambda/index.html">BLL</a>),
Phoenix innovations such as local variables, local functions and adaptable
closures, in turn influenced BLL. Currently, BLL is very similar to Phoenix.
Most importantly, BLL incorporated Phoenix's adaptable closures. In the
future, Spirit will fully support BLL. </td>
</tr>
</table>
<p> Phoenix allows one to write semantic actions inline in C++ through lambda
(an unnamed function) expressions. Here's a snippet from the <a href="../example/fundamental/phoenix_calc.cpp">phoenix_calc.cpp</a> example:</p>
<pre>
<code><span class=identifier>expression
</span><span class=special>= </span><span class=identifier>term</span><span class=special>[</span><span class=identifier>expression</span><span class=special>.</span><span class=identifier>val </span><span class=special>= </span><span class=identifier>arg1</span><span class=special>]
</span><span class=special>&gt;&gt; </span><span class=special>*( </span><span class=special>(</span><span class=literal>'+' </span><span class=special>&gt;&gt; </span><span class=identifier>term</span><span class=special>[</span><span class=identifier>expression</span><span class=special>.</span><span class=identifier>val </span><span class=special>+= </span><span class=identifier>arg1</span><span class=special>])
</span><span class=special>| </span><span class=special>(</span><span class=literal>'-' </span><span class=special>&gt;&gt; </span><span class=identifier>term</span><span class=special>[</span><span class=identifier>expression</span><span class=special>.</span><span class=identifier>val </span><span class=special>-= </span><span class=identifier>arg1</span><span class=special>])
</span><span class=special>)
</span><span class=special>;
</span><span class=identifier>term
</span><span class=special>= </span><span class=identifier>factor</span><span class=special>[</span><span class=identifier>term</span><span class=special>.</span><span class=identifier>val </span><span class=special>= </span><span class=identifier>arg1</span><span class=special>]
</span><span class=special>&gt;&gt; </span><span class=special>*( </span><span class=special>(</span><span class=literal>'*' </span><span class=special>&gt;&gt; </span><span class=identifier>factor</span><span class=special>[</span><span class=identifier>term</span><span class=special>.</span><span class=identifier>val </span><span class=special>*= </span><span class=identifier>arg1</span><span class=special>])
</span><span class=special>| </span><span class=special>(</span><span class=literal>'/' </span><span class=special>&gt;&gt; </span><span class=identifier>factor</span><span class=special>[</span><span class=identifier>term</span><span class=special>.</span><span class=identifier>val </span><span class=special>/= </span><span class=identifier>arg1</span><span class=special>])
</span><span class=special>)
</span><span class=special>;
</span><span class=identifier>factor
</span><span class=special>= </span><span class=identifier>ureal_p</span><span class=special>[</span><span class=identifier>factor</span><span class=special>.</span><span class=identifier>val </span><span class=special>= </span><span class=identifier>arg1</span><span class=special>]
</span><span class=special>| </span><span class=literal>'(' </span><span class=special>&gt;&gt; </span><span class=identifier>expression</span><span class=special>[</span><span class=identifier>factor</span><span class=special>.</span><span class=identifier>val </span><span class=special>= </span><span class=identifier>arg1</span><span class=special>] </span><span class=special>&gt;&gt; </span><span class=literal>')'
</span><span class=special>| </span><span class=special>(</span><span class=literal>'-' </span><span class=special>&gt;&gt; </span><span class=identifier>factor</span><span class=special>[</span><span class=identifier>factor</span><span class=special>.</span><span class=identifier>val </span><span class=special>= </span><span class=special>-</span><span class=identifier>arg1</span><span class=special>])
</span><span class=special>| </span><span class=special>(</span><span class=literal>'+' </span><span class=special>&gt;&gt; </span><span class=identifier>factor</span><span class=special>[</span><span class=identifier>factor</span><span class=special>.</span><span class=identifier>val </span><span class=special>= </span><span class=identifier>arg1</span><span class=special>])
</span><span class=special>;</span></code></pre>
<p> <img height="16" width="15" src="theme/lens.gif"> The full source code can be <a href="../example/fundamental/phoenix_calc.cpp">viewed here</a>. This is part of the Spirit distribution.</p>
<p>You do not have to worry about the details for now. There is a lot going on here that needs to be explained. The succeeding chapters will be enlightening.</p>
<p>Notice the use of lambda expressions such as:</p>
<pre>
<code><span class=identifier>expression</span><span class=special>.</span><span class=identifier>val </span><span class=special>+= </span><span class=identifier>arg1</span></code></pre>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"> <b><img src="theme/lens.gif" width="15" height="16">
<a name="lambda"></a>Lambda Expressions?</b><br>
<br>
Lambda expressions are actually unnamed partially applied functions where
placeholders (e.g. arg1, arg2) are provided in place of some of the arguments.
The reason this is called a lambda expression is that traditionally, such
placeholders are written using the Greek letter lambda <img src="theme/lambda.png" width="15" height="22">.</td>
</tr>
</table>
<p>where <tt>expression.val</tt> is a closure variable of the expression rule
(see <a href="closures.html">Closures</a>). <code><span class=identifier><tt>arg1</tt></span></code>
is a placeholder for the first argument that the semantic action will receive
(see <a href="../phoenix/doc/place_holders.html">Phoenix Place-holders</a>).
In Boost.Lambda (BLL), this corresponds to <tt>_1</tt>. </p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="parametric_parsers.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="phoenix.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)</font></p>
<p class="copyright">&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,108 @@
<html>
<head>
<title>Functor Parser</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Functor
Parser</b></font> </td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="list_parsers.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="refactoring.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>The simplest way to write your hand coded parser that works well with the rest
of the Spirit library is to simply write a functor parser.</p>
<p> A functor parser is expected to have the interface:</p>
<pre>
<code><span class=keyword>struct </span><span class=identifier>functor
</span><span class=special>{
</span><span class=keyword>typedef </span><span class=identifier>T </span><span class=identifier>result_t</span><span class=special>;
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>std::ptrdiff_t
</span><span class=keyword>operator</span><span class=special>()(</span><span class=identifier>ScannerT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>scan</span><span class=special>, </span><span class=identifier>result_t</span><span class=special>&amp; </span><span class=identifier>result</span><span class=special>) </span><span class=keyword>const</span><span class=special>;
</span><span class=special>};
</span></code></pre>
<p> where typedef T result_t; is the attribute type of the parser that will be
passed back to the match result (see <a href="indepth_the_parser.html">In-depth:
The Parser</a>). If the parser does not need to return an attribute, this can
simply be nil_t. The <span class=keyword><tt>std::ptrdiff_t</tt></span> result
is the number of characters matched by your parser. A negative value
flags an unsuccessful match.</p>
<p> A conforming functor parser can be transformed into a well formed Spirit parser
by wrapping it in the functor_parser template:</p>
<pre>
<code><span class=identifier>functor_parser</span><span class=special>&lt;</span><span class=identifier>functor</span><span class=special>&gt; </span><span class=identifier>functor_p</span><span class=special>;
</span></code></pre>
<h2>Example</h2>
<p> The following example puts the functor_parser into action:</p>
<pre>
<code><span class=keyword>struct </span><span class=identifier>number_parser
</span><span class=special>{
</span><span class=keyword>typedef </span><span class=keyword>int </span><span class=identifier>result_t</span><span class=special>;
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>std::ptrdiff_t</span>
<span class=keyword>operator</span><span class=special>()(</span><span class=identifier>ScannerT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>scan</span><span class=special>, </span><span class=identifier>result_t</span><span class=special>&amp; </span><span class=identifier>result</span><span class=special>) </span><span class=keyword>const
</span><span class=special>{
</span><span class=keyword>if </span><span class=special>(</span><span class=identifier>scan</span><span class=special>.</span><span class=identifier>at_end</span><span class=special>())
</span><span class=keyword>return </span><span class=special>-</span><span class=number>1</span><span class=special>;
</span><span class=keyword>char </span><span class=identifier>ch </span><span class=special>= </span><span class=special>*</span><span class=identifier>scan</span><span class=special>;
</span><span class=keyword>if </span><span class=special>(</span><span class=identifier>ch </span><span class=special>&lt; </span><span class=literal>'0' </span><span class=special>|| </span><span class=identifier>ch </span><span class=special>&gt; </span><span class=literal>'9'</span><span class=special>)
</span><span class=keyword>return </span><span class=special>-</span><span class=number>1</span><span class=special>;
</span><span class=identifier>result </span><span class=special>= </span><span class=number>0</span><span class=special>;
</span><span class=keyword>std::ptrdiff_t</span> <span class=identifier>len </span><span class=special>= </span><span class=number>0</span><span class=special>;
</span><span class=keyword>do
</span><span class=special>{
</span><span class=identifier>result </span><span class=special>= </span><span class=identifier>result</span><span class=special>*</span><span class=number>10 </span><span class=special>+ </span><span class=keyword>int</span><span class=special>(</span><span class=identifier>ch </span><span class=special>- </span><span class=literal>'0'</span><span class=special>);
</span><span class=special>++</span><span class=identifier>len</span><span class=special>;
</span><span class=special>++</span><span class=identifier>scan</span><span class=special>;
</span><span class=special>} </span><span class=keyword>while </span><span class=special>(!</span><span class=identifier>scan</span><span class=special>.</span><span class=identifier>at_end</span><span class=special>() </span><span class=special>&amp;&amp; </span><span class=special>(</span><span class=identifier>ch </span><span class=special>= </span><span class=special>*</span><span class=identifier>scan</span><span class=special>, </span><span class=identifier>ch </span><span class=special>&gt;= </span><span class=literal>'0' </span><span class=special>&amp;&amp; </span><span class=identifier>ch </span><span class=special>&lt;= </span><span class=literal>'9'</span><span class=special>));
</span><span class=keyword>return </span><span class=identifier>len</span><span class=special>;
</span><span class=special>}
</span><span class=special>};
</span><span class=identifier>functor_parser</span><span class=special>&lt;</span><span class=identifier>number_parser</span><span class=special>&gt; </span><span class=identifier>number_parser_p</span><span class=special>;
</span></code></pre>
<p> <img src="theme/lens.gif" width="15" height="16"> The full source code can be <a href="../example/fundamental/functor_parser.cpp">viewed here</a>. This is part of the Spirit distribution. </p>
<p>To further understand the implementation, see <a href="indepth_the_scanner.html">In-depth:
The Scanner</a> for the scanner API details. We now have a parser <tt>number_parser_p</tt> that we can use just like any other Spirit parser. Example:</p>
<pre>
<code><span class=identifier>r </span><span class=special>= </span><span class=identifier>number_parser_p </span><span class=special>&gt;&gt; </span><span class=special>*(</span><span class=literal>',' </span><span class=special>&gt;&gt; </span><span class=identifier>number_parser_p</span><span class=special>);
</span></code></pre>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="list_parsers.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="refactoring.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)</font></p>
<p class="copyright">&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,271 @@
<html>
<head>
<title>The Grammar</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%">
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>The Grammar</b></font>
</td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="scanner.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="subrules.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>The <b>grammar</b> encapsulates a set of rules. The <tt>grammar</tt> class
is a protocol base class. It is essentially an interface contract. The <tt>grammar</tt>
is a template class that is parameterized by its derived class, <tt>DerivedT</tt>,
and its context, <tt>ContextT</tt>. The template parameter ContextT defaults
to <tt>parser_context</tt>, a predefined context. </p>
<p>You need not be concerned at all with the ContextT template parameter unless
you wish to tweak the low level behavior of the grammar. Detailed information
on the ContextT template parameter is provided <a href="indepth_the_parser_context.html">elsewhere</a>.
The <tt>grammar</tt> relies on the template parameter DerivedT, a grammar subclass,
to define the actual rules.</p>
<p>Presented below is the public API. There may actually be more template parameters
after <tt>ContextT</tt>. Everything after the <tt>ContextT</tt> parameter should
not be of concern to the client and is strictly for internal use only.</p>
<pre><code><font color="#000000"><span class=identifier> </span><span class=keyword>template</span><span class=special>&lt;
</span><span class=keyword>typename </span><span class=identifier>DerivedT</span><span class=special>,
</span><span class=keyword>typename </span><span class=identifier>ContextT </span><span class=special>= </span><span class=identifier>parser_context</span><span class=special>&lt;</span><span class=special>&gt; &gt;
</span><span class=keyword>struct </span><span class=identifier>grammar</span><span class=special>;</span></font></code></pre>
<h2>Grammar definition</h2>
<p>A concrete sub-class inheriting from <tt>grammar</tt> is expected to have a
nested template class (or struct) named <tt>definition</tt>:</p>
<blockquote>
<p><img src="theme/bullet.gif" width="13" height="13"> It is a nested template
class with a typename <tt>ScannerT</tt> parameter.<br>
<img src="theme/bullet.gif" width="13" height="13"> Its constructor defines
the grammar rules.<br>
<img src="theme/bullet.gif" width="13" height="13"> Its constructor is passed
in a reference to the actual grammar <tt>self</tt>.<br>
<img src="theme/bullet.gif" width="13" height="13"> It has a member function
named <tt>start</tt> that returns a reference to the start <tt>rule</tt>.</p>
</blockquote>
<h2>Grammar skeleton</h2>
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>struct </span><span class=identifier>my_grammar </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>grammar</span><span class=special>&lt;</span><span class=identifier>my_grammar</span><span class=special>&gt;
</span><span class=special>{
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>definition
</span><span class=special>{
</span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>&gt; </span><span class=identifier>r</span><span class=special>;
</span><span class=identifier>definition</span><span class=special>(</span><span class=identifier>my_grammar </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>self</span><span class=special>) </span><span class=special>{ </span><span class=identifier>r </span><span class=special>= </span><span class=comment>/*..define here..*/</span><span class=special>; </span><span class=special>}
</span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>&gt; </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>start</span><span class=special>() </span><span class=keyword>const </span><span class=special>{ </span><span class=keyword>return </span><span class=identifier>r</span><span class=special>; </span><span class=special>}
</span><span class=special>};
</span><span class=special>};</span></font></code></pre>
<p>Decoupling the scanner type from the rules that form a grammar allows the grammar
to be used in different contexts possibly using different scanners. We do not
care what scanner we are dealing with. The user-defined <tt>my_grammar</tt>
can be used with <b>any</b> type of scanner. Unlike the rule, the grammar is
not tied to a specific scanner type. See <a href="faq.html#scanner_business">&quot;Scanner
Business&quot;</a> to see why this is important and to gain further understanding
on this scanner-rule coupling problem.</p>
<h2>Instantiating and using my_grammar</h2>
<p>Our grammar above may be instantiated and put into action:</p>
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>my_grammar </span><span class=identifier>g</span><span class=special>;
</span><span class=keyword>if </span><span class=special>(</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>first</span><span class=special>, </span><span class=identifier>last</span><span class=special>, </span><span class=identifier>g</span><span class=special>, </span><span class=identifier>space_p</span><span class=special>).</span><span class=identifier>full</span><span class=special>)
</span><span class=identifier>cout </span><span class=special>&lt;&lt; </span><span class=string>"parsing succeeded\n"</span><span class=special>;
</span><span class=keyword>else
</span><span class=identifier>cout </span><span class=special>&lt;&lt; </span><span class=string>"parsing failed\n"</span><span class=special>;</span></font></code></pre>
<p><tt>my_grammar</tt> <b>IS-A </b>parser and can be used anywhere a parser is
expected, even referenced by another rule:</p>
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>rule</span><span class=special>&lt;&gt; </span><span class=identifier>r </span><span class=special>= </span><span class=identifier>g </span><span class=special>&gt;&gt; </span><span class=identifier>str_p</span><span class=special>(</span><span class=string>"cool huh?"</span><span class=special>);</span></font></code></pre>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><img src="theme/alert.gif" width="16" height="16"> <b>Referencing
grammars<br>
</b><br>
Like the rule, the grammar is also held by reference when it is placed in
the right hand side of an EBNF expression. It is the responsibility of the
client to ensure that the referenced grammar stays in scope and does not
get destructed while it is being referenced. </td>
</tr>
</table>
<h2><a name="full_grammar"></a>Full Grammar Example</h2>
<p>Recalling our original calculator example, here it is now rewritten using a
grammar:</p>
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>struct </span><span class=identifier>calculator </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>grammar</span><span class=special>&lt;</span><span class=identifier>calculator</span><span class=special>&gt;
</span><span class=special>{
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>definition
</span><span class=special>{
</span><span class=identifier>definition</span><span class=special>(</span><span class=identifier>calculator </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>self</span><span class=special>)
</span><span class=special>{
</span><span class=identifier>group </span><span class=special>= </span><span class=literal>'(' </span><span class=special>&gt;&gt; </span><span class=identifier>expression </span><span class=special>&gt;&gt; </span><span class=literal>')'</span><span class=special>;
</span><span class=identifier>factor </span><span class=special>= </span><span class=identifier>integer </span><span class=special>| </span><span class=identifier>group</span><span class=special>;
</span><span class=identifier>term </span><span class=special>= </span><span class=identifier>factor </span><span class=special>&gt;&gt; </span><span class=special>*((</span><span class=literal>'*' </span><span class=special>&gt;&gt; </span><span class=identifier>factor</span><span class=special>) </span><span class=special>| </span><span class=special>(</span><span class=literal>'/' </span><span class=special>&gt;&gt; </span><span class=identifier>factor</span><span class=special>));
</span><span class=identifier>expression </span><span class=special>= </span><span class=identifier>term </span><span class=special>&gt;&gt; </span><span class=special>*((</span><span class=literal>'+' </span><span class=special>&gt;&gt; </span><span class=identifier>term</span><span class=special>) </span><span class=special>| </span><span class=special>(</span><span class=literal>'-' </span><span class=special>&gt;&gt; </span><span class=identifier>term</span><span class=special>));
</span><span class=special>}
</span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>&gt; </span><span class=identifier>expression</span><span class=special>, </span><span class=identifier>term</span><span class=special>, </span><span class=identifier>factor</span><span class=special>, </span><span class=identifier>group</span><span class=special>;
</span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>&gt; </span><span class=keyword>const</span><span class=special>&
</span><span class=identifier>start</span><span class=special>() </span><span class=keyword>const </span><span class=special>{ </span><span class=keyword>return </span><span class=identifier>expression</span><span class=special>; </span><span class=special>}
</span><span class=special>};
</span><span class=special>};</span></font></code></pre>
<p><img src="theme/lens.gif" width="15" height="16"> A fully working example with
<a href="semantic_actions.html">semantic actions</a> can be <a href="../example/fundamental/calc_plain.cpp">viewed
here</a>. This is part of the Spirit distribution. </p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><img src="theme/lens.gif" width="15" height="16"> <b>self</b><br>
<br>
You might notice that the definition of the grammar has a constructor that
accepts a const reference to the outer grammar. In the example above, notice
that <tt>calculator::definition</tt> takes in a <tt>calculator const&amp;
self</tt>. While this is unused in the example above, in many cases, this
is very useful. The self argument is the definition's window to the outside
world. For example, the calculator class might have a reference to some
state information that the definition can update while parsing proceeds
through <a href="semantic_actions.html">semantic actions</a>. </td>
</tr>
</table>
<h2>Grammar Capsules</h2>
<p>As a grammar becomes complicated, it is a good idea to group parts into logical
modules. For instance, when writing a language, it might be wise to put expressions
and statements into separate grammar capsules. The grammar takes advantage of
the encapsulation properties of C++ classes. The declarative nature of classes
makes it a perfect fit for the definition of grammars. Since the grammar is
nothing more than a class declaration, we can conveniently publish it in header
files. The idea is that once written and fully tested, a grammar can be reused
in many contexts. We now have the notion of grammar libraries.</p>
<h2><a name="multithreading"></a>Reentrancy and multithreading</h2>
<p>An instance of a grammar may be used in different places multiple times without
any problem. The implementation is tuned to allow this at the expense of some
overhead. However, we can save considerable cycles and bytes if we are certain
that a grammar will only have a single instance. If this is desired, simply
define <tt>BOOST_SPIRIT_SINGLE_GRAMMAR_INSTANCE</tt> before including any spirit
header files.</p>
<pre><font face="Courier New, Courier, mono"><code><span class="preprocessor"> #define</span></code></font><span class="preprocessor"><code><font face="Courier New, Courier, mono"> </font><tt>BOOST_SPIRIT_SINGLE_GRAMMAR_INSTANCE</tt></code></span></pre>
<p> On the other hand, if a grammar is intended to be used in multithreaded code,
we should then define <tt>BOOST_SPIRIT_THREADSAFE</tt> before including any
spirit header files. In this case it will also be required to link against <a href="http://www.boost.org/libs/thread/doc/index.html">Boost.Threads</a>.</p>
<pre><font face="Courier New, Courier, mono"><span class="preprocessor"> #define</span></font> <span class="preprocessor"><tt>BOOST_SPIRIT_THREADSAFE</tt></span></pre>
<h2>Using more than one grammar start rule </h2>
<p>Sometimes it is desirable to have more than one visible entry point to a grammar
(apart from the start rule). To allow additional start points, Spirit provides
a helper template <tt>grammar_def</tt>, which may be used as a base class for
the <tt>definition</tt> subclass of your <tt>grammar</tt>. Here's an example:</p>
<pre><code> <span class="comment">// this header has to be explicitly included</span>
<span class="preprocessor">#include</span> <span class="string">&lt;boost/spirit/utility/grammar_def.hpp&gt;</span>
<span class=keyword>struct </span><span class=identifier>calculator2 </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>grammar</span><span class=special>&lt;</span><span class=identifier>calculator2</span><span class=special>&gt;
{
</span> <span class="keyword">enum</span>
{
expression = 0,
term = 1,
factor = 2,
};
<span class=special> </span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>definition
</span><span class="special">:</span> <span class="keyword">public</span><span class=identifier> grammar_def</span><span class="special">&lt;</span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>&gt;,</span> same<span class="special">,</span> same<span class="special">&gt;</span>
<span class=special>{</span>
<span class=identifier>definition</span><span class=special>(</span><span class=identifier>calculator2 </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>self</span><span class=special>)
{
</span><span class=identifier>group </span><span class=special>= </span><span class=literal>'(' </span><span class=special>&gt;&gt; </span><span class=identifier>expression </span><span class=special>&gt;&gt; </span><span class=literal>')'</span><span class=special>;
</span><span class=identifier>factor </span><span class=special>= </span><span class=identifier>integer </span><span class=special>| </span><span class=identifier>group</span><span class=special>;
</span><span class=identifier>term </span><span class=special>= </span><span class=identifier>factor </span><span class=special>&gt;&gt; *((</span><span class=literal>'*' </span><span class=special>&gt;&gt; </span><span class=identifier>factor</span><span class=special>) | (</span><span class=literal>'/' </span><span class=special>&gt;&gt; </span><span class=identifier>factor</span><span class=special>));
</span><span class=identifier>expression </span><span class=special>= </span><span class=identifier>term </span><span class=special>&gt;&gt; *((</span><span class=literal>'+' </span><span class=special>&gt;&gt; </span><span class=identifier>term</span><span class=special>) | (</span><span class=literal>'-' </span><span class=special>&gt;&gt; </span><span class=identifier>term</span><span class=special>));</span>
<span class="keyword">this</span><span class="special">-&gt;</span>start_parsers<span class="special">(</span>expression<span class="special">,</span> term<span class="special">,</span> factor<span class="special">);</span>
<span class="special">}</span>
<span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>&gt; </span><span class=identifier>expression</span><span class=special>, </span><span class=identifier>term</span><span class=special>, </span><span class=identifier>factor, group</span><span class=special>;
</span><span class=special> };
};</span></code></pre>
<p>The <tt>grammar_def</tt> template has to be instantiated with the types of
all the rules you wish to make visible from outside the <tt>grammar</tt>:</p>
<pre><code><span class=identifier> </span><span class=identifier>grammar_def</span><span class="special">&lt;</span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>&gt;,</span> same<span class="special">,</span> same<span class="special">&gt;</span></code> </pre>
<p>The shorthand notation <tt>same</tt> indicates that the same type should be used
as was specified for the previous template parameter (e.g. <code><tt>rule&lt;ScannerT&gt;</tt></code>).
Obviously, <tt>same</tt> may not be used as the first template parameter. </p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"> <img src="theme/bulb.gif" width="13" height="18"> <strong>grammar_def
start types</strong><br>
<br>
It may not be obvious, but it is interesting to note that aside from rule&lt;&gt;s,
any parser type may be specified (e.g. chlit&lt;&gt;, strlit&lt;&gt;, int_parser&lt;&gt;,
etc.).</td>
</tr>
</table>
<p>Using the grammar_def class, there is no need to provide a <tt>start()</tt> member
function anymore. Instead, you'll have to insert a call to <tt>this-&gt;start_parsers()</tt>
(which is a member function of the <tt>grammar_def</tt> template) to define
the start symbols for your <tt>grammar</tt>. <img src="theme/note.gif" width="16" height="16">
Note that the number and the sequence of the rules used as the parameters to
the <tt>start_parsers()</tt> function should match the types specified in the
<tt>grammar_def</tt> template:</p>
<pre><code> <span class="keyword">this</span><span class="special">-&gt;</span>start_parsers<span class="special">(</span>expression<span class="special">,</span> term<span class="special">,</span> factor<span class="special">);</span></code></pre>
<p> The grammar entry point may be specified using the following syntax:</p>
<pre><code><font color="#000000"><span class=identifier> g</span><span class="special">.</span><span class=identifier>use_parser</span><span class="special">&lt;</span><span class=identifier>N</span><span class=special>&gt;() </span><span class="comment">// Where g is your grammar and N is the Nth entry.</span></font></code></pre>
<p>This sample shows how to use the <tt>term</tt> rule from the <tt>calculator2</tt>
grammar above:</p>
<pre><code><font color="#000000"><span class=identifier> calculator2 g</span><span class=special>;
</span><span class=keyword>if </span><span class=special>(</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>
first</span><span class=special>, </span><span class=identifier>last</span><span class=special>,
</span><span class=identifier>g</span><span class="special">.</span><span class=identifier>use_parser</span><span class="special">&lt;</span><span class=identifier>calculator2::term</span><span class=special>&gt;(),</span><span class=identifier>
space_p</span><span class=special>
).</span><span class=identifier>full</span><span class=special>)
{
</span><span class=identifier>cout </span><span class=special>&lt;&lt; </span><span class=string>"parsing succeeded\n"</span><span class=special>;
}
</span><span class=keyword>else</span> <span class="special">{</span>
<span class=identifier>cout </span><span class=special>&lt;&lt; </span><span class=string>"parsing failed\n"</span><span class=special>;
}</span></font></code></pre>
<p>The template parameter for the <tt>use_parser&lt;&gt;</tt> template type should
be the zero-based index into the list of rules specified in the <tt>start_parsers()</tt>
function call. </p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><img src="theme/note.gif" width="16" height="16"> <tt><strong>use_parser&lt;0&gt;</strong></tt><br>
<br>
Note that using <span class="literal">0</span> (zero) as the template parameter
to <tt>use_parser</tt> is equivalent to using the start rule, exported by
conventional means through the <tt>start()</tt> function, as shown in the
first <tt><a href="grammar.html#full_grammar">calculator</a></tt> sample
above. So this notation may be used even for grammars that export only one rule
through their <tt>start()</tt> function. On the other hand, calling a
<tt>grammar</tt> without the <tt>use_parser</tt> notation will execute the
rule specified as the first parameter to the <tt>start_parsers()</tt> function.
</td>
</tr>
</table>
<p>The maximum number of usable start rules is limited by the preprocessor constant:</p>
<pre> <span class="identifier">BOOST_SPIRIT_GRAMMAR_STARTRULE_TYPE_LIMIT</span> <span class="comment">// defaults to 3</span></pre>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="scanner.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="subrules.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
Copyright &copy; 2003-2004 Hartmut Kaiser <br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
<p>&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,114 @@
<html>
<head>
<title>Includes</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Includes</b></font>
</td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="quickref.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="portability.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<h2>Modules</h2>
<p>Spirit is designed to be header only. Generally, there are no libraries to build
and link against. Certain features, however, require additional libraries; in particular
the <a href="regular_expression_parser.html">regular expression parser</a> requires
<a href="http://www.boost.org/libs/regex/index.html">Boost.Regex</a> and <a href="grammar.html#multithreading">multithreading support</a>
requires <a href="http://www.boost.org/libs/thread/doc/index.html">Boost.Threads.</a></p>
<p>Using Spirit is as easy as including the main header file:</p>
<pre> <span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">.</span>hpp<span class="special">&gt;</span></pre>
<p>Doing so will include all the header files. This might not be desirable. A
low cholesterol alternative is to include only the module that you need. Each
of the modules has its own header file. The master spirit header file actually
includes all the module files. To avoid unnecessary inclusion of features that
you do not need, it is better to include only the modules that you need.</p>
<pre> <span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">.</span>hpp<span class="special">&gt;<br> </span><span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>attribute<span class="special">.</span>hpp<span class="special">&gt;
<span class="preprocessor">#include</span> &lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>core<span class="special">.</span>hpp<span class="special">&gt;</span><span class="special">
</span><span class="special"></span><span class="special"> <span class="preprocessor">#include</span> &lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>debug<span class="special">.</span>hpp<span class="special">&gt;</span><span class="special">
</span><span class="special"> <span class="preprocessor">#include</span> &lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>dynamic<span class="special">.</span>hpp<span class="special">&gt;</span><span class="special">
<span class="preprocessor">#include</span> &lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>error_handling<span class="special">.</span>hpp<span class="special">&gt;</span><span class="special"></span><span class="special">
<span class="preprocessor">#include</span> &lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>iterator<span class="special">.</span>hpp<span class="special">&gt;
<span class="preprocessor">#include</span> &lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>meta<span class="special">.</span>hpp<span class="special">&gt;</span><span class="special">
</span><span class="special"></span><span class="special"> <span class="preprocessor">#include</span> &lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>symbols<span class="special">.</span>hpp<span class="special">&gt;</span><span class="special"></span><span class="special">
<span class="preprocessor">#include</span> &lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>tree<span class="special">.</span>hpp<span class="special">&gt;</span><span class="special"></span><span class="special">
<span class="preprocessor">#include</span> &lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>utility<span class="special">.</span>hpp<span class="special">&gt;</span><span class="special"></span><span class="special"></span></pre>
<h2>Sub-Modules</h2>
<p> For even finer control over header file inclusion, you can include only the
specific files that you need. Each module is in its own sub-directory:</p>
<h3>actor</h3>
<pre> <span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">assign_actor</span><span class="special">.</span>hpp<span class="special">&gt;<br></span> <span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">assign_key</span><span class="special">.</span>hpp<span class="special">&gt;</span><span class="special"></span><span class="special"></span>
<span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">clear_actor</span><span class="special">.</span>hpp<span class="special">&gt;</span><span class="special"></span><span class="special"></span>
<span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">decrement_actor</span><span class="special">.</span>hpp<span class="special">&gt;</span><span class="special"></span><span class="special"></span>
<span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">erase_actor</span><span class="special">.</span>hpp<span class="special">&gt;</span><span class="special"></span> <br> <span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">increment_actor</span><span class="special">.</span>hpp<span class="special">&gt;<br></span> <span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">insert_key_actor</span><span class="special">.</span>hpp<span class="special">&gt;</span><span class="special"></span><span class="special"></span>
<span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">push_back_actor</span><span class="special">.</span>hpp<span class="special">&gt;</span><span class="special"></span><span class="special"></span>
<span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">push_front_actor</span><span class="special">.</span>hpp<span class="special">&gt;</span><span class="special"></span><span class="special"></span>
<span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">swap_actor</span><span class="special">.</span>hpp<span class="special">&gt;</span><span class="special"></span><span class="special"></span></pre>
<h3>attribute</h3>
<pre> <span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>attribute<span class="special">/</span><span class="identifier">closure</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">&gt;<br></span> <span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>attribute<span class="special">/</span>closure_context.hpp<span class="special">&gt;</span><span class="special"></span><span class="special"></span>
<span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>attribute<span class="special">/</span>parametric.hpp<span class="special">&gt;</span></pre>
<h3>debug</h3>
<p><img src="theme/alert.gif" width="16" height="16"> The debug module should
not be directly included. See <a href="debugging.html">Debugging</a> for more
info on how to use Spirit's debugger. </p>
<h3>dynamic</h3>
<pre> <span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>dynamic<span class="special">/</span><span class="identifier">for</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">&gt;<br></span> <span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>dynamic<span class="special">/</span>if.hpp<span class="special">&gt;</span><span class="special"></span><span class="special"></span>
<span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">dynamic</span><span class="special">/</span>lazy.hpp<span class="special">&gt; <br> </span><span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">dynamic</span><span class="special">/</span>rule_alias.hpp<span class="special">&gt;
</span><span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">dynamic</span><span class="special">/</span>select.hpp<span class="special">&gt;
</span><span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">dynamic</span><span class="special">/</span>stored_rule.hpp<span class="special">&gt;
</span><span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">dynamic</span><span class="special">/</span>switch.hpp<span class="special">&gt;
</span><span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">dynamic</span><span class="special">/</span>while.hpp<span class="special">&gt; </span></pre>
<h3>error_handling</h3>
<pre> <span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>error_handling<span class="special">/</span><span class="identifier">exceptions</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">&gt;</span></pre>
<h3>iterator</h3>
<pre> <span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>iterator<span class="special">/</span><span class="identifier">file_iterator</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">&gt;<br></span> <span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">iterator</span><span class="special">/</span>fixed_size_queue.hpp<span class="special">&gt;</span><span class="special"></span><span class="special"></span>
<span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">iterator</span><span class="special">/</span>multi_pass.hpp<span class="special">&gt; <br> </span><span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">iterator</span><span class="special">/</span>position_iterator.hpp<span class="special">&gt;</span></pre>
<h3>meta</h3>
<pre> <span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>meta<span class="special">/</span><span class="identifier">as_parser</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">&gt;<br></span> <span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">meta</span><span class="special">/</span>fundamental.hpp<span class="special">&gt;</span><span class="special"></span><span class="special"></span>
<span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">meta</span><span class="special">/</span>parser_traits.hpp<span class="special">&gt; <br> </span><span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">meta</span><span class="special">/</span>refactoring.hpp<span class="special">&gt;<br> </span><span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">meta</span><span class="special">/</span>traverse.hpp<span class="special">&gt;</span></pre>
<h3>tree</h3>
<pre> <span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">tree</span><span class="special">/</span><span class="identifier">ast</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">&gt;<br></span> <span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">tree</span><span class="special">/</span>parse_tree.hpp<span class="special">&gt;</span><span class="special"></span><span class="special"></span>
<span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">tree</span><span class="special">/</span>parse_tree_utils.hpp<span class="special">&gt;</span><span class="special"><br> </span><span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">tree</span><span class="special">/</span>tree_to_xml.hpp<span class="special">&gt;</span></pre>
<h3>utility</h3>
<pre> <span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">utility</span><span class="special">/</span><span class="identifier">chset</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">&gt; <br></span> <span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">utility</span><span class="special">/</span><span class="identifier">chset_operators</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">&gt;<br> </span><span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">utility</span><span class="special">/</span><span class="identifier">confix</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">&gt;
</span><span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">utility</span><span class="special">/</span><span class="identifier">distinct</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">&gt;
</span><span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">utility</span><span class="special">/</span><span class="identifier">escape_char</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">&gt;
</span><span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">utility</span><span class="special">/</span><span class="identifier">flush_multi_pass</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">&gt;
</span><span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">utility</span><span class="special">/</span><span class="identifier">functor_parser</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">&gt;
</span><span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">utility</span><span class="special">/</span><span class="identifier">lists</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">&gt;
</span><span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">utility</span><span class="special">/</span><span class="identifier">loops</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">&gt;
</span><span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">utility</span><span class="special">/</span><span class="identifier">regex</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">&gt;
</span><span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span><span class="identifier">utility</span><span class="special">/</span><span class="identifier">scoped_lock</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">&gt;
</span></pre>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="quickref.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="portability.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)</font></p>
<p class="copyright">&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,287 @@
<html>
<head>
<title>In-depth: The Parser</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%">
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>In-depth: The Parser</b></font>
</td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="semantic_actions.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="indepth_the_scanner.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>What makes Spirit tick? Now on to some details... The parser class is the most
fundamental entity in the framework. A parser accepts a scanner comprised of
a first-last iterator pair and returns a match object as its result. The iterators
delimit the data currently being parsed. The match object evaluates to true
if the parse succeeds, in which case the input is advanced accordingly. Each
parser can represent a specific pattern or algorithm, or it can be a more complex
parser formed as a composition of other parsers.</p>
<p>All parsers inherit from the base template class, parser:</p>
<pre>
<span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>DerivedT</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>parser
</span><span class=special>{
</span><span class=comment>/*...*/
</span><span class=identifier>DerivedT</span><span class=special>&amp; </span><span class=identifier>derived</span><span class=special>();
</span><span class=identifier>DerivedT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>derived</span><span class=special>() </span><span class=keyword>const</span><span class=special>;
</span><span class=special>};</span></pre>
<p>This class is a protocol base class for all parsers. The parser class does
not really know how to parse anything but instead relies on the template parameter
<tt>DerivedT</tt> to do the actual parsing. This technique is known as the <a href="references.html#curious_recurring">&quot;Curiously
Recurring Template Pattern&quot;</a> in template meta-programming circles. This
inheritance strategy gives us the power of polymorphism without the virtual
function overhead. In essence this is a way to implement <a href="references.html#generic_patterns">compile
time polymorphism</a>.</p>
<h2> parser_category_t</h2>
<p> Each derived parser has a typedef <tt>parser_category_t</tt> that defines
its category. By default, if one is not specified, it will inherit from the
base parser class which typedefs its parser_category_t as <tt>plain_parser_category</tt>.
Some template classes are provided to distinguish different types of parsers.
The following categories are the most generic. More specific types may inherit
from these.</p>
<table width="90%" border="0" align="center">
<tr>
<td colspan="2" class="table_title">Parser categories</td>
</tr>
<tr>
<td class="table_cells" width="33%"><tt>plain_parser_category</tt></td>
<td class="table_cells" width="67%">Your plain vanilla parser</td>
</tr>
<tr>
<td class="table_cells" width="33%"><tt>binary_parser_category</tt></td>
<td class="table_cells" width="67%">A parser that has two subjects, a and b (e.g.
alternative)</td>
</tr>
<tr>
<td class="table_cells" width="33%"><tt>unary_parser_category</tt></td>
<td class="table_cells" width="67%">A parser that has a single subject (e.g.
kleene star)</td>
</tr>
<tr>
<td class="table_cells" width="33%"><tt>action_parser_category</tt></td>
<td class="table_cells" width="67%">A parser with an attached semantic action</td>
</tr>
</table>
<pre><span class=identifier> </span><span class=keyword>struct </span><span class=identifier>plain_parser_category </span><span class=special>{};
</span><span class=keyword>struct </span><span class=identifier>binary_parser_category </span><span class=special>: </span><span class=identifier>plain_parser_category </span><span class=special>{};
</span><span class=keyword>struct </span><span class=identifier>unary_parser_category </span><span class=special>: </span><span class=identifier>plain_parser_category </span><span class=special>{};
</span><span class=keyword>struct </span><span class=identifier>action_parser_category </span><span class=special>: </span><span class=identifier>unary_parser_category </span><span class=special>{};</span></pre>
<h2>embed_t</h2>
<p>Each parser has a typedef <tt>embed_t</tt>. This typedef specifies how a parser
is embedded in a composite. By default, if one is not specified, the parser
will be embedded by value. That is, a copy of the parser is placed as a member
variable of the composite. Most parsers are embedded by value. In certain situations
however, this is not desirable or possible. One particular example is the <a href="rule.html">rule</a>.
The rule, unlike other parsers, is embedded by reference.</p>
<h2><a name="match"></a>The match</h2>
<p>The match holds the result of a parser. A match object evaluates to true when
a successful match is found, otherwise false. The length of the match is the
number of characters (or tokens) that are successfully matched. This can be queried
through its <tt>length()</tt> member function. A negative value means that the
match is unsuccessful. </p>
<p> Each parser may have an associated attribute. This attribute is also returned
back to the client on a successful parse through the match object. We can get
this attribute via the match's <tt>value()</tt> member function. Be warned though
that the match's attribute may be invalid, in which case, getting the attribute
will result in an exception. The member function <tt>has_valid_attribute()</tt>
can be queried to know if it is safe to get the match's attribute. The attribute
may be set anytime through the member function <tt>value(v)</tt> where <tt>v</tt>
is the new attribute value.<br>
<br>
A match attribute is valid:</p>
<ul>
<li> on a successful match</li>
<li>when its value is set through the <tt>value(val)</tt> member function</li>
<li> if it is assigned or copied from a compatible match object (e.g. <tt>match&lt;double&gt;</tt>
from <tt>match&lt;int&gt;</tt>) with a valid attribute. A match object <tt>A</tt>
is compatible with another match object <tt>B</tt> if the attribute type of
<tt>A</tt> can be assigned from the attribute type of <tt>B</tt>
(i.e. <tt>a = b;</tt> must compile).</li>
</ul>
<p>The match attribute is undefined:</p>
<ul>
<li>on an unsuccessful match </li>
<li>when an attempt is made to copy or assign from another match object with an incompatible
attribute type (e.g. <tt>match&lt;std::string&gt;</tt> from <tt>match&lt;int&gt;</tt>).</li>
</ul>
<h3>The match class:</h3>
<pre><span class=keyword> template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>&gt;
</span><span class=keyword> class </span><span class=identifier>match
</span><span class=keyword> </span><span class=special>{
</span><span class=keyword> public</span><span class=special>:
</span><span class=keyword> </span><span class=comment>/*...*/
</span><span class=special> </span><span class=keyword> typedef</span><span class="identifier"> T attr_t</span><span class="special">;<br>
</span><span class=keyword> </span><span class="special"> </span><span class=keyword>operator safe_bool</span><span class=special>() </span><span class=keyword>const</span>; <span class="comment">// convertible to a bool</span>
<span class=keyword> int </span><span class=identifier>length</span><span class=special>() </span><span class=keyword>const</span>;
<span class="keyword">bool</span> has_valid_attribute<span class="special">()</span> <span class="keyword">const</span><span class="special">;</span>
<span class=keyword> </span> <span class=identifier>void</span><span class=special> </span><span class=identifier>value</span><span class=special>(</span><span class="identifier">T </span><span class="keyword">const</span><span class=special>&amp;) </span><span class=keyword>const;
</span><span class=identifier>T </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>value</span><span class=special>();
</span><span class=keyword> </span><span class=special>};</span></pre>
<h2>match_result</h2>
<p>It has been mentioned repeatedly that the parser returns a match object as
its result. This is a simplification. Actually, for the sake of genericity,
parsers are really not hard-coded to return a match object. More accurately,
a parser returns an object that adheres to a conceptual interface, of which
the match is an example. Nevertheless, we shall call the result type of a parser
a match object regardless if it is actually a match class, a derivative or a
totally unrelated type.</p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><img src="theme/lens.gif" width="15" height="16"> <b>Meta-functions</b><br>
<br>
What are meta-functions? We all know what functions look like. In simplest
terms, a function accepts some arguments and returns a result. Here is the
function we all love so much:<br>
<br>
<code><span class="keyword">int</span> identity_func<span class="special">(</span><span class="keyword">int</span>
arg<span class="special">)</span><br>
<span class="special">{</span> <span class="keyword">return</span> arg<span class="special">;
}</span> <span class="comment">// return the argument arg</span><br>
</code><br>
Meta-functions are essentially the same. These beasts also accept arguments
and return a result. However, while functions work at runtime on values,
meta-functions work at compile time on types (or constants, but we shall
deal only with types). The meta-function is a template class (or struct).
The template parameters are the arguments to the meta-function and a typedef
within the class is the meta-function's return type. Here is the corresponding
meta-function:<code><br>
<br>
<span class="keyword">template</span> <span class="special">&lt;</span><span class="keyword">typename</span>
ArgT<span class="special">&gt;</span><br>
<span class="keyword">struct</span> identity_meta_func<br>
<span class="special">{</span> <span class="keyword">typedef</span> ArgT
type<span class="special">; }; </span><span class="comment">// return the
argument ArgT</span><br>
<br>
</code>The meta-function above is invoked as:<br>
<br>
<code><span class="keyword">typename</span> identity_meta_func<span class="special">&lt;</span>ArgT<span class="special">&gt;::</span>type</code><br>
<br>
By convention, meta-functions return the result through the typedef <tt>type</tt>.
Take note that <tt>typename</tt> is only required within templates.</td>
</tr>
</table>
<p>The actual match type used by the parser depends on two types: the parser's
attribute type and the scanner type. <tt>match_result</tt> is the meta-function
that returns the desired match type given an attribute type and a scanner type.
</p>
<p>Usage:</p>
<pre> <span class=keyword>typename </span><span class=identifier>match_result</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>, </span><span class=identifier>T</span><span class=special>&gt;::</span><span class=identifier>type</span></pre>
<p>The meta-function basically answers the question &quot;given a scanner type
<tt>ScannerT</tt> and an attribute type <tt>T</tt>, what is the desired match
type?&quot; [<img src="theme/note.gif" width="16" height="16"> <tt>typename</tt>
is only required within templates ].</p>
<h2>The parse member function</h2>
<p>Concrete sub-classes inheriting from parser must have a corresponding member
function <tt>parse(...)</tt> compatible with the conceptual interface:<br>
</p>
<pre><span class=identifier> </span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=identifier>RT
</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>ScannerT</span><span class=special></span> const<span class=special>&amp; </span>scan<span class=identifier></span><span class=special>) </span><span class=keyword>const</span><span class=special>;</span></pre>
<p>where <tt>RT</tt> is the desired return type of the parser. </p>
<h2>The parser result</h2>
<p>Concrete sub-classes inheriting from parser in most cases need to have a nested
meta-function <tt>result</tt> that returns the result <tt>type</tt> of the parser's
parse member function, given a scanner type. The meta-function has the form:</p>
<pre><span class=keyword> template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>result
</span><span class=special>{
</span><span class=keyword>typedef </span>RT <span class=identifier></span><span class=identifier>type</span><span class=special>;
</span><span class=special>};</span></pre>
<p>where <tt>RT</tt> is the desired return type of the parser. This is usually,
but not always, dependent on the template parameter <tt>ScannerT</tt>. For example,
given an attribute type <tt>int</tt>, we can use the match_result metafunction:</p>
<pre><span class=keyword> template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>result
</span><span class=special>{
</span><span class=keyword>typedef typename </span><span class=identifier>match_result</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>, </span><span class="keyword">int</span><span class=special>&gt;::</span><span class=identifier>type type</span><span class=special>;
};</span></pre>
<p>If a parser does not supply a result metafunction, a default is provided by
the base parser class.<span class=special> </span>The default is declared as:</p>
<pre><span class=keyword> template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>result
</span><span class=special>{
</span><span class=keyword>typedef typename </span><span class=identifier>match_result</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>, </span><span class="identifier">nil_t</span><span class=special>&gt;::</span><span class=identifier>type type</span><span class=special>;
};</span></pre>
<p>Without a result metafunction, notice that the parser's default attribute is
<tt>nil_t</tt> (i.e. the parser has no attribute).</p>
<h2><span class=special></span>parser_result</h2>
<p>Given a scanner type <tt>ScannerT</tt> and a parser type <tt>ParserT</tt>,
what will be the actual result of the parser? The answer to this question is
provided by the <tt>parser_result</tt> meta-function.</p>
<p>Usage:</p>
<pre> <span class=keyword>typename </span><span class=identifier>parser_result</span><span class=special>&lt;</span><span class=identifier>ParserT, ScannerT</span><span class=special>&gt;::</span><span class=identifier>type</span></pre>
<p>In general, the meta-function just forwards the invocation to the parser's
result meta-function:</p>
<pre><span class=identifier> </span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ParserT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>parser_result
</span><span class=special>{
</span><span class=keyword>typedef </span><span class=keyword>typename </span><span class=identifier>ParserT</span><span class=special>::</span><span class=keyword>template </span><span class=identifier>result</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>&gt;::</span><span class=identifier>type </span><span class=identifier>type</span><span class=special>;
</span><span class=special>};</span></pre>
<p>This is similar to a global function calling a member function. Most of the
time, the usage above is equivalent to:</p>
<pre><span class=keyword> typename </span><span class=identifier>ParserT</span><span class=special>::</span><span class=keyword>template </span><span class=identifier>result</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>&gt;::</span><span class=identifier>type</span></pre>
<p>Yet, this should not be relied upon to be true all the time because the parser_result
metafunction might be specialized for specific parser and/or scanner types.</p>
<p>The parser_result metafunction makes the signature of the required parse member
function almost canonical:</p>
<pre><span class=identifier> </span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>typename </span><span class=identifier>parser_result</span><span class=special>&lt;</span><span class=identifier>self_t, ScannerT</span><span class=special>&gt;::</span><span class=identifier>type</span><br> <span class=identifier>parse</span><span class=special>(</span><span class=identifier>ScannerT</span><span class=special></span> const<span class=special>&amp; </span>scan<span class=identifier></span><span class=special>) </span><span class=keyword>const</span><span class=special>;</span></pre>
<p>where<span class=special></span> <tt>self_t</tt> is a typedef to the parser.</p>
<h2>parser class declaration</h2>
<pre><span class=identifier> </span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>DerivedT</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>parser
</span><span class=special>{
</span><span class=keyword>typedef </span><span class=identifier>DerivedT embed_t</span><span class=special>;
</span><span class=keyword>typedef </span><span class=identifier>DerivedT derived_t</span><span class=special>;
</span><span class=keyword>typedef </span><span class=identifier>plain_parser_category parser_category_t</span><span class=special>;
</span><span class=keyword>template </span><span class=special>&lt;</span><span class="keyword">typename</span> ScannerT<span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>result
</span><span class=special>{
</span><span class=keyword>typedef typename </span><span class=identifier>match_result</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>, </span><span class=identifier>nil_t</span><span class=special>&gt;::</span><span class=identifier>type type</span><span class=special>;
};
</span><span class=identifier>DerivedT</span><span class=special>&amp; </span><span class=identifier>derived</span><span class=special>();
</span><span class=identifier>DerivedT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>derived</span><span class=special>() </span><span class=keyword>const</span><span class=special>;
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ActionT</span><span class=special>&gt;
</span><span class=identifier>action</span><span class=special>&lt;</span><span class=identifier>DerivedT</span><span class=special>, </span><span class=identifier>ActionT</span><span class=special>&gt;
</span><span class=keyword>operator</span><span class=special>[](</span><span class=identifier>ActionT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>actor</span><span class=special>) </span><span class=keyword>const</span><span class=special>;
};</span></pre>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="semantic_actions.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="indepth_the_scanner.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
</body>
</html>

View File

@@ -0,0 +1,226 @@
<html>
<head>
<title>In-depth: The Parser Context</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>In-depth:
The Parser Context</b></font></td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html">
<img src="theme/u_arr.gif" border="0" width="20" height="19"></a></td>
<td width="30"><a href="indepth_the_scanner.html">
<img src="theme/l_arr.gif" border="0" width="20" height="19"></a></td>
<td width="30"><a href="predefined_actors.html">
<img src="theme/r_arr.gif" border="0" width="20" height="19"></a></td>
</tr>
</table>
<h2>Overview</h2>
<p>The parser's <b>context</b> is yet another concept. An instance (object) of
the <tt>context</tt> class is created before a non-terminal starts parsing and
is destructed after parsing has concluded. A non-terminal is either a <tt>rule</tt>,
a <tt>subrule</tt>, or a <tt>grammar</tt>. Non-terminals have a <tt>ContextT</tt> template parameter. The following pseudo code depicts what's happening when
a non-terminal is invoked:</p>
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>return_type
</span><span class=identifier>a_non_terminal</span><span class=special>::</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>ScannerT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>scan</span><span class=special>)
{
</span><span class=identifier>context_t ctx</span><span class=special>(/**/);
</span><span class=identifier>ctx</span><span class=special>.</span><span class=identifier>pre_parse</span><span class=special>(/**/);
</span><span class=comment>// main parse code of the non-terminal here...
</span><span class=keyword>return </span><span class=identifier>ctx</span><span class=special>.</span><span class=identifier>post_parse</span><span class=special>(/**/);
}</span></font></code></pre>
<p>The context is provided for extensibility. Its main purpose is to expose the
start and end of the non-terminal's parse member function to accommodate external
hooks. We can extend the non-terminal in a multitude of ways by writing specialized
context classes, without modifying the class itself. For example, we can make
the non-terminal emit debug diagnostics information by writing a context class
that prints out the current state of the scanner at each point in the parse
traversal where the non-terminal is invoked.</p>
<p>Example of a parser context that prints out debug information:</p>
<pre><code><font color="#000000"> pre_parse</font>:<font color="#000000"> non-terminal XXX is entered<font color="#0000ff">.</font> The current state of the input
is <font color="#616161"><i>&quot;hello world, this is a test&quot;</i></font>
post_parse</font>:<font color="#000000"> non-terminal XXX has concluded<font color="#0000ff">,</font> the non-terminal matched <font color="#616161"><i>&quot;hello world&quot;</i></font><font color="#0000ff">.</font>
The current state of the input is <font color="#616161"><i>&quot;, this is a test&quot;</i></font></font></code></pre>
<p>Most of the time, the context will be invisible from the user's view. In general,
clients of the framework need not deal directly with contexts, nor even know about them.
Power users, however, might find some use for contexts. Thus, this is part of
the public API. Other parts of the framework in other layers above the core
take advantage of the context to extend non-terminals. </p>
<h2>Class declaration</h2>
<p>The <tt>parser_context</tt> class is the default context class that the non-terminal
uses. </p>
<pre><span class=keyword> </span><span class="identifier">template</span> <span class="special">&lt;</span><span class="keyword">typename</span> <span class="identifier">AttrT</span> <span class="special">=</span> <span class="identifier">nil_t</span><span class="special">&gt;</span><span class=keyword><br> struct </span><span class=identifier>parser_context
</span><span class=special> {
</span><span class=keyword>typedef </span>AttrT <span class=identifier>attr_t</span><span class=special>;
</span><span class=keyword>typedef </span><span class=identifier>implementation_defined base_t</span><span class=special>;
</span><span class="keyword">typedef</span><span class=special> </span>parser_context_linker<span class="special">&lt;</span>parser_context<span class="special">&lt;</span><span class="identifier">AttrT</span><span class="special">&gt;</span> <span class="special">&gt;</span> <span class="identifier">context_linker_t</span><span class=special>;
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ParserT</span><span class=special>&gt;
</span><span class=identifier>parser_context</span><span class=special>(</span><span class=identifier>ParserT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>p</span><span class=special>) {}
</span><span class=keyword> template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ParserT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword> void
</span><span class=identifier> pre_parse</span><span class=special>(</span><span class=identifier>ParserT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>p</span><span class=special>, </span><span class=identifier>ScannerT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>scan</span><span class=special>) {}
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ResultT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>ParserT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=identifier>ResultT</span><span class=special>&amp;
</span><span class=identifier> post_parse</span><span class=special>(</span><span class=identifier>ResultT</span><span class=special>&amp; </span><span class=identifier>hit</span><span class=special>, </span><span class=identifier>ParserT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>p</span><span class=special>, </span><span class=identifier>ScannerT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>scan</span><span class=special>)
{ </span><span class=keyword>return </span><span class=identifier>hit</span><span class=special>; }
};</span></pre>
<p>The non-terminal's <tt>ContextT</tt> template parameter is a concept. The <tt>parser_context</tt>
class above is the simplest model of this concept. The default <tt>parser_context</tt>'s<tt>
pre_parse</tt> and <tt>post_parse</tt> member functions are simply no-ops. You
can think of the non-terminal's <tt>ContextT</tt> template parameter as the
policy that governs how the non-terminal will behave before and after parsing.
The client can supply her own context policy by passing a user defined context
template parameter to a particular non-terminal.</p>
<table width="90%" border="0" align="center">
<tr>
<td class="table_title" colspan="8"> Parser Context Policies </td>
</tr>
<tr>
<td class="table_cells"><strong><span class=identifier>attr_t</span></strong></td>
<td class="table_cells">typedef: the attribute type of the non-terminal. See
the <a href="indepth_the_parser.html#match">match</a>.</td>
</tr>
<tr>
<td class="table_cells"><strong><span class=identifier>base_t</span></strong></td>
<td class="table_cells">typedef: the base class of the non-terminal. The non-terminal
inherits from this class.</td>
</tr>
<tr>
<td class="table_cells"><strong><span class="identifier">context_linker_t</span></strong></td>
<td class="table_cells">typedef: this class type opens up the possibility
for Spirit to plug in additional functionality into the non-terminal parse
function or even bypass the given context. This should simply be typedefed
to <tt>parser_context_linker&lt;T&gt;</tt> where T is the type of the user
defined context class.</td>
</tr>
<tr>
<td class="table_cells"><strong>constructor</strong></td>
<td class="table_cells">Construct the context. The non-terminal is passed as
an argument to the constructor.</td>
</tr>
<tr>
<td class="table_cells"><strong>pre_parse</strong></td>
<td class="table_cells">Do something prior to parsing. The non-terminal and
the current scanner are passed as arguments.</td>
</tr>
<tr>
<td class="table_cells"><strong>post_parse</strong></td>
<td class="table_cells">Do something after parsing. This is called regardless
of the parse result. A reference to the parser's result is passed in. The
context has the power to modify this. The non-terminal and the current scanner
are also passed as arguments.</td>
</tr>
</table>
<p>The <tt>base_t</tt> deserves further explanation. Here goes... The context
is strictly a stack based class. It is created before parsing and destructed
after the non-terminal's parse member function exits. Sometimes, we need
auxiliary
data that exists throughout the full lifetime of the non-terminal host.
Since the non-terminal inherits from the context's <tt>base_t</tt>, the context
itself gets access to this data upon construction, when the non-terminal
is passed as an argument to the constructor. Ditto on <tt>pre_parse</tt> and
<tt>post_parse</tt>.</p>
<p>The non-terminal inherits from the context's <tt>base_t</tt> typedef. The sole
requirement is that it is a class that is default constructible. The copy-construction
and assignment requirements depend on the host. If the host requires it, so
does the context's <tt>base_t</tt>. In general, it wouldn't hurt to provide
these basic requirements.</p>
<h2>Non-default Attribute Type </h2>
<p>Right out of the box, the <tt>parser_context</tt> class may be parameterized with a type other than the default <tt>nil_t</tt>. The following code demonstrates the usage of the <tt>parser_context</tt> template with an explicit argument to declare rules with match results different from <tt>nil_t</tt>:</p>
<pre><span class=number> </span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>parser_context</span><span class=special>&lt;</span><span class=keyword>int</span><span class=special>&gt; </span><span class=special>&gt; </span><span class=identifier>int_rule </span><span class=special>= </span><span class=identifier>int_p</span><span class=special>;
</span><span class=identifier>parse</span><span class=special>(
</span><span class=string>&quot;123&quot;</span><span class=special>,
</span><span class=comment>// Using a returned value in the semantic action
</span><span class=identifier>int_rule</span><span class=special>[</span><span class=identifier>cout </span><span class=special>&lt;&lt; </span><span class=identifier>arg1 </span><span class=special>&lt;&lt; </span><span class=identifier>endl</span><span class=special>]
</span><span class=special>);</span> </pre>
<p>In this example, <tt>int_rule</tt> is declared with <tt>int</tt> attribute type. Hence, the <tt>int_rule</tt> variable can hold any parser which returns an <tt>int</tt> value (for example <tt>int_p</tt> or <tt>bin_p</tt>). The important thing to note is that we can use the returned value in the semantic action bound to the <tt>int_rule</tt>. </p>
<p><img src="theme/lens.gif" width="15" height="16"> See <a href="../example/fundamental/parser_context.cpp">parser_context.cpp</a> in the examples. This is part of the Spirit distribution.</p>
<h2>An Example </h2>
<p>As an example, let's have a look at the Spirit parser context, which inserts some debug output into the parsing process:</p>
<pre> <span class="keyword">template</span>&lt;<span class="keyword">typename</span> ContextT&gt;
<span class="keyword">struct</span> parser_context_linker : <span class="keyword">public</span> ContextT
<span class="special">{</span>
<span class="keyword">typedef</span> ContextT base_t;
<span class="keyword">template</span> &lt;<span class="keyword">typename</span> ParserT&gt;
parser_context_linker(ParserT const&amp; p)
: ContextT(p) {}
<span class="comment">// This is called just before parsing of this non-terminal</span>
<span class="keyword">template</span> <span class="special">&lt;</span><span class="keyword">typename</span> ParserT<span class="special">,</span> <span class="keyword">typename</span> ScannerT<span class="special">&gt;</span>
<span class="keyword">void</span> pre_parse<span class="special">(</span>ParserT <span class="keyword">const</span><span class="special">&amp;</span> p<span class="special">,</span> ScannerT <span class="special">&amp;</span>scan<span class="special">)</span>
<span class="special">{</span>
<span class="comment">// call the pre_parse function of the base class</span>
<span class="keyword">this</span><span class="special">-&gt;</span>base_t<span class="special">::</span>pre_parse<span class="special">(</span>p<span class="special">,</span> scan<span class="special">);</span>
<span class="preprocessor">
#if</span> <span class="identifier">BOOST_SPIRIT_DEBUG_FLAGS</span> <span class="special">&amp;</span> <span class="identifier">BOOST_SPIRIT_DEBUG_FLAGS_NODES</span>
<span class="keyword">if</span> <span class="special">(</span>trace_parser<span class="special">(</span>p<span class="special">.</span>derived<span class="special">())) {</span>
<span class="comment">// print out pre parse info</span>
impl<span class="special">::</span>print_node_info<span class="special">(</span>
<span class="keyword">false</span><span class="special">,</span> scan.get_level<span class="special">(),</span> <span class="keyword">false</span><span class="special">,</span>
parser_name<span class="special">(</span>p.derived<span class="special">()),</span>
scan<span class="special">.</span>first<span class="special">,</span> scan.last<span class="special">);</span>
<span class="special">}</span>
scan.get_level<span class="special">()++;</span> <span class="comment">// increase nesting level</span>
<span class="preprocessor">#endif</span>
<span class="special">}</span>
<span class="comment">// This is called just after parsing of the current non-terminal</span>
<span class="keyword">template</span> <span class="special">&lt;</span><span class="keyword">typename</span> ResultT<span class="special">,</span> <span class="keyword">typename</span> ParserT<span class="special">,</span> <span class="keyword">typename</span> ScannerT<span class="special">&gt;</span>
ResultT<span class="special">&amp;</span> post_parse<span class="special">(</span>
ResultT<span class="special">&amp;</span> hit<span class="special">,</span> ParserT <span class="keyword">const</span><span class="special">&amp;</span> p<span class="special">,</span> ScannerT<span class="special">&amp;</span> scan<span class="special">)
{</span>
<span class="preprocessor">
#if</span> <span class="identifier">BOOST_SPIRIT_DEBUG_FLAGS</span> <span class="special">&amp;</span> <span class="identifier">BOOST_SPIRIT_DEBUG_FLAGS_NODES</span>
<span class="special">--</span>scan.get_level<span class="special">();</span> <span class="comment">// decrease nesting level</span>
<span class="keyword">if</span> <span class="special">(</span>trace_parser<span class="special">(</span>p<span class="special">.</span>derived<span class="special">())) {</span>
impl<span class="special">::</span>print_node_info<span class="special">(</span>
hit<span class="special">,</span> scan<span class="special">.</span>get_level<span class="special">(),</span> <span class="keyword">true</span><span class="special">,</span>
parser_name<span class="special">(</span>p<span class="special">.</span>derived<span class="special">()),</span>
scan<span class="special">.</span>first<span class="special">,</span> scan<span class="special">.</span>last<span class="special">);
}</span>
<span class="preprocessor">#endif</span>
<span class="comment">// call the post_parse function of the base class</span>
<span class="keyword">return</span> <span class="keyword">this</span><span class="special">-&gt;</span>base_t<span class="special">::</span>post_parse<span class="special">(</span>hit<span class="special">,</span> p<span class="special">,</span> scan<span class="special">);
}
};</span>
</pre>
<p>During debugging (when <tt>BOOST_SPIRIT_DEBUG</tt> is defined) this parser context is injected into the derivation hierarchy of the current <tt>parser_context</tt>, which was originally specified to be used for a concrete parser, so the template parameter <tt>ContextT</tt> represents the original <tt>parser_context</tt>. For this reason the <tt>pre_parse</tt> and <tt>post_parse</tt> functions call their counterparts from the base class. Additionally these functions call a special <tt>print_node_info</tt> function, which does the actual output of the parser state info of the current non-terminal. For more info about the printed information, you may want to have a look at the topic <a href="debugging.html">Debugging</a>.</p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html">
<img src="theme/u_arr.gif" border="0" width="20" height="19"></a></td>
<td width="30"><a href="indepth_the_scanner.html">
<img src="theme/l_arr.gif" border="0" width="20" height="19"></a></td>
<td width="30"><a href="predefined_actors.html">
<img src="theme/r_arr.gif" border="0" width="20" height="19"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)</font></p>
<p class="copyright">&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,290 @@
<html>
<head>
<title>In-depth: The Scanner</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>In-depth:
The Scanner</b></font> </td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="indepth_the_parser.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="indepth_the_parser_context.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<h2>Basic Scanner API </h2>
<table width="90%" border="0" align="center">
<tr>
<td class="table_title" colspan="10"> class scanner </td>
</tr>
<tr>
<td class="table_cells"><code><span class=identifier>value_t</span></code></td>
<td class="table_cells">typedef: The value type of the scanner's iterator</td>
</tr>
<tr>
<td class="table_cells"><code><span class=identifier>ref_t</span></code></td>
<td class="table_cells">typedef: The reference type of the scanner's iterator</td>
</tr>
<tr>
<td class="table_cells"><code><span class=keyword>bool </span><span class=identifier>at_end</span><span class=special>()
</span><span class=keyword>const</span></code></td>
<td class="table_cells">Returns true if the input is exhausted</td>
</tr>
<tr>
<td class="table_cells"><code><span class=identifier>value_t </span><span class=keyword>operator</span><span class=special>*()
</span><span class=keyword>const</span></code></td>
<td class="table_cells">Dereference/get a <code><span class=identifier>value_t</span></code>
from the input</td>
</tr>
<tr>
<td class="table_cells"><code><span class=keyword> </span><span class=identifier>scanner
</span><span class=keyword>const</span><span class=special>&amp; </span><span class=keyword>operator</span><span class=special>++()</span></code></td>
<td class="table_cells">move the scanner forward</td>
</tr>
<tr>
<td class="table_cells"><code><span class=identifier>IteratorT&amp; first</span><span class=special></span></code></td>
<td class="table_cells">The iterator pointing to the current input position.
Held by reference</td>
</tr>
<tr>
<td class="table_cells"><code><span class=identifier>IteratorT </span><span class=keyword>const</span>
<span class=identifier>last</span><span class=special></span></code></td>
<td class="table_cells">The iterator pointing to the end of the input. Held
by value</td>
</tr>
</table>
<p> The basic behavior of the scanner is handled by policies. The actual execution
of the scanner's public member functions listed in the table above is implemented
by the scanner policies.</p>
<p> Three sets of policies govern the behavior of the scanner. These policies
make it possible to extend Spirit non-intrusively. The scanner policies allow
the core-functionality to be extended without requiring any potentially destabilizing
changes to the code. A library writer might provide her own policies that override
the ones that are already in place to fine-tune the parsing process
to fit her own needs. Layers above the core might also want to take advantage
of this policy-based mechanism. Abstract syntax tree generation, debuggers and
lexers come to mind.</p>
<p> There are three sets of policies that govern:</p>
<ul>
<li>Iteration and filtering</li>
<li>Recognition and matching</li>
<li>Handling semantic actions</li>
</ul>
<a name="iteration_policy"></a>
<h2>iteration_policy</h2>
<p> Here are the default policies that govern iteration and filtering:</p>
<pre>
<code><span class=keyword>struct </span><span class=identifier>iteration_policy
</span><span class=special>{
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>void
</span><span class=identifier>advance</span><span class=special>(</span><span class=identifier>ScannerT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>scan</span><span class=special>) </span><span class=keyword>const
</span><span class=special>{ </span><span class=special>++</span><span class=identifier>scan</span><span class=special>.</span><span class=identifier>first</span><span class=special>; </span><span class=special>}
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>bool </span><span class=identifier>at_end</span><span class=special>(</span><span class=identifier>ScannerT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>scan</span><span class=special>) </span><span class=keyword>const
</span><span class=special>{ </span><span class=keyword>return </span><span class=identifier>scan</span><span class=special>.</span><span class=identifier>first </span><span class=special>== </span><span class=identifier>scan</span><span class=special>.</span><span class=identifier>last</span><span class=special>; </span><span class=special>}
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>&gt;
</span><span class=identifier>T </span><span class=identifier>filter</span><span class=special>(</span><span class=identifier>T </span><span class=identifier>ch</span><span class=special>) </span><span class=keyword>const
</span><span class=special>{ </span><span class=keyword>return </span><span class=identifier>ch</span><span class=special>; </span><span class=special>}
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>::</span><span class=identifier>ref_t
</span><span class=identifier>get</span><span class=special>(</span><span class=identifier>ScannerT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>scan</span><span class=special>) </span><span class=keyword>const
</span><span class=special>{ </span><span class=keyword>return </span><span class=special>*</span><span class=identifier>scan</span><span class=special>.</span><span class=identifier>first</span><span class=special>; </span><span class=special>}
</span><span class=special>};</span></code></pre>
<table width="90%" border="0" align="center">
<tr>
<td class="table_title" colspan="8"> Iteration and filtering policies </td>
</tr>
<tr>
<td class="table_cells"><b>advance</b></td>
<td class="table_cells">Move the iterator forward</td>
</tr>
<tr>
<td class="table_cells"><b>at_end</b></td>
<td class="table_cells">Return true if the input is exhausted</td>
</tr>
<tr>
<td class="table_cells"><b>filter</b></td>
<td class="table_cells">Filter a character read from the input</td>
</tr>
<tr>
<td class="table_cells"><b>get</b></td>
<td class="table_cells">Read a character from the input</td>
</tr>
</table>
<p> The following code snippet demonstrates a simple policy that converts all
characters to lower case:</p>
<pre>
<code><span class=keyword>struct </span><span class=identifier>inhibit_case_iteration_policy </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>iteration_policy
</span><span class=special>{
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>CharT</span><span class=special>&gt;
</span><span class=identifier>CharT filter</span><span class=special>(</span><span class=identifier>CharT ch</span><span class=special>) </span><span class=keyword>const
</span><span class=special>{
</span><span class=keyword>return </span>std::<span class=identifier>tolower</span><span class=special>(</span><span class=identifier>ch</span><span class=special>);
}
};</span></code></pre>
<a name="match_policy"></a>
<h2>match_policy</h2>
<p> Here are the default policies that govern recognition and matching:</p>
<pre>
<code><span class=keyword>struct </span><span class=identifier>match_policy
</span><span class=special>{
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>result </span><span class=special>
{
</span><span class=keyword>typedef </span><span class=identifier>match</span><span class=special>&lt;</span><span class=identifier>T</span><span class=special>&gt; </span><span class=identifier>type</span><span class=special>; </span><span class=special>
};
</span><span class=keyword>const </span><span class=identifier>match</span><span class=special>&lt;</span><span class=identifier>nil_t</span><span class=special>&gt;
</span><span class=identifier>no_match</span><span class=special>() </span><span class=keyword>const
</span><span class=special>{ </span><span class=keyword>
return </span><span class=identifier>match</span><span class=special>&lt;</span><span class=identifier>nil_t</span><span class=special>&gt;(); </span><span class=special>
}
</span><span class=keyword>const </span><span class=identifier>match</span><span class=special>&lt;</span><span class=identifier>nil_t</span><span class=special>&gt;
</span><span class=identifier>empty_match</span><span class=special>() </span><span class=keyword>const
</span><span class=special>{ </span><span class=keyword>
return </span><span class=identifier>match</span><span class=special>&lt;</span><span class=identifier>nil_t</span><span class=special>&gt;(</span><span class=number>0</span><span class=special>, </span><span class=identifier>nil_t</span><span class=special>());
</span><span class=special>}
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>AttrT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>IteratorT</span><span class=special>&gt;
</span><span class=identifier>match</span><span class=special>&lt;</span><span class=identifier>AttrT</span><span class=special>&gt;
</span><span class=identifier>create_match</span><span class=special>(
</span><span class=keyword>std::size_t </span><span class=identifier>length</span><span class=special>,
</span><span class=identifier>AttrT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>val</span><span class=special>,
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=comment>/*first*/</span><span class=special>,
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=comment>/*last*/</span><span class=special>) </span><span class=keyword>const
</span><span class=special>{ </span><span class=keyword>
return </span><span class=identifier>match</span><span class=special>&lt;</span><span class=identifier>AttrT</span><span class=special>&gt;(</span><span class=identifier>length</span><span class=special>, </span><span class=identifier>val</span><span class=special>); </span><span class=special>
}
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>MatchT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>IteratorT</span><span class=special>&gt;
</span><span class=keyword>void
</span><span class=identifier>group_match</span><span class=special>(
</span><span class=identifier>MatchT</span><span class=special>&amp; </span><span class=comment>/*m*/</span><span class=special>,
</span><span class=identifier>parser_id </span><span class=keyword>const</span><span class=special>&amp; </span><span class=comment>/*id*/</span><span class=special>,
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=comment>/*first*/</span><span class=special>,
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=comment>/*last*/</span><span class=special>) </span><span class=keyword>const </span><span class=special>{}
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>Match1T</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>Match2T</span><span class=special>&gt;
</span><span class=keyword>void
</span><span class=identifier>concat_match</span><span class=special>(</span><span class=identifier>Match1T</span><span class=special>&amp; </span><span class=identifier>l</span><span class=special>, </span><span class=identifier>Match2T </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>r</span><span class=special>) </span><span class=keyword>const
</span><span class=special>{ </span><span class=identifier>
l</span><span class=special>.</span><span class=identifier>concat</span><span class=special>(</span><span class=identifier>r</span><span class=special>);
</span><span class=special>}
</span><span class=special>};</span></code></pre>
<table width="90%" border="0" align="center">
<tr>
<td class="table_title" colspan="12"> Recognition and matching </td>
</tr>
<tr>
<td class="table_cells"><b>result</b></td>
<td class="table_cells">A metafunction that returns a match type given an
attribute type (see In-depth: The Parser)</td>
</tr>
<tr>
<td class="table_cells"><b>no_match</b></td>
<td class="table_cells">Create a failed match</td>
</tr>
<tr>
<td class="table_cells"><b>empty_match</b></td>
<td class="table_cells">Create an empty match. An empty match is a successful
epsilon match (matching length == 0)</td>
</tr>
<tr>
<td class="table_cells"><b>create_match</b></td>
<td class="table_cells">Create a match given the matching length, an attribute
and the iterator pair pointing to the matching portion of the input</td>
</tr>
<tr>
<td class="table_cells"><b>group_match</b></td>
<td class="table_cells">For non terminals such as rules, this is called after
a successful match has been made to allow post processing</td>
</tr>
<tr>
<td class="table_cells"><b>concat_match</b></td>
<td class="table_cells">Concatenate two match objects</td>
</tr>
</table>
<a name="action_policy"></a>
<h2>action_policy</h2>
<p> The action policy has only one function for handling semantic actions:</p>
<pre>
<code><span class=keyword>struct </span><span class=identifier>action_policy
</span><span class=special>{
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ActorT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>AttrT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>IteratorT</span><span class=special>&gt;
</span><span class=keyword>void
</span><span class=identifier>do_action</span><span class=special>(
</span><span class=identifier>ActorT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>actor</span><span class=special>,
</span><span class=identifier>AttrT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>val</span><span class=special>,
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>first</span><span class=special>,
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>last</span><span class=special>) </span><span class=keyword>const</span><span class=special>;
</span><span class=special>};</span></code></pre>
<p> The default action policy forwards to:</p>
<pre>
<code><span class=identifier>actor</span><span class=special>(</span><span class=identifier>first</span><span class=special>, </span><span class=identifier>last</span><span class=special>);</span></code></pre>
<p> if the attribute <tt>val</tt> is of type <tt>nil_t</tt>. Otherwise, it forwards to:</p>
<pre>
<code><span class=identifier>actor</span><span class=special>(</span><span class=identifier>val</span><span class=special>);</span></code></pre>
<a name="scanner_policies_mixer"></a>
<h3>scanner_policies mixer</h3>
<p> The class <tt>scanner_policies</tt> combines the three scanner policy classes
above into one:</p>
<pre>
<code><span class=keyword>template </span><span class=special>&lt;
</span><span class=keyword>typename </span><span class=identifier>IterationPolicyT </span><span class=special>= </span><span class=identifier>iteration_policy</span><span class=special>,
</span><span class=keyword>typename </span><span class=identifier>MatchPolicyT </span><span class=special>= </span><span class=identifier>match_policy</span><span class=special>,
</span><span class=keyword>typename </span><span class=identifier>ActionPolicyT </span><span class=special>= </span><span class=identifier>action_policy</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>scanner_policies</span><span class=special>;
</span></code></pre>
<p> This <i>mixer</i> class inherits from all three policies. This scanner_policies
class is then used to parameterize the scanner:</p>
<pre>
<code><span class=keyword>template </span><span class=special>&lt;
</span><span class=keyword>typename </span><span class=identifier>IteratorT </span><span class=special>= </span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*,
</span><span class=keyword>typename </span><span class=identifier>PoliciesT </span><span class=special>= </span><span class=identifier>scanner_policies</span><span class=special>&lt;&gt; </span><span class=special>&gt;
</span><span class=keyword>class </span><span class=identifier>scanner</span><span class=special>;
</span></code></pre>
<p> The scanner in turn inherits from the PoliciesT.</p>
<a name="rebinding_policies"></a>
<h3>Rebinding Policies</h3>
<p> The scanner can be made to rebind to a different set of policies anytime.
It has a member function <tt>change_policies(new_policies)</tt>. Given a new
set of policies, this member function creates a new scanner with the new set
of policies. The result type of the <i>rebound</i> scanner can be obtained
by calling the metafunction:</p>
<pre>
<code><span class=identifier>rebind_scanner_policies</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>, </span><span class=identifier>PoliciesT</span><span class=special>&gt;::</span><span class=identifier>type</span></code></pre>
<a name="rebinding_iterators"></a>
<h3>Rebinding Iterators</h3>
<p> The scanner can also be made to rebind to a different iterator type anytime.
It has a member function <tt>change_iterator(first, last)</tt>. Given a new
pair of iterators of a type different from the ones held by the scanner, this member
function creates a new scanner with the new pair of iterators. The result type
of the <i>rebound</i> scanner can be obtained by calling the metafunction:</p>
<pre>
<code><span class=identifier>rebind_scanner_iterator</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>, </span><span class=identifier>IteratorT</span><span class=special>&gt;::</span><span class=identifier>type</span></code></pre>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="indepth_the_parser.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="indepth_the_parser_context.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)</font></p>
<p class="copyright">&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,242 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta content=
"HTML Tidy for Windows (vers 1st February 2003), see www.w3.org"
name="generator">
<title>
Introduction
</title>
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10" height="49"></td>
<td width="85%" height="49">
<font size="6" face=
"Verdana, Arial, Helvetica, sans-serif"><b>Introduction</b></font>
</td>
<td width="112" height="49">
<a href="http://spirit.sf.net"><img src="theme/spirit.gif"
width="112" height="48" align="right" border="0"></a>
</td>
</tr>
</table><br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30">
<a href="../index.html"><img src="theme/u_arr.gif" border="0"></a>
</td>
<td width="30">
<a href="preface.html"><img src="theme/l_arr.gif" width="20"
height="19" border="0"></a>
</td>
<td width="30">
<a href="quick_start.html"><img src="theme/r_arr.gif" border="0"></a>
</td>
</tr>
</table>
<p>
Spirit is an object-oriented recursive-descent parser generator framework
implemented using template meta-programming techniques. Expression
templates allow us to approximate the syntax of Extended Backus-Normal
Form (EBNF) completely in C++.
</p>
<p>
The Spirit framework enables a target grammar to be written exclusively
in C++. Inline EBNF grammar specifications can mix freely with other C++
code and, thanks to the generative power of C++ templates, are
immediately executable. In contrast, conventional compiler-compilers or
parser-generators have to perform an additional translation step from the
source EBNF code to C or C++ code.
</p>
<p>
A simple EBNF grammar snippet:
</p>
<pre><code><font color="#000000"> </font></code><code><font color="#000000"><span class="identifier">group </span> <span class="special">::=</span> <span class="literal">'('</span> <span class="identifier">expression</span> <span class="literal">')'
</span> <span class="identifier">factor </span> <span class=
"special">::=</span> <span class="identifier">integer</span> <span class=
"special">|</span> <span class="identifier">group
</span> <span class="identifier">term </span> <span class=
"special">::=</span> <span class="identifier">factor</span> <span class=
"special">((</span><span class="literal">'*'</span> <span class=
"identifier">factor</span><span class="special">)</span> <span class=
"special">|</span> <span class="special">(</span><span class=
"literal">'/'</span> <span class="identifier">factor</span><span class=
"special">))*
</span> <span class="identifier">expression </span> <span class=
"special">::=</span> <span class="identifier">term</span> <span class=
"special">((</span><span class="literal">'+'</span> <span class=
"identifier">term</span><span class="special">)</span> <span class=
"special">|</span> <span class="special">(</span><span class=
"literal">'-'</span> <span class="identifier">term</span><span class=
"special">))*</span></font></code></pre>
<p>
is approximated using Spirit's facilities as seen in this code snippet:
</p>
<pre><code><font color="#000000"> </font></code><code><font color="#000000"><span class=
"identifier">group </span> <span class=
"special">=</span> <span class="literal">'('</span> <span class=
"special">&gt;&gt;</span> <span class=
"identifier">expression</span> <span class=
"special">&gt;&gt;</span> <span class="literal">')'</span><span class=
"special">;
</span> <span class="identifier">factor </span> <span class=
"special">=</span> <span class="identifier">integer</span> <span class=
"special">|</span> <span class="identifier">group</span><span class="special">;
</span> <span class="identifier">term </span> <span class=
"special">=</span> <span class="identifier">factor</span> <span class=
"special">&gt;&gt;</span> <span class="special">*((</span><span class=
"literal">'*'</span> <span class="special">&gt;&gt;</span> <span class=
"identifier">factor</span><span class="special">)</span> <span class=
"special">|</span> <span class="special">(</span><span class=
"literal">'/'</span> <span class="special">&gt;&gt;</span> <span class=
"identifier">factor</span><span class="special">));
</span> <span class="identifier">expression </span> <span class=
"special">=</span> <span class="identifier">term</span> <span class=
"special">&gt;&gt;</span> <span class="special">*((</span><span class=
"literal">'+'</span> <span class="special">&gt;&gt;</span> <span class=
"identifier">term</span><span class="special">)</span> <span class=
"special">|</span> <span class="special">(</span><span class=
"literal">'-'</span> <span class="special">&gt;&gt;</span> <span class=
"identifier">term</span><span class="special">));</span></font></code>
</pre>
<p>
Through the magic of expression templates, this is perfectly valid and
executable C++ code. The production rule <tt>expression</tt> is in fact
an object that has a member function <tt>parse</tt> that does the work given
source code written in the grammar that we have just declared. Yes, it's
a calculator. We shall simplify for now by skipping the type declarations
and the definition of the rule <tt>integer</tt> invoked by
<tt>factor</tt>. The production rule <tt>expression</tt> in our grammar
specification, traditionally called the start symbol, can recognize
inputs such as:
</p>
<pre><code><font color="#000000"> </font></code><span class="number">12345
</span><code><font color="#000000"> </font></code><span class="special">-</span><span class="number">12345
</span><code><font color="#000000"> </font></code><span class="special">+</span><span class="number">12345
</span><code><font color="#000000"> </font></code><span class="number">1</span> <span class=
"special">+</span> <span class="number">2
</span><code><font color="#000000"> </font></code><span class="number">1</span> <span class=
"special">*</span> <span class="number">2
</span><code><font color="#000000"> </font></code><span class="number">1</span><span class=
"special">/</span><span class="number">2</span> <span class=
"special">+</span> <span class="number">3</span><span class=
"special">/</span><span class="number">4
</span><code><font color="#000000"> </font></code><span class="number">1</span> <span class=
"special">+</span> <span class="number">2</span> <span class=
"special">+</span> <span class="number">3</span> <span class=
"special">+</span> <span class="number">4
</span><code><font color="#000000"> </font></code><span class="number">1</span> <span class=
"special">*</span> <span class="number">2</span> <span class=
"special">*</span> <span class="number">3</span> <span class=
"special">*</span> <span class="number">4
</span><code><font color="#000000"> </font></code><span class="special">(</span><span class=
"number">1</span> <span class="special">+</span> <span class=
"number">2</span><span class="special">)</span> <span class=
"special">*</span> <span class="special">(</span><span class=
"number">3</span> <span class="special">+</span> <span class=
"number">4</span><span class="special">)
</span><code><font color="#000000"> </font></code><span class="special">(-</span><span class=
"number">1</span> <span class="special">+</span> <span class=
"number">2</span><span class="special">)</span> <span class=
"special">*</span> <span class="special">(</span><span class=
"number">3</span> <span class="special">+</span> <span class=
"special">-</span><span class="number">4</span><span class="special">)
</span><code><font color="#000000"> </font></code><span class="number">1</span> <span class=
"special">+</span> <span class="special">((</span><span class=
"number">6</span> <span class="special">*</span> <span class=
"number">200</span><span class="special">)</span> <span class=
"special">-</span> <span class="number">20</span><span class=
"special">)</span> <span class="special">/</span> <span class="number">6
</span><code><font color="#000000"> </font></code><span class="special">(</span><span class=
"number">1</span> <span class="special">+</span> <span class=
"special">(</span><span class="number">2</span> <span class=
"special">+</span> <span class="special">(</span><span class=
"number">3</span> <span class="special">+</span> <span class=
"special">(</span><span class="number">4</span> <span class=
"special">+</span> <span class="number">5</span><span class=
"special">))))</span>
</pre>
<p>
Certainly we have made some modifications to the original EBNF syntax.
This is done to conform to C++ syntax rules. Most notably we see the
abundance of shift <tt>&gt;&gt;</tt> operators. Since there are no
'empty' operators in C++, it is simply not possible to write something
like:
</p>
<pre><code><font color="#000000"> </font></code><span class=
"identifier">a</span> <span class="identifier">b</span>
</pre>
<p>
as seen in math syntax, for example, to mean multiplication or, in our
case, as seen in EBNF syntax to mean sequencing (b should follow a). The
framework uses the shift <tt class="operators">&gt;&gt;</tt> operator
instead for this purpose. We take the <tt class="operators">&gt;&gt;</tt>
operator, with arrows pointing to the right, to mean "is followed by".
Thus we write:
</p>
<pre><code><font color="#000000"> </font></code><span class=
"identifier">a</span> <span class="special">&gt;&gt;</span> <span class=
"identifier">b</span>
</pre>
<p>
The alternative operator <tt class="operators">|</tt> and the parentheses
<tt class="operators">()</tt> remain as is. The assignment operator
<tt class="operators">=</tt> is used in place of EBNF's <tt class=
"operators">::=</tt>. Last but not least, the Kleene star <tt class=
"operators">*</tt> which used to be a postfix operator in EBNF becomes a
prefix. Instead of:
</p>
<pre><code><font color="#000000"> </font></code><span class="identifier">a</span><span class=
"special">*</span> <span class="comment">//... in EBNF syntax,</span>
</pre>
<p>
we write:
</p>
<pre><code><font color="#000000"> </font></code><span class="special">*</span><span class=
"identifier">a</span> <span class="comment">//... in Spirit.</span>
</pre>
<p>
since there are no postfix stars, "<tt class="operators">*</tt>", in
C/C++. Finally, we terminate each rule with the ubiquitous semi-colon,
"<tt>;</tt>".
</p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30">
<a href="../index.html"><img src="theme/u_arr.gif" border="0"></a>
</td>
<td width="30">
<a href="preface.html"><img src="theme/l_arr.gif" width="20"
height="19" border="0"></a>
</td>
<td width="30">
<a href="quick_start.html"><img src="theme/r_arr.gif" border="0"></a>
</td>
</tr>
</table><br>
<hr size="1">
<p class="copyright">
Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the
Boost Software License, Version 1.0. (See accompanying file
LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)</font>
</p>
<p>&nbsp;
</p>
</body>
</html>

View File

@@ -0,0 +1,187 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<title>List Parsers</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link href="theme/style.css" rel="stylesheet" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>&nbsp;</b></font></td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>List Parsers</b></font></td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="confix.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="functor_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>List Parsers are generated by the special predefined parser generator object
<tt>list_p</tt>, which generates parsers recognizing list structures
of the type </p>
<pre><span class=identifier> item </span><span class=special>&gt;&gt; </span><span class=special>*(</span><span class=identifier>delimiter </span><span class=special>&gt;&gt; </span><span class=identifier>item</span><span class=special>) </span><span class=special>&gt;&gt; </span><span class=special>!</span><span class=identifier>end</span></pre>
<p>where <tt>item</tt> is an expression, <tt>delimiter</tt> is a delimiter and <tt>end</tt> is an
optional closing expression. As you can see, the <tt>list_p</tt> generated parser
does not recognize empty lists, i.e. the parser must find at least one item
in the input stream to return a successful match. If you wish to also match
an empty list, you can make your <tt>list_p</tt> optional with <tt>operator!</tt>. An example where
this utility parser is helpful is parsing comma separated C/C++ strings, which
can be easily formulated as:</p>
<pre><span class=special> </span><span class=identifier>rule</span><span class=special>&lt;&gt; </span><span class=identifier>list_of_c_strings_rule
</span><span class=special>= </span><span class=identifier>list_p</span><span class=special>(</span><span class=identifier>confix_p</span><span class=special>(</span><span class=literal>'\"'</span><span class=special>, </span><span class=special>*</span><span class=identifier>c_escape_char_p</span><span class=special>, </span><span class=literal>'\"'</span><span class=special>), </span><span class=literal>','</span><span class=special>)
</span><span class=special>;</span></pre>
<p>The <tt>confix_p</tt> and <tt>c_escape_char_p</tt> parser generators
are described <a href="confix.html">here</a> and <a href="escape_char_parser.html">here</a>.</p>
<p>The <tt>list_p</tt> parser generator object can be used to generate the following
different types of List Parsers:</p>
<table width="90%" border="0" align="center">
<tr>
<td colspan="2" class="table_title"><b>List Parsers</b></td>
</tr>
<tr>
<td width="29%" height="27" class="table_cells"><b>list_p</b></td>
<td width="71%" class="table_cells"><p><tt>list_p</tt> used by itself parses
comma separated lists without special item formatting, i.e. everything
in between two commas is matched as an <tt>item</tt>, no <tt>end</tt>
of list token is matched</p></td>
</tr>
<tr>
<td height="27" class="table_cells"><strong>list_p(delimiter)</strong></td>
<td class="table_cells"><p>generates a list parser, which recognizes lists
with the given <tt>delimiter</tt> and matches everything in between them
as an <tt>item</tt>, no <tt>end</tt> of list token is matched</p></td>
</tr>
<tr>
<td height="27" class="table_cells"><strong>list_p(item, delimiter)</strong></td>
<td class="table_cells"><p>generates a list parser, which recognizes lists
with the given <tt>delimiter</tt> and matches items based on the given
item parser, no <tt>end</tt> of list token is matched</p></td>
</tr>
<tr>
<td height="27" class="table_cells"><strong>list_p(item, delimiter, end)</strong></td>
<td class="table_cells"><p>generates a list parser, which recognizes lists
with the given <tt>delimiter</tt> and matches items based on the given
<tt>item</tt> parser and additionally recognizes an optional <tt>end</tt>
expression</p></td>
</tr>
</table>
<p>All of the parameters to list_p can be single characters, strings
or, if more complex parsing logic is required, auxiliary parsers, each of which
is automatically converted to the corresponding parser type needed for successful
parsing.</p>
<p>If the <tt>item</tt> parser is an <tt>action_parser_category</tt> type (parser
with an attached semantic action) we have to do something special. This happens,
if the user wrote something like:</p>
<pre><span class=special> </span><span class=identifier>list_p</span><span class=special>(</span><span class=identifier>item</span><span class=special>[</span><span class=identifier>func</span><span class=special>], </span><span class=identifier>delim</span><span class=special>)</span></pre>
<p> where <tt>item</tt> is the parser matching one item of the list sequence and
<tt>func</tt> is a functor to be called after matching one item. If we would
do nothing, the resulting code would parse the sequence as follows:</p>
<pre><span class=special> </span><span class=special>(</span><span class=identifier>item</span><span class=special>[</span><span class=identifier>func</span><span class=special>] </span><span class=special>- </span><span class=identifier>delim</span><span class=special>) </span><span class=special>&gt;&gt; </span><span class=special>*(</span><span class=identifier>delim </span><span class=special>&gt;&gt; </span><span class=special>(</span><span class=identifier>item</span><span class=special>[</span><span class=identifier>func</span><span class=special>] </span><span class=special>- </span><span class=identifier>delim</span><span class=special>))</span></pre>
<p> which in most cases is not what the user expects. (If this <u>is</u> what you
expect, then please use one of the <tt>list_p</tt> generator
functions <tt>direct()</tt>, which will inhibit refactoring of the <tt>item</tt>
parser). To make the list parser behave as expected:</p>
<pre><span class=special> </span><span class=special>(</span><span class=identifier>item </span><span class=special>- </span><span class=identifier>delim</span><span class=special>)[</span><span class=identifier>func</span><span class=special>] </span><span class=special>&gt;&gt; </span><span class=special>*(</span><span class=identifier>delim </span><span class=special>&gt;&gt; </span><span class=special>(</span><span class=identifier>item </span><span class=special>- </span><span class=identifier>delim</span><span class=special>)[</span><span class=identifier>func</span><span class=special>])</span></pre>
<p> the actor attached to the item parser has to be re-attached to the <tt>(item
- delim)</tt> parser construct, which will make the resulting list parser 'do
the right thing'. This refactoring is done with the help of the <a href="refactoring.html">Refactoring
Parsers</a>. Additionally, special care must be taken if the item parser is
a <tt>unary_parser_category</tt> type parser, as for instance:</p>
<pre><span class=special> </span><span class=identifier>list_p</span><span class=special>(*</span><span class=identifier>anychar_p</span><span class=special>, </span><span class=literal>','</span><span class=special>)</span></pre>
<p> which without any refactoring would result in </p>
<pre><span class=special> </span><span class=special>(*</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>','</span><span class=special>))
</span><span class=special>&gt;&gt; </span><span class=special>*( </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>','</span><span class=special>) </span><span class=special>&gt;&gt; </span><span class=special>(*</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>','</span><span class=special>)) </span><span class=special>)</span></pre>
<p> and will not give the expected result (the first <tt>*anychar_p</tt> will
eat up all the input up to the end of the input stream). So we have to refactor
this into:</p>
<pre><span class=special> </span><span class=special>*(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>','</span><span class=special>))
</span><span class=special>&gt;&gt; </span><span class=special>*( </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>','</span><span class=special>) </span><span class=special>&gt;&gt; </span><span class=special>*(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>','</span><span class=special>)) </span><span class=special>)</span></pre>
<p> which will give the correct result.</p>
<p> The case, where the item parser is a combination of the two mentioned problems
(i.e. the item parser is a unary parser with an attached action), is handled
accordingly too:</p>
<pre><span class=special> </span><span class=identifier>list_p</span><span class=special>((*</span><span class=identifier>anychar_p</span><span class=special>)[</span><span class=identifier>func</span><span class=special>], </span><span class=literal>','</span><span class=special>)</span></pre>
<p> will be parsed as expected:</p>
<pre><span class=special> </span><span class=special>(*(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>','</span><span class=special>)))[</span><span class=identifier>func</span><span class=special>]
</span><span class=special>&gt;&gt; </span><span class=special>*( </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>','</span><span class=special>) </span><span class=special>&gt;&gt; </span><span class=special>(*(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>','</span><span class=special>)))[</span><span class=identifier>func</span><span class=special>] </span><span class=special>)</span></pre>
<p>The required refactoring is implemented with the help of the <a href="refactoring.html">Refactoring
Parsers</a>.</p>
<table width="90%" border="0" align="center">
<tr>
<td colspan="2" class="table_title"><b>Summary of List Parser refactorings</b></td>
</tr>
<tr class="table_title">
<td width="34%"><b>You write it as:</b></td>
<td width="66%"><code><font face="Verdana, Arial, Helvetica, sans-serif">It
is refactored to:</font></code></td>
</tr>
<tr>
<td width="34%" class="table_cells"><code><span class=identifier>list_p</span><span class=special>(</span><span class=identifier>item</span><span class=special>,
</span><span class=identifier>delimiter</span><span class=special>)</span></code></td>
<td width="66%" class="table_cells"> <code><span class=special> (</span><span class=identifier>item
</span><span class=special>- </span><span class=identifier>delimiter</span><span class=special>)
<br>
&gt;&gt; *(</span><span class=identifier>delimiter </span><span class=special>
&gt;&gt; (</span><span class=identifier>item </span><span class=special>-
</span><span class=identifier>delimiter</span><span class=special>))</span></code></td>
</tr>
<tr>
<td width="34%" class="table_cells"><code><span class=identifier>list_p</span><span class=special>(</span><span class=identifier>item</span><span class=special>[</span><span class=identifier>func</span><span class=special>],
</span><span class=identifier>delimiter</span><span class=special>)</span></code></td>
<td width="66%" class="table_cells"> <code><span class=special> (</span><span class=identifier>item
</span><span class=special> - </span><span class=identifier>delimiter</span><span class=special>)[</span><span class=identifier>func</span><span class=special>]
<br>
&gt;&gt; *(</span><span class=identifier>delimiter </span><span class=special>&gt;&gt;
(</span><span class=identifier>item </span><span class=special>- </span><span class=identifier>delimiter</span><span class=special>)[</span><span class=identifier>func</span><span class=special>])</span></code></td>
</tr>
<tr>
<td width="34%" class="table_cells"><code><span class=identifier>list_p</span><span class=special>(*</span><span class=identifier>item</span><span class=special>,
</span><span class=identifier>delimiter</span><span class=special>)</span></code></td>
<td width="66%" class="table_cells"> <code><span class=special>*(</span><span class=identifier>item
</span><span class=special>- </span><span class=identifier>delimiter</span><span class=special>)
<br>
&gt;&gt; *(</span><span class=identifier>delimiter </span><span class=special>&gt;&gt;
*(</span><span class=identifier>item </span><span class=special>- </span><span class=identifier>delimiter</span><span class=special>))</span></code></td>
</tr>
<tr>
<td width="34%" class="table_cells"><code><span class=identifier>list_p</span><span class=special>((*</span><span class=identifier>item</span><span class=special>)[</span><span class=identifier>func</span><span class=special>],
</span><span class=identifier>delimiter</span><span class=special>)</span></code></td>
<td width="66%" class="table_cells"> <code><span class=special>(*(</span><span class=identifier>item
</span><span class=special>- </span><span class=identifier>delimiter</span><span class=special>))[</span><span class=identifier>func</span><span class=special>]
<br>
&gt;&gt; *(</span><span class=identifier>delimiter </span><span class=special>&gt;&gt;
(*(</span><span class=identifier>item </span><span class=special>- </span><span class=identifier>delimiter</span><span class=special>))[</span><span class=identifier>func</span><span class=special>])</span></code></td>
</tr>
</table>
<p> <img height="16" width="15" src="theme/lens.gif"> <a href="../example/fundamental/list_parser.cpp">list_parser.cpp </a> sample shows the usage of the list_p utility parser:</p>
<ol>
<li>parsing a simple ',' delimited list w/o item formatting</li>
<li> parsing a CSV list (comma separated values - strings, integers or reals)</li>
<li>parsing a token list (token separated values - strings, integers or reals) <br>
with an action parser directly attached to the item part of the list_p generated parser</li>
</ol>
<p>This is part of the Spirit distribution.</p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="confix.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="functor_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 2001-2003 Hartmut Kaiser<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
</body>
</html>

View File

@@ -0,0 +1,174 @@
<html>
<head>
<title> Loops</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%">
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b> Loops</b></font>
</td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="escape_char_parser.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="character_sets.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>So far we have introduced a couple of EBNF operators that deal with looping.
We have the <tt>+</tt> positive operator, which matches the preceding symbol
one (1) or more times, as well as the Kleene star <tt>*</tt> which matches the
preceding symbol zero (0) or more times.</p>
<p>Taking this further, we may want to have a generalized loop operator. To some
this may seem to be a case of overkill. Yet there are grammars that are impractical
and cumbersome, if not impossible, for the basic EBNF iteration syntax to specify.
Examples:</p>
<blockquote>
<p><img src="theme/bullet.gif" width="12" height="12"> A file name may have
a maximum of 255 characters only.<br>
<img src="theme/bullet.gif" width="12" height="12"> A specific bitmap file
format has exactly 4096 RGB color information. <br>
<img src="theme/bullet.gif" width="12" height="12"> A 32 bit binary string
(1..32 1s or 0s).</p>
</blockquote>
<p>Other than the Kleene star <tt>*</tt>, the Positive closure <tt>+</tt>, and
the optional <tt>!</tt>, a more flexible mechanism for looping is provided for
by the framework. <br>
</p>
<table width="80%" border="0" align="center">
<tr>
<td colspan="2" class="table_title">Loop Constructs</td>
</tr>
<tr>
<td class="table_cells" width="26%"><b>repeat_p (n) [p]</b></td>
<td class="table_cells" width="74%">Repeat <b>p</b> exactly <b>n</b> times</td>
</tr>
<tr>
<td class="table_cells" width="26%"><b>repeat_p (n1, n2) [p]</b></td>
<td class="table_cells" width="74%">Repeat <b>p</b> at least <b>n1</b> times
and at most <b>n2</b> times</td>
</tr>
<tr>
<td class="table_cells" width="26%"><b>repeat_p (n, more) [p] </b></td>
<td class="table_cells" width="74%">Repeat <b>p</b> at least <b>n</b> times,
continuing until <b>p</b> fails or the input is consumed</td>
</tr>
</table>
<p>Using the <tt>repeat_p</tt> parser, we can now write our examples above:</p>
<p>A file name with a maximum of 255 characters:<br>
</p>
<pre> <span class=identifier>valid_fname_chars </span><span class=special>= </span><span class=comment>/*..*/</span><span class=special>;
</span><span class=identifier>filename </span><span class=special>= </span><span class=identifier>repeat_p</span><span class=special>(</span><span class=number>1</span><span class=special>, </span><span class=number>255</span><span class=special>)[</span><span class=identifier>valid_fname_chars</span><span class=special>];</span></pre>
<p>A specific bitmap file format which has exactly 4096 RGB color information:<span class=special><br>
</span></p>
<pre> <span class=identifier>uint_parser</span><span class=special>&lt;</span><span class=keyword>unsigned</span><span class=special>, </span><span class=number>16</span><span class=special>, </span><span class=number>6</span><span class=special>, </span><span class=number>6</span><span class=special>&gt; </span><span class=identifier>rgb_p</span><span class=special>;
</span><span class=identifier>bitmap </span><span class=special>= </span><span class=identifier>repeat_p</span><span class=special>(</span><span class=number>4096</span><span class=special>)[</span><span class=identifier>rgb_p</span><span class=special>];</span></pre>
<p>As for the 32 bit binary string (1..32 1s or 0s), of course we could have easily
used the <tt>bin_p</tt> numeric parser instead. For the sake of demonstration
however:<span class=special><br>
</span></p>
<pre> <span class=identifier>bin</span><span class=number>32</span> <span class=special>= </span><span class=identifier>lexeme_d</span><span class=special>[</span><span class=identifier>repeat_p</span><span class=special>(</span><span class=number>1</span><span class=special>, </span><span class=number>32</span><span class=special>)[</span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'1'</span><span class=special>) </span><span class=special>| </span><span class=literal>'0'</span><span class=special>]];</span></pre>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><img src="theme/note.gif" width="16" height="16"> Loop
parsers are run-time <a href="parametric_parsers.html">parametric</a>.</td>
</tr>
</table>
<p>The Loop parsers can be dynamic. Consider the parsing of a binary file of Pascal-style
length prefixed string, where the first byte determines the length of the incoming
string. Here's a sample input:</p>
<blockquote>
<table width="363" border="0" cellspacing="0" cellpadding="0">
<tr>
<td class="dk_grey_bkd">
<table width="100%" border="0" cellspacing="2" cellpadding="2">
<tr>
<td class="white_bkd" width="8%">
<div align="center">11</div>
</td>
<td class="white_bkd" width="8%">
<div align="center">h</div>
</td>
<td class="white_bkd" width="8%">
<div align="center">e</div>
</td>
<td class="white_bkd" width="8%">
<div align="center">l</div>
</td>
<td class="white_bkd" width="8%">
<div align="center">l</div>
</td>
<td class="white_bkd" width="8%">
<div align="center">o</div>
</td>
<td class="white_bkd" width="8%">
<div align="center"> _</div>
</td>
<td class="white_bkd" width="8%">
<div align="center">w</div>
</td>
<td class="white_bkd" width="8%">
<div align="center">o</div>
</td>
<td class="white_bkd" width="8%">
<div align="center">r</div>
</td>
<td class="white_bkd" width="8%">
<div align="center">l</div>
</td>
<td class="white_bkd" width="8%">
<div align="center">d</div>
</td>
</tr>
</table>
</td>
</tr>
</table>
</blockquote>
<p>This trivial example cannot be practically defined in traditional EBNF. Although
some EBNF syntaxes allow more powerful repetition constructs other than the Kleene
star, we are still limited to parsing fixed strings. The nature of EBNF forces
the repetition factor to be a constant. On the other hand, Spirit allows the
repetition factor to be variable at run time. We could write a grammar that
accepts the input string above:</p>
<pre><span class=identifier> </span><span class=keyword>int </span><span class=identifier>c</span><span class=special>;
</span><span class=identifier>r </span><span class=special>= </span><span class=identifier>anychar_p</span><span class=special>[</span><span class=identifier>assign_a</span><span class=special>(</span><span class=identifier>c</span><span class=special>)] </span><span class=special>&gt;&gt; </span><span class=identifier>repeat_p</span><span class=special>(</span><span class=identifier>boost</span><span class=special>::</span><span class=identifier>ref</span><span class=special>(</span><span class=identifier>c</span><span class=special>))[</span><span class=identifier>anychar_p</span><span class=special>];</span></pre>
<p>The expression</p>
<pre> <span class=identifier>anychar_p</span><span class=special>[</span><span class=identifier>assign_a</span><span class=special>(</span><span class=identifier>c</span><span class=special>)]</span></pre>
<p>extracts the first character from the input and puts it in <tt>c</tt>. What
is interesting is that in addition to constants, we can also use variables as
parameters to <tt>repeat_p</tt>, as demonstrated in </p>
<pre> <span class=identifier>repeat_p</span><span class=special>(</span><span class=identifier>boost</span><span class=special>::</span><span class=identifier>ref</span><span class=special>(</span><span class=identifier>c</span><span class=special>)</span><span class=special>)</span><span class=special>[</span><span class=identifier>anychar_p</span><span class=special>]</span></pre>
<p>Notice that <tt>boost::ref</tt> is used to reference the integer <tt>c</tt>.
This usage of <tt>repeat_p</tt> makes the parser defer the evaluation of the
repetition factor until it is actually needed. Continuing our example, since
the value 11 is already extracted from the input, <tt>repeat_p</tt> is now
expected to loop exactly 11 times.</p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="escape_char_parser.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="character_sets.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
</body>
</html>

View File

@@ -0,0 +1,276 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"><html><head><title>The multi_pass</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css"></head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tbody><tr>
<td width="10">
<br>
</td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>The
multi_pass</b></font> </td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</tbody></table>
<br>
<table border="0">
<tbody><tr>
<td width="10"><br>
</td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="trees.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="file_iterator.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</tbody></table>
<p>Backtracking in Spirit requires the use of the following types of iterator:
forward, bidirectional, or random access. Because of backtracking, input iterators
cannot be used. Therefore, the standard library classes istreambuf_iterator
and istream_iterator, that fall under the category of input iterators, cannot
be used. Another input iterator that is of interest is one that wraps a lexer,
such as LEX.</p>
<table width="80%" border="0" align="center">
<tbody><tr>
<td class="note_box"> <img src="theme/note.gif" width="16" height="16"> <b>Input
Iterators</b> <br>
<br>
In general, Spirit is a backtracking parser. This is not an absolute requirement
though. In the future, we shall see more deterministic parsers that require
no more than 1 character (token) of lookahead. Such parsers allow us to
use input iterators such as the istream_iterator as is. </td>
</tr>
</tbody></table>
<p> Unfortunately, with an input iterator, there is no way to save an iterator
position, and thus input iterators will not work with backtracking in Spirit.
One solution to this problem is to simply load all the data to be parsed into
a container, such as a vector or deque, and then pass the begin and end of the
container to Spirit. This method can be too memory intensive for certain applications,
which is why the multi_pass iterator was created.</p>
<p> The multi_pass iterator will convert any input iterator into a forward iterator
suitable for use with Spirit. multi_pass will buffer data when needed and will
discard the buffer when only one copy of the iterator exists.</p>
<p> A grammar must be designed with care if the multi_pass iterator is used. Any rule that may
need to backtrack, such as one that contains an alternative, will cause data to be buffered. The rules that are optimal to
use are sequence and repetition. Sequences of the form <tt>a &gt;&gt; b</tt>
will not buffer data at all. Any rule that repeats, such as kleene_star (<tt>*a</tt>)
or positive (<tt>+a</tt>), will only buffer the data for the current
repetition.</p>
<p> In typical grammars, ambiguity and therefore lookahead is often localized.
In fact, many well designed languages are fully deterministic and require no
lookahead at all. Peeking at the first character from the input will immediately
determine the alternative branch to take. Yet, even with highly ambiguous grammars,
alternatives are often of the form <tt>*(a | b | c | d)</tt>. The input iterator
moves on and is never stuck at the beginning. Let's look at a Pascal snippet
for example:</p>
<pre> <code><span class="identifier">program </span><span class="special">=<br> </span><span class="identifier"> programHeading </span><span class="special">&gt;&gt; </span><span class="identifier">block </span><span class="special">&gt;&gt; </span><span class="literal">'.'<br> </span><span class="special"> ;<br><br> </span><span class="identifier">block </span><span class="special">=<br> *( </span><span class="identifier">labelDeclarationPart<br> </span><span class="special">| </span><span class="identifier">constantDefinitionPart<br> </span><span class="special">| </span><span class="identifier">typeDefinitionPart<br> </span><span class="special"> | </span><span class="identifier">variableDeclarationPart<br> </span><span class="special">| </span><span class="identifier"> procedureAndFunctionDeclarationPart<br> </span><span class="special"> )<br> &gt;&gt; </span><span class="identifier">statementPart<br> </span><span class="special">;<br></span></code></pre>
<p> Notice the alternatives inside the Kleene star in the rule block. The rule
gobbles the input in a linear manner and throws away the past history with each
iteration. As this is a fully deterministic LL(1) grammar, each failed alternative
only has to peek 1 character (token). The alternative that consumes more than
1 character (token) is definitely a winner. After which, the Kleene star moves
on to the next.</p>
<p>Be mindful if you use the free parse functions.
All of these make a copy of the iterator passed to them.<br>
</p>
<p>Now, after the lecture on the features to be careful with when using multi_pass,
you may think that multi_pass is way too restrictive to use. &nbsp;That's
not the case. &nbsp;If your grammar is deterministic, you can make use of flush_multi_pass in your grammar to ensure that data is not buffered when unnecessary.<br>
</p>
<p> Again, following up the example we started to use in the section on the scanner,
here's an example using the multi_pass. This time around we are extracting
our input from the input stream using an istreambuf_iterator.</p>
<pre> <span class="preprocessor">#include</span> <span class="special">&lt;</span><span class="identifier">boost</span><span class="special">/</span><span class="identifier">spirit</span><span class="special">/</span><span class="identifier">core</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">&gt;</span>
<code><span class="preprocessor"> #include </span><span class="special">&lt;</span><span class="identifier">boost</span><span class="special">/</span><span class="identifier">spirit</span><span class="special">/</span><span class="identifier">iterator</span><span class="special">/</span><span class="identifier">multi_pass</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">&gt;</span><span class="comment">
</span><span class="keyword">using namespace</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">spirit</span><span class="special">;
</span><span class="keyword">using namespace</span> <span class="identifier">std</span><span class="special">;</span>
<span class="identifier">ifstream in</span><span class="special">(</span><span class="string">"input_file.txt"</span><span class="special">); </span><span class="comment">// we get our input from this file<br><br> </span><span class="keyword">typedef char </span><span class="identifier">char_type</span><span class="special">;</span>
<span class="keyword">typedef </span><span class="identifier">multi_pass</span><span class="special">&lt;</span><span class="identifier">istreambuf_iterator</span><span class="special">&lt;</span><span class="identifier">char_type</span><span class="special">&gt; &gt; </span><span class="identifier">iterator_type</span><span class="special">;</span>
<span class="keyword">typedef</span> <span class="identifier">skip_parser_iteration_policy</span><span class="special">&lt;</span><span class="identifier">space_parser</span><span class="special">&gt;</span> <span class="identifier">iter_policy_type</span><span class="special">;</span>
<span class="keyword">typedef</span> <span class="identifier">scanner_policies</span><span class="special">&lt;</span>iter_policy_type<span class="special">&gt;</span> <span class="identifier">scanner_policies_type</span><span class="special">;</span>
<span class="keyword">typedef</span> <span class="identifier">scanner</span><span class="special">&lt;</span>iterator_type, scanner_policies_type<span class="special">&gt;</span> <span class="identifier">scanner_type</span><span class="special">;</span>
<span class="keyword">typedef</span> rule<span class="special">&lt;</span>scanner_type<span class="special">&gt;</span> <span class="identifier">rule_type</span><span class="special">;</span>
<span class="identifier">iter_policy_type</span> <span class="identifier">iter_policy</span><span class="special">(</span><span class="identifier">space_p</span><span class="special">);</span>
<span class="identifier">scanner_policies_type</span> <span class="identifier">policies</span><span class="special">(</span><span class="identifier">iter_policy</span><span class="special">);</span>
iterator_type first(
make_multi_pass(std::istreambuf_iterator&lt;char_type&gt;(in)));
scanner_type <span class="identifier">scan</span><span class="special">(</span>
first<span class="special">,</span> make_multi_pass(std::istreambuf_iterator<span class="special">&lt;</span><span class="identifier">char_type</span><span class="special">&gt;()),</span>
<span class="identifier">policies</span><span class="special">)</span>;
<span class="special"><br> </span><span class="identifier">rule_type n_list </span><span class="special">= </span><span class="identifier">real_p </span><span class="special">&gt;&gt; *(</span><span class="literal">',' </span><span class="special">&gt;&gt; </span><span class="identifier">real_p</span><span class="special">);<br> </span><span class="identifier">match</span><span class="special">&lt;&gt;</span><span class="identifier"> m </span><span class="special">= </span><span class="identifier">n_list</span><span class="special">.</span><span class="identifier">parse</span><span class="special">(</span><span class="identifier">scan</span><span class="special">);<br></span></code></pre>
<a name="flush_multi_pass"></a>
<h2>flush_multi_pass</h2>
<p> There is a predefined pseudo-parser called flush_multi_pass. When this parser
is used with multi_pass, it will call multi_pass::clear_queue(). This will cause
any buffered data to be erased. This also will invalidate all other copies of
multi_pass and they should not be used. If they are, a boost::illegal_backtracking
exception will be thrown.</p>
<a name="multi_pass_policies"></a>
<h2>multi_pass Policies</h2>
<p> multi_pass is a templated policy driven class. The description of multi_pass
above is how it was originally implemented (before it used policies), and is
the default configuration now. But, multi_pass is capable of much more. Because
of the open-ended nature of policies, you can write your own policy to make
multi_pass behave in a way that we never before imagined.</p>
<p> The multi_pass class has five template parameters:</p>
<ul>
<li>InputT - The type multi_pass uses to acquire its input. This is typically
an input iterator, or functor.</li>
<li>InputPolicy - A class that defines how multi_pass acquires its input. The
InputPolicy is parameterized by InputT.</li>
<li>OwnershipPolicy - This policy determines how multi_pass deals with its
shared components.</li>
<li>CheckingPolicy - This policy determines how checking for invalid iterators
is done.</li>
<li>StoragePolicy - The buffering scheme used by multi_pass is determined and
managed by the StoragePolicy.</li>
</ul>
<a name="predefined_policies"></a>
<h2>Predefined policies</h2>
<p> All predefined multi_pass policies are in the namespace boost::spirit::multi_pass_policies.</p>
<a name="predefined_inputpolicy_classes"></a>
<h3>Predefined InputPolicy classes</h3>
<a name="input_iterator"></a>
<h4>input_iterator</h4>
<p> This policy directs multi_pass to read from an input iterator of type InputT.</p>
<a name="lex_input"></a>
<h4>lex_input</h4>
<p> This policy obtains its input by calling yylex(), which would typically be
provided by a scanner generated by LEX. If you use this policy your code must
link against a LEX generated scanner.</p>
<a name="functor_input"></a>
<h4>functor_input</h4>
<p> This input policy obtains its data by calling a functor of type InputT. The
functor must meet certain requirements. It must have a typedef called result_type
which should be the type returned from operator(). Also, since an input policy
needs a way to determine when the end of input has been reached, the functor
must contain a static variable named eof which is comparable to a variable of
result_type.</p>
<a name="predefined_ownershippolicy_classes"></a>
<h3>Predefined OwnershipPolicy classes</h3>
<a name="ref_counted"></a>
<h4>ref_counted</h4>
<p> This class uses a reference counting scheme. multi_pass will delete its shared
components when the count reaches zero.</p>
<a name="first_owner"></a>
<h4>first_owner</h4>
<p> When this policy is used, the first multi_pass created will be the one that
deletes the shared data. Each copy will not take ownership of the shared data.
This works well for spirit, since no dynamic allocation of iterators is done.
All copies are made on the stack, so the original iterator has the longest lifespan.</p>
<a name="predefined_checkingpolicy_classes"></a>
<h3>Predefined CheckingPolicy classes</h3>
<a name="no_check"></a>
<h4>no_check</h4>
<p> This policy does no checking at all.</p>
<a name="buf_id_check"></a>
<h4>buf_id_check</h4>
<p> buf_id_check keeps around a buffer id, or a buffer age. Every time clear_queue()
is called on a multi_pass iterator, it is possible that all other iterators
become invalid. When clear_queue() is called, buf_id_check increments the buffer
id. When an iterator is dereferenced, this policy checks that the buffer id
of the iterator matches the shared buffer id. This policy is most effective
when used together with the std_deque StoragePolicy. It should not be used with
the fixed_size_queue StoragePolicy, because it will not detect iterator dereferences
that are out of range.</p>
<a name="full_check"></a>
<h4>full_check</h4>
<p> This policy has not been implemented yet. When it is, it will keep track of
all iterators and make sure that they are all valid.</p>
<a name="predefined_storagepolicy_classes"></a>
<h3>Predefined StoragePolicy classes</h3>
<a name="std_deque"></a>
<h4>std_deque</h4>
<p> This policy keeps all buffered data in a std::deque. All data is stored as
long as there is more than one iterator. Once the iterator count goes down to
one, and the queue is no longer needed, it is cleared, freeing up memory. The
queue can also be forcibly cleared by calling multi_pass::clear_queue().</p>
<a name="fixed_size_queue_lt_n_gt_"></a>
<h4>fixed_size_queue&lt;N&gt;</h4>
<p> fixed_size_queue keeps a circular buffer that is size N+1 and stores N elements.
fixed_size_queue is a template with a std::size_t parameter that specifies the
queue size. It is your responsibility to ensure that N is big enough for your
parser. Whenever the foremost iterator is incremented, the last character of
the buffer is automatically erased. Currently there is no way to tell if an
iterator is trailing too far behind and has become invalid. No dynamic allocation
is done by this policy during normal iterator operation, only on initial construction.
The memory usage of this StoragePolicy is set at N+1 bytes, unlike std_deque,
which is unbounded.</p>
<a name="combinations__how_to_specify_your_own_custom_multi_pass"></a>
<h2>Combinations: How to specify your own custom multi_pass</h2>
<p> The beauty of policy based designs is that you can mix and match policies
to create your own custom class by selecting the policies you want. Here's an
example of how to specify a custom multi_pass that wraps an istream_iterator&lt;char&gt;,
and is slightly more efficient than the default because it uses the first_owner
OwnershipPolicy and the no_check CheckingPolicy:</p>
<pre> <code><span class="keyword">typedef </span><span class="identifier">multi_pass</span><span class="special">&lt;<br> </span><span class="identifier">istream_iterator</span><span class="special">&lt;</span><span class="keyword">char</span><span class="special">&gt;,<br> </span><span class="identifier">multi_pass_policies</span><span class="special">::</span><span class="identifier">input_iterator</span><span class="special">,<br> </span><span class="identifier">multi_pass_policies</span><span class="special">::</span><span class="identifier">first_owner</span><span class="special">,<br> </span><span class="identifier">multi_pass_policies</span><span class="special">::</span><span class="identifier">no_check</span><span class="special">,<br> </span><span class="identifier">multi_pass_policies</span><span class="special">::</span><span class="identifier">std_deque<br> </span><span class="special">&gt; </span><span class="identifier">first_owner_multi_pass_type</span><span class="special">;<br></span></code></pre>
<p> The default template parameters for multi_pass are: input_iterator InputPolicy,
ref_counted OwnershipPolicy, buf_id_check CheckingPolicy and std_deque StoragePolicy.
So if you use multi_pass&lt;istream_iterator&lt;char&gt; &gt; you will get those
pre-defined behaviors while wrapping an istream_iterator&lt;char&gt;.</p>
<p> There is one other pre-defined class called look_ahead. look_ahead has two
template parameters: InputT, the type of the input iterator to wrap, and a std::size_t
N, which specifies the size of the buffer to the fixed_size_queue policy. While
the default multi_pass configuration is designed for safety, look_ahead is designed
for speed. look_ahead is derived from a multi_pass with the following policies:
input_iterator InputPolicy, first_owner OwnershipPolicy, no_check CheckingPolicy,
and fixed_size_queue&lt;N&gt; StoragePolicy.</p>
<a name="how_to_write_a_functor_for_use_with_the_functor_input_inputpolicy"></a>
<h3>How to write a functor for use with the functor_input InputPolicy</h3>
<p> If you want to use the functor_input InputPolicy, you can write your own functor
that will supply the input to multi_pass. The functor must satisfy two requirements.
It must have a typedef result_type which specifies the return type of operator().
This is standard practice in the STL. Also, it must supply a static variable
called eof which is compared against to know whether the input has reached the
end. Here is an example:</p>
<pre> <code><span class="keyword">class </span><span class="identifier">my_functor<br> </span><span class="special">{<br> </span><span class="keyword">public</span><span class="special">:<br><br> </span><span class="keyword">typedef char </span><span class="identifier">result_type</span><span class="special">;<br><br> </span><span class="identifier">my_functor</span><span class="special">()<br> : </span><span class="identifier">c</span><span class="special">(</span><span class="literal">'A'</span><span class="special">) {}<br><br> </span><span class="keyword">char operator</span><span class="special">()() </span><span class="keyword">const<br> </span><span class="special">{<br> </span><span class="keyword">if </span><span class="special">(</span><span class="identifier">c </span><span class="special">== </span><span class="literal">'M'</span><span class="special">)<br> </span><span class="keyword">return </span><span class="identifier">eof</span><span class="special">;<br> </span><span class="keyword">else<br> return </span><span class="identifier">c</span><span class="special">++;<br> }<br><br> </span><span class="keyword">static </span><span class="identifier">result_type eof</span><span class="special">;<br><br> </span><span class="keyword">private</span><span class="special">:<br><br> </span><span class="keyword">char </span><span class="identifier">c</span><span class="special">;<br> };<br><br> </span><span class="identifier">my_functor</span><span class="special">::</span><span class="identifier">result_type my_functor</span><span class="special">::</span><span class="identifier">eof </span><span class="special">= </span><span class="literal">'\0'</span><span class="special">;<br><br> </span><span class="keyword">typedef </span><span class="identifier">multi_pass</span><span class="special">&lt;<br> </span><span class="identifier">my_functor</span><span class="special">,<br> </span><span class="identifier">multi_pass_policies</span><span 
class="special">::</span><span class="identifier">functor_input</span><span class="special">,<br> </span><span class="identifier">multi_pass_policies</span><span class="special">::</span><span class="identifier">first_owner</span><span class="special">,<br> </span><span class="identifier">multi_pass_policies</span><span class="special">::</span><span class="identifier">no_check</span><span class="special">,<br> </span><span class="identifier">multi_pass_policies</span><span class="special">::</span><span class="identifier">std_deque<br> </span><span class="special">&gt; </span><span class="identifier">functor_multi_pass_type</span><span class="special">;<br><br> </span><span class="identifier">functor_multi_pass_type first </span><span class="special">= </span><span class="identifier">functor_multi_pass_type</span><span class="special">(</span><span class="identifier">my_functor</span><span class="special">());<br> </span><span class="identifier">functor_multi_pass_type last</span><span class="special">;<br></span></code></pre>
<a name="how_to_write_policies_for_use_with_multi_pass"></a>
<h3>How to write policies for use with multi_pass</h3>
<a name="inputpolicy"></a>
<h4>InputPolicy</h4>
<p> An InputPolicy must have the following interface:</p>
<pre> <code><span class="keyword">class </span><span class="identifier">my_input_policy </span><span class="comment">// your policy name<br> </span><span class="special">{<br> </span><span class="keyword">public</span><span class="special">:<br><br> </span><span class="comment">// class inner will be instantiated with the type given<br> // as the InputT parameter to multi_pass.<br><br> </span><span class="keyword">template </span><span class="special">&lt;</span><span class="keyword">typename </span><span class="identifier">InputT</span><span class="special">&gt;<br> </span><span class="keyword">class </span><span class="identifier">inner<br> </span><span class="special">{<br> </span><span class="keyword">public</span><span class="special">:<br><br> </span><span class="comment">// these typedefs determine the iterator_traits for multi_pass<br> </span><span class="keyword">typedef </span><span class="identifier">x </span><span class="identifier">value_type</span><span class="special">;<br> </span><span class="keyword">typedef </span><span class="identifier">x </span><span class="identifier">difference_type</span><span class="special">;<br> </span><span class="keyword">typedef </span><span class="identifier">x </span><span class="identifier">pointer</span><span class="special">;<br> </span><span class="keyword">typedef </span><span class="identifier">x </span><span class="identifier">reference</span><span class="special">;<br><br> </span><span class="keyword">protected</span><span class="special">:<br><br> </span><span class="identifier">inner</span><span class="special">();<br> </span><span class="identifier">inner</span><span class="special">(</span><span class="identifier">InputT </span><span class="identifier">x</span><span class="special">);<br> </span><span class="identifier">inner</span><span class="special">(</span><span class="identifier">inner </span><span class="keyword">const</span><span class="special">&amp; </span><span class="identifier">x</span><span 
class="special">);<br> </span><span class="comment">// delete or clean up any state<br> </span><span class="keyword">void </span><span class="identifier">destroy</span><span class="special">();<br> </span><span class="comment">// return true if *this and x have the same input<br> </span><span class="keyword">bool </span><span class="identifier">same_input</span><span class="special">(</span><span class="identifier">inner </span><span class="keyword">const</span><span class="special">&amp; </span><span class="identifier">x</span><span class="special">) </span><span class="keyword">const</span><span class="special">;<br> </span><span class="keyword">void </span><span class="identifier">swap</span><span class="special">(</span><span class="identifier">inner</span><span class="special">&amp; </span><span class="identifier">x</span><span class="special">);<br><br> </span><span class="keyword">public</span><span class="special">:<br><br> </span><span class="comment">// get an instance from the input<br> </span><span class="identifier">result_type </span><span class="identifier">get_input</span><span class="special">() </span><span class="keyword">const</span><span class="special">;<br> </span><span class="comment">// advance the input<br> </span><span class="keyword">void </span><span class="identifier">advance_input</span><span class="special">();<br> </span><span class="comment">// return true if the input is at the end<br> </span><span class="keyword">bool </span><span class="identifier">input_at_eof</span><span class="special">() </span><span class="keyword">const</span><span class="special">;<br> </span><span class="special">};<br> </span><span class="special">};<br></span></code></pre>
<p> Because of the way that multi_pass shares a buffer and input among multiple
copies, class inner should keep a pointer to its input. The copy constructor
should simply copy the pointer. destroy() should delete it. same_input should
compare the pointers. For more details see the various implementations of InputPolicy
classes.</p>
<a name="ownershippolicy"></a>
<h4>OwnershipPolicy</h4>
<p> The OwnershipPolicy must have the following interface:</p>
<pre> <code><span class="keyword">class </span><span class="identifier">my_ownership_policy<br> </span><span class="special">{<br> </span><span class="keyword">protected</span><span class="special">:<br><br> </span><span class="identifier">my_ownership_policy</span><span class="special">();<br> </span><span class="identifier">my_ownership_policy</span><span class="special">(</span><span class="identifier">my_ownership_policy </span><span class="keyword">const</span><span class="special">&amp; </span><span class="identifier">x</span><span class="special">);<br> </span><span class="comment">// clone is called when a copy of the iterator is made<br> </span><span class="keyword">void </span><span class="identifier">clone</span><span class="special">();<br> </span><span class="comment">// called when a copy is deleted. Return true to indicate<br> // resources should be released<br> </span><span class="keyword">bool </span><span class="identifier">release</span><span class="special">();<br> </span><span class="keyword">void </span><span class="identifier">swap</span><span class="special">(</span><span class="identifier">my_ownership_policy</span><span class="special">&amp; </span><span class="identifier">x</span><span class="special">);<br><br> </span><span class="keyword">public</span><span class="special">:<br> </span><span class="comment">// returns true if there is only one iterator in existence.<br> // std_deque StoragePolicy will free its buffered data if this<br> // returns true.<br> </span><span class="keyword">bool </span><span class="identifier">unique</span><span class="special">() </span><span class="keyword">const</span><span class="special">;<br> </span><span class="special">};<br></span></code></pre>
<a name="checkingpolicy"></a>
<h4>CheckingPolicy</h4>
<p> The CheckingPolicy must have the following interface:</p>
<pre> <code><span class="keyword">class </span><span class="identifier">my_check<br> </span><span class="special">{<br> </span><span class="keyword">protected</span><span class="special">:<br><br> </span><span class="identifier">my_check</span><span class="special">();<br> </span><span class="identifier">my_check</span><span class="special">(</span><span class="identifier">my_check </span><span class="keyword">const</span><span class="special">&amp; </span><span class="identifier">x</span><span class="special">);<br> </span><span class="keyword">void </span><span class="identifier">destroy</span><span class="special">();<br> </span><span class="keyword">void </span><span class="identifier">swap</span><span class="special">(</span><span class="identifier">my_check</span><span class="special">&amp; </span><span class="identifier">x</span><span class="special">);<br> </span><span class="comment">// check should make sure that this iterator is valid<br> </span><span class="keyword">void </span><span class="identifier">check_if_valid</span><span class="special">() </span><span class="keyword">const</span><span class="special">;<br> </span><span class="keyword">void </span><span class="identifier">clear_queue</span><span class="special">();<br> </span><span class="special">};<br></span></code></pre>
<a name="storagepolicy"></a>
<h4>StoragePolicy</h4>
<p> A StoragePolicy must have the following interface:</p>
<pre> <code><span class="keyword">class </span><span class="identifier">my_storage_policy<br> </span><span class="special">{<br> </span><span class="keyword">public</span><span class="special">:<br><br> </span><span class="comment">// class inner will be instantiated with the value_type from the InputPolicy<br><br> </span><span class="keyword">template </span><span class="special">&lt;</span><span class="keyword">typename </span><span class="identifier">ValueT</span><span class="special">&gt;<br> </span><span class="keyword">class </span><span class="identifier">inner<br> </span><span class="special">{<br> </span><span class="keyword">protected</span><span class="special">:<br><br> </span><span class="identifier">inner</span><span class="special">();<br> </span><span class="identifier">inner</span><span class="special">(</span><span class="identifier">inner </span><span class="keyword">const</span><span class="special">&amp; </span><span class="identifier">x</span><span class="special">);<br> </span><span class="comment">// will be called from the destructor of the last iterator.<br> </span><span class="keyword">void </span><span class="identifier">destroy</span><span class="special">();<br> </span><span class="keyword">void </span><span class="identifier">swap</span><span class="special">(</span><span class="identifier">inner</span><span class="special">&amp; </span><span class="identifier">x</span><span class="special">);<br> </span><span class="comment">// This is called when the iterator is dereferenced. 
It's a template<br> // method so we can recover the type of the multi_pass iterator<br> // and access it.<br> </span><span class="keyword">template </span><span class="special">&lt;</span><span class="keyword">typename </span><span class="identifier">MultiPassT</span><span class="special">&gt;<br> </span><span class="keyword">static </span><span class="identifier">ValueT </span><span class="identifier">dereference</span><span class="special">(</span><span class="identifier">MultiPassT </span><span class="keyword">const</span><span class="special">&amp; </span><span class="identifier">mp</span><span class="special">);<br> </span><span class="comment">// This is called when the iterator is incremented. It's a template<br> // method so we can recover the type of the multi_pass iterator<br> // and access it.<br> </span><span class="keyword">template </span><span class="special">&lt;</span><span class="keyword">typename </span><span class="identifier">MultiPassT</span><span class="special">&gt;<br> </span><span class="keyword">static </span><span class="keyword">void </span><span class="identifier">increment</span><span class="special">(</span><span class="identifier">MultiPassT</span><span class="special">&amp; </span><span class="identifier">mp</span><span class="special">);<br> </span><span class="keyword">void </span><span class="identifier">clear_queue</span><span class="special">();<br> </span><span class="comment">// called to determine whether the iterator is an eof iterator<br> </span><span class="keyword">template </span><span class="special">&lt;</span><span class="keyword">typename </span><span class="identifier">MultiPassT</span><span class="special">&gt;<br> </span><span class="keyword">static </span><span class="keyword">bool </span><span class="identifier">is_eof</span><span class="special">(</span><span class="identifier">MultiPassT </span><span class="keyword">const</span><span class="special">&amp; </span><span class="identifier">mp</span><span 
class="special">);<br> </span><span class="comment">// called by operator==<br> </span><span class="keyword">bool </span><span class="identifier">equal_to</span><span class="special">(</span><span class="identifier">inner </span><span class="keyword">const</span><span class="special">&amp; </span><span class="identifier">x</span><span class="special">) </span><span class="keyword">const</span><span class="special">;<br> </span><span class="comment">// called by operator&lt;<br> </span><span class="keyword">bool </span><span class="identifier">less_than</span><span class="special">(</span><span class="identifier">inner </span><span class="keyword">const</span><span class="special">&amp; </span><span class="identifier">x</span><span class="special">) </span><span class="keyword">const</span><span class="special">;<br> </span><span class="special">}; </span><span class="comment"> // class inner<br> </span><span class="special">};<br></span></code></pre>
<p> A StoragePolicy is the trickiest policy to write. You should study and understand
the existing StoragePolicy classes before you try to write your own.</p>
<table border="0">
<tbody><tr>
<td width="10"><br>
</td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="trees.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="file_iterator.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</tbody></table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 2001-2002 Daniel C. Nuffer<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
<p class="copyright">&nbsp;</p>
<br>
</body></html>

View File

@@ -0,0 +1,460 @@
<html>
<head>
<title>Numerics</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%">
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Numerics</b></font>
</td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="operators.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="rule.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>Similar to <tt>chlit</tt>, <tt>strlit</tt> etc., numeric parsers are also primitives.
Numeric parsers are placed in
a section of their own
to give this important building
block better focus. The framework includes a couple of predefined objects for
parsing signed and unsigned integers and real numbers. These parsers are fully
parametric. Most of the important aspects of numeric parsing can be finely adjusted
to suit. This includes the radix base, the minimum and maximum number of allowable
digits, the exponent, the fraction etc. Policies control the real number parsers'
behavior. There are some predefined policies covering the most common real number
formats but the user can supply her own when needed. </p>
<h2>uint_parser</h2>
<p>This class is the simplest among the members of the numerics package. The <tt>uint_parser</tt>
can parse unsigned integers of arbitrary length and size. The <tt>uint_parser</tt>
parser can be used to parse ordinary primitive C/C++ integers or even user defined
scalars such as bigints (unlimited precision integers). Like most of the classes
in Spirit, the <tt>uint_parser</tt> is a template class. Template parameters
fine tune its behavior. The uint_parser is so flexible that the other numeric
parsers are implemented using it as the backbone.</p>
<pre><code><font color="#000000"><span class=identifier> </span><span class=keyword>template </span><span class=special>&lt;
</span><span class=keyword>typename </span><span class=identifier>T </span><span class=special>= </span><span class="keyword">unsigned</span><span class=special>,
</span><span class=keyword>int </span><span class=identifier>Radix </span><span class=special>= </span><span class=number>10</span><span class=special>,
</span><span class=keyword>unsigned </span><span class=identifier>MinDigits </span><span class=special>= </span><span class=number>1</span><span class=special>,
</span><span class=keyword>int </span><span class=identifier>MaxDigits </span><span class=special>= -</span><span class=number>1</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>uint_parser </span><span class=special>{ </span><span class=comment>/*...*/ </span><span class=special>};</span></font></code></pre>
<table width="90%" border="0" align="center">
<tr>
<td colspan="2" class="table_title">uint_parser template parameters</td>
</tr>
<tr>
<td class="table_cells" width="21%"><b>T</b></td>
<td class="table_cells" width="79%">The numeric base type of the numeric parser.
Defaults to <tt>unsigned</tt></td>
</tr>
<tr>
<td class="table_cells" width="21%"><b>Radix</b></td>
<td class="table_cells" width="79%">The radix base. This can be one of 2 (binary),
8 (octal), 10 (decimal) or 16 (hexadecimal). Defaults to 10 (decimal)</td>
</tr>
<tr>
<td class="table_cells" width="21%"><b>MinDigits</b></td>
<td class="table_cells" width="79%">The minimum number of digits allowable</td>
</tr>
<tr>
<td class="table_cells" width="21%"><b>MaxDigits</b></td>
<td class="table_cells" width="79%">The maximum number of digits allowable.
If this is -1, then the maximum limit becomes unbounded</td>
</tr>
</table>
<br>
<table width="90%" border="0" align="center">
<tr>
<td colspan="2" class="table_title">Predefined uint_parsers</td>
</tr>
<tr>
<td class="table_cells" width="21%"><b>bin_p</b></td>
<td class="table_cells" width="79%"><code><span class=identifier>uint_parser</span><span class=special>&lt;</span><span class=keyword>unsigned</span><span class=special>,
</span><span class=number>2</span><span class=special>, </span><span class=number>1</span><span class=special>,
-</span><span class=number>1</span><span class=special>&gt; </span><span class=keyword>const</span></code></td>
</tr>
<tr>
<td class="table_cells" width="21%"><b>oct_p</b></td>
<td class="table_cells" width="79%"><code><span class=identifier>uint_parser</span><span class=special>&lt;</span><span class=keyword>unsigned</span><span class=special>,
</span><span class=number>8</span><span class=special>, </span><span class=number>1</span><span class=special>,
-</span><span class=number>1</span><span class=special>&gt; </span><span class=keyword>const</span></code></td>
</tr>
<tr>
<td class="table_cells" width="21%"><b>uint_p</b></td>
<td class="table_cells" width="79%"><code><span class=identifier>uint_parser</span><span class=special>&lt;</span><span class=keyword>unsigned</span><span class=special>,
</span><span class=number>10</span><span class=special>, </span><span class=number>1</span><span class=special>,
-</span><span class=number>1</span><span class=special>&gt; </span><span class=keyword>const</span></code></td>
</tr>
<tr>
<td class="table_cells" width="21%"><b>hex_p</b></td>
<td class="table_cells" width="79%"><code><span class=identifier>uint_parser</span><span class=special>&lt;</span><span class=keyword>unsigned</span><span class=special>,
</span><span class=number>16</span><span class=special>, </span><span class=number>1</span><span class=special>,
-</span><span class=number>1</span><span class=special>&gt; </span><span class=keyword>const</span></code></td>
</tr>
</table>
<p>The following example shows how the uint_parser can be used to parse thousand
separated numbers. The example can correctly parse numbers such as 1,234,567,890.</p>
<pre><span class=keyword> </span><span class=identifier>uint_parser</span><span class=special>&lt;</span><span class=keyword>unsigned</span><span class=special>, </span><span class=number>10</span><span class=special>, </span><span class=number>1</span><span class=special>, </span><span class=number>3</span><span class=special>&gt; </span><span class=identifier>uint3_p</span><span class=special>; </span><span class=comment>// 1..3 digits
</span><span class=identifier>uint_parser</span><span class=special>&lt;</span><span class=keyword>unsigned</span><span class=special>, </span><span class=number>10</span><span class=special>, </span><span class=number>3</span><span class=special>, </span><span class=number>3</span><span class=special>&gt; </span><span class=identifier>uint3_3_p</span><span class=special>; </span><span class=comment>// exactly 3 digits
</span><span class=identifier>ts_num_p </span><span class=special>= </span><span class=special>(</span><span class=identifier>uint3_p </span><span class=special>&gt;&gt; </span><span class=special>*(</span><span class=literal>',' </span><span class=special>&gt;&gt; </span><span class=identifier>uint3_3_p</span><span class=special>)); </span><span class=comment>// our thousand separated number parser</span></pre>
<p><tt>bin_p</tt>, <tt>oct_p</tt>, <tt>uint_p</tt> and <tt>hex_p</tt> are parser
generator objects designed to be used within expressions. Here's an example
of a rule that parses a comma-delimited list of numbers (we've seen this <a href="quick_start.html#list_of_numbers">before</a>):</p>
<pre><code><span class=identifier> </span><span class=identifier>list_of_numbers </span><span class=special>=</span> <span class=identifier>real_p </span><span class=special>&gt;&gt; *(</span><span class=literal>','</span> <span class=special>&gt;&gt; </span><span class=identifier>real_p</span><span class=special>);</span></code>
</pre>
<p></p>
<p>Later, we shall see how we can extract the actual numbers parsed by the numeric
parsers. We shall deal with this when we get to the section on <a href="semantic_actions.html#specialized_actions">specialized
actions</a>.</p>
<h2>int_parser</h2>
<p>The <tt>int_parser</tt> can parse signed integers of arbitrary length and size.
This is almost the same as the <tt>uint_parser</tt>. The only difference is
the additional task of parsing the <tt><span class="quotes">'+'</span></tt>
or <tt class="quotes">'-'</tt> sign preceding the number. The class interface
is the same as that of the uint_parser.<br>
</p>
<table width="90%" border="0" align="center">
<tr>
<td colspan="2" class="table_title">A predefined int_parser</td>
</tr>
<tr>
<td class="table_cells" width="21%"><b>int_p</b></td>
<td class="table_cells" width="79%"><span class=identifier><code>int_parser</code></span><code><span class=special>&lt;</span><span class=keyword>int</span><span class=special>,
</span><span class=number>10</span><span class=special>, </span><span class=number>1</span><span class=special>,
-</span><span class=number>1</span><span class=special>&gt; </span><span class=keyword>const</span></code></td>
</tr>
</table>
<h2>real_parser</h2>
<p>The <tt>real_parser</tt> can parse real numbers of arbitrary length and size
limited by its parametric type <tt>T</tt>. The <tt>real_parser</tt> is a template
class with 2 template parameters. Here's the <tt>real_parser</tt> template interface:</p>
<pre><span class=keyword> template</span><span class=special>&lt;
</span><span class=keyword>typename </span><span class=identifier>T </span><span class=special>= </span><span class=keyword>double</span><span class=special>,
</span><span class=keyword>typename </span><span class=identifier>RealPoliciesT </span><span class=special>= </span><span class=identifier>ureal_parser_policies</span><span class=special>&lt;</span><span class=identifier>T</span><span class=special>&gt; </span><span class=special>&gt;
</span><span class=keyword> struct </span><span class=identifier>real_parser</span><span class=special>;</span></pre>
<p>The first template parameter is its numeric base type <tt>T</tt>. This defaults
to <tt>double</tt>. </p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><img src="theme/bulb.gif" width="13" height="18"><b>
Parsing special numeric types</b><br>
<br>
Notice that the numeric base type <tt>T</tt> can be specified by the user.
This means that we can use the numeric parsers to parse user defined numeric
types such as <tt>fixed_point</tt> (fixed point reals) and <tt>bigint</tt>
(unlimited precision integers).</td>
</tr>
</table>
<p>The second template parameter is a class that groups all the policies and defaults
to <tt>ureal_parser_policies&lt;T&gt;</tt>. Policies control the real number
parsers' behavior. The default policies provided are designed to parse C/C++
style real numbers of the form <b>nnn.fffEeee</b> where <b>nnn</b> is the whole
number part, <b>fff</b> is the fractional part, <b>E</b> is <tt class="quotes">'e'</tt>
or <tt class="quotes">'E'</tt> and <b>eee</b> is the exponent optionally preceded
by <tt class="quotes">'-'</tt> or <tt><span class="quotes">'+'</span></tt>.
This corresponds to the following grammar, with the exception that plain integers
without the decimal point are also accepted by default.</p>
<pre><code><font color="#000000"><span class=keyword> </span><span class=identifier>floatingliteral
</span><span class=special>= </span><span class=identifier>fractionalconstant </span><span class=special>&gt;&gt; </span><span class=special>!</span><span class=identifier>exponentpart
</span><span class=special>| </span><span class=special>+</span><span class=identifier>digit_p </span><span class=special>&gt;&gt; </span><span class=identifier>exponentpart
</span><span class=special>;
</span><span class=identifier>fractionalconstant
</span><span class=special>= </span><span class=special>*</span><span class=identifier>digit_p </span><span class=special>&gt;&gt; </span><span class=literal>'.' </span><span class=special>&gt;&gt; </span><span class=special>+</span><span class=identifier>digit_p
</span><span class=special>| </span><span class=special>+</span><span class=identifier>digit_p </span><span class=special>&gt;&gt; </span><span class=literal>'.'
</span><span class=special>;
</span><span class=identifier>exponentpart
</span><span class=special>= </span><span class=special>(</span><span class=literal>'e' </span><span class=special>| </span><span class=literal>'E'</span><span class=special>) </span><span class=special>&gt;&gt; </span><span class=special>!(</span><span class=literal>'+' </span><span class=special>| </span><span class=literal>'-'</span><span class=special>) </span><span class=special>&gt;&gt; </span><span class=special>+</span><span class=identifier>digit_p
</span><span class=special>;</span></font></code></pre>
<p>The default policies are provided to take care of the most common case (there
are many ways to represent, and hence parse, real numbers). In most cases, the
default setting of the <tt>real_parser</tt> is sufficient and can be used straight
out of the box. Actually, there are four <tt>real_parser</tt>s pre-defined for
immediate use:</p>
<table width="90%" border="0" align="center">
<tr>
<td colspan="2" class="table_title">Predefined real_parsers</td>
</tr>
<tr>
<td class="table_cells" width="21%"><b>ureal_p</b></td>
<td class="table_cells" width="79%"><span class=identifier><code>real_parser</code></span><code><span class=special>&lt;</span><span class=keyword>double</span><span class=special>,
</span><span class=identifier>ureal_parser_policies</span><span class=special>&lt;</span><span class=keyword>double</span><span class=special>&gt;
&gt; </span><span class=keyword>const</span></code></td>
</tr>
<tr>
<td class="table_cells" width="21%"><b>real_p</b></td>
<td class="table_cells" width="79%"><span class=identifier><code>real_parser</code></span><code><span class=special>&lt;</span><span class=keyword>double</span><span class=special>,
</span><span class=identifier>real_parser_policies</span><span class=special>&lt;</span><span class=keyword>double</span><span class=special>&gt;
&gt; </span><span class=keyword>const</span></code></td>
</tr>
<tr>
<td class="table_cells" width="21%"><b>strict_ureal_p</b></td>
<td class="table_cells" width="79%"><span class=identifier><code>real_parser</code></span><code><span class=special>&lt;</span><span class=keyword>double</span><span class=special>,
</span><span class=identifier>strict_ureal_parser_policies</span><span class=special>&lt;</span><span class=keyword>double</span><span class=special>&gt;
&gt; </span><span class=keyword>const</span></code></td>
</tr>
<tr>
<td class="table_cells" width="21%"><b>strict_real_p</b></td>
<td class="table_cells" width="79%"><span class=identifier><code>real_parser</code></span><code><span class=special>&lt;</span><span class=keyword>double</span><span class=special>,
</span><span class=identifier>strict_real_parser_policies</span><span class=special>&lt;</span><span class=keyword>double</span><span class=special>&gt;
&gt; </span><span class=keyword>const</span></code></td>
</tr>
</table>
<p>We've seen <tt>real_p</tt> before. <tt>ureal_p</tt> is its unsigned variant.</p>
<h3><a name="strict_reals"></a>Strict Reals </h3>
<p>Integer numbers are considered a subset of real numbers, so <tt>real_p</tt>
and <tt>ureal_p</tt> recognize integer numbers (without a dot) as real numbers.
<tt>strict_real_p</tt> and <tt>strict_ureal_p</tt> are the equivalent parsers
that <strong>require</strong> a dot to be present for a number to be considered
a successful match.</p>
<h2>Advanced: real_parser policies</h2>
<p>The parser policies break down real number parsing into 6 steps:</p>
<table width="90%" border="0" align="center">
<tr>
<td class="table_cells">1</td>
<td class="table_cells"><b>parse_sign</b></td>
<td class="table_cells">Parse the prefix sign</td>
</tr>
<tr>
<td class="table_cells">2</td>
<td class="table_cells"><b>parse_n</b></td>
<td class="table_cells">Parse the integer at the left of the decimal point</td>
</tr>
<tr>
<td class="table_cells">3</td>
<td class="table_cells"><b>parse_dot</b></td>
<td class="table_cells">Parse the decimal point</td>
</tr>
<tr>
<td class="table_cells">4</td>
<td class="table_cells"><b>parse_frac_n</b></td>
<td class="table_cells">Parse the fraction after the decimal point</td>
</tr>
<tr>
<td class="table_cells">5</td>
<td class="table_cells"><b>parse_exp</b></td>
<td class="table_cells">Parse the exponent prefix (e.g. 'e')</td>
</tr>
<tr>
<td class="table_cells">6</td>
<td class="table_cells"><b>parse_exp_n</b></td>
<td class="table_cells">Parse the actual exponent</td>
</tr>
</table>
<p>And the interaction of these sub-parsing tasks is further controlled by these
3 policies:</p>
<table width="90%" border="0" align="center">
<tr>
<td class="table_cells">1</td>
<td class="table_cells"><b>allow_leading_dot</b></td>
<td class="table_cells">Allow a leading dot to be present (&quot;.1&quot; becomes
equivalent to &quot;0.1&quot;)</td>
</tr>
<tr>
<td class="table_cells">2</td>
<td class="table_cells"><b>allow_trailing_dot</b></td>
<td class="table_cells">Allow a trailing dot to be present (&quot;1.&quot; becomes
equivalent to &quot;1.0&quot;)</td>
</tr>
<tr>
<td class="table_cells">3</td>
<td class="table_cells"><b>expect_dot</b></td>
<td class="table_cells">Require a dot to be present (disallows &quot;1&quot; to
be equivalent to &quot;1.0&quot;)</td>
</tr>
</table>
<p>[ <img src="theme/lens.gif" width="15" height="16"> From here on, required
reading: <a href="scanner.html">The Scanner</a>, <a href="indepth_the_parser.html">In-depth
The Parser</a> and <a href="indepth_the_scanner.html">In-depth The Scanner</a>
]</p>
<h2>sign_parser and sign_p</h2>
<p>Before we move on, a small utility parser is included here to ease the parsing
of the <span class="quotes">'-'</span> or <span class="quotes">'+'</span> sign.
While it is easy to write one:</p>
<pre> <span class=identifier>sign_p </span><span class=special>= </span><span class=special>(</span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'+'</span>) <span class=special>| </span><span class=literal>'-'</span><span class="special">)</span><span class=literal>;</span></pre>
<p>it is not possible to extract the actual sign (positive or negative) without
resorting to semantic actions. The sign_p parser has a bool attribute returned
to the caller through the match object which, after parsing, is set to <strong>true</strong>
if the parsed sign is negative. This attribute detects if the negative sign
has been parsed. Examples:</p>
<pre><span class=special> </span><span class=keyword>bool </span><span class=identifier>is_negative</span><span class=special>;
</span><span class=identifier>r </span><span class=special>= </span><span class=identifier>sign_p</span><span class=special>[</span><span class=identifier>assign_a</span><span class=special>(</span><span class=identifier>is_negative</span><span class=special>)]</span><span class=special>;</span></pre>
<p><span class=special></span>or simply...</p>
<pre> <span class=comment>// directly extract the result from the match result's value</span>
<span class=keyword>bool </span><span class=identifier>is_negative </span><span class=special>= </span><span class=identifier>sign_p</span><span class=special>.</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>scan</span><span class=special>).</span><span class=identifier>value</span><span class=special>();</span><span class=comment> </span></pre>
<p>The sign_p parser expects attached semantic actions to have a signature (see
<a href="semantic_actions.html#specialized_actions">Specialized Actions</a>
for further detail) compatible with: </p>
<p><b>Signature for functions:</b></p>
<pre><code><font color="#000000"><span class=identifier> </span><span class=keyword>void </span><span class=identifier>func</span><span class=special>(</span><span class="keyword">bool</span><span class=identifier> is_negative</span><span class=special>);</span></font></code></pre>
<p><b>Signature for functors:</b> </p>
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>struct </span><span class=identifier>ftor
</span><span class=special>{
</span><span class=keyword>void </span><span class=keyword>operator</span><span class=special>()(</span><span class="keyword">bool</span><span class=identifier> is_negative</span><span class=special>) </span><span class=keyword>const</span><span class=special>;
</span><span class=special>};</span></font></code></pre>
<h2><span class=identifier>ureal_parser_policies</span></h2>
<pre><span class=comment> </span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>ureal_parser_policies
</span><span class=special>{
</span><span class=keyword>typedef </span><span class=identifier>uint_parser</span><span class=special>&lt;</span><span class=identifier>T</span><span class=special>, </span><span class=number>10</span><span class=special>, </span><span class=number>1</span><span class=special>, -</span><span class=number>1</span><span class=special>&gt; </span><span class=identifier>uint_parser_t</span><span class=special>;
</span><span class=keyword>typedef </span><span class=identifier>int_parser</span><span class=special>&lt;</span><span class=identifier>T</span><span class=special>, </span><span class=number>10</span><span class=special>, </span><span class=number>1</span><span class=special>, -</span><span class=number>1</span><span class=special>&gt; </span><span class=identifier>int_parser_t</span><span class=special>;
</span><span class=keyword>static const bool</span> <span class=identifier>allow_leading_dot</span> <span class=special> =</span> <span class=literal>true</span><span class=special>;</span><span class=special>
</span><span class=keyword>static const bool</span> <span class=identifier>allow_trailing_dot </span><span class=special>=</span> <span class=literal>true</span><span class=special>;</span><span class=special></span>
<span class=special> </span><span class=keyword>static const bool</span> <span class=identifier>expect_dot</span> <span class=special> =</span> <span class=literal>false</span><span class=special>;</span><span class=special></span><span class=special><br>
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>static typename </span><span class=identifier>match_result</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>, </span><span class="identifier">nil_t</span><span class=special>&gt;::</span><span class=identifier>type
parse_sign</span><span class=special>(</span><span class=identifier>ScannerT</span><span class=special>&amp; </span><span class=identifier>scan</span><span class=special>)
{ </span><span class=keyword>return </span><span class=identifier>scan</span><span class=special>.</span><span class=identifier>no_match</span><span class=special>(); }
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>static typename </span><span class=identifier>parser_result</span><span class=special>&lt;</span><span class=identifier>uint_parser_t</span><span class=special>, </span><span class=identifier>ScannerT</span><span class=special>&gt;::</span><span class=identifier>type
parse_n</span><span class=special>(</span><span class=identifier>ScannerT</span><span class=special>&amp; </span><span class=identifier>scan</span><span class=special>)
{ </span><span class=keyword>return </span><span class=identifier>uint_parser_t</span><span class=special>().</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>scan</span><span class=special>); }
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>static typename </span><span class=identifier>parser_result</span><span class=special>&lt;</span><span class=identifier>chlit</span><span class=special>&lt;&gt;, </span><span class=identifier>ScannerT</span><span class=special>&gt;::</span><span class=identifier>type
parse_dot</span><span class=special>(</span><span class=identifier>ScannerT</span><span class=special>&amp; </span><span class=identifier>scan</span><span class=special>)
{ </span><span class=keyword>return </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'.'</span><span class=special>).</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>scan</span><span class=special>); }
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>static typename </span><span class=identifier>parser_result</span><span class=special>&lt;</span><span class=identifier>uint_parser_t</span><span class=special>, </span><span class=identifier>ScannerT</span><span class=special>&gt;::</span><span class=identifier>type
parse_frac_n</span><span class=special>(</span><span class=identifier>ScannerT</span><span class=special>&amp; </span><span class=identifier>scan</span><span class=special>)
{ </span><span class=keyword>return </span><span class=identifier>uint_parser_t</span><span class=special>().</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>scan</span><span class=special>); }
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>static typename </span><span class=identifier>parser_result</span><span class=special>&lt;</span><span class=identifier>chlit</span><span class=special>&lt;&gt;, </span><span class=identifier>ScannerT</span><span class=special>&gt;::</span><span class=identifier>type
parse_exp</span><span class=special>(</span><span class=identifier>ScannerT</span><span class=special>&amp; </span><span class=identifier>scan</span><span class=special>)
{ </span><span class=keyword>return </span><span class=identifier>as_lower_d</span><span class=special>[</span><span class=literal>'e'</span><span class=special>].</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>scan</span><span class=special>); }
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>static typename </span><span class=identifier>parser_result</span><span class=special>&lt;</span><span class=identifier>int_parser_t</span><span class=special>, </span><span class=identifier>ScannerT</span><span class=special>&gt;::</span><span class=identifier>type
parse_exp_n</span><span class=special>(</span><span class=identifier>ScannerT</span><span class=special>&amp; </span><span class=identifier>scan</span><span class=special>)
{ </span><span class=keyword>return </span><span class=identifier>int_parser_t</span><span class=special>().</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>scan</span><span class=special>); }
};
</span></pre>
<p><span class=special></span><span class=identifier>The default ureal_parser_policies
uses the lower level integer numeric parsers to do its job. </span></p>
<h2><span class=identifier>real_parser_policies</span></h2>
<pre> <span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>real_parser_policies </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>ureal_parser_policies</span><span class=special>&lt;</span><span class=identifier>T</span><span class=special>&gt;
</span><span class=special>{
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>static </span><span class=keyword>typename </span><span class=identifier>parser_result</span><span class=special>&lt;</span><span class=identifier>sign_parser</span><span class=special>, </span><span class=identifier>ScannerT</span><span class=special>&gt;::</span><span class=identifier>type
</span><span class=identifier>parse_sign</span><span class=special>(</span><span class=identifier>ScannerT</span><span class=special>&amp; </span><span class=identifier>scan</span><span class=special>)
</span><span class=special>{ </span><span class=keyword>return </span><span class=identifier>sign_p</span><span class=special>.</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>scan</span><span class=special>); </span><span class=special>}
</span><span class=special>};</span></pre>
<p>Notice how the real_parser_policies replaced <b><tt>parse_sign</tt></b> of
the <b>u</b>real_parser_policies from which it is subclassed. The default real_parser_policies
simply uses a <tt>sign_p</tt> instead of <tt>scan.no_match()</tt> in the <tt>parse_sign
</tt> step. </p>
<h2><span class=identifier>strict_ureal_parser_policies and strict_real_parser_policies</span></h2>
<pre> <span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>strict_ureal_parser_policies </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>ureal_parser_policies</span><span class=special>&lt;</span><span class=identifier>T</span><span class=special>&gt;
</span><span class=special>{</span>
<span class=special> </span><span class=keyword>static const bool</span> <span class=identifier>expect_dot</span><span class=special> =</span> <span class=literal>true</span><span class=special>;</span><span class=special></span><span class=special></span>
<span class=special>};</span>
<span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>strict_real_parser_policies </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>real_parser_policies</span><span class=special>&lt;</span><span class=identifier>T</span><span class=special>&gt;
</span><span class=special>{</span>
<span class=special> </span><span class=keyword>static const bool</span> <span class=identifier>expect_dot</span><span class=special> =</span> <span class=literal>true</span><span class=special>;</span><span class=special></span><span class=special></span>
<span class=special>};</span></pre>
<p>Again, these policies override only the members that need to differ from those
of their superclasses.</p>
<p><i>Specialized</i> real parser policies can reuse some of the defaults while
replacing a few. For example, the following is a real number parser policy that
parses thousands separated numbers with at most two decimal places and no exponent.
</p>
<p><img src="theme/lens.gif" width="15" height="16">The full source code can be
viewed <a href="../example/fundamental/thousand_separated.cpp">here</a>. </p>
<pre>
<span class=identifier> </span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>ts_real_parser_policies </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>ureal_parser_policies</span><span class=special>&lt;</span><span class=identifier>T</span><span class=special>&gt;
{
</span><span class=comment>// These policies can be used to parse thousand separated
// numbers with at most 2 decimal digits after the decimal
// point. e.g. 123,456,789.01
</span><span class=keyword>typedef </span><span class=identifier>uint_parser</span><span class=special>&lt;</span><span class=keyword>int</span><span class=special>, </span><span class=number>10</span><span class=special>, </span><span class=number>1</span><span class=special>, </span><span class=number>2</span><span class=special>&gt; </span><span class=identifier>uint2_t</span><span class=special>;
</span><span class=keyword>typedef </span><span class=identifier>uint_parser</span><span class=special>&lt;</span><span class=identifier>T</span><span class=special>, </span><span class=number>10</span><span class=special>, </span><span class=number>1</span><span class=special>, -</span><span class=number>1</span><span class=special>&gt; </span><span class=identifier>uint_parser_t</span><span class=special>;
</span><span class=keyword>typedef </span><span class=identifier>int_parser</span><span class=special>&lt;</span><span class=keyword>int</span><span class=special>, </span><span class=number>10</span><span class=special>, </span><span class=number>1</span><span class=special>, -</span><span class=number>1</span><span class=special>&gt; </span><span class=identifier>int_parser_t</span><span class=special>;
</span><span class=comment>////////////////////////////////// 2 decimal places Max
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>static typename </span><span class=identifier>parser_result</span><span class=special>&lt;</span><span class=identifier>uint2_t</span><span class=special>, </span><span class=identifier>ScannerT</span><span class=special>&gt;::</span><span class=identifier>type
parse_frac_n</span><span class=special>(</span><span class=identifier>ScannerT</span><span class=special>&amp; </span><span class=identifier>scan</span><span class=special>)
{ </span><span class=keyword>return </span><span class=identifier>uint2_t</span><span class=special>().</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>scan</span><span class=special>); }
</span><span class=special> </span><span class=comment>////////////////////////////////// No exponent<br></span> <span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>static typename </span><span class=identifier>parser_result</span><span class=special>&lt;</span><span class=identifier>chlit</span><span class=special>&lt;&gt;, </span><span class=identifier>ScannerT</span><span class=special>&gt;::</span><span class=identifier>type
parse_exp</span><span class=special>(</span><span class=identifier>ScannerT</span><span class=special>&amp; </span><span class=identifier>scan</span><span class=special>)
{ </span><span class=keyword>return </span><span class=identifier>scan</span><span class=special>.</span><span class=identifier>no_match</span><span class=special>(); }
</span><span class=special> </span><span class=comment>////////////////////////////////// No exponent<br></span> <span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>static typename </span><span class=identifier>parser_result</span><span class=special>&lt;</span><span class=identifier>int_parser_t</span><span class=special>, </span><span class=identifier>ScannerT</span><span class=special>&gt;::</span><span class=identifier>type
parse_exp_n</span><span class=special>(</span><span class=identifier>ScannerT</span><span class=special>&amp; </span><span class=identifier>scan</span><span class=special>)
{ </span><span class=keyword>return </span><span class=identifier>scan</span><span class=special>.</span><span class=identifier>no_match</span><span class=special>(); }
</span><span class=comment>////////////////////////////////// Thousands separated numbers
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>static typename </span><span class=identifier>parser_result</span><span class=special>&lt;</span><span class=identifier>uint_parser_t</span><span class=special>, </span><span class=identifier>ScannerT</span><span class=special>&gt;::</span><span class=identifier>type
<a name="scanner_save"></a>parse_n</span><span class=special>(</span><span class=identifier>ScannerT</span><span class=special>&amp; </span><span class=identifier>scan</span><span class=special>)
{
</span><span class=keyword>typedef typename </span><span class=identifier>parser_result</span><span class=special>&lt;</span><span class=identifier>uint_parser_t</span><span class=special>, </span><span class=identifier>ScannerT</span><span class=special>&gt;::</span><span class=identifier>type RT</span><span class=special>;
</span><span class="keyword">static </span><span class=identifier>uint_parser</span><span class=special>&lt;</span><span class=keyword>unsigned</span><span class=special>, </span><span class=number>10</span><span class=special>, </span><span class=number>1</span><span class=special>, </span><span class=number>3</span><span class=special>&gt; </span><span class=identifier>uint3_p</span><span class=special>;
</span><span class="keyword">static </span><span class=identifier>uint_parser</span><span class=special>&lt;</span><span class=keyword>unsigned</span><span class=special>, </span><span class=number>10</span><span class=special>, </span><span class=number>3</span><span class=special>, </span><span class=number>3</span><span class=special>&gt; </span><span class=identifier>uint3_3_p</span><span class=special>;
</span><span class=keyword>if </span><span class=special>(</span><span class=identifier>RT hit </span><span class=special>= </span><span class=identifier>uint3_p</span><span class=special>.</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>scan</span><span class=special>))
{
</span><span class=identifier>T n</span><span class=special>;
</span><span class="keyword">typedef typename </span>ScannerT<span class="special">::</span>iterator_t iterator_t;<span class=special>
</span>iterator_t save<span class="special"> = </span>scan.first<span class="special">;
</span><span class=keyword>while </span><span class=special>(</span><span class=identifier>match</span><span class=special>&lt;&gt; </span><span class=identifier>next </span><span class=special>= (</span><span class=literal>',' </span><span class=special>&gt;&gt; </span><span class=identifier>uint3_3_p</span><span class=special>[</span><span class=identifier>assign_a</span><span class=special>(</span><span class=identifier>n</span><span class=special>)]).</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>scan</span><span class=special>))
{
</span><span class=identifier>hit</span><span class=special>.</span><span class=identifier>value</span><span class=special>() *= </span><span class=number>1000</span><span class=special>;
</span><span class=identifier>hit</span><span class=special>.</span><span class=identifier>value</span><span class=special>() += </span><span class=identifier>n</span><span class=special>;
</span><span class=identifier>scan</span><span class=special>.</span><span class=identifier>concat_match</span><span class=special>(</span><span class=identifier>hit</span><span class=special>, </span><span class=identifier>next</span><span class=special>);
</span><span class="identifier">save </span><span class=special><span class="special">= </span></span><span class="identifier">scan</span><span class="special">.</span><span class="identifier">first</span><span class=special><span class="special">;</span>
}
</span>scan<span class="special">.</span>first<span class="special"> = </span>save<span class="special">;
</span><span class=keyword>return </span><span class=identifier>hit</span><span class=special>;
</span><span class=special> // Note: On erroneous input such as &quot;123,45&quot;, the result should<br> // be a partial match &quot;123&quot;. 'save' is used to make sure that<br> // the scanner position is placed at the last *valid* parse<br> // position.<br> }
</span><span class=keyword>return </span><span class=identifier>scan</span><span class=special>.</span><span class=identifier>no_match</span><span class=special>();
}
};</span></pre>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="operators.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="rule.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2002 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
<p>&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,222 @@
<html>
<head>
<title>Operators</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%">
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Operators</b></font>
</td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="primitives.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="numerics.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>Operators are used as a means for object composition and embedding. Simple
parsers may be composed to form composites through operator overloading, crafted
to approximate the syntax of an Extended Backus-Naur Form (EBNF) variant.
An expression such as:</p>
<pre><code><font color="#000000"> <span class=identifier>a </span><span class=special>| </span><span class=identifier>b</span></font></code></pre>
<p>actually yields a new parser type which is a composite of its operands, a and
b. Taking this example further, if a and b were of type <tt>chlit</tt>&lt;&gt;,
the result would have the composite type:</p>
<pre><code><font color="#000000"> <span class=identifier>alternative</span><span class=special>&lt;</span><span class=identifier>chlit</span><span class=special>&lt;&gt;, </span><span class=identifier>chlit</span><span class=special>&lt;&gt; </span><span class=special>&gt;</span></font></code></pre>
<p> In general, for any binary operator, it will take its two arguments, parser1
and parser2, and create a new composed parser of the form</p>
<pre><code><font color="#000000"> <span class=identifier>op</span><span class=special>&lt;</span><span class=identifier>parser1</span><span class=special>, </span><span class=identifier>parser2</span><span class=special>&gt;</span></font></code></pre>
<p>where parser1 and parser2 can be arbitrarily complex parsers themselves, with
the only limitations being what your compiler imposes. </p>
<h3>Set Operators</h3>
<table width="90%" border="0" align="center">
<tr>
<td class="table_title" colspan="3">Set operators</td>
</tr>
<tr>
<td class="table_cells" width="20%"><code><span class=identifier>a </span><span class=special>|
</span><span class=identifier>b</span></code></td>
<td class="table_cells" width="24%">Union</td>
<td class="table_cells" width="56%">Match a or b. Also referred to as alternative</td>
</tr>
<tr>
<td class="table_cells" width="20%"><code><span class=identifier>a </span><span class=special>&amp;
</span><span class=identifier>b</span></code></td>
<td class="table_cells" width="24%">Intersection</td>
<td class="table_cells" width="56%">Match a and b</td>
</tr>
<tr>
<td class="table_cells" width="20%"><code><span class=identifier>a </span><span class=special>-
</span><span class=identifier>b</span></code></td>
<td class="table_cells" width="24%">Difference</td>
<td class="table_cells" width="56%">Match a but not b. If both match and b's
matched text is shorter than a's matched text, a successful match is made</td>
</tr>
<tr>
<td class="table_cells" width="20%"><code><span class=identifier>a </span><span class=special>^
</span><span class=identifier>b</span></code></td>
<td class="table_cells" width="24%">XOR</td>
<td class="table_cells" width="56%">Match a or b, but not both</td>
</tr>
</table>
<p><b>Short-circuiting</b></p>
<p>Alternative operands are tried one by one on a first-come, first-served basis
starting from the leftmost operand. After a successfully matched alternative
is found, the parser concludes its search, essentially short-circuiting the
search for other potentially viable candidates. This short-circuiting implicitly
gives the highest priority to the leftmost alternative.</p>
<p>Short-circuiting is done in the same manner as C or C++'s logical expressions;
e.g. <tt>if</tt> <tt><span class="operators">(</span>x <span class="operators">&lt;</span>
3 <span class="operators">||</span> y <span class="operators">&lt;</span> 2<span class="operators">)</span></tt>
where, if <tt>x</tt> evaluates to be less than 3, the <tt>y <span class="operators">&lt;</span>
2</tt> test is not done at all. In addition to providing an implicit priority
rule for alternatives which is necessary, given the non-deterministic nature
of the Spirit parser compiler, short-circuiting improves the execution time.
If the order of your alternatives is logically irrelevant, strive to put the
(expected) most common choice first for maximum efficiency.</p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><img src="theme/lens.gif" width="15" height="16"> <b>Intersections</b><br>
<br>
Some researchers assert that the intersections (e.g. <tt>a &amp; b</tt>)
let us define context sensitive languages (<a href="references.html#intersections">&quot;XBNF&quot;</a>
[citing Leu-Weiner, 1973]). &quot;The theory of defining a language as the
intersection of a finite number of context free languages was developed
by Leu and Weiner in 1973&quot;.<br>
<br>
<b><img src="theme/lens.gif" width="15" height="16"> ~ Operator</b><br>
<br>
The complement operator <tt>~</tt> was originally put into consideration.
Further understanding of its value and meaning leads us to uncertainty.
The basic problem stems from the fact that <tt>~a</tt> will yield <tt>U-a</tt>,
where <tt>U</tt> is the universal set of all strings. However, where it
makes sense, some parsers can be complemented (see the <a href="primitives.html#negation">primitive
character parsers</a> for examples).</td>
</tr>
</table>
<h3>Sequencing Operators</h3>
<table width="90%" border="0" align="center">
<tr>
<td class="table_title" colspan="3">Sequencing operators</td>
</tr>
<tr>
<td class="table_cells" width="21%"><code><span class=identifier>a </span><span class=special>&gt;&gt;
</span><span class=identifier>b</span></code></td>
<td class="table_cells" width="23%">Sequence</td>
<td class="table_cells" width="56%">Match a and b in sequence</td>
</tr>
<tr>
<td class="table_cells" width="21%"><code><span class=identifier>a </span><span class=special>&amp;&amp;
</span><span class=identifier>b</span></code></td>
<td class="table_cells" width="23%">Sequential-and</td>
<td class="table_cells" width="56%">Sequential-and. Same as above, match a
and b in sequence</td>
</tr>
<tr>
<td class="table_cells" width="21%"><code><span class=identifier>a </span><span class=special>||
</span><span class=identifier>b</span></code></td>
<td class="table_cells" width="23%">Sequential-or</td>
<td class="table_cells" width="56%">Match a or b in sequence</td>
</tr>
</table>
<p>The sequencing operator <tt class="operators">&gt;&gt;</tt> can alternatively
be thought of as the sequential-and operator. The expression <tt>a <span class="operators">&amp;&amp;</span>
b</tt> reads as match a and b in sequence. Continuing this logic, we can also
have a sequential-or operator where the expression <tt>a <span class="operators">||</span>
b</tt> reads as match a or b, and in sequence. That is, if both a and b match,
they must match in sequence; this is equivalent to <tt>a &gt;&gt; !b | b</tt>. </p>
<h3>Optional and Loops</h3>
<table width="90%" border="0" align="center">
<tr>
<td class="table_title" colspan="3">Optional and Loops</td>
</tr>
<tr>
<td class="table_cells" width="21%"><code><span class=special>*</span><span class=identifier>a</span></code></td>
<td class="table_cells" width="23%">Kleene star</td>
<td class="table_cells" width="56%">Match a zero (0) or more times</td>
</tr>
<tr>
<td class="table_cells" width="21%"><code><span class=special>+</span><span class=identifier>a</span></code></td>
<td class="table_cells" width="23%">Positive</td>
<td class="table_cells" width="56%">Match a one (1) or more times</td>
</tr>
<tr>
<td class="table_cells" width="21%"><code><span class=special>!</span><span class=identifier>a</span></code></td>
<td class="table_cells" width="23%">Optional</td>
<td class="table_cells" width="56%">Match a zero (0) or one (1) time</td>
</tr>
<tr>
<td class="table_cells" width="21%"><code><span class=identifier>a </span><span class=special>%
</span><span class=identifier>b</span></code></td>
<td class="table_cells" width="23%">List</td>
<td class="table_cells" width="56%">Match a list of one or more repetitions
of a separated by occurrences of b. This is the same as <tt>a &gt;&gt; *(b
&gt;&gt; a)</tt>. Note that <tt>a</tt> must not also match <tt>b</tt></td>
</tr>
</table>
<p><img src="theme/note.gif" width="16" height="16"> If we look more closely,
take note that we generalized the optional expression of the form <tt>!a</tt>
in the same category as loops. This is logical, considering that the optional
matches the expression following it zero (0) or one (1) time. </p>
<p><b>Primitive type operands</b></p>
<p> For binary operators, one of the operands, but not both, may be a <tt>char</tt>,
<tt> wchar_t</tt>, <tt>char const<span class="operators">*</span></tt> or <tt>wchar_t
const<span class="operators">*</span></tt>. Where P is a parser object, here
are some examples:</p>
<pre><code><span class=identifier> </span><span class=identifier>P </span><span class=special>| </span><span class=literal>'x'
</span><span class=identifier>P </span><span class=special>- </span><span class=identifier>L</span><span class=string>"Hello World"
</span><span class=literal>'x' </span><span class=special>&gt;&gt; </span><span class=identifier>P
</span><span class=string>"bebop" </span><span class=special>&gt;&gt; </span><span class=identifier>P</span></code></pre>
<p>It is important to emphasize that C++ mandates that operators may only be overloaded
if at least one argument is a user-defined type. Typically, in an expression
involving multiple operators, explicitly typing the leftmost operand as a parser
is enough to cause propagation to all the rest of the operands to its right
to be regarded as parsers. Examples:</p>
<pre><code><font color="#000000"><span class=identifier> </span><span class=identifier>r </span><span class=special>= </span><span class=literal>'a' </span><span class=special>| </span><span class=literal>'b' </span><span class=special>| </span><span class=literal>'c' </span><span class=special>| </span><span class=literal>'d'</span><span class=special>; </span><span class=comment>// ill formed
</span><span class=identifier>r </span><span class=special>= </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'a'</span><span class=special>) </span><span class=special>| </span><span class=literal>'b' </span><span class=special>| </span><span class=literal>'c' </span><span class=special>| </span><span class=literal>'d'</span><span class=special>; </span><span class=comment>// OK</span></font></code></pre>
<p>The second case is parsed as follows:</p>
<pre><code><font color="#000000"> r <font color="#0000ff"><img src="theme/arrow.gif"> <span class=special>(((</span><span class=identifier>chlit</span><span class=special>&lt;</span><span class=keyword>char</span><span class=special>&gt; </span><span class=special>| </span><span class=keyword>char</span><span class=special>) </span><span class=special>| </span><span class=keyword>char</span><span class=special>) </span><span class=special>| </span><span class=keyword>char</span><span class=special>)</span></font>
a <font color="#0000ff"><img src="theme/arrow.gif"> <span class=special>(</span><span class=identifier>chlit</span><span class=special>&lt;</span><span class=keyword>char</span><span class=special>&gt; </span><span class=special>| </span><span class=keyword>char</span><span class=special>)</span></font>
r <font color="#0000ff"><img src="theme/arrow.gif"> <span class=special>(((</span><span class=identifier>a</span><span class=special>) </span><span class=special>| </span><span class=keyword>char</span><span class=special>) </span><span class=special>| </span><span class=keyword>char</span><span class=special>)</span></font>
b <font color="#0000ff"><img src="theme/arrow.gif"> <span class=special>(</span><span class=identifier>a </span><span class=special>| </span><span class=keyword>char</span><span class=special>)</span></font>
r <font color="#0000ff"><img src="theme/arrow.gif"> <span class=special>(((</span><span class=identifier>b</span><span class=special>)) </span><span class=special>| </span><span class=keyword>char</span><span class=special>)</span></font>
c <font color="#0000ff"><img src="theme/arrow.gif"> <span class=special>(</span><span class=identifier>b </span><span class=special>| </span><span class=keyword>char</span><span class=special>)</span></font>
r <font color="#0000ff"><img src="theme/arrow.gif"> <span class=special>(((</span><span class=identifier>c</span><span class=special>)))</span></font></font></code></pre>
<p><b>Operator precedence and grouping</b></p>
<p>Since we are defining our meta-language in C++, we follow C/C++'s operator
precedence rules. Grouping expressions inside parentheses overrides this
(e.g., <tt><span class="operators">*(</span>a <span class="operators">|</span>
b<span class="operators">)</span></tt> reads: match a or b zero (0) or more
times). </p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="primitives.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="numerics.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
<p>&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,165 @@
<html>
<head>
<title>Organization</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Organization</b></font>
</td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="basic_concepts.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="primitives.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>The framework is highly modular and is organized in layers:</p>
<table width="100%" border="0">
<tr>
<td><div align="center">
<table width="40%" border="1" cellpadding="3" cellspacing="3" class="table_cells">
<tr>
<td><div align="center"><font color="#003366"><strong>iterator</strong></font></div></td>
<td><div align="center"><font color="#003366"><strong>actor</strong></font></div></td>
</tr>
</table>
<font color="#003366" size="3"><br>
</font></div></td>
</tr>
<tr>
<td><div align="center">
<table width="20%" border="1" cellpadding="3" cellspacing="3" class="table_cells">
<tr>
<td><div align="center"><font color="#003366"><strong>debug</strong></font></div></td>
</tr>
</table>
<font color="#003366" size="3"><br>
</font></div></td>
</tr>
<tr>
<td><div align="center">
<table width="75%" border="1" cellpadding="3" cellspacing="3" class="table_cells">
<tr>
<td><div align="center"><font color="#003366"><strong>attribute</strong></font></div></td>
<td><div align="center"><font color="#003366"><strong>dynamic</strong></font></div></td>
<td><div align="center"><font color="#003366"><strong>error_handling</strong></font></div></td>
<td><div align="center"><font color="#003366"><strong>symbols</strong></font></div></td>
<td><div align="center"><font color="#003366"><strong>tree</strong></font></div></td>
<td><div align="center"><font color="#003366"><strong>utility</strong></font></div></td>
</tr>
</table>
<font color="#003366" size="3"><br>
</font></div></td>
</tr>
<tr>
<td><div align="center">
<table width="20%" border="1" cellpadding="3" cellspacing="3" class="table_cells">
<tr>
<td><div align="center"><font color="#003366"><strong>meta</strong></font></div></td>
</tr>
</table>
<font color="#003366"><br>
</font></div></td>
</tr>
<tr>
<td><div align="center">
<table width="85%" border="1" cellpadding="3" cellspacing="3" class="table_cells">
<tr>
<td colspan="4"><div align="center"><font color="#003366"><strong><font size="4">core</font></strong></font></div></td>
</tr>
<tr>
<td><div align="center"><font color="#003366"><strong>scanner</strong></font></div></td>
<td><div align="center"><font color="#003366"><strong>primitives</strong></font></div></td>
<td><div align="center"><font color="#003366"><strong>composite</strong></font></div></td>
<td><div align="center"><font color="#003366"><strong>non_terminal</strong></font></div></td>
</tr>
</table>
</div></td>
</tr>
</table>
<p>Spirit has four layers, plus an independent top layer. The independent layer,
comprising actor and iterator, does not rely on the other layers. The framework's
architecture is completely orthogonal. The relationship among the layers is
acyclic. Lower layers neither depend on nor know of the existence of upper layers.
Modules in a layer do not depend on other modules in the same layer. </p>
<p>The client may use only the modules that she wants without incurring any
compile-time or run-time penalty. A minimalistic approach is to use only the core as
is. The highly streamlined core is usable by itself. The core is sufficiently
suitable for tasks such as micro parsing.</p>
<p>The <strong>iterator</strong> module is independent of Spirit and may be used
in other non-Spirit applications. This module is a compilation of stand-alone
iterators and iterator wrappers compatible with Spirit.
Over time, these iterators have been found to be most useful for parsing with Spirit. </p>
<p>The <strong>actor</strong> module, also independent of Spirit, is a compilation
of predefined semantic actions that covers the most common semantics processing
tasks.</p>
<p>The <strong>debug</strong> module provides library wide parser debugging. This
module hooks itself up transparently into the core non-intrusively and only
when necessary.</p>
<p>The <strong>attribute</strong> module introduces advanced semantic action machinery
with emphasis on extraction and passing of data up and down the parser hierarchy
through inherited and synthesized attributes. Attributes may also be used to
actually control the parsing. Parametric parsers are a form of dynamic parsers
that change their behavior at run time based on some attribute or data.</p>
<p>The <strong>dynamic</strong> module focuses on parsers with behavior that can
be modified at run-time.</p>
<p><strong>error_handling</strong>. The framework would not be complete without
Error Handling. C++'s exception handling mechanism is a perfect match for Spirit
due to its highly recursive functional nature. C++ Exceptions are used extensively
by this module for handling errors.</p>
<p>The <strong>symbols</strong> module focuses on symbol table management. This module
is rather basic now. The goal is to build a sub-framework that will be able
to accommodate C++ style multiple scope mechanisms. C++ is a great model for
the complexity of scoping that perhaps has no parallel in any other language.
There are classes and inheritance, private, protected and public access restrictions,
friends, namespaces, using declarations, using directives, Koenig lookup (Argument
Dependent Lookup) and more. The symbol table functionality we have now will
be the basis of a complete facility that will attempt to model this.</p>
<blockquote>
<p><em><font color="#003366">I wish that I could ever see, a structure as lovely
as a tree</font></em><font color="#003366">...</font></p>
</blockquote>
<p> Parse Tree and Abstract Syntax Tree (AST) generation are handled by the <b>Tree</b>
module. There are advantages with Parse Trees and Abstract Syntax Trees over
semantic actions. You can make multiple passes over the data without having
to re-parse the input. You can perform transformations on the tree. You can
evaluate things in any order you want, whereas with attribute schemes you have
to process in a begin-to-end fashion. You do not have to worry about backtracking
and action side effects that may occur with an ambiguous grammar.</p>
<p>The <b>utility</b> module is a set of commonly useful parsers and support classes
that were found to be useful in handling common tasks such as list processing,
comments, confix expressions, etc.</p>
<p>The <strong>meta</strong> module provides metaprogramming facilities for advanced Spirit
developers. This module facilitates compile-time and run-time introspection
of Spirit parsers.</p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="basic_concepts.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="primitives.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)</font></p>
<p class="copyright">&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,149 @@
<html>
<head>
<title>Parametric Parsers</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Parametric
Parsers</b></font> </td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="predefined_actors.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="functional.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>We already have a hint of the dynamic nature of the Spirit framework. This
capability is fundamental to Spirit. Dynamic parsing is a very powerful concept.
We shall take this concept further through run-time parametric parsers. We are
able to handle parsing tasks that are impossible to do with any EBNF syntax
alone.</p>
<h2>A Little Secret</h2>
<p> A little critter called <tt>boost::ref</tt> lurking in the boost distribution
is quite a powerful beast when used with Spirit's primitive parsers. We are used
to seeing the Spirit primitive parsers created with string or character literals
such as:</p>
<pre>
<code><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'A'</span><span class=special>)
</span><span class=identifier>range_p</span><span class=special>(</span><span class=literal>'A'</span><span class=special>, </span><span class=literal>'Z'</span><span class=special>)
</span><span class=identifier>str_p</span><span class=special>(</span><span class=string>&quot;Hello World&quot;</span><span class=special>)</span></code></pre>
<p> str_p has a second form that accepts two iterators over the string:</p>
<pre>
<code><span class=keyword>char </span><span class=keyword>const</span><span class=special>* </span><span class=identifier>first </span><span class=special>= </span><span class=string>&quot;My oh my&quot;</span><span class=special>;
</span><span class=keyword>char </span><span class=keyword>const</span><span class=special>* </span><span class=identifier>last </span><span class=special>= </span><span class=identifier>first </span><span class=special>+ </span><span class=identifier>std</span><span class=special>::</span><span class=identifier>strlen</span><span class=special>(</span><span class=identifier>first</span><span class=special>);
</span><span class=identifier>str_p</span><span class=special>(</span><span class=identifier>first</span><span class=special>, </span><span class=identifier>last</span><span class=special>)</span></code></pre>
<p> What is not obvious is that we can use <tt>boost::ref</tt> as well:</p>
<pre>
<code><span class=keyword>char </span><span class=identifier>ch </span><span class=special>= </span><span class=literal>'A'</span><span class=special>;
</span><span class=keyword>char </span><span class=identifier>from </span><span class=special>= </span><span class=literal>'A'</span><span class=special>;
</span><span class=keyword>char </span><span class=identifier>to </span><span class=special>= </span><span class=literal>'Z'</span><span class=special>;
</span><span class=identifier>ch_p</span><span class=special>(</span><span class=identifier>boost</span><span class=special>::</span><span class=identifier>ref</span><span class=special>(</span><span class=identifier>ch</span><span class=special>))
</span><span class=identifier>range_p</span><span class=special>(</span><span class=identifier>boost</span><span class=special>::</span><span class=identifier>ref</span><span class=special>(</span><span class=identifier>from</span><span class=special>), </span><span class=identifier>boost</span><span class=special>::</span><span class=identifier>ref</span><span class=special>(</span><span class=identifier>to</span><span class=special>))</span></code></pre>
<p> When <tt>boost::ref</tt> is used, the actual parameters to <tt>ch_p</tt> and
<tt>range_p</tt> are held by reference. This means that we can change the values
of <tt>ch</tt>, <tt>from</tt> and <tt>to</tt> anytime and the corresponding
<tt>ch_p</tt> and <tt>range_p</tt> parser will follow their dynamic values.
Of course, since they are held by reference, you must make sure that the referenced
object is not destructed while parsing.</p>
<p> What about <tt>str_p</tt>?</p>
<p> While the first form of <tt>str_p</tt> (the single argument form) is reserved
for null terminated string constants, the second form (the two argument first/last
iterator form) may be used:</p>
<pre>
<code><span class=keyword>char </span><span class=keyword>const</span><span class=special>* </span><span class=identifier>first </span><span class=special>= </span><span class=string>&quot;My oh my&quot;</span><span class=special>;
</span><span class=keyword>char </span><span class=keyword>const</span><span class=special>* </span><span class=identifier>last </span><span class=special>= </span><span class=identifier>first </span><span class=special>+ </span><span class=identifier>std</span><span class=special>::</span><span class=identifier>strlen</span><span class=special>(</span><span class=identifier>first</span><span class=special>);
</span><span class=identifier>str_p</span><span class=special>(</span><span class=identifier>boost</span><span class=special>::</span><span class=identifier>ref</span><span class=special>(</span><span class=identifier>first</span><span class=special>), </span><span class=identifier>boost</span><span class=special>::</span><span class=identifier>ref</span><span class=special>(</span><span class=identifier>last</span><span class=special>))</span></code></pre>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"> <img src="theme/note.gif" width="16" height="16"> Hey,
don't forget <tt>chseq_p</tt>. All these apply to this seldom used primitive
as well. </td>
</tr>
</table>
<h2>Functional Parametric Primitives</h2>
<pre> <span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>attribute<span class="special">/</span><span class="identifier">parametric</span><span class="special">.</span>hpp<span class="special">&gt;</span></pre>
<p> Taking this further, Spirit includes functional versions of the primitives.
Rather than taking in characters, strings or references to characters and strings
(using boost::ref), the functional versions take in functions or functors.</p>
<h3>f_chlit and f_ch_p</h3>
<p> The functional version of <tt>chlit</tt>. This parser takes in a function
or functor (function object). The function is expected to have an interface
compatible with:</p>
<pre>
<code><span class=identifier>CharT </span><span class=identifier>func</span><span class=special>()</span></code></pre>
<p> where CharT is the character type (e.g. <tt>char</tt>, <tt>int</tt>, <tt>wchar_t</tt>).</p>
<p> The functor is expected to have an interface compatible with:</p>
<pre>
<code><span class=keyword>struct </span><span class=identifier>functor
</span><span class=special>{
</span><span class=identifier>CharT </span><span class=keyword>operator</span><span class=special>()() </span><span class=keyword>const</span><span class=special>;
</span><span class=special>};</span></code></pre>
<p> where CharT is the character type (e.g. <tt>char</tt>, <tt>int</tt>, <tt>wchar_t</tt>).</p>
<p> Here's a contrived example:</p>
<pre>
<code><span class=keyword>struct </span><span class=identifier>X
</span><span class=special>{
</span><span class=keyword>char </span><span class=keyword>operator</span><span class=special>()() </span><span class=keyword>const
</span><span class=special>{
</span><span class=keyword>return </span><span class=literal>'X'</span><span class=special>; </span><span class=special>
}
</span><span class=special>};</span></code></pre>
<p> Now we can use X to create our f_chlit parser:</p>
<pre>
<code><span class=identifier>f_ch_p</span><span class=special>(</span><span class=identifier>X</span><span class=special>())</span></code></pre>
<h3>f_range and f_range_p</h3>
<p> The functional version of <tt>range</tt>. This parser takes in a function
or functor compatible with the interfaces above. The difference is that <tt>f_range</tt>
(and <tt>f_range_p</tt>) expects two functors. One for the start and one for
the end of the range.</p>
<h3>f_chseq and f_chseq_p</h3>
<p> The functional version of <tt>chseq</tt>. This parser takes in two functions
or functors. One for the begin iterator and one for the end iterator. The function
is expected to have an interface compatible with:</p>
<pre>
<code><span class=identifier>IteratorT </span><span class=identifier>func</span><span class=special>()</span></code></pre>
<p> where <tt>IteratorT</tt> is the iterator type (e.g. <tt>char const*</tt>,
<tt>wchar_t const*</tt>).</p>
<p> The functor is expected to have an interface compatible with:</p>
<pre>
<code><span class=keyword>struct </span><span class=identifier>functor
</span><span class=special>{
</span><span class=identifier>IteratorT </span><span class=keyword>operator</span><span class=special>()() </span><span class=keyword>const</span><span class=special>;
</span><span class=special>};</span></code></pre>
<p> where <tt>IteratorT</tt> is the iterator type (e.g. <tt>char const*</tt>,
<tt>wchar_t const*</tt>).</p>
<h3>f_strlit and f_str_p</h3>
<p> The functional version of <tt>strlit</tt>. This parser takes in two functions
or functors compatible with the interfaces that <tt>f_chseq</tt> expects.</p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="predefined_actors.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="functional.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)</font></p>
<p class="copyright">&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,184 @@
<html>
<head>
<title>Phoenix</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Phoenix</b></font>
</td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="functional.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="closures.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>The preceding chapter introduced Phoenix as a means of implementing your semantic actions. We shall look a little bit more into this important library with a focus on how you can use it handily with Spirit. This chapter is by no means a thorough discourse of the library. For more information on Phoenix, please take some time to read the <a href="../phoenix/index.html">Phoenix User's Guide</a>. If you just want to use it quickly, this chapter will probably suffice. Rather than taking you to the theories and details of the library, we shall try to provide you with annotated exemplars instead. Hopefully, this will get you into high gear quickly. </p>
<p>Semantic actions in Spirit can be just about any function or function object (functor) as long as it can satisfy the required signature. For example, <tt>uint_p</tt> requires a signature of <tt>void F(T)</tt>, where <tt>T</tt> is the type of the integer (typically <tt>unsigned int</tt>). Plain vanilla actions are of the <tt>void F(IterT, IterT)</tt> variety. You can code your actions in plain C++. Calls to C++ functions or functors will thus be of the form <tt>P[&amp;F]</tt> or <tt>P[F()]</tt> etc. (see <a href="semantic_actions.html">Semantic Actions</a>). Phoenix on the other hand, attempts to mimic C++ such that you can define the function body inlined in the code. </p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"> <img src="theme/lens.gif" width="15" height="16"> <strong>C++ in C++? </strong><br>
<br>
In as much as Spirit attempts to mimic EBNF in C++, Phoenix attempts to mimic C++ in C++!!!</td>
</tr>
</table>
<h2>var</h2>
<p>Remember the <tt>boost::ref</tt>? We discussed that in the <a href="parametric_parsers.html">Parametric Parsers chapter</a>. Phoenix has a similar, but more flexible, counterpart. It's called <tt>var</tt>. The usage is similar to <tt>boost::ref</tt> and you can use it as a direct replacement. However, unlike <tt>boost::ref</tt>, you can use it to form more complex expressions. Here are some examples:</p>
<pre> <span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>) += </span><span class=number>3
</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>) = </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>y</span><span class=special>) + </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>z</span><span class=special>)
</span><span class=number> </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>) = </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>y</span><span class=special>) + </span><span class=identifier><span class=special>(</span>3 * var</span><span class=special>(</span><span class=identifier>z</span><span class=special>))
</span><span class=number> </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>) = </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>y</span><span class=special>)[</span>var<span class="special">(</span>i<span class="special">)] </span><span class="comment">// assuming y is indexable and i is an index</span></pre>
<p>Let's start with a simple example. We'll want to parse a comma separated list of numbers and report the sum of all the numbers. Using phoenix's var, we do not have to write external semantic actions. We simply inline the code inside the semantic action slots. Here's the complete grammar with our phoenix actions (see <a href="../example/fundamental/sum.cpp">sum.cpp</a> in the examples):</p>
<pre><span class=number> </span><span class=identifier>real_p</span><span class=special>[</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>n</span><span class=special>) </span><span class=special>= </span><span class=identifier>arg1</span><span class=special>] </span><span class=special>&gt;&gt; </span><span class=special>*(</span><span class=literal>',' </span><span class=special>&gt;&gt; </span><span class=identifier>real_p</span><span class=special>[</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>n</span><span class=special>) </span><span class=special>+= </span><span class=identifier>arg1</span><span class=special>])</span> </pre>
<p> <img height="16" width="15" src="theme/lens.gif"> The full source code can be <a href="../example/fundamental/sum.cpp">viewed here</a>.
This is part of the Spirit distribution.</p>
<h3>argN</h3>
<p>Notice the expression: <span class=identifier><tt>var(n) = arg1 </tt></span>. What is <tt>arg1</tt> and what is it doing there? <tt>arg1</tt> is an argument placeholder. Remember that <tt>real_p</tt> (see <a href="numerics.html">Numerics</a>) reports the parsed number to its attached semantic action. <tt>arg1</tt> is a placeholder for the first argument passed to the semantic action by the parser. If more than one argument is passed in, these arguments can be referred to using <tt>arg1</tt>..<tt>argN</tt>. For instance, generic semantic actions (transduction interface; see <a href="semantic_actions.html">Semantic Actions</a>) are passed 2 arguments: the iterators (<tt>first</tt>/<tt>last</tt>) to the matching portion of the input stream. You can refer to <tt>first</tt> and <tt>last</tt> through <tt>arg1</tt> and <tt>arg2</tt>, respectively. </p>
<p>Like var, argN is also composable. Here are some examples:</p>
<pre> <span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>) += </span><span class=number>arg1
</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>) = </span><span class=identifier>arg1</span><span class=special> + </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>z</span><span class=special>)
</span><span class=number> </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>) = </span><span class=identifier>arg1</span><span class=special> + </span><span class=identifier><span class=special>(</span>3 * arg2</span><span class=special>)
</span><span class=number> </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>) = </span><span class=identifier>arg1</span><span class=special>[</span>arg2<span class="special">] </span><span class="comment">// assuming arg1 is indexable and arg2 is an index</span></pre>
<h3>val</h3>
<p>Note the expression: <tt>3 * arg2</tt>. This expression is actually a short-hand equivalent to: <tt>val(3) * arg2</tt>. We shall see later why, in some cases, we need to explicitly wrap constants and literals inside the val. Again, like var and argN, val is also composable.</p>
<h3>Functions </h3>
<p>Remember our very first example? In the <a href="quick_start.html">Quick Start</a> chapter, we presented a parser that parses a comma separated list and stuffs the parsed numbers in a vector (see <a href="../example/fundamental/number_list.cpp"> number_list.cpp</a>) . For simplicity, we used Spirit's pre-defined actors (see <a href="predefined_actors.html">Predefined Actors</a>). In the example, we used <tt>push_back_a</tt>:</p>
<pre><code><font color="#000000"> </font><font color="#000000"><span class="identifier">real_p</span><span class=
"special">[</span><span class="identifier">push_back_a</span><span class=
"special">(</span><span class="identifier">v</span><span class=
"special">)]</span> <span class="special">&gt;&gt;</span> <span class=
"special">*(</span><span class="literal">','</span> <span class=
"special">&gt;&gt;</span> <span class=
"identifier">real_p</span><span class="special">[</span><span class=
"identifier">push_back_a</span><span class="special">(</span><span class=
"identifier">v</span><span class="special">)])</span></font></code></pre>
<p>Phoenix allows you to write more powerful polymorphic functions, similar to <tt>push_back_a</tt>, easily. See <a href="../example/fundamental/stuff_vector.cpp">stuff_vector.cpp</a>. The example is similar to <a href="../example/fundamental/number_list.cpp">number_list.cpp</a> in functionality, but this time, using a phoenix function to actually implement the <tt>push_back</tt> function:</p>
<pre><span class=identifier> </span><span class=keyword>struct </span><span class=identifier>push_back_impl
</span><span class=special>{
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>Container</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>Item</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>result
</span><span class=special>{
</span><span class=keyword>typedef </span><span class=keyword>void </span><span class=identifier>type</span><span class=special>;
</span><span class=special>};
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>Container</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>Item</span><span class=special>&gt;
</span><span class=keyword>void </span><span class=keyword>operator</span><span class=special>()(</span><span class=identifier>Container</span><span class=special>&amp; </span><span class=identifier>c</span><span class=special>, </span><span class=identifier>Item </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>item</span><span class=special>) </span><span class=keyword>const
</span><span class=special>{
</span><span class=identifier>c</span><span class=special>.</span><span class=identifier>push_back</span><span class=special>(</span><span class=identifier>item</span><span class=special>);
</span><span class=special>}
</span><span class=special>};</span>
<span class=identifier>function</span><span class=special>&lt;</span><span class=identifier>push_back_impl</span><span class=special>&gt; </span><span class=keyword>const </span><span class=identifier>push_back </span><span class=special>= </span><span class=identifier>push_back_impl</span><span class=special>();</span></pre>
<p><img height="16" width="15" src="theme/lens.gif"> The full source code can be <a href="../example/fundamental/stuff_vector.cpp">viewed here</a>. This is part of the Spirit distribution.</p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"> <img src="theme/lens.gif" width="15" height="16"> <strong>Predefined Phoenix Functions</strong><br>
<br>
A future version of Phoenix will include an extensive set of predefined functions covering the whole of STL containers, iterators and algorithms. push_back will be part of this suite. </td>
</tr>
</table>
<p><span class=identifier><tt>push_back_impl</tt></span> is a simple wrapper over the <tt>push_back</tt> member function of STL containers. The extra scaffolding is there to provide phoenix with additional information that otherwise cannot be directly deduced. <tt>result</tt> relays to phoenix the return type of the functor (<tt>operator()</tt>) given its argument types (<tt>Container</tt> and <tt>Item</tt>) . In this case, the return type is always, simply <tt>void</tt>. </p>
<p><span class=identifier><tt>push_back</tt></span> is a phoenix function object. This is the actual function object that we shall use. The beauty behind phoenix function objects is that the actual use is strikingly similar to a normal C++ function call. Here's the number list parser rewritten using our phoenix function object:</p>
<pre><span class=special> </span><span class=identifier>real_p</span><span class=special>[</span><span class=identifier>push_back</span><span class=special>(</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>v</span><span class=special>), </span><span class=identifier>arg1</span><span class=special>)] </span><span class=special>&gt;&gt; </span><span class=special>*(</span><span class=literal>',' </span><span class=special>&gt;&gt; </span><span class=identifier>real_p</span><span class=special>[</span><span class=identifier>push_back</span><span class=special>(</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>v</span><span class=special>), </span><span class=identifier>arg1</span><span class=special>)])</span></pre>
<p>And, unlike predefined actors, they can be composed. See the pattern? Here are some examples:</p>
<pre> <span class=identifier>push_back</span><span class=special>(</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>v</span><span class=special>), </span><span class=identifier>arg1 + 2</span><span class=special>)</span>
<span class=identifier>push_back</span><span class=special>(</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>v</span><span class=special>), </span><span class=identifier>var<span class=special>(</span>x<span class=special>)</span></span><span class=special> + </span><span class="identifier">arg1</span><span class=special>)</span>
<span class=identifier> push_back</span><span class=special>(</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>v</span><span class=special>)[</span>arg1<span class=special>], </span><span class=identifier>arg2</span><span class=special>)<span class="comment"> // assuming v is a vector of vectors and arg1 is an index</span></span></pre>
<p>push_back does not have a return type. Say, for example, we wrote another phoenix function <tt>sin</tt>, we can use it in expressions as well: </p>
<pre> <span class=identifier>push_back</span><span class=special>(</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>v</span><span class=special>), </span><span class="identifier">sin</span><span class=special>(</span><span class=identifier>arg1<span class=special>)</span> </span><span class="special">*</span><span class=identifier> 2</span><span class=special>)</span>
</pre>
<h3>Construct</h3>
<p>Sometimes, we wish to construct an object. For instance, we might want to create a <tt>std::string</tt> given the first/last iterators. Say we want to parse a list of identifiers instead. Our grammar, without the actions, is: </p>
<pre><span class=number> </span><span class=special>(+</span><span class=identifier>alpha_p</span><span class=special>) </span><span class=special>&gt;&gt; </span><span class=special>*(</span><span class=literal>',' </span><span class=special>&gt;&gt; </span><span class=special>(+</span><span class=identifier>alpha_p</span><span class=special>)</span><span class=special>)</span></pre>
<p><strong><tt>construct_</tt></strong> is a predefined phoenix function that, you guessed it, constructs an object, from the arguments passed in. The usage is:</p>
<pre><span class=number> </span><span class=identifier>construct_</span><span class=special>&lt;</span><span class=identifier>T</span><span class=special>&gt;(</span><span class=identifier>arg1</span><span class=special>, </span><span class=identifier>arg2</span><span class=special>,... </span><span class=identifier>argN</span><span class=special>)</span></pre>
<p>where T is the desired type and arg1..argN are the constructor arguments. For example, we can construct a <tt>std::string</tt> from the first/last iterator pair this way:</p>
<pre><span class=identifier> construct_</span><span class=special>&lt;</span><span class=identifier>std</span><span class=special>::</span><span class=identifier>string</span><span class=special>&gt;(</span><span class=identifier>arg1</span><span class=special>, </span><span class=identifier>arg2</span><span class=special>)</span></pre>
<p>Now, we attach the actions to our grammar:</p>
<pre><span class=number> </span><span class=special>(+</span><span class=identifier>alpha_p</span><span class=special>)
</span><span class=special>[
</span><span class=identifier>push_back</span><span class=special>(</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>v</span><span class=special>), </span><span class=identifier>construct_</span><span class=special>&lt;</span><span class=identifier>std</span><span class=special>::</span><span class=identifier>string</span><span class=special>&gt;(</span><span class=identifier>arg1</span><span class=special>, </span><span class=identifier>arg2</span><span class=special>))
</span><span class=special>]
</span><span class=special>&gt;&gt;
</span><span class=special>*(</span><span class=literal>',' </span><span class=special>&gt;&gt;
</span><span class=special>(+</span><span class=identifier>alpha_p</span><span class=special>)
</span><span class=special>[
</span><span class=identifier>push_back</span><span class=special>(</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>v</span><span class=special>), </span><span class=identifier>construct_</span><span class=special>&lt;</span><span class=identifier>std</span><span class=special>::</span><span class=identifier>string</span><span class=special>&gt;(</span><span class=identifier>arg1</span><span class=special>, </span><span class=identifier>arg2</span><span class=special>))
</span><span class=special>]
</span><span class=special>)</span></pre>
<p><img height="16" width="15" src="theme/lens.gif"> The full source code can be <a href="../example/fundamental/stuff_vector2.cpp">viewed here</a>. This is part of the Spirit distribution.<span class=special></span></p>
<h3><a name="lambda"></a>Lambda expressions</h3>
<p>All these phoenix expressions we see above are lambda expressions. The important thing to note is that these expressions are not evaluated immediately. At grammar construction time, when the actions are attached to the productions, a lambda expression actually generates an unnamed function object that is evaluated later, at parse time. In other words, lambda expressions are <strong>lazily evaluated</strong>.</p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"> <b><img src="theme/lens.gif" width="15" height="16"> Lambda Expressions?</b><br>
<br>
Lambda expressions are actually unnamed partially applied functions where placeholders (e.g. arg1, arg2) are provided in place of some of the arguments. The reason this is called a lambda expression is that traditionally, such placeholders are written using the Greek letter lambda <img src="theme/lambda.png" width="15" height="22">.</td>
</tr>
</table>
<p>Phoenix uses tricks not unlike those used by Spirit to mimic C++ such that you can define the function body inlined in the code. It's weird, but as mentioned, Phoenix actually mimics C++ in C++ using expression templates. Surely, there are limitations...</p>
<p>All components in a Phoenix expression must be an <strong>actor</strong> (in phoenix parlance) in the same way that components in Spirit should be a <tt>parser</tt>. In Spirit, you can write:</p>
<pre><span class=number> </span><span class=identifier>r </span><span class=special>= </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'x'</span><span class=special>) </span><span class=special>&gt;&gt; </span><span class=literal>'y'</span><span class=special>;</span></pre>
<p>But not:</p>
<pre><span class=special> </span><span class=identifier>r </span><span class=special>= </span><span class=literal>'x' </span><span class=special>&gt;&gt; </span><span class=literal>'y'</span><span class=special>;</span></pre>
<p>In essence, <tt>parser &gt;&gt; char</tt> is a parser, but <tt>char &gt;&gt; char</tt> is a char (the char shift-right by another char).</p>
<p>The same restrictions apply to Phoenix. For instance:</p>
<pre><span class=special> </span><span class=keyword>int </span><span class=identifier>x </span><span class=special>= </span><span class=number>1</span><span class=special>;
</span><span class=identifier>cout </span><span class=special>&lt;&lt; </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>) </span><span class=special>&lt;&lt; </span><span class=string>&quot;pizza&quot;</span></pre>
<p>is a well formed Phoenix expression that's lazily evaluated. But:</p>
<pre><span class=string> </span><span class=identifier>cout </span><span class=special>&lt;&lt; </span><span class=identifier>x </span><span class=special>&lt;&lt; </span><span class=string>&quot;pizza&quot;</span></pre>
<p>is not. Such expressions are immediately executed. C++ syntax dictates that at least <strong>one</strong> of the operands must be a Phoenix actor type. This also applies to compound expressions. For example:</p>
<pre><span class=string> </span><span class=identifier>cout </span><span class=special>&lt;&lt; </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>) </span><span class=special>&lt;&lt; </span><span class=string>&quot;pizza&quot; </span><span class=special>&lt;&lt; </span><span class=string>&quot;man&quot;</span></pre>
<p>This is evaluated as:</p>
<pre><span class=string> </span><span class=special>(((</span><span class=identifier>cout </span><span class=special>&lt;&lt; </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>)) </span><span class=special>&lt;&lt; </span><span class=string>&quot;pizza&quot;</span><span class=special>) </span><span class=special>&lt;&lt; </span><span class=string>&quot;man&quot;</span><span class=special>)</span></pre>
<p>Since <tt>(cout &lt;&lt; var(x))</tt> is an actor, at least <strong>one</strong> of the operands is a phoenix actor, <tt>((cout &lt;&lt; var(x)) &lt;&lt; &quot;pizza&quot;)</tt> is also a Phoenix actor, and the whole expression is thus also an actor.</p>
<p>Sometimes, it is safe to write:</p>
<pre><span class=special> </span><span class=identifier>cout </span><span class=special>&lt;&lt; </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>) </span><span class=special>&lt;&lt; </span><span class=identifier>val</span><span class=special>(</span><span class=string>&quot;pizza&quot;</span><span class=special>) </span><span class=special>&lt;&lt; </span><span class=identifier>val</span><span class=special>(</span><span class=string>&quot;man&quot;</span><span class=special>)</span></pre>
<p>just to make it explicitly clear what we are dealing with, especially with complex expressions, in the same way as we explicitly wrap literal strings in <tt>str_p(&quot;lit&quot;)</tt> in Spirit. </p>
<p>Phoenix (and Spirit) also deals with unary operators. In such cases, we have no choice. The operand must be a Phoenix actor (or Spirit parser). Examples:</p>
<p>Spirit:</p>
<pre><span class=special> </span><span class=special>*</span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'z'</span><span class=special>) </span><span class=comment>// good
</span><span class=special>*(</span><span class=literal>'z'</span><span class=special>) </span><span class=comment>// bad</span></pre>
<p> Phoenix:</p>
<pre><span class=comment> </span><span class=special>*</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>) </span><span class=comment>// good (lazy)
</span><span class=special>*</span><span class=identifier>x </span><span class=comment>// bad (immediate)</span></pre>
<p>Also, in Phoenix, for assignments and indexing to be lazily evaluated, the object acted upon should be a Phoenix actor. Examples:</p>
<pre><span class=comment> </span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>) </span><span class=special>= </span><span class=number>123 </span><span class=comment>// good (lazy)
</span><span class=identifier>x </span><span class=special>= </span><span class=number>123 </span><span class=comment>// bad (immediate)
</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>)[</span><span class=number>0</span><span class=special>] </span><span class=comment>// good (lazy)
</span><span class=identifier>x</span><span class=special>[</span><span class=number>0</span><span class=special>] </span><span class=comment>// bad, immediate
</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>)[</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>i</span><span class=special>)] </span><span class=comment>// good (lazy)
</span><span class=identifier>x</span><span class=special>[</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>i</span><span class=special>)] </span><span class=comment>// bad and illegal (x is not an actor)
</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>x</span><span class=special>[</span><span class=identifier>var</span><span class=special>(</span><span class=identifier>i</span><span class=special>)]) </span><span class=comment>// bad and illegal (x is not an actor)</span></pre>
<h3>Wrapping up </h3>
<p>Well, there you have it. I hope with this jump-start chapter, you may be able to harness the power of lambda expressions. By all means, please read the <a href="../phoenix/index.html">phoenix manual</a> to learn more about the nitty gritty details. Surely, you'll get to know a lot more than just by reading this chapter. There are a lot of things still to be touched. There won't be enough space here to cover all the features of Phoenix even in brief. </p>
<p>In the next chapter, <a href="closures.html">Closures</a>, we'll see more of phoenix. Stay tuned. </p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="functional.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="closures.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)</font></p>
<p class="copyright">&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,74 @@
<html>
<head>
<title>Portability</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Portability</b></font></td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="includes.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="style_guide.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>Historically, Spirit supported a lot of compilers, including (to some extent)
poorly conforming compilers such as VC6. Spirit v1.6.x will be the last release
that will support older poorly conforming compilers. Starting from Spirit v1.8.0,
ill conforming compilers will not be supported. If you are still using one of
these older compilers, you can still use Spirit v1.6.x.</p>
<p>The reason why Spirit v1.6.x worked on old non-conforming compilers is that
the authors laboriously took the trouble of searching for workarounds to make
these compilers happy. The process takes a lot of time and energy, especially
when one encounters the dreaded ICE or &quot;Internal Compiler Error&quot;.
Sometimes searching for a single workaround takes days or even weeks. Sometimes,
there are no known workarounds. This stifles progress a lot. And, as the library
gets more progressive and takes on more advanced C++ techniques, the difficulty
escalates to even greater heights.</p>
<p>Spirit v1.6.x will still be supported. Maintenance will still happen and bug
fixes will still be applied. There will still be active development for the
back-porting of new features introduced in Spirit v1.8.0 (and Spirit 1.9.0)
to less capable compilers; hopefully, fueled by contributions from the community.
We welcome active support from the C++ community, especially those with special
expertise on compilers such as older Borland and MSVC++ compilers.</p>
<p>Spirit 1.8 has been tested to compile and run properly on these compilers:</p>
<ol>
<li>g++ 3.1 and above</li>
<li>Comeau 4.24.5 </li>
<li>MSVC 7.1</li>
<li>Intel 7.1</li>
</ol>
<p>If your compiler is sufficiently conforming, chances are, you can compile Spirit
as it is or with minimal portability fixes here and there. Please inform us
if your compiler is known to be ISO/ANSI conforming and is not in the list
above. Feel free to post feedback to <a href="https://lists.sourceforge.net/lists/listinfo/spirit-general">Spirit-general
mailing list</a> [Spirit-general@lists.sourceforge.net].</p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="includes.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="style_guide.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)</font></p>
<p class="copyright">&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,119 @@
<html>
<head>
<title>Position Iterator</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Position
Iterator</b></font> </td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="file_iterator.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="debugging.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>Often, when writing a parser that is able to detect errors in the format of
the input stream, we want it to communicate to the user where the error happened
within that input. The classic example is when writing a compiler or interpreter
that detects syntactical errors in the parsed program, indicating the line number
and maybe even the position within the line where the error was found.</p>
<p> The class position_iterator is a tool provided within Spirit that allows parser
writers to easily implement this functionality. The concept is quite simple:
this class is an iterator wrapper that keeps track of the current position within
the file, including current file, line and column. It requires a single template
parameter, which should be the type of the iterator that is to be wrapped.</p>
<p> To use it, you'll need to add the following include:</p>
<pre>
<code><span class=preprocessor>#include </span><span class=special>&lt;</span><span class=identifier>boost</span><span class=special>/</span><span class=identifier>spirit</span><span class=special>/</span><span class=identifier>iterator</span><span class=special>/</span><span class=identifier>position_iterator</span><span class=special>.</span><span class=identifier>hpp</span><span class=special>&gt;</span></code></pre>
<p> Or include all the iterators in Spirit:</p>
<pre>
<code><span class=preprocessor>#include </span><span class=special>&lt;</span><span class=identifier>boost</span><span class=special>/</span><span class=identifier>spirit</span><span class=special>/</span><span class=identifier>iterator</span><span class=special>.</span><span class=identifier>hpp</span><span class=special>&gt;</span></code></pre>
<p> To construct the wrapper, it needs both the begin and end iterators of the
input sequence, and the file name of the input sequence. Optionally, you can
also specify the starting line and column numbers, which default to 1. Default
construction, with no parameters, creates a generic end-of-sequence iterator,
in the same manner as the stream iterators of the standard C++
library.</p>
<p> The wrapped iterator must belong to the input or forward iterator category,
and the position_iterator just inherits that category.</p>
<p> For example, to create begin and end positional iterators from an input
C-string, you'd use:</p>
<pre>
<code><span class=keyword>char </span><span class=keyword>const</span><span class=special>* </span><span class=identifier>inputstring </span><span class=special>= </span><span class=string>&quot;...&quot;</span><span class=special>;
</span><span class=keyword>typedef </span><span class=identifier>position_iterator</span><span class=special>&lt;</span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*&gt; </span><span class=identifier>iterator_t</span><span class=special>;
</span><span class=identifier>iterator_t </span><span class=identifier>begin</span><span class=special>(</span><span class=identifier>inputstring</span><span class=special>, </span><span class=identifier>inputstring</span><span class=special>+</span><span class=identifier>strlen</span><span class=special>(</span><span class=identifier>inputstring</span><span class=special>));
</span><span class=identifier>iterator_t </span><span class=identifier>end</span><span class=special>;</span></code></pre>
<a name="operations"></a>
<h2>Operations</h2>
<pre>
<code><span class=keyword>void </span><span class=identifier>set_position</span><span class=special>(</span><span class=identifier>file_position </span><span class=keyword>const</span><span class=special>&amp;);</span></code></pre>
<p> Call this function when you need to change the current position stored in
the iterator. For example, if parsing C-style #include directives, the included
file's input must be marked by resetting the line and column to 1 and 1 and
the name to the new file's name.<br>
</p>
<pre>
<code><span class=identifier>file_position </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>get_position</span><span class=special>() </span><span class=keyword>const</span><span class=special>;</span></code></pre>
<p> Call this function to retrieve the current position.</p>
<pre>
<code><span class=keyword>void </span><span class=identifier>set_tabchars</span><span class=special>(</span><span class=keyword>int</span><span class=special>);</span></code></pre>
<p> Call this to set the number of characters per tab. This value is necessary
to correctly track the column number.<br>
</p>
<p> <a name="file_position"></a> </p>
<h2>file_position</h2>
<p> file_position is a structure that holds the position within a file. Its fields
are:</p>
<table width="90%" border="0" align="center">
<tr>
<td class="table_title" colspan="2">file_position fields</td>
</tr>
<tr>
<td class="table_cells" width="26%"><code><span class=identifier>std</span><span class=special>::</span><span class=identifier>string
</span><span class=identifier>file</span><span class=special>;</span></code></td>
<td class="table_cells" width="74%">Name of the file. Hopefully a full pathname</td>
</tr>
<tr>
<td class="table_cells" width="26%"><code><span class=keyword>int</span><span class=identifier>
line</span><span class=special>;</span></code></td>
<td class="table_cells" width="74%">Line number within the file. By default,
the first line is number 1</td>
</tr>
<tr>
<td class="table_cells" width="26%"><code><span class=keyword>int </span><span class=identifier>column</span><span class=special>;</span></code></td>
<td class="table_cells" width="74%">Column position within the file. The first
column is 1</td>
</tr>
</table>
<p><img src="theme/lens.gif" width="15" height="16"> See <a href="../example/fundamental/position_iterator/position_iterator.cpp">position_iterator.cpp</a> for a compilable example. This is part of the Spirit distribution.</p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="file_iterator.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="debugging.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<hr size="1">
<p class="copyright">Copyright &copy; 2002 Juan Carlos Arevalo-Baeza<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
<p class="copyright">&nbsp; </p>
<p>&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,346 @@
<html>
<head>
<!-- Generated by the Spirit (http://spirit.sf.net) QuickDoc -->
<title>predefined_actors</title>
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" height="48" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Predefined
Actors</b></font></td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="indepth_the_parser_context.html"><img src="theme/l_arr.gif" width="20" height="19" border="0"></a></td>
<td width="20"><a href="parametric_parsers.html"><img src="theme/r_arr.gif" width="20" height="19" border="0"></a></td>
</tr>
</table>
<h2>Actors</h2><p>
The framework has a number of predefined semantic action functors.
Experience shows that these functors are so often used that they were included
as part of the core framework to spare the user from having to reinvent the
same functionality over and over again.</p>
<h2>Quick example: <tt>assign_a</tt> actor</h2>
<code>
<pre> <span class=keyword>int </span><span class=identifier>i</span><span class=special>,</span><span class=identifier> j</span><span class=special>;
</span><span class=identifier>std</span><span class=special>::</span><span class=identifier>string </span><span class=identifier>s</span><span class=special>;
</span><span class=identifier>r </span><span class=special>= </span><span class=identifier>int_p</span><span class=special>[</span><span class=identifier>assign_a</span><span class=special>(</span><span class=identifier>i</span><span class=special>)] &gt;&gt; (+</span><span class=identifier>alpha_p</span><span class=special>)[</span><span class=identifier>assign_a</span><span class=special>(</span><span class=identifier>s</span><span class=special>)] &gt;&gt; </span><span class=identifier>int_p</span><span class=special>[</span><span class=identifier>assign_a</span><span class=special>(</span><span class=identifier>j</span><span class=special>,</span><span class=identifier>i</span><span class=special>)];</span></pre>
</code>
<p>
Given an input <tt>123456 Hello 789</tt>, </p>
<ol><li><tt>assign_a(i)</tt> will extract the number <tt>123456</tt> and assign it to <tt>i</tt>, </li><li><tt>assign_a(s)</tt> will extract the string <tt>&quot;Hello&quot;</tt> and assign it to <tt>s</tt>,</li><li><tt>assign_a(j,i)</tt> will assign i to j, j=i, without using the parse result.</li></ol>
<p> Technically, the expression <tt>assign_a(v)</tt> is a template function that
generates a semantic action. In fact, actor instances are not created directly
since they usually involve a number of template parameters. Instead generator
functions (&quot;helper functions&quot;) are provided to generate actors from
their arguments. All helper functions have the &quot;_a&quot; suffix. For example,
<tt>append_actor</tt> is created using the <tt>append_a</tt> function. </p>
<p>
The semantic action generated is polymorphic and should work with any
type as long as it is compatible with the arguments received from the parser.
It might not be obvious, but a string can accept the iterator first and last
arguments that are passed into a generic semantic action (see above). In fact,
any STL container that has an <tt>assign(first, last)</tt> member function can be
used.</p>
<h2>Actors summary</h2><p>
Below are tables summarizing the &quot;built-in&quot; actors with the
conventions given below.</p>
<ul>
<li><tt>ref</tt> is a <b>reference</b> to an object stored in a policy holder
actor</li>
<li><tt>value_ref</tt> and <tt>key_ref</tt> are <b>const reference</b>s stored
in a policy holder actor</li>
<li><tt>value</tt> is the <b>parse result</b>. This could be the result for
the single argument () operator or the two argument () operator</li>
<li><tt>vt</tt> stands for the <tt>value_type</tt> type: <tt>type&amp; ref;
// vt is type::value_type</tt>.</li>
</ul>
<p> Note that examples are provided after the tables.</p>
<table width="90%" border="0" align="center">
<tr>
<td class="table_title" colspan="8"> Unary operator actors</td>
</tr>
<tr>
<td width="30%" class="table_cells">++ref</td> <td width="70%" class="table_cells"><b>increment_a</b>(ref)</td>
</tr>
<tr>
<td class="table_cells">--ref</td> <td class="table_cells"><b>decrement_a</b>(ref)</td>
</tr>
</table>
<table width="90%" border="0" align="center">
<tr>
<td class="table_title" colspan="26"> Assign actors</td>
</tr>
<tr>
<td class="table_cells">ref = value</td>
<td class="table_cells"><b>assign_a</b>(ref)</td>
</tr>
<tr>
<td width="30%" class="table_cells">ref = value_ref</td>
<td width="70%" class="table_cells"><b>assign_a</b>(ref, value_ref)</td>
</tr>
</table>
<table width="90%" border="0" align="center">
<tr>
<td class="table_title" colspan="30"> Container actors </td>
</tr>
<tr>
<td width="30%" class="table_cells">ref.push_back(value)</td>
<td width="70%" class="table_cells"><b>push_back_a</b>(ref)</td>
</tr>
<tr>
<td class="table_cells">ref.push_back(value_ref)</td>
<td class="table_cells"><b>push_back_a</b>(ref, value_ref)</td>
</tr>
<tr>
<td class="table_cells">ref.push_front(value)</td>
<td class="table_cells"><b>push_front_a</b>(ref)</td>
</tr>
<tr>
<td class="table_cells">ref.push_front(value_ref)</td>
<td class="table_cells"><b>push_front_a</b>(ref, value_ref)</td>
</tr>
<tr>
<td class="table_cells">ref.clear()</td>
<td class="table_cells"><b>clear_a</b>(ref)</td>
</tr>
</table>
<table width="90%" border="0" align="center">
<tr>
<td class="table_title" colspan="14"> Associative container actors </td>
</tr>
<tr>
<td width="30%" class="table_cells">ref.insert(vt(value, value_ref))</td> <td width="70%" class="table_cells"><b>insert_key_a</b>(ref, value_ref)</td>
</tr> <tr>
<td class="table_cells"> ref.insert(vt(key_ref,value_ref)) </td>
<td class="table_cells"> <strong>insert_at_a</strong>(ref, key_ref, value_ref)</td>
</tr>
<tr>
<td class="table_cells"> ref.insert(vt(key_ref,value)) </td>
<td class="table_cells"> <strong>insert_at_a</strong>(ref, key_ref) </td>
</tr>
<tr>
<td class="table_cells">ref[value] = value_ref</td>
<td class="table_cells"><b>assign_key_a</b>(ref, value_ref)</td>
</tr>
<tr>
<td class="table_cells">ref.erase(ref,value)</td>
<td class="table_cells"><b>erase_a</b>(ref)</td>
</tr>
<tr>
<td class="table_cells">ref.erase(ref,key_ref)</td>
<td class="table_cells"><b>erase_a</b>(ref, key_ref)</td>
</tr>
</table>
<table width="90%" border="0" align="center">
<tr>
<td class="table_title" colspan="8"> Miscellaneous actors </td>
</tr>
<tr>
<td width="30%" class="table_cells">swaps aref and bref</td>
<td width="70%" class="table_cells"><strong>swap_a</strong>(aref, bref)</td>
</tr>
</table>
<h3>Include Files</h3>
<p>The header files for the predefined actors are located in <tt>boost/spirit/actor</tt>.
The file <tt>actors.hpp</tt> contains all the includes for all the actors. You
may include just the specific header files that you need. The list below enumerates
the header files.</p>
<pre> <span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">assign_actor</span><span class="special">.</span>hpp<span class="special">&gt;<br></span> <span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span>assign_key_actor<span class="special">.</span>hpp<span class="special">&gt;</span><span class="special"></span><span class="special"></span>
<span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">clear_actor</span><span class="special">.</span>hpp<span class="special">&gt;</span><span class="special"></span><span class="special"></span>
<span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">decrement_actor</span><span class="special">.</span>hpp<span class="special">&gt;</span><span class="special"></span><span class="special"></span>
<span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">erase_actor</span><span class="special">.</span>hpp<span class="special">&gt;</span><span class="special"></span> <br> <span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">increment_actor</span><span class="special">.</span>hpp<span class="special">&gt;<br></span> <span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">insert_key_actor</span><span class="special">.</span>hpp<span class="special">&gt;</span><span class="special"></span><span class="special"></span>
<span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span>insert_at_actor.hpp<span class="special">&gt;</span>
<span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">push_back_actor</span><span class="special">.</span>hpp<span class="special">&gt;</span><span class="special"></span><span class="special"></span>
<span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">push_front_actor</span><span class="special">.</span>hpp<span class="special">&gt;</span><span class="special"></span><span class="special"></span>
<span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span><span class="identifier">swap_actor</span><span class="special">.</span>hpp<span class="special">&gt;</span><span class="special"></span><span class="special"></span></pre>
<h3>Examples</h3>
<h4>Increment a value</h4>
<p>
Suppose that your input string is </p>
<code>
<pre> 1,2,-3,4,...
</pre>
</code><p>
and we want to count the number of ints. The actor <tt>increment_a</tt> applies <tt>++</tt> to its reference:</p>
<code>
<pre> <span class=keyword>int </span><span class=identifier>count </span><span class=special>= </span><span class=number>0</span><span class=special>;
</span><span class=identifier>rule</span><span class=special>&lt;&gt; </span><span class=identifier>r </span><span class=special>= </span><span class=identifier>list_p</span><span class=special>.</span><span class=identifier>direct</span><span class=special>(</span><span class=identifier>int_p</span><span class=special>[</span><span class=identifier>increment_a</span><span class=special>(</span><span class=identifier>count</span><span class=special>)], </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>','</span><span class=special>));</span></pre>
</code>
<h4>Append values to a vector (or other container)</h4>
<p> Here, you want to fill a <tt>vector&lt;int&gt;</tt> with the numbers. The
actor <tt>push_back_a</tt> can be used to insert the integers at the back of
the vector:</p>
<code>
<pre> <span class=identifier>vector</span><span class=special>&lt;</span><span class=keyword>int</span><span class=special>&gt; </span><span class=identifier>v</span><span class=special>;
</span><span class=identifier>rule</span><span class=special>&lt;&gt; </span><span class=identifier>r </span><span class=special>= </span><span class=identifier>list_p</span><span class=special>.</span><span class=identifier>direct</span><span class=special>(</span><span class=identifier>int_p</span><span class=special>[</span><span class=identifier>push_back_a</span><span class=special>(</span><span class=identifier>v</span><span class=special>)], </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>','</span><span class=special>));</span></pre>
</code>
<h4>Insert key-value pairs into a map</h4><p>
Suppose that your input string is </p>
<code>
<pre> (1,2) (3,4) ...
</pre>
</code>
<p> and you want to parse each pair into a <tt>map&lt;int,int&gt;</tt>. <tt>assign_a</tt>
can be used to store the key and the value in a temporary variable, while <tt>insert_at_a</tt>
is used to insert it into the map:</p>
<pre> <code><span class=identifier>map</span><span class=special>&lt;</span><span class=keyword>int</span><span class=special>, </span><span class=keyword>int</span><span class="special">&gt;::</span>value_type<span class=keyword> </span>k<span class=special>;
</span><span class=identifier>map</span><span class=special>&lt;</span><span class=keyword>int</span><span class=special>, </span><span class=keyword>int</span><span class=special>&gt; </span><span class=identifier>m</span><span class=special>;
</span><span class=identifier>rule</span><span class=special>&lt;&gt; </span><span class=identifier>pair </span><span class=special>= </span><span class=identifier>
confix_p</span><span class=special>(
</span><span class=literal>'('</span><span class=special>
, </span><span class=identifier>int_p</span><span class=special>[</span><span class=identifier>assign_a</span><span class=special>(</span>k.first<span class=special>)] &gt;&gt; </span><span class=literal>','</span><span class=special> &gt;&gt; </span><span class=identifier>int_p</span><span class=special>[</span><span class=identifier>assign_a</span><span class=special>(</span>k.second<span class=special>)]
,</span><span class=literal> ')'
</span><span class=special>)<br> [</span><span class=identifier>insert_at_a</span><span class=special>(</span><span class=identifier>m</span><span class=special>, </span><span class=identifier>k</span><span class=identifier></span><span class=special>)]
;</span></code></pre>
<h2>Policy holder actors and policy actions</h2>
<p> The action takes place through a call to the <tt>()</tt> operator: a single
argument <tt>()</tt> operator call for character parsers and a two argument (first,
last) call for phrase parsers. Actors should implement at least one of the two
<tt>()</tt> operators.</p>
<p>
A lot of actors need to store a reference to one or more objects. For
example, actions on a container need to store a reference to the container.</p>
<p> Therefore, this kind of actor has been broken down into <strong>a)</strong>
an action policy that does the action (act member function), and <strong>b)</strong>
a policy holder actor that stores the references and feeds the act member function.</p>
<h3>Policy holder actors</h3>
<p> The available policy holders are enumerated below.</p>
<table width="90%" border="0" align="center">
<tr>
<td class="table_title" colspan="24"> Policy holders </td>
</tr>
<tr>
<td class="table_cells">Name</td>
<td class="table_cells">Stored variables</td>
<td class="table_cells">Act signature</td>
</tr>
<tr>
<td class="table_cells">ref_actor</td>
<td class="table_cells">1 reference</td>
<td class="table_cells"><tt>act(ref)</tt></td>
</tr>
<tr>
<td class="table_cells">ref_value_actor</td>
<td class="table_cells">1 ref</td>
<td class="table_cells"> <tt>act(ref, value)</tt> or <tt>act(ref, first, last)</tt></td>
</tr>
<tr>
<td class="table_cells">ref_const_ref_actor</td>
<td class="table_cells">1 ref and 1 const ref</td>
<td class="table_cells"><tt>act(ref, const_ref)</tt></td>
</tr>
<tr>
<td class="table_cells">ref_const_ref_value_actor</td>
<td class="table_cells">1 ref and 1 const ref</td>
<td class="table_cells"><tt>act(ref, const_ref, value)</tt> or <tt>act(ref, const_ref, first, last)</tt></td>
</tr>
<tr>
<td class="table_cells">ref_const_ref_const_ref_actor</td>
<td class="table_cells">1 ref, 2 const ref</td>
<td class="table_cells"><tt>act(ref, const_ref1, const_ref2)</tt></td>
</tr>
</table>
<h3>Include Files</h3>
<p>The predefined policy header files are located in <tt>boost/spirit/actor</tt>:</p>
<pre> <span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span>ref_actor<span class="special">.</span>hpp<span class="special">&gt;<br></span> <span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span>ref_value_actor<span class="special">.</span>hpp<span class="special">&gt;</span><span class="special"></span><span class="special"></span>
<span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span>ref_const_ref<span class="special">.</span>hpp<span class="special">&gt;</span><span class="special"></span>
<span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span>ref_const_ref_value<span class="special">.</span>hpp<span class="special">&gt;</span><span class="special"></span>
<span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span>ref_const_ref_value<span class="special">.</span>hpp<span class="special">&gt;</span><span class="special"></span>
<span class="preprocessor">#include</span> <span class="special">&lt;</span>boost<span class="special">/</span>spirit<span class="special">/</span>actor<span class="special">/</span>ref_const_ref_const_ref<span class="special">.</span>hpp<span class="special">&gt;</span><span class="special"></span></pre>
<h3>Holder naming convention</h3>
<p> Policy holders have the following naming convention:</p>
<pre> <code>&lt;member&gt;_ &gt;&gt; *&lt;member&gt; &gt;&gt; !value &gt;&gt; actor</code></pre>
<p> where <tt>member</tt> is the action policy member which can be of type:</p>
<ul>
<li>ref, a reference</li>
<li>const_ref, a const reference</li>
<li>value, by value</li>
<li>empty, no stored members</li>
</ul>
<p> and <tt>value</tt> states if the policy uses the parse result or not.</p>
<h3>Holder example: <tt>ref_value_actor</tt> class</h3>
<pre><code> <span class=comment>// this is the building block for action that
// take a reference and the parse result
</span><span class=keyword>template</span><span class=special>&lt;
</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>,</span><span class="comment"> // reference type</span><span class=identifier>
</span><span class=keyword>typename </span><span class=identifier>ActionT </span><span class=comment>// action policy
</span><span class=special>&gt;
</span><span class=keyword>class </span><span class=identifier>ref_value_actor </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>ActionT
</span><span class=special>{
</span> <span class=keyword>public</span><span class=special>:
</span><span class=keyword>explicit </span><span class=identifier>ref_value_actor</span><span class=special>(</span><span class=identifier>T</span><span class=special>&amp; </span><span class=identifier>ref_</span><span class=special>)
: </span><span class=identifier>ref</span><span class=special>(</span><span class=identifier>ref_</span><span class=special>){}
</span><span class=keyword>template</span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>T2</span><span class=special>&gt;
</span><span class=keyword>void operator</span><span class=special>()(</span><span class=identifier>T2 </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>val</span><span class=special>) </span><span class=keyword>const
</span><span class=special>{
</span><span class=identifier>act</span><span class=special>(</span><span class=identifier>ref</span><span class=special>, </span><span class=identifier>val</span><span class=special>);</span><span class="comment"> // defined in ActionT</span><span class=identifier>
</span><span class=special>}
</span><span class=keyword>template</span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>IteratorT</span><span class=special>&gt;
</span><span class=keyword>void operator</span><span class=special>()(
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>first</span><span class=special>,
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>last</span><span class=special>) </span><span class=keyword>const
</span><span class=special>{
</span><span class=identifier>act</span><span class=special>(</span><span class=identifier>ref</span><span class=special>,</span><span class=identifier>first</span><span class=special>,</span><span class=identifier>last</span><span class=special>);</span><span class="comment"> // defined in ActionT</span><span class=identifier>
</span><span class=special>}
</span><span class=keyword>private</span><span class=special>:
</span><span class=identifier> T</span><span class=special>&amp; </span><span class=identifier>ref</span><span class=special>;
};</span></code></pre>
<h3>Actor example: <tt>assign_actor</tt></h3>
<code>
<pre> <span class=comment>// assign_action assigns the parse result to the reference
</span><span class=keyword>struct </span><span class=identifier>assign_action
</span><span class=special>{
</span><span class=keyword>template</span><span class=special>&lt;
</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>,
</span><span class=keyword>typename </span><span class=identifier>ValueT
</span><span class=special>&gt;
</span><span class=keyword>void </span><span class=identifier>act</span><span class=special>(</span><span class=identifier>T</span><span class=special>&amp; </span><span class=identifier>ref</span><span class=special>, </span><span class=identifier>ValueT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>value</span><span class=special>) </span><span class=keyword>const
</span><span class=special>{
</span><span class=identifier>ref</span><span class=special> = </span><span class=special></span><span class=identifier>value</span><span class=special>;
}
</span><span class=keyword>template</span><span class=special>&lt;
</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>,
</span><span class=keyword>typename </span><span class=identifier>IteratorT
</span><span class=special>&gt;
</span><span class=keyword>void </span><span class=identifier>act</span><span class=special>(
</span><span class=identifier>T</span><span class=special>&amp; </span><span class=identifier>ref</span><span class=special>,
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>first</span><span class=special>,
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>last</span><span class=special>) </span><span class=keyword>const
</span><span class=special>{
</span><span class=keyword>typedef </span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>::</span><span class=identifier>value_type </span><span class=identifier>value_type</span><span class=special>;
</span><span class=identifier>value_type </span><span class=identifier>vt</span><span class=special>(</span><span class=identifier>first</span><span class=special>,</span><span class=identifier> last</span><span class=special>);
</span><span class=identifier>ref</span><span class=special> = </span><span class=special></span><span class=identifier>vt</span><span class=special>;
}
};</span></pre>
</code>
<h3>Helper function example: <tt>assign_a</tt> function</h3>
<code>
<pre>
<span class=comment>// assign_a is a polymorphic helper function that generates an
// assign_actor based on ref_value_actor, assign_action and the
// type of its argument.
</span><span class=keyword>template</span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>&gt;
</span><span class=keyword>inline </span><span class=identifier>ref_value_actor</span><span class=special>&lt;</span><span class=identifier>T</span><span class=special>, </span><span class=identifier>assign_action</span><span class=special>&gt;</span><span class=identifier>
assign_a</span><span class=special>(</span><span class=identifier>T</span><span class=special>&amp; </span><span class=identifier>ref</span><span class=special>)
{
</span><span class=keyword>return </span><span class=identifier>ref_value_actor</span><span class=special>&lt;</span><span class=identifier>T</span><span class=special>,</span><span class=identifier> assign_action</span><span class=special>&gt;(</span><span class=identifier>ref</span><span class=special>);
}</span></pre>
</code>
<table border="0">
<tr>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="indepth_the_parser_context.html"><img src="theme/l_arr.gif" width="20" height="19" border="0"></a></td>
<td width="20"><a href="parametric_parsers.html"><img src="theme/r_arr.gif" width="20" height="19" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 2003 <font color="#666666">Jonathan de Halleux</font><font size="2"><font size="2"><font color="#666666">
</font></font> </font><br>
Copyright &copy; 2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) </font> </p>
</body>
</html>

View File

@@ -0,0 +1,289 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta content=
"HTML Tidy for Windows (vers 1st February 2003), see www.w3.org"
name="generator">
<title>
Preface
</title>
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10"></td>
<td width="85%">
<font size="6" face=
"Verdana, Arial, Helvetica, sans-serif"><b>Preface</b></font>
</td>
<td width="112">
<a href="http://spirit.sf.net"><img src="theme/spirit.gif"
width="112" height="48" align="right" border="0"></a>
</td>
</tr>
</table><br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30">
<a href="../index.html"><img src="theme/u_arr.gif" border="0"></a>
</td>
<td width="30">
<img src="theme/l_arr_disabled.gif" width="20" height="19">
</td>
<td width="30">
<a href="introduction.html"><img src="theme/r_arr.gif" border="0">
</a>
</td>
</tr>
</table><br>
<table width="80%" border="0" align="center">
<tr>
<td>
<p>
<i>"Examples of designs that meet most of the criteria for
"goodness" (easy to understand, flexible, efficient) are a
recursive-descent parser, which is traditional procedural code.
Another example is the STL, which is a generic library of
containers and algorithms depending crucially on both traditional
procedural code and on parametric polymorphism."</i>
</p>
<p>
<b><font color="#003366">Bjarne Stroustrup</font></b>
</p>
</td>
</tr>
</table>
<p>
<b>History</b>
</p>
<p>
A decade and a half ago, I wrote my first calculator in Pascal. It is one
of my most unforgettable coding experiences. I was amazed how a mutually
recursive set of functions can model a grammar specification. In time,
the skills I acquired from that academic experience became very
practical. Periodically I was tasked to do some parsing. For instance,
whenever I need to perform any form of I/O, even in binary, I try to
approach the task somewhat formally by writing a grammar using
Pascal-like syntax diagrams and then write a corresponding
recursive-descent parser. This worked very well.
</p>
<p>
The arrival of the Internet and the World Wide Web magnified this
thousand-fold. At one point I had to write an HTML parser for a Web
browser project. I got a recursive-descent HTML parser working based on
the W3C formal specifications easily. I was certainly glad that HTML had
a formal grammar specification. Because of the influence of the Internet,
I then had to do more parsing. RFC specifications were everywhere. SGML,
HTML, XML, even email addresses and those seemingly trivial URLs were all
formally specified using small EBNF-style grammar specifications. This
made me wish for a tool similar to big-time parser generators such as
YACC and <a href="http://www.antlr.org/">ANTLR</a>, where a parser is
built automatically from a grammar specification. Yet, I want it to be
extremely small; small enough to fit in my pocket, yet scalable.
</p>
<p>
It must be able to practically parse simple grammars such as email
addresses to moderately complex grammars such as XML and perhaps some
small to medium-sized scripting languages. Scalability is a prime goal.
You should be able to use it for small tasks such as parsing command
lines without incurring a heavy payload, as you do when you are using
YACC or PCCTS. Even now that it has evolved and matured to become a
multi-module library, true to its original intent, Spirit can still be
used for extreme micro-parsing tasks. You only pay for features that you
need. The power of Spirit comes from its modularity and extensibility.
Instead of giving you a sledgehammer, it gives you the right ingredients
to create a sledgehammer easily. For instance, it does not really have a
lexer, but you have all the raw ingredients to write one, if you need
one.
</p>
<p>
The result was Spirit. Spirit was a personal project that was conceived
when I was doing R&amp;D in Japan. Inspired by the GoF's composite and
interpreter patterns, I realized that I can model a recursive-descent
parser with hierarchical-object composition of primitives (terminals) and
composites (productions). The original version was implemented with
run-time polymorphic classes. A parser is generated at run time by
feeding in production rule strings such as <tt>"prod ::= {&lsquo;A&rsquo;
| &lsquo;B&rsquo;} &lsquo;C&rsquo;;"</tt>. A compile function compiled the
parser, dynamically creating a hierarchy of objects and linking semantic
actions on the fly. A very early text can be found <a href=
"http://spirit.sourceforge.net/dl_docs/pre-spirit.htm">here</a>.
</p>
<p>
The version that we have now is a complete rewrite of the original Spirit
parser using expression templates and static polymorphism, inspired by
the works of Todd Veldhuizen (" <a href=
"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.43.248">
Expression Templates</a>", C++ Report, June 1995). Initially, the
<i><b>static-Spirit</b></i> version was meant only to replace the core of
the original <i><b>dynamic-Spirit</b></i>. Dynamic-spirit needed a parser
to implement itself anyway. The original employed a hand-coded
recursive-descent parser to parse the input grammar specification
strings.
</p>
<p>
After its initial "open-source" debut in May 2001, static-Spirit became a
success. At around November 2001, the Spirit website had an activity
percentile of 98%, making it the number one parser tool at Source Forge
at the time. Not bad for a niche project such as a parser library.
The "static" portion of Spirit was forgotten and static-Spirit simply
became Spirit. The framework soon evolved to acquire more dynamic
features.
</p>
<p>
<b>How to use this manual</b>
</p>
<p>
The Spirit framework is organized in logical modules starting from the
core. This documentation provides a user's guide and reference for each
module in the framework. A simple and clear code example is worth a
hundred lines of documentation; therefore, the user's guide is presented
with abundant examples annotated and explained in step-wise manner. The
user's guide is based on examples &mdash; lots of them.
</p>
<p>
As much as possible, forward information (i.e. citing a specific piece of
information that has not yet been discussed) is avoided in the user's
manual portion of each module. In many cases, though, it is unavoidable
that advanced but related topics are interspersed with the normal flow of
discussion. To alleviate this problem, topics categorized as "advanced"
may be skipped at first reading.
</p>
<p>
Some icons are used to mark certain topics indicative of their relevance.
These icons precede some text to indicate:
</p>
<table width="90%" border="0" align="center">
<tr>
<td>
<table width="100%" border="0">
<tr>
<td colspan="3" class="table_title">
Icons
</td>
</tr>
<tr>
<td width="19" class="table_cells">
<img src="theme/note.gif" width="16" height="16">
</td>
<td width="58" class="table_cells">
<b>Note</b>
</td>
<td width="627" class="table_cells">
Information provided is moderately important and should be
noted by the reader.
</td>
</tr>
<tr>
<td width="19" class="table_cells">
<img src="theme/alert.gif">
</td>
<td width="58" class="table_cells">
<b>Alert</b>
</td>
<td width="627" class="table_cells">
Information provided is of utmost importance.
</td>
</tr>
<tr>
<td width="19" class="table_cells">
<img src="theme/lens.gif" width="15" height="16">
</td>
<td width="58" class="table_cells">
<b>Detail</b>
</td>
<td width="627" class="table_cells">
Information provided is auxiliary but will give the reader a
deeper insight into a specific topic. May be skipped.
</td>
</tr>
<tr>
<td width="19" class="table_cells">
<img src="theme/bulb.gif" width="13" height="18">
</td>
<td width="58" class="table_cells">
<b>Tip</b>
</td>
<td width="627" class="table_cells">
A potentially useful and helpful piece of information.
</td>
</tr>
</table>
</td>
</tr>
</table>
<p>
<b>Support</b>
</p>
<p>
Please direct all questions to Spirit's mailing list. You can subscribe
to the mailing list <a href=
"https://lists.sourceforge.net/lists/listinfo/spirit-general">here</a>.
The mailing list has a searchable archive. A search link to this archive
is provided in <a href="http://spirit.sf.net">Spirit's home page</a>. You
may also read and post messages to the mailing list through an
<a href="http://news.gmane.org/thread.php?group=gmane.comp.parsers.spirit.general">
NNTP news portal</a> (thanks to <a href=
"http://www.gmane.org">www.gmane.org</a>). The news group mirrors the
mailing list. Here are two links to the archives: via <a href=
"http://dir.gmane.org/gmane.comp.parsers.spirit.general">
gmane</a>, via <a href=
"http://sourceforge.net/mailarchive/forum.php?forum_id=1595">geocrawler</a>.
</p>
<table width="100%" border="0" align="center">
<tr>
<td>
<div align="center">
<i><b><font size="5">To my dear daughter Phoenix</font></b></i>
</div>
</td>
</tr>
</table>
<table width="100%" border="0">
<tr>
<td width="72%">
&nbsp;
</td>
<td width="28%">
<div align="right">
<p>
<b>Joel de Guzman<br></b> September 2002
</p>
</div>
</td>
</tr>
</table>
<table border="0">
<tr>
<td width="10"></td>
<td width="30">
<a href="../index.html"><img src="theme/u_arr.gif" border="0"></a>
</td>
<td width="30">
<img src="theme/l_arr_disabled.gif" width="20" height="19">
</td>
<td width="30">
<a href="introduction.html"><img src="theme/r_arr.gif" border="0">
</a>
</td>
</tr>
</table><br>
<hr size="1">
<p class="copyright">
Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the
Boost Software License, Version 1.0. (See accompanying file
LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)</font>
</p>
<p>
&nbsp;
</p>
</body>
</html>

View File

@@ -0,0 +1,250 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html><head>
<title>Primitives</title><meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css"></head>
<body>
<table background="theme/bkd2.gif" border="0" cellspacing="2" width="100%">
<tbody><tr>
<td width="10">
</td>
<td width="85%">
<font face="Verdana, Arial, Helvetica, sans-serif" size="6"><b>Primitives</b></font>
</td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" align="right" border="0" height="48" width="112"></a></td>
</tr>
</tbody></table>
<br>
<table border="0">
<tbody><tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="organization.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="operators.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</tbody></table>
<p>The framework predefines some parser primitives. These are the most basic building
blocks that the client uses to build more complex parsers. These primitive parsers
are template classes, making them very flexible.</p>
<p>These primitive parsers can be instantiated directly or through a templatized
helper function. Generally, the helper function is far simpler to deal with
as it involves less typing.</p>
<p>We have seen the character literal parser before through the generator function
<tt>ch_p</tt> which is not really a parser but, rather, a parser generator.
Class <tt>chlit&lt;CharT&gt;</tt> is the actual template class behind the character
literal parser. To instantiate a <tt>chlit</tt> object, you must explicitly
provide the character type, <tt>CharT</tt>, as a template parameter which determines
the type of the character. This type typically corresponds to the input type,
usually <tt>char</tt> or <tt>wchar_t</tt>. The following expression creates
a temporary parser object which will recognize the single letter <span class="quotes">'X'</span>.</p>
<pre><code><font color="#000000"><span class="identifier"> </span><span class="identifier">chlit</span><span class="special">&lt;</span><span class="keyword">char</span><span class="special">&gt;(</span><span class="literal">'X'</span><span class="special">);</span></font></code></pre>
<p>Using <tt>chlit</tt>'s generator function <tt>ch_p</tt> simplifies the usage
of the <tt>chlit&lt;&gt;</tt> class (this is true of most Spirit parser classes
since most have corresponding generator functions). It is convenient to call
the function because the compiler will deduce the template type through argument
deduction for us. The example above could be expressed less verbosely using
the <tt>ch_p </tt>helper function. </p>
<pre><code><font color="#000000"><span class="special"> </span><span class="identifier">ch_p</span><span class="special">(</span><span class="literal">'X'</span><span class="special">) </span><span class="comment">// equivalent to chlit&lt;char&gt;('X') object</span></font></code></pre>
<table align="center" border="0" width="80%">
<tbody><tr>
<td class="note_box"><img src="theme/lens.gif" height="16" width="15"> <b>Parser
generators</b><br>
<br>
Whenever you see an invocation of the parser generator function, it is equivalent
to the parser itself. Therefore, we often call <tt>ch_p</tt> a character
parser, even if, technically speaking, it is a function that generates a
character parser.</td>
</tr>
</tbody></table>
<p>The following grammar snippet shows these forms in action:</p>
<pre><code><span class="comment"> </span><span class="comment">// a rule can "store" a parser object. They're covered<br> </span><span class="comment">// later, but for now just consider a rule as an opaque type<br> </span><span class="identifier">rule</span><span class="special">&lt;&gt; </span><span class="identifier">r1</span><span class="special">, </span><span class="identifier">r2</span><span class="special">, </span><span class="identifier">r3</span><span class="special">;<br><br> </span><span class="identifier">chlit</span><span class="special">&lt;</span><span class="keyword">char</span><span class="special">&gt; </span><span class="identifier">x</span><span class="special">(</span><span class="literal">'X'</span><span class="special">); </span><span class="comment">// declare a parser named x<br><br> </span><span class="identifier">r1 </span><span class="special">= </span><span class="identifier">chlit</span><span class="special">&lt;</span><span class="keyword">char</span><span class="special">&gt;(</span><span class="literal">'X'</span><span class="special">); </span><span class="comment">// explicit declaration<br> </span><span class="identifier">r2 </span><span class="special">= </span><span class="identifier">x</span><span class="special">; </span><span class="comment">// using x<br> </span><span class="identifier">r3 </span><span class="special">= </span><span class="identifier">ch_p</span><span class="special">(</span><span class="literal">'X'</span><span class="special">) </span><span class="comment">// using the generator</span></code></pre>
<h2> chlit and ch_p</h2>
<p>Matches a single character literal. <tt>chlit</tt> has a single template type
parameter which defaults to <tt>char</tt> (i.e. <tt>chlit&lt;&gt;</tt> is equivalent
to <tt>chlit&lt;char&gt;</tt>). This type parameter is the character type that
<tt>chlit</tt> will recognize when parsing. The function generator version deduces
the template type parameters from the actual function arguments. The <tt>chlit</tt>
class constructor accepts a single parameter: the character it will match the
input against. Examples:</p>
<pre><code><span class="comment"> </span><span class="identifier">r1 </span><span class="special">= </span><span class="identifier">chlit</span><span class="special">&lt;&gt;(</span><span class="literal">'X'</span><span class="special">);<br> </span><span class="identifier">r2 </span><span class="special">= </span><span class="identifier">chlit</span><span class="special">&lt;</span><span class="keyword">wchar_t</span><span class="special">&gt;(</span><span class="identifier">L</span><span class="literal">'X'</span><span class="special">);<br> </span><span class="identifier">r3 </span><span class="special">= </span><span class="identifier">ch_p</span><span class="special">(</span><span class="literal">'X'</span><span class="special">);</span></code></pre>
<p>Going back to our original example:</p>
<pre><code><span class="special"> </span><span class="identifier">group </span><span class="special">= </span><span class="literal">'(' </span><span class="special">&gt;&gt; </span><span class="identifier">expr </span><span class="special">&gt;&gt; </span><span class="literal">')'</span><span class="special">;<br> </span><span class="identifier">expr1 </span><span class="special">= </span><span class="identifier">integer </span><span class="special">| </span><span class="identifier">group</span><span class="special">;<br> </span><span class="identifier">expr2 </span><span class="special">= </span><span class="identifier">expr1 </span><span class="special">&gt;&gt; </span><span class="special">*((</span><span class="literal">'*' </span><span class="special">&gt;&gt; </span><span class="identifier">expr1</span><span class="special">) </span><span class="special">| </span><span class="special">(</span><span class="literal">'/' </span><span class="special">&gt;&gt; </span><span class="identifier">expr1</span><span class="special">));<br> </span><span class="identifier">expr </span><span class="special">= </span><span class="identifier">expr2 </span><span class="special">&gt;&gt; </span><span class="special">*((</span><span class="literal">'+' </span><span class="special">&gt;&gt; </span><span class="identifier">expr2</span><span class="special">) </span><span class="special">| </span><span class="special">(</span><span class="literal">'-' </span><span class="special">&gt;&gt; </span><span class="identifier">expr2</span><span class="special">));</span></code></pre>
<p></p>
<p>the character literals <tt class="quotes">'('</tt>, <tt class="quotes">')'</tt>,
<tt class="quotes">'+'</tt>, <tt class="quotes">'-'</tt>, <tt class="quotes">'*'</tt>
and <tt class="quotes">'/'</tt> in the grammar declaration are <tt>chlit</tt>
objects that are implicitly created behind the scenes.</p>
<table align="center" border="0" width="80%">
<tbody><tr>
<td class="note_box"><img src="theme/lens.gif" height="16" width="15"> <b>char
operands</b> <br>
<br>
This works because of two special templatized overloads of <tt>operator<span class="operators">&gt;&gt;</span></tt>
that take a (<tt>char</tt>, <tt> ParserT</tt>), or (<tt>ParserT</tt>, <tt>char</tt>).
These functions convert the character into a <tt>chlit</tt> object.</td>
</tr>
</tbody></table>
<p> One may prefer to declare these explicitly as:</p>
<pre><code><span class="special"> </span><span class="identifier">chlit</span><span class="special">&lt;&gt; </span><span class="identifier">plus</span><span class="special">(</span><span class="literal">'+'</span><span class="special">);<br> </span><span class="identifier">chlit</span><span class="special">&lt;&gt; </span><span class="identifier">minus</span><span class="special">(</span><span class="literal">'-'</span><span class="special">);<br> </span><span class="identifier">chlit</span><span class="special">&lt;&gt; </span><span class="identifier">times</span><span class="special">(</span><span class="literal">'*'</span><span class="special">);<br> </span><span class="identifier">chlit</span><span class="special">&lt;&gt; </span><span class="identifier">divide</span><span class="special">(</span><span class="literal">'/'</span><span class="special">);<br> </span><span class="identifier">chlit</span><span class="special">&lt;&gt; </span><span class="identifier">oppar</span><span class="special">(</span><span class="literal">'('</span><span class="special">);<br> </span><span class="identifier">chlit</span><span class="special">&lt;&gt; </span><span class="identifier">clpar</span><span class="special">(</span><span class="literal">')'</span><span class="special">);</span></code></pre>
<h2>range and range_p</h2>
<p>A <tt>range</tt> of characters is created from a low/high character pair. Such
a parser matches a single character that is in the <tt>range</tt>, including
both endpoints. Like <tt>chlit</tt>, <tt>range</tt> has a single template type
parameter which defaults to <tt>char</tt>. The <tt>range</tt> class constructor
accepts two parameters: the character range (<i>from</i> and <i>to</i>, inclusive)
it will match the input against. The function generator version is <tt>range_p</tt>.
Examples:</p>
<pre><code><span class="special"> </span><span class="identifier">range</span><span class="special">&lt;&gt;(</span><span class="literal">'A'</span><span class="special">,</span><span class="literal">'Z'</span><span class="special">) </span><span class="comment">// matches 'A'..'Z'<br> </span><span class="identifier">range_p</span><span class="special">(</span><span class="literal">'a'</span><span class="special">,</span><span class="literal">'z'</span><span class="special">) </span><span class="comment">// matches 'a'..'z'</span></code></pre>
<p>Note, the first character must be "before" the second, according
to the underlying character encoding. The range, like chlit, is a
single character parser.</p>
<table align="center" border="0" width="80%">
<tbody><tr>
<td class="note_box"><img src="theme/alert.gif" height="16" width="16"><b>
Character mapping</b><br>
<br>
Character mapping is inherently platform dependent. It is not guaranteed
in the standard, for example, that 'A' &lt; 'Z'. However, on many occasions,
we are well aware of the character set we are using, such as ASCII, ISO-8859-1
or Unicode. Take care though when porting to another platform.</td>
</tr>
</tbody></table>
<h2> strlit and str_p</h2>
<p>This parser matches a string literal. <tt>strlit</tt> has a single template
type parameter: an iterator type. Internally, <tt>strlit</tt> holds a begin/end
iterator pair pointing to a string or a container of characters. The <tt>strlit</tt>
attempts to match the current input stream with this string. The template type
parameter defaults to <tt>char const<span class="operators">*</span></tt>. <tt>strlit</tt>
has two constructors. The first accepts a null-terminated character pointer.
This constructor may be used to build <tt>strlits</tt> from quoted string literals.
The second constructor takes in a first/last iterator pair. The function generator
version is <tt>str_p</tt>. Examples:</p>
<pre><code><span class="comment"> </span><span class="identifier">strlit</span><span class="special">&lt;&gt;(</span><span class="string">"Hello World"</span><span class="special">)<br> </span><span class="identifier">str_p</span><span class="special">(</span><span class="string">"Hello World"</span><span class="special">)<br><br> </span><span class="identifier">std</span><span class="special">::</span><span class="identifier">string </span><span class="identifier">msg</span><span class="special">(</span><span class="string">"Hello World"</span><span class="special">);<br> </span><span class="identifier">strlit</span><span class="special">&lt;</span><span class="identifier">std</span><span class="special">::</span><span class="identifier">string</span><span class="special">::</span><span class="identifier">const_iterator</span><span class="special">&gt;(</span><span class="identifier">msg</span><span class="special">.</span><span class="identifier">begin</span><span class="special">(), </span><span class="identifier">msg</span><span class="special">.</span><span class="identifier">end</span><span class="special">());</span></code></pre>
<table align="center" border="0" width="80%">
<tbody><tr>
<td class="note_box"><img src="theme/note.gif" height="16" width="16"> <b>Character
and phrase level parsing</b><br>
<br>
Typical parsers regard the processing of characters (symbols that form words
or lexemes) and phrases (words that form sentences) as separate domains.
Entities such as reserved words, operators, literal strings, numerical constants,
etc., which constitute the terminals of a grammar are usually extracted
first in a separate lexical analysis stage.<br>
<br>
At this point, as evident in the examples we have so far, it is important
to note that, contrary to standard practice, the Spirit framework handles
parsing tasks at both the character level as well as the phrase level. One
may consider that a lexical analyzer is seamlessly integrated in the Spirit
framework.<br>
<br>
Although the Spirit parser library does not need a separate lexical analyzer,
there is no reason why we cannot have one. One can always have as many parser
layers as needed. In theory, one may create a preprocessor, a lexical analyzer
and a parser proper, all using the same framework.</td>
</tr>
</tbody></table>
<h2>chseq and chseq_p</h2>
<p>Matches a character sequence. <tt>chseq</tt> has the same template type parameters
and constructor parameters as strlit. The function generator version is <tt>chseq_p</tt>.
Examples:</p>
<pre><code><span class="special"> </span><span class="identifier">chseq</span><span class="special">&lt;&gt;(</span><span class="string">"ABCDEFG"</span><span class="special">)<br> </span><span class="identifier">chseq_p</span><span class="special">(</span><span class="string">"ABCDEFG"</span><span class="special">)</span></code></pre>
<p><tt>strlit</tt> is an implicit lexeme. That is, it works solely on the character
level. <tt>chseq</tt>, <tt>strlit</tt>'s twin, on the other hand, can work on
both the character and phrase levels. What this simply means is that it can
ignore white spaces in between the string characters. For example:</p>
<pre><code><span class="special"> </span><span class="identifier">chseq</span><span class="special">&lt;&gt;(</span><span class="string">"ABCDEFG"</span><span class="special">)</span></code></pre>
<p>can parse:</p>
<pre><span class="special"> </span><span class="identifier">ABCDEFG<br> </span><span class="identifier">A </span><span class="identifier">B </span><span class="identifier">C </span><span class="identifier">D </span><span class="identifier">E </span><span class="identifier">F </span><span class="identifier">G<br> </span><span class="identifier">AB </span><span class="identifier">CD </span><span class="identifier">EFG</span></pre>
<h2>More character parsers</h2>
<p>The framework also predefines the full repertoire of single character parsers:</p>
<table align="center" border="0" width="90%">
<tbody><tr>
<td class="table_title" colspan="2">Single character parsers</td>
</tr>
<tr>
<td class="table_cells" width="30%"><b>anychar_p</b></td>
<td class="table_cells" width="70%">Matches any single character (including
the null terminator: '\0')</td>
</tr>
<tr>
<td class="table_cells" width="30%"><b>alnum_p</b></td>
<td class="table_cells" width="70%">Matches alpha-numeric characters</td>
</tr>
<tr>
<td class="table_cells" width="30%"><b>alpha_p</b></td>
<td class="table_cells" width="70%">Matches alphabetic characters</td>
</tr>
<tr>
<td class="table_cells" width="30%"><b>blank_p</b></td>
<td class="table_cells" width="70%">Matches spaces or tabs</td>
</tr>
<tr>
<td class="table_cells" width="30%"><b>cntrl_p</b></td>
<td class="table_cells" width="70%">Matches control characters</td>
</tr>
<tr>
<td class="table_cells" width="30%"><b>digit_p</b></td>
<td class="table_cells" width="70%">Matches numeric digits</td>
</tr>
<tr>
<td class="table_cells" width="30%"><b>graph_p</b></td>
<td class="table_cells" width="70%">Matches non-space printing characters</td>
</tr>
<tr>
<td class="table_cells" width="30%"><b>lower_p</b></td>
<td class="table_cells" width="70%">Matches lower case letters</td>
</tr>
<tr>
<td class="table_cells" width="30%"><b>print_p</b></td>
<td class="table_cells" width="70%">Matches printable characters</td>
</tr>
<tr>
<td class="table_cells" width="30%"><b>punct_p</b></td>
<td class="table_cells" width="70%">Matches punctuation symbols</td>
</tr>
<tr>
<td class="table_cells" width="30%"><b>space_p</b></td>
<td class="table_cells" width="70%">Matches spaces, tabs, returns, and newlines</td>
</tr>
<tr>
<td class="table_cells" width="30%"><b>upper_p</b></td>
<td class="table_cells" width="70%">Matches upper case letters</td>
</tr>
<tr>
<td class="table_cells" width="30%"><b>xdigit_p</b></td>
<td class="table_cells" width="70%">Matches hexadecimal digits</td>
</tr>
</tbody></table>
<h2><a name="negation"></a>negation ~</h2>
<p>Single character parsers such as the <tt>chlit</tt>, <tt>range</tt>, <tt>anychar_p</tt>,
<tt>alnum_p</tt> etc. can be negated. For example:</p>
<pre><code><span class="special"> ~</span><span class="identifier">ch_p</span><span class="special">(</span><span class="literal">'x'</span><span class="special">)</span></code></pre>
<p>matches any character except <tt>'x'</tt>. Double negation of a character parser
cancels out the negation. <tt>~~alpha_p</tt> is equivalent to <tt>alpha_p</tt>.</p>
<h2>eol_p</h2>
<p>Matches the end of line (CR/LF and combinations thereof).</p>
<h2><b>nothing_p</b></h2>
<p>Never matches anything and always fails.</p>
<h2>end_p</h2>
<p>Matches the end of input (returns a successful match with 0 length when the
input is exhausted)</p><h2>eps_p</h2>
<p>The <strong>Epsilon</strong> (<tt>epsilon_p</tt> and <tt>eps_p</tt>) is a multi-purpose
parser that returns a zero length match. See <a href="epsilon.html">Epsilon</a> for details.</p><p></p>
<table border="0">
<tbody><tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="organization.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="operators.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</tbody></table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
Copyright &copy; 2003 Martin Wille<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt) </font> </p>
<p>&nbsp;</p>
</body></html>

View File

@@ -0,0 +1,462 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta content=
"HTML Tidy for Windows (vers 1st February 2003), see www.w3.org"
name="generator">
<title>
Quick Start
</title>
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10"></td>
<td width="85%">
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Quick
Start</b></font>
</td>
<td width="112">
<a href="http://spirit.sf.net"><img src="theme/spirit.gif"
width="112" height="48" align="right" border="0"></a>
</td>
</tr>
</table><br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30">
<a href="../index.html"><img src="theme/u_arr.gif" border="0"></a>
</td>
<td width="30">
<a href="introduction.html"><img src="theme/l_arr.gif" border="0">
</a>
</td>
<td width="30">
<a href="basic_concepts.html"><img src="theme/r_arr.gif" border="0">
</a>
</td>
</tr>
</table>
<h2>
<b>Why would you want to use Spirit?</b>
</h2>
<p>
Spirit is designed to be a practical parsing tool. At the very least, the
ability to generate a fully-working parser from a formal EBNF
specification inlined in C++ significantly reduces development time.
While it may be practical to use a full-blown, stand-alone parser such as
YACC or ANTLR when we want to develop a computer language such as C or
Pascal, it is certainly overkill to bring in the big guns when we wish to
write extremely small micro-parsers. At that end of the spectrum,
programmers typically approach the job at hand not as a formal parsing
task but through ad hoc hacks using primitive tools such as
<tt>scanf</tt>. True, there are tools such as regular-expression
libraries (such as <a href=
"http://www.boost.org/libs/regex/index.html">boost regex</a>) or scanners
(such as <a href="http://www.boost.org/libs/tokenizer/index.html">boost
tokenizer</a>), but these tools do not scale well when we need to write
more elaborate parsers. Attempting to write even a moderately-complex
parser using these tools leads to code that is hard to understand and
maintain.
</p>
<p>
One prime objective is to make the tool easy to use. When one thinks of a
parser generator, the usual reaction is "it must be big and complex with
a steep learning curve." Not so. Spirit is designed to be fully scalable.
The framework is structured in layers. This permits learning on an
as-needed basis, after only learning the minimal core and basic concepts.
</p>
<p>
For development simplicity and ease in deployment, the entire framework
consists of only header files, with no libraries to link against or
build. Just put the spirit distribution in your include path, compile and
run. Code size? -very tight. In the quick start example that we shall
present in a short while, the code size is dominated by the instantiation
of the <tt>std::vector</tt> and <tt>std::iostream</tt>.
</p>
<h2>
<b>Trivial Example #1</b></h2>
<p>Create a parser that will parse
a floating-point number.
</p>
<pre><code><font color="#000000"> </font></code><span class="identifier">real_p</span>
</pre>
<p>
(You've got to admit, that's trivial!) The above code actually generates
a Spirit <tt>real_parser</tt> (a built-in parser) which parses a floating
point number. Take note that parsers that are meant to be used directly
by the user end with "<tt>_p</tt>" in their names as a Spirit convention.
Spirit has many pre-defined parsers and consistent naming conventions
help you keep from going insane!
</p>
<h2>
<b>Trivial Example #2</b></h2>
<p>
Create a parser that will accept a line consisting of two floating-point
numbers.
</p>
<pre><code><font color="#000000"> </font></code><code><span class=
"identifier">real_p</span> <span class=
"special">&gt;&gt;</span> <span class="identifier">real_p</span></code>
</pre>
<p>
Here you see the familiar floating-point numeric parser
<code><tt>real_p</tt></code> used twice, once for each number. What's
that <tt class="operators">&gt;&gt;</tt> operator doing in there? Well,
they had to be separated by something, and this was chosen as the
"followed by" sequence operator. The above program creates a parser from
two simpler parsers, glueing them together with the sequence operator.
The result is a parser that is a composition of smaller parsers.
Whitespace between numbers can implicitly be consumed depending on how
the parser is invoked (see below).
</p>
<p>
Note: when we combine parsers, we end up with a "bigger" parser, but it's
still a parser. Parsers can get bigger and bigger, nesting more and more,
but whenever you glue two parsers together, you end up with one bigger
parser. This is an important concept.
</p>
<h2>
<b>Trivial Example #3</b></h2>
<p>
Create a parser that will accept an arbitrary number of floating-point
numbers. (Arbitrary means anything from zero to infinity.)
</p>
<pre><code><font color="#000000"> </font></code><code><span class=
"special">*</span><span class="identifier">real_p</span></code>
</pre>
<p>
This is like a regular-expression Kleene Star, though the syntax might
look a bit odd for a C++ programmer not used to seeing the <tt class=
"operators">*</tt> operator overloaded like this. Actually, if you know
regular expressions it may look odd too since the star is <b>before</b>
the expression it modifies. C'est la vie. Blame it on the fact that we
must work with the syntax rules of C++.
</p>
<p>
Any expression that evaluates to a parser may be used with the Kleene
Star. Keep in mind, though, that due to C++ operator precedence rules you
may need to put the expression in parentheses for complex expressions.
The Kleene Star is also known as a Kleene Closure, but we call it the
Star in most places.
</p>
<h3>
<b><a name="list_of_numbers"></a> Example #4 [ A Just Slightly Less Trivial Example ]</b>
</h3>
<p>
This example will create a parser that accepts a comma-delimited list of numbers and puts the numbers in a vector.
</p>
<h4><strong> Step 1. Create the parser</strong></h4>
<pre><code><font color="#000000"> </font></code><code><span class=
"identifier">real_p</span> <span class=
"special">&gt;&gt;</span> <span class="special">*(</span><span class=
"identifier">ch_p</span><span class="special">(</span><span class=
"literal">','</span><span class="special">)</span> <span class=
"special">&gt;&gt;</span> <span class=
"identifier">real_p</span><span class="special">)</span></code>
</pre>
<p>
Notice <tt>ch_p(',')</tt>. It is a literal character parser that can
recognize the comma <tt>','</tt>. In this case, the Kleene Star is
modifying a more complex parser, namely, the one generated by the
expression:
</p>
<pre><code><font color="#000000"> </font></code><code><span class=
"special">(</span><span class="identifier">ch_p</span><span class=
"special">(</span><span class="literal">','</span><span class=
"special">)</span> <span class="special">&gt;&gt;</span> <span class=
"identifier">real_p</span><span class="special">)</span></code>
</pre>
<p>
Note that this is a case where the parentheses are necessary. The Kleene
star encloses the complete expression above.
</p>
<h4>
<b><strong>Step 2. </strong>Using a Parser (now that it's created)</b></h4>
<p>
Now that we have created a parser, how do we use it? Like the result of
any C++ temporary object, we can either store it in a variable, or call
functions directly on it.
</p>
<p>
We'll gloss over some low-level C++ details and just get to the good
stuff.
</p>
<p>
If <b><tt>r</tt></b> is a rule (don't worry about what rules exactly are
for now. This will be discussed later. Suffice it to say that the rule is
a placeholder variable that can hold a parser), then we store the parser
as a rule like this:
</p>
<pre><code><font color="#000000"> </font></code><code><font color="#000000"><span class=
"identifier">r</span> <span class="special">=</span> <span class=
"identifier">real_p</span> <span class=
"special">&gt;&gt; *(</span><span class=
"identifier">ch_p</span><span class="special">(</span><span class=
"literal">','</span><span class="special">) &gt;&gt;</span> <span class=
"identifier">real_p</span><span class="special">);</span></font></code>
</pre>
<p>
Not too exciting, just an assignment like any other C++ expression you've
used for years. The cool thing about storing a parser in a rule is this:
rules are parsers, and now you can refer to it <b>by name</b>. (In this
case the name is <tt><b>r</b></tt>). Notice that this is now a full
assignment expression, thus we terminate it with a semicolon,
"<tt>;</tt>".
</p>
<p>
That's it. We're done with defining the parser. So the next step is now
invoking this parser to do its work. There are a couple of ways to do
this. For now, we shall use the free <tt>parse</tt> function that takes
in a <tt>char const*</tt>. The function accepts three arguments:
</p>
<blockquote>
<p>
<img src="theme/bullet.gif" width="12" height="12"> The null-terminated
<tt>const char*</tt> input<br>
<img src="theme/bullet.gif" width="12" height="12"> The parser
object<br>
<img src="theme/bullet.gif" width="12" height="12"> Another parser
called the <b>skip parser</b>
</p>
</blockquote>
<p>
In our example, we wish to skip spaces and tabs. Another parser named
<tt>space_p</tt> is included in Spirit's repertoire of predefined
parsers. It is a very simple parser that simply recognizes whitespace. We
shall use <tt>space_p</tt> as our skip parser. The skip parser is the one
responsible for skipping characters in between parser elements such as
the <tt>real_p</tt> and the <tt>ch_p</tt>.
</p>
<p>
Ok, so now let's parse!
</p>
<pre><code><font color="#000000"> </font></code><code><font color="#000000"><span class=
"identifier">r</span> <span class="special">=</span> <span class=
"identifier">real_p</span> <span class=
"special">&gt;&gt;</span> <span class="special">*(</span><span class=
"identifier">ch_p</span><span class="special">(</span><span class=
"literal">','</span><span class="special">)</span> <span class=
"special">&gt;&gt;</span> <span class=
"identifier">real_p</span><span class="special">);
</span> <span class="identifier"> parse</span><span class=
"special">(</span><span class="identifier">str</span><span class=
"special">,</span> <span class="identifier">r</span><span class=
"special">,</span> <span class="identifier">space_p</span><span class=
"special">)</span> <span class=
"comment">// Not a full statement yet, patience...</span></font></code>
</pre>
<p>
The parse function returns an object (called <tt>parse_info</tt>) that
holds, among other things, the result of the parse. In this example, we
need to know:
</p>
<blockquote>
<p>
<img src="theme/bullet.gif" width="12" height="12"> Did the parser
successfully recognize the input <tt>str</tt>?<br>
<img src="theme/bullet.gif" width="12" height="12"> Did the parser
<b>fully</b> parse and consume the input up to its end?
</p>
</blockquote>
<p>
To get a complete picture of what we have so far, let us also wrap this
parser inside a function:
</p>
<pre><code><font color="#000000"> </font></code><code><font color="#000000"><span class=
"keyword">bool
</span> <span class="identifier"> parse_numbers</span><span class=
"special">(</span><span class="keyword">char</span> <span class=
"keyword">const</span><span class="special">*</span> <span class=
"identifier">str</span><span class="special">)
{
</span> <span class="keyword"> return</span> <span class=
"identifier">parse</span><span class="special">(</span><span class=
"identifier">str</span><span class="special">,</span> <span class=
"identifier">real_p</span> <span class=
"special">&gt;&gt;</span> <span class="special">*(</span><span class=
"literal">','</span> <span class="special">&gt;&gt;</span> <span class=
"identifier">real_p</span><span class="special">),</span> <span class=
"identifier">space_p</span><span class="special">).</span><span class=
"identifier">full</span><span class="special">;
}</span></font></code>
</pre>
<p>
Note in this case we dropped the named rule and inlined the parser
directly in the call to parse. Upon calling parse, the expression
evaluates into a temporary, unnamed parser which is passed into the
parse() function, used, and then destroyed.
</p>
<table border="0" width="80%" align="center">
<tr>
<td class="note_box">
<img src="theme/note.gif" width="16" height="16"><b>char and wchar_t
operands</b><br>
<br>
The careful reader may notice that the parser expression has
<tt class="quotes">','</tt> instead of <tt>ch_p(',')</tt> as the
previous examples did. This is ok due to C++ syntax rules of
conversion. There are <tt>&gt;&gt;</tt> operators that are overloaded
to accept a <tt>char</tt> or <tt>wchar_t</tt> argument on its left or
right (but not both). An operator may be overloaded if at least one
of its parameters is a user-defined type. In this case, the
<tt>real_p</tt> is the 2nd argument to <tt>operator<span class=
"operators">&gt;&gt;</span></tt>, and so the proper overload of
<tt class="operators">&gt;&gt;</tt> is used, converting
<tt class="quotes">','</tt> into a character literal parser.<br>
<br>
The problem with omitting the <tt>ch_p</tt> call should be obvious:
<tt>'a' &gt;&gt; 'b'</tt> is <b>not</b> a spirit parser, it is a
numeric expression, right-shifting the ASCII (or another encoding)
value of <tt class="quotes">'a'</tt> by the ASCII value of
<tt class="quotes">'b'</tt>. However, both <tt>ch_p('a') &gt;&gt;
'b'</tt> and <tt>'a' &gt;&gt; ch_p('b')</tt> are Spirit sequence
parsers for the letter <tt class="quotes">'a'</tt> followed by
<tt class="quotes">'b'</tt>. You'll get used to it, sooner or
later.
</td>
</tr>
</table>
<p>
Take note that the object returned from the parse function has a member
called <tt>full</tt> which is true if both of our requirements above
are met (i.e. the parser fully parsed the input).
</p>
<h4>
<b> Step 3. Semantic Actions</b></h4>
<p>
Our parser above is really nothing but a recognizer. It answers the
question <i class="quotes">"did the input match our grammar?"</i>, but it
does not remember any data, nor does it perform any side effects.
Remember: we want to put the parsed numbers into a vector. This is done
in an <b>action</b> that is linked to a particular parser. For example,
whenever we parse a real number, we wish to store the parsed number after
a successful match. We now wish to extract information from the parser.
Semantic actions do this. Semantic actions may be attached to any point
in the grammar specification. These actions are C++ functions or functors
that are called whenever a part of the parser successfully recognizes a
portion of the input. Say you have a parser <b>P</b>, and a C++ function
<b>F</b>, you can make the parser call <b>F</b> whenever it matches an
input by attaching <b>F</b>:
</p>
<pre><code><font color="#000000"> </font></code><code><font color="#000000"><span class=
"identifier">P</span><span class="special">[&amp;</span><span class=
"identifier">F</span><span class="special">]</span></font></code>
</pre>
<p>
Or if <b>F</b> is a function object (a functor):
</p>
<pre><code><font color="#000000"> </font></code><code><font color="#000000"><span class=
"identifier">P</span><span class="special">[</span><span class=
"identifier">F</span><span class="special">]</span></font></code>
</pre>
<p>
The function/functor signature depends on the type of the parser to which
it is attached. The parser <tt>real_p</tt> passes a single argument: the
parsed number. Thus, if we were to attach a function <b>F</b> to
<tt>real_p</tt>, we need <b>F</b> to be declared as:
</p>
<pre><code> </code><code><span class=
"keyword">void</span> <span class="identifier">F</span><span class=
"special">(</span><span class="keyword">double</span> <span class=
"identifier">n</span><span class="special">);</span></code></pre>
<p>
For our example however, again, we can take advantage of some predefined
semantic functors and functor generators (<img src="theme/lens.gif"
width="15" height="16"> A functor generator is a function that returns
a functor). For our purpose, Spirit has a functor generator
<tt>push_back_a(c)</tt>. In brief, this semantic action, when called,
<b>appends</b> the parsed value it receives from the parser it is
attached to, to the container <tt>c</tt>.
</p>
<p>
Finally, here is our complete comma-separated list parser:
</p>
<pre><code><font color="#000000"> </font></code><code><font color="#000000"><span class=
"keyword">bool
</span> <span class="identifier">parse_numbers</span><span class=
"special">(</span><span class="keyword">char</span> <span class=
"keyword">const</span><span class="special">*</span> <span class=
"identifier">str</span><span class="special">,</span> <span class=
"identifier">vector</span><span class="special">&lt;</span><span class=
"keyword">double</span><span class=
"special">&gt;&amp;</span> <span class="identifier">v</span><span class=
"special">)
{
</span> <span class="keyword">return</span> <span class=
"identifier">parse</span><span class="special">(</span><span class=
"identifier">str</span><span class="special">,
</span> <span class="comment"> // Begin grammar
</span> <span class="special"> (
</span> <span class="identifier">real_p</span><span class=
"special">[</span><span class="identifier">push_back_a</span><span class=
"special">(</span><span class="identifier">v</span><span class=
"special">)]</span> <span class="special">&gt;&gt;</span> <span class=
"special">*(</span><span class="literal">','</span> <span class=
"special">&gt;&gt;</span> <span class=
"identifier">real_p</span><span class="special">[</span><span class=
"identifier">push_back_a</span><span class="special">(</span><span class=
"identifier">v</span><span class="special">)])
)
</span> <span class="special"> ,
</span> <span class="comment"> // End grammar
</span> <span class="identifier"> space_p</span><span class=
"special">).</span><span class="identifier">full</span><span class="special">;
}</span></font></code>
</pre>
<p>
This is the same parser as above. This time with appropriate semantic
actions attached to strategic places to extract the parsed numbers and
stuff them in the vector <tt>v</tt>. The parse_numbers function returns
true when successful.
</p>
<p>
<img src="theme/lens.gif" width="15" height="16"> The full source code
can be <a href="../example/fundamental/number_list.cpp">viewed here</a>.
This is part of the Spirit distribution.
</p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30">
<a href="../index.html"><img src="theme/u_arr.gif" border="0"></a>
</td>
<td width="30">
<a href="introduction.html"><img src="theme/l_arr.gif" border="0">
</a>
</td>
<td width="30">
<a href="basic_concepts.html"><img src="theme/r_arr.gif" border="0">
</a>
</td>
</tr>
</table><br>
<hr size="1">
<p class="copyright">
Copyright &copy; 1998-2003 Joel de Guzman<br>
Copyright &copy; 2002 Chris Uzdavinis<br>
<br>
<font size="2">Use, modification and distribution is subject to the
Boost Software License, Version 1.0. (See accompanying file
LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)</font>
</p>
<blockquote>&nbsp;
</blockquote>
</body>
</html>

View File

@@ -0,0 +1,602 @@
<html>
<head>
<title>Quick Reference</title>
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10"> </td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Quick
Reference </b></font></td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="error_handling.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="includes.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>This isn't intended to be a full, detailed reference; nor is it intended to
be of any use to readers who aren't already familiar with Spirit. It's just
a brief reminder of the syntax and behaviour of each component, with links to
the full documentation. </p>
<ul>
<li><strong>Primitive parser generators</strong> <i>(action arguments are listed
on the right)</i>
<ul>
<li><a href="quickref.html#null_parsers">Null parsers</a></li>
<li><a href="quickref.html#character_parsers">Character parsers</a></li>
<li><a href="quickref.html#number_parsers">Number parsers</a></li>
<li><a href="quickref.html#otherlexeme_parsers">Other lexeme parsers</a></li>
<li><a href="quickref.html#text_parsers">Text parsers</a><br>
<br>
</li>
</ul>
</li>
<li><strong>Other parser elements</strong>
<ul>
<li><a href="quickref.html#compound_parsers">Compound parsers</a></li>
<li><a href="quickref.html#general_directives">General directives</a></li>
<li><a href="quickref.html#tree_specific_directives">Tree-specific directives</a><br>
<br>
</li>
</ul>
</li>
<li><strong>Operators</strong>
<ul>
<li><a href="quickref.html#unary_operators">Unary operators</a></li>
<li><a href="quickref.html#binary_operators">Binary operators</a> <i>(in order of precedence)<br>
</i></li>
</ul>
</li>
</ul>
<table>
<tr>
<td valign="top">
<table>
<tr>
<td class="table_title" colspan="3"><a name="null_parsers"></a>Null parsers</td>
</tr>
<tr>
<td class="table_cells"><code><a href="primitives.html">end_p</a></code></td>
<td class="table_cells">Matches EOF</td>
<td class="table_cells"><i>iter,iter</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="primitives.html">eps_p</a><br />
<a href="primitives.html">eps_p</a>(P)</code></td>
<td class="table_cells">Matches without consuming text</td>
<td class="table_cells"><i>iter,iter</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="primitives.html">epsilon_p</a><br />
<a href="primitives.html">epsilon_p</a>(P)</code></td>
<td class="table_cells">Synonym for <b>eps_p</b></td>
<td class="table_cells"><i>iter,iter</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="primitives.html">nothing_p</a></code></td>
<td class="table_cells">Always fails</td>
<td class="table_cells"><i>iter,iter</i></td>
</tr>
<tr>
<td class="table_title" colspan="3"><a name="character_parsers"></a>Character parsers</td>
</tr>
<tr>
<td class="table_cells"><code><a href="primitives.html">alnum_p</a></code></td>
<td class="table_cells">Matches any alphanumeric character</td>
<td class="table_cells"><i>char</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="primitives.html">alpha_p</a></code></td>
<td class="table_cells">Matches any letter</td>
<td class="table_cells"><i>char</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="primitives.html">anychar_p</a></code></td>
<td class="table_cells">Matches any character</td>
<td class="table_cells"><i>char</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="primitives.html">blank_p</a></code></td>
<td class="table_cells">Matches a space or tab</td>
<td class="table_cells"><i>char</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="primitives.html">ch_p</a>(char)</code></td>
<td class="table_cells">Matches a character</td>
<td class="table_cells"><i>char</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="character_sets.html">chset_p</a>(charset)</code></td>
<td class="table_cells">Matches a character in the set</td>
<td class="table_cells"><i>char</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="primitives.html">cntrl_p</a></code></td>
<td class="table_cells">Matches any control character</td>
<td class="table_cells"><i>char</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="primitives.html">digit_p</a></code></td>
<td class="table_cells">Matches any digit</td>
<td class="table_cells"><i>char</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="parametric_parsers.html">f_ch_p</a>(func)</code></td>
<td class="table_cells">Matches a character</td>
<td class="table_cells"><i>char</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="parametric_parsers.html">f_range_p</a>(func1,
func2)</code></td>
<td class="table_cells">Matches any character in the inclusive range</td>
<td class="table_cells"><i>char</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="primitives.html">graph_p</a></code></td>
<td class="table_cells">Matches any non-space printable character</td>
<td class="table_cells"><i>char</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="primitives.html">lower_p</a></code></td>
<td class="table_cells">Matches any lower-case letter</td>
<td class="table_cells"><i>char</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="primitives.html">print_p</a></code></td>
<td class="table_cells">Matches any printable character</td>
<td class="table_cells"><i>char</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="primitives.html">punct_p</a></code></td>
<td class="table_cells">Matches any punctuation mark</td>
<td class="table_cells"><i>char</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="primitives.html">range_p</a>(char1,
char2)</code></td>
<td class="table_cells">Matches any character in the inclusive range</td>
<td class="table_cells"><i>char</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="numerics.html">sign_p</a></code></td>
<td class="table_cells">Matches a plus or minus sign</td>
<td class="table_cells"><i>bool</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="primitives.html">space_p</a></code></td>
<td class="table_cells">Matches any whitespace character</td>
<td class="table_cells"><i>char</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="primitives.html">upper_p</a></code></td>
<td class="table_cells">Matches any upper-case letter</td>
<td class="table_cells"><i>char</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="primitives.html">xdigit_p</a></code></td>
<td class="table_cells">Matches any hexadecimal digit</td>
<td class="table_cells"><i>char</i></td>
</tr>
<tr>
<td class="table_title" colspan="3"><a name="number_parsers"></a>Number parsers</td>
</tr>
<tr>
<td class="table_cells"><code><a href="numerics.html">bin_p</a></code></td>
<td class="table_cells">Matches an unsigned binary integer</td>
<td class="table_cells"><i>numeric</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="numerics.html">hex_p</a></code></td>
<td class="table_cells">Matches an unsigned hexadecimal integer</td>
<td class="table_cells"><i>numeric</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="numerics.html">int_p</a></code></td>
<td class="table_cells">Matches a signed decimal integer</td>
<td class="table_cells"><i>numeric</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="numerics.html">int_parser</a>&lt;type,
base, min, max&gt;</code></td>
<td class="table_cells">Matches a signed integer with <b>min</b> to <b>max</b> digits</td>
<td class="table_cells"><i>numeric</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="numerics.html">oct_p</a></code></td>
<td class="table_cells">Matches an unsigned octal integer</td>
<td class="table_cells"><i>numeric</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="numerics.html">real_p</a></code></td>
<td class="table_cells">Matches a floating point number</td>
<td class="table_cells"><i>numeric</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="numerics.html">real_parser</a>&lt;type,
policy&gt;</code></td>
<td class="table_cells">Matches a floating point number</td>
<td class="table_cells"><i>numeric</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="numerics.html">strict_real_p</a></code></td>
<td class="table_cells">Matches a floating point number (requires decimal point)</td>
<td class="table_cells"><i>numeric</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="numerics.html">strict_ureal_p</a></code></td>
<td class="table_cells">Matches an unsigned FP number (requires decimal point)</td>
<td class="table_cells"><i>numeric</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="numerics.html">uint_p</a></code></td>
<td class="table_cells">Matches an unsigned decimal integer</td>
<td class="table_cells"><i>numeric</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="numerics.html">uint_parser</a>&lt;type,
base, min, max&gt;</code></td>
<td class="table_cells">Matches an unsigned integer with <b>min</b> to <b>max</b> digits</td>
<td class="table_cells"><i>numeric</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="numerics.html">ureal_p</a></code></td>
<td class="table_cells">Matches an unsigned FP number</td>
<td class="table_cells"><i>numeric</i></td>
</tr>
<tr>
<td class="table_title" colspan="3" id="other_lexeme_parsers"><a name="otherlexeme_parsers"></a>Other lexeme parsers</td>
</tr>
<tr>
<td class="table_cells"><code><a href="escape_char_parser.html">c_escape_ch_p</a></code></td>
<td class="table_cells">Matches a C escape code</td>
<td class="table_cells"><i>char</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="confix.html">comment_p</a>(string)<br />
<a href="confix.html">comment_p</a> (string1, string2)</code></td>
<td class="table_cells">Matches C++ or C-style comments</td>
<td class="table_cells"><i>iter,iter</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="primitives.html">eol_p</a></code></td>
<td class="table_cells">Matches CR, LF, or any combination</td>
<td class="table_cells"><i>iter,iter</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="parametric_parsers.html">f_str_p</a>(func1,
func2)</code></td>
<td class="table_cells">Matches a string</td>
<td class="table_cells"><i>iter,iter</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="escape_char_parser.html">lex_escape_ch_p</a></code></td>
<td class="table_cells">Matches a C escape code or any backslash escape</td>
<td class="table_cells"><i>char</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="regular_expression_parser.html">regex_p</a>(regex)</code></td>
<td class="table_cells">Matches a regular expression</td>
<td class="table_cells"><i>iter,iter</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="primitives.html">str_p</a>(string)<br />
<a href="primitives.html">str_p</a>(iter1, iter2)</code></td>
<td class="table_cells">Matches a string</td>
<td class="table_cells"><i>iter,iter</i></td>
</tr>
<tr>
<td class="table_title" colspan="3"><a name="text_parsers"></a>Text parsers</td>
</tr>
<tr>
<td class="table_cells"><code><a href="primitives.html">chseq_p</a>(string)<br />
<a href="primitives.html">chseq_p</a>(iter1, iter2)</code></td>
<td class="table_cells">Matches a string, possibly with embedded whitespace</td>
<td class="table_cells"><i>iter,iter</i></td>
</tr>
<tr>
<td class="table_cells"><code><a href="parametric_parsers.html">f_chseq_p</a>(func1,
func2)</code></td>
<td class="table_cells">Matches a string, possibly with embedded whitespace</td>
<td class="table_cells"><i>iter,iter</i></td>
</tr>
</table>
</td>
<td width="10">&nbsp;</td>
<td valign="top">
<table>
<tr>
<td class="table_title" colspan="2"><a name="compound_parsers"></a>Compound parsers</td>
</tr>
<tr>
<td class="table_cells"><code><a href="confix.html">confix_p</a>(open,
exp, close)</code></td>
<td class="table_cells">Matches <b>open &gt;&gt; (exp - close) &gt;&gt; close</b></td>
</tr>
<tr>
<td class="table_cells"><code><a href="dynamic_parsers.html">do_p</a>[P].<a href="dynamic_parsers.html">while_p</a>(cond)</code></td>
<td class="table_cells">Matches while a condition is true (at least once)</td>
</tr>
<tr>
<td class="table_cells"><code><a href="dynamic_parsers.html">for_p</a>(init,
cond, step)[P]</code></td>
<td class="table_cells">Matches in a loop</td>
</tr>
<tr>
<td class="table_cells"><code><a href="functor_parser.html">functor_parser</a>&lt;func&gt;</code></td>
<td class="table_cells">Wraps an external parser</td>
</tr>
<tr>
<td class="table_cells"><code><a href="dynamic_parsers.html">if_p</a>(cond)[P]<br />
<a href="dynamic_parsers.html">if_p</a>(cond)[P].<a href="dynamic_parsers.html">else_p</a>[P]</code></td>
<td class="table_cells">Matches depending on a condition</td>
</tr>
<tr>
<td class="table_cells"><code><a href="the_lazy_parser.html">lazy_p</a>(P)</code></td>
<td class="table_cells">Evaluates a parser at run time</td>
</tr>
<tr>
<td class="table_cells"><code><a href="list_parsers.html">list_p</a> <br />
<a href="list_parsers.html">list_p</a>(del)<br />
<a href="list_parsers.html">list_p</a>(item, del)<br />
<a href="list_parsers.html">list_p</a>(item, del, end)</code></td>
<td class="table_cells">Matches a delimited list</td>
</tr>
<tr>
<td class="table_cells"><code><a href="loops.html">repeat_p</a>(num)[P]<br />
<a href="loops.html">repeat_p</a>(min, max)[P]<br />
<a href="loops.html">repeat_p</a>(min, <a href="loops.html">more</a>)[P]</code></td>
<td class="table_cells">Matches multiple times</td>
</tr>
<tr>
<td class="table_cells"><code><a href="dynamic_parsers.html">while_p</a> (cond) [P]</code></td>
<td class="table_cells">Matches while a condition is true</td>
</tr>
<tr>
<td class="table_title" colspan="2"><a name="general_directives"></a>General directives</td>
</tr>
<tr>
<td class="table_cells"><code><a href="directives.html">as_lower_d</a>[P]</code></td>
<td class="table_cells">Converts text to lower case before matching</td>
</tr>
<tr>
<td class="table_cells"><code><a href="refactoring.html">attach_action_d</a>[(P1
op P2)[act]]</code></td>
<td class="table_cells">Transforms to <b>P1 [act] op P2 [act]</b></td>
</tr>
<tr>
<td class="table_cells"><code><a href="directives.html">lexeme_d</a>[P]</code></td>
<td class="table_cells">Turns off whitespace skipping</td>
</tr>
<tr>
<td class="table_cells"><code><a href="directives.html">limit_d</a>[P](min,
max)</code></td>
<td class="table_cells">Matches only if the value is within the range</td>
</tr>
<tr>
<td class="table_cells"><code><a href="directives.html">longest_d</a>[P]</code></td>
<td class="table_cells">Matches the longest of alternatives</td>
</tr>
<tr>
<td class="table_cells"><code><a href="directives.html">max_limit_d</a>[P](max)</code></td>
<td class="table_cells">Matches only if <b>value &lt;= max</b></td>
</tr>
<tr>
<td class="table_cells"><code><a href="directives.html">min_limit_d</a>[P](min)</code></td>
<td class="table_cells">Matches only if <b>value &gt;= min</b></td>
</tr>
<tr>
<td class="table_cells"><code><a href="refactoring.html">refactor_action_d</a>[P1
[act] op P2]</code></td>
<td class="table_cells">Transforms to <b>(P1 op P2) [act]</b></td>
</tr>
<tr>
<td class="table_cells"><code><a href="refactoring.html">refactor_unary_d</a>[op1
P1 op2 P2]</code></td>
<td class="table_cells">Transforms to <b>op1 (P1 op2 P2)</b></td>
</tr>
<tr>
<td class="table_cells"><code><a href="scoped_lock.html">scoped_lock_d</a>[P](mutex)</code></td>
<td class="table_cells">Locks a mutex while matching</td>
</tr>
<tr>
<td class="table_cells"><code><a href="directives.html">shortest_d</a>[P]</code></td>
<td class="table_cells">Matches the shortest of alternatives</td>
</tr>
<tr>
<td class="table_title" colspan="2"><a name="tree_specific_directives"></a>Tree-specific directives</td>
</tr>
<tr>
<td class="table_cells"><code><a href="trees.html">access_node_d</a>[P]</code></td>
<td class="table_cells">Passes node value to action</td>
</tr>
<tr>
<td class="table_cells"><code><a href="trees.html">discard_first_node_d</a>[P]</code></td>
<td class="table_cells">Discards first node</td>
</tr>
<tr>
<td class="table_cells"><code><a href="trees.html">discard_last_node_d</a>[P]</code></td>
<td class="table_cells">Discards last node</td>
</tr>
<tr>
<td class="table_cells"><code><a href="trees.html">discard_node_d</a>[P]</code></td>
<td class="table_cells">Discards the generated node</td>
</tr>
<tr>
<td class="table_cells"><code><a href="trees.html">infix_node_d</a>[P]</code></td>
<td class="table_cells">Discards even-position nodes</td>
</tr>
<tr>
<td class="table_cells"><code><a href="trees.html">inner_node_d</a>[P]</code></td>
<td class="table_cells">Discards first and last nodes</td>
</tr>
<tr>
<td class="table_cells"><code><a href="trees.html">leaf_node_d</a>[P]</code></td>
<td class="table_cells">Generates a single node with no children</td>
</tr>
<tr>
<td class="table_cells"><code><a href="trees.html">no_node_d</a>[P]</code></td>
<td class="table_cells">Does not generate a node</td>
</tr>
<tr>
<td class="table_cells"><code><a href="trees.html">root_node_d</a>[P]</code></td>
<td class="table_cells">Identifies root nodes for an AST</td>
</tr>
<tr>
<td class="table_cells"><code><a href="trees.html">token_node_d</a>[P]</code></td>
<td class="table_cells">Synonym for <b>leaf_node_d</b></td>
</tr>
<tr>
<td class="table_title" colspan="2"><a name="unary_operators"></a>Unary operators</td>
</tr>
<tr>
<td class="table_cells"><code><a href="operators.html">!P</a></code></td>
<td class="table_cells">Matches <b>P</b> or an empty string</td>
</tr>
<tr>
<td class="table_cells"><code><a href="operators.html">*P</a></code></td>
<td class="table_cells">Matches <b>P</b> zero or more times</td>
</tr>
<tr>
<td class="table_cells"><code><a href="operators.html">+P</a></code></td>
<td class="table_cells">Matches <b>P</b> one or more times</td>
</tr>
<tr>
<td class="table_cells"><code><a href="primitives.html">~P</a></code></td>
<td class="table_cells">Matches anything that does not match <b>P</b></td>
</tr>
<tr>
<td class="table_title" colspan="2"><a name="binary_operators"></a>Binary operators</td>
</tr>
<tr>
<td class="table_cells"><code><a href="operators.html">P1 % P2</a></code></td>
<td class="table_cells">Matches one or more <b>P1</b> separated by <b>P2</b></td>
</tr>
<tr>
<td class="table_cells"><code><a href="operators.html">P1 - P2</a></code></td>
<td class="table_cells">Matches <b>P1</b> but not <b>P2</b></td>
</tr>
<tr>
<td class="table_cells"><code><a href="operators.html">P1 &gt;&gt; P2</a></code></td>
<td class="table_cells">Matches <b>P1</b> followed by <b>P2</b></td>
</tr>
<tr>
<td class="table_cells"><code><a href="operators.html">P1 &amp; P2</a></code></td>
<td class="table_cells">Matches both <b>P1</b> and <b>P2</b></td>
</tr>
<tr>
<td class="table_cells"><code><a href="operators.html">P1 ^ P2</a></code></td>
<td class="table_cells">Matches <b>P1</b> or <b>P2</b>, but not both</td>
</tr>
<tr>
<td class="table_cells"><code><a href="operators.html">P1 | P2</a></code></td>
<td class="table_cells">Matches <b>P1</b> or <b>P2</b></td>
</tr>
<tr>
<td class="table_cells"><code><a href="operators.html">P1 &amp;&amp; P2</a></code></td>
<td class="table_cells">Synonym for <b>P1 &gt;&gt; P2</b></td>
</tr>
<tr>
<td class="table_cells"><code><a href="operators.html">P1 || P2</a></code></td>
<td class="table_cells">Matches <b>P1 | P2 | P1 &gt;&gt; P2</b></td>
</tr>
</table>
</td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="error_handling.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="includes.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<hr size="1">
<p class="copyright">Copyright &copy; 2003 Ross Smith<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)</font></p>
<p>&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,162 @@
<html>
<head>
<title>Rationale</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Rationale</b></font></td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="faq.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="acknowledgments.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p><img src="theme/lens.gif" width="15" height="16"> <strong>Virtual functions:
From static to dynamic C++</strong></p>
<p>Rules straddle the border between static and dynamic C++. In effect, a rule
transforms compile-time polymorphism (using templates) into run-time polymorphism
(using virtual functions). This is necessary due to C++'s inability to automatically
declare a variable of a type deduced from an arbitrarily complex expression
in the right-hand side (rhs) of an assignment. Basically, we want to do something
like:</p>
<pre><code><font color="#000000"> <span class=identifier>T </span><span class=identifier>rule </span><span class=special>= </span><span class=identifier>an_arbitrarily_complex_expression</span><span class=special>;</span></font></code></pre>
<p>without having to know or care about the resulting type of the right-hand side
(rhs) of the assignment expression. Apart from this, we also need a facility
to forward declare an unknown type:</p>
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>T </span><span class=identifier>rule</span><span class=special>;
</span><span class=special>...
</span><span class=identifier>rule </span><span class=special>= </span><span class=identifier>a </span><span class=special>| </span><span class=identifier>b</span><span class=special>;</span></font></code></pre>
<p>These limitations lead us to this implementation of rules. This comes at the
expense of the overhead of a virtual function call, once through each invocation
of a rule.</p>
<p><img src="theme/lens.gif" width="15" height="16"> <strong>Multiple declaration
</strong> </p>
<p>Some BNF variants allow multiple declarations of a <tt>rule</tt>. The declarations
are taken as alternatives. Example:</p>
<pre>
<span class=identifier><code>r </code></span><code><span class=special>= </span><span class=identifier>a</span><span class=special>; </span><span class=identifier>
r </span><span class=special>= </span><span class=identifier>b</span><span class=special>;</span></code></pre>
<p> is equivalent to: </p>
<pre>
<span class=identifier><code>r </code></span><code><span class=special>= </span><span class=identifier>a </span><span class=special>| </span><span class=identifier>b</span><span class=special>;</span></code></pre>
<p>Spirit v1.3 allowed this behavior. However, the current version of Spirit <b>no
longer</b> allows this because experience shows that this behavior leads to
unwanted gotchas (for instance, it does not allow rules to be held in containers).
In the current release of Spirit, a second assignment to a rule will simply
redefine it. The old definition is destructed. This follows more closely C++
semantics and is more in line with how the user expects the rule to behave.</p>
<p><img src="theme/lens.gif" width="15" height="16"> <b>Sequencing Syntax</b>
<br>
<br>
The comma operator as in a, b seems to be a better candidate, syntax-wise. But
then the problem is with its precedence. It has the lowest precedence in C/C++,
which makes it virtually useless. <br>
<br>
Bjarne Stroustrup, in his article <a href="references.html#generalized_overloading">&quot;Generalizing
Overloading for C++2000&quot;</a> talks about overloading whitespace. Such a
feature would allow juxtapositioning of parser objects exactly as we do in (E)BNF
(e.g. a b | c instead of a &gt;&gt; b | c). Unfortunately, the article was dated
April 1, 1998. Oh well.</p>
<p><img src="theme/lens.gif" width="15" height="16"> <b>Forward iterators</b><br>
<br>
In general, the scanner expects at least a standard conforming forward iterator.
Forward iterators are needed for backtracking where the iterator needs to be
saved and restored later. Generally speaking, Spirit is a backtracking parser.
The implication of this is that at some point, the iterator position will have
to be saved to allow the parser to backtrack to a previous point. Thus, for
backtracking to work, the framework requires at least a forward iterator.<br>
<br>
Some parsers might require more specialized iterators (bi-directional or even
random access). Perhaps in the future, deterministic parsers when added to the
framework, will perform no backtracking and will need just a single token lookahead,
hence will require input iterators only.</p>
<p><img src="theme/lens.gif" width="15" height="16"><b> Why are subrules important?</b><br>
<br>
Subrules open up the opportunity to do aggressive meta programming as well because
they do not rely on virtual functions. The virtual function is the meta-programmer's
hell. Not only does it slow down the program due to the virtual function indirect
call, it is also an opaque wall where no metaprogram can get past. It kills
all meta-information beyond the virtual function call. Worse, the virtual function
cannot be templated, which means that its arguments have to be tied to actual
types. Many problems stem from this limitation. <br>
<br>
While Spirit is currently classified as a non-deterministic recursive-descent
parser, Doug Gregor first noted that other parsing techniques apart from top-down
recursive descent may be applied. For instance, apart from non-deterministic
recursive descent, deterministic LL(1) and LR(1) can theoretically be implemented
using the same expression template front end. Spirit rules use virtual functions
to encode the RHS parser expression in an opaque abstract parser type. While
it serves its purpose well, the rule's virtual functions are the stumbling blocks
to more advanced metaprogramming. Subrules are free from virtual functions.</p>
<p><img src="theme/lens.gif" width="15" height="16"><b> <a name="exhaustive_rd"></a>Exhaustive
backtracking and greedy RD</b></p>
<p>Spirit doesn't do exhaustive backtracking like regular expressions are expected
to. For example:</p>
<pre> <span class="special">*</span>chlit_p<span class="special">(</span><span class="quotes">'a'</span><span class="special">) &gt;&gt;</span> chlit_p<span class="special">(</span><span class="quotes">'a'</span><span class="special">);</span></pre>
<p>will always fail to match because Spirit's Kleene star does not back off when
the rest of the rule fails to match. </p>
<p>Actually, there's a solution to this greedy RD problem. Such a scheme is discussed
in section 6.6.2 of <a
href="http://www.cs.vu.nl/%7Edick/PTAPG.html">Parsing Techniques: A Practical
Guide</a>. The trick involves passing a <em>tail</em> parser (in addition to
the scanner) to each parser. The start parser will then simply be: <tt>start
&gt;&gt; end_p;</tt> (end_p is the start's tail). </p>
<p>Spirit is greedy --using straightforward, naive RD. It is certainly possible
to implement the fully backtracking scheme presented above, but there will
also certainly be a performance hit. The scheme will always try to match all
possible parser paths (full parser hierarchy traversal) until it reaches a point
of certainty, that the whole thing matches or fails to match. </p>
<table border="0" width="80%" align="center">
<tr>
<td class="note_box"><p><img src="theme/note.gif" width="16" height="16"><b>Backtracking
and Greedy RD </b><br>
<br>
Spirit is quite consistent and intuitive about when it backtracks and
to where, although it may not be obvious to those coming from different
backgrounds. In general, any (sub)parser will, given the same input, always
match the same portion of the input (or fail to match the input at all).
This means that Spirit is inherently greedy. Spirit will only backtrack
when a (sub)parser fails to match the input, and it will always backtrack
to the next choice point upward (not backward) in the parser structure.
In other words abb|ab will match &quot;ab&quot;, as will a(bb|b), but
(ab|a)b won't because the (ab|a) subparser will always match the 'b' after
the 'a' if it is available.</p>
<p>--Rainer Deyke</p>
</td>
</tr>
</table>
<p>There's a strong preference for the &quot;simplicity with all the knobs when you
need them&quot; approach, right now. On the other hand, the flexibility of Spirit
makes it possible to have different optional schemes available. It might be
possible to implement an exhaustive backtracking RD scheme as an optional feature
in the future. </p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="faq.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="acknowledgments.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)</font></p>
<p class="copyright">&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,125 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<title>Refactoring Parsers</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link href="theme/style.css" rel="stylesheet" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>&nbsp;</b></font></td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Refactoring Parsers</b></font></td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="functor_parser.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="regular_expression_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p><a name="refactoring_parsers"></a>There are three types of Refactoring Parsers
implemented right now, which help to abstract common parser refactoring tasks.
Parser refactoring means that a concrete parser construct is replaced (refactored)
by another very similar parser construct. Two of the Refactoring Parsers described
here (<tt>refactor_unary_parser</tt> and <tt>refactor_action_parser</tt>) are
introduced to allow a simple and more expressive notation while using <a href="confix.html">Confix
Parsers</a> and <a href="list_parsers.html">List Parsers</a>. The third Refactoring
Parser (<tt>attach_action_parser</tt>) is implemented to abstract some functionality
required for the Grouping Parser. Nevertheless
these Refactoring Parsers may help in solving other complex parsing tasks too.</p>
<h3>Refactoring unary parsers</h3>
<p>The <tt>refactor_unary_d</tt> parser generator, which should be used to generate
a unary refactoring parser, transforms a construct of the following type</p>
<pre><code> <span class=identifier>refactor_unary_d</span><span class=special>[*</span><span class=identifier>some_parser </span><span class=special>- </span><span class=identifier>another_parser</span><span class=special>]</span></code></pre>
<p>to </p>
<pre><code> <span class=special>*(</span><span class=identifier>some_parser</span> <span class=special>- </span><span class=identifier>another_parser</span><span class=special>)</span></code></pre>
<blockquote>
<p>where <tt>refactor_unary_d</tt> is a predefined object of the parser generator
struct <tt>refactor_unary_gen&lt;&gt;</tt></p>
</blockquote>
<p>The <tt>refactor_unary_d</tt> parser generator generates a new parser as shown
above, only if the original construct is an auxiliary binary parser (here the
difference parser) and the left operand of this binary parser is an auxiliary
unary parser (here the Kleene star operator). If the original parser isn't a
binary parser the compilation will fail. If the left operand isn't a unary
parser, no refactoring will take place.</p>
<h3>Refactoring action parsers</h3>
<p>The <tt>refactor_action_d</tt> parser generator, which should be used to generate
an action refactoring parser, transforms a construct of the following type</p>
<pre><code> <span class=identifier>refactor_action_d</span><span class=special>[</span><span class=identifier>some_parser</span><span class=special>[</span><span class=identifier>some_actor</span><span class=special>] </span><span class=special>- </span><span class=identifier>another_parser</span><span class=special>]</span></code></pre>
<p>to </p>
<pre><code> <span class=special>(</span><span class=identifier>some_parser </span><span class=special>- </span><span class=identifier>another_parser</span><span class=special>)[</span><span class=identifier>some_actor</span><span class=special>]</span></code></pre>
<blockquote>
<p>where <tt>refactor_action_d</tt> is a predefined object of the parser generator
struct <tt>refactor_action_gen&lt;&gt;</tt></p>
</blockquote>
<p>The <tt>refactor_action_d</tt> parser generator generates a new parser as shown
above, only if the original construct is an auxiliary binary parser (here the
difference parser) and the left operand of this binary parser is an auxiliary
parser generated by an attached semantic action. If the original parser isn't
a binary parser the compilation will fail. If the left operand isn't an action
parser, no refactoring will take place.</p>
<h3>Attach action refactoring</h3>
<p>The <tt>attach_action_d</tt> parser generator, which should be used to generate
an attach action refactoring parser, transforms a construct of the following
type</p>
<pre><code> <span class=identifier>attach_action_d</span><span class=special>[</span><span class=identifier>(some_parser</span> <span class=special>&gt;&gt; </span><span class=identifier>another_parser</span>)<span class=special>[</span><span class=identifier>some_actor</span><span class=special>]</span><span class=special>]</span></code></pre>
<p>to </p>
<pre><code> <span class=identifier>some_parser</span><span class=special>[</span><span class=identifier>some_actor</span><span class=special>]</span><span class=identifier> </span><span class=special>&gt;&gt; </span><span class=identifier>another_parser</span><span class=special>[</span><span class=identifier>some_actor</span><span class=special>]</span></code></pre>
<blockquote>
<p>where <tt>attach_action_d</tt> is a predefined object of the parser generator
struct <tt>attach_action_gen&lt;&gt;</tt></p>
</blockquote>
<p>The <tt>attach_action_d</tt> parser generator generates a new parser as shown
above, only if the original construct is an auxiliary action parser and the
parser to which this action is attached is an auxiliary binary parser (here the
sequence parser). If the original parser isn't an action parser the compilation
will fail. If the parser to which the action is attached isn't a binary parser,
no refactoring will take place.</p>
<h3>Nested refactoring</h3>
<p>Sometimes it is required to nest different types of refactoring, i.e. to transform
constructs like</p>
<pre><code> <span class=special>(*</span><span class=identifier>some_parser</span><span class=special>)[</span><span class=identifier>some_actor</span><span class=special>] </span><span class=special>- </span><span class=identifier>another_parser</span></code></pre>
<p>to </p>
<pre><code> <span class=special>(*(</span><span class=identifier>some_parser </span><span class=special>- </span><span class=identifier>another_parser</span><span class=special>))[</span><span class=identifier>some_actor</span><span class=special>]</span></code></pre>
<p>To simplify the construction of such nested refactoring parsers the <tt>refactor_unary_gen&lt;&gt;</tt>
and <tt>refactor_action_gen&lt;&gt;</tt> both can take another refactoring parser
generator type as their respective template parameter. For instance, to construct
a refactoring parser generator for the mentioned nested transformation we should
write:</p>
<pre><span class=special> </span><span class=keyword>typedef </span><span class=identifier>refactor_action_gen</span><span class=special>&lt;</span><span class=identifier>refactor_unary_gen</span><span class=special>&lt;&gt; </span><span class=special>&gt; </span><span class=identifier>refactor_t</span><span class=special>;
</span><span class=keyword>const </span><span class=identifier>refactor_t </span><span class=identifier>refactor_nested_d </span><span class=special>= </span><span class=identifier>refactor_t</span><span class=special>(</span><span class=identifier>refactor_unary_d</span><span class=special>);</span></pre>
<p>Now we could use it as follows to get the required result:</p>
<pre><code><font color="#0000FF"> </font><span class=identifier>refactor_nested_d</span><span class=special>[(*</span><span class=identifier>some_parser</span><span class=special>)[</span><span class=identifier>some_actor</span><span class=special>] </span><span class=special>- </span><span class=identifier>another_parser</span><span class=special>]</span></code></pre>
<p>An empty template parameter means not to nest this particular refactoring parser.
The default template parameter is <tt>non_nesting_refactoring</tt>, a predefined
helper structure for inhibiting nesting. Sometimes it is required to nest a
particular refactoring parser with itself. This is achieved by providing the
predefined helper structure <tt>self_nested_refactoring</tt> as the template
parameter to the corresponding refactoring parser generator template.</p>
<p><img src="theme/lens.gif" width="15" height="16"> See <a href="../example/fundamental/refactoring.cpp">refactoring.cpp</a> for a compilable example. This is part of the Spirit distribution. </p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="functor_parser.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="regular_expression_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 2001-2003 Hartmut Kaiser<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)</font></p>
<p>&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,240 @@
<html>
<head>
<title>References</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%">
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>References</b></font>
</td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="acknowledgments.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><img src="theme/r_arr_disabled.gif" width="20" height="19"></td>
</tr>
</table>
<br>
<table width="90%" border="0" align="center">
<tr>
<td width="36" class="table_cells"> 1.</td>
<td width="236" class="table_cells"> <a name="expression_templates"></a>Todd
Veldhuizen</td>
<td width="520" class="table_cells"> "<a
href="http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.43.248">Expression
Templates</a>". <br>
C++ Report, June 1995.</td>
</tr>
<tr>
<td width="36" class="table_cells"> 2.</td>
<td width="236" class="table_cells"> <a name="bnf"></a>Peter Naur (ed.)</td>
<td width="520" class="table_cells"> "<a href="http://www.masswerk.at/algol60/report.htm">Report
on the Algorithmic Language ALGOL 60</a>". <br>
CACM, May 1960.</td>
</tr>
<tr>
<td width="36" class="table_cells"> 3.</td>
<td width="236" class="table_cells"> ISO/IEC</td>
<td width="520" class="table_cells"> "<a
href="http://www.cl.cam.ac.uk/%7Emgk25/iso-14977.pdf">ISO-EBNF</a>", <br>
ISO/IEC 14977: 1996(E).</td>
</tr>
<tr>
<td width="36" class="table_cells"> 4.</td>
<td width="236" class="table_cells"> <a name="intersections"></a>Richard J.
Botting, Ph.D. </td>
<td width="520" class="table_cells"> "<a
href="http://www.csci.csusb.edu/dick/maths/intro_ebnf.html">XBNF</a>" (citing
Leu-Weiner, 1973). <br>
California State University, San Bernardino, 1998. </td>
</tr>
<tr>
<td width="36" class="table_cells"> 5.</td>
<td width="236" class="table_cells"> <a name="curious_recurring"></a>James
Coplien. </td>
<td width="520" class="table_cells"> "<b>Curiously Recurring Template Pattern</b>".
<br>
C++ Report, Feb. 1995.</td>
</tr>
<tr>
<td width="36" class="table_cells"> 6.</td>
<td width="236" class="table_cells"> <a name="generic_patterns"></a>Thierry
G&eacute;raud and <br>
Alexandre Duret-Lutz</td>
<td width="520" class="table_cells"> <a
href="http://www.coldewey.com/europlop2000/papers/geraud%2Bduret.zip">Generic
Programming Redesign of Patterns</a><br>
Proceedings of the 5th European Conference on Pattern Languages of Programs
<br>
(EuroPLoP'2000) Irsee, Germany, July 2000. </td>
</tr>
<tr>
<td width="36" class="table_cells">7.</td>
<td width="236" class="table_cells">Geoffrey Furnish</td>
<td width="520" height="53" class="table_cells"><a href="https://adtmag.com/articles/2000/04/25/disambiguated-glommable-expression-templates-reintroduced.aspx">&quot;Disambiguated
Glommable Expression Templates Reintroduced&quot;</a><br>
C++ Report, May 2000</td>
</tr>
<tr>
<td width="36" class="table_cells">8.</td>
<td width="236" height="53" class="table_cells"> Erich Gamma, <br>
Richard Helm, <br>
Ralph Johnson, <br>
and John Vlissides</td>
<td width="520" height="53" class="table_cells"> <b>Design Patterns, Elements
of Reusable Object-Oriented Software</b>. <br>
Addison-Wesley, 1995.</td>
</tr>
<tr>
<td width="36" class="table_cells">9. </td>
<td width="236" class="table_cells">Alfred V. Aho<br>
Ravi Sethi<br>
Jeffrey D. Ullman</td>
<td width="520" class="table_cells"><b>Compilers, Principles, Techniques and
Tools</b><br>
Addison-Wesley, June 1987. </td>
</tr>
<tr>
<td width="36" class="table_cells">10. </td>
<td width="236" class="table_cells"> Dick Grune and <br>
Ceriel Jacobs</td>
<td width="520" class="table_cells"> <a
href="http://www.cs.vu.nl/%7Edick/PTAPG.html">Parsing Techniques: A Practical
Guide</a>. <br>
Ellis Horwood Ltd.: West Sussex, England, 1990. (electronic copy, 1998).</td>
</tr>
<tr>
<td width="36" class="table_cells">11. </td>
<td width="236" class="table_cells"> T. J. Parr, H. G. Dietz, and<br>
W. E. Cohen</td>
<td width="520" class="table_cells"> <a
href="https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.51.7097">PCCTS Reference Manual (Version
1.00)</a>. <br>
School of Electrical Engineering, Purdue University, West Lafayette, August
1991.</td>
</tr>
<tr>
<td width="36" class="table_cells">12. </td>
<td width="236" class="table_cells"> Adrian Johnstone and Elizabeth Scott.</td>
<td width="520" class="table_cells"> <a
href="ftp://ftp.cs.rhul.ac.uk/pub/rdp">RDP, A Recursive Descent Compiler Compiler</a>.
<br>
Technical Report CSD TR 97 25, Dept. of Computer Science, Egham, Surrey,
England, Dec. 20, 1997. </td>
</tr>
<tr>
<td width="36" class="table_cells">13. </td>
<td width="236" class="table_cells"> <a name="back_tracking_parsers"></a>Adrian
Johnstone</td>
<td width="520" class="table_cells"> <a
href="https://www.cs.rhul.ac.uk/research/languages/csle/lookahead_backtrack.html">Languages
and Architectures, <br>
Parser generators with backtrack or extended lookahead capability</a><br>
Department of Computer Science, Royal Holloway, University of London, Egham,
Surrey, England</td>
</tr>
<tr>
<td width="36" class="table_cells">14. </td>
<td width="236" class="table_cells"> <a name="damian_conway"></a>Damian Conway</td>
<td width="520" class="table_cells"><a href="http://www.csse.monash.edu.au/%7Edamian/papers/#Embedded_Input_Parsing_for_C">Parsing
with C++ Classes.</a><br>
ACM SIGPLAN Notices, 29:1, 1994.</td>
</tr>
<tr>
<td width="36" class="table_cells">15. </td>
<td width="236" class="table_cells"> Joel de Guzman</td>
<td width="520" class="table_cells"><a href="http://spirit.sourceforge.net/index.php?doc=docs/v1_3/index.html">&quot;Spirit
Version 1.3&quot;</a>. <br>
http://spirit.sourceforge.net/, November 2001.</td>
</tr>
<tr>
<td width="36" class="table_cells">16. </td>
<td width="236" class="table_cells"> <a name="combinators"></a>S. Doaitse
Swierstra and <br>
Luc Duponcheel</td>
<td width="520" class="table_cells"> <a
href="http://www.cs.uu.nl/groups/ST/Publications/AFP2.pdf">Deterministic, Error-Correcting
Combinator Parsers </a><br>
Dept. of Computer Science, Utrecht University P.O.Box 80.089, 3508 TB Utrecht,
The Netherlands</td>
</tr>
<tr>
<td width="36" class="table_cells">17.</td>
<td width="236" class="table_cells"> <a name="generalized_overloading"></a>Bjarne
Stroustrup</td>
<td width="520" class="table_cells"> <a
href="http://www.research.att.com/%7Ebs/whitespace98.pdf">Generalizing Overloading
for C++2000</a><br>
Overload, Issue 25. April 1, 1998.</td>
</tr>
<tr>
<td width="36" class="table_cells">18.</td>
<td width="236" class="table_cells"><a name="regex_boost_doc"></a>Dr. John
Maddock</td>
<td width="520" class="table_cells"><a href="http://www.boost.org/libs/regex/index.html">Regex++
Documentation</a><br>
http://www.boost.org/libs/regex/index.html </td>
</tr>
<tr>
<td class="table_cells">19.</td>
<td class="table_cells">Anonymous<br>
Edited by Graham Hutton </td>
<td class="table_cells"> Frequently Asked Questions for comp.lang.functional.
<br>
Edited by Graham Hutton, University of Nottingham. <a href="http://www.cs.nott.ac.uk/%7Egmh//faq.html">http://www.cs.nott.ac.uk/~gmh//faq.html</a></td>
</tr>
<tr>
<td class="table_cells">20.</td>
<td class="table_cells">Hewlett-Packard</td>
<td class="table_cells">Standard Template Library Programmer's Guide.<br>
<a href="https://www.boost.org/sgi/stl/">https://www.boost.org/sgi/stl/</a>,
Hewlett-Packard Company, 1994</td>
</tr>
<tr>
<td class="table_cells">21.</td>
<td class="table_cells">boost.org</td>
<td class="table_cells">Boost Libraries Documentation. <a href="http://www.boost.org/">http://www.boost.org/</a></td>
</tr>
<tr>
<td class="table_cells">22.</td>
<td class="table_cells">Brian McNamara and Yannis Smaragdakis</td>
<td class="table_cells"> FC++: Functional Programming in C++. <a href="https://people.cs.umass.edu/~yannis/fc++/">https://people.cs.umass.edu/~yannis/fc++/</a></td>
</tr>
<tr>
<td class="table_cells">23.</td>
<td class="table_cells">Todd Veldhuizen</td>
<td class="table_cells"> <a href="ftp://ftp.cs.indiana.edu/pub/techreports/TR542.pdf">Techniques
for Scientific C++.</a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="acknowledgments.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><img src="theme/r_arr_disabled.gif" width="20" height="19"></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)</font></p>
<p>&nbsp;</p>
<p>&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,91 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<title>Regular Expression Parser</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link href="theme/style.css" rel="stylesheet" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10" height="49"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>&nbsp;</b></font></td>
<td width="85%" height="49"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Regular Expression Parser</b></font></td>
<td width="112" height="49"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="refactoring.html"><img src="theme/l_arr.gif" width="20" height="19" border="0"></a></td>
<td width="30"><a href="scoped_lock.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p><a name="regular_expression_parser"></a>Regular expressions are a form of pattern-matching
that are often used in text processing. Many users will be familiar with the
usage of regular expressions. Initially there were the Unix utilities grep,
sed and awk, and the programming language Perl, each of which makes extensive
use of regular expressions. Today the usage of such regular expressions is integrated
in many more available systems.</p>
<p>During parser construction it is often useful to have the power of regular
expressions available. The Regular Expression Parser was introduced to make
the use of regular expressions accessible for Spirit parser construction.</p>
<p>The Regular Expression Parser <tt>rxstrlit</tt> has a single template type
parameter: an iterator type. Internally, <tt>rxstrlit</tt> holds the Boost Regex
object containing the provided regular expression. The <tt>rxstrlit</tt> attempts
to match the current input stream with this regular expression. The template
type parameter defaults to <tt>char const<span class="operators">*</span></tt>.
<tt>rxstrlit</tt> has two constructors. The first accepts a null-terminated
character pointer. This constructor may be used to build <tt>rxstrlit's</tt>
from quoted regular expression literals. The second constructor takes in a first/last
iterator pair. The function generator version is <tt>regex_p</tt>. </p>
<p>Here are some examples:</p>
<pre><code><span class=comment> </span><span class=identifier>rxstrlit</span><span class=special>&lt;&gt;(</span><span class=string>"Hello[[:space:]]+[W|w]orld"</span><span class=special>)
</span><span class=identifier>regex_p</span><span class=special>(</span><span class=string>"Hello[[:space:]]+[W|w]orld"</span><span class=special>)
</span><span class=identifier>std</span><span class=special>::</span><span class=identifier>string </span><span class=identifier>msg</span><span class=special>(</span><span class=string>"Hello[[:space:]]+[W|w]orld"</span><span class=special>);
rx</span><span class=identifier>strlit</span><span class=special>&lt;&gt;(</span><span class=identifier>msg</span><span class=special>.</span><span class=identifier>begin</span><span class=special>(), </span><span class=identifier>msg</span><span class=special>.</span><span class=identifier>end</span><span class=special>());</span></code></pre>
<p>The generated parser object acts at the character level, so any skip parser
that is supplied is not used during the attempt to match the regular expression
(see <a href="faq.html#scanner_business">The Scanner Business</a>).</p>
<p>The Regular Expression Parser is implemented with the help of the <a href="http://www.boost.org/libs/regex/index.html">Boost
Regex++ library</a>, so you have to keep some limitations in mind. </p>
<blockquote>
<p><img src="theme/bullet.gif" width="12" height="12"> Boost libraries have
to be installed on your computer and the Boost root directory has to be added
to your compiler <tt>#include&lt;...&gt;</tt> search path. You can download
the current version at the <a href="http://www.boost.org/">Boost web site</a>.</p>
<p><img src="theme/bullet.gif" width="12" height="12"> The Boost Regex library
requires the usage of bi-directional iterators. So you have to ensure this
during the usage of the Spirit parser, which contains a Regular Expression
Parser.</p>
<p><img src="theme/bullet.gif" width="12" height="12"> The Boost Regex library
is not a header only library, as Spirit is, though it provides the possibility
to include all of the sources, if you are using it in one compilation unit
only. Define the preprocessor constant <tt>BOOST_SPIRIT_NO_REGEX_LIB</tt> before
including the spirit Regular Expression Parser header, if you want to include
all the Boost Regex sources into this compilation unit. If you are using the
Regular Expression Parser in more than one compilation unit, you should not
define this constant and must link your application against the regex library
as described in the related documentation.</p>
</blockquote>
<p> <img src="theme/lens.gif" width="15" height="16"> See <a href="../example/fundamental/regular_expression.cpp">regular_expression.cpp</a> for a compilable example. This is part of the Spirit distribution.</p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="refactoring.html"><img src="theme/l_arr.gif" width="20" height="19" border="0"></a></td>
<td width="30"><a href="scoped_lock.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 2001-2002 Hartmut Kaiser<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)</font></p>
</body>
</html>

View File

@@ -0,0 +1,231 @@
<html>
<head>
<title>The Rule</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%">
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>The Rule</b></font>
</td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="numerics.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="epsilon.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>The <b>rule</b> is a polymorphic parser that acts as a named place-holder capturing
the behavior of an EBNF expression assigned to it. Naming an EBNF expression
allows it to be referenced later. The <tt>rule</tt> is a template class parameterized
by the type of the scanner (<tt>ScannerT</tt>), the rule's <a href="indepth_the_parser_context.html">context</a>
and its <a href="#tag">tag</a>. Default template parameters are provided to
make it easy to use the rule.</p>
<pre><code><font color="#000000"><span class=identifier> </span><span class=keyword>template</span><span class=special>&lt;
</span><span class=keyword>typename </span><span class=identifier>ScannerT </span><span class=special>= </span><span class=identifier>scanner</span><span class=special>&lt;&gt;,
</span><span class=keyword>typename </span><span class=identifier>ContextT </span><span class=special>= </span><span class=identifier>parser_context</span><span class=special>&lt;&gt;</span><span class=identifier>,
</span><span class="keyword">typename</span><span class=identifier> TagT </span><span class="special">=</span><span class=identifier> parser_address_tag</span><span class=special>&gt;
</span><span class=keyword>class </span><span class=identifier>rule</span><span class=special>;</span></font></code></pre>
<p>Default template parameters are supplied to handle the most common case. <tt>ScannerT</tt>
defaults to <tt>scanner&lt;&gt;</tt>, a plain vanilla scanner that acts on <tt>char
const<span class="operators">*</span></tt> iterators and does nothing special
at all other than iterate through all the chars in the null terminated input
a character at a time. The rule tag, <tt>TagT</tt>, typically used with <a href="trees.html">ASTs</a>,
is used to identify a rule; it is explained <a href="#tag">here</a>. In trivial
cases, declaring a rule as <tt>rule&lt;&gt;</tt> is enough. You need not be
concerned at all with the <tt>ContextT</tt> template parameter unless you wish
to tweak the low level behavior of the rule. Detailed information on the <tt>ContextT</tt>
template parameter is provided <a href="indepth_the_parser_context.html">elsewhere</a>.
</p>
<h3><a name="order_of_parameters"></a>Order of parameters</h3>
<p>As of v1.8.0, the <tt>ScannerT</tt>, <tt>ContextT</tt> and <tt>TagT</tt> can
be specified in any order. If a template parameter is missing, it will assume
the defaults. Examples:</p>
<pre><span class=identifier> rule</span><span class=special>&lt;&gt; </span><span class=identifier>rx1</span><span class=special>;
</span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>scanner</span><span class=special>&lt;&gt; </span><span class=special>&gt; </span><span class=identifier>rx2</span><span class=special>;
</span> <span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>parser_context<code><font color="#000000"><span class=special>&lt;&gt;</span></font></code> </span><span class=special>&gt; </span><span class=identifier>rx3</span><span class=special>;
</span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>parser_context<code><font color="#000000"><span class=special>&lt;&gt;</span></font></code></span><span class=special>, </span><span class=identifier>parser_address_tag</span><span class=special>&gt; </span><span class=identifier>rx4</span><span class=special>;
</span> <span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>parser_address_tag</span><span class=special>&gt; </span><span class=identifier>rx5</span><span class=special>;
</span> <span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>parser_address_tag</span><span class=special>, </span><span class=identifier>scanner</span><span class=special>&lt;&gt;, </span><span class=identifier>parser_context<code><font color="#000000"><span class=special>&lt;&gt;</span></font></code> </span><span class=special>&gt; </span><span class=identifier>rx6</span><span class=special>;
</span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>parser_context<code><font color="#000000"><span class=special>&lt;&gt;</span></font></code></span><span class=special>, </span><span class=identifier>scanner</span><span class=special>&lt;&gt;, </span><span class=identifier>parser_address_tag</span><span class=special>&gt; </span><span class=identifier>rx7</span><span class=special>;</span></pre>
<h3><a name="multiple_scanner_support" id="multiple_scanner_support"></a>Multiple scanners</h3>
<p>As of v1.8.0, rules can use one or more scanner types. There are cases, for
instance, where we need a rule that can work on the phrase and character levels.
Rule/scanner mismatch has been a source of confusion and is the no. 1 <a href="faq.html#scanner_business">FAQ</a>.
To address this issue, we now have multiple scanner support. Example:</p>
<pre><span class=special> </span><span class=keyword>typedef </span><span class=identifier>scanner_list</span><span class=special>&lt;</span><span class=identifier>scanner</span><span class=special>&lt;&gt;, </span><span class=identifier>phrase_scanner_t</span><span class=special>&gt; </span><span class=identifier>scanners</span><span class=special>;
</span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>scanners</span><span class=special>&gt; </span><span class=identifier>r </span><span class=special>= </span><span class=special>+</span><span class=identifier>anychar_p</span><span class=special>;
</span><span class=identifier>assert</span><span class=special>(</span><span class=identifier>parse</span><span class=special>(</span><span class=string>&quot;abcdefghijk&quot;</span><span class=special>, </span><span class=identifier>r</span><span class=special>).</span><span class=identifier>full</span><span class=special>);
</span><span class=identifier>assert</span><span class=special>(</span><span class=identifier>parse</span><span class=special>(</span><span class=string>&quot;a b c d e f g h i j k&quot;</span><span class=special>, </span><span class=identifier>r</span><span class=special>, </span><span class=identifier>space_p</span><span class=special>).</span><span class=identifier>full</span><span class=special>);</span></pre>
<p>Notice how rule <tt>r</tt> is used in both the phrase and character levels.
</p>
<p>By default support for multiple scanners is disabled. The macro
<tt>BOOST_SPIRIT_RULE_SCANNERTYPE_LIMIT</tt> must be defined to the
maximum number of scanners allowed in a scanner_list. The value must
be greater than 1 to enable multiple scanners. Given the
example above, to define a limit of two scanners for the list, the
following line must be inserted into the source file before the
inclusion of Spirit headers:
</p>
<pre><span class=special> </span><span class=preprocessor>#define </span><span class=identifier>BOOST_SPIRIT_RULE_SCANNERTYPE_LIMIT</span> <span class=literal>2</span></pre>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><img src="theme/bulb.gif" width="13" height="18"> See
the techniques section for an <a href="techniques.html#multiple_scanner_support">example</a>
of a <a href="grammar.html">grammar</a> using a multiple scanner enabled
rule, <a href="scanner.html#lexeme_scanner">lexeme_scanner</a> and <a href="scanner.html#as_lower_scanner">as_lower_scanner.</a></td>
</tr>
</table>
<h3>Rule Declarations</h3>
<p>The rule class models EBNF's production rule. Example:</p>
<pre><code><font color="#000000"> <span class=identifier>rule</span><span class=special>&lt;&gt; </span><span class=identifier>a_rule </span><span class=special>= </span><span class=special>*(</span><span class=identifier>a </span><span class=special>| </span><span class=identifier>b</span><span class=special>) </span><span class=special>& </span><span class=special>+(</span><span class=identifier>c </span><span class=special>| </span><span class=identifier>d </span><span class=special>| </span><span class=identifier>e</span><span class=special>);</span></font></code></pre>
<p>The type and behavior of the right-hand (rhs) EBNF expression, which may be
arbitrarily complex, is encoded in the rule named a_rule. a_rule may now be
referenced elsewhere in the grammar:</p>
<pre><code><font color="#000000"> <span class=identifier>rule</span><span class=special>&lt;&gt; </span><span class=identifier>another_rule </span><span class=special>= </span><span class=identifier>f </span><span class=special>&gt;&gt; </span><span class=identifier>g </span><span class=special>&gt;&gt; </span><span class=identifier>h </span><span class=special>&gt;&gt; </span><span class=identifier>a_rule</span><span class=special>;</span></font></code></pre>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><img src="theme/alert.gif" width="16" height="16"> <b>Referencing
rules <br>
</b><br>
When a rule is referenced anywhere in the right hand side of an EBNF expression,
the rule is held by the expression by reference. It is the responsibility
of the client to ensure that the referenced rule stays in scope and does
not get destructed while it is being referenced. </td>
</tr>
</table>
<pre><span class=special> </span><span class=identifier>a </span><span class=special>= </span><span class=identifier>int_p</span><span class=special>;
</span><span class=identifier>b </span><span class=special>= </span><span class=identifier>a</span><span class=special>;
</span><span class=identifier>c </span><span class=special>= </span><span class=identifier>int_p </span><span class=special>&gt;&gt; </span><span class=identifier>b</span><span class=special>;</span></pre>
<h3>Copying Rules</h3>
<p>The rule is a weird C++ citizen, unlike any other C++ object. It does not have
the proper copy and assignment semantics and cannot be stored and passed around
by value. If you need to copy a rule you have to explicitly call its member
function <tt>copy()</tt>:</p>
<pre><span class=special> </span><span class=identifier>r</span><span class="special">.</span><span class=identifier>copy()</span><span class=special>;</span></pre>
<p>However, be warned that copying a rule will not deep copy other referenced
rules of the source rule being copied. This might lead to dangling references.
Again, it is the responsibility of the client to ensure that all referenced
rules stay in scope and do not get destructed while they are being referenced.
Caveat emptor.</p>
<p>If you copy a rule, then you'll want to place it in a storage somewhere. The
problem is how? The storage can't be another rule:</p>
<pre> <code><font color="#000000"><span class=identifier>rule</span><span class=special>&lt;&gt;</span></font></code> r2 <span class="special">=</span> <span class=identifier>r</span><span class="special">.</span><span class=identifier>copy()</span><span class=special>; </span><span class="comment">// BAD!</span></pre>
<p>because rules are weird and do not have the expected C++ copy-constructor
and assignment semantics! As a general rule: <strong>Don't put a copied rule
into another rule! </strong>Instead, use the <a href="stored_rule.html">stored_rule</a>
for that purpose.</p>
<h3>Forward declarations</h3>
<p>A <tt>rule</tt> may be declared before being defined to allow cyclic structures
typically found in BNF declarations. Example:</p>
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>rule</span><span class=special>&lt;&gt; </span><span class=identifier>a</span><span class=special>, </span><span class=identifier>b</span><span class=special>, </span><span class=identifier>c</span><span class=special>;
</span><span class=identifier>a </span><span class=special>= </span><span class=identifier>b </span><span class=special>| </span><span class=identifier>a</span><span class=special>;
</span><span class=identifier>b </span><span class=special>= </span><span class=identifier>c </span><span class=special>| </span><span class=identifier>a</span><span class=special>;</span></font></code></pre>
<h3>Recursion</h3>
<p>The right-hand side of a rule may reference other rules, including itself.
The limitation is that direct or indirect left recursion is not allowed (this
is an unchecked run-time error that results in an infinite loop). This is typical
of top-down parsers. Example:</p>
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>a </span><span class=special>= </span><span class=identifier>a </span><span class=special>| </span><span class=identifier>b</span><span class=special>; </span><span class=comment>// infinite loop!</span></font></code></pre>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><img src="theme/lens.gif" width="15" height="16"> <b>What
is left recursion?<br>
</b><br>
Left recursion happens when you have a rule that calls itself before anything
else. A top-down parser will go into an infinite loop when this happens.
See the <a href="faq.html#left_recursion">FAQ</a> for details on how to
eliminate left recursion.</td>
</tr>
</table>
<h3>Undefined rules</h3>
<p>An undefined rule matches nothing and is semantically equivalent to <tt>nothing_p</tt>.</p>
<h3>Redeclarations</h3>
<p>Like any other C++ assignment, a second assignment to a rule is destructive
and will redefine it. The old definition is lost. Rules are dynamic. A rule
can change its definition anytime:</p>
<pre><code><font color="#000000"><span class=identifier> r </span><span class=special>= </span><span class=identifier>a_definition</span><span class=special>;
</span><span class=identifier> r </span><span class=special>= </span><span class=identifier>another_definition</span><span class=special>;</span></font></code></pre>
<p>Rule <tt>r</tt> loses the old definition when the second assignment is made.
As mentioned, an undefined rule matches nothing and is semantically equivalent
to <tt>nothing_p</tt>.</p>
<h3>Dynamic Parsers</h3>
<p>Hosting declarative EBNF in imperative C++ yields an interesting blend. We
have the best of both worlds. We have the ability to conveniently modify the
grammar at run time using imperative constructs such as <tt>if</tt>, <tt>else</tt>
statements. Example:</p>
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>if </span><span class=special>(</span><span class=identifier>feature_is_available</span><span class=special>)
</span><span class=identifier>r </span><span class=special>= </span><span class=identifier>add_this_feature</span><span class=special>;</span></font></code></pre>
<p>Rules are essentially dynamic parsers. A dynamic parser is characterized by
its ability to modify its behavior at run time. Initially, an undefined rule
matches nothing. At any time, the rule may be defined and redefined, thus, dynamically
altering its behavior.</p>
<h3>No start rule</h3>
<p>Typically, parsers have what is called a start symbol, chosen to be the root
of the grammar where parsing starts. The Spirit parser framework has no notion
of a start symbol. Any rule can be a start symbol. This feature promotes step-wise
creation of parsers. We can build parsers from the bottom up while fully testing
each level or module up until we get to the top-most level.</p>
<h3><a name="tag"></a>Parser Tags</h3>
<p>Rules may be tagged for identification purposes. This is necessary, especially
when dealing with <a href="trees.html">parse trees and ASTs</a> to see which
rule created a specific AST/parse tree node. Each rule has an ID of type <tt>parser_id</tt>.
This ID can be obtained through the rule's <tt>id()</tt> member function:</p>
<pre><code><font color="#000000"><span class=identifier> my_rule</span><span class=special>.</span><span class=identifier>id</span><span class=special>(); </span><span class=comment>// get my_rule's id</span></font></code></pre>
<p>The <tt>parser_id</tt> class is declared as:</p>
<pre> <span class="keyword">class</span> <span class="identifier">parser_id</span><br> <span class="special">{</span><br> <span class="keyword">public</span><span class="special">:</span><br> parser_id<span class="special">();</span><br> <span class="keyword">explicit</span> parser_id<span class="special">(</span><span class="keyword">void const</span><span class="special">*</span> p<span class="special">);</span><br> parser_id<span class="special">(</span><span class="keyword">std::size_t</span> l<span class="special">);</span>
<span class="keyword">bool</span> <span class="keyword">operator</span><span class="special">==(</span><span class="identifier">parser_id</span> <span class="keyword">const</span><span class="special">&amp;</span> x<span class="special">)</span> const<span class="special">;</span><br> <span class="keyword">bool</span> <span class="keyword">operator</span><span class="special">!=(</span><span class="identifier">parser_id</span> <span class="keyword">const</span><span class="special">&amp;</span> x<span class="special">)</span> const<span class="special">;</span>
<span class="keyword">bool</span> <span class="keyword"> operator</span><span class="special">&lt;(</span><span class="identifier">parser_id</span> <span class="keyword">const</span><span class="special">&amp;</span> x<span class="special">)</span> const<span class="special">;</span>
<span class="special"></span><span class="keyword">std::size_t</span><span class="identifier"> to_long</span><span class="special">()</span> <span class="keyword">const</span><span class="special">;
};</span></pre>
<h3>parser_address_tag</h3>
<p>The rule's <tt>TagT</tt> template parameter supplies this ID. This defaults
to <tt>parser_address_tag</tt>. The <tt>parser_address_tag</tt> uses the address
of the rule as its ID. This is often not the most convenient, since it is not
always possible to get the address of a rule to compare against. </p>
<h3>parser_tag</h3>
<p>It is possible to have specific constant integers to identify a rule. For this
purpose, we can use the <tt>parser_tag&lt;N&gt;</tt>, where N is a constant
integer:</p>
<pre><code><font color="#000000"><span class=identifier> rule</span><span class=special>&lt;</span><span class=identifier>parser_tag</span><span class="special">&lt;</span><span class=identifier>123</span><span class="special">&gt; &gt; </span><span class="identifier">my_rule</span><span class="special">; </span><span class="comment">// set my_rule's id to 123</span></font></code></pre>
<h3>dynamic_parser_tag</h3>
<p>The <tt>parser_tag&lt;N&gt;</tt> can only specify a <strong>static ID</strong>,
which is defined at compile time. If you need the ID to be <strong>dynamic</strong>
(changeable at runtime), you can use the <tt>dynamic_parser_tag</tt> class as
the <tt>TagT</tt> template parameter. This template parameter enables the <tt>set_id()</tt>
function, which may be used to set the required id at runtime:</p>
<pre><code><font color="#000000"><span class=identifier> rule</span><span class=special>&lt;</span><span class=identifier>dynamic_parser_tag</span><span class="special">&gt; </span><span class="identifier">my_dynrule</span><span class="special">;</span>
my_dynrule.set_id(1234); <span class="comment">// set my_dynrule's id to 1234</span></font></code></pre>
<p>If the <tt>set_id()</tt> function isn't called, the parser id defaults to the
address of the rule, just like the <tt>parser_address_tag</tt> template
parameter would do. </p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="numerics.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="epsilon.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)</font></p>
</body>
</html>

View File

@@ -0,0 +1,288 @@
<html>
<head>
<title>The Scanner and Parsing</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%">
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>The Scanner and Parsing</b></font>
</td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="directives.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="grammar.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>The <b>scanner</b>'s task is to feed the sequential input data stream to the
parser. The scanner extracts data from the input, parceling, potentially modifying
or filtering, and then finally relegating the result to individual parser elements
on demand until the input is exhausted. The scanner is composed of two STL conforming
forward iterators, first and last, where first is held by reference and last,
by value. The first iterator is held by reference to allow it to be re-positioned.
The following diagram illustrates what's happening:</p>
<table width="62%" border="0" align="center">
<tr>
<td><img src="theme/scanner1.png"></td>
</tr>
</table>
<p>The scanner manages various aspects of the parsing process through a set of
policies. There are three sets of policies that govern:</p>
<blockquote>
<p><img src="theme/bullet.gif" width="12" height="12"> Iteration and filtering<br>
<img src="theme/bullet.gif" width="12" height="12"> Recognition and matching<br>
<img src="theme/bullet.gif" width="12" height="12"> Handling semantic actions</p>
</blockquote>
<p>These policies are mostly hidden from view and users generally need not know
about them. Advanced users might however provide their own policies that override
the ones that are already in place to fine tune the parsing process
to fit their own needs. We shall see how this can be done. This will be covered
in further detail later.</p>
<p>The <tt>scanner</tt> is a template class expecting two parameters: <tt>IteratorT</tt>,
the iterator type and <tt>PoliciesT</tt>, its set of policies. <tt>IteratorT</tt>
defaults to <tt>char const*</tt> while <tt>PoliciesT</tt> defaults to <tt>scanner_policies&lt;&gt;</tt>,
a predefined set of scanner policies that we can use straight out of the box.</p>
<pre><code><font color="#000000"><span class=keyword> template</span><span class=special>&lt;
</span><span class=keyword>typename </span><span class=identifier>IteratorT </span><span class=special>= </span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*,
</span><span class=keyword>typename </span><span class=identifier>PoliciesT </span><span class=special>= </span><span class=identifier>scanner_policies</span><span class=special>&lt;&gt; </span><span class=special>&gt;
</span><span class=keyword>class </span><span class=identifier>scanner</span><span class=special>;</span></font></code></pre>
<p>Spirit uses the same iterator concepts and interface formally defined by the
C++ Standard Template Library (STL). We can use iterators supplied by STL's
containers (e.g. <tt>list</tt>, <tt>vector</tt>, <tt>string</tt>, etc.) as is,
or perhaps write our own. Iterators can be as simple as a pointer (e.g. <tt>char
const<span class="operators">*</span></tt>). At the other end of the spectrum,
iterators can be quite complex; for instance, an iterator adapter that wraps
a lexer such as LEX.</p>
<h2>The Free Parse Functions</h2>
<p>The framework provides a couple of free functions to make parsing a snap. These
parser functions have two forms. The first form works on the <b>character level</b>.
The second works on the <b>phrase level</b> and asks for a <b>skip parser</b>.</p>
<p>The <b>skip parser</b> is just about any parser primitive or composite. Its
purpose is to move the scanner's <tt>first</tt> iterator to valid tokens by
skipping white spaces. In C for instance, the tab <tt class="quotes">'\t'</tt>,
the newline <tt class="quotes">'\n'</tt>, return <tt><span class="quotes">'\r'</span></tt>,
space <tt class="quotes">' '</tt> and characters inside comments <tt class="quotes">/*...*/</tt>
are considered as white spaces.</p>
<p><b>Character level parsing</b></p>
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>IteratorT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>DerivedT</span><span class=special>&gt;
</span><span class=identifier>parse_info</span><span class=special>&lt;</span><span class=identifier>IteratorT</span><span class=special>&gt;
</span><span class=identifier>parse
</span><span class=special>(
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>first</span><span class=special>,
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>last</span><span class=special>,
</span><span class=identifier>parser</span><span class=special>&lt;</span><span class=identifier>DerivedT</span><span class=special>&gt; </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>p
</span><span class=special>);</span></font></code></pre>
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>CharT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>DerivedT</span><span class=special>&gt;
</span><span class=identifier>parse_info</span><span class=special>&lt;</span><span class=identifier>CharT </span><span class=keyword>const</span><span class=special>*&gt;
</span><span class=identifier>parse
</span><span class=special>(
</span><span class=identifier>CharT </span><span class=keyword>const</span><span class=special>* </span><span class=identifier>str</span><span class=special>,
</span><span class=identifier>parser</span><span class=special>&lt;</span><span class=identifier>DerivedT</span><span class=special>&gt; </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>p
</span><span class=special>);</span></font></code></pre>
<p>There are two variants. The first variant accepts a <tt>first</tt>, <tt>last</tt>
iterator pair, just as STL algorithms do. The second variant accepts a null
terminated string. The last argument is a parser <tt>p</tt> which will be used
to parse the input.</p>
<p><b>Phrase level parsing</b></p>
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>IteratorT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>ParserT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>SkipT</span><span class=special>&gt;
</span><span class=identifier>parse_info</span><span class=special>&lt;</span><span class=identifier>IteratorT</span><span class=special>&gt;
</span><span class=identifier>parse
</span><span class=special>(
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>first</span><span class=special>,
</span><span class=identifier>IteratorT </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>last</span><span class=special>,
</span><span class=identifier>parser</span><span class=special>&lt;</span><span class=identifier>ParserT</span><span class=special>&gt; </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>p</span><span class=special>,
</span><span class=identifier>parser</span><span class=special>&lt;</span><span class=identifier>SkipT</span><span class=special>&gt; </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>skip
</span><span class=special>);</span></font></code></pre>
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>CharT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>ParserT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>SkipT</span><span class=special>&gt;
</span><span class=identifier>parse_info</span><span class=special>&lt;</span><span class=identifier>CharT </span><span class=keyword>const</span><span class=special>*&gt;
</span><span class=identifier>parse
</span><span class=special>(
</span><span class=identifier>CharT </span><span class=keyword>const</span><span class=special>* </span><span class=identifier>str</span><span class=special>,
</span><span class=identifier>parser</span><span class=special>&lt;</span><span class=identifier>ParserT</span><span class=special>&gt; </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>p</span><span class=special>,
</span><span class=identifier>parser</span><span class=special>&lt;</span><span class=identifier>SkipT</span><span class=special>&gt; </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>skip
</span><span class=special>);</span></font></code></pre>
<p>Like above, there are two variants. The first variant accepts a <tt>first</tt>,
<tt>last</tt> iterator pair, just as STL algorithms do. The second variant accepts
a null terminated string. The argument <tt>p</tt> is the parser which will be
used to parse the input. The last argument <tt>skip</tt> is the skip parser.</p>
<p><b>The parse_info structure</b></p>
<p>The functions above return a <tt>parse_info</tt> structure parameterized by
the iterator type passed in. The parse_info struct has these members:</p>
<table width="90%" border="0" align="center">
<tr>
<td colspan="2" class="table_title"><b>parse_info</b></td>
</tr>
<tr>
<td width="14%" class="table_cells"><b>stop</b></td>
<td width="86%" class="table_cells">Points to the final parse position (i.e.
the parser recognized and processed the input up to this point)</td>
</tr>
<tr>
<td width="14%" class="table_cells"><b>hit</b></td>
<td width="86%" class="table_cells">True if parsing is successful. This may
be full: the parser consumed all the input, or partial: the parser consumed
only a portion of the input.</td>
</tr>
<tr>
<td width="14%" class="table_cells"><b>full</b></td>
<td width="86%" class="table_cells">True when we have a full match (i.e. the
parser consumed all the input).</td>
</tr>
<tr>
<td width="14%" class="table_cells"><b>length</b></td>
<td width="86%" class="table_cells">The number of characters consumed by the
parser. This is valid only if we have a successful match (either partial
or full). </td>
</tr>
</table>
<h2><a name="phrase_scanner_t" id="phrase_scanner_t"></a><img src="theme/lens.gif" width="15" height="16">
The phrase_scanner_t and wide_phrase_scanner_t</h2>
<p>For convenience, Spirit declares these typedefs:</p>
<pre>
<span class="keyword">typedef</span> scanner<span class="special">&lt;</span><span class="keyword">char const</span><span class="special">*,</span> unspecified<span class="special">&gt;</span> phrase_scanner_t<span class="special">;</span>
<span class="keyword">typedef</span> scanner<span class="special">&lt;</span><span class="keyword">wchar_t const</span><span class="special">*,</span> <span class="identifier">unspecified</span><span class="special">&gt;</span> wide_phrase_scanner_t<span class="special">;</span>
</pre>
<p>These are the exact scanner types used by Spirit on calls to the parse function
passing in a <tt>char const*</tt> (C string) or a <tt>wchar_t const*</tt> (wide
string) as the first parameter and a <tt>space_p</tt> as skip-parser (the third
parameter). For instance, we can use these typedefs to declare some rules. Example:</p>
<pre> rule<span class="special">&lt;</span>phrase_scanner_t<span class="special">&gt; </span><span class="identifier">my_rule</span><span class="special">;
</span><span class="identifier">parse</span><span class="special">(</span><span class="string">&quot;abrakadabra&quot;</span><span class="special">, </span><span class="identifier">my_rule</span><span class="special">,</span> <span class="identifier">space_p</span><span class="special">);</span></pre>
<h2><img src="theme/lens.gif" width="15" height="16"> Direct parsing with Iterators</h2>
<p>The free parse functions make it easy for us. By using them, we need not bother
with the scanner intricacies. The free parse functions hide the dirty details.
However, sometime in the future, we will need to get under the hood. It's nice
that we know what we are dealing with when that need comes. We will need to
go low-level and call the parser's parse member function directly. </p>
<p>If we wish to work on the <b>character level</b>, the procedure is quite simple:</p>
<pre><span class=identifier> </span><span class=identifier>scanner</span><span class=special>&lt;</span><span class=identifier>IteratorT</span><span class=special>&gt; </span><span class=identifier>scan</span><span class=special>(</span><span class=identifier>first</span><span class=special>, </span><span class=identifier>last</span><span class=special>);
</span><span class=keyword>if </span><span class=special>(</span><span class=identifier>p</span><span class=special>.</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>scan</span><span class=special>))
</span><span class=special>{
</span><span class=comment>// Parsed successfully. If first == last, then we have
// a full parse, the parser recognized the input in whole.
</span><span class=special>}
</span><span class=keyword>else
</span><span class=special>{
</span><span class=comment>// Parsing failure. The parser failed to recognize the input
</span><span class=special>}</span></pre>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><img src="theme/alert.gif" width="16" height="16"> <strong>The
scanner position on an unsuccessful match</strong><br> <br>
On a successful match, the input is advanced accordingly. But what happens
on an unsuccessful match? Be warned. It might be intuitive to think that
the scanner position is reset to its initial position prior to parsing.
No, the position is not reset. On an unsuccessful match, the position of
the scanner is <strong>undefined</strong>! Usually, it is positioned at
the farthest point where the error was found somewhere down the recursive
descent. If this behavior is not desired, you may need to position the scanner
yourself. The <a href="numerics.html#scanner_save">example in the numerics
chapter</a> illustrates how the scanner position can be saved and later
restored.</td>
</tr>
</table>
<p>Where <tt>p</tt> is the parser we want to use, and <tt>first</tt>/<tt>last</tt>
are the iterator pair referring to the input. We just create a scanner given
the iterators. The scanner type we will use here uses the default <tt>scanner_policies&lt;&gt;</tt>.</p>
<p>The situation is a bit more complex when we wish to work on the <b>phrase level</b>:</p>
<pre><span class=special> </span><span class=keyword>typedef </span><span class=identifier>skip_parser_iteration_policy</span><span class=special>&lt;</span><span class=identifier>SkipT</span><span class=special>&gt; </span><span class=identifier>iter_policy_t</span><span class=special>;
</span><span class=keyword>typedef </span><span class=identifier>scanner_policies</span><span class=special>&lt;</span><span class=identifier>iter_policy_t</span><span class=special>&gt; </span><span class=identifier>scanner_policies_t</span><span class=special>;
</span><span class=keyword>typedef </span><span class=identifier>scanner</span><span class=special>&lt;</span><span class=identifier>IteratorT</span><span class=special>, </span><span class=identifier>scanner_policies_t</span><span class=special>&gt; </span><span class=identifier>scanner_t</span><span class=special>;
</span><span class=special> </span><span class=identifier>iter_policy_t </span><span class=identifier>iter_policy</span><span class=special>(</span><span class=identifier>skip</span><span class=special>);
</span><span class=identifier>scanner_policies_t </span><span class=identifier>policies</span><span class=special>(</span><span class=identifier>iter_policy</span><span class=special>);
</span><span class=identifier>scanner_t </span><span class=identifier>scan</span><span class=special>(</span><span class=identifier>first</span><span class=special>, </span><span class=identifier>last</span><span class=special>, </span><span class=identifier>policies</span><span class=special>);
</span>
<span class=keyword>if </span><span class=special>(</span><span class=identifier>p</span><span class=special>.</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>scan</span><span class=special>))
</span><span class=special>{
</span><span class=comment>// Parsed successfully. If first == last, then we have
// a full parse, the parser recognized the input in whole.
</span><span class=special>}
</span><span class=keyword>else
</span><span class=special>{
</span><span class=comment>// Parsing failure. The parser failed to recognize the input
</span><span class=special>}</span></pre>
<p>Where <tt>SkipT</tt> is the type of the skip-parser, <tt>skip</tt>. Again,
<tt>p</tt> is the parser we want to use, and <tt>first</tt>/<tt>last</tt> are
the iterator pair referring to the input. Given a skip-parser type <tt>SkipT</tt>,
<span class=identifier><tt>skip_parser_iteration_policy</tt></span> creates
a scanner iteration policy that skips over portions that are recognized by the
skip-parser. This may then be used to create a scanner. The <tt>scanner_policies</tt>
class wraps all scanner related policies including the iteration policies.</p>
<h2><a name="lexeme_scanner"></a>lexeme_scanner</h2>
<p>When switching from phrase level to character level parsing, the <tt>lexeme_d</tt>
(see <a href="directives.html">directives.html</a>) does its magic by disabling
the skipping of white spaces. This is done by tweaking the <a href="scanner.html">scanner</a>.
However, when we do this, all parsers inside the lexeme get a transformed scanner
type. This should not be a problem in most cases. However, when rules are called
inside the <tt>lexeme_d</tt>, the compiler will choke if the rule does not have
the proper scanner type. If a rule must be used inside a <tt>lexeme_d</tt>,
the rule's type must be:</p>
<pre> <span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>lexeme_scanner</span><span class="special">&lt;</span><span class=identifier>ScannerT</span><span class=special>&gt;::</span><span class="identifier">type</span><span class=special>&gt; </span>r<span class=special>;</span></pre>
<p>where <span class=identifier><tt>ScannerT</tt></span> is the actual type of
the scanner used. Take note that <tt>lexeme_scanner</tt> will only work for phrase level scanners. </p>
<h2><a name="as_lower_scanner"></a>as_lower_scanner</h2>
<p>Similarly, the <tt>as_lower_d</tt> does its work by filtering and converting
all characters received from the scanner to lower case. This is also done by
tweaking the <a href="scanner.html">scanner</a>. Then again, all parsers inside
the <tt>as_lower_d</tt> get a transformed scanner type. If a rule must be used
inside an <tt>as_lower_d</tt>, the rule's type must be:</p>
<pre> <span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>as_lower_scanner</span><span class="special">&lt;</span><span class=identifier>ScannerT</span><span class=special>&gt;::</span><span class="identifier">type</span><span class=special>&gt; </span>r<span class=special>;</span></pre>
<p>where <span class=identifier><tt>ScannerT</tt></span> is the actual type of
the scanner used. </p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><img src="theme/bulb.gif" width="13" height="18"> See
the techniques section for an <a href="techniques.html#multiple_scanner_support">example</a>
of a <a href="grammar.html">grammar</a> using a <a href="rule.html#multiple_scanner_support">multiple
scanner enabled rule</a>, <a href="scanner.html#lexeme_scanner">lexeme_scanner</a>
and <a href="scanner.html#as_lower_scanner">as_lower_scanner.</a></td>
</tr>
</table>
<h3><a name="no_actions_scanner"></a>no_actions_scanner</h3>
<p>Again, <tt>no_actions_d</tt> directive tweaks the scanner to disable firing
semantic actions. Like before, all parsers inside the <tt>no_actions_d</tt>
get a transformed scanner type. If a rule must be used inside a <tt>no_actions_d</tt>,
the rule's type must be:</p>
<pre> <span class=identifier>rule</span><span class=special>&lt;</span>no_actions_scanner<span class="special">&lt;</span><span class=identifier>ScannerT</span><span class=special>&gt;::</span><span class="identifier">type</span><span class=special>&gt; </span>r<span class=special>;</span></pre>
<p>where <tt>ScannerT</tt> is the actual type of the scanner used. <span class=special></span></p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><img src="theme/note.gif" width="16" height="16"> Be
sure to add &quot;<tt>typename</tt>&quot; before <span class=identifier><tt>lexeme_scanner</tt>,
<tt>as_lower_scanner</tt></span> and <tt>no_actions_scanner</tt> when
these are used inside a template class or function.</td>
</tr>
</table>
<p><img src="theme/lens.gif" width="15" height="16"> See <a href="../example/fundamental/no_actions.cpp">no_actions.cpp</a>. This is part of the Spirit distribution.</p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="directives.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="grammar.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)</font></p>
<p>&nbsp;</p>
<p>&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,54 @@
<html>
<head>
<title>Scoped Lock</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Scoped
Lock</b></font></td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="regular_expression_parser.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="distinct.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<h2>scoped_lock_d</h2>
<p>The <tt>scoped_lock_d</tt> directive constructs a parser that locks a mutex
during the attempt to match the contained parser.</p>
<p>Syntax:</p>
<pre> <span class="identifier">scoped_lock_d</span><span class="special">(</span>mutex<span class="special">&amp;)[</span>body-parser<span class="special">]</span></pre>
<p>Note that nesting <tt>scoped_lock_d</tt> directives bears the risk of deadlocks
since the order of locking depends on the grammar used and may even depend on
the input being parsed. Locking order has to be consistent within an application
to ensure deadlock free operation.</p>
<p></p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="regular_expression_parser.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="distinct.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 2003 Martin Wille<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)</font></p>
<p class="copyright">&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,96 @@
<html>
<head>
<title>The Select Parser</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
<style type="text/css">
<!--
.style1 {font-family: "Courier New", Courier, mono}
.style2 {font-family: "Courier New", Courier, mono; font-style: italic; }
.style3 {font-family: "Courier New", Courier, mono; color: #FF0000; }
-->
</style>
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10"> </td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>The Select Parser </b></font></td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="the_lazy_parser.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="switch_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>Select parsers may be used to identify a single parser from a given list
of parsers, which successfully recognizes the current input sequence. Example:</p>
<pre> rule<span class="special">&lt;&gt;</span> rule_select <span class="special">=</span>
select_p<span class="special">
(</span>
parser_a<span class="special">
,</span> parser_b<span class="special">
<span class="comment">/* ... */</span>
,</span> parser_n
<span class="special">)</span><span class="special">;</span></pre>
<p>The parsers (parser_a, parser_b etc.) are tried sequentially from left to right until a parser matches the current input sequence.
If there is a matching parser found, the <tt>select_p</tt> parser returns
the parser's position (zero based index). For instance, in the example above, <tt>1</tt> is returned if parser_b
matches.</p>
<p>There are two predefined parsers of the select parser family: <tt>select_p</tt>
and <tt>select_fail_p</tt>. These parsers differ in the way the no match
case is handled (when none of the parsers match the current input sequence).
While the <tt>select_p</tt> parser will return <tt>-1</tt>
if no matching parser is found, the <tt>select_fail_p</tt> parser will not match
at all.</p>
<p>The following sample shows how the select parser may be used very conveniently
in conjunction with a <a href="switch_parser.html">switch parser</a>:</p>
<pre> <span class="keyword">int</span> choice <span class="special">=</span> <span class="literal">-1</span><span class="special">;</span>
rule<span class="special">&lt;&gt;</span> rule_select <span class="special">=</span>
select_fail_p<span class="special">(</span><span class="literal">'a'</span><span class="special">,</span> <span class="literal">'b'</span><span class="special">,</span> <span class="literal">'c'</span><span class="special">,</span> <span class="literal">'d'</span><span class="special">)[</span>assign_a<span class="special">(</span>choice<span class="special">)]</span>
&gt;&gt; switch_p(var<span class="special">(</span>choice)) <span class="special">
[</span><br> case_p<span class="special">&lt;</span><span class="literal">0</span><span class="special">&gt;(</span>int_p<span class="special">),</span><br> case_p<span class="special">&lt;</span><span class="literal">1</span><span class="special">&gt;(</span>ch_p<span class="special">(</span><span class="literal">','</span><span class="special">)),</span><br> case_p<span class="special">&lt;</span><span class="literal">2</span><span class="special">&gt;(</span>str_p<span class="special">(</span><span class="string">&quot;bcd&quot;</span><span class="special">)),</span><br> default_p<br> <span class="special">]</span><br><span class="special"> ;</span></pre>
<p>This example shows a rule, which matches:</p>
<ul>
<li><span class="literal"> 'a' </span>followed
by an integer</li>
<li><span class="literal">'b' </span>followed by a<span class="literal">
','</span></li>
<li><span class="literal">'c'</span> followed by <span class="string">&quot;bcd&quot;</span></li>
<li>a single <span class="literal">'d'</span>. </li>
</ul>
<p>For other input sequences the
given rule does not match at all.</p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><p><img src="theme/alert.gif" width="16" height="16"> <tt>BOOST_SPIRIT_SELECT_LIMIT</tt><br>
<br>
The number of possible entries inside the <tt>select_p</tt> parser is limited by the Spirit compile time constant <tt>BOOST_SPIRIT_SELECT_LIMIT</tt>, which defaults to 3. This value should not be greater than the compile time constant given by <tt>PHOENIX_LIMIT</tt> (see <a href="../phoenix/index.html">phoenix</a>). Example:</p>
<p class="style1"><span class="comment">// Define these before including anything else <br>
</span><span class="style3">#define</span> PHOENIX_LIMIT 10<br>
<span class="preprocessor">#define</span> BOOST_SPIRIT_SELECT_LIMIT 10 </p></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="the_lazy_parser.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="switch_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 2003-2004 Hartmut Kaiser <br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) </font> </p>
<p>&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,258 @@
<html>
<head>
<title>Semantic Actions</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%">
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Semantic Actions</b></font>
</td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="subrules.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="indepth_the_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>Semantic actions have the form: <b>expression[action]</b></p>
<p>Ultimately, after having defined our grammar and having generated a corresponding
parser, we will need to produce some output and do some work besides syntax
analysis; unless, of course, what we want is merely to check for the conformance
of an input with our grammar, which is very seldom the case. Semantic actions
may be attached to any expression at any level within the parser hierarchy.
An action is a C/C++ function or function object that will be called if a match
is found in the particular context where it is attached. The action function
serves as a hook into the parser and may be used to, for example:</p>
<blockquote>
<p><img src="theme/bullet.gif" width="13" height="13"> Generate output from
the parser (ASTs, for example)<br>
<img src="theme/bullet.gif" width="13" height="13"> Report warnings or errors<br>
<img src="theme/bullet.gif" width="13" height="13"> Manage symbol tables</p>
</blockquote>
<h2>Generic Semantic Actions (Transduction Interface)</h2>
<p>A generic semantic action can be any free function or function object that
is compatible with the interface:</p>
<pre><code><font color="#000000"><span class=identifier></span><span class=keyword> void </span><span class=identifier>f</span><span class=special>(</span><span class=identifier>IteratorT </span><span class=identifier>first</span><span class=special>, </span><span class=identifier>IteratorT </span><span class=identifier>last</span><span class=special>);</span></font></code></pre>
<p>where <tt>IteratorT</tt> is the type of iterator used, <tt>first</tt> points
to the current input and <tt>last</tt> points to one after the end of the input
(identical to STL iterator ranges). A function object (functor) should have
a member <tt>operator()</tt> with the same signature as above:</p>
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>struct </span><span class=identifier>my_functor
</span><span class=special>{
</span><span class=keyword>void </span><span class=keyword>operator</span><span class=special>()(</span><span class=identifier>IteratorT </span><span class=identifier>first</span><span class=special>, </span><span class=identifier>IteratorT </span><span class=identifier>last</span><span class=special>) </span><span class=keyword>const</span><span class=special>;
</span><span class=special>};</span></font></code></pre>
<p>Iterators pointing to the matching portion of the input are passed into the
function/functor.</p>
<p>In general, semantic actions accept the first-last iterator pair. This is the
transduction interface. The action functions or functors receive the unprocessed
data representing the matching production directly from the input. In many cases,
this is sufficient. Examples are source to source translation, pre-processing,
etc. </p>
<h3>Example:</h3>
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>void
</span><span class=identifier>my_action</span><span class=special>(</span><span class=keyword>char const</span><span class=special>* </span><span class=identifier>first</span><span class=special>, </span><span class=keyword>char const</span><span class=special>* </span><span class=identifier>last</span><span class=special>)
{
</span><span class=identifier>std</span><span class=special>::</span><span class=identifier>string</span><span class=special> </span><span class="identifier">str</span><span class=special>(</span><span class=identifier>first</span><span class=special>, </span><span class=identifier>last</span><span class=special>);
</span><span class=identifier>std</span><span class=special>::</span><span class=identifier>cout </span><span class=special>&lt;&lt; </span><span class=identifier>str </span><span class=special>&lt;&lt; </span><span class=identifier>std</span><span class=special>::</span><span class=identifier>endl</span><span class=special>;
}
</span><span class=identifier>rule</span><span class=special>&lt;&gt; </span><span class=identifier>myrule </span><span class=special>= (</span><span class=identifier>a </span><span class=special>| </span><span class=identifier>b </span><span class=special>| *(</span><span class=identifier>c </span><span class=special>&gt;&gt; </span><span class=identifier>d</span><span class=special>))[&</span><span class=identifier>my_action</span><span class=special>];</span></font></code></pre>
<p>The function <tt>my_action</tt> will be called whenever the expression <tt>(a
| b | *(c &gt;&gt; d))</tt> matches a portion of the input stream while parsing.
Two iterators, <tt>first</tt> and <tt>last</tt>, are passed into the function.
These iterators point to the start and end, respectively, of the portion of
input stream where the match is found.</p>
<h3>Const-ness:</h3>
<p>With functors, take note that the <tt>operator()</tt> should be <tt>const</tt>.
This implies that functors are immutable. One may wish to have some member variables
that are modified when the action gets called. This is not a good idea. First
of all, functors are preferably lightweight. Functors are passed around a lot
and it would incur a lot of overhead if the functors are heavily laden. Second,
functors are passed by value. Thus, the actual functor object that finally attaches
to the parser, will surely not be the original instance supplied by the client.
What this means is that changes to a functor's state will not affect the original
functor that the client passed in since they are distinct copies. If a functor
needs to update some state variables, which is often the case, it is better
to use references to external data. The following example shows how this can
be done:</p>
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>struct </span><span class=identifier>my_functor
</span><span class=special>{
</span><span class=identifier>my_functor</span><span class=special>(</span><span class=identifier>std</span><span class=special>::</span><span class=identifier>string</span><span class=special>& </span><span class=identifier>str_</span><span class=special>)
</span><span class=special>: </span><span class=identifier>str</span><span class=special>(</span><span class=identifier>str_</span><span class=special>) </span><span class=special>{}
</span><span class=keyword>void
</span><span class=keyword>operator</span><span class=special>()(</span><span class=identifier>IteratorT </span><span class=identifier>first</span><span class=special>, </span><span class=identifier>IteratorT </span><span class=identifier>last</span><span class=special>) </span><span class=keyword>const
</span><span class=special>{
</span><span class=identifier>str</span><span class=special>.</span><span class=identifier>assign</span><span class=special>(</span><span class=identifier>first</span><span class=special>, </span><span class=identifier>last</span><span class=special>);
</span><span class=special>}
</span><span class=identifier>std</span><span class=special>::</span><span class=identifier>string</span><span class=special>& </span><span class=identifier>str</span><span class=special>;
</span><span class=special>};</span></font></code></pre>
<h3>Full Example:</h3>
<p>Here now is our calculator enhanced with semantic actions:</p>
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>namespace
</span><span class=special>{
</span><span class=keyword>void </span><span class=identifier>do_int</span><span class=special>(</span><span class=keyword>char </span><span class=keyword>const</span><span class=special>* </span><span class=identifier>str</span><span class=special>, </span><span class=keyword>char </span><span class=keyword>const</span><span class=special>* </span><span class=identifier>end</span><span class=special>)
</span><span class=special>{
</span><span class=identifier>string </span><span class=identifier>s</span><span class=special>(</span><span class=identifier>str</span><span class=special>, </span><span class=identifier>end</span><span class=special>);
</span><span class=identifier>cout </span><span class=special>&lt;&lt; </span><span class=string>"PUSH(" </span><span class=special>&lt;&lt; </span><span class=identifier>s </span><span class=special>&lt;&lt; </span><span class=literal>')' </span><span class=special>&lt;&lt; </span><span class=identifier>endl</span><span class=special>;
</span><span class=special>}
</span><span class=keyword>void </span><span class=identifier>do_add</span><span class=special>(</span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*, </span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*) </span><span class=special>{ </span><span class=identifier>cout </span><span class=special>&lt;&lt; </span><span class=string>"ADD\n"</span><span class=special>; </span><span class=special>}
</span><span class=keyword>void </span><span class=identifier>do_subt</span><span class=special>(</span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*, </span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*) </span><span class=special>{ </span><span class=identifier>cout </span><span class=special>&lt;&lt; </span><span class=string>"SUBTRACT\n"</span><span class=special>; </span><span class=special>}
</span><span class=keyword>void </span><span class=identifier>do_mult</span><span class=special>(</span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*, </span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*) </span><span class=special>{ </span><span class=identifier>cout </span><span class=special>&lt;&lt; </span><span class=string>"MULTIPLY\n"</span><span class=special>; </span><span class=special>}
</span><span class=keyword>void </span><span class=identifier>do_div</span><span class=special>(</span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*, </span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*) </span><span class=special>{ </span><span class=identifier>cout </span><span class=special>&lt;&lt; </span><span class=string>"DIVIDE\n"</span><span class=special>; </span><span class=special>}
</span><span class=keyword>void </span><span class=identifier>do_neg</span><span class=special>(</span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*, </span><span class=keyword>char </span><span class=keyword>const</span><span class=special>*) </span><span class=special>{ </span><span class=identifier>cout </span><span class=special>&lt;&lt; </span><span class=string>"NEGATE\n"</span><span class=special>; </span><span class=special>}
</span><span class=special>}</span></font></code></pre>
<p>We augment our grammar with semantic actions:</p>
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>struct </span><span class=identifier>calculator </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>grammar</span><span class=special>&lt;</span><span class=identifier>calculator</span><span class=special>&gt;
</span><span class=special>{
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>definition
</span><span class=special>{
</span><span class=identifier>definition</span><span class=special>(</span><span class=identifier>calculator </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>self</span><span class=special>)
</span><span class=special>{
</span><span class=identifier>expression
</span><span class=special>= </span><span class=identifier>term
</span><span class=special>&gt;&gt; </span><span class=special>*( </span><span class=special>(</span><span class=literal>'+' </span><span class=special>&gt;&gt; </span><span class=identifier>term</span><span class=special>)[&</span><span class=identifier>do_add</span><span class=special>]
</span><span class=special>| </span><span class=special>(</span><span class=literal>'-' </span><span class=special>&gt;&gt; </span><span class=identifier>term</span><span class=special>)[&</span><span class=identifier>do_subt</span><span class=special>]
</span><span class=special>)
</span><span class=special>;
</span><span class=identifier>term </span><span class=special>=
</span><span class=identifier>factor
</span><span class=special>&gt;&gt; </span><span class=special>*( </span><span class=special>(</span><span class=literal>'*' </span><span class=special>&gt;&gt; </span><span class=identifier>factor</span><span class=special>)[&</span><span class=identifier>do_mult</span><span class=special>]
</span><span class=special>| </span><span class=special>(</span><span class=literal>'/' </span><span class=special>&gt;&gt; </span><span class=identifier>factor</span><span class=special>)[&</span><span class=identifier>do_div</span><span class=special>]
</span><span class=special>)
</span><span class=special>;
</span><span class=identifier>factor
</span><span class=special>= </span><span class=identifier>lexeme_d</span><span class=special>[(+</span><span class=identifier>digit_p</span><span class=special>)[&</span><span class=identifier>do_int</span><span class=special>]]
</span><span class=special>| </span><span class=literal>'(' </span><span class=special>&gt;&gt; </span><span class=identifier>expression </span><span class=special>&gt;&gt; </span><span class=literal>')'
</span><span class=special>| </span><span class=special>(</span><span class=literal>'-' </span><span class=special>&gt;&gt; </span><span class=identifier>factor</span><span class=special>)[&</span><span class=identifier>do_neg</span><span class=special>]
</span><span class=special>| </span><span class=special>(</span><span class=literal>'+' </span><span class=special>&gt;&gt; </span><span class=identifier>factor</span><span class=special>)
</span><span class=special>;
</span><span class=special>}
</span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>&gt; </span><span class=identifier>expression</span><span class=special>, </span><span class=identifier>term</span><span class=special>, </span><span class=identifier>factor</span><span class=special>;
</span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>&gt; </span><span class=keyword>const</span><span class=special>&
</span><span class=identifier>start</span><span class=special>() </span><span class=keyword>const </span><span class=special>{ </span><span class=keyword>return </span><span class=identifier>expression</span><span class=special>; </span><span class=special>}
</span><span class=special>};
</span><span class=special>};</span></font></code></pre>
<p>Feeding in the expression <tt>(-1 + 2) * (3 + -4)</tt>, for example, to the
rule <tt>expression</tt> will produce the expected output:</p>
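<pre><code><span class=identifier>PUSH</span><span class=special>(</span><span class=number>1</span><span class=special>)
</span><span class=identifier>NEGATE
</span><span class=identifier>PUSH</span><span class=special>(</span><span class=number>2</span><span class=special>)
</span><span class=identifier>ADD
</span><span class=identifier>PUSH</span><span class=special>(</span><span class=number>3</span><span class=special>)
</span><span class=identifier>PUSH</span><span class=special>(</span><span class=number>4</span><span class=special>)
</span><span class=identifier>NEGATE
</span><span class=identifier>ADD
</span><span class=identifier>MULTIPLY</span></code></pre>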
<p>which, by the way, is the Reverse Polish Notation (RPN) of the given expression,
reminiscent of some primitive calculators and the language Forth.</p>
<p><img src="theme/lens.gif" width="15" height="16"> <a href="../example/fundamental/calc_plain.cpp">View
the complete source code here</a>. This is part of the Spirit distribution.
</p>
<h2><a name="specialized_actions"></a>Specialized Actions</h2>
<p>In general, semantic actions accept the first-last iterator pair. There are
situations though where we might want to pass data in its processed form. A
concrete example is the numeric parser. It is unwise to pass unprocessed data
to a semantic action attached to a numeric parser and just throw away what has
been parsed by the parser. We want to pass the actual parsed number.</p>
<p>The function and functor signature of a semantic action varies depending on
the parser to which it is attached. The following table lists the parsers that
accept unique signatures.</p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><img src="theme/note.gif" width="16" height="16"> Unless
explicitly stated in the documentation of a specific parser type, parsers
not included in the list by default expect the generic signature as explained
above.</td>
</tr>
</table>
<h3>Numeric Actions</h3>
<p><b>Applies to:</b></p>
<blockquote>
<p><img src="theme/bullet.gif" width="13" height="13"> uint_p<br>
<img src="theme/bullet.gif" width="13" height="13"> int_p<br>
<img src="theme/bullet.gif" width="13" height="13"> ureal_p<br>
<img src="theme/bullet.gif" width="13" height="13"> real_p</p>
</blockquote>
<p><b>Signature for functions:</b></p>
<pre><code><font color="#000000"><span class=identifier> </span><span class=keyword>void </span><span class=identifier>func</span><span class=special>(</span><span class=identifier>NumT </span><span class=identifier>val</span><span class=special>);</span></font></code></pre>
<p><b>Signature for functors:</b> </p>
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>struct </span><span class=identifier>ftor
</span><span class=special>{
</span><span class=keyword>void </span><span class=keyword>operator</span><span class=special>()(</span><span class=identifier>NumT </span><span class=identifier>val</span><span class=special>) </span><span class=keyword>const</span><span class=special>;
</span><span class=special>};</span></font></code></pre>
<p>Where <tt>NumT</tt> is any primitive numeric type such as <tt>int</tt>, <tt>long</tt>,
<tt>float</tt>, <tt>double</tt>, etc., or a user defined numeric type such as
big_int. <tt>NumT</tt> is the same type used as template parameter to <tt>uint_p</tt>,
<tt>int_p</tt>, <tt>ureal_p</tt> or <tt>real_p</tt>. The parsed number is passed
into the function/functor.</p>
<h3>Character Actions</h3>
<p><b>Applies to:</b></p>
<blockquote>
<p><img src="theme/bullet.gif" width="13" height="13"> chlit, ch_p<br>
<img src="theme/bullet.gif" width="13" height="13"> range, range_p<br>
<img src="theme/bullet.gif" width="13" height="13"> anychar<br>
<img src="theme/bullet.gif" width="13" height="13"> alnum, alpha<br>
<img src="theme/bullet.gif" width="13" height="13"> cntrl, digit<br>
<img src="theme/bullet.gif" width="13" height="13"> graph, lower<br>
<img src="theme/bullet.gif" width="13" height="13"> print, punct<br>
<img src="theme/bullet.gif" width="13" height="13"> space, upper<br>
<img src="theme/bullet.gif" width="13" height="13"> xdigit</p>
</blockquote>
<p><b>Signature for functions:</b></p>
<pre><code><font color="#000000"><span class=identifier> </span><span class=keyword>void </span><span class=identifier>func</span><span class=special>(</span><span class=identifier>CharT </span><span class=identifier>ch</span><span class=special>);</span></font></code></pre>
<p><b>Signature for functors:</b></p>
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>struct </span><span class=identifier>ftor
</span><span class=special>{
</span><span class=keyword>void </span><span class=keyword>operator</span><span class=special>()(</span><span class=identifier>CharT </span><span class=identifier>ch</span><span class=special>) </span><span class=keyword>const</span><span class=special>;
</span><span class=special>};</span></font></code></pre>
<p>Where <tt>CharT</tt> is the value_type of the iterator used in parsing. A <tt>char
const*</tt> iterator for example has a <tt>value_type</tt> of <tt>char</tt>.
The matching character is passed into the function/functor.</p>
<h2>Cascading Actions</h2>
<p>Actions can be cascaded. Cascaded actions also inherit the function/functor
interface of the original. For example:</p>
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>uint_p</span><span class=special>[</span><span class=identifier>fa</span><span class=special>][</span><span class=identifier>fb</span><span class=special>][</span><span class=identifier>fc</span><span class=special>]</span></font></code></pre>
<p>Here, the functors <tt>fa</tt>, <tt>fb</tt> and <tt>fc</tt> all expect the
signature <tt>void operator()(unsigned n) const</tt>.</p>
<h2>Directives and Actions</h2>
<p>A directive inherits the function/functor interface of the subject it
encloses. Example:</p>
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>as_lower_d</span><span class=special>[</span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'x'</span><span class=special>)][</span><span class=identifier>f</span><span class=special>]</span></font></code></pre>
<p>Here, the functor <tt>f</tt> expects the signature <tt>void operator()(char
ch) const</tt>, assuming that the iterator used is a <tt>char const*</tt>.</p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="subrules.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="indepth_the_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)</font></p>
<p>&nbsp;</p>
<p>&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,99 @@
<html>
<head>
<title>Storable Rules</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Storable
Rules</b></font></td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="stored_rule.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="the_lazy_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>The rule is a weird C++ citizen, unlike any other C++ object. It does not have
the proper copy and assignment semantics and cannot be stored and passed around
by value. You cannot store rules in STL containers (vector, stack, etc) for
later use and you cannot pass and return rules to and from functions by value.</p>
<p>EBNF is primarily declarative. Like in functional programming, an EBNF grammar
is a static recipe and there's no notion of do this then that. However, in Spirit,
we managed to coax imperative C++ to take in declarative EBNF. Hah! Fun!...
We did that by masquerading the C++ assignment operator to mimic EBNF's <tt>::=</tt>.
To do that, we gave the rule class' assignment operator and copy constructor
a different meaning and semantics. The downside is that doing so made the rule
unlike any other C++ object. You can't copy it. You can't assign it. </p>
<p>We want to have the dynamic nature of C++ to our advantage. We've seen dynamic
Spirit in action here and there. There are indeed some interesting applications
of dynamic parsers using Spirit. Yet, we will not fully utilize the power of
dynamic parsing, unless we have a rule that behaves like any other good C++
object. With such a beast, we can write full parsers that are defined at run time,
as opposed to compile time.</p>
<p>We now have dynamic rules: <tt>stored_rules</tt>. Basically they are rules
with perfect C++ assignment/copy-constructor semantics. This means that <tt>stored_rules</tt>
can be stored in containers and/or dynamically created at run-time.</p>
<pre><code><font color="#000000"><span class=identifier> </span><span class=keyword>template</span><span class=special>&lt;
</span><span class=keyword>typename </span><span class=identifier>ScannerT </span><span class=special>= </span><span class=identifier>scanner</span><span class=special>&lt;&gt;,
</span><span class=keyword>typename </span><span class=identifier>ContextT </span><span class=special>= </span><span class=identifier>parser_context</span><span class=special>&lt;&gt;</span><span class=identifier>,
</span><span class="keyword">typename</span><span class=identifier> TagT </span><span class="special">=</span><span class=identifier> parser_address_tag</span><span class=special>&gt;
</span><span class=keyword>class </span><span class=identifier>stored_rule</span><span class=special>;</span></font></code></pre>
<p>The interface is exactly the same as with the rule class (see the <a href="rule.html">section
on rules</a> for more information regarding the API). The only difference is
with the copy and assignment semantics. Now, with <tt>stored_rule</tt>s, we
can dynamically and algorithmically define our rules. Here are some samples...
</p>
<p>Say I want to dynamically create a rule for:</p>
<pre>
<span class=identifier> start </span><span class=special>= </span><span class=special>*(</span><span class=identifier>a </span><span class=special>| </span><span class=identifier>b </span><span class=special>| </span><span class=identifier>c</span><span class=special>);</span></pre>
<p> I can write it dynamically step-by-step:</p>
<pre> <span class=identifier> stored_rule</span><span class=special>&lt;&gt; </span><span class=identifier>start</span><span class=special>;
</span><span class=identifier>start </span><span class=special>= </span><span class=identifier>a</span><span class=special>;
</span><span class=identifier>start </span><span class=special>= </span><span class=identifier>start</span><span class=special>.</span><span class=identifier>copy</span><span class=special>() </span><span class=special>| </span><span class=identifier>b</span><span class=special>;
</span><span class=identifier>start </span><span class=special>= </span><span class=identifier>start</span><span class=special>.</span><span class=identifier>copy</span><span class=special>() </span><span class=special>| </span><span class=identifier>c</span><span class=special>;
</span><span class=identifier>start </span><span class=special>= </span><span class=special>*(</span><span class=identifier>start</span><span class=special>.</span><span class=identifier>copy</span><span class=special>());</span></pre>
<p>Later, I changed my mind and want to redefine it (again dynamically) as:</p>
<pre><span class=identifier> start </span><span class=special>= </span><span class=special>(</span><span class=identifier>a </span><span class=special>| </span><span class=identifier>b</span><span class=special>) </span><span class=special>&gt;&gt; </span><span class=special>(</span><span class=identifier>start </span><span class=special>| </span><span class=identifier>b</span><span class=special>);</span>
</pre>
<p>I write:</p>
<pre> <span class=special> </span><span class=identifier>start </span><span class=special>= </span><span class=identifier>b</span><span class=special>;
</span><span class=identifier>start </span><span class=special>= </span><span class=identifier>a </span><span class=special>| </span><span class=identifier>start</span><span class=special>.</span><span class=identifier>copy</span><span class=special>();
</span><span class=identifier>start </span><span class=special>= </span><span class=identifier>start</span><span class=special>.</span><span class=identifier>copy</span><span class=special>() </span><span class=special>&gt;&gt; </span><span class=special>(</span><span class=identifier>start </span><span class=special>| </span><span class=identifier>b</span><span class=special>);</span></pre>
<p>Notice the statement:</p>
<pre> <span class=special> </span><span class=identifier>start </span><span class=special>= </span><span class=identifier>start</span><span class=special>.</span><span class=identifier>copy</span><span class=special>() </span><span class=special>| </span><span class=identifier>b</span><span class=special>;</span></pre>
<p>Why is start.copy() required? Well, because like rules, stored rules are still
embedded by reference when found in the RHS (one reason is to avoid cyclic-shared-pointers).
If we write:</p>
<pre> <span class=special> </span><span class=identifier>start </span><span class=special>= </span><span class=identifier>start </span><span class=special>| </span><span class=identifier>b</span><span class=special>;</span></pre>
<p>We have <strong>left-recursion</strong>! Making a copy of start avoids
self-referencing. What we are doing is making a copy of start, ORing it with b, then
destructively assigning the result back to start.</p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="stored_rule.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="the_lazy_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)</font></p>
</body>
</html>

View File

@@ -0,0 +1,124 @@
<html>
<head>
<title>Style Guide</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Style
Guide </b></font></td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="portability.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="techniques.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p> At some point, especially when there are lots of semantic actions attached
to various points, the grammar tends to be quite difficult to follow. In order
to keep an easy-to-read, consistent and aesthetically pleasing look to the Spirit
code, the following coding style guide is advised. </p>
<p>This coding style is adapted and extended from the ANTLR/PCCTS style (Terence
Parr) and <a href="http://groups.yahoo.com/group/boost/files/coding_guidelines.html">Boost
coding guidelines</a> (David Abrahams and Nathan Myers) and is the combined
work of Joel de Guzman, Chris Uzdavinis and Hartmut Kaiser.</p>
<ul>
<li> Rule names use std C++ (Boost) convention. The rule name may be very long.</li>
<li>The '=' is neatly indented 4 spaces below. Like Boost, use spaces instead
of tabs. </li>
<li>Breaking the operands into separate lines puts the semantic actions neatly
to the right. </li>
<li>Semicolon at the last line terminates the rule. </li>
<li>The adjacent parts of a sequence should be indented so that everything
that belongs to one level is at the same indentation level.</li>
</ul>
<pre><span class=identifier> program
</span><span class=special>= </span><span class=identifier>program_heading </span><span class=special>[</span><span class=identifier>heading_action</span><span class=special>]
</span><span class=identifier> </span><span class=special> &gt;&gt; </span><span class=identifier>block </span><span class=special>[</span><span class=identifier>block_action</span><span class=special>]
</span><span class=identifier> </span><span class=special> &gt;&gt; </span><span class=literal>'.'
</span><span class=identifier> </span><span class=special>| </span><span class=identifier>another_sequence
</span><span class=special>&gt;&gt; </span><span class=identifier>etc
</span><span class=identifier> </span><span class=special>;</span></pre>
<ul>
<li>Prefer literals in the grammar instead of identifiers. e.g. <tt>&quot;program&quot;</tt>
instead of <tt>PROGRAM</tt>, <tt>'&gt;='</tt> instead of <tt>GTE</tt> and
<tt>'.' </tt>instead of <tt>DOT</tt>. This makes it much easier to read. If
this isn't possible (for instance where the used tokens must be identified
through integers) capitalized identifiers should be used instead. </li>
<li> Breaking the operands may not be needed for short expressions. e.g. <tt>*(','
&gt;&gt; file_identifier)</tt> as long as the line does not exceed 80 characters.
</li>
<li> If a sequence fits on one line, put spaces inside the parentheses to clearly
separate them from the rules. </li>
</ul>
<pre> <span class=identifier>program_heading
</span><span class=special>= </span><span class=identifier>as_lower_d</span><span class=special>[</span><span class=string>&quot;program&quot;</span><span class=special>]
&gt;&gt; </span><span class=identifier>identifier
</span><span class=special>&gt;&gt; </span><span class=literal>'('
</span><span class=special>&gt;&gt; </span><span class=identifier>file_identifier
</span><span class=special>&gt;&gt; *( </span><span class=literal>',' </span><span class=special>&gt;&gt; </span><span class=identifier>file_identifier </span><span class=special>)
&gt;&gt; </span><span class=literal>')'
</span><span class=special>&gt;&gt; </span><span class=literal>';'
</span><span class=special>;</span></pre>
<ul>
<li> Nesting directives: If a rule does not fit on one line (80 characters)
it should be continued on the next line indented by one level. </li>
<li>The brackets of directives, semantic expressions (using Phoenix or LL lambda
expressions) or parsers should be placed as follows. </li>
</ul>
<pre> <span class=identifier>identifier
</span><span class=special>= </span><span class=identifier>nocase
</span><span class=special>[
</span><span class=identifier>lexeme
</span><span class=special>[
</span><span class=identifier>alpha </span><span class=special>&gt;&gt; *(</span><span class=identifier>alnum </span><span class=special>| </span><span class=literal>'_'</span><span class=special>) [</span><span class=identifier>id_action</span><span class=special>]
]
]
;</span></pre>
<ul>
<li> Nesting unary operators (e.g. Kleene star) </li>
<li>Unary rule operators (Kleene star, <tt>'!'</tt>, <tt>'+'</tt> etc.) should
be moved out one space before the corresponding indentation level, if this
rule has a body or a sequence after it, which does not fit on one line. This
makes the formatting more consistent and moves the rule 'body' at the same
indentation level as the rule itself, highlighting the unary operator.</li>
</ul>
<pre><span class=special> </span><span class=identifier>block
</span><span class=special>= *( </span><span class=identifier>label_declaration_part
</span><span class=special>| </span><span class=identifier>constant_definition_part
</span><span class=special>| </span><span class=identifier>type_definition_part
</span><span class=special>| </span><span class=identifier>variable_declaration_part
</span><span class=special>| </span><span class=identifier>procedure_and_function_declaration_part
</span><span class=special>)
&gt;&gt; </span><span class=identifier>statement_part
</span><span class=special>;</span></pre>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="portability.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="techniques.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 2001-2003 Joel de Guzman<br>
Copyright &copy; 2001-2002 Hartmut Kaiser<br>
Copyright &copy; 2001-2002 Chris Uzdavinis<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)</font></p>
<p class="copyright">&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,289 @@
<html>
<head>
<title>Subrules</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%">
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Subrules</b></font>
</td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="grammar.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="semantic_actions.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>Spirit is implemented using expression templates. This is a very powerful technique.
Along with its power comes some complications. We almost take for granted that
when we write <tt>i | j &gt;&gt; k</tt> where <tt>i</tt>, <tt>j</tt> and <tt>k</tt>
are all integers the result is still an integer. Yet, with expression templates,
the same expression <tt>i | j &gt;&gt; k</tt> where <tt>i</tt>, <tt>j</tt> and
<tt>k</tt> are of type <tt>T</tt>, the result is a complex composite type [see
<a href="basic_concepts.html">Basic Concepts</a>]. Spirit expressions, which
are combinations of primitives and composites yield an infinite set of new types.
One problem is that C++ offers no easy facility to deduce the type of an arbitrarily
complex expression that yields a complex type. Thus, while it is easy to write:</p>
<pre><code><font color="#000000"><span class=identifier> </span><span class=keyword>int </span><span class=identifier>r </span><span class=special>= </span><span class=identifier>i </span><span class=special>| </span><span class=identifier>j </span><span class=special>&gt;&gt; </span><span class=identifier>k</span><span class=special>; </span><span class=comment>// where i, j, and k are ints</span></font></code></pre>
<p>Expression templates yield an endless supply of types. Without the <a href="rule.html">rule</a>,
there is no easy way to do this in C++ if <tt>i</tt>, <tt>j</tt> and <tt>k</tt>
are Spirit parsers:</p>
<pre><code><font color="#000000"><span class=comment> </span><span class=special>&lt;</span><span class=identifier>what_type???</span><span class=special>&gt; </span><span class=identifier>r </span><span class=special>= </span><span class=identifier>i </span><span class=special>| </span><span class=identifier>j </span><span class=special>&gt;&gt; </span><span class=identifier>k</span><span class=special>; </span><span class=comment>// where i, j, and k are Spirit parsers</span></font></code></pre>
<p>If <tt>i</tt>, <tt>j</tt> and <tt>k</tt> are all <tt>chlit&lt;&gt;</tt> objects,
the type that we want is:</p>
<pre><code><font color="#000000"><span class=comment> </span><span class=keyword>typedef
</span><span class=identifier>alternative</span><span class=special>&lt;
</span><span class=identifier>chlit</span><span class=special>&lt;&gt;</span><span class=comment> // i
</span><span class=special>,</span> <span class=identifier>sequence</span><span class=special>&lt;
</span><span class=identifier>chlit</span><span class=special>&lt;&gt; </span><span class=comment>// j
</span><span class=special> ,</span><span class=comment> </span><span class=identifier>chlit</span><span class=special>&lt;&gt; </span><span class=comment>// k
</span><span class=special>&gt;
&gt;
</span><span class=identifier>rule_t</span><span class=special>;
</span><span class=identifier>rule_t r </span><span class=special>= </span><span class=identifier>i </span><span class=special>| </span><span class=identifier>j </span><span class=special>&gt;&gt; </span><span class=identifier>k</span><span class=special>; </span><span class=comment>// where i, j, and k are chlit&lt;&gt; objects</span></font></code></pre>
<p>We deliberately formatted the type declaration nicely to make it understandable.
Try that with a more complex expression. While it can be done, explicitly spelling
out the type of a Spirit expression template is tedious and error prone. The
right hand side (rhs) has to mirror the type of the left hand side (lhs). (<img src="theme/lens.gif" width="15" height="16">
Yet, if you still wish to do it, see this <a href="techniques.html#no_rules">link</a>
for a technique). </p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><p><img src="theme/lens.gif" width="15" height="16"><b>
typeof and auto</b> <br>
<br>
Some compilers already support the <tt>typeof</tt> keyword. This can be
used to free us from having to explicitly type the type (pun intentional).
Using the <tt>typeof</tt>, we can rewrite the Spirit expression above
as:<br>
<br>
<span class="keyword"><code>typeof</code><code></code></span><code><span class=special>(</span><span class=identifier>i
</span><span class=special>| </span><span class=identifier>j </span><span class=special>&gt;&gt;
</span><span class=identifier>k</span><span class=special>) </span><span class=identifier>r
</span><span class=special>= </span><span class=identifier>i </span><span class=special>|
</span><span class=identifier>j </span><span class=special>&gt;&gt; </span><span class=identifier>k</span><span class=special>;</span></code><br>
<br>
While this is better than having to explicitly declare a complex type,
it is redundant, error prone and still an eye sore. The expression is
typed twice. The only way to simplify this is to introduce a macro (See
this <a href="techniques.html#typeof">link</a> for more information).<br>
<br>
<a href="http://www.boost-consulting.com">David Abrahams</a> proposed
in comp.std.c++ to reuse the <tt>auto</tt> keyword for type deduced variables.
This has been extensively discussed in <a href="http://www.boost.org">boost.org</a>. Example:
<br>
<br>
<span class=keyword><code>auto </code></span><code><span class=identifier>r
</span><span class=special>= </span><span class=identifier>i </span><span class=special>|
</span><span class=identifier>j </span><span class=special>&gt;&gt; </span><span class=identifier>k</span><span class=special>;</span></code><br>
<br>
Once such a C++ extension is accepted into the standard, this would be
a neat solution and a nice fit for our purpose. It's not a complete solution
though since there are still situations where we do not know the rhs beforehand;
for instance when pre-declaring cyclic dependent rules.</p>
</td>
</tr>
</table>
<p>Fortunately, rules come to the rescue. Rules can capture the type of the expression
assigned to it. Thus:</p>
<pre><code><font color="#000000"> <span class=identifier>rule</span><span class=special>&lt;&gt; </span><span class=identifier>r </span><span class=special>= </span><span class=identifier>i </span><span class=special>| </span><span class=identifier>j </span><span class=special>&gt;&gt; </span><span class=identifier>k</span><span class=special>; </span><span class=comment>// where i, j, and k are chlit&lt;&gt; objects</span></font></code></pre>
<p>It might not be apparent but behind the scenes, plain rules are actually implemented
using a pointer to a runtime polymorphic abstract class that holds the dynamic
type of the parser assigned to it. When a Spirit expression is assigned to a
rule, its type is encapsulated in a concrete subclass of the abstract class.
A virtual parse function delegates the parsing to the encapsulated object.</p>
<p>Rules have drawbacks though:</p>
<p><img src="theme/bullet.gif" width="12" height="12"> It is coupled to a specific
scanner type. The rule is tied to a specific scanner [see <a href="faq.html#scanner_business">The
Scanner Business</a>].<br>
<img src="theme/bullet.gif" width="12" height="12"> The rule's parse member
function has a virtual function call overhead that cannot be inlined.</p>
<h2>Static rules: subrules</h2>
<p>The subrule is a fully static version of the rule. The subrule does not have
the drawbacks listed above. </p>
<p><img src="theme/bullet.gif" width="12" height="12"> The subrule is not tied
to a specific scanner so just about any scanner type may be used<br>
<img src="theme/bullet.gif" width="12" height="12"> The subrule also allows
aggressive inlining since there are no virtual function calls</p>
<pre><code><font color="#000000"><span class=identifier> </span><span class=keyword>template</span><span class=special>&lt;</span><span class=keyword>int </span></font><span class="identifier">ID</span><font color="#000000"><span class=special>, </span><span class=keyword>typename </span><span class=identifier>ContextT </span><span class=special>= </span><span class=identifier>parser_context</span><span class=special>&lt;&gt;</span> <span class=special>&gt;
</span><span class=keyword>class </span><span class=identifier>subrule</span><span class=special>;</span></font></code></pre>
<p>The first template parameter gives the subrule an identification tag. Like
the <a href="rule.html">rule</a>, there is a ContextT template parameter that
defaults to <code><tt>parser_context</tt></code>. You need not be concerned
at all with the <tt>ContextT</tt> template parameter unless you wish to tweak
the low level behavior of the subrule. Detailed information on the <tt>ContextT</tt>
template parameter is provided <a href="indepth_the_parser_context.html">elsewhere</a>.
</p>
<p>Presented above is the public API. There may actually be more template parameters
after <tt>ContextT</tt>. Everything after the <tt>ContextT</tt> parameter should
not be of concern to the client and are strictly for internal use only.</p>
<p>Apart from a few minor differences, the subrule follows the usage and syntax
of the rule closely. Here's the calculator grammar using subrules:</p>
<pre><code><font color="#000000"><span class=comment> </span><span class=keyword>struct </span><span class=identifier>calculator </span><span class=special>: </span><span class=keyword>public </span><span class=identifier>grammar</span><span class=special>&lt;</span><span class=identifier>calculator</span><span class=special>&gt;
</span><span class=special>{
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>definition
</span><span class=special>{
</span><span class=identifier>definition</span><span class=special>(</span><span class=identifier>calculator </span><span class=keyword>const</span><span class=special>& </span><span class=identifier>self</span><span class=special>)
</span><span class=special>{
</span><span class=identifier>first </span><span class=special>=
</span><span class=special>(
</span><span class=identifier>expression </span><span class=special>= </span><span class=identifier>term </span><span class=special>&gt;&gt; </span><span class=special>*((</span><span class=literal>'+' </span><span class=special>&gt;&gt; </span><span class=identifier>term</span><span class=special>) </span><span class=special>| </span><span class=special>(</span><span class=literal>'-' </span><span class=special>&gt;&gt; </span><span class=identifier>term</span><span class=special>)),
</span><span class=identifier>term </span><span class=special>= </span><span class=identifier>factor </span><span class=special>&gt;&gt; </span><span class=special>*((</span><span class=literal>'*' </span><span class=special>&gt;&gt; </span><span class=identifier>factor</span><span class=special>) </span><span class=special>| </span><span class=special>(</span><span class=literal>'/' </span><span class=special>&gt;&gt; </span><span class=identifier>factor</span><span class=special>)),
</span><span class=identifier>factor </span><span class=special>= </span><span class=identifier>integer </span><span class=special>| </span><span class=identifier>group</span><span class=special>,
</span><span class=identifier>group </span><span class=special>= </span><span class=literal>'(' </span><span class=special>&gt;&gt; </span><span class=identifier>expression </span><span class=special>&gt;&gt; </span><span class=literal>')'
</span><span class=special>);
</span><span class=special>}
</span><span class=identifier>subrule</span><span class=special>&lt;</span><span class=number>0</span><span class=special>&gt; </span><span class=identifier>expression</span><span class=special>;
</span><span class=identifier>subrule</span><span class=special>&lt;</span><span class=number>1</span><span class=special>&gt; </span><span class=identifier>term</span><span class=special>;
</span><span class=identifier>subrule</span><span class=special>&lt;</span><span class=number>2</span><span class=special>&gt; </span><span class=identifier>factor</span><span class=special>;
</span><span class=identifier>subrule</span><span class=special>&lt;</span><span class=number>3</span><span class=special>&gt; </span><span class=identifier>group</span><span class=special>;
</span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>&gt; </span><span class=identifier>first</span><span class=special>;
</span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>&gt; </span><span class=keyword>const</span><span class=special>&
</span><span class=identifier>start</span><span class=special>() </span><span class=keyword>const </span><span class=special>{ </span><span class=keyword>return </span><span class=identifier>first</span><span class=special>; </span><span class=special>}
</span><span class=special>};
</span><span class=special>};</span></font></code></pre>
<p><img src="theme/lens.gif" width="15" height="16"> A fully working example with
<a href="semantic_actions.html">semantic actions</a> can be <a href="../example/fundamental/subrule_calc.cpp">viewed
here</a>. This is part of the Spirit distribution. </p>
<table border="0" align="left">
<tr>
<td width="199"><img src="theme/subrule1.png" width="234" height="224"></td>
<td width="2"></td>
</tr>
</table>
<p>The subrule is an efficient version of the rule. Compiler optimizations such
as aggressive inlining help reduce the code size and increase performance significantly.
</p>
<p>The subrule is not a panacea however. Subrules push the C++ compiler hard to
its knees. For example, current compilers have a limit on recursion depth that
may not be exceeded. Don't even think about writing a full pascal grammar using
subrules alone. A grammar using subrules is a single C++ expression. Current
C++ compilers cannot handle very complex expressions very well. Finally, a plain
rule is still needed to act as place holder for subrules.</p>
<p>The code above is a good example of the recommended way to use subrules. Notice
the hierarchy. We have a grammar that encapsulates the whole calculator. The
start rule is a plain rule that holds the set of subrules. The subrules in turn
define the actual details of the grammar.</p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><img src="theme/lens.gif" width="15" height="16"><b>
Template instantiation depth</b> <br> <br>
Spirit pushes the C++ compiler hard. Current C++ compilers cannot handle
very complex heavily nested expressions very well. One restricting factor
is the typical compiler's limit on template recursion depth. Some, but not
all, compilers allow this limit to be configured.<br>
<br>
g++'s maximum can be set using a compiler flag: -ftemplate-depth. Set this
appropriately if you have a relatively complex grammar.<br>
<br>
Microsoft Visual C++ can take greater than 1000 for both template class
and function instantiation depths. However, the linker chokes with deep
template function instantiation unless inline recursion depth is set using
these pragmas:<br>
<br>
<span class="preprocessor">#pragma</span> inline_depth<span class="special">(</span>255<span class="special">)</span><br>
<span class="preprocessor">#pragma</span> inline_recursion<span class="special">(</span>on<span class="special">)<br>
<br>
</span>Perhaps these limitations no longer apply to more current versions
of these compilers. Be sure to check your compiler documentation.</td>
</tr>
</table>
<p>This setup gives a good balance. The subrules do all the work. Each grammar
will have only one rule: <tt>first</tt>. The rule is used just to hold the subrules
and make them visible to the grammar. </p>
<h3>The subrule definition</h3>
<p>Like the rule, the expression after assignment operator <tt>=</tt> defines
the subrule:</p>
<pre> <span class=identifier>identifier </span><span class=special>= </span><span class=identifier>expression</span></pre>
<p>Unlike rules, subrules may be defined only once. Redefining a subrule is illegal
and will result in a compile time assertion.</p>
<h3>Separators [ , ]</h3>
<p>While rules are terminated by the semicolon <tt>';'</tt>, subrules are not
terminated but are separated by the comma: <tt>','</tt>. Like Pascal statements,
the last subrule in a group may not have a trailing comma.</p>
<pre><span class=identifier> </span><span class=identifier>a </span><span class=special>= </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'a'</span><span class=special>),
</span><span class=identifier>b </span><span class=special>= </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'b'</span><span class=special>),
</span><span class=identifier>c </span><span class=special>= </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'c'</span><span class=special>), </span><span class=comment>// BAD, trailing comma</span><code><font color="#000000"><font color="#800000"><i></i></font></font></code><code><font color="#000000"><font color="#800000"><i></i></font></font><i></i></code></pre>
<p>
<pre><code><span class=comment> </span><span class=identifier>a </span><span class=special>= </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'a'</span><span class=special>),
</span><span class=identifier>b </span><span class=special>= </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'b'</span><span class=special>),
</span><span class=identifier>c </span><span class=special>= </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'c'</span><span class=special>) </span><span class=comment>// OK</span></code></pre>
<h3> The start subrule</h3>
<p>Unlike rules, parsing proceeds from the start subrule. The first (topmost)
subrule in a group of subrules is called the <b>start subrule</b>. In our example
above, <tt>expression</tt> is the start subrule. When a group of subrules is
called forth, the start subrule <tt>expression</tt> is called first.</p>
<h3>IDs</h3>
<p>Each subrule has a corresponding ID; an integral constant that uniquely specifies
the subrule. Our example above has four subrules. They are declared as:</p>
<pre><code><span class=comment> </span><span class=identifier>subrule</span><span class=special>&lt;</span><span class=number>0</span><span class=special>&gt; </span><span class=identifier>expression</span><span class=special>;
</span><span class=identifier>subrule</span><span class=special>&lt;</span><span class=number>1</span><span class=special>&gt; </span><span class=identifier>term</span><span class=special>;
</span><span class=identifier>subrule</span><span class=special>&lt;</span><span class=number>2</span><span class=special>&gt; </span><span class=identifier>factor</span><span class=special>;
</span><span class=identifier>subrule</span><span class=special>&lt;</span><span class=number>3</span><span class=special>&gt; </span><span class=identifier>group</span><span class=special>;</span></code></pre>
<h3> Aliases</h3>
<p>It is possible to have subrules with the same ID. A subrule with the same
ID as another will be an alias of the other. Both subrules may be used interchangeably.</p>
<pre><code><span class=special> </span><span class=identifier>subrule</span><span class=special>&lt;</span><span class=number>0</span><span class=special>&gt; </span><span class=identifier>a</span><span class=special>;
</span><span class=identifier>subrule</span><span class=special>&lt;</span><span class=number>0</span><span class=special>&gt; </span><span class=identifier>alias</span><span class=special>; </span><span class=comment>// alias of a</span></code></pre>
<h3>Groups: scope and nesting</h3>
<p>The scope of a subrule and its definition is the enclosing group, typically
(and by convention) enclosed inside the parentheses. IDs outside a scope are
not directly visible. Inner subrule groups can be nested by enclosing each sub-group
inside another set of parentheses. Each group is unique and acts independently.
Consequently, while it may not be advisable to do so, a subrule in a group may
share the same ID as a subrule in another group since both groups are independent
of each other.</p>
<pre><code><span class=comment> </span><span class=identifier>subrule</span><span class=special>&lt;</span><span class=number>0</span><span class=special>&gt; </span><span class=identifier>a</span><span class=special>;
</span><span class=identifier>subrule</span><span class=special>&lt;</span><span class=number>1</span><span class=special>&gt; </span><span class=identifier>b</span><span class=special>;
</span><span class=identifier>subrule</span><span class=special>&lt;</span><span class=number>0</span><span class=special>&gt; </span><span class=identifier>c</span><span class=special>;
</span><span class=identifier>subrule</span><span class=special>&lt;</span><span class=number>1</span><span class=special>&gt; </span><span class=identifier>d</span><span class=special>;
</span><span class=special>( </span><span class=comment>// outer subrule group, scope of a and b
</span><span class=identifier>a </span><span class=special>= </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'a'</span><span class=special>),
</span><span class=identifier>b </span><span class=special>=
</span><span class=special>( </span><span class=comment>// inner subrule group, scope of c and d
</span><span class=identifier>c </span><span class=special>= </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'c'</span><span class=special>),
</span><span class=identifier>d </span><span class=special>= </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'d'</span><span class=special>)
</span><span class=special>)
</span><span class=special>)</span></code></pre>
<p>Subrule IDs need to be unique only within a group. A grammar is an implicit
group. Furthermore, even subrules in a grammar may have the same IDs without
clashing if they are inside a group. Subrules may be explicitly grouped using
the parentheses. Parenthesized groups have unique scopes. In the code above,
the outer subrule group defines the subrules <tt>a</tt> and <tt>b</tt> while
the inner subrule group defines the subrules <tt>c</tt> and <tt>d</tt>. Notice
that the definition of <tt>b</tt> is the inner subrule.</p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="grammar.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="semantic_actions.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)</font></p>
<p>&nbsp;</p>
<p><code><font color="#000000"><font color="#0000ff"></font></font></code></p>
</body>
</html>

View File

@@ -0,0 +1,115 @@
<html>
<head>
<title>The Switch Parser</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
<style type="text/css">
<!--
.style1 {font-family: "Courier New", Courier, mono}
.style3 {font-family: "Courier New", Courier, mono; color: #FF0000; }
-->
</style>
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10"> </td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>The Switch Parser </b></font></td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="select_parser.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="escape_char_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>Switch parsers may be used to simplify certain alternation constructs. Consider the following code:</p>
<pre> rule<span class="special">&lt;&gt;</span> rule_overall <span class="special">=</span>
ch_p<span class="special">(</span><span class="literal">'a'</span><span class="special">)</span> <span class="special">&gt;&gt;</span> parser_a
<span class="special">|</span> ch_p<span class="special">(</span><span class="literal">'b'</span><span class="special">)</span> <span class="special">&gt;&gt;</span> parser_b
<span class="comment">// ...</span>
<span class="special">|</span> ch_p<span class="special">(</span><span class="literal">'n'</span><span class="special">)</span> <span class="special">&gt;&gt;</span> parser_n
<span class="special">;</span></pre>
<p>Each of the alternatives is evaluated normally in a sequential manner. This tends to be inefficient, especially for a large number of alternatives. To avoid this inefficiency and to make it possible to write such constructs in a more readable form, Spirit contains the <tt>switch_p</tt> family of parsers. The switch_p parser allows us to rewrite the previous construct as:</p>
<pre> rule<span class="special">&lt;&gt;</span> rule_overall <span class="special">=</span>
switch_p
<span class="special">[</span>
case_p<span class="special">&lt;</span><span class="literal">'a'</span><span class="special">&gt;(</span>parser_a<span class="special">),</span>
case_p<span class="special">&lt;</span><span class="literal">'b'</span><span class="special">&gt;(</span>parser_b<span class="special">),</span>
<span class="comment"> // ...</span>
case_p<span class="special">&lt;</span><span class="literal">'n'</span><span class="special">&gt;(</span>parser_n<span class="special">)</span>
]
;</pre>
<p>This <tt>switch_p</tt> parser takes the next character (or token) from the input stream and tries to match it against the given integral compile time constants supplied as the template parameters to the <tt>case_p</tt> parsers. If this character matches one of the <tt>case_p</tt> branches, the associated parser is executed (i.e. if 'a' is matched, <tt>parser_a</tt> is executed, if 'b' is matched, <tt>parser_b</tt> is executed and so on). If no <tt>case_p</tt> branch matches the next input character, the overall construct does not match at all. </p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><div align="justify"><img src="theme/bulb.gif" width="13" height="18"><strong> Nabialek trick </strong><br>
<br>
The <strong><em><a href="techniques.html#nabialek_trick">&quot;Nabialek trick&quot; </a></em></strong>(from the name of its inventor, Sam Nabialek), can also improve the rule dispatch from linear non-deterministic to deterministic. This is similar to the <tt>switch_p</tt> parser, yet, can handle grammars where a keyword (operator, etc), instead of a single character or token, precedes a production.</div></td>
</tr>
</table>
<p>Sometimes it is desirable to add handling of the default case (none of the <tt>case_p</tt> branches matched). This may be achieved with the help of a <tt>default_p</tt> branch:</p>
<pre> rule<span class="special">&lt;&gt;</span> rule_overall <span class="special">=</span>
switch_p
<span class="special">[</span>
case_p<span class="special">&lt;</span><span class="literal">'a'</span><span class="special">&gt;(</span>parser_a<span class="special">),</span>
case_p<span class="special">&lt;</span><span class="literal">'b'</span><span class="special">&gt;(</span>parser_b<span class="special">),</span>
<span class="comment"> // ...</span>
case_p<span class="special">&lt;</span><span class="literal">'n'</span><span class="special">&gt;(</span>parser_n<span class="special">),</span>
default_p<span class="special">(</span>parser_default<span class="special">)</span>
<span class="special">]
;</span></pre>
<p>This form chooses the <tt>parser_default</tt> parser if none of the cases matches the next character from the input stream. Please note that, obviously, only one <tt>default_p</tt> branch may be added to the <tt>switch_p</tt> parser construct. </p>
<p>Moreover, it is possible to omit the parentheses and body from the <tt>default_p</tt> construct, in which case, no additional parser is executed and the overall <tt>switch_p</tt> construct simply returns a match on any character of the input stream, which does not match any of the <tt>case_p</tt> branches:</p>
<pre> rule<span class="special">&lt;&gt;</span> rule_overall <span class="special">=</span>
switch_p
<span class="special">[</span>
case_p<span class="special">&lt;</span><span class="literal">'a'</span><span class="special">&gt;(</span>parser_a<span class="special">),</span>
case_p<span class="special">&lt;</span><span class="literal">'b'</span><span class="special">&gt;(</span>parser_b<span class="special">),</span>
<span class="comment">// ...</span>
case_p<span class="special">&lt;</span><span class="literal">'n'</span><span class="special">&gt;(</span>parser_n<span class="special">),</span>
default_p
<span class="special">]</span>
;</pre>
<p>There is another form of the switch_p construct. This form allows us to explicitly specify the value to be used for matching against the <tt>case_p</tt> branches: </p>
<pre> rule<span class="special">&lt;&gt;</span> rule_overall <span class="special">=</span>
switch_p<span class="special">(</span>cond<span class="special">)</span>
<span class="special">[</span>
case_p<span class="special">&lt;</span><span class="literal">'a'</span><span class="special">&gt;(</span>parser_a<span class="special">),</span>
case_p<span class="special">&lt;</span><span class="literal">'b'</span><span class="special">&gt;(</span>parser_b<span class="special">),</span>
<span class="comment"> // ...</span>
case_p<span class="special">&lt;</span><span class="literal">'n'</span><span class="special">&gt;(</span>parser_n<span class="special">)</span>
<span class="special">]</span>
;</pre>
<p>where <tt>cond</tt> is a parser or a nullary function or function object (functor). If it is a parser, then it is tried and its return value is used to match against the <tt>case_p</tt> branches. If it is a nullary function or functor, then its return value will be used. </p>
<p>Please note that during its compilation, the <tt>switch_p</tt> construct is transformed into a real C++ <tt>switch</tt> statement. This makes the runtime execution very efficient. </p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><p><img src="theme/alert.gif" width="16" height="16"> <tt>BOOST_SPIRIT_SWITCH_CASE_LIMIT</tt><br>
<br>
The number of possible <tt>case_p</tt>/<tt>default_p</tt> branches is limited by the Spirit compile time constant <tt>BOOST_SPIRIT_SWITCH_CASE_LIMIT</tt>, which defaults to 3. There is no theoretical upper limit for this constant, but most compilers won't allow you to specify a very large number.</p>
<p>Example:</p>
<p class="style1"><span class="comment">// Define these before including switch.hpp <br>
</span><span class="preprocessor">#define</span> BOOST_SPIRIT_SWITCH_CASE_LIMIT 10 </p></td>
</tr>
</table><br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="select_parser.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="escape_char_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 2003-2004 Hartmut Kaiser <br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) </font> </p>
</body>
</html>

View File

@@ -0,0 +1,204 @@
<html>
<head>
<title>Symbols</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%">
<font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Symbols</b></font>
</td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="distinct.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="trees.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>The class <tt>symbols</tt> implements a symbol table. The symbol table holds a dictionary
of symbols where each symbol is a sequence of CharTs (a <tt>char</tt>, <tt>wchar_t</tt>,
<tt>int</tt>, enumeration etc.). The template class, parameterized by the character
type (CharT), can work efficiently with 8, 16, 32 and even 64 bit characters.
Mutable data of type T is associated with each symbol.<br>
</p>
<p>Traditionally, symbol table management is maintained separately outside the
BNF grammar through semantic actions. Contrary to standard practice, the Spirit
symbol table class <tt>symbols</tt> is-a parser. An instance of which may be
used anywhere in the EBNF grammar specification. It is an example of a dynamic
parser. A dynamic parser is characterized by its ability to modify its behavior
at run time. Initially, an empty symbols object matches nothing. At any time,
symbols may be added, thus, dynamically altering its behavior.</p>
<p>Each entry in a symbol table has an associated mutable data slot. In this regard,
one can view the symbol table as an associative container (or map) of key-value
pairs where the keys are strings. </p>
<p>The symbols class expects two template parameters (actually there is a third,
see detail box). The first parameter <tt>T</tt> specifies the data type associated
with each symbol (defaults to <tt>int</tt>) and the second parameter <tt>CharT</tt>
specifies the character type of the symbols (defaults to <tt>char</tt>). </p>
<pre><span class=identifier> </span><span class=keyword>template
</span><span class=special>&lt;
</span><span class=keyword>typename </span><span class=identifier>T </span><span class=special>= </span><span class=keyword>int</span><span class=special>,
</span><span class=keyword>typename </span><span class=identifier>CharT </span><span class=special>= </span><span class=keyword>char</span><span class=special>,
</span><span class=keyword>typename </span><span class=identifier>SetT </span><span class=special>= </span><span class=identifier>impl</span><span class=special>::</span><span class=identifier>tst</span><span class=special>&lt;</span><span class=identifier>T</span><span class=special>, </span><span class=identifier>CharT</span><span class=special>&gt;
</span><span class=special>&gt;
</span><span class=keyword>class </span><span class=identifier>symbols</span><span class=special>;</span></pre>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><img src="theme/lens.gif" width="15" height="16"> <b>Ternary
Search Trees</b><br>
<br>
The actual set implementation is supplied by the SetT template parameter
(3rd template parameter of the symbols class). By default, this uses the
tst class which is an implementation of the Ternary Search Tree. <br>
<br>
Ternary Search Trees are faster than hashing for many typical search problems
especially when the search interface is iterator based. Searching for a
string of length k in a ternary search tree with n strings will require
at most O(log n+k) character comparisons. TSTs are many times faster than
hash tables for unsuccessful searches since mismatches are discovered earlier
after examining only a few characters. Hash tables always examine an entire
key when searching.<br>
<br>
For details see <a href="http://www.cs.princeton.edu/%7Ers/strings/">http://www.cs.princeton.edu/~rs/strings/</a>.</td>
</tr>
</table>
<p>Here are some sample declarations:</p>
<pre><span class=identifier> </span><span class=identifier>symbols</span><span class=special>&lt;&gt; </span><span class=identifier>sym</span><span class=special>;
</span><span class=identifier>symbols</span><span class=special>&lt;</span><span class=keyword>short</span><span class=special>, </span><span class=keyword>wchar_t</span><span class=special>&gt; </span><span class=identifier>sym2</span><span class=special>;
</span><span class=keyword>struct </span><span class=identifier>my_info
</span><span class=special>{
</span><span class=keyword>int </span><span class=identifier>id</span><span class=special>;
</span><span class=keyword>double </span><span class=identifier>value</span><span class=special>;
</span><span class=special>};
</span><span class=identifier>symbols</span><span class=special>&lt;</span><span class=identifier>my_info</span><span class=special>&gt; </span><span class=identifier>sym3</span><span class=special>;</span></pre>
<p>After having declared our symbol tables, symbols may be added statically using
the construct:</p>
<pre><span class=identifier> sym </span><span class=special>= </span><span class=identifier>a</span><span class=special>, </span><span class=identifier>b</span><span class=special>, </span><span class=identifier>c</span><span class=special>, </span><span class=identifier>d </span><span class=special>...;</span></pre>
<p>where <tt>sym</tt> is a symbol table and <tt>a..d</tt> etc. are strings. <img src="theme/note.gif" width="16" height="16">Note
that the comma operator is separating the items being added to the symbol table,
through an assignment. Due to operator overloading this is possible and correct
(though it may take a little getting used to) and is a concise way to initialize
the symbol table with many symbols. Also, it is perfectly valid to make multiple
assignments to a symbol table to iteratively add symbols (or groups of symbols)
at different times.</p>
<p>Simple example:<br>
</p>
<pre><span class=identifier> sym </span><span class=special>= </span><span class=string>&quot;pineapple&quot;</span><span class=special>, </span><span class=string>&quot;orange&quot;</span><span class=special>, </span><span class=string>&quot;banana&quot;</span><span class=special>, </span><span class=string>&quot;apple&quot;</span><span class=special>, </span><span class=string>&quot;mango&quot;</span><span class=special>;</span></pre>
<p>Note that it is invalid to add the same symbol multiple times to a symbol table,
though you may modify the value associated with a symbol arbitrarily many times.</p>
<p>Now, we may use sym in the grammar. Example:</p>
<pre><span class=identifier> fruits </span><span class=special>= </span><span class=identifier>sym </span><span class=special>&gt;&gt; </span><span class=special>*(</span><span class=literal>',' </span><span class=special>&gt;&gt; </span><span class=identifier>sym</span><span class=special>);</span></pre>
<p>Alternatively, symbols may be added dynamically through the member functor
<tt>add</tt> (see <tt><a href="#symbol_inserter">symbol_inserter</a></tt> below).
The member functor <tt>add</tt> may be attached to a parser as a semantic action
taking in a begin/end pair:</p>
<pre><span class=identifier> p</span><span class=special>[</span><span class=identifier>sym</span><span class=special>.</span><span class=identifier>add</span><span class=special>]</span></pre>
<p>where p is a parser (and sym is a symbol table). On success, the matching portion
of the input is added to the symbol table.</p>
<p><tt>add</tt> may also be used to directly initialize data. Examples:</p>
<pre><span class=identifier> sym</span><span class=special>.</span><span class=identifier>add</span><span class=special>(</span><span class=string>&quot;hello&quot;</span><span class=special>, </span><span class=number>1</span><span class=special>)(</span><span class=string>&quot;crazy&quot;</span><span class=special>, </span><span class=number>2</span><span class=special>)(</span><span class=string>&quot;world&quot;</span><span class=special>, </span><span class=number>3</span><span class=special>);</span></pre>
<p>Assuming of course that the data slot associated with <tt>sym</tt> is an integer.</p>
<p>The data associated with each symbol may be modified any time. The most obvious
way of course is through <a href="semantic_actions.html">semantic actions</a>.
A function or functor, as usual, may be attached to the symbol table. The symbol
table expects a function or functor compatible with the signature:</p>
<p><b>Signature for functions:</b></p>
<pre><code><font color="#000000"><span class=identifier> </span><span class=keyword>void </span><span class=identifier>func</span><span class=special>(</span><span class=identifier>T</span><span class="special">&amp;</span><span class=identifier> data</span><span class=special>);</span></font></code></pre>
<p><b>Signature for functors:</b><br>
</p>
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>struct </span><span class=identifier>ftor
</span><span class=special>{
</span><span class=keyword>void </span><span class=keyword>operator</span><span class=special>()(</span><span class=identifier>T</span><span class="special">&amp;</span><span class=identifier> data</span><span class=special>) </span><span class=keyword>const</span><span class=special>;
</span><span class=special>};</span></font></code></pre>
<p>Where <tt>T</tt> is the data type of the symbol table (the <tt>T</tt> in its
template parameter list). When the symbol table successfully matches something
from the input, the data associated with the matching entry in the symbol table
is reported to the semantic action.</p>
<h2>Symbol table utilities</h2>
<p>Sometimes, one may wish to deal with the symbol table directly. Provided are
some symbol table utilities.</p>
<p><b>add</b></p>
<pre><span class=identifier> </span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>CharT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>SetT</span><span class=special>&gt;
</span><span class=identifier>T</span><span class=special>* </span><span class=identifier>add</span><span class=special>(</span><span class=identifier>symbols</span><span class=special>&lt;</span><span class=identifier>T</span><span class=special>, </span><span class=identifier>CharT</span><span class=special>, </span><span class=identifier>SetT</span><span class=special>&gt;&amp; </span><span class=identifier>table</span><span class=special>, </span><span class=identifier>CharT </span><span class=keyword>const</span><span class=special>* </span><span class=identifier>sym</span><span class=special>, </span><span class=identifier>T </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>data </span><span class=special>= </span><span class=identifier>T</span><span class=special>());</span></pre>
<p>adds a symbol <tt>sym</tt> (C string) to a symbol table <tt>table</tt> plus
an optional data <tt>data</tt> associated with the symbol. Returns a pointer
to the data associated with the symbol or <tt>NULL</tt> if add failed (e.g.
when the symbol has already been added).<br>
<br>
<b>find</b></p>
<pre><span class=special> </span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>T</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>CharT</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>SetT</span><span class=special>&gt;
</span><span class=identifier>T</span><span class=special>* </span><span class=identifier>find</span><span class=special>(</span><span class=identifier>symbols</span><span class=special>&lt;</span><span class=identifier>T</span><span class=special>, </span><span class=identifier>CharT</span><span class=special>, </span><span class=identifier>SetT</span><span class=special>&gt; </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>table</span><span class=special>, </span><span class=identifier>CharT </span><span class=keyword>const</span><span class=special>* </span><span class=identifier>sym</span><span class=special>);</span></pre>
<p>finds a symbol <tt>sym</tt> (C string) from a symbol table <tt>table</tt>.
Returns a pointer to the data associated with the symbol or <tt>NULL</tt> if
not found.</p>
<h2><a name="symbol_inserter"></a>symbol_inserter</h2>
<p>The symbols class holds an instance of this class named <tt>add</tt>. This
can be called directly just like a member function, passing in a first/last
iterator and optional data:<br>
<br>
</p>
<pre><span class=identifier> sym</span><span class=special>.</span><span class=identifier>add</span><span class=special>(</span><span class=identifier>first</span><span class=special>, </span><span class=identifier>last</span><span class=special>, </span><span class=identifier>data</span><span class=special>);</span></pre>
<p>Or, passing in a C string and optional data:<br>
</p>
<pre><span class=identifier> sym</span><span class=special>.</span><span class=identifier>add</span><span class=special>(</span><span class=identifier>c_string</span><span class=special>, </span><span class=identifier>data</span><span class=special>);</span></pre>
<p>where <tt>sym</tt> is a symbol table. The <tt>data</tt> argument is optional.
The nice thing about this scheme is that it can be cascaded. We've seen this
applied above. Here's a snippet from the roman numerals parser:</p>
<pre> <span class=comment>// Parse roman numerals (1..9) using the symbol table.
</span> <span class=keyword>struct </span><span class=identifier>ones </span><span class=special>: </span><span class=identifier>symbols</span><span class=special>&lt;</span><span class=keyword>unsigned</span><span class=special>&gt;
</span><span class=special>{
</span><span class=identifier>ones</span><span class=special>()
</span><span class=special>{
</span><span class=identifier>add
</span><span class=special>(</span><span class=string>&quot;I&quot; </span><span class=special>, </span><span class=number>1</span><span class=special>)
</span><span class=special>(</span><span class=string>&quot;II&quot; </span><span class=special>, </span><span class=number>2</span><span class=special>)
</span><span class=special>(</span><span class=string>&quot;III&quot; </span><span class=special>, </span><span class=number>3</span><span class=special>)
</span><span class=special>(</span><span class=string>&quot;IV&quot; </span><span class=special>, </span><span class=number>4</span><span class=special>)
</span><span class=special>(</span><span class=string>&quot;V&quot; </span><span class=special>, </span><span class=number>5</span><span class=special>)
</span><span class=special>(</span><span class=string>&quot;VI&quot; </span><span class=special>, </span><span class=number>6</span><span class=special>)
</span><span class=special>(</span><span class=string>&quot;VII&quot; </span><span class=special>, </span><span class=number>7</span><span class=special>)
</span><span class=special>(</span><span class=string>&quot;VIII&quot; </span><span class=special>, </span><span class=number>8</span><span class=special>)
</span><span class=special>(</span><span class=string>&quot;IX&quot; </span><span class=special>, </span><span class=number>9</span><span class=special>)
</span><span class=special>;
</span><span class=special>}
</span><span class=special>} </span><span class=identifier>ones_p</span><span class=special>;</span></pre>
<p>Notice that a user defined struct <tt>ones</tt> is subclassed from <tt>symbols</tt>.
Then at construction time, we added all the symbols using the <tt>add</tt> symbol_inserter.</p>
<p> <img height="16" width="15" src="theme/lens.gif"> The full source code can be <a href="../example/fundamental/roman_numerals.cpp">viewed here</a>. This is part of the Spirit distribution.</p>
<p>Again, <tt>add</tt> may also be used as a semantic action since it conforms
to the action interface (see semantic actions):<br>
</p>
<pre><span class=special></span><span class=identifier> p</span><span class=special>[</span><span class=identifier>sym</span><span class=special>.</span><span class=identifier>add</span><span class=special>]</span></pre>
<p>where p is a parser of course.<span class=special><br>
</span></p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="distinct.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="trees.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)</font></p>
</body>
</html>

View File

@@ -0,0 +1,373 @@
<html>
<head>
<title>Techniques</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>Techniques</b></font></td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="style_guide.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="faq.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<ul>
<li><a href="#templatized_functors">Templatized Functors</a></li>
<li><a href="#multiple_scanner_support">Rule With Multiple Scanners</a></li>
<li><a href="#no_rules">Look Ma' No Rules!</a></li>
<li><a href="#typeof">typeof</a></li>
<li><a href="#nabialek_trick">Nabialek trick</a></li>
</ul>
<h3><a name="templatized_functors"></a> Templatized Functors</h3>
<p>For the sake of genericity, it is often better to make the functor's member
<tt>operator()</tt> a template. That way, we do not have to concern ourselves
with the type of the argument to expect as long as the behavior is appropriate.
For instance, rather than hard-coding <tt>char const*</tt> as the argument of
a generic semantic action, it is better to make it a template member function.
That way, it can accept any type of iterator:</p>
<pre><code><font color="#000000"><span class=special> </span><span class=keyword>struct </span><span class=identifier>my_functor
</span><span class=special>{
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>IteratorT</span><span class=special>&gt;
</span><span class=keyword>void </span><span class=keyword>operator</span><span class=special>()(</span><span class=identifier>IteratorT </span><span class=identifier>first</span><span class=special>, </span><span class=identifier>IteratorT </span><span class=identifier>last</span><span class=special>) </span><span class=keyword>const</span><span class=special>;
</span><span class=special>};</span></font></code></pre>
<p>Take note that this is only possible with functors. It is not possible to pass
in template functions as semantic actions unless you cast it to the correct
function signature; in which case, you <em>monomorphize</em> the function. This
clearly shows that functors are superior to plain functions.</p>
<h3><b><a name="multiple_scanner_support" id="multiple_scanner_support"></a> Rule
With Multiple Scanners</b></h3>
<p>As of v1.8.0, rules can use one or more scanner types. There are cases, for
instance, where we need a rule that can work on the phrase and character levels.
Rule/scanner mismatch has been a source of confusion and is the no. 1 <a href="faq.html#scanner_business">FAQ</a>.
To address this issue, we now have <a href="rule.html#multiple_scanner_support">multiple
scanner support</a>. </p>
<p>Here is an example of a grammar with a rule <tt>r</tt> that can be called with
3 types of scanners (phrase-level, lexeme, and lower-case). See the <a href="rule.html">rule</a>,
<a href="grammar.html">grammar</a>, <a href="scanner.html#lexeme_scanner">lexeme_scanner</a>
and <a href="scanner.html#as_lower_scanner">as_lower_scanner </a>for more information.
</p>
<p>Here's the grammar (see <a href="../example/techniques/multiple_scanners.cpp">multiple_scanners.cpp</a>):
</p>
<pre><span class=special> </span><span class=keyword>struct </span><span class=identifier>my_grammar </span><span class=special>: </span><span class=identifier>grammar</span><span class=special>&lt;</span><span class=identifier>my_grammar</span><span class=special>&gt;
</span><span class=special>{
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>definition
</span><span class=special>{
</span><span class=identifier>definition</span><span class=special>(</span><span class=identifier>my_grammar </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>self</span><span class=special>)
</span><span class=special>{
</span><span class=identifier>r </span><span class=special>= </span><span class=identifier>lower_p</span><span class=special>;
</span><span class=identifier>rr </span><span class=special>= </span><span class=special>+(</span><span class=identifier>lexeme_d</span><span class=special>[</span><span class=identifier>r</span><span class=special>] </span><span class=special>&gt;&gt; </span><span class=identifier>as_lower_d</span><span class=special>[</span><span class=identifier>r</span><span class=special>] </span><span class=special>&gt;&gt; </span><span class=identifier>r</span><span class=special>);
</span><span class=special>}
</span><span class=keyword>typedef </span><span class=identifier>scanner_list</span><span class=special>&lt;
</span><span class=identifier>ScannerT
</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>lexeme_scanner</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>&gt;::</span><span class=identifier>type
</span><span class=special>, </span><span class=keyword>typename </span><span class=identifier>as_lower_scanner</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>&gt;::</span><span class=identifier>type
</span><span class=special>&gt; </span><span class=identifier>scanners</span><span class=special>;
</span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>scanners</span><span class=special>&gt; </span><span class=identifier>r</span><span class=special>;
</span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>&gt; </span><span class=identifier>rr</span><span class=special>;
</span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>&gt; </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>start</span><span class=special>() </span><span class=keyword>const </span><span class=special>{ </span><span class=keyword>return </span><span class=identifier>rr</span><span class=special>; </span><span class=special>}
</span><span class=special>};
</span><span class=special>};</span></pre>
<p>By default support for multiple scanners is disabled. The macro
<tt>BOOST_SPIRIT_RULE_SCANNERTYPE_LIMIT</tt> must be defined to the
maximum number of scanners allowed in a scanner_list. The value must
be greater than 1 to enable multiple scanners. Given the
example above, to define a limit of three scanners for the list, the
following line must be inserted into the source file before the
inclusion of Spirit headers:
</p>
<pre><span class=special> </span><span class=preprocessor>#define </span><span class=identifier>BOOST_SPIRIT_RULE_SCANNERTYPE_LIMIT</span> <span class=literal>3</span></pre>
<h3><span class=special></span><b> <a name="no_rules" id="no_rules"></a> Look
Ma' No Rules</b></h3>
<p>You use grammars and you use lots of 'em? Want a fly-weight, no-cholesterol,
super-optimized grammar? Read on...</p>
<p>I have a love-hate relationship with rules. I guess you know the reasons why.
A lot of problems stem from the limitation of rules. Dynamic polymorphism and
static polymorphism in C++ do not mix well. There is no notion of virtual template
functions in C++; at least not just yet. Thus, the <strong>rule is tied to a
specific scanner type</strong>. This results in problems such as the <a href="faq.html#scanner_business">scanner
business</a>, our no. 1 FAQ. Apart from that, the virtual functions in rules
slow down parsing, kill all meta-information, and kill inlining, hence bloating
the generated code, especially for very tiny rules such as:</p>
<pre> r <span class="special">=</span> ch_p<span class="special">(</span><span class="quotes">'x'</span><span class="special">) &gt;&gt;</span> uint_p<span class="special">;</span></pre>
<p> The rule's limitation is the main reason why the grammar is designed the way
it is now, with a nested template definition class. The rule's limitation is
also the reason why subrules exist. But do we really need rules? Of course!
Before C++ adopts some sort of auto-type deduction, such as that proposed by
David Abrahams in clc++m:</p>
<pre>
<code><span class=keyword>auto </span><span class=identifier>r </span><span class=special>= ...</span><span class=identifier>definition </span><span class=special>...</span></code></pre>
<p> we are tied to the rule as RHS placeholders. However.... on some occasions
we can get by without rules! For instance, rather than writing:</p>
<pre>
<code><span class=identifier>rule</span><span class=special>&lt;&gt; </span><span class=identifier>x </span><span class=special>= </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'x'</span><span class=special>);</span></code></pre>
<p> It's better to write:</p>
<pre>
<code><span class=identifier>chlit</span><span class=special>&lt;&gt; </span><span class=identifier>x </span><span class=special>= </span><span class=identifier>ch_p</span><span class=special>(</span><span class=literal>'x'</span><span class=special>);</span></code></pre>
<p> That's trivial. But what if the rule is rather complicated? Ok, let's proceed
stepwise... I'll investigate a simple skip_parser based on the C grammar from
Hartmut Kaiser. Basically, the grammar is written as (see <a href="../example/techniques/no_rules/no_rule1.cpp">no_rule1.cpp</a>):</p>
<pre><code> <span class=keyword>struct </span><span class=identifier>skip_grammar </span><span class=special>: </span><span class=identifier>grammar</span><span class=special>&lt;</span><span class=identifier>skip_grammar</span><span class=special>&gt;
{
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>definition
</span><span class=special>{
</span><span class=identifier>definition</span><span class=special>(</span><span class=identifier>skip_grammar </span><span class=keyword>const</span><span class=special>&amp; /*</span><span class=identifier>self</span><span class=special>*/)
{
</span><span class=identifier>skip
</span><span class=special>= </span><span class=identifier>space_p
</span><span class=special>| </span><span class=string>&quot;//&quot; </span><span class=special>&gt;&gt; *(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=literal>'\n'</span><span class=special>) &gt;&gt; </span><span class=literal>'\n'
</span><span class=special>| </span><span class=string>&quot;/*&quot; </span><span class=special>&gt;&gt; *(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=string>&quot;*/&quot;</span><span class=special>) &gt;&gt; </span><span class=string>&quot;*/&quot;
</span><span class=special>;
}
</span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>&gt; </span><span class=identifier>skip</span><span class=special>;
</span><span class=identifier>rule</span><span class=special>&lt;</span><span class=identifier>ScannerT</span><span class=special>&gt; </span><span class=keyword>const</span><span class=special>&amp;
</span><span class=identifier>start</span><span class=special>() </span><span class=keyword>const </span><span class=special>{ </span><span class=keyword>return </span><span class=identifier>skip</span><span class=special>; }
};
};</span></code></pre>
<p> Ok, so far so good. Can we do better? Well... since there are no recursive
rules there (in fact there's only one rule), you can expand the type of rule's
RHS as the rule type (see <a href="../example/techniques/no_rules/no_rule2.cpp">no_rule2.cpp</a>):</p>
<pre><code><span class=special> </span><span class=keyword>struct </span><span class=identifier>skip_grammar </span><span class=special>: </span><span class=identifier>grammar</span><span class=special>&lt;</span><span class=identifier>skip_grammar</span><span class=special>&gt;
{
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>definition
</span><span class=special>{
</span> <span class=identifier>definition</span><span class=special>(</span><span class=identifier>skip_grammar </span><span class=keyword>const</span><span class=special>&amp; /*</span><span class=identifier>self</span><span class=special>*/)
: </span><span class=identifier>skip</span><span class=special>
( </span><span class=identifier>space_p
</span><span class=special>| </span><span class=string>&quot;//&quot; </span><span class=special>&gt;&gt; *(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=literal>'\n'</span><span class=special>) &gt;&gt; </span><span class=literal>'\n'
</span><span class=special>| </span><span class=string>&quot;/*&quot; </span><span class=special>&gt;&gt; *(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=string>&quot;*/&quot;</span><span class=special>) &gt;&gt; </span><span class=string>&quot;*/&quot;
</span><span class=special>)
{
}
</span><span class=keyword>typedef
</span><span class=identifier>alternative</span><span class=special>&lt;</span><span class=identifier>alternative</span><span class=special>&lt;</span><span class=identifier>space_parser</span><span class=special>, </span><span class=identifier>sequence</span><span class=special>&lt;</span><span class=identifier>sequence</span><span class=special>&lt;
</span><span class=identifier>strlit</span><span class=special>&lt;</span><span class=keyword>const </span><span class=keyword>char</span><span class=special>*&gt;, </span><span class=identifier>kleene_star</span><span class=special>&lt;</span><span class=identifier>difference</span><span class=special>&lt;</span><span class=identifier>anychar_parser</span><span class=special>,
</span><span class=identifier>chlit</span><span class=special>&lt;</span><span class=keyword>char</span><span class=special>&gt; &gt; &gt; &gt;, </span><span class=identifier>chlit</span><span class=special>&lt;</span><span class=keyword>char</span><span class=special>&gt; &gt; &gt;, </span><span class=identifier>sequence</span><span class=special>&lt;</span><span class=identifier>sequence</span><span class=special>&lt;
</span><span class=identifier>strlit</span><span class=special>&lt;</span><span class=keyword>const </span><span class=keyword>char</span><span class=special>*&gt;, </span><span class=identifier>kleene_star</span><span class=special>&lt;</span><span class=identifier>difference</span><span class=special>&lt;</span><span class=identifier>anychar_parser</span><span class=special>,
</span><span class=identifier>strlit</span><span class=special>&lt;</span><span class=keyword>const </span><span class=keyword>char</span><span class=special>*&gt; &gt; &gt; &gt;, </span><span class=identifier>strlit</span><span class=special>&lt;</span><span class=keyword>const </span><span class=keyword>char</span><span class=special>*&gt; &gt; &gt;
</span><span class=identifier>skip_t</span><span class=special>;
</span><span class=special> </span><span class=identifier>skip_t </span><span class=identifier>skip</span><span class=special>;
</span><span class=identifier>skip_t </span><span class=keyword>const</span><span class=special>&amp;
</span><span class=identifier>start</span><span class=special>() </span><span class=keyword>const </span><span class=special>{ </span><span class=keyword>return </span><span class=identifier>skip</span><span class=special>; }
};
};</span></code></pre>
<p> Ughhh! How did I do that? How was I able to get at the complex typedef? Am
I insane? Well, not really... there's a trick! What you do is define the typedef
<tt>skip_t</tt> first as int:</p>
<pre>
<code><span class=keyword>typedef </span><span class=keyword>int </span><span class=identifier>skip_t</span><span class=special>;</span></code></pre>
<p> Try to compile. Then, the compiler will generate an obnoxious error message
such as:</p>
<pre>
<code><span class=string>&quot;cannot convert boost::spirit::alternative&lt;... blah blah...to int&quot;</span><span class=special>.</span></code></pre>
<p> <strong>THERE YOU GO!</strong> You got its type! I just copy and paste the
correct type (removing explicit qualifications, if preferred).</p>
<p> Can we still go further? Yes. Remember that the grammar was designed for rules.
The nested template definition class is needed to get around the rule's limitations.
Without rules, I propose a new class called <tt>sub_grammar</tt>, the grammar's
low-fat counterpart:</p>
<pre><code><span class=special> </span><span class=keyword>namespace </span><span class=identifier>boost </span><span class=special>{ </span><span class=keyword>namespace </span><span class=identifier>spirit
</span><span class=special>{
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>DerivedT</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>sub_grammar </span><span class=special>: </span><span class=identifier>parser</span><span class=special>&lt;</span><span class=identifier>DerivedT</span><span class=special>&gt;
{
</span><span class=keyword>typedef </span><span class=identifier>sub_grammar </span><span class=identifier>self_t</span><span class=special>;
</span><span class=keyword>typedef </span><span class=identifier>DerivedT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>embed_t</span><span class=special>;
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>result
</span><span class=special>{
</span><span class=keyword>typedef </span><span class=keyword>typename </span><span class=identifier>parser_result</span><span class=special>&lt;
</span><span class=keyword>typename </span><span class=identifier>DerivedT</span><span class=special>::</span><span class=identifier>start_t</span><span class=special>, </span><span class=identifier>ScannerT</span><span class=special>&gt;::</span><span class=identifier>type
</span><span class=identifier>type</span><span class=special>;
};
</span><span class=identifier>DerivedT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>derived</span><span class=special>() </span><span class=keyword>const
</span><span class=special>{ </span><span class=keyword>return </span><span class=special>*</span><span class=keyword>static_cast</span><span class=special>&lt;</span><span class=identifier>DerivedT </span><span class=keyword>const</span><span class=special>*&gt;(</span><span class=keyword>this</span><span class=special>); }
</span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>ScannerT</span><span class=special>&gt;
</span><span class=keyword>typename </span><span class=identifier>parser_result</span><span class=special>&lt;</span><span class=identifier>self_t</span><span class=special>, </span><span class=identifier>ScannerT</span><span class=special>&gt;::</span><span class=identifier>type
</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>ScannerT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>scan</span><span class=special>) </span><span class=keyword>const
</span><span class=special>{
</span><span class=keyword>return </span><span class=identifier>derived</span><span class=special>().</span><span class=identifier>start</span><span class=special>.</span><span class=identifier>parse</span><span class=special>(</span><span class=identifier>scan</span><span class=special>);
}
};
}}</span></code></pre>
<p>With the <tt>sub_grammar</tt> class, we can define our skipper grammar this
way (see <a href="../example/techniques/no_rules/no_rule3.cpp">no_rule3.cpp</a>):</p>
<pre><code><span class=special> </span><span class=keyword>struct </span><span class=identifier>skip_grammar </span><span class=special>: </span><span class=identifier>sub_grammar</span><span class=special>&lt;</span><span class=identifier>skip_grammar</span><span class=special>&gt;
{
</span><span class=keyword>typedef
</span><span class=identifier>alternative</span><span class=special>&lt;</span><span class=identifier>alternative</span><span class=special>&lt;</span><span class=identifier>space_parser</span><span class=special>, </span><span class=identifier>sequence</span><span class=special>&lt;</span><span class=identifier>sequence</span><span class=special>&lt;
</span><span class=identifier>strlit</span><span class=special>&lt;</span><span class=keyword>const </span><span class=keyword>char</span><span class=special>*&gt;, </span><span class=identifier>kleene_star</span><span class=special>&lt;</span><span class=identifier>difference</span><span class=special>&lt;</span><span class=identifier>anychar_parser</span><span class=special>,
</span><span class=identifier>chlit</span><span class=special>&lt;</span><span class=keyword>char</span><span class=special>&gt; &gt; &gt; &gt;, </span><span class=identifier>chlit</span><span class=special>&lt;</span><span class=keyword>char</span><span class=special>&gt; &gt; &gt;, </span><span class=identifier>sequence</span><span class=special>&lt;</span><span class=identifier>sequence</span><span class=special>&lt;
</span><span class=identifier>strlit</span><span class=special>&lt;</span><span class=keyword>const </span><span class=keyword>char</span><span class=special>*&gt;, </span><span class=identifier>kleene_star</span><span class=special>&lt;</span><span class=identifier>difference</span><span class=special>&lt;</span><span class=identifier>anychar_parser</span><span class=special>,
</span><span class=identifier>strlit</span><span class=special>&lt;</span><span class=keyword>const </span><span class=keyword>char</span><span class=special>*&gt; &gt; &gt; &gt;, </span><span class=identifier>strlit</span><span class=special>&lt;</span><span class=keyword>const </span><span class=keyword>char</span><span class=special>*&gt; &gt; &gt;
</span><span class=identifier>start_t</span><span class=special>;
</span><span class=identifier>skip_grammar</span><span class=special>()
: </span><span class=identifier>start
</span><span class=special>(
</span><span class=identifier>space_p
</span><span class=special>| </span><span class=string>&quot;//&quot; </span><span class=special>&gt;&gt; *(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=literal>'\n'</span><span class=special>) &gt;&gt; </span><span class=literal>'\n'
</span><span class=special>| </span><span class=string>&quot;/*&quot; </span><span class=special>&gt;&gt; *(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=string>&quot;*/&quot;</span><span class=special>) &gt;&gt; </span><span class=string>&quot;*/&quot;
</span><span class=special>)
{}
</span><span class=identifier>start_t </span><span class=identifier>start</span><span class=special>;
};</span></code></pre>
<p>But what for, you ask? You can simply use the <tt>start_t</tt> type above as-is.
It's already a parser! We can just type:</p>
<pre>
<code><span class=identifier>skipper_t </span><span class=identifier>skipper </span><span class=special>=
</span><span class=identifier>space_p
</span><span class=special>| </span><span class=string>&quot;//&quot; </span><span class=special>&gt;&gt; *(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=literal>'\n'</span><span class=special>) &gt;&gt; </span><span class=literal>'\n' </span><br> <span class=special>| </span><span class=string>&quot;/*&quot; </span><span class=special>&gt;&gt; *(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=string>&quot;*/&quot;</span><span class=special>) &gt;&gt; </span><span class=string>&quot;*/&quot;</span>
<span class=special> ;</span></code></pre>
<p> and use <tt>skipper</tt> just as we would any parser? Well, a subtle difference
is that <tt>skipper</tt>, used this way will be embedded <strong>by value </strong>when<strong>
</strong>you compose more complex parsers using it. That is, if we use <tt>skipper</tt>
inside another production, the whole thing will be stored in the composite.
Heavy!</p>
<p> The proposed <tt>sub_grammar</tt> OTOH will be held by reference. Note:</p>
<pre><code> <span class=keyword>typedef </span><span class=identifier>DerivedT </span><span class=keyword>const</span><span class=special>&amp; </span><span class=identifier>embed_t</span><span class=special>;</span></code></pre>
<p>The proposed <tt>sub_grammar</tt> does not have the inherent limitations of
rules, is very lightweight, and should be blazingly fast (can be fully inlined
and does not use virtual functions). Perhaps this class will be part of a future
spirit release. </p>
<table width="80%" border="0" align="center">
<tr>
<td class="note_box"><img src="theme/note.gif" width="16" height="16"> <strong>The
no-rules result</strong><br> <br>
So, how much did we save? On MSVC 7.1, the original code: <a href="../example/techniques/no_rules/no_rule1.cpp">no_rule1.cpp</a>
compiles to <strong>28k</strong>. Eliding rules, <a href="../example/techniques/no_rules/no_rule2.cpp">no_rule2.cpp</a>,
we got <strong>24k</strong>. Not bad, we shaved off 4k amounting to a 14%
reduction. But you'll be in for a surprise. The last version, using the
sub-grammar: <a href="../example/techniques/no_rules/no_rule3.cpp">no_rule3.cpp</a>,
compiles to <strong>5.5k</strong>! That's a whopping 80% reduction.<br>
<br>
<table width="100%" border="1">
<tr>
<td><a href="../example/techniques/no_rules/no_rule1.cpp">no_rule1.cpp</a></td>
<td><strong>28k</strong></td>
<td>standard rule and grammar</td>
</tr>
<tr>
<td><a href="../example/techniques/no_rules/no_rule2.cpp">no_rule2.cpp</a></td>
<td><strong>24k</strong></td>
<td>standard grammar, no rule</td>
</tr>
<tr>
<td><a href="../example/techniques/no_rules/no_rule3.cpp">no_rule3.cpp</a></td>
<td><strong>5.5k</strong></td>
<td>sub_grammar, no rule, no grammar</td>
</tr>
</table> </td>
</tr>
</table>
<h3><b> <a name="typeof" id="typeof"></a> typeof</b></h3>
<p>Some compilers already support the <tt>typeof</tt> keyword. Examples are g++
and Metrowerks CodeWarrior. Someday, <tt>typeof</tt> will become commonplace.
It is worth noting that we can use <tt>typeof</tt> to define non-recursive rules
without using the rule class. To give an example, we'll use the skipper example
above; this time using <tt>typeof</tt>. First, to avoid redundancy, we'll introduce
a macro <tt>RULE</tt>: </p>
<pre><code> <span class=preprocessor>#define </span><span class=identifier>RULE</span><span class=special>(</span><span class=identifier>name</span><span class=special>, </span><span class=identifier>definition</span><span class=special>) </span><span class="keyword">typeof</span><span class=special>(</span><span class=identifier>definition</span><span class=special>) </span><span class=identifier>name </span><span class=special>= </span><span class=identifier>definition</span></code></pre>
<p>Then, simply:</p>
<pre><code><span class=identifier> </span><span class=identifier>RULE</span><span class=special>(
</span><span class=identifier>skipper</span><span class=special>,
( </span><span class=identifier>space_p
</span><span class=special>| </span><span class=string>&quot;//&quot; </span><span class=special>&gt;&gt; *(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=literal>'\n'</span><span class=special>) &gt;&gt; </span><span class=literal>'\n'
</span><span class=special>| </span><span class=string>&quot;/*&quot; </span><span class=special>&gt;&gt; *(</span><span class=identifier>anychar_p </span><span class=special>- </span><span class=string>&quot;*/&quot;</span><span class=special>) &gt;&gt; </span><span class=string>&quot;*/&quot;
</span><span class=special>)
);</span></code></pre>
<p>(see <a href="../example/techniques/typeof.cpp">typeof.cpp</a>)</p>
<p>That's it! Now you can use skipper just as you would any parser. Be reminded,
however, that <tt>skipper</tt> above will be embedded by value when<strong>
</strong>you compose more complex parsers using it (see <tt>sub_grammar</tt> rationale above). You can use the <tt>sub_grammar</tt> class to avoid this problem.</p>
<h3><a name="nabialek_trick"></a> Nabialek trick</h3>
<p>This technique, which I'll call the <strong><em>&quot;Nabialek trick&quot; </em></strong>(from the name of its inventor, Sam Nabialek), can improve the rule dispatch from linear non-deterministic to deterministic. The trick applies to grammars where a keyword (operator, etc.) precedes a production. There are lots of grammars similar to this:</p>
<pre> <span class=identifier>r </span><span class=special>=
</span><span class=identifier>keyword1 </span><span class=special>&gt;&gt; </span><span class=identifier>production1
</span><span class=special>| </span><span class=identifier>keyword2 </span><span class=special>&gt;&gt; </span><span class=identifier>production2
</span><span class=special>| </span><span class=identifier>keyword3 </span><span class=special>&gt;&gt; </span><span class=identifier>production3
</span><span class=special>| </span><span class=identifier>keyword4 </span><span class=special>&gt;&gt; </span><span class=identifier>production4
</span><span class=special>| </span><span class=identifier>keyword5 </span><span class=special>&gt;&gt; </span><span class=identifier>production5
</span><span class=comment>/*** etc ***/
</span><span class=special>;</span></pre>
<p>The cascaded alternatives are tried one at a time through trial and error until something matches. The Nabialek trick takes advantage of the <a href="symbols.html">symbol table</a>'s search properties to optimize the dispatching of the alternatives. For an example, see <a href="../example/techniques/nabialek.cpp">nabialek.cpp</a>. The grammar works as follows. There are two rules (<tt>one</tt> and <tt>two</tt>). When &quot;one&quot; is recognized, rule <tt>one</tt> is invoked. When &quot;two&quot; is recognized, rule <tt>two</tt> is invoked. Here's the grammar:</p>
<pre><span class=special> </span><span class=identifier>one </span><span class=special>= </span><span class=identifier>name</span><span class=special>;
</span><span class=identifier>two </span><span class=special>= </span><span class=identifier>name </span><span class=special>&gt;&gt; </span><span class=literal>',' </span><span class=special>&gt;&gt; </span><span class=identifier>name</span><span class=special>;
</span><span class=identifier>continuations</span><span class=special>.</span><span class=identifier>add
</span><span class=special>(</span><span class=string>&quot;one&quot;</span><span class=special>, &amp;</span><span class=identifier>one</span><span class=special>)
</span><span class=special>(</span><span class=string>&quot;two&quot;</span><span class=special>, &amp;</span><span class=identifier>two</span><span class=special>)
</span><span class=special>;
</span><span class=identifier>line </span><span class=special>= </span><span class=identifier>continuations</span><span class=special>[</span><span class=identifier>set_rest</span><span class=special>&lt;</span><span class=identifier>rule_t</span><span class=special>&gt;(</span><span class=identifier>rest</span><span class=special>)] </span><span class=special>&gt;&gt; </span><span class=identifier>rest</span><span class=special>;</span></pre>
<p>where continuations is a <a href="symbols.html">symbol table</a> with pointer to rule_t slots. one, two, name, line and rest are rules:</p>
<pre><span class=special> </span><span class=identifier>rule_t </span><span class=identifier>name</span><span class=special>;
</span><span class=identifier>rule_t </span><span class=identifier>line</span><span class=special>;
</span><span class=identifier>rule_t </span><span class=identifier>rest</span><span class=special>;
</span><span class=identifier>rule_t </span><span class=identifier>one</span><span class=special>;
</span><span class=identifier>rule_t </span><span class=identifier>two</span><span class=special>;
</span><span class=identifier>symbols</span><span class=special>&lt;</span><span class=identifier>rule_t</span><span class=special>*&gt; </span><span class=identifier>continuations</span><span class=special>;</span></pre>
<p><tt>set_rest</tt>, the semantic action attached to <tt>continuations</tt>, is:</p>
<pre><span class=special> </span><span class=keyword>template </span><span class=special>&lt;</span><span class=keyword>typename </span><span class=identifier>Rule</span><span class=special>&gt;
</span><span class=keyword>struct </span><span class=identifier>set_rest
</span><span class=special>{
</span><span class=identifier>set_rest</span><span class=special>(</span><span class=identifier>Rule</span><span class=special>&amp; </span><span class=identifier>the_rule</span><span class=special>)
</span><span class=special>: </span><span class=identifier>the_rule</span><span class=special>(</span><span class=identifier>the_rule</span><span class=special>) </span><span class=special>{}
</span><span class=keyword>void </span><span class=keyword>operator</span><span class=special>()(</span><span class=identifier>Rule</span><span class=special>* </span><span class=identifier>newRule</span><span class=special>) </span><span class=keyword>const
</span><span class=special>{ </span><span class=identifier>the_rule </span><span class=special>= </span><span class=special>*</span><span class=identifier>newRule</span><span class=special>; </span><span class=special>}
</span><span class=identifier>Rule</span><span class=special>&amp; </span><span class=identifier>the_rule</span><span class=special>;
</span><span class=special>};</span></pre>
<p>Notice how the <tt>rest</tt> rule gets set dynamically when the <tt>set_rest</tt> action is called. The dynamic grammar parses inputs such as:</p>
<p> &quot;one only&quot;<br>
&quot;one again&quot;<br>
&quot;two first, second&quot;</p>
<p>The cool part is that the <tt>rest</tt> rule is set (by the <tt>set_rest</tt> action) depending on what the symbol table got. If it got a <em>&quot;one&quot;</em> then rest = one. If it got <em>&quot;two&quot;</em>, then rest = two. Very nifty! This technique should be very fast, especially when there are lots of keywords. It would be nice to add special facilities to make this easy to use. I imagine:</p>
<pre><span class=special> </span><span class=identifier>r </span><span class=special>= </span><span class=identifier>keywords </span><span class=special>&gt;&gt; </span><span class=identifier>rest</span><span class=special>;</span></pre>
<p>where <tt>keywords</tt> is a special parser (based on the symbol table) that automatically sets its RHS (rest) depending on the acquired symbol. This, I think, is mighty cool! Someday perhaps... </p>
<p><img src="theme/note.gif" width="16" height="16"> Also, see the <a href="switch_parser.html">switch parser</a> for another deterministic parsing trick for character/token prefixes. </p>
<span class=special></span>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="style_guide.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="faq.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 1998-2003 Joel de Guzman<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)</font></p>
<p class="copyright">&nbsp;</p>
</body>
</html>

View File

@@ -0,0 +1,117 @@
<html>
<head>
<title>The Lazy Parser</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="theme/style.css" type="text/css">
</head>
<body>
<table width="100%" border="0" background="theme/bkd2.gif" cellspacing="2">
<tr>
<td width="10">
</td>
<td width="85%"> <font size="6" face="Verdana, Arial, Helvetica, sans-serif"><b>The
Lazy Parser</b></font></td>
<td width="112"><a href="http://spirit.sf.net"><img src="theme/spirit.gif" width="112" height="48" align="right" border="0"></a></td>
</tr>
</table>
<br>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="dynamic_parsers.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="select_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<p>Closures are cool. They allow us to inject stack-based local variables anywhere
in our parse descent hierarchy. Typically, we store temporary variables, generated
by our semantic actions, in our closure variables, as a means to pass information
up and down the recursive descent.</p>
<p>Now imagine this... Having in mind that closure variables can be just about
any type, we can store a parser, a rule, or a pointer to a parser or rule, in
a closure variable. <em>Yeah, right, so what?...</em> Ok, hold on... What if
we can use this closure variable to initiate a parse? Think about it for a second.
Suddenly we'll have some powerful dynamic parsers! Suddenly we'll have a full
round trip from <a href="../phoenix/index.html">Phoenix</a> to Spirit and
back! <a href="../phoenix/index.html">Phoenix</a> semantic actions choose the
right Spirit parser and Spirit parsers choose the right <a href="../phoenix/index.html">Phoenix</a>
semantic action. Oh MAN, what a honky cool idea, I might say!!</p>
<h2>lazy_p</h2>
<p>This is the idea behind the <tt>lazy_p</tt> parser. The <tt>lazy_p</tt> syntax
is:</p>
<pre> lazy_p<span class="special">(</span>actor<span class="special">)</span></pre>
<p>where actor is a <a href="../phoenix/index.html">Phoenix</a> expression that
returns a Spirit parser. This returned parser is used in the parsing process.
</p>
<p>Example: </p>
<pre> lazy_p<span class="special">(</span>phoenix<span class="special">::</span>val<span class="special">(</span>int_p<span class="special">))[</span>assign_a<span class="special">(</span>result<span class="special">)]</span>
</pre>
<p>Semantic actions attached to the <tt>lazy_p</tt> parser expect the same signature
as that of the returned parser (<tt>int_p</tt>, in our example above).</p>
<h2>lazy_p example</h2>
<p>To give you a better glimpse (see the <tt><a href="../example/intermediate/lazy_parser.cpp">lazy_parser.cpp</a></tt>),
say you want to parse inputs such as:</p>
<pre> <span class=identifier>dec
</span><span class="special">{</span><span class=identifier><br> 1 2 3<br> bin
</span><span class="special">{</span><span class=identifier><br> 1 10 11<br> </span><span class="special">}</span><span class=identifier><br> 4 5 6<br> </span><span class="special">}</span></pre>
<p>where <tt>bin {...}</tt> and <tt>dec {...}</tt> specify the numeric format
(binary or decimal) that we are expecting to read. If we analyze the input,
we want a grammar like:</p>
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>base </span><span class="special">=</span><span class=identifier> </span><span class="string">&quot;bin&quot;</span><span class=identifier> </span><span class="special">|</span><span class=identifier> </span><span class="string">&quot;dec&quot;</span><span class="special">;</span><span class=identifier>
block </span><span class=special>= </span><span class="identifier">base</span><span class=special> &gt;&gt; </span><span class="literal">'{'</span><span class=special> &gt;&gt; *</span><span class="identifier">block_line</span><span class=special> &gt;&gt; </span><span class="literal">'}'</span><span class=special>;
</span>block_line <span class=special>= </span><span class="identifier">number</span><span class=special> | </span><span class=identifier>block</span><span class=special>;</span></font></code></pre>
<p>We intentionally left out the <code><font color="#000000"><span class="identifier"><tt>number</tt></span></font></code>
rule. The tricky part is that the way the <tt>number</tt> rule behaves depends on
the result of the <tt>base</tt> rule. If <tt>base</tt> got a <em>&quot;bin&quot;</em>,
then number should parse binary numbers. If <tt>base</tt> got a <em>&quot;dec&quot;</em>,
then number should parse decimal numbers. Typically we'll have to rewrite our
grammar to accommodate the different parsing behavior:</p>
<pre><code><font color="#000000"><span class=identifier> block </span><span class=special>=
</span><span class=identifier>&quot;bin&quot;</span> <span class=special>&gt;&gt; </span><span class="literal">'{'</span><span class=special> &gt;&gt; *</span>bin_line<span class=special> &gt;&gt; </span><span class="literal">'}'</span><span class=special>
| </span><span class=identifier>&quot;dec&quot;</span> <span class=special>&gt;&gt; </span><span class="literal">'{'</span><span class=special> &gt;&gt; *</span>dec_line<span class=special> &gt;&gt; </span><span class="literal">'}'</span><span class=special>
;
</span>bin_line <span class=special>= </span><span class="identifier">bin_p</span><span class=special> | </span><span class=identifier>block</span><span class=special>;
</span>dec_line <span class=special>= </span><span class="identifier">int_p</span><span class=special> | </span><span class=identifier>block</span><span class=special>;</span></font></code></pre>
<p>While this is fine, the redundancy makes us want to find a better solution;
after all, we'd want to make full use of Spirit's dynamic parsing capabilities.
Apart from that, there will be cases where the set of parsing behaviors for
our <tt>number</tt> rule is not known when the grammar is written. We'll only
be given a map of string descriptors and corresponding rules [e.g. ((&quot;dec&quot;,
int_p), (&quot;bin&quot;, bin_p) ... etc...)].</p>
<p>The basic idea is to have a rule for binary and decimal numbers. That's easy
enough to do (see <a href="numerics.html">numerics</a>). When <tt>base</tt>
is being parsed, in your semantic action, store a pointer to the selected base
in a closure variable (e.g. <tt>block.int_rule</tt>). Here's an example:</p>
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>base
</span><span class="special">=</span><span class=identifier> str_p</span><span class="special">(</span><span class="string">&quot;bin&quot;</span><span class="special">)[</span><span class=identifier>block.int_rule</span> = <span class="special">&amp;</span>var<span class="special">(</span><span class="identifier">bin_rule</span><span class="special">)]
| </span><span class=identifier>str_p</span><span class="special">(</span><span class="string">&quot;dec&quot;</span><span class="special">)[</span><span class=identifier>block.int_rule</span> = <span class="special">&amp;</span>var<span class="special">(</span><span class="identifier">dec_rule</span><span class="special">)]
;</span></font></code></pre>
<p>With this setup, your number rule will now look something like:</p>
<pre><code><font color="#000000"><span class=special> </span><span class=identifier>number </span><span class="special">=</span><span class=identifier> lazy_p</span><span class="special">(*</span><span class=identifier>block.int_rule</span><span class="special">);</span></font></code></pre>
<p>The <tt><a href="../example/intermediate/lazy_parser.cpp">lazy_parser.cpp</a></tt>
does it a bit differently, ingeniously using the <a href="symbols.html">symbol
table</a> to dispatch the correct rule, but in essence, both strategies are
similar. This technique, using the symbol table, is detailed in the Techniques section: <a href="techniques.html#nabialek_trick">nabialek_trick</a>. Admittedly, when you add up all the rules, the resulting grammar is
more complex than the hard-coded grammar above. Yet, for more complex grammar
patterns with a lot more rules to choose from, the additional setup is well
worth it.</p>
<table border="0">
<tr>
<td width="10"></td>
<td width="30"><a href="../index.html"><img src="theme/u_arr.gif" border="0"></a></td>
<td width="30"><a href="dynamic_parsers.html"><img src="theme/l_arr.gif" border="0"></a></td>
<td width="30"><a href="select_parser.html"><img src="theme/r_arr.gif" border="0"></a></td>
</tr>
</table>
<br>
<hr size="1">
<p class="copyright">Copyright &copy; 2003 Joel de Guzman<br>
Copyright &copy; 2003 Vaclav Vesely<br>
<br>
<font size="2">Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)</font></p>
<p class="copyright">&nbsp;</p>
</body>
</html>

Binary file not shown.

After

Width:  |  Height:  |  Size: 577 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 70 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 944 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 152 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 37 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 147 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 91 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 509 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 897 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 151 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 147 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 91 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.5 KiB

View File

@@ -0,0 +1,174 @@
/* Use, modification and distribution is subject to the Boost Software License,
Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
body
{
background-image: url(bkd.gif);
background-color: #FFFFFF;
margin: 1em 2em 1em 2em;
}
h1 { font-family: Verdana, Arial, Helvetica, sans-serif; font-weight: bold; text-align: left; }
h2 { font: 140% sans-serif; font-weight: bold; text-align: left; }
h3 { font: 120% sans-serif; font-weight: bold; text-align: left; }
h4 { font: bold 100% sans-serif; font-weight: bold; text-align: left; }
h5 { font: italic 100% sans-serif; font-weight: bold; text-align: left; }
h6 { font: small-caps 100% sans-serif; font-weight: bold; text-align: left; }
/* Preformatted listings: gray 1pt frame, 2pt padding, light gray background. */
pre
{
    border-top: gray 1pt solid;
    border-right: gray 1pt solid;
    border-left: gray 1pt solid;
    border-bottom: gray 1pt solid;
    padding-top: 2pt;
    padding-right: 2pt;
    padding-left: 2pt;
    padding-bottom: 2pt;
    display: block;
    /* "monospace" is the CSS generic family keyword; "mono" is not one and
       would be looked up as an installed font of that name. */
    font-family: "courier new", courier, monospace;
    background-color: #eeeeee;
    font-size: small
}
/* Inline code: small fixed-width text. */
code
{
    /* Final fallback must be the generic keyword "monospace" ("mono" is not a generic family). */
    font-family: "Courier New", Courier, monospace;
    font-size: small
}
/* Teletype text: inline, small, fixed-width, dark blue. */
tt
{
    display: inline;
    /* Final fallback must be the generic keyword "monospace" ("mono" is not a generic family). */
    font-family: "Courier New", Courier, monospace;
    color: #000099;
    font-size: small
}
p
{
text-align: justify;
font-family: Georgia, "Times New Roman", Times, serif
}
ul
{
list-style-image: url(bullet.gif);
font-family: Georgia, "Times New Roman", Times, serif
}
ol
{
font-family: Georgia, "Times New Roman", Times, serif
}
a
{
font-weight: bold;
color: #003366;
text-decoration: none;
}
a:hover { color: #8080FF; }
.literal { color: #666666; font-style: italic}
.keyword { color: #000099}
.identifier {}
.comment { font-style: italic; color: #990000}
.special { color: #800040}
.preprocessor { color: #FF0000}
.string { font-style: italic; color: #666666}
.copyright { color: #666666; font-size: small}
.white_bkd { background-color: #FFFFFF}
.dk_grey_bkd { background-color: #999999}
.quotes { color: #666666; font-style: italic; font-weight: bold}
.note_box
{
display: block;
border-top: gray 1pt solid;
border-right: gray 1pt solid;
border-left: gray 1pt solid;
border-bottom: gray 1pt solid;
padding-right: 12pt;
padding-left: 12pt;
padding-bottom: 12pt;
padding-top: 12pt;
font-family: Arial, Helvetica, sans-serif;
background-color: #E2E9EF;
font-size: small; text-align: justify
}
.table_title
{
background-color: #648CCA;
font-family: Verdana, Arial, Helvetica, sans-serif; color: #FFFFFF;
font-weight: bold
; padding-top: 4px; padding-right: 4px; padding-bottom: 4px; padding-left: 4px
}
/* Body cells of documentation tables. */
.table_cells
{
    background-color: #E2E9EF;
    /* Generic fallback is spelled "sans-serif"; "san-serif" is not a generic family. */
    font-family: Geneva, Arial, Helvetica, sans-serif;
    font-size: small;
    padding-top: 4px;
    padding-right: 4px;
    padding-bottom: 4px;
    padding-left: 4px
}
/* Table-of-contents box: framed block with 24pt padding on every side. */
.toc
{
    display: block;
    /* The terminating ';' is required here: without it this declaration and the
       following font-family merge into one invalid declaration and both are dropped. */
    background-color: #E2E9EF;
    font-family: Arial, Helvetica, sans-serif;
    border-top: gray 1pt solid;
    border-left: gray 1pt solid;
    border-bottom: gray 1pt solid;
    border-right: gray 1pt solid;
    padding-top: 24pt;
    padding-right: 24pt;
    padding-left: 24pt;
    padding-bottom: 24pt;
}
/* Title row of the table of contents: bold white text on a blue background. */
.toc_title
{
    background-color: #648CCA;
    padding-top: 4px;
    padding-right: 4px;
    padding-bottom: 4px;
    padding-left: 4px;
    /* Generic fallback is spelled "sans-serif"; "san-serif" is not a generic family. */
    font-family: Geneva, Arial, Helvetica, sans-serif;
    color: #FFFFFF;
    font-weight: bold
}
/* Entry cells of the table of contents. */
.toc_cells
{
    background-color: #E2E9EF;
    padding-top: 4px;
    padding-right: 4px;
    padding-bottom: 4px;
    padding-left: 4px;
    /* Generic fallback is spelled "sans-serif"; "san-serif" is not a generic family. */
    font-family: Geneva, Arial, Helvetica, sans-serif;
    font-size: small
}
div.logo
{
float: right;
}
/* Table-of-contents cells by nesting level; only padding-left differs between levels.
   The generic font fallback is spelled "sans-serif" ("san-serif" is not a generic family). */
.toc_cells_L0 { background-color: #E2E9EF; padding-top: 4px; padding-right: 4px; padding-bottom: 4px; padding-left: 4px; font-family: Geneva, Arial, Helvetica, sans-serif; font-size: small }
.toc_cells_L1 { background-color: #E2E9EF; padding-top: 4px; padding-right: 4px; padding-bottom: 4px; padding-left: 44px; font-family: Geneva, Arial, Helvetica, sans-serif; font-size: small }
.toc_cells_L2 { background-color: #E2E9EF; padding-top: 4px; padding-right: 4px; padding-bottom: 4px; padding-left: 88px; font-family: Geneva, Arial, Helvetica, sans-serif; font-size: small }
.toc_cells_L3 { background-color: #E2E9EF; padding-top: 4px; padding-right: 4px; padding-bottom: 4px; padding-left: 122px; font-family: Geneva, Arial, Helvetica, sans-serif; font-size: small }
.toc_cells_L4 { background-color: #E2E9EF; padding-top: 4px; padding-right: 4px; padding-bottom: 4px; padding-left: 166px; font-family: Geneva, Arial, Helvetica, sans-serif; font-size: small }

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 170 B

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,300 @@
#==============================================================================
# Copyright (c) 2002 Joel de Guzman
# http://spirit.sourceforge.net/
#
# Use, modification and distribution is subject to the Boost Software
# License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt)
#==============================================================================
#
# Spirit examples boost-jam file
# Joel de Guzman [Sep 27, 2002] : created
# Joel de Guzman [Oct 30, 2003] : separated the applications
# Martin Wille [Jan 15, 2004] : changes for new directory structure
# Martin Wille [Jan 20, 2004] : more changes for new directory structure
# Joel de Guzman [Jul 29, 2004] : added calc_debug.cpp
#
exe ast_calc
: fundamental/ast_calc.cpp
:
;
exe "bind"
: fundamental/bind.cpp
:
;
exe boiler_plate
: fundamental/boiler_plate.cpp
:
;
exe calc_plain
: fundamental/calc_plain.cpp
:
;
exe calc_debug
: fundamental/calc_debug.cpp
:
;
exe comments
: fundamental/comments.cpp
:
;
exe complex_number
: fundamental/complex_number.cpp
:
;
exe error_handling
: fundamental/error_handling.cpp
:
;
exe error_reporting
: fundamental/error_reporting.cpp
:
;
exe file_parser
: fundamental/file_parser.cpp
:
;
exe full_calc
: fundamental/full_calc.cpp
:
;
exe functor_parser
: fundamental/functor_parser.cpp
:
;
exe list_parser
: fundamental/list_parser.cpp
:
;
exe matching_tags
: fundamental/matching_tags.cpp
:
;
exe no_actions
: fundamental/no_actions.cpp
:
;
exe number_list
: fundamental/number_list.cpp
:
;
exe parse_tree_calc1
: fundamental/parse_tree_calc1.cpp
:
;
exe parser_context
: fundamental/parser_context.cpp
:
;
exe phoenix_calc
: fundamental/phoenix_calc.cpp
:
;
exe position_iterator
: fundamental/position_iterator/position_iterator.cpp
:
;
exe refactoring
: fundamental/refactoring.cpp
:
;
exe regular_expression
: fundamental/regular_expression.cpp
/boost//regex
:
;
exe roman_numerals
: fundamental/roman_numerals.cpp
:
;
exe stuff_vector
: fundamental/stuff_vector.cpp
:
;
exe stuff_vector2
: fundamental/stuff_vector2.cpp
:
;
exe subrule_calc
: fundamental/subrule_calc.cpp
:
;
exe sum
: fundamental/sum.cpp
:
;
exe thousand_separated
: fundamental/thousand_separated.cpp
:
;
exe ast_calc2
: fundamental/more_calculators/ast_calc2.cpp
:
;
exe calc_with_variables
: fundamental/more_calculators/calc_with_variables.cpp
:
;
exe phoenix_subrule_calc
: fundamental/more_calculators/phoenix_subrule_calc.cpp
:
;
exe primitive_calc
: fundamental/more_calculators/primitive_calc.cpp
:
;
exe rpn_calc
: fundamental/more_calculators/rpn_calc.cpp
:
;
exe vmachine_calc
: fundamental/more_calculators/vmachine_calc.cpp
:
;
exe distinct_parser
: fundamental/distinct/distinct_parser.cpp
:
;
exe distinct_parser_dynamic
: fundamental/distinct/distinct_parser_dynamic.cpp
:
;
################################################################################
exe ipv4
: intermediate/ipv4.cpp
:
;
exe ipv4_opt
: intermediate/ipv4_opt.cpp
:
;
exe lazy_parser
: intermediate/lazy_parser.cpp
:
;
exe parameters
: intermediate/parameters.cpp
:
;
exe regex_convert
: intermediate/regex_convert.cpp
/boost//regex
:
;
# simple_xml is built from two translation units; each source is listed once
# (tag.cpp was previously repeated in the source list).
exe simple_xml
    : intermediate/simple_xml/driver.cpp
      intermediate/simple_xml/tag.cpp
    :
    ;
################################################################################
exe dynamic_rule
: techniques/dynamic_rule.cpp
:
;
exe epsilon
: techniques/epsilon.cpp
:
;
exe multiple_scanners
: techniques/multiple_scanners.cpp
:
;
exe nabialek
: techniques/nabialek.cpp
:
;
exe no_rule1
: techniques/no_rules/no_rule1.cpp
:
;
exe no_rule2
: techniques/no_rules/no_rule2.cpp
:
;
exe no_rule3
: techniques/no_rules/no_rule3.cpp
:
;
exe typeof
: techniques/typeof.cpp
:
;
exe rule_parser_1_1
: techniques/no_rules_with_typeof/rule_parser_1_1.cpp
:
;
exe rule_parser_1_2
: techniques/no_rules_with_typeof/rule_parser_1_2.cpp
:
;
exe rule_parser_2_1
: techniques/no_rules_with_typeof/rule_parser_2_1.cpp
:
;
exe rule_parser_2_2
: techniques/no_rules_with_typeof/rule_parser_2_2.cpp
:
;
exe opaque_rule_parser
: techniques/no_rules_with_typeof/opaque_rule_parser.cpp
:
;

Some files were not shown because too many files have changed in this diff Show More