diff --git a/.github/script/amd64-20.04.Dockerfile b/.github/script/amd64-20.04.Dockerfile index 40d980e5..1ec89ebd 100644 --- a/.github/script/amd64-20.04.Dockerfile +++ b/.github/script/amd64-20.04.Dockerfile @@ -17,4 +17,4 @@ ENV CC clang ENV CXX clang++ ENV CCACHE_DISABLE 1 RUN cmake -GNinja -DCMAKE_BUILD_TYPE=Release -DPORTABLE=1 -DTON_ARCH= -DCMAKE_CXX_FLAGS="-mavx2" .. -RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func validator-engine validator-engine-console create-state generate-random-id create-hardfork dht-server lite-client \ No newline at end of file +RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func tolk validator-engine validator-engine-console create-state generate-random-id create-hardfork dht-server lite-client diff --git a/.github/script/amd64-22.04.Dockerfile b/.github/script/amd64-22.04.Dockerfile index 44c9c40b..6134d167 100644 --- a/.github/script/amd64-22.04.Dockerfile +++ b/.github/script/amd64-22.04.Dockerfile @@ -17,4 +17,4 @@ ENV CC clang ENV CXX clang++ ENV CCACHE_DISABLE 1 RUN cmake -GNinja -DCMAKE_BUILD_TYPE=Release -DPORTABLE=1 -DTON_ARCH= -DCMAKE_CXX_FLAGS="-mavx2" .. -RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func validator-engine validator-engine-console create-state generate-random-id create-hardfork dht-server lite-client \ No newline at end of file +RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func tolk validator-engine validator-engine-console create-state generate-random-id create-hardfork dht-server lite-client diff --git a/.github/script/arm64-20.04.Dockerfile b/.github/script/arm64-20.04.Dockerfile index 1f57dc40..5e350534 100644 --- a/.github/script/arm64-20.04.Dockerfile +++ b/.github/script/arm64-20.04.Dockerfile @@ -17,4 +17,4 @@ ENV CC clang ENV CXX clang++ ENV CCACHE_DISABLE 1 RUN cmake -GNinja -DCMAKE_BUILD_TYPE=Release -DPORTABLE=1 -DTON_ARCH= .. 
-RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func validator-engine validator-engine-console create-state generate-random-id dht-server lite-client \ No newline at end of file +RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func tolk validator-engine validator-engine-console create-state generate-random-id dht-server lite-client diff --git a/.github/script/arm64-22.04.Dockerfile b/.github/script/arm64-22.04.Dockerfile index 2b595839..f9805849 100644 --- a/.github/script/arm64-22.04.Dockerfile +++ b/.github/script/arm64-22.04.Dockerfile @@ -17,4 +17,4 @@ ENV CC clang ENV CXX clang++ ENV CCACHE_DISABLE 1 RUN cmake -GNinja -DCMAKE_BUILD_TYPE=Release -DPORTABLE=1 -DTON_ARCH= .. -RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func validator-engine validator-engine-console create-state generate-random-id dht-server lite-client \ No newline at end of file +RUN ninja storage-daemon storage-daemon-cli tonlibjson blockchain-explorer fift func tolk validator-engine validator-engine-console create-state generate-random-id dht-server lite-client diff --git a/.github/workflows/create-release.yml b/.github/workflows/create-release.yml index 263bd9a4..ca08357c 100644 --- a/.github/workflows/create-release.yml +++ b/.github/workflows/create-release.yml @@ -167,6 +167,14 @@ jobs: asset_name: func.exe tag: ${{ steps.tag.outputs.TAG }} + - name: Upload Windows 2019 single artifact - tolk + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-win-binaries/tolk.exe + asset_name: tolk.exe + tag: ${{ steps.tag.outputs.TAG }} + - name: Upload Windows 2019 single artifact - lite-client uses: svenstaro/upload-release-action@v2 with: @@ -257,6 +265,14 @@ jobs: asset_name: func-mac-x86-64 tag: ${{ steps.tag.outputs.TAG }} + - name: Upload Mac x86-64 single artifact - tolk + uses: svenstaro/upload-release-action@v2 + with: + repo_token: 
${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-x86_64-macos-binaries/tolk + asset_name: tolk-mac-x86-64 + tag: ${{ steps.tag.outputs.TAG }} + - name: Upload Mac x86-64 single artifact - lite-client uses: svenstaro/upload-release-action@v2 with: @@ -348,6 +364,14 @@ jobs: asset_name: func-mac-arm64 tag: ${{ steps.tag.outputs.TAG }} + - name: Upload Mac arm64 single artifact - tolk + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-arm64-macos-binaries/tolk + asset_name: tolk-mac-arm64 + tag: ${{ steps.tag.outputs.TAG }} + - name: Upload Mac arm64 single artifact - lite-client uses: svenstaro/upload-release-action@v2 with: @@ -438,6 +462,14 @@ jobs: asset_name: func-linux-x86_64 tag: ${{ steps.tag.outputs.TAG }} + - name: Upload Linux x86-64 single artifact - tolk + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/ton-x86_64-linux-binaries/tolk + asset_name: tolk-linux-x86_64 + tag: ${{ steps.tag.outputs.TAG }} + - name: Upload Linux x86-64 single artifact - lite-client uses: svenstaro/upload-release-action@v2 with: diff --git a/.github/workflows/ton-arm64-macos.yml b/.github/workflows/ton-arm64-macos.yml index 9e8302e8..affe2245 100644 --- a/.github/workflows/ton-arm64-macos.yml +++ b/.github/workflows/ton-arm64-macos.yml @@ -29,6 +29,7 @@ jobs: artifacts/lite-client -V artifacts/fift -V artifacts/func -V + artifacts/tolk -v - name: Upload artifacts uses: actions/upload-artifact@master diff --git a/.github/workflows/ton-x86-64-linux.yml b/.github/workflows/ton-x86-64-linux.yml index abbe1cca..b7ef9684 100644 --- a/.github/workflows/ton-x86-64-linux.yml +++ b/.github/workflows/ton-x86-64-linux.yml @@ -33,6 +33,7 @@ jobs: artifacts/lite-client -V artifacts/fift -V artifacts/func -V + artifacts/tolk -v - name: Upload artifacts uses: actions/upload-artifact@master diff --git a/.github/workflows/ton-x86-64-macos.yml 
b/.github/workflows/ton-x86-64-macos.yml index 8c71f34a..1890dc34 100644 --- a/.github/workflows/ton-x86-64-macos.yml +++ b/.github/workflows/ton-x86-64-macos.yml @@ -29,6 +29,7 @@ jobs: artifacts/lite-client -V artifacts/fift -V artifacts/func -V + artifacts/tolk -v - name: Upload artifacts uses: actions/upload-artifact@master diff --git a/.gitignore b/.gitignore index 536918ab..9b94834b 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,8 @@ test/regression-tests.cache/ **/*build*/ .idea .vscode +.DS_Store +dev/ zlib/ libsodium/ libmicrohttpd-0.9.77-w32-bin/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 573bc3a3..885fcef7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -413,6 +413,7 @@ add_subdirectory(adnl) add_subdirectory(crypto) add_subdirectory(lite-client) add_subdirectory(emulator) +add_subdirectory(tolk) #BEGIN tonlib add_subdirectory(tonlib) diff --git a/assembly/native/build-macos-portable.sh b/assembly/native/build-macos-portable.sh index 0e1003b5..af82b2c0 100644 --- a/assembly/native/build-macos-portable.sh +++ b/assembly/native/build-macos-portable.sh @@ -153,7 +153,7 @@ test $? -eq 0 || { echo "Can't configure ton"; exit 1; } if [ "$with_tests" = true ]; then ninja storage-daemon storage-daemon-cli blockchain-explorer \ - tonlib tonlibjson tonlib-cli validator-engine func fift \ + tonlib tonlibjson tonlib-cli validator-engine func tolk fift \ lite-client pow-miner validator-engine-console generate-random-id json2tlo dht-server \ http-proxy rldp-http-proxy adnl-proxy create-state create-hardfork tlbc emulator \ test-ed25519 test-ed25519-crypto test-bigint test-vm test-fift test-cells test-smartcont \ @@ -162,7 +162,7 @@ if [ "$with_tests" = true ]; then test $? 
-eq 0 || { echo "Can't compile ton"; exit 1; } else ninja storage-daemon storage-daemon-cli blockchain-explorer \ - tonlib tonlibjson tonlib-cli validator-engine func fift \ + tonlib tonlibjson tonlib-cli validator-engine func tolk fift \ lite-client pow-miner validator-engine-console generate-random-id json2tlo dht-server \ http-proxy rldp-http-proxy adnl-proxy create-state create-hardfork tlbc emulator test $? -eq 0 || { echo "Can't compile ton"; exit 1; } @@ -173,6 +173,7 @@ strip -s storage/storage-daemon/storage-daemon-cli strip -s blockchain-explorer/blockchain-explorer strip -s crypto/fift strip -s crypto/func +strip -s tolk/tolk strip -s crypto/create-state strip -s crypto/tlbc strip -s validator-engine-console/validator-engine-console @@ -197,6 +198,7 @@ if [ "$with_artifacts" = true ]; then cp build/blockchain-explorer/blockchain-explorer artifacts/ cp build/crypto/fift artifacts/ cp build/crypto/func artifacts/ + cp build/tolk/tolk artifacts/ cp build/crypto/create-state artifacts/ cp build/crypto/tlbc artifacts/ cp build/validator-engine-console/validator-engine-console artifacts/ diff --git a/assembly/native/build-macos-shared.sh b/assembly/native/build-macos-shared.sh index 7fdcfb94..8a7399aa 100644 --- a/assembly/native/build-macos-shared.sh +++ b/assembly/native/build-macos-shared.sh @@ -81,7 +81,7 @@ test $? -eq 0 || { echo "Can't configure ton"; exit 1; } if [ "$with_tests" = true ]; then ninja storage-daemon storage-daemon-cli blockchain-explorer \ - tonlib tonlibjson tonlib-cli validator-engine func fift \ + tonlib tonlibjson tonlib-cli validator-engine func tolk fift \ lite-client pow-miner validator-engine-console generate-random-id json2tlo dht-server \ http-proxy rldp-http-proxy adnl-proxy create-state create-hardfork tlbc emulator \ test-ed25519 test-ed25519-crypto test-bigint test-vm test-fift test-cells test-smartcont \ @@ -90,7 +90,7 @@ if [ "$with_tests" = true ]; then test $? 
-eq 0 || { echo "Can't compile ton"; exit 1; } else ninja storage-daemon storage-daemon-cli blockchain-explorer \ - tonlib tonlibjson tonlib-cli validator-engine func fift \ + tonlib tonlibjson tonlib-cli validator-engine func tolk fift \ lite-client pow-miner validator-engine-console generate-random-id json2tlo dht-server \ http-proxy rldp-http-proxy adnl-proxy create-state create-hardfork tlbc emulator test $? -eq 0 || { echo "Can't compile ton"; exit 1; } @@ -102,6 +102,7 @@ strip -s storage/storage-daemon/storage-daemon-cli strip -s blockchain-explorer/blockchain-explorer strip -s crypto/fift strip -s crypto/func +strip -s tolk/tolk strip -s crypto/create-state strip -s crypto/tlbc strip -s validator-engine-console/validator-engine-console @@ -126,6 +127,7 @@ if [ "$with_artifacts" = true ]; then cp build/blockchain-explorer/blockchain-explorer artifacts/ cp build/crypto/fift artifacts/ cp build/crypto/func artifacts/ + cp build/tolk/tolk artifacts/ cp build/crypto/create-state artifacts/ cp build/crypto/tlbc artifacts/ cp build/validator-engine-console/validator-engine-console artifacts/ diff --git a/assembly/native/build-ubuntu-portable.sh b/assembly/native/build-ubuntu-portable.sh index 73ae5926..8ae977e0 100644 --- a/assembly/native/build-ubuntu-portable.sh +++ b/assembly/native/build-ubuntu-portable.sh @@ -144,7 +144,7 @@ cmake -GNinja .. \ test $? 
-eq 0 || { echo "Can't configure ton"; exit 1; } if [ "$with_tests" = true ]; then -ninja storage-daemon storage-daemon-cli fift func tonlib tonlibjson tonlib-cli \ +ninja storage-daemon storage-daemon-cli fift func tolk tonlib tonlibjson tonlib-cli \ validator-engine lite-client pow-miner validator-engine-console blockchain-explorer \ generate-random-id json2tlo dht-server http-proxy rldp-http-proxy \ adnl-proxy create-state emulator test-ed25519 test-ed25519-crypto test-bigint \ @@ -153,7 +153,7 @@ ninja storage-daemon storage-daemon-cli fift func tonlib tonlibjson tonlib-cli \ test-fec test-tddb test-db test-validator-session-state test-emulator test $? -eq 0 || { echo "Can't compile ton"; exit 1; } else -ninja storage-daemon storage-daemon-cli fift func tonlib tonlibjson tonlib-cli \ +ninja storage-daemon storage-daemon-cli fift func tolk tonlib tonlibjson tonlib-cli \ validator-engine lite-client pow-miner validator-engine-console blockchain-explorer \ generate-random-id json2tlo dht-server http-proxy rldp-http-proxy \ adnl-proxy create-state emulator @@ -166,6 +166,7 @@ strip -s storage/storage-daemon/storage-daemon \ crypto/fift \ crypto/tlbc \ crypto/func \ + tolk/tolk \ crypto/create-state \ validator-engine-console/validator-engine-console \ tonlib/tonlib-cli \ @@ -195,7 +196,7 @@ if [ "$with_artifacts" = true ]; then mkdir artifacts mv build/tonlib/libtonlibjson.so.0.5 build/tonlib/libtonlibjson.so cp build/storage/storage-daemon/storage-daemon build/storage/storage-daemon/storage-daemon-cli \ - build/crypto/fift build/crypto/tlbc build/crypto/func build/crypto/create-state build/blockchain-explorer/blockchain-explorer \ + build/crypto/fift build/crypto/tlbc build/crypto/func build/tolk/tolk build/crypto/create-state build/blockchain-explorer/blockchain-explorer \ build/validator-engine-console/validator-engine-console build/tonlib/tonlib-cli \ build/tonlib/libtonlibjson.so build/http/http-proxy build/rldp-http-proxy/rldp-http-proxy \ 
build/dht-server/dht-server build/lite-client/lite-client build/validator-engine/validator-engine \ diff --git a/assembly/native/build-ubuntu-shared.sh b/assembly/native/build-ubuntu-shared.sh index 00b9aa9b..6b1841cd 100644 --- a/assembly/native/build-ubuntu-shared.sh +++ b/assembly/native/build-ubuntu-shared.sh @@ -52,7 +52,7 @@ cmake -GNinja -DTON_USE_JEMALLOC=ON .. \ test $? -eq 0 || { echo "Can't configure ton"; exit 1; } if [ "$with_tests" = true ]; then -ninja storage-daemon storage-daemon-cli fift func tonlib tonlibjson tonlib-cli \ +ninja storage-daemon storage-daemon-cli fift func tolk tonlib tonlibjson tonlib-cli \ validator-engine lite-client pow-miner validator-engine-console blockchain-explorer \ generate-random-id json2tlo dht-server http-proxy rldp-http-proxy \ adnl-proxy create-state emulator test-ed25519 test-ed25519-crypto test-bigint \ @@ -61,7 +61,7 @@ ninja storage-daemon storage-daemon-cli fift func tonlib tonlibjson tonlib-cli \ test-fec test-tddb test-db test-validator-session-state test-emulator test $? 
-eq 0 || { echo "Can't compile ton"; exit 1; } else -ninja storage-daemon storage-daemon-cli fift func tonlib tonlibjson tonlib-cli \ +ninja storage-daemon storage-daemon-cli fift func tolk tonlib tonlibjson tonlib-cli \ validator-engine lite-client pow-miner validator-engine-console blockchain-explorer \ generate-random-id json2tlo dht-server http-proxy rldp-http-proxy \ adnl-proxy create-state emulator @@ -74,6 +74,7 @@ strip -s storage/storage-daemon/storage-daemon \ crypto/fift \ crypto/tlbc \ crypto/func \ + tolk/tolk \ crypto/create-state \ validator-engine-console/validator-engine-console \ tonlib/tonlib-cli \ @@ -105,7 +106,7 @@ if [ "$with_artifacts" = true ]; then mkdir artifacts mv build/tonlib/libtonlibjson.so.0.5 build/tonlib/libtonlibjson.so cp build/storage/storage-daemon/storage-daemon build/storage/storage-daemon/storage-daemon-cli \ - build/crypto/fift build/crypto/tlbc build/crypto/func build/crypto/create-state build/blockchain-explorer/blockchain-explorer \ + build/crypto/fift build/crypto/tlbc build/crypto/func build/tolk/tolk build/crypto/create-state build/blockchain-explorer/blockchain-explorer \ build/validator-engine-console/validator-engine-console build/tonlib/tonlib-cli \ build/tonlib/libtonlibjson.so build/http/http-proxy build/rldp-http-proxy/rldp-http-proxy \ build/dht-server/dht-server build/lite-client/lite-client build/validator-engine/validator-engine \ diff --git a/assembly/native/build-windows-2019.bat b/assembly/native/build-windows-2019.bat index f728b88f..fdfb6bcf 100644 --- a/assembly/native/build-windows-2019.bat +++ b/assembly/native/build-windows-2019.bat @@ -155,7 +155,7 @@ IF %errorlevel% NEQ 0 ( ) IF "%1"=="-t" ( -ninja storage-daemon storage-daemon-cli blockchain-explorer fift func tonlib tonlibjson ^ +ninja storage-daemon storage-daemon-cli blockchain-explorer fift func tolk tonlib tonlibjson ^ tonlib-cli validator-engine lite-client pow-miner validator-engine-console generate-random-id ^ json2tlo dht-server 
http-proxy rldp-http-proxy adnl-proxy create-state create-hardfork emulator ^ test-ed25519 test-ed25519-crypto test-bigint test-vm test-fift test-cells test-smartcont test-net ^ @@ -166,7 +166,7 @@ IF %errorlevel% NEQ 0 ( exit /b %errorlevel% ) ) else ( -ninja storage-daemon storage-daemon-cli blockchain-explorer fift func tonlib tonlibjson ^ +ninja storage-daemon storage-daemon-cli blockchain-explorer fift func tolk tonlib tonlibjson ^ tonlib-cli validator-engine lite-client pow-miner validator-engine-console generate-random-id ^ json2tlo dht-server http-proxy rldp-http-proxy adnl-proxy create-state create-hardfork emulator IF %errorlevel% NEQ 0 ( @@ -204,6 +204,7 @@ build\blockchain-explorer\blockchain-explorer.exe ^ build\crypto\fift.exe ^ build\crypto\tlbc.exe ^ build\crypto\func.exe ^ +build\tolk\tolk.exe ^ build\crypto\create-state.exe ^ build\validator-engine-console\validator-engine-console.exe ^ build\tonlib\tonlib-cli.exe ^ diff --git a/assembly/native/build-windows.bat b/assembly/native/build-windows.bat index aa0fd69a..e1ce9e47 100644 --- a/assembly/native/build-windows.bat +++ b/assembly/native/build-windows.bat @@ -156,7 +156,7 @@ IF %errorlevel% NEQ 0 ( ) IF "%1"=="-t" ( -ninja storage-daemon storage-daemon-cli blockchain-explorer fift func tonlib tonlibjson ^ +ninja storage-daemon storage-daemon-cli blockchain-explorer fift func tolk tonlib tonlibjson ^ tonlib-cli validator-engine lite-client pow-miner validator-engine-console generate-random-id ^ json2tlo dht-server http-proxy rldp-http-proxy adnl-proxy create-state create-hardfork emulator ^ test-ed25519 test-ed25519-crypto test-bigint test-vm test-fift test-cells test-smartcont test-net ^ @@ -167,7 +167,7 @@ IF %errorlevel% NEQ 0 ( exit /b %errorlevel% ) ) else ( -ninja storage-daemon storage-daemon-cli blockchain-explorer fift func tonlib tonlibjson ^ +ninja storage-daemon storage-daemon-cli blockchain-explorer fift func tolk tonlib tonlibjson ^ tonlib-cli validator-engine lite-client pow-miner 
validator-engine-console generate-random-id ^ json2tlo dht-server http-proxy rldp-http-proxy adnl-proxy create-state create-hardfork emulator IF %errorlevel% NEQ 0 ( @@ -205,6 +205,7 @@ build\blockchain-explorer\blockchain-explorer.exe ^ build\crypto\fift.exe ^ build\crypto\tlbc.exe ^ build\crypto\func.exe ^ +build\tolk\tolk.exe ^ build\crypto\create-state.exe ^ build\validator-engine-console\validator-engine-console.exe ^ build\tonlib\tonlib-cli.exe ^ diff --git a/assembly/nix/build-linux-arm64-nix.sh b/assembly/nix/build-linux-arm64-nix.sh index 2c7df521..565b1d25 100644 --- a/assembly/nix/build-linux-arm64-nix.sh +++ b/assembly/nix/build-linux-arm64-nix.sh @@ -43,6 +43,7 @@ sudo strip -s storage-daemon \ fift \ tlbc \ func \ + tolk \ create-state \ validator-engine-console \ tonlib-cli \ diff --git a/assembly/nix/build-linux-x86-64-nix.sh b/assembly/nix/build-linux-x86-64-nix.sh index ae478ec2..e6a3aef0 100644 --- a/assembly/nix/build-linux-x86-64-nix.sh +++ b/assembly/nix/build-linux-x86-64-nix.sh @@ -43,6 +43,7 @@ sudo strip -s storage-daemon \ fift \ tlbc \ func \ + tolk \ create-state \ validator-engine-console \ tonlib-cli \ diff --git a/assembly/nix/build-macos-nix.sh b/assembly/nix/build-macos-nix.sh index c92eddb2..0ada59a4 100644 --- a/assembly/nix/build-macos-nix.sh +++ b/assembly/nix/build-macos-nix.sh @@ -43,6 +43,7 @@ sudo strip -xSX storage-daemon \ fift \ tlbc \ func \ + tolk \ create-state \ validator-engine-console \ tonlib-cli \ diff --git a/crypto/smartcont/mathlib.tolk b/crypto/smartcont/mathlib.tolk new file mode 100644 index 00000000..d4fea609 --- /dev/null +++ b/crypto/smartcont/mathlib.tolk @@ -0,0 +1,937 @@ +{- + - + - Tolk fixed-point mathematical library + - (initially copied from mathlib.fc) + - + -} + +{- + This file is part of TON Tolk Standard Library. 
+ + Tolk Standard Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Tolk Standard Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + +-} + +{---------------- HIGH-LEVEL FUNCTION DECLARATIONS -----------------} +{- + Most functions declared here work either with integers or with fixed-point numbers of type `fixed248`. + `fixedNNN` informally denotes an alias for type `int` used to represent fixed-point numbers with scale 2^NNN. + Prefix `fixedNNN::` is prepended to the names of high-level functions that accept arguments and return values of type `fixedNNN`. +-} + +{- function declarations have been commented out, otherwise they are not inlined by the current Tolk compiler + +;; nearest integer to sqrt(a*b) for non-negative integers or fixed-point numbers a and b +int geom_mean(int a, int b) inline_ref; +;; integer square root +int sqrt(int a) inline; +;; fixed-point square root +;; fixed248 sqrt(fixed248 x) +int fixed248::sqrt(int x) inline; + +int fixed248::sqr(int x) inline; +const int fixed248::One; + +;; log(2) as fixed248 +int fixed248::log2_const() inline; +;; Pi as fixed248 +int fixed248::Pi_const() inline; + +;; fixed248 exp(fixed248 x) +int fixed248::exp(int x) inline_ref; +;; fixed248 exp2(fixed248 x) +int fixed248::exp2(int x) inline_ref; + +;; fixed248 log(fixed248 x) +int fixed248::log(int x) inline_ref; +;; fixed248 log2(fixed248 x) +int fixed248::log2(int x) inline; + +;; fixed248 pow(fixed248 x, fixed248 y) +int fixed248::pow(int x, int y) inline_ref; + +;; (fixed248, fixed248) sincos(fixed248 x); +(int, int) fixed248::sincos(int x) inline_ref; +;; fixed248 
sin(fixed248 x); +int fixed248::sin(int x) inline; +;; fixed248 cos(fixed248 x); +int fixed248::cos(int x) inline; +;; fixed248 tan(fixed248 x); +int fixed248::tan(int x) inline_ref; +;; fixed248 cot(fixed248 x); +int fixed248::cot(int x) inline_ref; + + +;; fixed248 asin(fixed248 x); +int fixed248::asin(int x) inline; +;; fixed248 acos(fixed248 x); +int fixed248::acos(int x) inline; +;; fixed248 atan(fixed248 x); +int fixed248::atan(int x) inline_ref; +;; fixed248 acot(fixed248 x); +int fixed248::acot(int x) inline_ref; + +;; random number uniformly distributed in [0..1) +;; fixed248 random(); +int fixed248::random() impure inline; +;; random number with standard normal distribution (2100 gas on average) +;; fixed248 nrand(); +int fixed248::nrand() impure inline; +;; generates a random number approximately distributed according to the standard normal distribution (1200 gas) +;; (fails chi-squared test, but it is shorter and faster than fixed248::nrand()) +;; fixed248 nrand_fast(); +int fixed248::nrand_fast() impure inline; + +-} ;; end (declarations) + +{-------------------- INTERMEDIATE FUNCTIONS -----------------------} + +{- + Intermediate functions are used in the implementations of high-level `fixedNNN::...` functions. + If necessary, they can be used to define additional high-level functions for other fixed-point types, such as fixed128, outside this library. They can also be used in a hypothetical floating-point Tolk library. + For these reasons, the declarations of these functions are collected here. +-} + +{- function declarations have been commented out, otherwise they are not inlined by the current Tolk compiler + +;; fixed258 tanh(fixed258 x, int steps); +int tanh_f258(int x, int n); + +;; computes exp(x)-1 for |x| <= log(2)/2. 
+;; fixed257 expm1(fixed257 x); +int expm1_f257(int x); + +;; computes (sin(x+xe),-cos(x+xe)) for |x| <= Pi/4, xe very small +;; this function is very accurate, error less than 0.7 ulp (consumes ~ 5500 gas) +;; (fixed256, fixed256) sincosn(fixed256 x, fixed259 xe) +(int, int) sincosn_f256(int x, int xe); + +;; compute (sin(x),1-cos(x)) in fixed256 for |x| < 16*atan(1/16) = 0.9987 +;; (fixed256, fixed257) sincosm1_f256(fixed256 x); +;; slightly less accurate than sincosn_f256() (error up to 3/2^256), but faster (~ 4k gas) and shorter +(int, int) sincosm1_f256(int x); + +;; compute (p, q) such that p/q = tan(x) for |x|<2*atan(1/2)=1899/2048=0.927 +;; (int, int) tan_aux(fixed256 x); +(int, int) tan_aux_f256(int x); + +;; returns (y, s) such that log(x) = y/2^256 + s*log(2) for positive integer x +;; this function is very precise (error less than 0.6 ulp) and consumes < 7k gas +;; (fixed256, int) log_aux_f256(int x); +(int, int) log_aux_f256(int x); + +;; returns (y, s) such that log2(x) = y/2^256 + s for positive integer x +;; this function is very precise (error less than 0.6 ulp) and consumes < 7k gas +;; (fixed256, int) log2_aux_f256(int x); +(int, int) log2_aux_f256(int x); + +;; compute (q, z) such that atan(x)=q*atan(1/32)+z for -1 <= x < 1 +;; this function is reasonably accurate (error < 7 ulp with ulp = 2^-261), but it consumes >7k gas +;; this is sufficient for most purposes +;; (int, fixed261) atan_aux(fixed256 x) +(int, int) atan_aux_f256(int x); + +;; fixed255 atan(fixed255 x); +int atan_f255(int x); + +;; for -1 <= x < 1 only +;; fixed256 atan_small(fixed256 x); +int atan_f256_small(int x); + +;; fixed255 asin(fixed255 x); +int asin_f255(int x); + +;; fixed254 acos(fixed255 x); +int acos_f255(int x); + +;; generates normally distributed pseudo-random number +;; fixed252 nrand(); +int nrand_f252(int x); + +;; a faster and shorter variant of nrand_f252() that fails chi-squared test +;; (should suffice for most purposes) +;; fixed252 nrand_fast(); +int 
nrand_fast_f252(int x); + +-} ;; end (declarations) + +{---------------- MISSING OPERATIONS AND BUILT-INS -----------------} + +int sgn(int x) asm "SGN"; + +;; compute floor(log2(x))+1 +int log2_floor_p1(int x) asm "UBITSIZE"; + +int mulrshiftr(int x, int y, int s) asm "MULRSHIFTR"; +int mulrshiftr256(int x, int y) asm "256 MULRSHIFTR#"; +(int, int) mulrshift256mod(int x, int y) asm "256 MULRSHIFT#MOD"; +(int, int) mulrshiftr256mod(int x, int y) asm "256 MULRSHIFTR#MOD"; +(int, int) mulrshiftr255mod(int x, int y) asm "255 MULRSHIFTR#MOD"; +(int, int) mulrshiftr248mod(int x, int y) asm "248 MULRSHIFTR#MOD"; +(int, int) mulrshiftr5mod(int x, int y) asm "5 MULRSHIFTR#MOD"; +(int, int) mulrshiftr6mod(int x, int y) asm "6 MULRSHIFTR#MOD"; +(int, int) mulrshiftr7mod(int x, int y) asm "7 MULRSHIFTR#MOD"; + +int lshift256divr(int x, int y) asm "256 LSHIFT#DIVR"; +(int, int) lshift256divmodr(int x, int y) asm "256 LSHIFT#DIVMODR"; +(int, int) lshift255divmodr(int x, int y) asm "255 LSHIFT#DIVMODR"; +(int, int) lshift2divmodr(int x, int y) asm "2 LSHIFT#DIVMODR"; +(int, int) lshift7divmodr(int x, int y) asm "7 LSHIFT#DIVMODR"; +(int, int) lshiftdivmodr(int x, int y, int s) asm "LSHIFTDIVMODR"; + +(int, int) rshiftr256mod(int x) asm "256 RSHIFTR#MOD"; +(int, int) rshiftr248mod(int x) asm "248 RSHIFTR#MOD"; +(int, int) rshiftr4mod(int x) asm "4 RSHIFTR#MOD"; +(int, int) rshift3mod(int x) asm "3 RSHIFT#MOD"; + +;; computes y - x (Tolk compiler does not try to use this by itself) +int sub_rev(int x, int y) asm "SUBR"; + +int nan() asm "PUSHNAN"; +int is_nan(int x) asm "ISNAN"; + +{------------------------ SQUARE ROOTS ----------------------------} + +;; computes sqrt(a*b) exactly rounded to the nearest integer +;; for all 0 <= a, b <= 2^256-1 +;; may be used with b=1 or b=scale of fixed-point numbers +int geom_mean(int a, int b) inline_ref { + ifnot (min(a, b)) { + return 0; + } + int s = log2_floor_p1(a); ;; throws out of range error if a < 0 or b < 0 + int t = 
log2_floor_p1(b); + ;; NB: (a-b)/2+b == (a+b)/2, but without overflow for large a and b + int x = (s == t ? (a - b) / 2 + b : 1 << ((s + t) / 2)); + do { + ;; if always used with b=2^const, may be optimized to "const LSHIFTDIVC#" + ;; it is important to use `muldivc` here, not `muldiv` or `muldivr` + int q = (muldivc(a, b, x) - x) / 2; + x += q; + } until (q == 0); + return x; +} + +;; integer square root, computes round(sqrt(a)) for all a>=0. +;; note: `inline` is better than `inline_ref` for such simple functions +int sqrt(int a) inline { + return geom_mean(a, 1); +} + +;; version for fixed248 = fixed-point numbers with scale 2^248 +;; fixed248 sqrt(fixed248 x) +int fixed248::sqrt(int x) inline { + return geom_mean(x, 1 << 248); +} + +;; fixed255 sqrt(fixed255 x) +int fixed255::sqrt(int x) inline { + return geom_mean(x, 1 << 255); +} + +;; fixed248 sqr(fixed248 x); +int fixed248::sqr(int x) inline { + return muldivr(x, x, 1 << 248); +} + +;; fixed255 sqr(fixed255 x); +int fixed255::sqr(int x) inline { + return muldivr(x, x, 1 << 255); +} + +const int fixed248::One = (1 << 248); +const int fixed255::One = (1 << 255); + +{-------------------- USEFUL CONSTANTS --------------------} + +;; store huge constants in inline_ref functions for reuse +;; (y,z) where y=round(log(2)*2^256), z=round((log(2)*2^256-y)*2^128) +;; then log(2) = y/2^256 + z/2^384 +(int, int) log2_xconst_f256() inline_ref { + return (80260960185991308862233904206310070533990667611589946606122867505419956976172, -32272921378999278490133606779486332143); +} + +;; (y,z) where Pi = y/2^254 + z/2^382 +(int, int) Pi_xconst_f254() inline_ref { + return (90942894222941581070058735694432465663348344332098107489693037779484723616546, 108051869516004014909778934258921521947); +} + +;; atan(1/16) as fixed260 +int Atan1_16_f260() inline_ref { + return 115641670674223639132965820642403718536242645001775371762318060545014644837101; ;; true value is ...101.0089... 
+} + +;; atan(1/8) as fixed259 +int Atan1_8_f259() inline_ref { + return 115194597005316551477397594802136977648153890007566736408151129975021336532841; ;; correction -0.1687... +} + +;; atan(1/32) as fixed261 +int Atan1_32_f261() inline_ref { + return 115754418570128574501879331591757054405465733718902755858991306434399246026247; ;; correction 0.395... +} + +;; inline is better than inline_ref for such very small functions +int log2_const_f256() inline { + (int c, _) = log2_xconst_f256(); + return c; +} + +int fixed248::log2_const() inline { + return log2_const_f256() ~>> 8; +} + +int Pi_const_f254() inline { + (int c, _) = Pi_xconst_f254(); + return c; +} + +int fixed248::Pi_const() inline { + return Pi_const_f254() ~>> 6; +} + +{--------------- HYPERBOLIC TANGENT AND EXPONENT -------------------} + +;; hyperbolic tangent of small x via n+2 terms of Lambert's continued fraction +;; n=17: good for |x| < log(2)/4 = 0.173 +;; fixed258 tanh_f258(fixed258 x, int n) +int tanh_f258(int x, int n) inline_ref { + int x2 = muldivr(x, x, 1 << 255); ;; x^2 as fixed261 + int c = int a = (2 * n + 5) << 250; ;; a=2n+5 as fixed250 + int Two = (1 << 251); ;; 2. as fixed250 + repeat (n) { + a = (c -= Two) + muldivr(x2, 1 << 239, a); ;; a := 2k+1+x^2/a as fixed250, k=n+1,n,...,2 + } + a = (touch(3) << 254) + muldivr(x2, 1 << 243, a); ;; a := 3+x^2/a as fixed254 + ;; y = x/(1+a') = x - x*a'/(1+a') = x - x*x^2/(a+x^2) where a' = x^2/a + return x - (muldivr(x, x2, a + (x2 ~>> 7)) ~>> 7); +} + +;; fixed257 expm1_f257(fixed257 x) +;; computes exp(x)-1 for small x via 19 terms of Lambert's continued fraction for tanh(x/2) +;; good for |x| < log(2)/2 = 0.347 (n=17); consumes ~3500 gas +int expm1_f257(int x) inline_ref { + ;; (almost) compute tanh(x/2) first; x/2 as fixed258 = x as fixed257 + int x2 = muldivr(x, x, 1 << 255); ;; x^2 as fixed261 + int Two = (1 << 251); ;; 2. 
as fixed250 + int c = int a = touch(39) << 250; ;; a=2n+5 as fixed250 + repeat (17) { + a = (c -= Two) + muldivr(x2, 1 << 239, a); ;; a := 2k+1+x^2/a as fixed250, k=n+1,n,...,2 + } + a = (touch(3) << 254) + muldivr(x2, 1 << 243, a); ;; a := 3+x^2/a as fixed254 + ;; now tanh(x/2) = x/(1+a') where a'=x^2/a ; apply exp(x)-1=2*tanh(x/2)/(1-tanh(x/2)) + int t = (x ~>> 4) - a; ;; t:=x-a as fixed254 + return x - muldivr(x2, t / 2, a + mulrshiftr256(x, t) ~/ 4) ~/ 4; ;; x - x^2 * (x-a) / (a + x*(x-a)) +} + +;; expm1_f257() may be used to implement specific fixed-point exponentials +;; example: +;; fixed248 exp(fixed248 x) +int fixed248::exp(int x) inline_ref { + var (l2c, l2d) = log2_xconst_f256(); + ;; divide x by log(2) and convert to fixed257 + ;; (int q, x) = muldivmodr(x, 256, l2c); ;; unfortunately, no such built-in + (int q, x) = lshiftdivmodr(x, l2c, 8); + x = 2 * x - muldivr(q, l2d, 1 << 127); + int y = expm1_f257(x); + ;; result is (1 + y) * (2^q) --> ((1 << 257) + y) >> (9 - q) + return (y ~>> (9 - q)) - (-1 << (248 + q)); + ;; note that (y ~>> (9 - q)) + (1 << (248 + q)) leads to overflow when q=8 +} + +;; compute 2^x in fixed248 +;; fixed248 exp2(fixed248 x) +int fixed248::exp2(int x) inline_ref { + ;; (int q, x) = divmodr(x, 1 << 248); ;; no such built-in + (int q, x) = rshiftr248mod(x); + x = muldivr(x, log2_const_f256(), 1 << 247); + int y = expm1_f257(x); + return (y ~>> (9 - q)) - (-1 << (248 + q)); +} + +{--------------------- TRIGONOMETRIC FUNCTIONS -----------------------} + +;; fixed260 tan(fixed260 x); +;; computes tan(x) for small |x|> 10)) ~>> 9); +} + +;; fixed260 tan(fixed260 x); +int tan_f260(int x) inline_ref { + return tan_f260_inlined(x); +} + +;; fixed258 tan(fixed258 x); +;; computes tan(x) for small |x|> 6)) ~>> 5); +} + +;; fixed258 tan(fixed258 x); +int tan_f258(int x) inline_ref { + return tan_f258_inlined(x); +} + +;; (fixed259, fixed263) sincosm1(fixed259 x) +;; computes (sin(x), 1-cos(x)) for small |x|<2*atan(1/16) +(int, int) 
sincosm1_f259_inlined(int x) inline { + int t = tan_f260_inlined(x); ;; t=tan(x/2) as fixed260 + int tt = mulrshiftr256(t, t); ;; t^2 as fixed264 + int y = tt ~/ 512 + (1 << 255); ;; 1+t^2 as fixed255 + ;; 2*t/(1+t^2) as fixed259 and 2*t^2/(1+t^2) as fixed263 + ;; return (muldivr(t, 1 << 255, y), muldivr(tt, 1 << 255, y)); + return (t - muldivr(t / 2, tt, y) ~/ 256, tt - muldivr(tt / 2, tt, y) ~/ 256); +} + +(int, int) sincosm1_f259(int x) inline_ref { + return sincosm1_f259_inlined(x); +} + +;; computes (sin(x+xe),-cos(x+xe)) for |x| <= Pi/4, xe very small +;; this function is very accurate, error less than 0.7 ulp (consumes ~ 5500 gas) +;; (fixed256, fixed256) sincosn(fixed256 x, fixed259 xe) +(int, int) sincosn_f256(int x, int xe) inline_ref { + ;; var (q, x1) = muldivmodr(x, 8, Atan1_8_f259()); ;; no muldivmodr() builtin + var (q, x1) = lshift2divmodr(abs(x), Atan1_8_f259()); ;; reduce mod theta where theta=2*atan(1/8) + var (si, co) = sincosm1_f259(x1 * 2 + xe); + var (a, b, c) = (-1, 0, 1); + repeat (q) { ;; (a+b*I) *= (8+I)^2 = 63+16*I + (a, b, c) = (63 * a - 16 * b, 16 * a + 63 * b, 65 * c); + } + ;; now a/c = cos(q*theta), b/c = sin(q*theta) exactly(!) 
+ ;; compute (a+b*I)*(1-co+si*I)/c + ;; (b, a) = (lshift256divr(b, c), lshift256divr(a, c)); + (b, int br) = lshift256divmodr(b, c); br = muldivr(br, 128, c); + (a, int ar) = lshift256divmodr(a, c); ar = muldivr(ar, 128, c); + return (sgn(x) * (((mulrshiftr256(b, co) - br) ~/ 16 - mulrshiftr256(a, si)) ~/ 8 - b), + a - ((mulrshiftr256(a, co) - ar) ~/ 16 + mulrshiftr256(b, si)) ~/ 8); +} + +;; compute (sin(x),1-cos(x)) in fixed256 for |x| < 16*atan(1/16) = 0.9987 +;; (fixed256, fixed257) sincosm1_f256(fixed256 x); +;; slightly less accurate than sincosn_f256() (error up to 3/2^256), but faster (~ 4k gas) and shorter +(int, int) sincosm1_f256(int x) inline_ref { + var (si, co) = sincosm1_f259_inlined(x); ;; compute (sin,1-cos)(x/8) in (fixed259,fixed263) + int r = 7; + repeat (r / 2) { + ;; 1-cos(2*x) = 2*sin(x)^2, sin(2*x) = 2*sin(x)*cos(x) + (co, si) = (mulrshiftr256(si, si), si - (mulrshiftr256(si, co) ~>> r)); + r -= 2; + } + return (si, co); +} + +;; compute (p, q) such that p/q = tan(x) for |x|<2*atan(1/2)=1899/2048=0.927 +;; (int, int) tan_aux(fixed256 x); +(int, int) tan_aux_f256(int x) inline_ref { + int t = tan_f258_inlined(x); ;; t=tan(x/4) as fixed258 + ;; t:=2*t/(1-t^2)=2*(t-t^3/(t^2-1)) + int tt = mulrshiftr256(t, t); ;; t^2 as fixed260 + t = muldivr(t, tt, tt ~/ 16 + (-1 << 256)) ~/ 16 - t; ;; now t=-tan(x/2) as fixed259 + return (t, mulrshiftr256(t, t) ~/ 4 + (-1 << 256)); ;; return (2*t, t^2-1) as fixed256 +} + +;; sincosm1_f256() and sincosn_f256() may be used to implement trigonometric functions for different fixed-point types +;; example: +;; (fixed248, fixed248) sincos(fixed248 x); +(int, int) fixed248::sincos(int x) inline_ref { + var (Pic, Pid) = Pi_xconst_f254(); + ;; (int q, x) = muldivmodr(x, 128, Pic); ;; no muldivmodr() builtin + (int q, x) = lshift7divmodr(x, Pic); ;; reduce mod Pi/2 + x = 2 * x - muldivr(q, Pid, 1 << 127); + (int si, int co) = sincosm1_f256(x); ;; doesn't make sense to use more accurate sincosn_f256() + co = (1 << 248) - 
(co ~>> 9); + si ~>>= 8; + repeat (q & 3) { + (si, co) = (co, - si); + } + return (si, co); +} + +;; fixed248 sin(fixed248 x); +;; inline is better than inline_ref for such simple functions +int fixed248::sin(int x) inline { + (int si, _) = fixed248::sincos(x); + return si; +} + +;; fixed248 cos(fixed248 x); +int fixed248::cos(int x) inline { + (_, int co) = fixed248::sincos(x); + return co; +} + +;; similarly, tan_aux_f256() may be used to implement tan() and cot() for specific fixed-point formats +;; fixed248 tan(fixed248 x); +;; not very accurate when |tan(x)| is very large (difficult to do better without floating-point numbers) +;; however, the relative accuracy is approximately 2^-247 in all cases, which is good enough for arguments given up to 2^-249 +int fixed248::tan(int x) inline_ref { + var (Pic, Pid) = Pi_xconst_f254(); + ;; (int q, x) = muldivmodr(x, 128, Pic); ;; no muldivmodr() builtin + (int q, x) = lshift7divmodr(x, Pic); ;; reduce mod Pi/2 + x = 2 * x - muldivr(q, Pid, 1 << 127); + var (a, b) = tan_aux_f256(x); ;; now a/b = tan(x') + if (q & 1) { + (a, b) = (b, - a); + } + return muldivr(a, 1 << 248, b); ;; either -b/a or a/b as fixed248 +} + +;; fixed248 cot(fixed248 x); +int fixed248::cot(int x) inline_ref { + var (Pic, Pid) = Pi_xconst_f254(); + (int q, x) = lshift7divmodr(x, Pic); ;; reduce mod Pi/2 + x = 2 * x - muldivr(q, Pid, 1 << 127); + var (b, a) = tan_aux_f256(x); ;; now b/a = tan(x') + if (q & 1) { + (a, b) = (b, - a); + } + return muldivr(a, 1 << 248, b); ;; either -b/a or a/b as fixed248 +} + +{----------------- INVERSE HYPERBOLIC TANGENT AND LOGARITHMS -----------------} + +;; inverse hyperbolic tangent of small x, evaluated by means of n terms of the continued fraction +;; valid for |x| < 2^-2.5 ~ 0.18 if n=37 (slightly less accurate with n=36) +;; |x| < 1/8 if n=32; |x| < 2^-3.5 if n=28; |x| < 1/16 if n=25 +;; |x| < 2^-4.5 if n=23; |x| < 1/32 if n=21; |x| < 1/64 if n=18 +;; fixed258 atanh(fixed258 x); +int atanh_f258(int x, int n) 
inline_ref { + int x2 = mulrshiftr256(x, x); ;; x^2 as fixed260 + int One = (1 << 254); + int a = One ~/ n + (1 << 255); ;; a := 2 + 1/n as fixed254 + repeat (n - 1) { + ;; a := 1 + (1 - x^2 / a)(1 + 1/n) as fixed254 + int t = One - muldivr(x2, 1 << 248, a); ;; t := 1 - x^2 / a + a = muldivr(t, n, (int n1 = n - 1)) + One; + n = n1; + } + ;; x / (1 - x^2 / a) = x / (1 - d) = x + x * d / (1 - d) for d = x^2 / a + ;; int d = muldivr(x2, 1 << 255, a - (x2 ~>> 6)); ;; d/(1-d) = x^2/(a-x^2) as fixed261 + ;; return x + (mulrshiftr256(x, d) ~>> 5); + return x + muldivr(x, x2 / 2, a - x2 ~/ 64) ~/ 32; +} + +;; number of terms n should be chosen as for atanh_f258() +;; fixed261 atanh(fixed261 x); +int atanh_f261_inlined(int x, int n) inline { + int x2 = mulrshiftr256(x, x); ;; x^2 as fixed266 + int One = (1 << 254); + int a = One ~/ n + (1 << 255); ;; a := 2 + 1/n as fixed254 + repeat (n - 1) { + ;; a := 1 + (1 - x^2 / a)(1 + 1/n) as fixed254 + int t = One - muldivr(x2, 1 << 242, a); ;; t := 1 - x^2 / a + a = muldivr(t, n, (int n1 = n - 1)) + One; + n = n1; + } + ;; x / (1 - x^2 / a) = x / (1 - d) = x + x * d / (1 - d) for d = x^2 / a + ;; int d = muldivr(x2, 1 << 255, a - (x2 ~>> 12)); ;; d/(1-d) = x^2/(a-x^2) as fixed267 + ;; return x + (mulrshiftr256(x, d) ~>> 11); + return x + muldivr(x, x2, a - x2 ~/ 4096) ~/ 4096; +} + +;; fixed261 atanh(fixed261 x); +int atanh_f261(int x, int n) inline_ref { + return atanh_f261_inlined(x, n); +} + +;; returns (y, s) such that log(x) = y/2^257 + s*log(2) for positive integer x +;; (fixed257, int) log_aux(int x) +(int, int) log_aux_f257(int x) inline_ref { + int s = log2_floor_p1(x); + x <<= 256 - s; + int t = touch(-1 << 256); + if ((x >> 249) <= 90) { + ;; t~touch(); + t >>= 1; + s -= 1; + } + x += t; + int 2x = 2 * x; + int y = lshift256divr(2x, (x >> 1) - t); + ;; y = 2x - (mulrshiftr256(2x, y) ~>> 2); ;; this line could improve precision on very rare occasions + return (atanh_f258(y, 36), s); +} + +;; computes 33^m for small m +int 
pow33(int m) inline { + int t = 1; + repeat (m) { t *= 33; } + return t; +} + +;; computes 33^m for small 0<=m<=22 +;; slightly faster than pow33() +int pow33b(int m) inline { + (int mh, int ml) = m /% 5; + int t = 1; + repeat (ml) { t *= 33; } + repeat (mh) { t *= 33 * 33 * 33 * 33 * 33; } + return t; +} + +;; returns (s, q, y) such that log(x) = s*log(2) + q*log(33/32) + y/2^260 for positive integer x +;; (int, int, fixed260) log_auxx_f260(int x); +(int, int, int) log_auxx_f260(int x) inline_ref { + int s = log2_floor_p1(x) - 1; + x <<= 255 - s; ;; rescale to 1 <= x < 2 as fixed255 + int t = touch(2873) << 244; ;; ~ (33/32)^11 ~ sqrt(2) as fixed255 + int x1 = (x - t) >> 1; + int q = muldivr(x1, 65, x1 + t) + 11; ;; crude approximation to round(log(x)/log(33/32)) + ;; t = 1; repeat (q) { t *= 33; } ;; t:=33^q, 0<=q<=22 + t = pow33b(q); + t <<= (51 - q) * 5; ;; t:=(33/32)^q as fixed255, nearest power of 33/32 to x + x -= t; + int y = lshift256divr(x << 4, (x >> 1) + t); ;; y = (x-t)/(x+t) as fixed261 + y = atanh_f261(y, 18); ;; atanh((x-t)/(x+t)) as fixed261, or log(x/t) as fixed260 + return (s, q, y); +} + +;; returns (y, s) such that log(x) = y/2^256 + s*log(2) for positive integer x +;; this function is very precise (error less than 0.6 ulp) and consumes < 7k gas +;; (fixed256, int) log_aux_f256(int x); +(int, int) log_aux_f256(int x) inline_ref { + var (s, q, y) = log_auxx_f260(x); + var (yh, yl) = rshiftr4mod(y); ;; y ~/% 16 , but Tolk does not optimize this to RSHIFTR#MOD + ;; int Log33_32 = 3563114646320977386603103333812068872452913448227778071188132859183498739150; ;; log(33/32) as fixed256 + ;; int Log33_32_l = -3769; ;; log(33/32) = Log33_32 / 2^256 + Log33_32_l / 2^269 + yh += (yl * 512 + q * -3769) ~>> 13; ;; compensation, may be removed if slightly worse accuracy is acceptable + int Log33_32 = 3563114646320977386603103333812068872452913448227778071188132859183498739150; ;; log(33/32) as fixed256 + return (yh + q * Log33_32, s); +} + +;; returns (y, s) 
such that log2(x) = y/2^256 + s for positive integer x +;; this function is very precise (error less than 0.6 ulp) and consumes < 7k gas +;; (fixed256, int) log2_aux_f256(int x); +(int, int) log2_aux_f256(int x) inline_ref { + var (s, q, y) = log_auxx_f260(x); + y = lshift256divr(y, log2_const_f256()) ~>> 4; ;; y/log(2) as fixed256 + int Log33_32 = 5140487830366106860412008603913034462883915832139695448455767612111363481357; ;; log_2(33/32) as fixed256 + ;; Log33_32/2^256 happens to be a very precise approximation to log_2(33/32), no compensation required + return (y + q * Log33_32, s); +} + +;; functions log_aux_f256() and log2_aux_f256() may be used to implement specific fixed-point instances of log() and log2() + +;; fixed248 log(fixed248 x) +int fixed248::log(int x) inline_ref { + var (y, s) = log_aux_f256(x); + return muldivr(s - 248, log2_const_f256(), 1 << 8) + (y ~>> 8); + ;; return muldivr(s - 248, 80260960185991308862233904206310070533990667611589946606122867505419956976172, 1 << 8) + (y ~>> 8); +} + +;; fixed248 log2(fixed248 x) +int fixed248::log2(int x) inline { + var (y, s) = log2_aux_f256(x); + return ((s - 248) << 248) + (y ~>> 8); +} + +;; computes x^y as exp(y*log(x)), x >= 0 +;; fixed248 pow(fixed248 x, fixed248 y); +int fixed248::pow(int x, int y) inline_ref { + ifnot (y) { + return 1 << 248; ;; x^0 = 1 + } + if (x <= 0) { + int bad = (x | y) < 0; + return 0 >> bad; ;; 0^y = 0 if x=0 and y>=0; "out of range" exception otherwise + } + var (l, s) = log2_aux_f256(x); + s -= 248; ;; log_2(x) = s+l, l is fixed256, 0<=l<1 + ;; compute (s+l)*y = q+ll + var (q1, r1) = mulrshiftr248mod(s, y); ;; muldivmodr(s, y, 1 << 248) + var (q2, r2) = mulrshift256mod(l, y); + r2 >>= 247; + var (q3, r3) = rshiftr248mod(q2); ;; divmodr(q2, 1 << 248); + var (q, ll) = rshiftr248mod(r1 + r3); + ll = 512 * ll + r2; + q += q1 + q3; + ;; now log_2(x^y) = y*log_2(x) = q + ll, ss integer, ll fixed257, -1/2<=ll<1/2 + int sq = q + 248; + if (sq <= 0) { + return - (sq == 0); ;; 
underflow + } + int y = expm1_f257(mulrshiftr256(ll, log2_const_f256())); + return (y ~>> (9 - q)) - (-1 << sq); +} + +{--------------------- INVERSE TRIGONOMETRIC FUNCTIONS -------------------} + +;; number of terms n should be chosen as for atanh_f258() +;; fixed259 atan(fixed259 x); +int atan_f259(int x, int n) inline_ref { + int x2 = mulrshiftr256(x, x); ;; x^2 as fixed262 + int One = (1 << 254); + int a = One ~/ n + (1 << 255); ;; a := 2 + 1/n as fixed254 + repeat (n - 1) { + ;; a := 1 + (1 + x^2 / a)(1 + 1/n) as fixed254 + int t = One + muldivr(x2, 1 << 246, a); ;; t := 1 + x^2 / a + a = muldivr(t, n, (int n1 = n - 1)) + One; + n = n1; + } + ;; x / (1 + x^2 / a) = x / (1 + d) = x - x * d / (1 + d) = x - x * x^2/(a+x^2) for d = x^2 / a + return x - muldivr(x, x2, a + x2 ~/ 256) ~/ 256; +} + +;; number of terms n should be chosen as for atanh_f261() +;; fixed261 atan(fixed261 x); +int atan_f261_inlined(int x, int n) inline { + int x2 = mulrshiftr256(x, x); ;; x^2 as fixed266 + int One = (1 << 254); + int a = One ~/ n + (1 << 255); ;; a := 2 + 1/n as fixed254 + repeat (n - 1) { + ;; a := 1 + (1 + x^2 / a)(1 + 1/n) as fixed254 + int t = One + muldivr(x2, 1 << 242, a); ;; t := 1 + x^2 / a + a = muldivr(t, n, (int n1 = n - 1)) + One; + n = n1; + } + ;; x / (1 + x^2 / a) = x / (1 + d) = x - x * d / (1 + d) = x - x * x^2/(a+x^2) for d = x^2 / a + return x - muldivr(x, x2, a + x2 ~/ 4096) ~/ 4096; +} + +;; fixed261 atan(fixed261 x); +int atan_f261(int x, int n) inline_ref { + return atan_f261_inlined(x, n); +} + +;; computes (q,a,b) such that q is approximately atan(x)/atan(1/32) and a+b*I=(1+I/32)^q as fixed255 +;; then b/a=tan(q*atan(1/32)) exactly, and (a,b) is almost a unit vector pointing in the direction of (1,x) +;; must have |x|<1.1, x is fixed24 +;; (int, fixed255, fixed255) atan_aux_prereduce(fixed24 x); +(int, int, int) atan_aux_prereduce(int x) inline_ref { + int xu = abs(x); + int tc = 7214596; ;; tan(13*theta) as fixed24 where theta=atan(1/32) + int t1
= muldivr(xu - tc, 1 << 88, xu * tc + (1 << 48)); ;; tan(x') as fixed64 where x'=atan(x)-13*theta + ;; t1/(3+t1^2) * 3073/32 = x'/3 * 3072/32 = x' / (96/3072) = x' / theta + int q = muldivr(t1 * 3073, 1 << 59, t1 * t1 + (touch(3) << 128)) + 13; ;; approximately round(atan(x)/theta), 0<=q<=25 + var (pa, pb) = (33226912, 5232641); ;; (32+I)^5 + var (qh, ql) = q /% 5; + var (a, b) = (1 << (5 * (51 - q)), 0); ;; (1/32^q, 0) as fixed255 + repeat (ql) { ;; a+b*I *= 32+I + (a, b) = (sub_rev(touch(b), 32 * a), a + 32 * b); ;; same as (32 * a - b, 32 * b + a), but more efficient + } + repeat (qh) { ;; a+b*I *= (32+I)^5 = pa + pb*I + (a, b) = (a * pa - b * pb, a * pb + b * pa); + } + int xs = sgn(x); + return (xs * q, a, xs * b); +} + +;; compute (q, z) such that atan(x)=q*atan(1/32)+z for -1 <= x < 1 +;; this function is reasonably accurate (error < 7 ulp with ulp = 2^-261), but it consumes >7k gas +;; this is sufficient for most purposes +;; (int, fixed261) atan_aux(fixed256 x) +(int, int) atan_aux_f256(int x) inline_ref { + var (q, a, b) = atan_aux_prereduce(x ~>> 232); ;; convert x to fixed24 + ;; now b/a = tan(q*atan(1/32)) exactly, where q is near atan(x)/atan(1/32); so b/a is near x + ;; compute y = u/v = (a*x-b)/(a+b*x) as fixed261 ; then |y|<0.0167 = 1.07/64 and atan(x)=atan(y)+q*atan(1/32) + var (u, ul) = mulrshiftr256mod(a, x); + u = (ul ~>> 250) + ((u - b) << 6); ;; |u| < 1/32, convert fixed255 -> fixed261 + int v = a + mulrshiftr256(b, x); ;; v is scalar product of (a,b) and (1,x), it is approximately in [1..sqrt(2)] as fixed255 + int y = muldivr(u, 1 << 255, v); ;; y = u/v as fixed261 + int z = atan_f261_inlined(y, 18); ;; z = atan(x)-q*atan(1/32) + return (q, z); +} + +;; compute (q, z) such that atan(x)=q*atan(1/32)+z for -1 <= x < 1 +;; this function is very accurate (error < 2 ulp), but it consumes >7k gas +;; in most cases, faster function atan_aux_f256() should be used +;; (int, fixed261) atan_auxx(fixed256 x) +(int, int) atan_auxx_f256(int x) inline_ref 
{ + var (q, a, b) = atan_aux_prereduce(x ~>> 232); ;; convert x to fixed24 + ;; now b/a = tan(q*atan(1/32)) exactly, where q is near atan(x)/atan(1/32); so b/a is near x + ;; compute y = (a*x-b)/(a+b*x) as fixed261 ; then |y|<0.0167 = 1.07/64 and atan(x)=atan(y)+q*atan(1/32) + ;; use sort of double precision arithmetic for this + var (u, ul) = mulrshiftr256mod(a, x); + ul /= 2; + u -= b; ;; |u| < 1/32 as fixed255 + var (v, vl) = mulrshiftr256mod(b, x); + vl /= 2; + v += a; ;; v is scalar product of (a,b) and (1,x), it is approximately in [1..sqrt(2)] as fixed255 + ;; y = (u + ul*eps) / (v + vl*eps) = u/v + (ul - vl * u/v)/v * eps where eps=1/2^255 + var (y, r) = lshift255divmodr(u, v); ;; y = u/v as fixed255 + int yl = muldivr(ul + r, 1 << 255, v) - muldivr(vl, y, v); ;; y/2^255 + yl/2^510 represent u/v + y = (yl ~>> 249) + (y << 6); ;; convert y to fixed261 + int z = atan_f261_inlined(y, 18); ;; z = atan(x)-q*atan(1/32) + return (q, z); +} + +;; consumes ~ 8k gas +;; fixed255 atan(fixed255 x); +int atan_f255(int x) inline_ref { + int s = (x ~>> 256); + touch(x); + if (s) { + x = lshift256divr(-1 << 255, x); ;; x:=-1/x as fixed256 + } else { + x *= 2; ;; convert to fixed256 + } + var (q, z) = atan_aux_f256(x); + ;; now atan(x) = z + q*atan(1/32) + s*(Pi/2), z is fixed261 + var (Pi_h, Pi_l) = Pi_xconst_f254(); ;; Pi/2 as fixed255 + fixed383 + var (qh, ql) = mulrshiftr6mod (q, Atan1_32_f261()); + return qh + s * Pi_h + (z + ql + muldivr(s, Pi_l, 1 << 122)) ~/ 64; +} + +;; computes atan(x) for -1 <= x < 1 only +;; fixed256 atan_small(fixed256 x); +int atan_f256_small(int x) inline_ref { + var (q, z) = atan_aux_f256(x); + ;; now atan(x) = z + q*atan(1/32), z is fixed261 + var (qh, ql) = mulrshiftr5mod (q, Atan1_32_f261()); + return qh + (z + ql) ~/ 32; +} + +;; fixed255 asin(fixed255 x); +int asin_f255(int x) inline_ref { + int a = fixed255::One - fixed255::sqr(x); ;; a:=1-x^2 + ifnot (a) { + return sgn(x) * Pi_const_f254(); ;; Pi/2 or -Pi/2 + } + int y = 
fixed255::sqrt(a); ;; sqrt(1-x^2) + int t = - lshift256divr(x, (-1 << 255) - y); ;; t = x/(1+sqrt(1-x^2)) avoiding overflow + return atan_f256_small(t); ;; asin(x)=2*atan(t) +} + +;; fixed254 acos(fixed255 x); +int acos_f255(int x) inline_ref { + int Pi = Pi_const_f254(); + if (x == (-1 << 255)) { + return Pi; ;; acos(-1) = Pi + } + Pi /= 2; + int y = fixed255::sqrt(fixed255::One - fixed255::sqr(x)); ;; sqrt(1-x^2) + int t = lshift256divr(x, (-1 << 255) - y); ;; t = -x/(1+sqrt(1-x^2)) avoiding overflow + return Pi + atan_f256_small(t) ~/ 2; ;; acos(x)=Pi/2 + 2*atan(t) +} + +;; consumes ~ 10k gas +;; fixed248 asin(fixed248 x) +int fixed248::asin(int x) inline { + return asin_f255(x << 7) ~>> 7; +} + +;; consumes ~ 10k gas +;; fixed248 acos(fixed248 x) +int fixed248::acos(int x) inline { + return acos_f255(x << 7) ~>> 6; +} + +;; consumes ~ 7500 gas +;; fixed248 atan(fixed248 x); +int fixed248::atan(int x) inline_ref { + int s = (x ~>> 249); + touch(x); + if (s) { + s = sgn(s); + x = lshift256divr(-1 << 248, x); ;; x:=-1/x as fixed256 + } else { + x <<= 8; ;; convert to fixed256 + } + var (q, z) = atan_aux_f256(x); + ;; now atan(x) = z + q*atan(1/32) + s*(Pi/2), z is fixed261 + return (z ~/ 64 + s * Pi_const_f254() + muldivr(q, Atan1_32_f261(), 64)) ~/ 128; ;; compute in fixed255, then convert +} + +;; fixed248 acot(fixed248 x); +int fixed248::acot(int x) inline_ref { + int s = (x ~>> 249); + touch(x); + if (s) { + x = lshift256divr(-1 << 248, x); ;; x:=-1/x as fixed256 + s = 0; + } else { + x <<= 8; ;; convert to fixed256 + s = sgn(x); + } + var (q, z) = atan_aux_f256(x); + ;; now acot(x) = - z - q*atan(1/32) + s*(Pi/2), z is fixed261 + return (s * Pi_const_f254() - z ~/ 64 - muldivr(q, Atan1_32_f261(), 64)) ~/ 128; ;; compute in fixed255, then convert +} + +{--------------------- PSEUDO-RANDOM NUMBERS -------------------} + +;; random number with standard normal distribution N(0,1) +;; generated by Kinderman--Monahan ratio method modified by J.Leva +;; spends ~ 
2k..3k gas on average +;; fixed252 nrand(); +int nrand_f252() impure inline_ref { + var (x, s, t, A, B, r0) = (nan(), touch(29483) << 236, touch(-3167) << 239, 12845, 16693, 9043); + ;; 4/sqrt(e*Pi) = 1.369 loop iterations on average + do { + var (u, v) = (random() / 16 + 1, muldivr(random() - (1 << 255), 7027, 1 << 16)); ;; fixed252; 7027=ceil(sqrt(8/e)*2^12) + int va = abs(v); + var (u1, v1) = (u - s, va - t); ;; (u - 29483/2^16, abs(v) + 3167/2^13) as fixed252 + ;; Q := u1^2 + v1 * (A*v1 - B*u1) as fixed252 where A=12845/2^16, B=16693/2^16 + int Q = muldivr(u1, u1, 1 << 252) + muldivr(v1, muldivr(v1, A, 1 << 16) - muldivr(u1, B, 1 << 16), 1 << 252); + ;; must have 9043 / 2^15 < Q < 9125 / 2^15, otherwise accept if smaller, reject if larger + int Qd = (Q >> 237) - r0; + if ((Qd < 9125 - 9043) & (va / u < 16)) { + x = muldivr(v, 1 << 252, u); ;; x:=v/u as fixed252; reject immediately if |v/u| >= 16 + if (Qd >= 0) { ;; immediately accept if Qd < 0 + ;; rarely taken branch - 0.012 times per call on average + ;; check condition v^2 < -4*u^2*log(u), or equivalent condition u < exp(-x^2/4) for x=v/u + int xx = mulrshiftr256(x, x) ~/ 4; ;; x^2/4 as fixed248 + int ex = fixed248::exp(- xx) * 16; ;; exp(-x^2/4) as fixed252 + if (u > ex) { + x = nan(); ;; condition false, reject + } + } + } + } until (~ is_nan(x)); + return x; +} + +;; generates a random number approximately distributed according to the standard normal distribution +;; much faster than nrand_f252(), should be suitable for most purposes when only several random numbers are needed +;; fixed252 nrand_fast(); +int nrand_fast_f252() impure inline_ref { + int t = touch(-3) << 253; ;; -6. 
as fixed252 + repeat (12) { + t += random() / 16; ;; add together 12 uniformly random numbers + } + return t; +} + +;; random number uniformly distributed in [0..1) +;; fixed248 random(); +int fixed248::random() impure inline { + return random() >> 8; +} + +;; random number with standard normal distribution +;; fixed248 nrand(); +int fixed248::nrand() impure inline { + return nrand_f252() ~>> 4; +} + +;; generates a random number approximately distributed according to the standard normal distribution +;; fixed248 nrand_fast(); +int fixed248::nrand_fast() impure inline { + return nrand_fast_f252() ~>> 4; +} diff --git a/crypto/smartcont/stdlib.tolk b/crypto/smartcont/stdlib.tolk new file mode 100644 index 00000000..344d9031 --- /dev/null +++ b/crypto/smartcont/stdlib.tolk @@ -0,0 +1,638 @@ +;; Standard library for Tolk +;; (initially copied from stdlib.fc) +;; + +{- + This file is part of TON Tolk Standard Library. + + Tolk Standard Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Tolk Standard Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + +-} + +{- + # Tuple manipulation primitives + The names and the types are mostly self-explanatory. + + Note that currently values of atomic type `tuple` can't be cast to composite tuple type (e.g. `[int, cell]`) + and vice versa. +-} + +{- + # Lisp-style lists + + Lists can be represented as nested 2-element tuples. + Empty list is conventionally represented as TVM `null` value (it can be obtained by calling [null()]). + For example, tuple `(1, (2, (3, null)))` represents list `[1, 2, 3]`. Elements of a list can be of different types.
+-} + +;;; Adds an element to the beginning of lisp-style list. +forall X -> tuple cons(X head, tuple tail) asm "CONS"; + +;;; Extracts the head and the tail of lisp-style list. +forall X -> (X, tuple) uncons(tuple list) asm "UNCONS"; + +;;; Extracts the tail and the head of lisp-style list. +forall X -> (tuple, X) list_next(tuple list) asm( -> 1 0) "UNCONS"; + +;;; Returns the head of lisp-style list. +forall X -> X car(tuple list) asm "CAR"; + +;;; Returns the tail of lisp-style list. +tuple cdr(tuple list) asm "CDR"; + +;;; Creates tuple with zero elements. +tuple empty_tuple() asm "NIL"; + +;;; Appends a value `x` to a `Tuple t = (x1, ..., xn)`, but only if the resulting `Tuple t' = (x1, ..., xn, x)` +;;; is of length at most 255. Otherwise throws a type check exception. +forall X -> tuple tpush(tuple t, X value) asm "TPUSH"; +forall X -> (tuple, ()) ~tpush(tuple t, X value) asm "TPUSH"; + +;;; Creates a tuple of length one with given argument as element. +forall X -> [X] single(X x) asm "SINGLE"; + +;;; Unpacks a tuple of length one +forall X -> X unsingle([X] t) asm "UNSINGLE"; + +;;; Creates a tuple of length two with given arguments as elements. +forall X, Y -> [X, Y] pair(X x, Y y) asm "PAIR"; + +;;; Unpacks a tuple of length two +forall X, Y -> (X, Y) unpair([X, Y] t) asm "UNPAIR"; + +;;; Creates a tuple of length three with given arguments as elements. +forall X, Y, Z -> [X, Y, Z] triple(X x, Y y, Z z) asm "TRIPLE"; + +;;; Unpacks a tuple of length three +forall X, Y, Z -> (X, Y, Z) untriple([X, Y, Z] t) asm "UNTRIPLE"; + +;;; Creates a tuple of length four with given arguments as elements. +forall X, Y, Z, W -> [X, Y, Z, W] tuple4(X x, Y y, Z z, W w) asm "4 TUPLE"; + +;;; Unpacks a tuple of length four +forall X, Y, Z, W -> (X, Y, Z, W) untuple4([X, Y, Z, W] t) asm "4 UNTUPLE"; + +;;; Returns the first element of a tuple (with unknown element types). 
+forall X -> X first(tuple t) asm "FIRST"; + +;;; Returns the second element of a tuple (with unknown element types). +forall X -> X second(tuple t) asm "SECOND"; + +;;; Returns the third element of a tuple (with unknown element types). +forall X -> X third(tuple t) asm "THIRD"; + +;;; Returns the fourth element of a tuple (with unknown element types). +forall X -> X fourth(tuple t) asm "3 INDEX"; + +;;; Returns the first element of a pair tuple. +forall X, Y -> X pair_first([X, Y] p) asm "FIRST"; + +;;; Returns the second element of a pair tuple. +forall X, Y -> Y pair_second([X, Y] p) asm "SECOND"; + +;;; Returns the first element of a triple tuple. +forall X, Y, Z -> X triple_first([X, Y, Z] p) asm "FIRST"; + +;;; Returns the second element of a triple tuple. +forall X, Y, Z -> Y triple_second([X, Y, Z] p) asm "SECOND"; + +;;; Returns the third element of a triple tuple. +forall X, Y, Z -> Z triple_third([X, Y, Z] p) asm "THIRD"; + + +;;; Pushes a null element (cast to the given type). +;;; Tolk uses the TVM type `Null` to represent the absence of a value of some atomic type. +;;; So `null` can actually have any atomic type. +forall X -> X null() asm "PUSHNULL"; + +;;; Moves a variable [x] to the top of the stack +forall X -> (X, ()) ~impure_touch(X x) impure asm "NOP"; + + + +;;; Returns the current Unix time as an Integer +int now() asm "NOW"; + +;;; Returns the internal address of the current smart contract as a Slice with a `MsgAddressInt`. +;;; If necessary, it can be parsed further using primitives such as [parse_std_addr]. +slice my_address() asm "MYADDR"; + +;;; Returns the balance of the smart contract as a tuple consisting of an int +;;; (balance in nanotoncoins) and a `cell` +;;; (a dictionary with 32-bit keys representing the balance of "extra currencies") +;;; at the start of Computation Phase. +;;; Note that RAW primitives such as [send_raw_message] do not update this field.
+[int, cell] get_balance() asm "BALANCE"; + +;;; Returns the logical time of the current transaction. +int cur_lt() asm "LTIME"; + +;;; Returns the starting logical time of the current block. +int block_lt() asm "BLOCKLT"; + +;;; Computes the representation hash of a `cell` [c] and returns it as a 256-bit unsigned integer `x`. +;;; Useful for signing and checking signatures of arbitrary entities represented by a tree of cells. +int cell_hash(cell c) asm "HASHCU"; + +;;; Computes the hash of a `slice s` and returns it as a 256-bit unsigned integer `x`. +;;; The result is the same as if an ordinary cell containing only data and references from `s` had been created +;;; and its hash computed by [cell_hash]. +int slice_hash(slice s) asm "HASHSU"; + +;;; Computes sha256 of the data bits of `slice` [s]. If the bit length of `s` is not divisible by eight, +;;; throws a cell underflow exception. The hash value is returned as a 256-bit unsigned integer `x`. +int string_hash(slice s) asm "SHA256U"; + +{- + # Signature checks +-} + +;;; Checks the Ed25519-`signature` of a `hash` (a 256-bit unsigned integer, usually computed as the hash of some data) +;;; using [public_key] (also represented by a 256-bit unsigned integer). +;;; The signature must contain at least 512 data bits; only the first 512 bits are used. +;;; The result is `−1` if the signature is valid, `0` otherwise. +;;; Note that `CHKSIGNU` creates a 256-bit slice with the hash and calls `CHKSIGNS`. +;;; That is, if [hash] is computed as the hash of some data, these data are hashed twice, +;;; the second hashing occurring inside `CHKSIGNS`. +int check_signature(int hash, slice signature, int public_key) asm "CHKSIGNU"; + +;;; Checks whether [signature] is a valid Ed25519-signature of the data portion of `slice data` using `public_key`, +;;; similarly to [check_signature]. +;;; If the bit length of [data] is not divisible by eight, throws a cell underflow exception. 
+;;; The verification of Ed25519 signatures is the standard one, +;;; with sha256 used to reduce [data] to the 256-bit number that is actually signed. +int check_data_signature(slice data, slice signature, int public_key) asm "CHKSIGNS"; + +{--- + # Computation of boc size + The primitives below may be useful for computing storage fees of user-provided data. +-} + +;;; Returns `(x, y, z)`; this is the non-quiet version of [compute_data_size?]. +;;; Recursively computes the count of distinct cells `x`, data bits `y`, and cell references `z` +;;; in the DAG rooted at `cell` [c], effectively returning the total storage used by this DAG taking into account +;;; the identification of equal cells. +;;; The values of `x`, `y`, and `z` are computed by a depth-first traversal of this DAG, +;;; with a hash table of visited cell hashes used to prevent visits of already-visited cells. +;;; The total count of visited cells `x` cannot exceed non-negative [max_cells]; +;;; otherwise the computation is aborted before visiting the `(max_cells + 1)`-st cell and +;;; a cell overflow exception (`8`) is thrown. If [c] is `null`, returns `x = y = z = 0`. +(int, int, int) compute_data_size(cell c, int max_cells) impure asm "CDATASIZE"; + +;;; Similar to [compute_data_size], but accepting a `slice` [s] instead of a `cell`. +;;; The returned value of `x` does not take into account the cell that contains the `slice` [s] itself; +;;; however, the data bits and the cell references of [s] are accounted for in `y` and `z`. +(int, int, int) slice_compute_data_size(slice s, int max_cells) impure asm "SDATASIZE"; + +;;; A quiet version of [compute_data_size]: returns `(x, y, z, -1)` on success or `(null, null, null, 0)` on failure instead of throwing. +(int, int, int, int) compute_data_size?(cell c, int max_cells) asm "CDATASIZEQ NULLSWAPIFNOT2 NULLSWAPIFNOT"; + +;;; A quiet version of [slice_compute_data_size]: returns `(x, y, z, -1)` on success or `(null, null, null, 0)` on failure instead of throwing.
+(int, int, int, int) slice_compute_data_size?(slice s, int max_cells) asm "SDATASIZEQ NULLSWAPIFNOT2 NULLSWAPIFNOT"; + +;;; Throws an exception with exit_code excno if cond is not 0 (commented out since it is implemented in the compiler) +;; () throw_if(int excno, int cond) impure asm "THROWARGIF"; + +{-- + # Debug primitives + Only works for local TVM execution with debug level verbosity +-} +;;; Dumps the stack (at most the top 255 values) and shows the total stack depth. +() dump_stack() impure asm "DUMPSTK"; + +{- + # Persistent storage save and load +-} + +;;; Returns the persistent contract storage cell. It can be parsed or modified with slice and builder primitives later. +cell get_data() asm "c4 PUSH"; + +;;; Sets `cell` [c] as persistent contract data. You can update persistent contract storage with this primitive. +() set_data(cell c) impure asm "c4 POP"; + +{- + # Continuation primitives +-} +;;; Usually `c3` has a continuation initialized by the whole code of the contract. It is used for function calls. +;;; The primitive returns the current value of `c3`. +cont get_c3() impure asm "c3 PUSH"; + +;;; Updates the current value of `c3`. Usually, it is used for updating smart contract code in run-time. +;;; Note that after execution of this primitive the current code +;;; (and the stack of recursive function calls) won't change, +;;; but any other function call will use a function from the new code. +() set_c3(cont c) impure asm "c3 POP"; + +;;; Transforms a `slice` [s] into a simple ordinary continuation `c`, with `c.code = s` and an empty stack and savelist. +cont bless(slice s) impure asm "BLESS"; + +{--- + # Gas related primitives +-} + +;;; Sets current gas limit `gl` to its maximal allowed value `gm`, and resets the gas credit `gc` to zero, +;;; decreasing the value of `gr` by `gc` in the process. +;;; In other words, the current smart contract agrees to buy some gas to finish the current transaction.
+;;; This action is required to process external messages, which bring no value (hence no gas) with themselves. +;;; +;;; For more details check [accept_message effects](https://ton.org/docs/#/smart-contracts/accept). +() accept_message() impure asm "ACCEPT"; + +;;; Sets current gas limit `gl` to the minimum of limit and `gm`, and resets the gas credit `gc` to zero. +;;; If the gas consumed so far (including the present instruction) exceeds the resulting value of `gl`, +;;; an (unhandled) out of gas exception is thrown before setting new gas limits. +;;; Notice that [set_gas_limit] with an argument `limit ≥ 2^63 − 1` is equivalent to [accept_message]. +() set_gas_limit(int limit) impure asm "SETGASLIMIT"; + +;;; Commits the current state of registers `c4` (“persistent data”) and `c5` (“actions”) +;;; so that the current execution is considered “successful” with the saved values even if an exception +;;; in Computation Phase is thrown later. +() commit() impure asm "COMMIT"; + +;;; Not implemented +;;() buy_gas(int gram) impure asm "BUYGAS"; + +;;; Computes the amount of gas that can be bought for `amount` nanoTONs, +;;; and sets `gl` accordingly in the same way as [set_gas_limit]. +() buy_gas(int amount) impure asm "BUYGAS"; + +;;; Computes the minimum of two integers [x] and [y]. +int min(int x, int y) asm "MIN"; + +;;; Computes the maximum of two integers [x] and [y]. +int max(int x, int y) asm "MAX"; + +;;; Sorts two integers. +(int, int) minmax(int x, int y) asm "MINMAX"; + +;;; Computes the absolute value of an integer [x]. +int abs(int x) asm "ABS"; + +{- + # Slice primitives + + It is said that a primitive _loads_ some data, + if it returns the data and the remainder of the slice + (so it can also be used as modifying method). + + It is said that a primitive _preloads_ some data, if it returns only the data + (it can be used as non-modifying method). + + Unless otherwise stated, loading and preloading primitives read the data from a prefix of the slice. 
+-} + + +;;; Converts a `cell` [c] into a `slice`. Notice that [c] must be either an ordinary cell, +;;; or an exotic cell (see [TVM.pdf](https://ton-blockchain.github.io/docs/tvm.pdf), 3.1.2) +;;; which is automatically loaded to yield an ordinary cell `c'`, converted into a `slice` afterwards. +slice begin_parse(cell c) asm "CTOS"; + +;;; Checks if [s] is empty. If not, throws an exception. +() end_parse(slice s) impure asm "ENDS"; + +;;; Loads the first reference from the slice. +(slice, cell) load_ref(slice s) asm( -> 1 0) "LDREF"; + +;;; Preloads the first reference from the slice. +cell preload_ref(slice s) asm "PLDREF"; + + {- The functions below are commented out because they are implemented at the compiler level for optimization -} + +;;; Loads a signed [len]-bit integer from a slice [s]. +;; (slice, int) ~load_int(slice s, int len) asm(s len -> 1 0) "LDIX"; + +;;; Loads an unsigned [len]-bit integer from a slice [s]. +;; (slice, int) ~load_uint(slice s, int len) asm( -> 1 0) "LDUX"; + +;;; Preloads a signed [len]-bit integer from a slice [s]. +;; int preload_int(slice s, int len) asm "PLDIX"; + +;;; Preloads an unsigned [len]-bit integer from a slice [s]. +;; int preload_uint(slice s, int len) asm "PLDUX"; + +;;; Loads the first `0 ≤ len ≤ 1023` bits from slice [s] into a separate `slice s''`. +;; (slice, slice) load_bits(slice s, int len) asm(s len -> 1 0) "LDSLICEX"; + +;;; Preloads the first `0 ≤ len ≤ 1023` bits from slice [s] into a separate `slice s''`. +;; slice preload_bits(slice s, int len) asm "PLDSLICEX"; + +;;; Loads serialized amount of TonCoins (any unsigned integer up to `2^120 - 1`). +(slice, int) load_grams(slice s) asm( -> 1 0) "LDGRAMS"; +(slice, int) load_coins(slice s) asm( -> 1 0) "LDGRAMS"; + +;;; Returns all but the first `0 ≤ len ≤ 1023` bits of `slice` [s]. +slice skip_bits(slice s, int len) asm "SDSKIPFIRST"; +(slice, ()) ~skip_bits(slice s, int len) asm "SDSKIPFIRST"; + +;;; Returns the first `0 ≤ len ≤ 1023` bits of `slice` [s].
+slice first_bits(slice s, int len) asm "SDCUTFIRST"; + +;;; Returns all but the last `0 ≤ len ≤ 1023` bits of `slice` [s]. +slice skip_last_bits(slice s, int len) asm "SDSKIPLAST"; +(slice, ()) ~skip_last_bits(slice s, int len) asm "SDSKIPLAST"; + +;;; Returns the last `0 ≤ len ≤ 1023` bits of `slice` [s]. +slice slice_last(slice s, int len) asm "SDCUTLAST"; + +;;; Loads a dictionary `D` (HashMapE) from `slice` [s]. +;;; (returns `null` if `nothing` constructor is used). +(slice, cell) load_dict(slice s) asm( -> 1 0) "LDDICT"; + +;;; Preloads a dictionary `D` from `slice` [s]. +cell preload_dict(slice s) asm "PLDDICT"; + +;;; Loads a dictionary as [load_dict], but returns only the remainder of the slice. +slice skip_dict(slice s) asm "SKIPDICT"; + +;;; Loads (Maybe ^Cell) from `slice` [s]. +;;; In other words loads 1 bit and if it is true +;;; loads first ref and return it with slice remainder +;;; otherwise returns `null` and slice remainder +(slice, cell) load_maybe_ref(slice s) asm( -> 1 0) "LDOPTREF"; + +;;; Preloads (Maybe ^Cell) from `slice` [s]. +cell preload_maybe_ref(slice s) asm "PLDOPTREF"; + + +;;; Returns the depth of `cell` [c]. +;;; If [c] has no references, then return `0`; +;;; otherwise the returned value is one plus the maximum of depths of cells referred to from [c]. +;;; If [c] is a `null` instead of a cell, returns zero. +int cell_depth(cell c) asm "CDEPTH"; + + +{- + # Slice size primitives +-} + +;;; Returns the number of references in `slice` [s]. +int slice_refs(slice s) asm "SREFS"; + +;;; Returns the number of data bits in `slice` [s]. +int slice_bits(slice s) asm "SBITS"; + +;;; Returns both the number of data bits and the number of references in `slice` [s]. +(int, int) slice_bits_refs(slice s) asm "SBITREFS"; + +;;; Checks whether a `slice` [s] is empty (i.e., contains no bits of data and no cell references). +int slice_empty?(slice s) asm "SEMPTY"; + +;;; Checks whether `slice` [s] has no bits of data. 
+int slice_data_empty?(slice s) asm "SDEMPTY"; + +;;; Checks whether `slice` [s] has no references. +int slice_refs_empty?(slice s) asm "SREMPTY"; + +;;; Returns the depth of `slice` [s]. +;;; If [s] has no references, then returns `0`; +;;; otherwise the returned value is one plus the maximum of depths of cells referred to from [s]. +int slice_depth(slice s) asm "SDEPTH"; + +{- + # Builder size primitives +-} + +;;; Returns the number of cell references already stored in `builder` [b]. +int builder_refs(builder b) asm "BREFS"; + +;;; Returns the number of data bits already stored in `builder` [b]. +int builder_bits(builder b) asm "BBITS"; + +;;; Returns the depth of `builder` [b]. +;;; If no cell references are stored in [b], then returns 0; +;;; otherwise the returned value is one plus the maximum of depths of cells referred to from [b]. +int builder_depth(builder b) asm "BDEPTH"; + +{- + # Builder primitives + It is said that a primitive _stores_ a value `x` into a builder `b` + if it returns a modified version of the builder `b'` with the value `x` stored at the end of it. + It can be used as a non-modifying method. + + All the primitives below first check whether there is enough space in the `builder`, + and only then check the range of the value being serialized. +-} + +;;; Creates a new empty `builder`. +builder begin_cell() asm "NEWC"; + +;;; Converts a `builder` into an ordinary `cell`. +cell end_cell(builder b) asm "ENDC"; + +;;; Stores a reference to `cell` [c] into `builder` [b]. +builder store_ref(builder b, cell c) asm(c b) "STREF"; + +;;; Stores an unsigned [len]-bit integer `x` into `b` for `0 ≤ len ≤ 256`. +;; builder store_uint(builder b, int x, int len) asm(x b len) "STUX"; + +;;; Stores a signed [len]-bit integer `x` into `b` for `0 ≤ len ≤ 257`.
+;; builder store_int(builder b, int x, int len) asm(x b len) "STIX"; + + +;;; Stores `slice` [s] into `builder` [b] +builder store_slice(builder b, slice s) asm "STSLICER"; + +;;; Stores (serializes) an integer [x] in the range `0..2^120 − 1` into `builder` [b]. +;;; The serialization of [x] consists of a 4-bit unsigned big-endian integer `l`, +;;; which is the smallest integer `l ≥ 0`, such that `x < 2^8l`, +;;; followed by an `8l`-bit unsigned big-endian representation of [x]. +;;; If [x] does not belong to the supported range, a range check exception is thrown. +;;; +;;; Store amounts of TonCoins to the builder as VarUInteger 16 +builder store_grams(builder b, int x) asm "STGRAMS"; +builder store_coins(builder b, int x) asm "STGRAMS"; + +;;; Stores dictionary `D` represented by `cell` [c] or `null` into `builder` [b]. +;;; In other words, stores a `1`-bit and a reference to [c] if [c] is not `null` and `0`-bit otherwise. +builder store_dict(builder b, cell c) asm(c b) "STDICT"; + +;;; Stores (Maybe ^Cell) to builder: +;;; if cell is null store 1 zero bit +;;; otherwise store 1 true bit and ref to cell +builder store_maybe_ref(builder b, cell c) asm(c b) "STOPTREF"; + + +{- + # Address manipulation primitives + The address manipulation primitives listed below serialize and deserialize values according to the following TL-B scheme: + ```TL-B + addr_none$00 = MsgAddressExt; + addr_extern$01 len:(## 8) external_address:(bits len) + = MsgAddressExt; + anycast_info$_ depth:(#<= 30) { depth >= 1 } + rewrite_pfx:(bits depth) = Anycast; + addr_std$10 anycast:(Maybe Anycast) + workchain_id:int8 address:bits256 = MsgAddressInt; + addr_var$11 anycast:(Maybe Anycast) addr_len:(## 9) + workchain_id:int32 address:(bits addr_len) = MsgAddressInt; + _ _:MsgAddressInt = MsgAddress; + _ _:MsgAddressExt = MsgAddress; + + int_msg_info$0 ihr_disabled:Bool bounce:Bool bounced:Bool + src:MsgAddress dest:MsgAddressInt + value:CurrencyCollection ihr_fee:Grams fwd_fee:Grams + 
created_lt:uint64 created_at:uint32 = CommonMsgInfoRelaxed; + ext_out_msg_info$11 src:MsgAddress dest:MsgAddressExt + created_lt:uint64 created_at:uint32 = CommonMsgInfoRelaxed; + ``` + A deserialized `MsgAddress` is represented by a tuple `t` as follows: + + - `addr_none` is represented by `t = (0)`, + i.e., a tuple containing exactly one integer equal to zero. + - `addr_extern` is represented by `t = (1, s)`, + where slice `s` contains the field `external_address`. In other words, + `t` is a pair (a tuple consisting of two entries), containing an integer equal to one and slice `s`. + - `addr_std` is represented by `t = (2, u, x, s)`, + where `u` is either a `null` (if `anycast` is absent) or a slice `s'` containing `rewrite_pfx` (if anycast is present). + Next, integer `x` is the `workchain_id`, and slice `s` contains the address. + - `addr_var` is represented by `t = (3, u, x, s)`, + where `u`, `x`, and `s` have the same meaning as for `addr_std`. +-} + +;;; Loads from slice [s] the only prefix that is a valid `MsgAddress`, +;;; and returns both this prefix `s'` and the remainder `s''` of [s] as slices. +(slice, slice) load_msg_addr(slice s) asm( -> 1 0) "LDMSGADDR"; + +;;; Decomposes slice [s] containing a valid `MsgAddress` into a `tuple t` with separate fields of this `MsgAddress`. +;;; If [s] is not a valid `MsgAddress`, a cell deserialization exception is thrown. +tuple parse_addr(slice s) asm "PARSEMSGADDR"; + +;;; Parses slice [s] containing a valid `MsgAddressInt` (usually a `msg_addr_std`), +;;; applies rewriting from the anycast (if present) to the same-length prefix of the address, +;;; and returns both the workchain and the 256-bit address as integers. +;;; If the address is not 256-bit, or if [s] is not a valid serialization of `MsgAddressInt`, +;;; throws a cell deserialization exception.
+(int, int) parse_std_addr(slice s) asm "REWRITESTDADDR"; + +;;; A variant of [parse_std_addr] that returns the (rewritten) address as a slice [s], +;;; even if it is not exactly 256 bit long (represented by a `msg_addr_var`). +(int, slice) parse_var_addr(slice s) asm "REWRITEVARADDR"; + +{- + # Dictionary primitives +-} + + +;;; Sets the value associated with [key_len]-bit key signed index in dictionary [dict] to [value] (cell), +;;; and returns the resulting dictionary. +cell idict_set_ref(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTISETREF"; +(cell, ()) ~idict_set_ref(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTISETREF"; + +;;; Sets the value associated with [key_len]-bit key unsigned index in dictionary [dict] to [value] (cell), +;;; and returns the resulting dictionary. +cell udict_set_ref(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTUSETREF"; +(cell, ()) ~udict_set_ref(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTUSETREF"; + +cell idict_get_ref(cell dict, int key_len, int index) asm(index dict key_len) "DICTIGETOPTREF"; +(cell, int) idict_get_ref?(cell dict, int key_len, int index) asm(index dict key_len) "DICTIGETREF" "NULLSWAPIFNOT"; +(cell, int) udict_get_ref?(cell dict, int key_len, int index) asm(index dict key_len) "DICTUGETREF" "NULLSWAPIFNOT"; +(cell, cell) idict_set_get_ref(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTISETGETOPTREF"; +(cell, cell) udict_set_get_ref(cell dict, int key_len, int index, cell value) asm(value index dict key_len) "DICTUSETGETOPTREF"; +(cell, int) idict_delete?(cell dict, int key_len, int index) asm(index dict key_len) "DICTIDEL"; +(cell, int) udict_delete?(cell dict, int key_len, int index) asm(index dict key_len) "DICTUDEL"; +(slice, int) idict_get?(cell dict, int key_len, int index) asm(index dict key_len) "DICTIGET" "NULLSWAPIFNOT"; 
+(slice, int) udict_get?(cell dict, int key_len, int index) asm(index dict key_len) "DICTUGET" "NULLSWAPIFNOT"; +(cell, slice, int) idict_delete_get?(cell dict, int key_len, int index) asm(index dict key_len) "DICTIDELGET" "NULLSWAPIFNOT"; +(cell, slice, int) udict_delete_get?(cell dict, int key_len, int index) asm(index dict key_len) "DICTUDELGET" "NULLSWAPIFNOT"; +(cell, (slice, int)) ~idict_delete_get?(cell dict, int key_len, int index) asm(index dict key_len) "DICTIDELGET" "NULLSWAPIFNOT"; +(cell, (slice, int)) ~udict_delete_get?(cell dict, int key_len, int index) asm(index dict key_len) "DICTUDELGET" "NULLSWAPIFNOT"; +cell udict_set(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTUSET"; +(cell, ()) ~udict_set(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTUSET"; +cell idict_set(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTISET"; +(cell, ()) ~idict_set(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTISET"; +cell dict_set(cell dict, int key_len, slice index, slice value) asm(value index dict key_len) "DICTSET"; +(cell, ()) ~dict_set(cell dict, int key_len, slice index, slice value) asm(value index dict key_len) "DICTSET"; +(cell, int) udict_add?(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTUADD"; +(cell, int) udict_replace?(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTUREPLACE"; +(cell, int) idict_add?(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTIADD"; +(cell, int) idict_replace?(cell dict, int key_len, int index, slice value) asm(value index dict key_len) "DICTIREPLACE"; +cell udict_set_builder(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTUSETB"; +(cell, ()) ~udict_set_builder(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTUSETB"; 
+cell idict_set_builder(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTISETB"; +(cell, ()) ~idict_set_builder(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTISETB"; +cell dict_set_builder(cell dict, int key_len, slice index, builder value) asm(value index dict key_len) "DICTSETB"; +(cell, ()) ~dict_set_builder(cell dict, int key_len, slice index, builder value) asm(value index dict key_len) "DICTSETB"; +(cell, int) udict_add_builder?(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTUADDB"; +(cell, int) udict_replace_builder?(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTUREPLACEB"; +(cell, int) idict_add_builder?(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTIADDB"; +(cell, int) idict_replace_builder?(cell dict, int key_len, int index, builder value) asm(value index dict key_len) "DICTIREPLACEB"; +(cell, int, slice, int) udict_delete_get_min(cell dict, int key_len) asm(-> 0 2 1 3) "DICTUREMMIN" "NULLSWAPIFNOT2"; +(cell, (int, slice, int)) ~udict::delete_get_min(cell dict, int key_len) asm(-> 0 2 1 3) "DICTUREMMIN" "NULLSWAPIFNOT2"; +(cell, int, slice, int) idict_delete_get_min(cell dict, int key_len) asm(-> 0 2 1 3) "DICTIREMMIN" "NULLSWAPIFNOT2"; +(cell, (int, slice, int)) ~idict::delete_get_min(cell dict, int key_len) asm(-> 0 2 1 3) "DICTIREMMIN" "NULLSWAPIFNOT2"; +(cell, slice, slice, int) dict_delete_get_min(cell dict, int key_len) asm(-> 0 2 1 3) "DICTREMMIN" "NULLSWAPIFNOT2"; +(cell, (slice, slice, int)) ~dict::delete_get_min(cell dict, int key_len) asm(-> 0 2 1 3) "DICTREMMIN" "NULLSWAPIFNOT2"; +(cell, int, slice, int) udict_delete_get_max(cell dict, int key_len) asm(-> 0 2 1 3) "DICTUREMMAX" "NULLSWAPIFNOT2"; +(cell, (int, slice, int)) ~udict::delete_get_max(cell dict, int key_len) asm(-> 0 2 1 3) "DICTUREMMAX" "NULLSWAPIFNOT2"; +(cell, int, slice, int) 
idict_delete_get_max(cell dict, int key_len) asm(-> 0 2 1 3) "DICTIREMMAX" "NULLSWAPIFNOT2"; +(cell, (int, slice, int)) ~idict::delete_get_max(cell dict, int key_len) asm(-> 0 2 1 3) "DICTIREMMAX" "NULLSWAPIFNOT2"; +(cell, slice, slice, int) dict_delete_get_max(cell dict, int key_len) asm(-> 0 2 1 3) "DICTREMMAX" "NULLSWAPIFNOT2"; +(cell, (slice, slice, int)) ~dict::delete_get_max(cell dict, int key_len) asm(-> 0 2 1 3) "DICTREMMAX" "NULLSWAPIFNOT2"; +(int, slice, int) udict_get_min?(cell dict, int key_len) asm (-> 1 0 2) "DICTUMIN" "NULLSWAPIFNOT2"; +(int, slice, int) udict_get_max?(cell dict, int key_len) asm (-> 1 0 2) "DICTUMAX" "NULLSWAPIFNOT2"; +(int, cell, int) udict_get_min_ref?(cell dict, int key_len) asm (-> 1 0 2) "DICTUMINREF" "NULLSWAPIFNOT2"; +(int, cell, int) udict_get_max_ref?(cell dict, int key_len) asm (-> 1 0 2) "DICTUMAXREF" "NULLSWAPIFNOT2"; +(int, slice, int) idict_get_min?(cell dict, int key_len) asm (-> 1 0 2) "DICTIMIN" "NULLSWAPIFNOT2"; +(int, slice, int) idict_get_max?(cell dict, int key_len) asm (-> 1 0 2) "DICTIMAX" "NULLSWAPIFNOT2"; +(int, cell, int) idict_get_min_ref?(cell dict, int key_len) asm (-> 1 0 2) "DICTIMINREF" "NULLSWAPIFNOT2"; +(int, cell, int) idict_get_max_ref?(cell dict, int key_len) asm (-> 1 0 2) "DICTIMAXREF" "NULLSWAPIFNOT2"; +(int, slice, int) udict_get_next?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTUGETNEXT" "NULLSWAPIFNOT2"; +(int, slice, int) udict_get_nexteq?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTUGETNEXTEQ" "NULLSWAPIFNOT2"; +(int, slice, int) udict_get_prev?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTUGETPREV" "NULLSWAPIFNOT2"; +(int, slice, int) udict_get_preveq?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTUGETPREVEQ" "NULLSWAPIFNOT2"; +(int, slice, int) idict_get_next?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTIGETNEXT" "NULLSWAPIFNOT2"; +(int, slice, 
int) idict_get_nexteq?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTIGETNEXTEQ" "NULLSWAPIFNOT2"; +(int, slice, int) idict_get_prev?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTIGETPREV" "NULLSWAPIFNOT2"; +(int, slice, int) idict_get_preveq?(cell dict, int key_len, int pivot) asm(pivot dict key_len -> 1 0 2) "DICTIGETPREVEQ" "NULLSWAPIFNOT2"; + +;;; Creates an empty dictionary, which is actually a null value. Equivalent to PUSHNULL +cell new_dict() asm "NEWDICT"; +;;; Checks whether a dictionary is empty. Equivalent to cell_null?. +int dict_empty?(cell c) asm "DICTEMPTY"; + + +{- Prefix dictionary primitives -} +(slice, slice, slice, int) pfxdict_get?(cell dict, int key_len, slice key) asm(key dict key_len) "PFXDICTGETQ" "NULLSWAPIFNOT2"; +(cell, int) pfxdict_set?(cell dict, int key_len, slice key, slice value) asm(value key dict key_len) "PFXDICTSET"; +(cell, int) pfxdict_delete?(cell dict, int key_len, slice key) asm(key dict key_len) "PFXDICTDEL"; + +;;; Returns the value of the global configuration parameter with integer index `i` as a `cell` or `null` value. +cell config_param(int x) asm "CONFIGOPTPARAM"; +;;; Checks whether c is a null. Note, that Tolk also has polymorphic null? built-in. +int cell_null?(cell c) asm "ISNULL"; + +;;; Creates an output action which would reserve exactly amount nanotoncoins (if mode = 0), at most amount nanotoncoins (if mode = 2), or all but amount nanotoncoins (if mode = 1 or mode = 3), from the remaining balance of the account. It is roughly equivalent to creating an outbound message carrying amount nanotoncoins (or b − amount nanotoncoins, where b is the remaining balance) to oneself, so that the subsequent output actions would not be able to spend more money than the remainder. Bit +2 in mode means that the external action does not fail if the specified amount cannot be reserved; instead, all remaining balance is reserved. 
Bit +8 in mode means `amount <- -amount` before performing any further actions. Bit +4 in mode means that amount is increased by the original balance of the current account (before the compute phase), including all extra currencies, before performing any other checks and actions. Currently, amount must be a non-negative integer, and mode must be in the range 0..15. +() raw_reserve(int amount, int mode) impure asm "RAWRESERVE"; +;;; Similar to raw_reserve, but also accepts a dictionary extra_amount (represented by a cell or null) with extra currencies. In this way currencies other than TonCoin can be reserved. +() raw_reserve_extra(int amount, cell extra_amount, int mode) impure asm "RAWRESERVEX"; +;;; Sends a raw message contained in msg, which should contain a correctly serialized object Message X, with the only exception that the source address is allowed to have dummy value addr_none (to be automatically replaced with the current smart contract address), and ihr_fee, fwd_fee, created_lt and created_at fields can have arbitrary values (to be rewritten with correct values during the action phase of the current transaction). Integer parameter mode contains the flags. Currently mode = 0 is used for ordinary messages; mode = 128 is used for messages that are to carry all the remaining balance of the current smart contract (instead of the value originally indicated in the message); mode = 64 is used for messages that carry all the remaining value of the inbound message in addition to the value initially indicated in the new message (if bit 0 is not set, the gas fees are deducted from this amount); mode' = mode + 1 means that the sender wants to pay transfer fees separately; mode' = mode + 2 means that any errors arising while processing this message during the action phase should be ignored. Finally, mode' = mode + 32 means that the current account must be destroyed if its resulting balance is zero. This flag is usually employed together with +128. 
+() send_raw_message(cell msg, int mode) impure asm "SENDRAWMSG"; +;;; Creates an output action that would change this smart contract code to that given by cell new_code. Notice that this change will take effect only after the successful termination of the current run of the smart contract +() set_code(cell new_code) impure asm "SETCODE"; + +;;; Generates a new pseudo-random unsigned 256-bit integer x. The algorithm is as follows: if r is the old value of the random seed, considered as a 32-byte array (by constructing the big-endian representation of an unsigned 256-bit integer), then its sha512(r) is computed; the first 32 bytes of this hash are stored as the new value r' of the random seed, and the remaining 32 bytes are returned as the next random value x. +int random() impure asm "RANDU256"; +;;; Generates a new pseudo-random integer z in the range 0..range−1 (or range..−1, if range < 0). More precisely, an unsigned random value x is generated as in random; then z := x * range / 2^256 is computed. +int rand(int range) impure asm "RAND"; +;;; Returns the current random seed as an unsigned 256-bit Integer. +int get_seed() impure asm "RANDSEED"; +;;; Sets the random seed to unsigned 256-bit seed. +() set_seed(int) impure asm "SETRAND"; +;;; Mixes unsigned 256-bit integer x into the random seed r by setting the random seed to sha256 of the concatenation of two 32-byte strings: the first with the big-endian representation of the old seed r, and the second with the big-endian representation of x. +() randomize(int x) impure asm "ADDRAND"; +;;; Equivalent to randomize(cur_lt());. 
+() randomize_lt() impure asm "LTIME" "ADDRAND"; + +;;; Checks whether the data parts of two slices coincide. +int equal_slice_bits (slice a, slice b) asm "SDEQ"; + +;;; Concatenates two builders +builder store_builder(builder to, builder from) asm "STBR"; diff --git a/tolk/CMakeLists.txt b/tolk/CMakeLists.txt new file mode 100644 index 00000000..54aaf8d2 --- /dev/null +++ b/tolk/CMakeLists.txt @@ -0,0 +1,51 @@ +cmake_minimum_required(VERSION 3.5 FATAL_ERROR) + +set(TOLK_SOURCE + srcread.cpp + lexer.cpp + symtable.cpp + keywords.cpp + unify-types.cpp + parse-tolk.cpp + abscode.cpp + gen-abscode.cpp + analyzer.cpp + asmops.cpp + builtins.cpp + stack-transform.cpp + optimize.cpp + codegen.cpp + tolk.cpp +) + +add_executable(tolk tolk-main.cpp ${TOLK_SOURCE}) +target_include_directories(tolk PUBLIC $) +target_link_libraries(tolk PUBLIC git ton_crypto) # todo replace with ton_crypto_core in the future +if (WINGETOPT_FOUND) + target_link_libraries_system(tolk wingetopt) +endif () + +if (USE_EMSCRIPTEN) + add_executable(tolkfiftlib tolk-wasm.cpp ${TOLK_SOURCE}) + target_include_directories(tolkfiftlib PUBLIC $) + target_link_libraries(tolkfiftlib PUBLIC fift-lib git) + target_link_options(tolkfiftlib PRIVATE + -sEXPORTED_RUNTIME_METHODS=FS,ccall,cwrap,UTF8ToString,stringToUTF8,lengthBytesUTF8,addFunction,removeFunction,setValue + -sEXPORTED_FUNCTIONS=_tolk_compile,_version,_malloc,_free,_setThrew + -sEXPORT_NAME=CompilerModule + -sERROR_ON_UNDEFINED_SYMBOLS=0 + -sFILESYSTEM=1 -lnodefs.js + -Oz + -sIGNORE_MISSING_MAIN=1 + -sAUTO_NATIVE_LIBRARIES=0 + -sMODULARIZE=1 + -sTOTAL_MEMORY=33554432 + -sALLOW_MEMORY_GROWTH=1 + -sALLOW_TABLE_GROWTH=1 + --embed-file ${CMAKE_CURRENT_SOURCE_DIR}/../crypto/fift/lib@/fiftlib + -fexceptions + ) + target_compile_options(tolkfiftlib PRIVATE -fexceptions -fno-stack-protector) +endif () + +install(TARGETS tolk RUNTIME DESTINATION bin) diff --git a/tolk/abscode.cpp b/tolk/abscode.cpp new file mode 100644 index 00000000..7dd64bd0 --- /dev/null 
+++ b/tolk/abscode.cpp @@ -0,0 +1,526 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "tolk.h" + +namespace tolk { + +/* + * + * ABSTRACT CODE + * + */ + +TmpVar::TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type, SymDef* sym, const SrcLocation* loc) + : v_type(_type), idx(_idx), cls(_cls), coord(0) { + if (sym) { + name = sym->sym_idx; + sym->value->idx = _idx; + } + if (loc) { + where = std::make_unique(*loc); + } + if (!_type) { + v_type = TypeExpr::new_hole(); + } + if (cls == _Named) { + undefined = true; + } +} + +void TmpVar::set_location(const SrcLocation& loc) { + if (where) { + *where = loc; + } else { + where = std::make_unique(loc); + } +} + +void TmpVar::dump(std::ostream& os) const { + show(os); + os << " : " << v_type << " (width "; + v_type->show_width(os); + os << ")"; + if (coord > 0) { + os << " = _" << (coord >> 8) << '.' 
<< (coord & 255); + } else if (coord < 0) { + int n = (~coord >> 8), k = (~coord & 0xff); + if (k) { + os << " = (_" << n << ".._" << (n + k - 1) << ")"; + } else { + os << " = ()"; + } + } + os << std::endl; +} +/* Prints the variable: the symbol name first when the _Named class bit is set, then '_' followed by the index. The index part is skipped for a named variable when omit_idx is non-zero and either omit_idx >= 2 or the _UniqueName class bit is set. */ +void TmpVar::show(std::ostream& os, int omit_idx) const { + if (cls & _Named) { + os << symbols.get_name(name); + if (omit_idx && (omit_idx >= 2 || (cls & _UniqueName))) { + return; + } + } + os << '_' << idx; +} +/* Streams a TmpVar through show(), using show()'s default omit_idx. */ +std::ostream& operator<<(std::ostream& os, const TmpVar& var) { + var.show(os); + return os; +} +/* Prints one character per value flag set in `val` (in the fixed order of the checks below), followed by '=' and the constant when int_const is non-null. */ +void VarDescr::show_value(std::ostream& os) const { + if (val & _Int) { + os << 'i'; + } + if (val & _Const) { + os << 'c'; + } + if (val & _Zero) { + os << '0'; + } + if (val & _NonZero) { + os << '!'; + } + if (val & _Pos) { + os << '>'; + } + if (val & _Neg) { + os << '<'; + } + if (val & _Bool) { + os << 'B'; + } + if (val & _Bit) { + os << 'b'; + } + if (val & _Even) { + os << 'E'; + } + if (val & _Odd) { + os << 'O'; + } + if (val & _Finite) { + os << 'f'; + } + if (val & _Nan) { + os << 'N'; + } + if (int_const.not_null()) { + os << '=' << int_const; + } +} +/* Prints '*' for _Last and '?' for _Unused, then the optional name, '_' and idx, then the value flags via show_value(). */ +void VarDescr::show(std::ostream& os, const char* name) const { + if (flags & _Last) { + os << '*'; + } + if (flags & _Unused) { + os << '?'; + } + if (name) { + os << name; + } + os << '_' << idx; + show_value(os); +} +/* Convenience overload: wraps value with td::make_refint and delegates to the RefInt256 overload. */ +void VarDescr::set_const(long long value) { + return set_const(td::make_refint(value)); +} + +void VarDescr::set_const(td::RefInt256 value) { + int_const = std::move(value); + if (!int_const->signed_fits_bits(257)) { + int_const.write().invalidate(); + } + val = _Const | _Int; + int s = sgn(int_const); + if (s < -1) { + val |= _Nan | _NonZero; + } else if (s < 0) { + val |= _NonZero | _Neg | _Finite; + if (*int_const == -1) { + val |= _Bool; + } + } else if (s > 0) { + val |= _NonZero | _Pos | _Finite; + } else if (!s) { + //if (*int_const == 1) { + // val |= _Bit; + //} + val |= _Zero | _Neg | _Pos | _Finite | _Bool | _Bit; + } + if (val & _Finite) { + val |= 
int_const->get_bit(0) ? _Odd : _Even; + } +} +/* Stores a string constant and marks the value as constant; only the _Const flag is set. The by-value argument is moved into place rather than copied. */ +void VarDescr::set_const(std::string value) { + str_const = std::move(value); + val = _Const; +} +/* Sets the constant to td::make_refint() called with no argument (presumably a NaN integer, going by the name - confirm against td::make_refint). */ +void VarDescr::set_const_nan() { + set_const(td::make_refint()); +} +/* Merges y into this description: keeps only the flags present in both; drops _Const when both are integer constants with different values; clears int_const once _Const is gone. */ +void VarDescr::operator|=(const VarDescr& y) { + val &= y.val; + if (is_int_const() && y.is_int_const() && cmp(int_const, y.int_const) != 0) { + val &= ~_Const; + } + if (!(val & _Const)) { + int_const.clear(); + } +} +/* Adds y's flags to this description and adopts y's integer constant when this one has none. */ +void VarDescr::operator&=(const VarDescr& y) { + val |= y.val; + if (y.int_const.not_null() && int_const.is_null()) { + int_const = y.int_const; + } +} +/* Copies the value flags and the integer constant from y. */ +void VarDescr::set_value(const VarDescr& y) { + val = y.val; + int_const = y.int_const; +} +/* Same as above, but moves the integer constant out of y. */ +void VarDescr::set_value(VarDescr&& y) { + val = y.val; + int_const = std::move(y.int_const); +} +/* Resets the value flags to zero and drops the integer constant. */ +void VarDescr::clear_value() { + val = 0; + int_const.clear(); +} +/* Prints the list as "[ v1 v2 ... ]" followed by a newline, preceded by a marker string when `unreachable` is set. */ +void VarDescrList::show(std::ostream& os) const { + if (unreachable) { + os << " "; + } + os << "["; + for (const auto& v : list) { + os << ' ' << v; + } + os << " ]\n"; +} +/* Sets and then clears the given flag bits on this op and, recursively, on every op in block0 and block1. */ +void Op::flags_set_clear(int set, int clear) { + flags = (flags | set) & ~clear; + for (auto& op : block0) { + op.flags_set_clear(set, clear); + } + for (auto& op : block1) { + op.flags_set_clear(set, clear); + } +} +void Op::split_vars(const std::vector<TmpVar>& vars) { /* applies split_var_list to `left` and `right`, then recurses into block0 and block1 */ + split_var_list(left, vars); + split_var_list(right, vars); + for (auto& op : block0) { + op.split_vars(vars); + } + for (auto& op : block1) { + op.split_vars(vars); + } +} +/* Rewrites var_list in place: every variable whose TmpVar::coord is negative is replaced by k consecutive indices starting at n, where n = ~coord >> 8 and k = ~coord & 0xff; all other variables are kept. Returns without touching var_list when no entry has a negative coord. */ +void Op::split_var_list(std::vector<var_idx_t>& var_list, const std::vector<TmpVar>& vars) { + int new_size = 0, changes = 0; + for (var_idx_t v : var_list) { + int c = vars.at(v).coord; + if (c < 0) { + ++changes; + new_size += (~c & 0xff); + } else { + ++new_size; + } + } + if (!changes) { + return; + } + std::vector<var_idx_t> new_var_list; + new_var_list.reserve(new_size); + for (var_idx_t v : var_list) { + int c = vars.at(v).coord; + if (c < 0) { + int n = (~c >> 8), k = (~c & 0xff); + while (k-- > 0) { + new_var_list.push_back(n++); + } + } else { + 
new_var_list.push_back(v); + } + } + var_list = std::move(new_var_list); +} + +void Op::show(std::ostream& os, const std::vector& vars, std::string pfx, int mode) const { + if (mode & 2) { + os << pfx << " ["; + for (const auto& v : var_info.list) { + os << ' '; + if (v.flags & VarDescr::_Last) { + os << '*'; + } + if (v.flags & VarDescr::_Unused) { + os << '?'; + } + os << vars[v.idx]; + if (mode & 4) { + os << ':'; + v.show_value(os); + } + } + os << " ]\n"; + } + std::string dis = disabled() ? " " : ""; + if (noreturn()) { + dis += " "; + } + if (!is_pure()) { + dis += " "; + } + switch (cl) { + case _Undef: + os << pfx << dis << "???\n"; + break; + case _Nop: + os << pfx << dis << "NOP\n"; + break; + case _Call: + os << pfx << dis << "CALL: "; + show_var_list(os, left, vars); + os << " := " << (fun_ref ? fun_ref->name() : "(null)") << " "; + if ((mode & 4) && args.size() == right.size()) { + show_var_list(os, args, vars); + } else { + show_var_list(os, right, vars); + } + os << std::endl; + break; + case _CallInd: + os << pfx << dis << "CALLIND: "; + show_var_list(os, left, vars); + os << " := EXEC "; + show_var_list(os, right, vars); + os << std::endl; + break; + case _Let: + os << pfx << dis << "LET "; + show_var_list(os, left, vars); + os << " := "; + show_var_list(os, right, vars); + os << std::endl; + break; + case _Tuple: + os << pfx << dis << "MKTUPLE "; + show_var_list(os, left, vars); + os << " := "; + show_var_list(os, right, vars); + os << std::endl; + break; + case _UnTuple: + os << pfx << dis << "UNTUPLE "; + show_var_list(os, left, vars); + os << " := "; + show_var_list(os, right, vars); + os << std::endl; + break; + case _IntConst: + os << pfx << dis << "CONST "; + show_var_list(os, left, vars); + os << " := " << int_const << std::endl; + break; + case _SliceConst: + os << pfx << dis << "SCONST "; + show_var_list(os, left, vars); + os << " := " << str_const << std::endl; + break; + case _Import: + os << pfx << dis << "IMPORT "; + 
show_var_list(os, left, vars); + os << std::endl; + break; + case _Return: + os << pfx << dis << "RETURN "; + show_var_list(os, left, vars); + os << std::endl; + break; + case _GlobVar: + os << pfx << dis << "GLOBVAR "; + show_var_list(os, left, vars); + os << " := " << (fun_ref ? fun_ref->name() : "(null)") << std::endl; + break; + case _SetGlob: + os << pfx << dis << "SETGLOB "; + os << (fun_ref ? fun_ref->name() : "(null)") << " := "; + show_var_list(os, right, vars); + os << std::endl; + break; + case _Repeat: + os << pfx << dis << "REPEAT "; + show_var_list(os, left, vars); + os << ' '; + show_block(os, block0.get(), vars, pfx, mode); + os << std::endl; + break; + case _If: + os << pfx << dis << "IF "; + show_var_list(os, left, vars); + os << ' '; + show_block(os, block0.get(), vars, pfx, mode); + os << " ELSE "; + show_block(os, block1.get(), vars, pfx, mode); + os << std::endl; + break; + case _While: + os << pfx << dis << "WHILE "; + show_var_list(os, left, vars); + os << ' '; + show_block(os, block0.get(), vars, pfx, mode); + os << " DO "; + show_block(os, block1.get(), vars, pfx, mode); + os << std::endl; + break; + case _Until: + os << pfx << dis << "UNTIL "; + show_var_list(os, left, vars); + os << ' '; + show_block(os, block0.get(), vars, pfx, mode); + os << std::endl; + break; + case _Again: + os << pfx << dis << "AGAIN "; + show_var_list(os, left, vars); + os << ' '; + show_block(os, block0.get(), vars, pfx, mode); + os << std::endl; + break; + default: + os << pfx << dis << " "; + show_var_list(os, left, vars); + os << " -- "; + show_var_list(os, right, vars); + os << std::endl; + break; + } +} + +void Op::show_var_list(std::ostream& os, const std::vector& idx_list, + const std::vector& vars) const { + if (!idx_list.size()) { + os << "()"; + } else if (idx_list.size() == 1) { + os << vars.at(idx_list[0]); + } else { + os << "(" << vars.at(idx_list[0]); + for (std::size_t i = 1; i < idx_list.size(); i++) { + os << "," << vars.at(idx_list[i]); + } + 
os << ")"; + } +} + +void Op::show_var_list(std::ostream& os, const std::vector& list, const std::vector& vars) const { + auto n = list.size(); + if (!n) { + os << "()"; + } else { + os << "( "; + for (std::size_t i = 0; i < list.size(); i++) { + if (i) { + os << ", "; + } + if (list[i].is_unused()) { + os << '?'; + } + os << vars.at(list[i].idx) << ':'; + list[i].show_value(os); + } + os << " )"; + } +} + +void Op::show_block(std::ostream& os, const Op* block, const std::vector& vars, std::string pfx, int mode) { + os << "{" << std::endl; + std::string pfx2 = pfx + " "; + for (const Op& op : block) { + op.show(os, vars, pfx2, mode); + } + os << pfx << "}"; +} + +void CodeBlob::flags_set_clear(int set, int clear) { + for (auto& op : ops) { + op.flags_set_clear(set, clear); + } +} + +std::ostream& operator<<(std::ostream& os, const CodeBlob& code) { + code.print(os); + return os; +} + +// flags: +1 = show variable definition locations; +2 = show vars after each op; +4 = show var abstract value info after each op; +8 = show all variables at start +void CodeBlob::print(std::ostream& os, int flags) const { + os << "CODE BLOB: " << var_cnt << " variables, " << in_var_cnt << " input\n"; + if ((flags & 8) != 0) { + for (const auto& var : vars) { + var.dump(os); + if (var.where && (flags & 1) != 0) { + var.where->show(os); + os << " defined here:\n"; + var.where->show_context(os); + } + } + } + os << "------- BEGIN --------\n"; + for (const auto& op : ops) { + op.show(os, vars, "", flags); + } + os << "-------- END ---------\n\n"; +} + +var_idx_t CodeBlob::create_var(int cls, TypeExpr* var_type, SymDef* sym, const SrcLocation* location) { + vars.emplace_back(var_cnt, cls, var_type, sym, location); + if (sym) { + sym->value->idx = var_cnt; + } + return var_cnt++; +} + +bool CodeBlob::import_params(FormalArgList arg_list) { + if (var_cnt || in_var_cnt || op_cnt) { + return false; + } + std::vector list; + for (const auto& par : arg_list) { + TypeExpr* arg_type; + SymDef* 
arg_sym; + SrcLocation arg_loc; + std::tie(arg_type, arg_sym, arg_loc) = par; + list.push_back(create_var(arg_sym ? (TmpVar::_In | TmpVar::_Named) : TmpVar::_In, arg_type, arg_sym, &arg_loc)); + } + emplace_back(loc, Op::_Import, list); + in_var_cnt = var_cnt; + return true; +} + +} // namespace tolk diff --git a/tolk/analyzer.cpp b/tolk/analyzer.cpp new file mode 100644 index 00000000..ea41a103 --- /dev/null +++ b/tolk/analyzer.cpp @@ -0,0 +1,916 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#include "tolk.h" + +namespace tolk { + +/* + * + * ANALYZE AND PREPROCESS ABSTRACT CODE + * + */ + +void CodeBlob::simplify_var_types() { + for (TmpVar& var : vars) { + TypeExpr::remove_indirect(var.v_type); + var.v_type->recompute_width(); + } +} + +int CodeBlob::split_vars(bool strict) { + int n = var_cnt, changes = 0; + for (int j = 0; j < var_cnt; j++) { + TmpVar& var = vars[j]; + if (strict && var.v_type->minw != var.v_type->maxw) { + throw ParseError{var.where.get(), "variable does not have fixed width, cannot manipulate it"}; + } + std::vector comp_types; + int k = var.v_type->extract_components(comp_types); + tolk_assert(k <= 254 && n <= 0x7fff00); + tolk_assert((unsigned)k == comp_types.size()); + if (k != 1) { + var.coord = ~((n << 8) + k); + for (int i = 0; i < k; i++) { + auto v = create_var(vars[j].cls, comp_types[i], 0, vars[j].where.get()); + tolk_assert(v == n + i); + tolk_assert(vars[v].idx == v); + vars[v].name = vars[j].name; + vars[v].coord = ((int)j << 8) + i + 1; + } + n += k; + ++changes; + } else if (strict && var.v_type->minw != 1) { + throw ParseError{var.where.get(), + "cannot work with variable or variable component of width greater than one"}; + } + } + if (!changes) { + return 0; + } + for (auto& op : ops) { + op.split_vars(vars); + } + return changes; +} + +bool CodeBlob::compute_used_code_vars() { + VarDescrList empty_var_info; + return compute_used_code_vars(ops, empty_var_info, true); +} + +bool CodeBlob::compute_used_code_vars(std::unique_ptr& ops_ptr, const VarDescrList& var_info, bool edit) const { + tolk_assert(ops_ptr); + if (!ops_ptr->next) { + tolk_assert(ops_ptr->cl == Op::_Nop); + return ops_ptr->set_var_info(var_info); + } + // here and below, bitwise | (not logical ||) are used to execute both left and right parts + return static_cast(compute_used_code_vars(ops_ptr->next, var_info, edit)) | + static_cast(ops_ptr->compute_used_vars(*this, edit)); +} + +bool operator==(const VarDescrList& x, const VarDescrList& y) { 
+ if (x.size() != y.size()) { + return false; + } + for (std::size_t i = 0; i < x.size(); i++) { + if (x.list[i].idx != y.list[i].idx || x.list[i].flags != y.list[i].flags) { + return false; + } + } + return true; +} + +bool same_values(const VarDescr& x, const VarDescr& y) { + if (x.val != y.val || x.int_const.is_null() != y.int_const.is_null()) { + return false; + } + if (x.int_const.not_null() && cmp(x.int_const, y.int_const) != 0) { + return false; + } + return true; +} + +bool same_values(const VarDescrList& x, const VarDescrList& y) { + if (x.size() != y.size()) { + return false; + } + for (std::size_t i = 0; i < x.size(); i++) { + if (x.list[i].idx != y.list[i].idx || !same_values(x.list[i], y.list[i])) { + return false; + } + } + return true; +} + +bool Op::set_var_info(const VarDescrList& new_var_info) { + if (var_info == new_var_info) { + return false; + } + var_info = new_var_info; + return true; +} + +bool Op::set_var_info(VarDescrList&& new_var_info) { + if (var_info == new_var_info) { + return false; + } + var_info = std::move(new_var_info); + return true; +} + +bool Op::set_var_info_except(const VarDescrList& new_var_info, const std::vector& var_list) { + if (!var_list.size()) { + return set_var_info(new_var_info); + } + VarDescrList tmp_info{new_var_info}; + tmp_info -= var_list; + return set_var_info(tmp_info); +} + +bool Op::set_var_info_except(VarDescrList&& new_var_info, const std::vector& var_list) { + if (var_list.size()) { + new_var_info -= var_list; + } + return set_var_info(std::move(new_var_info)); +} +std::vector sort_unique_vars(const std::vector& var_list) { + std::vector vars{var_list}, unique_vars; + std::sort(vars.begin(), vars.end()); + vars.erase(std::unique(vars.begin(), vars.end()), vars.end()); + return vars; +} + +VarDescr* VarDescrList::operator[](var_idx_t idx) { + auto it = std::lower_bound(list.begin(), list.end(), idx); + return it != list.end() && it->idx == idx ? 
&*it : nullptr; +} + +const VarDescr* VarDescrList::operator[](var_idx_t idx) const { + auto it = std::lower_bound(list.begin(), list.end(), idx); + return it != list.end() && it->idx == idx ? &*it : nullptr; +} + +std::size_t VarDescrList::count(const std::vector idx_list) const { + std::size_t res = 0; + for (var_idx_t idx : idx_list) { + if (operator[](idx)) { + ++res; + } + } + return res; +} + +std::size_t VarDescrList::count_used(const std::vector idx_list) const { + std::size_t res = 0; + for (var_idx_t idx : idx_list) { + auto v = operator[](idx); + if (v && !v->is_unused()) { + ++res; + } + } + return res; +} + +VarDescrList& VarDescrList::operator-=(var_idx_t idx) { + auto it = std::lower_bound(list.begin(), list.end(), idx); + if (it != list.end() && it->idx == idx) { + list.erase(it); + } + return *this; +} + +VarDescrList& VarDescrList::operator-=(const std::vector& idx_list) { + for (var_idx_t idx : idx_list) { + *this -= idx; + } + return *this; +} + +VarDescrList& VarDescrList::add_var(var_idx_t idx, bool unused) { + auto it = std::lower_bound(list.begin(), list.end(), idx); + if (it == list.end() || it->idx != idx) { + list.emplace(it, idx, VarDescr::_Last | (unused ? 
VarDescr::_Unused : 0)); + } else if (it->is_unused() && !unused) { + it->clear_unused(); + } + return *this; +} + +VarDescrList& VarDescrList::add_vars(const std::vector& idx_list, bool unused) { + for (var_idx_t idx : idx_list) { + add_var(idx, unused); + } + return *this; +} + +VarDescr& VarDescrList::add(var_idx_t idx) { + auto it = std::lower_bound(list.begin(), list.end(), idx); + if (it == list.end() || it->idx != idx) { + it = list.emplace(it, idx); + } + return *it; +} + +VarDescr& VarDescrList::add_newval(var_idx_t idx) { + auto it = std::lower_bound(list.begin(), list.end(), idx); + if (it == list.end() || it->idx != idx) { + return *list.emplace(it, idx); + } else { + it->clear_value(); + return *it; + } +} + +VarDescrList& VarDescrList::clear_last() { + for (auto& var : list) { + if (var.flags & VarDescr::_Last) { + var.flags &= ~VarDescr::_Last; + } + } + return *this; +} + +VarDescrList VarDescrList::operator+(const VarDescrList& y) const { + VarDescrList res; + auto it1 = list.cbegin(); + auto it2 = y.list.cbegin(); + while (it1 != list.cend() && it2 != y.list.cend()) { + if (it1->idx < it2->idx) { + res.list.push_back(*it1++); + } else if (it1->idx > it2->idx) { + res.list.push_back(*it2++); + } else { + res.list.push_back(*it1++); + res.list.back() += *it2++; + } + } + while (it1 != list.cend()) { + res.list.push_back(*it1++); + } + while (it2 != y.list.cend()) { + res.list.push_back(*it2++); + } + return res; +} + +VarDescrList& VarDescrList::operator+=(const VarDescrList& y) { + return *this = *this + y; +} + +VarDescrList VarDescrList::operator|(const VarDescrList& y) const { + if (y.unreachable) { + return *this; + } + if (unreachable) { + return y; + } + VarDescrList res; + auto it1 = list.cbegin(); + auto it2 = y.list.cbegin(); + while (it1 != list.cend() && it2 != y.list.cend()) { + if (it1->idx < it2->idx) { + it1++; + } else if (it1->idx > it2->idx) { + it2++; + } else { + res.list.push_back(*it1++); + res.list.back() |= *it2++; + } + } + 
return res; +} + +VarDescrList& VarDescrList::operator|=(const VarDescrList& y) { + if (y.unreachable) { + return *this; + } else { + return *this = *this | y; + } +} + +VarDescrList& VarDescrList::operator&=(const VarDescrList& values) { + for (const VarDescr& vd : values.list) { + VarDescr* item = operator[](vd.idx); + if (item) { + *item &= vd; + } + } + unreachable |= values.unreachable; + return *this; +} + +VarDescrList& VarDescrList::import_values(const VarDescrList& values) { + if (values.unreachable) { + set_unreachable(); + } else + for (auto& vd : list) { + auto new_vd = values[vd.idx]; + if (new_vd) { + vd.set_value(*new_vd); + } else { + vd.clear_value(); + } + } + return *this; +} + +bool Op::std_compute_used_vars(bool disabled) { + // left = OP right + // var_info := (var_info - left) + right + VarDescrList new_var_info{next->var_info}; + new_var_info -= left; + new_var_info.clear_last(); + if (args.size() == right.size() && !disabled) { + for (const VarDescr& arg : args) { + new_var_info.add_var(arg.idx, arg.is_unused()); + } + } else { + new_var_info.add_vars(right, disabled); + } + return set_var_info(std::move(new_var_info)); +} + +bool Op::compute_used_vars(const CodeBlob& code, bool edit) { + tolk_assert(next); + const VarDescrList& next_var_info = next->var_info; + if (cl == _Nop) { + return set_var_info_except(next_var_info, left); + } + switch (cl) { + case _IntConst: + case _SliceConst: + case _GlobVar: + case _Call: + case _CallInd: + case _Tuple: + case _UnTuple: { + // left = EXEC right; + if (!next_var_info.count_used(left) && is_pure()) { + // all variables in `left` are not needed + if (edit) { + disable(); + } + return std_compute_used_vars(true); + } + return std_compute_used_vars(); + } + case _SetGlob: { + // GLOB = right + if (right.empty() && edit) { + disable(); + } + return std_compute_used_vars(right.empty()); + } + case _Let: { + // left = right + std::size_t cnt = next_var_info.count_used(left); + tolk_assert(left.size() == 
right.size()); + auto l_it = left.cbegin(), r_it = right.cbegin(); + VarDescrList new_var_info{next_var_info}; + new_var_info -= left; + new_var_info.clear_last(); + std::vector new_left, new_right; + for (; l_it < left.cend(); ++l_it, ++r_it) { + if (std::find(l_it + 1, left.cend(), *l_it) == left.cend()) { + auto p = next_var_info[*l_it]; + new_var_info.add_var(*r_it, edit && (!p || p->is_unused())); + new_left.push_back(*l_it); + new_right.push_back(*r_it); + } + } + if (new_left.size() < left.size()) { + left = std::move(new_left); + right = std::move(new_right); + } + if (!cnt && edit) { + // all variables in `left` are not needed + disable(); + } + return set_var_info(std::move(new_var_info)); + } + case _Return: { + // return left + if (var_info.count(left) == left.size()) { + return false; + } + std::vector unique_vars = sort_unique_vars(left); + var_info.list.clear(); + for (var_idx_t i : unique_vars) { + var_info.list.emplace_back(i, VarDescr::_Last); + } + return true; + } + case _Import: { + // import left + std::vector unique_vars = sort_unique_vars(left); + var_info.list.clear(); + for (var_idx_t i : unique_vars) { + var_info.list.emplace_back(i, next_var_info[i] ? 0 : VarDescr::_Last); + } + return true; + } + case _If: { + // if (left) then block0 else block1 + // VarDescrList nx_var_info = next_var_info; + // nx_var_info.clear_last(); + code.compute_used_code_vars(block0, next_var_info, edit); + VarDescrList merge_info; + if (block1) { + code.compute_used_code_vars(block1, next_var_info, edit); + merge_info = block0->var_info + block1->var_info; + } else { + merge_info = block0->var_info + next_var_info; + } + merge_info.clear_last(); + merge_info += left; + return set_var_info(std::move(merge_info)); + } + case _While: { + // while (block0 || left) block1; + // ... 
block0 left { block1 block0 left } next + VarDescrList new_var_info{next_var_info}; + bool changes = false; + do { + VarDescrList after_cond{new_var_info}; + after_cond += left; + code.compute_used_code_vars(block0, after_cond, changes); + code.compute_used_code_vars(block1, block0->var_info, changes); + std::size_t n = new_var_info.size(); + new_var_info += block1->var_info; + new_var_info.clear_last(); + if (changes) { + break; + } + changes = (new_var_info.size() == n); + } while (changes <= edit); + new_var_info += left; + code.compute_used_code_vars(block0, new_var_info, edit); + return set_var_info(block0->var_info); + } + case _Until: { + // until (block0 || left); + // .. { block0 left } block0 left next + VarDescrList after_cond_first{next_var_info}; + after_cond_first += left; + code.compute_used_code_vars(block0, after_cond_first, false); + VarDescrList new_var_info{block0->var_info}; + bool changes = false; + do { + VarDescrList after_cond{new_var_info}; + after_cond += next_var_info; + after_cond += left; + code.compute_used_code_vars(block0, after_cond, changes); + std::size_t n = new_var_info.size(); + new_var_info += block0->var_info; + new_var_info.clear_last(); + if (changes) { + break; + } + changes = (new_var_info.size() == n); + } while (changes <= edit); + return set_var_info(std::move(new_var_info) + next_var_info); + } + case _Repeat: { + // repeat (left) block0 + // left { block0 } next + VarDescrList new_var_info{next_var_info}; + bool changes = false; + do { + code.compute_used_code_vars(block0, new_var_info, changes); + std::size_t n = new_var_info.size(); + new_var_info += block0->var_info; + new_var_info.clear_last(); + if (changes) { + break; + } + changes = (new_var_info.size() == n); + } while (changes <= edit); + tolk_assert(left.size() == 1); + bool last = new_var_info.count_used(left) == 0; + new_var_info += left; + if (last) { + new_var_info[left[0]]->flags |= VarDescr::_Last; + } + return set_var_info(std::move(new_var_info)); 
+ } + case _Again: { + // for(;;) block0 + // { block0 } + VarDescrList new_var_info; + bool changes = false; + do { + code.compute_used_code_vars(block0, new_var_info, changes); + std::size_t n = new_var_info.size(); + new_var_info += block0->var_info; + new_var_info.clear_last(); + if (changes) { + break; + } + changes = (new_var_info.size() == n); + } while (changes <= edit); + return set_var_info(std::move(new_var_info)); + } + case _TryCatch: { + code.compute_used_code_vars(block0, next_var_info, edit); + code.compute_used_code_vars(block1, next_var_info, edit); + VarDescrList merge_info = block0->var_info + block1->var_info + next_var_info; + merge_info -= left; + merge_info.clear_last(); + return set_var_info(std::move(merge_info)); + } + default: + std::cerr << "fatal: unknown operation in compute_used_vars()\n"; + throw ParseError{where, "unknown operation"}; + } +} + +bool prune_unreachable(std::unique_ptr& ops) { + if (!ops) { + return true; + } + Op& op = *ops; + if (op.cl == Op::_Nop) { + if (op.next) { + ops = std::move(op.next); + return prune_unreachable(ops); + } + return true; + } + bool reach; + switch (op.cl) { + case Op::_IntConst: + case Op::_SliceConst: + case Op::_GlobVar: + case Op::_SetGlob: + case Op::_Call: + case Op::_CallInd: + case Op::_Tuple: + case Op::_UnTuple: + case Op::_Import: + reach = true; + break; + case Op::_Let: { + reach = true; + break; + } + case Op::_Return: + reach = false; + break; + case Op::_If: { + // if left then block0 else block1; ... 
+ VarDescr* c_var = op.var_info[op.left[0]]; + if (c_var && c_var->always_true()) { + op.block0->last().next = std::move(op.next); + ops = std::move(op.block0); + return prune_unreachable(ops); + } else if (c_var && c_var->always_false()) { + op.block1->last().next = std::move(op.next); + ops = std::move(op.block1); + return prune_unreachable(ops); + } else { + reach = static_cast(prune_unreachable(op.block0)) | static_cast(prune_unreachable(op.block1)); + } + break; + } + case Op::_While: { + // while (block0 || left) block1; + if (!prune_unreachable(op.block0)) { + // computation of block0 never returns + ops = std::move(op.block0); + return prune_unreachable(ops); + } + VarDescr* c_var = op.block0->last().var_info[op.left[0]]; + if (c_var && c_var->always_false()) { + // block1 never executed + op.block0->last().next = std::move(op.next); + ops = std::move(op.block0); + return prune_unreachable(ops); + } else if (c_var && c_var->always_true()) { + if (!prune_unreachable(op.block1)) { + // block1 never returns + op.block0->last().next = std::move(op.block1); + ops = std::move(op.block0); + return false; + } + // infinite loop + op.cl = Op::_Again; + op.block0->last().next = std::move(op.block1); + op.left.clear(); + reach = false; + } else { + if (!prune_unreachable(op.block1)) { + // block1 never returns, while equivalent to block0 ; if left then block1 else next + op.cl = Op::_If; + std::unique_ptr new_op = std::move(op.block0); + op.block0 = std::move(op.block1); + op.block1 = std::make_unique(op.next->where, Op::_Nop); + new_op->last().next = std::move(ops); + ops = std::move(new_op); + } + reach = true; // block1 may be never executed + } + break; + } + case Op::_Repeat: { + // repeat (left) block0 + VarDescr* c_var = op.var_info[op.left[0]]; + if (c_var && c_var->always_nonpos()) { + // loop never executed + ops = std::move(op.next); + return prune_unreachable(ops); + } + if (c_var && c_var->always_pos()) { + if (!prune_unreachable(op.block0)) { + // block0 
executed at least once, and it never returns + // replace code with block0 + ops = std::move(op.block0); + return false; + } + } else { + prune_unreachable(op.block0); + } + reach = true; + break; + } + case Op::_Until: + case Op::_Again: { + // do block0 until left; ... + if (!prune_unreachable(op.block0)) { + // block0 never returns, replace loop by block0 + ops = std::move(op.block0); + return false; + } + reach = (op.cl != Op::_Again); + break; + } + case Op::_TryCatch: { + reach = static_cast(prune_unreachable(op.block0)) | static_cast(prune_unreachable(op.block1)); + break; + } + default: + std::cerr << "fatal: unknown operation \n"; + throw ParseError{op.where, "unknown operation in prune_unreachable()"}; + } + if (reach) { + return prune_unreachable(op.next); + } else { + while (op.next->next) { + op.next = std::move(op.next->next); + } + return false; + } +} + +void CodeBlob::prune_unreachable_code() { + if (prune_unreachable(ops)) { + throw ParseError{loc, "control reaches end of function"}; + } +} + +void CodeBlob::fwd_analyze() { + VarDescrList values; + tolk_assert(ops && ops->cl == Op::_Import); + for (var_idx_t i : ops->left) { + values += i; + if (vars[i].v_type->is_int()) { + values[i]->val |= VarDescr::_Int; + } + } + ops->fwd_analyze(values); +} + +void Op::prepare_args(VarDescrList values) { + if (args.size() != right.size()) { + args.clear(); + for (var_idx_t i : right) { + args.emplace_back(i); + } + } + for (std::size_t i = 0; i < right.size(); i++) { + const VarDescr* val = values[right[i]]; + if (val) { + args[i].set_value(*val); + // args[i].clear_unused(); + } else { + args[i].clear_value(); + } + args[i].clear_unused(); + } +} + +VarDescrList Op::fwd_analyze(VarDescrList values) { + var_info.import_values(values); + switch (cl) { + case _Nop: + case _Import: + break; + case _Return: + values.set_unreachable(); + break; + case _IntConst: { + values.add_newval(left[0]).set_const(int_const); + break; + } + case _SliceConst: { + 
values.add_newval(left[0]).set_const(str_const); + break; + } + case _Call: { + prepare_args(values); + auto func = dynamic_cast(fun_ref->value); + if (func) { + std::vector res; + res.reserve(left.size()); + for (var_idx_t i : left) { + res.emplace_back(i); + } + AsmOpList tmp; + func->compile(tmp, res, args, where); // abstract interpretation of res := f (args) + int j = 0; + for (var_idx_t i : left) { + values.add_newval(i).set_value(res[j++]); + } + } else { + for (var_idx_t i : left) { + values.add_newval(i); + } + } + break; + } + case _Tuple: + case _UnTuple: + case _GlobVar: + case _CallInd: { + for (var_idx_t i : left) { + values.add_newval(i); + } + break; + } + case _SetGlob: + break; + case _Let: { + std::vector old_val; + tolk_assert(left.size() == right.size()); + for (std::size_t i = 0; i < right.size(); i++) { + const VarDescr* ov = values[right[i]]; + if (!ov && verbosity >= 5) { + std::cerr << "FATAL: error in assignment at right component #" << i << " (no value for _" << right[i] << ")" + << std::endl; + for (auto x : left) { + std::cerr << '_' << x << " "; + } + std::cerr << "= "; + for (auto x : right) { + std::cerr << '_' << x << " "; + } + std::cerr << std::endl; + } + // tolk_assert(ov); + if (ov) { + old_val.push_back(*ov); + } else { + old_val.emplace_back(); + } + } + for (std::size_t i = 0; i < left.size(); i++) { + values.add_newval(left[i]).set_value(std::move(old_val[i])); + } + break; + } + case _If: { + VarDescrList val1 = block0->fwd_analyze(values); + VarDescrList val2 = block1 ? 
block1->fwd_analyze(std::move(values)) : std::move(values); + values = val1 | val2; + break; + } + case _Repeat: { + bool atl1 = (values[left[0]] && values[left[0]]->always_pos()); + VarDescrList next_values = block0->fwd_analyze(values); + while (true) { + VarDescrList new_values = values | next_values; + if (same_values(new_values, values)) { + break; + } + values = std::move(new_values); + next_values = block0->fwd_analyze(values); + } + if (atl1) { + values = std::move(next_values); + } + break; + } + case _While: { + auto values0 = values; + values = block0->fwd_analyze(values); + if (values[left[0]] && values[left[0]]->always_false()) { + // block1 never executed + block1->fwd_analyze(values); + break; + } + while (true) { + VarDescrList next_values = values | block0->fwd_analyze(values0 | block1->fwd_analyze(values)); + if (same_values(next_values, values)) { + break; + } + values = std::move(next_values); + } + break; + } + case _Until: + case _Again: { + while (true) { + VarDescrList next_values = values | block0->fwd_analyze(values); + if (same_values(next_values, values)) { + break; + } + values = std::move(next_values); + } + values = block0->fwd_analyze(values); + break; + } + case _TryCatch: { + VarDescrList val1 = block0->fwd_analyze(values); + VarDescrList val2 = block1->fwd_analyze(std::move(values)); + values = val1 | val2; + break; + } + default: + std::cerr << "fatal: unknown operation \n"; + throw ParseError{where, "unknown operation in fwd_analyze()"}; + } + if (next) { + return next->fwd_analyze(std::move(values)); + } else { + return values; + } +} + +bool Op::set_noreturn(bool nr) { + if (nr) { + flags |= _NoReturn; + } else { + flags &= ~_NoReturn; + } + return nr; +} + +bool Op::mark_noreturn() { + switch (cl) { + case _Nop: + if (!next) { + return set_noreturn(false); + } + // fallthrough + case _Import: + case _IntConst: + case _SliceConst: + case _Let: + case _Tuple: + case _UnTuple: + case _SetGlob: + case _GlobVar: + case _CallInd: + 
case _Call: + return set_noreturn(next->mark_noreturn()); + case _Return: + return set_noreturn(true); + case _If: + case _TryCatch: + return set_noreturn((static_cast(block0->mark_noreturn()) & static_cast(block1 && block1->mark_noreturn())) | static_cast(next->mark_noreturn())); + case _Again: + block0->mark_noreturn(); + return set_noreturn(true); + case _Until: + return set_noreturn(static_cast(block0->mark_noreturn()) | static_cast(next->mark_noreturn())); + case _While: + block1->mark_noreturn(); + return set_noreturn(static_cast(block0->mark_noreturn()) | static_cast(next->mark_noreturn())); + case _Repeat: + block0->mark_noreturn(); + return set_noreturn(next->mark_noreturn()); + default: + std::cerr << "fatal: unknown operation \n"; + throw ParseError{where, "unknown operation in mark_noreturn()"}; + } +} + +void CodeBlob::mark_noreturn() { + ops->mark_noreturn(); +} + +} // namespace tolk diff --git a/tolk/asmops.cpp b/tolk/asmops.cpp new file mode 100644 index 00000000..cbe268f2 --- /dev/null +++ b/tolk/asmops.cpp @@ -0,0 +1,372 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#include "tolk.h" +#include + +namespace tolk { + +/* + * + * ASM-OP LIST FUNCTIONS + * + */ + +int is_pos_pow2(td::RefInt256 x) { + if (sgn(x) > 0 && !sgn(x & (x - 1))) { + return x->bit_size(false) - 1; + } else { + return -1; + } +} + +int is_neg_pow2(td::RefInt256 x) { + return sgn(x) < 0 ? is_pos_pow2(-x) : 0; +} + +std::ostream& operator<<(std::ostream& os, AsmOp::SReg stack_reg) { + int i = stack_reg.idx; + if (i >= 0) { + if (i < 16) { + return os << 's' << i; + } else { + return os << i << " s()"; + } + } else if (i >= -2) { + return os << "s(" << i << ')'; + } else { + return os << i << " s()"; + } +} + +AsmOp AsmOp::Const(int arg, std::string push_op, td::RefInt256 origin) { + std::ostringstream os; + os << arg << ' ' << push_op; + return AsmOp::Const(os.str(), origin); +} + +AsmOp AsmOp::make_stk2(int a, int b, const char* str, int delta) { + std::ostringstream os; + os << SReg(a) << ' ' << SReg(b) << ' ' << str; + int c = std::max(a, b) + 1; + return AsmOp::Custom(os.str(), c, c + delta); +} + +AsmOp AsmOp::make_stk3(int a, int b, int c, const char* str, int delta) { + std::ostringstream os; + os << SReg(a) << ' ' << SReg(b) << ' ' << SReg(c) << ' ' << str; + int m = std::max(a, std::max(b, c)) + 1; + return AsmOp::Custom(os.str(), m, m + delta); +} + +AsmOp AsmOp::BlkSwap(int a, int b) { + std::ostringstream os; + if (a == 1 && b == 1) { + return AsmOp::Xchg(0, 1); + } else if (a == 1) { + if (b == 2) { + os << "ROT"; + } else { + os << b << " ROLL"; + } + } else if (b == 1) { + if (a == 2) { + os << "-ROT"; + } else { + os << a << " -ROLL"; + } + } else { + os << a << " " << b << " BLKSWAP"; + } + return AsmOp::Custom(os.str(), a + b, a + b); +} + +AsmOp AsmOp::BlkPush(int a, int b) { + std::ostringstream os; + if (a == 1) { + return AsmOp::Push(b); + } else if (a == 2 && b == 1) { + os << "2DUP"; + } else { + os << a << " " << b << " BLKPUSH"; + } + return AsmOp::Custom(os.str(), b + 1, a + b + 1); +} + +AsmOp AsmOp::BlkDrop(int a) { + 
std::ostringstream os; + if (a == 1) { + return AsmOp::Pop(); + } else if (a == 2) { + os << "2DROP"; + } else { + os << a << " BLKDROP"; + } + return AsmOp::Custom(os.str(), a, 0); +} + +AsmOp AsmOp::BlkDrop2(int a, int b) { + if (!b) { + return BlkDrop(a); + } + std::ostringstream os; + os << a << " " << b << " BLKDROP2"; + return AsmOp::Custom(os.str(), a + b, b); +} + +AsmOp AsmOp::BlkReverse(int a, int b) { + std::ostringstream os; + os << a << " " << b << " REVERSE"; + return AsmOp::Custom(os.str(), a + b, a + b); +} + +AsmOp AsmOp::Tuple(int a) { + switch (a) { + case 1: + return AsmOp::Custom("SINGLE", 1, 1); + case 2: + return AsmOp::Custom("PAIR", 2, 1); + case 3: + return AsmOp::Custom("TRIPLE", 3, 1); + } + std::ostringstream os; + os << a << " TUPLE"; + return AsmOp::Custom(os.str(), a, 1); +} + +AsmOp AsmOp::UnTuple(int a) { + switch (a) { + case 1: + return AsmOp::Custom("UNSINGLE", 1, 1); + case 2: + return AsmOp::Custom("UNPAIR", 1, 2); + case 3: + return AsmOp::Custom("UNTRIPLE", 1, 3); + } + std::ostringstream os; + os << a << " UNTUPLE"; + return AsmOp::Custom(os.str(), 1, a); +} + +AsmOp AsmOp::IntConst(td::RefInt256 x) { + if (x->signed_fits_bits(8)) { + return AsmOp::Const(dec_string(x) + " PUSHINT", x); + } + if (!x->is_valid()) { + return AsmOp::Const("PUSHNAN", x); + } + int k = is_pos_pow2(x); + if (k >= 0) { + return AsmOp::Const(k, "PUSHPOW2", x); + } + k = is_pos_pow2(x + 1); + if (k >= 0) { + return AsmOp::Const(k, "PUSHPOW2DEC", x); + } + k = is_pos_pow2(-x); + if (k >= 0) { + return AsmOp::Const(k, "PUSHNEGPOW2", x); + } + if (!x->mod_pow2_short(23)) { + return AsmOp::Const(dec_string(x) + " PUSHINTX", x); + } + return AsmOp::Const(dec_string(x) + " PUSHINT", x); +} + +AsmOp AsmOp::BoolConst(bool f) { + return AsmOp::Const(f ? 
"TRUE" : "FALSE"); +} + +AsmOp AsmOp::Parse(std::string custom_op) { + if (custom_op == "NOP") { + return AsmOp::Nop(); + } else if (custom_op == "SWAP") { + return AsmOp::Xchg(1); + } else if (custom_op == "DROP") { + return AsmOp::Pop(0); + } else if (custom_op == "NIP") { + return AsmOp::Pop(1); + } else if (custom_op == "DUP") { + return AsmOp::Push(0); + } else if (custom_op == "OVER") { + return AsmOp::Push(1); + } else { + return AsmOp::Custom(custom_op); + } +} + +AsmOp AsmOp::Parse(std::string custom_op, int args, int retv) { + auto res = Parse(custom_op); + if (res.is_custom()) { + res.a = args; + res.b = retv; + } + return res; +} + +void AsmOp::out(std::ostream& os) const { + if (!op.empty()) { + os << op; + return; + } + switch (t) { + case a_none: + break; + case a_xchg: + if (!a && !(b & -2)) { + os << (b ? "SWAP" : "NOP"); + break; + } + os << SReg(a) << ' ' << SReg(b) << " XCHG"; + break; + case a_push: + if (!(a & -2)) { + os << (a ? "OVER" : "DUP"); + break; + } + os << SReg(a) << " PUSH"; + break; + case a_pop: + if (!(a & -2)) { + os << (a ? 
"NIP" : "DROP"); + break; + } + os << SReg(a) << " POP"; + break; + default: + throw Fatal{"unknown assembler operation"}; + } +} + +void AsmOp::out_indent_nl(std::ostream& os, bool no_eol) const { + for (int i = 0; i < indent; i++) { + os << " "; + } + out(os); + if (!no_eol) { + os << std::endl; + } +} + +std::string AsmOp::to_string() const { + if (!op.empty()) { + return op; + } else { + std::ostringstream os; + out(os); + return os.str(); + } +} + +bool AsmOpList::append(const std::vector& ops) { + for (const auto& op : ops) { + if (!append(op)) { + return false; + } + } + return true; +} + +const_idx_t AsmOpList::register_const(Const new_const) { + if (new_const.is_null()) { + return not_const; + } + unsigned idx; + for (idx = 0; idx < constants_.size(); idx++) { + if (!td::cmp(new_const, constants_[idx])) { + return idx; + } + } + constants_.push_back(std::move(new_const)); + return (const_idx_t)idx; +} + +Const AsmOpList::get_const(const_idx_t idx) { + if ((unsigned)idx < constants_.size()) { + return constants_[idx]; + } else { + return {}; + } +} + +void AsmOpList::show_var(std::ostream& os, var_idx_t idx) const { + if (!var_names_ || (unsigned)idx >= var_names_->size()) { + os << '_' << idx; + } else { + var_names_->at(idx).show(os, 2); + } +} + +void AsmOpList::show_var_ext(std::ostream& os, std::pair idx_pair) const { + auto i = idx_pair.first; + auto j = idx_pair.second; + if (!var_names_ || (unsigned)i >= var_names_->size()) { + os << '_' << i; + } else { + var_names_->at(i).show(os, 2); + } + if ((unsigned)j < constants_.size() && constants_[j].not_null()) { + os << '=' << constants_[j]; + } +} + +void AsmOpList::out(std::ostream& os, int mode) const { + if (!(mode & 2)) { + for (const auto& op : list_) { + op.out_indent_nl(os); + } + } else { + std::size_t n = list_.size(); + for (std::size_t i = 0; i < n; i++) { + const auto& op = list_[i]; + if (!op.is_comment() && i + 1 < n && list_[i + 1].is_comment()) { + op.out_indent_nl(os, true); + os << 
'\t'; + do { + i++; + } while (i + 1 < n && list_[i + 1].is_comment()); + list_[i].out(os); + os << std::endl; + } else { + op.out_indent_nl(os, false); + } + } + } +} + +bool apply_op(StackTransform& trans, const AsmOp& op) { + if (!trans.is_valid()) { + return false; + } + switch (op.t) { + case AsmOp::a_none: + return true; + case AsmOp::a_xchg: + return trans.apply_xchg(op.a, op.b, true); + case AsmOp::a_push: + return trans.apply_push(op.a); + case AsmOp::a_pop: + return trans.apply_pop(op.a); + case AsmOp::a_const: + return !op.a && op.b == 1 && trans.apply_push_newconst(); + case AsmOp::a_custom: + return op.is_gconst() && trans.apply_push_newconst(); + default: + return false; + } +} + +} // namespace tolk diff --git a/tolk/builtins.cpp b/tolk/builtins.cpp new file mode 100644 index 00000000..16ebd259 --- /dev/null +++ b/tolk/builtins.cpp @@ -0,0 +1,1231 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#include "tolk.h" + +namespace tolk { +using namespace std::literals::string_literals; + +/* + * + * SYMBOL VALUES + * + */ + +int glob_func_cnt, undef_func_cnt, glob_var_cnt, const_cnt; +std::vector glob_func, glob_vars; +std::set prohibited_var_names; + +SymDef* predefine_builtin_func(std::string name, TypeExpr* func_type) { + if (name.back() == '_') { + prohibited_var_names.insert(name); + } + sym_idx_t name_idx = symbols.lookup(name, 1); + if (symbols.is_keyword(name_idx)) { + std::cerr << "fatal: global function `" << name << "` already defined as a keyword" << std::endl; + } + SymDef* def = define_global_symbol(name_idx, true); + if (!def) { + std::cerr << "fatal: global function `" << name << "` already defined" << std::endl; + std::exit(1); + } + return def; +} + +template +SymDef* define_builtin_func(std::string name, TypeExpr* func_type, const T& func, bool impure = false) { + SymDef* def = predefine_builtin_func(name, func_type); + def->value = new SymValAsmFunc{func_type, func, impure}; + return def; +} + +template +SymDef* define_builtin_func(std::string name, TypeExpr* func_type, const T& func, std::initializer_list arg_order, + std::initializer_list ret_order = {}, bool impure = false) { + SymDef* def = predefine_builtin_func(name, func_type); + def->value = new SymValAsmFunc{func_type, func, arg_order, ret_order, impure}; + return def; +} + +SymDef* define_builtin_func(std::string name, TypeExpr* func_type, const AsmOp& macro, + std::initializer_list arg_order, std::initializer_list ret_order = {}, + bool impure = false) { + SymDef* def = predefine_builtin_func(name, func_type); + def->value = new SymValAsmFunc{func_type, make_simple_compile(macro), arg_order, ret_order, impure}; + return def; +} + +SymDef* force_autoapply(SymDef* def) { + if (def) { + auto val = dynamic_cast(def->value); + if (val) { + val->auto_apply = true; + } + } + return def; +} + +template +SymDef* define_builtin_const(std::string name, TypeExpr* const_type, Args&&... 
args) { + return force_autoapply( + define_builtin_func(name, TypeExpr::new_map(TypeExpr::new_unit(), const_type), std::forward(args)...)); +} + +bool SymValAsmFunc::compile(AsmOpList& dest, std::vector& out, std::vector& in, + const SrcLocation& where) const { + if (simple_compile) { + return dest.append(simple_compile(out, in, where)); + } else if (ext_compile) { + return ext_compile(dest, out, in); + } else { + return false; + } +} + +/* + * + * DEFINE BUILT-IN FUNCTIONS + * + */ + +int emulate_negate(int a) { + int f = VarDescr::_Pos | VarDescr::_Neg; + if ((a & f) && (~a & f)) { + a ^= f; + } + f = VarDescr::_Bit | VarDescr::_Bool; + if ((a & f) && (~a & f)) { + a ^= f; + } + return a; +} + +int emulate_add(int a, int b) { + if (b & VarDescr::_Zero) { + return a; + } else if (a & VarDescr::_Zero) { + return b; + } + int u = a & b, v = a | b; + int r = VarDescr::_Int; + int t = u & (VarDescr::_Pos | VarDescr::_Neg); + if (v & VarDescr::_Nan) { + return r | VarDescr::_Nan; + } + // non-quiet addition always returns finite results! 
+ r |= t | VarDescr::_Finite; + if (t) { + r |= v & VarDescr::_NonZero; + } + r |= v & VarDescr::_Nan; + if (u & (VarDescr::_Odd | VarDescr::_Even)) { + r |= VarDescr::_Even; + } else if (!(~v & (VarDescr::_Odd | VarDescr::_Even))) { + r |= VarDescr::_Odd | VarDescr::_NonZero; + } + return r; +} + +int emulate_sub(int a, int b) { + return emulate_add(a, emulate_negate(b)); +} + +int emulate_mul(int a, int b) { + if ((b & (VarDescr::_NonZero | VarDescr::_Bit)) == (VarDescr::_NonZero | VarDescr::_Bit)) { + return a; + } else if ((a & (VarDescr::_NonZero | VarDescr::_Bit)) == (VarDescr::_NonZero | VarDescr::_Bit)) { + return b; + } + int u = a & b, v = a | b; + int r = VarDescr::_Int; + if (v & VarDescr::_Nan) { + return r | VarDescr::_Nan; + } + // non-quiet multiplication always yields finite results, if any + r |= VarDescr::_Finite; + if (v & VarDescr::_Zero) { + // non-quiet multiplication + // the result is zero, if any result at all + return VarDescr::ConstZero; + } + if (u & (VarDescr::_Pos | VarDescr::_Neg)) { + r |= VarDescr::_Pos; + } else if (!(~v & (VarDescr::_Pos | VarDescr::_Neg))) { + r |= VarDescr::_Neg; + } + if (u & (VarDescr::_Bit | VarDescr::_Bool)) { + r |= VarDescr::_Bit; + } else if (!(~v & (VarDescr::_Bit | VarDescr::_Bool))) { + r |= VarDescr::_Bool; + } + r |= v & VarDescr::_Even; + r |= u & (VarDescr::_Odd | VarDescr::_NonZero); + return r; +} + +int emulate_and(int a, int b) { + int both = a & b, any = a | b; + int r = VarDescr::_Int; + if (any & VarDescr::_Nan) { + return r | VarDescr::_Nan; + } + r |= VarDescr::_Finite; + if (any & VarDescr::_Zero) { + return VarDescr::ConstZero; + } + r |= both & (VarDescr::_Even | VarDescr::_Odd); + r |= both & (VarDescr::_Bit | VarDescr::_Bool); + if (both & VarDescr::_Odd) { + r |= VarDescr::_NonZero; + } + return r; +} + +int emulate_or(int a, int b) { + if (b & VarDescr::_Zero) { + return a; + } else if (a & VarDescr::_Zero) { + return b; + } + int both = a & b, any = a | b; + int r = 
VarDescr::_Int; + if (any & VarDescr::_Nan) { + return r | VarDescr::_Nan; + } + r |= VarDescr::_Finite; + r |= any & VarDescr::_NonZero; + r |= any & VarDescr::_Odd; + r |= both & VarDescr::_Even; + return r; +} + +int emulate_xor(int a, int b) { + if (b & VarDescr::_Zero) { + return a; + } else if (a & VarDescr::_Zero) { + return b; + } + int both = a & b, any = a | b; + int r = VarDescr::_Int; + if (any & VarDescr::_Nan) { + return r | VarDescr::_Nan; + } + r |= VarDescr::_Finite; + r |= both & VarDescr::_Even; + if (both & VarDescr::_Odd) { + r |= VarDescr::_Even; + } + return r; +} + +int emulate_not(int a) { + if ((a & VarDescr::ConstZero) == VarDescr::ConstZero) { + return VarDescr::ConstTrue; + } + if ((a & VarDescr::ConstTrue) == VarDescr::ConstTrue) { + return VarDescr::ConstZero; + } + int a2 = a; + int f = VarDescr::_Even | VarDescr::_Odd; + if ((a2 & f) && (~a2 & f)) { + a2 ^= f; + } + a2 &= ~(VarDescr::_Zero | VarDescr::_NonZero | VarDescr::_Bit | VarDescr::_Pos | VarDescr::_Neg); + if ((a & VarDescr::_Neg) && (a & VarDescr::_NonZero)) { + a2 |= VarDescr::_Pos; + } + if (a & VarDescr::_Pos) { + a2 |= VarDescr::_Neg; + } + return a2; +} + +int emulate_lshift(int a, int b) { + if (((a | b) & VarDescr::_Nan) || !(~b & (VarDescr::_Neg | VarDescr::_NonZero))) { + return VarDescr::_Int | VarDescr::_Nan; + } + if (b & VarDescr::_Zero) { + return a; + } + int t = ((b & VarDescr::_NonZero) ? 
VarDescr::_Even : 0); + t |= b & VarDescr::_Finite; + return emulate_mul(a, VarDescr::_Int | VarDescr::_Pos | VarDescr::_NonZero | VarDescr::_Even | t); +} + +int emulate_div(int a, int b) { + if ((b & (VarDescr::_NonZero | VarDescr::_Bit)) == (VarDescr::_NonZero | VarDescr::_Bit)) { + return a; + } else if ((b & (VarDescr::_NonZero | VarDescr::_Bool)) == (VarDescr::_NonZero | VarDescr::_Bool)) { + return emulate_negate(a); + } + if (b & VarDescr::_Zero) { + return VarDescr::_Int | VarDescr::_Nan; + } + int u = a & b, v = a | b; + int r = VarDescr::_Int; + if (v & VarDescr::_Nan) { + return r | VarDescr::_Nan; + } + // non-quiet division always yields finite results, if any + r |= VarDescr::_Finite; + if (a & VarDescr::_Zero) { + // non-quiet division + // the result is zero, if any result at all + return VarDescr::ConstZero; + } + if (u & (VarDescr::_Pos | VarDescr::_Neg)) { + r |= VarDescr::_Pos; + } else if (!(~v & (VarDescr::_Pos | VarDescr::_Neg))) { + r |= VarDescr::_Neg; + } + if (u & (VarDescr::_Bit | VarDescr::_Bool)) { + r |= VarDescr::_Bit; + } else if (!(~v & (VarDescr::_Bit | VarDescr::_Bool))) { + r |= VarDescr::_Bool; + } + return r; +} + +int emulate_rshift(int a, int b) { + if (((a | b) & VarDescr::_Nan) || !(~b & (VarDescr::_Neg | VarDescr::_NonZero))) { + return VarDescr::_Int | VarDescr::_Nan; + } + if (b & VarDescr::_Zero) { + return a; + } + int t = ((b & VarDescr::_NonZero) ? 
VarDescr::_Even : 0); + t |= b & VarDescr::_Finite; + return emulate_div(a, VarDescr::_Int | VarDescr::_Pos | VarDescr::_NonZero | VarDescr::_Even | t); +} + +int emulate_mod(int a, int b, int round_mode = -1) { + if ((b & (VarDescr::_NonZero | VarDescr::_Bit)) == (VarDescr::_NonZero | VarDescr::_Bit)) { + return VarDescr::ConstZero; + } else if ((b & (VarDescr::_NonZero | VarDescr::_Bool)) == (VarDescr::_NonZero | VarDescr::_Bool)) { + return VarDescr::ConstZero; + } + if (b & VarDescr::_Zero) { + return VarDescr::_Int | VarDescr::_Nan; + } + int r = VarDescr::_Int; + if ((a | b) & VarDescr::_Nan) { + return r | VarDescr::_Nan; + } + // non-quiet division always yields finite results, if any + r |= VarDescr::_Finite; + if (a & VarDescr::_Zero) { + // non-quiet division + // the result is zero, if any result at all + return VarDescr::ConstZero; + } + if (round_mode < 0) { + r |= b & (VarDescr::_Pos | VarDescr::_Neg); + } else if (round_mode > 0) { + r |= emulate_negate(b) & (VarDescr::_Pos | VarDescr::_Neg); + } + if (a & (VarDescr::_Bit | VarDescr::_Bool)) { + if (r & VarDescr::_Pos) { + r |= VarDescr::_Bit; + } + if (r & VarDescr::_Neg) { + r |= VarDescr::_Bool; + } + } + if (b & VarDescr::_Even) { + r |= a & (VarDescr::_Even | VarDescr::_Odd); + } + return r; +} + +bool VarDescr::always_less(const VarDescr& other) const { + if (is_int_const() && other.is_int_const()) { + return int_const < other.int_const; + } + return (always_nonpos() && other.always_pos()) || (always_neg() && other.always_nonneg()); +} + +bool VarDescr::always_leq(const VarDescr& other) const { + if (is_int_const() && other.is_int_const()) { + return int_const <= other.int_const; + } + return always_nonpos() && other.always_nonneg(); +} + +bool VarDescr::always_greater(const VarDescr& other) const { + return other.always_less(*this); +} + +bool VarDescr::always_geq(const VarDescr& other) const { + return other.always_leq(*this); +} + +bool VarDescr::always_equal(const VarDescr& other) const { 
+ return is_int_const() && other.is_int_const() && *int_const == *other.int_const; +} + +bool VarDescr::always_neq(const VarDescr& other) const { + if (is_int_const() && other.is_int_const()) { + return *int_const != *other.int_const; + } + return always_greater(other) || always_less(other) || (always_even() && other.always_odd()) || + (always_odd() && other.always_even()); +} + +AsmOp exec_op(std::string op) { + return AsmOp::Custom(op); +} + +AsmOp exec_op(std::string op, int args, int retv = 1) { + return AsmOp::Custom(op, args, retv); +} + +AsmOp exec_arg_op(std::string op, long long arg) { + std::ostringstream os; + os << arg << ' ' << op; + return AsmOp::Custom(os.str()); +} + +AsmOp exec_arg_op(std::string op, long long arg, int args, int retv) { + std::ostringstream os; + os << arg << ' ' << op; + return AsmOp::Custom(os.str(), args, retv); +} + +AsmOp exec_arg_op(std::string op, td::RefInt256 arg) { + std::ostringstream os; + os << arg << ' ' << op; + return AsmOp::Custom(os.str()); +} + +AsmOp exec_arg_op(std::string op, td::RefInt256 arg, int args, int retv) { + std::ostringstream os; + os << arg << ' ' << op; + return AsmOp::Custom(os.str(), args, retv); +} + +AsmOp exec_arg2_op(std::string op, long long imm1, long long imm2, int args, int retv) { + std::ostringstream os; + os << imm1 << ' ' << imm2 << ' ' << op; + return AsmOp::Custom(os.str(), args, retv); +} + +AsmOp push_const(td::RefInt256 x) { + return AsmOp::IntConst(std::move(x)); +} + +AsmOp compile_add(std::vector& res, std::vector& args, const SrcLocation& where) { + tolk_assert(res.size() == 1 && args.size() == 2); + VarDescr &r = res[0], &x = args[0], &y = args[1]; + if (x.is_int_const() && y.is_int_const()) { + r.set_const(x.int_const + y.int_const); + if (!r.int_const->is_valid()) { + throw ParseError(where, "integer overflow"); + } + x.unused(); + y.unused(); + return push_const(r.int_const); + } + r.val = emulate_add(x.val, y.val); + if (y.is_int_const() && 
y.int_const->signed_fits_bits(8)) { + y.unused(); + if (y.always_zero()) { + return AsmOp::Nop(); + } + if (*y.int_const == 1) { + return exec_op("INC", 1); + } + if (*y.int_const == -1) { + return exec_op("DEC", 1); + } + return exec_arg_op("ADDCONST", y.int_const, 1); + } + if (x.is_int_const() && x.int_const->signed_fits_bits(8)) { + x.unused(); + if (x.always_zero()) { + return AsmOp::Nop(); + } + if (*x.int_const == 1) { + return exec_op("INC", 1); + } + if (*x.int_const == -1) { + return exec_op("DEC", 1); + } + return exec_arg_op("ADDCONST", x.int_const, 1); + } + return exec_op("ADD", 2); +} + +AsmOp compile_sub(std::vector& res, std::vector& args, const SrcLocation& where) { + tolk_assert(res.size() == 1 && args.size() == 2); + VarDescr &r = res[0], &x = args[0], &y = args[1]; + if (x.is_int_const() && y.is_int_const()) { + r.set_const(x.int_const - y.int_const); + if (!r.int_const->is_valid()) { + throw ParseError(where, "integer overflow"); + } + x.unused(); + y.unused(); + return push_const(r.int_const); + } + r.val = emulate_sub(x.val, y.val); + if (y.is_int_const() && (-y.int_const)->signed_fits_bits(8)) { + y.unused(); + if (y.always_zero()) { + return {}; + } + if (*y.int_const == 1) { + return exec_op("DEC", 1); + } + if (*y.int_const == -1) { + return exec_op("INC", 1); + } + return exec_arg_op("ADDCONST", -y.int_const, 1); + } + if (x.always_zero()) { + x.unused(); + return exec_op("NEGATE", 1); + } + return exec_op("SUB", 2); +} + +AsmOp compile_negate(std::vector& res, std::vector& args, const SrcLocation& where) { + tolk_assert(res.size() == 1 && args.size() == 1); + VarDescr &r = res[0], &x = args[0]; + if (x.is_int_const()) { + r.set_const(-x.int_const); + if (!r.int_const->is_valid()) { + throw ParseError(where, "integer overflow"); + } + x.unused(); + return push_const(r.int_const); + } + r.val = emulate_negate(x.val); + return exec_op("NEGATE", 1); +} + +AsmOp compile_and(std::vector& res, std::vector& args, const SrcLocation& where) { + 
tolk_assert(res.size() == 1 && args.size() == 2); + VarDescr &r = res[0], &x = args[0], &y = args[1]; + if (x.is_int_const() && y.is_int_const()) { + r.set_const(x.int_const & y.int_const); + x.unused(); + y.unused(); + return push_const(r.int_const); + } + r.val = emulate_and(x.val, y.val); + return exec_op("AND", 2); +} + +AsmOp compile_or(std::vector& res, std::vector& args, const SrcLocation& where) { + tolk_assert(res.size() == 1 && args.size() == 2); + VarDescr &r = res[0], &x = args[0], &y = args[1]; + if (x.is_int_const() && y.is_int_const()) { + r.set_const(x.int_const | y.int_const); + x.unused(); + y.unused(); + return push_const(r.int_const); + } + r.val = emulate_or(x.val, y.val); + return exec_op("OR", 2); +} + +AsmOp compile_xor(std::vector& res, std::vector& args, const SrcLocation& where) { + tolk_assert(res.size() == 1 && args.size() == 2); + VarDescr &r = res[0], &x = args[0], &y = args[1]; + if (x.is_int_const() && y.is_int_const()) { + r.set_const(x.int_const ^ y.int_const); + x.unused(); + y.unused(); + return push_const(r.int_const); + } + r.val = emulate_xor(x.val, y.val); + return exec_op("XOR", 2); +} + +AsmOp compile_not(std::vector& res, std::vector& args, const SrcLocation& where) { + tolk_assert(res.size() == 1 && args.size() == 1); + VarDescr &r = res[0], &x = args[0]; + if (x.is_int_const()) { + r.set_const(~x.int_const); + x.unused(); + return push_const(r.int_const); + } + r.val = emulate_not(x.val); + return exec_op("NOT", 1); +} + +AsmOp compile_mul_internal(VarDescr& r, VarDescr& x, VarDescr& y, const SrcLocation& where) { + if (x.is_int_const() && y.is_int_const()) { + r.set_const(x.int_const * y.int_const); + if (!r.int_const->is_valid()) { + throw ParseError(where, "integer overflow"); + } + x.unused(); + y.unused(); + return push_const(r.int_const); + } + r.val = emulate_mul(x.val, y.val); + if (y.is_int_const()) { + int k = is_pos_pow2(y.int_const); + if (y.int_const->signed_fits_bits(8) && k < 0) { + y.unused(); + if 
(y.always_zero() && x.always_finite()) { + // dubious optimization: NaN * 0 = ? + r.set_const(y.int_const); + x.unused(); + return push_const(r.int_const); + } + if (*y.int_const == 1 && x.always_finite()) { + return AsmOp::Nop(); + } + if (*y.int_const == -1) { + return exec_op("NEGATE", 1); + } + return exec_arg_op("MULCONST", y.int_const, 1); + } + if (k > 0) { + y.unused(); + return exec_arg_op("LSHIFT#", k, 1); + } + if (k == 0) { + y.unused(); + return AsmOp::Nop(); + } + } + if (x.is_int_const()) { + int k = is_pos_pow2(x.int_const); + if (x.int_const->signed_fits_bits(8) && k < 0) { + x.unused(); + if (x.always_zero() && y.always_finite()) { + // dubious optimization: NaN * 0 = ? + r.set_const(x.int_const); + y.unused(); + return push_const(r.int_const); + } + if (*x.int_const == 1 && y.always_finite()) { + return AsmOp::Nop(); + } + if (*x.int_const == -1) { + return exec_op("NEGATE", 1); + } + return exec_arg_op("MULCONST", x.int_const, 1); + } + if (k > 0) { + x.unused(); + return exec_arg_op("LSHIFT#", k, 1); + } + if (k == 0) { + x.unused(); + return AsmOp::Nop(); + } + } + return exec_op("MUL", 2); +} + +AsmOp compile_mul(std::vector& res, std::vector& args, const SrcLocation& where) { + tolk_assert(res.size() == 1 && args.size() == 2); + return compile_mul_internal(res[0], args[0], args[1], where); +} + +AsmOp compile_lshift(std::vector& res, std::vector& args, const SrcLocation& where) { + tolk_assert(res.size() == 1 && args.size() == 2); + VarDescr &r = res[0], &x = args[0], &y = args[1]; + if (y.is_int_const()) { + auto yv = y.int_const->to_long(); + if (yv < 0 || yv > 256) { + throw ParseError(where, "lshift argument is out of range"); + } else if (x.is_int_const()) { + r.set_const(x.int_const << (int)yv); + if (!r.int_const->is_valid()) { + throw ParseError(where, "integer overflow"); + } + x.unused(); + y.unused(); + return push_const(r.int_const); + } + } + r.val = emulate_lshift(x.val, y.val); + if (y.is_int_const()) { + int k = 
(int)(y.int_const->to_long()); + if (!k /* && x.always_finite() */) { + // dubious optimization: what if x=NaN ? + y.unused(); + return AsmOp::Nop(); + } + y.unused(); + return exec_arg_op("LSHIFT#", k, 1); + } + if (x.is_int_const()) { + auto xv = x.int_const->to_long(); + if (xv == 1) { + x.unused(); + return exec_op("POW2", 1); + } + if (xv == -1) { + x.unused(); + return exec_op("-1 PUSHINT SWAP LSHIFT", 1); + } + } + return exec_op("LSHIFT", 2); +} + +AsmOp compile_rshift(std::vector& res, std::vector& args, const SrcLocation& where, + int round_mode) { + tolk_assert(res.size() == 1 && args.size() == 2); + VarDescr &r = res[0], &x = args[0], &y = args[1]; + if (y.is_int_const()) { + auto yv = y.int_const->to_long(); + if (yv < 0 || yv > 256) { + throw ParseError(where, "rshift argument is out of range"); + } else if (x.is_int_const()) { + r.set_const(td::rshift(x.int_const, (int)yv, round_mode)); + x.unused(); + y.unused(); + return push_const(r.int_const); + } + } + r.val = emulate_rshift(x.val, y.val); + std::string rshift = (round_mode < 0 ? "RSHIFT" : (round_mode ? "RSHIFTC" : "RSHIFTR")); + if (y.is_int_const()) { + int k = (int)(y.int_const->to_long()); + if (!k /* && x.always_finite() */) { + // dubious optimization: what if x=NaN ? + y.unused(); + return AsmOp::Nop(); + } + y.unused(); + return exec_arg_op(rshift + "#", k, 1); + } + return exec_op(rshift, 2); +} + +AsmOp compile_div_internal(VarDescr& r, VarDescr& x, VarDescr& y, const SrcLocation& where, int round_mode) { + if (x.is_int_const() && y.is_int_const()) { + r.set_const(div(x.int_const, y.int_const, round_mode)); + if (!r.int_const->is_valid()) { + throw ParseError(where, *y.int_const == 0 ? 
"division by zero" : "integer overflow"); + } + x.unused(); + y.unused(); + return push_const(r.int_const); + } + r.val = emulate_div(x.val, y.val); + if (y.is_int_const()) { + if (*y.int_const == 0) { + throw ParseError(where, "division by zero"); + } + if (*y.int_const == 1 && x.always_finite()) { + y.unused(); + return AsmOp::Nop(); + } + if (*y.int_const == -1) { + y.unused(); + return exec_op("NEGATE", 1); + } + int k = is_pos_pow2(y.int_const); + if (k > 0) { + y.unused(); + std::string op = "RSHIFT"; + if (round_mode >= 0) { + op += (round_mode > 0 ? 'C' : 'R'); + } + return exec_arg_op(op + '#', k, 1); + } + } + std::string op = "DIV"; + if (round_mode >= 0) { + op += (round_mode > 0 ? 'C' : 'R'); + } + return exec_op(op, 2); +} + +AsmOp compile_div(std::vector& res, std::vector& args, const SrcLocation& where, int round_mode) { + tolk_assert(res.size() == 1 && args.size() == 2); + return compile_div_internal(res[0], args[0], args[1], where, round_mode); +} + +AsmOp compile_mod(std::vector& res, std::vector& args, const SrcLocation& where, + int round_mode) { + tolk_assert(res.size() == 1 && args.size() == 2); + VarDescr &r = res[0], &x = args[0], &y = args[1]; + if (x.is_int_const() && y.is_int_const()) { + r.set_const(mod(x.int_const, y.int_const, round_mode)); + if (!r.int_const->is_valid()) { + throw ParseError(where, *y.int_const == 0 ? "division by zero" : "integer overflow"); + } + x.unused(); + y.unused(); + return push_const(r.int_const); + } + r.val = emulate_mod(x.val, y.val); + if (y.is_int_const()) { + if (*y.int_const == 0) { + throw ParseError(where, "division by zero"); + } + if ((*y.int_const == 1 || *y.int_const == -1) && x.always_finite()) { + x.unused(); + y.unused(); + r.set_const(td::zero_refint()); + return push_const(r.int_const); + } + int k = is_pos_pow2(y.int_const); + if (k > 0) { + y.unused(); + std::string op = "MODPOW2"; + if (round_mode >= 0) { + op += (round_mode > 0 ? 
'C' : 'R'); + } + return exec_arg_op(op + '#', k, 1); + } + } + std::string op = "MOD"; + if (round_mode >= 0) { + op += (round_mode > 0 ? 'C' : 'R'); + } + return exec_op(op, 2); +} + +AsmOp compile_muldiv(std::vector& res, std::vector& args, const SrcLocation& where, + int round_mode) { + tolk_assert(res.size() == 1 && args.size() == 3); + VarDescr &r = res[0], &x = args[0], &y = args[1], &z = args[2]; + if (x.is_int_const() && y.is_int_const() && z.is_int_const()) { + r.set_const(muldiv(x.int_const, y.int_const, z.int_const, round_mode)); + if (!r.int_const->is_valid()) { + throw ParseError(where, *z.int_const == 0 ? "division by zero" : "integer overflow"); + } + x.unused(); + y.unused(); + z.unused(); + return push_const(r.int_const); + } + if (x.always_zero() || y.always_zero()) { + // dubious optimization for z=0... + x.unused(); + y.unused(); + z.unused(); + r.set_const(td::make_refint(0)); + return push_const(r.int_const); + } + char c = (round_mode < 0) ? 0 : (round_mode > 0 ? 
'C' : 'R'); + r.val = emulate_div(emulate_mul(x.val, y.val), z.val); + if (z.is_int_const()) { + if (*z.int_const == 0) { + throw ParseError(where, "division by zero"); + } + if (*z.int_const == 1) { + z.unused(); + return compile_mul_internal(r, x, y, where); + } + } + if (y.is_int_const() && *y.int_const == 1) { + y.unused(); + return compile_div_internal(r, x, z, where, round_mode); + } + if (x.is_int_const() && *x.int_const == 1) { + x.unused(); + return compile_div_internal(r, y, z, where, round_mode); + } + if (z.is_int_const()) { + int k = is_pos_pow2(z.int_const); + if (k > 0) { + z.unused(); + std::string op = "MULRSHIFT"; + if (c) { + op += c; + } + return exec_arg_op(op + '#', k, 2); + } + } + if (y.is_int_const()) { + int k = is_pos_pow2(y.int_const); + if (k > 0) { + y.unused(); + std::string op = "LSHIFT#DIV"; + if (c) { + op += c; + } + return exec_arg_op(op, k, 2); + } + } + if (x.is_int_const()) { + int k = is_pos_pow2(x.int_const); + if (k > 0) { + x.unused(); + std::string op = "LSHIFT#DIV"; + if (c) { + op += c; + } + return exec_arg_op(op, k, 2); + } + } + std::string op = "MULDIV"; + if (c) { + op += c; + } + return exec_op(op, 3); +} + +int compute_compare(td::RefInt256 x, td::RefInt256 y, int mode) { + int s = td::cmp(x, y); + if (mode == 7) { + return s; + } else { + return -((mode >> (1 - s)) & 1); + } +} + +// return value: +// 4 -> constant 1 +// 2 -> constant 0 +// 1 -> constant -1 +// 3 -> 0 or -1 +int compute_compare(const VarDescr& x, const VarDescr& y, int mode) { + switch (mode) { + case 1: // > + return x.always_greater(y) ? 1 : (x.always_leq(y) ? 2 : 3); + case 2: // = + return x.always_equal(y) ? 1 : (x.always_neq(y) ? 2 : 3); + case 3: // >= + return x.always_geq(y) ? 1 : (x.always_less(y) ? 2 : 3); + case 4: // < + return x.always_less(y) ? 1 : (x.always_geq(y) ? 2 : 3); + case 5: // <> + return x.always_neq(y) ? 1 : (x.always_equal(y) ? 2 : 3); + case 6: // <= + return x.always_leq(y) ? 1 : (x.always_greater(y) ? 
2 : 3); + case 7: // <=> + return x.always_less(y) + ? 1 + : (x.always_equal(y) + ? 2 + : (x.always_greater(y) + ? 4 + : (x.always_leq(y) ? 3 : (x.always_geq(y) ? 6 : (x.always_neq(y) ? 5 : 7))))); + default: + return 7; + } +} + +AsmOp compile_cmp_int(std::vector& res, std::vector& args, int mode) { + tolk_assert(mode >= 1 && mode <= 7); + tolk_assert(res.size() == 1 && args.size() == 2); + VarDescr &r = res[0], &x = args[0], &y = args[1]; + if (x.is_int_const() && y.is_int_const()) { + int v = compute_compare(x.int_const, y.int_const, mode); + r.set_const(v); + x.unused(); + y.unused(); + return mode == 7 ? push_const(r.int_const) : AsmOp::BoolConst(v != 0); + } + int v = compute_compare(x, y, mode); + // std::cerr << "compute_compare(" << x << ", " << y << ", " << mode << ") = " << v << std::endl; + tolk_assert(v); + if (!(v & (v - 1))) { + r.set_const(v - (v >> 2) - 2); + x.unused(); + y.unused(); + return mode == 7 ? push_const(r.int_const) : AsmOp::BoolConst(v & 1); + } + r.val = ~0; + if (v & 1) { + r.val &= VarDescr::ConstTrue; + } + if (v & 2) { + r.val &= VarDescr::ConstZero; + } + if (v & 4) { + r.val &= VarDescr::ConstOne; + } + // std::cerr << "result: " << r << std::endl; + static const char* cmp_int_names[] = {"", "GTINT", "EQINT", "GTINT", "LESSINT", "NEQINT", "LESSINT"}; + static const char* cmp_names[] = {"", "GREATER", "EQUAL", "GEQ", "LESS", "NEQ", "LEQ", "CMP"}; + static int cmp_int_delta[] = {0, 0, 0, -1, 0, 0, 1}; + if (mode != 7) { + if (y.is_int_const() && y.int_const >= -128 && y.int_const <= 127) { + y.unused(); + return exec_arg_op(cmp_int_names[mode], y.int_const + cmp_int_delta[mode], 1); + } + if (x.is_int_const() && x.int_const >= -128 && x.int_const <= 127) { + x.unused(); + mode = ((mode & 4) >> 2) | (mode & 2) | ((mode & 1) << 2); + return exec_arg_op(cmp_int_names[mode], x.int_const + cmp_int_delta[mode], 1); + } + } + return exec_op(cmp_names[mode], 2); +} + +AsmOp compile_throw(std::vector& res, std::vector& args, const 
SrcLocation&) { + tolk_assert(res.empty() && args.size() == 1); + VarDescr& x = args[0]; + if (x.is_int_const() && x.int_const->unsigned_fits_bits(11)) { + x.unused(); + return exec_arg_op("THROW", x.int_const, 0, 0); + } else { + return exec_op("THROWANY", 1, 0); + } +} + +AsmOp compile_cond_throw(std::vector& res, std::vector& args, bool mode) { + tolk_assert(res.empty() && args.size() == 2); + VarDescr &x = args[0], &y = args[1]; + std::string suff = (mode ? "IF" : "IFNOT"); + bool skip_cond = false; + if (y.always_true() || y.always_false()) { + y.unused(); + skip_cond = true; + if (y.always_true() != mode) { + x.unused(); + return AsmOp::Nop(); + } + } + if (x.is_int_const() && x.int_const->unsigned_fits_bits(11)) { + x.unused(); + return skip_cond ? exec_arg_op("THROW", x.int_const, 0, 0) : exec_arg_op("THROW"s + suff, x.int_const, 1, 0); + } else { + return skip_cond ? exec_op("THROWANY", 1, 0) : exec_op("THROWANY"s + suff, 2, 0); + } +} + +AsmOp compile_throw_arg(std::vector& res, std::vector& args, const SrcLocation&) { + tolk_assert(res.empty() && args.size() == 2); + VarDescr &x = args[1]; + if (x.is_int_const() && x.int_const->unsigned_fits_bits(11)) { + x.unused(); + return exec_arg_op("THROWARG", x.int_const, 1, 0); + } else { + return exec_op("THROWARGANY", 2, 0); + } +} + +AsmOp compile_cond_throw_arg(std::vector& res, std::vector& args, bool mode) { + tolk_assert(res.empty() && args.size() == 3); + VarDescr &x = args[1], &y = args[2]; + std::string suff = (mode ? "IF" : "IFNOT"); + bool skip_cond = false; + if (y.always_true() || y.always_false()) { + y.unused(); + skip_cond = true; + if (y.always_true() != mode) { + x.unused(); + return AsmOp::Nop(); + } + } + if (x.is_int_const() && x.int_const->unsigned_fits_bits(11)) { + x.unused(); + return skip_cond ? exec_arg_op("THROWARG", x.int_const, 1, 0) : exec_arg_op("THROWARG"s + suff, x.int_const, 2, 0); + } else { + return skip_cond ? 
exec_op("THROWARGANY", 2, 0) : exec_op("THROWARGANY"s + suff, 3, 0); + } +} + +AsmOp compile_bool_const(std::vector& res, std::vector& args, bool val) { + tolk_assert(res.size() == 1 && args.empty()); + VarDescr& r = res[0]; + r.set_const(val ? -1 : 0); + return AsmOp::Const(val ? "TRUE" : "FALSE"); +} + +// (slice, int) load_int(slice s, int len) asm(s len -> 1 0) "LDIX"; +// (slice, int) load_uint(slice s, int len) asm( -> 1 0) "LDUX"; +// int preload_int(slice s, int len) asm "PLDIX"; +// int preload_uint(slice s, int len) asm "PLDUX"; +AsmOp compile_fetch_int(std::vector& res, std::vector& args, bool fetch, bool sgnd) { + tolk_assert(args.size() == 2 && res.size() == 1 + (unsigned)fetch); + auto &y = args[1], &r = res.back(); + r.val = (sgnd ? VarDescr::FiniteInt : VarDescr::FiniteUInt); + int v = -1; + if (y.is_int_const() && y.int_const >= 0 && y.int_const <= 256) { + v = (int)y.int_const->to_long(); + if (!v) { + r.val = VarDescr::ConstZero; + } + if (v == 1) { + r.val = (sgnd ? VarDescr::ValBool : VarDescr::ValBit); + } + if (v > 0) { + y.unused(); + return exec_arg_op((fetch ? "LD"s : "PLD"s) + (sgnd ? 'I' : 'U'), v, 1, 1 + (unsigned)fetch); + } + } + return exec_op((fetch ? "LD"s : "PLD"s) + (sgnd ? "IX" : "UX"), 2, 1 + (unsigned)fetch); +} + +// builder store_uint(builder b, int x, int len) asm(x b len) "STUX"; +// builder store_int(builder b, int x, int len) asm(x b len) "STIX"; +AsmOp compile_store_int(std::vector& res, std::vector& args, bool sgnd) { + tolk_assert(args.size() == 3 && res.size() == 1); + auto& z = args[2]; + if (z.is_int_const() && z.int_const > 0 && z.int_const <= 256) { + z.unused(); + return exec_arg_op("ST"s + (sgnd ? 'I' : 'U'), z.int_const, 2, 1); + } + return exec_op("ST"s + (sgnd ? 
"IX" : "UX"), 3, 1); +} + +AsmOp compile_fetch_slice(std::vector& res, std::vector& args, bool fetch) { + tolk_assert(args.size() == 2 && res.size() == 1 + (unsigned)fetch); + auto& y = args[1]; + int v = -1; + if (y.is_int_const() && y.int_const > 0 && y.int_const <= 256) { + v = (int)y.int_const->to_long(); + if (v > 0) { + y.unused(); + return exec_arg_op(fetch ? "LDSLICE" : "PLDSLICE", v, 1, 1 + (unsigned)fetch); + } + } + return exec_op(fetch ? "LDSLICEX" : "PLDSLICEX", 2, 1 + (unsigned)fetch); +} + +// _at(tuple t, int index) asm "INDEXVAR"; +AsmOp compile_tuple_at(std::vector& res, std::vector& args, const SrcLocation&) { + tolk_assert(args.size() == 2 && res.size() == 1); + auto& y = args[1]; + if (y.is_int_const() && y.int_const >= 0 && y.int_const < 16) { + y.unused(); + return exec_arg_op("INDEX", y.int_const, 1, 1); + } + return exec_op("INDEXVAR", 2, 1); +} + +// int null?(X arg) +AsmOp compile_is_null(std::vector& res, std::vector& args, const SrcLocation&) { + tolk_assert(args.size() == 1 && res.size() == 1); + auto &x = args[0], &r = res[0]; + if (x.always_null() || x.always_not_null()) { + x.unused(); + r.set_const(x.always_null() ? 
-1 : 0);
    return push_const(r.int_const);
  }
  res[0].val = VarDescr::ValBool;
  return exec_op("ISNULL", 1, 1);
}


// Registers every built-in operator, function and constant of the language.
// Each entry pairs a source-level name with its type and with either a fixed
// AsmOp or one of the compile_* callbacks defined above in this file.
// NOTE(review): the meaning of the optional trailing arguments of
// define_builtin_func (a bool, and one or two index lists) is not visible here;
// presumably they are an "impure" flag and the argument / return orderings -- confirm
// against the declaration.
void define_builtins() {
  using namespace std::placeholders;
  // shorthands for the types used in the signatures below
  auto Unit = TypeExpr::new_unit();
  auto Int = TypeExpr::new_atomic(_Int);
  auto Cell = TypeExpr::new_atomic(_Cell);
  auto Slice = TypeExpr::new_atomic(_Slice);
  auto Builder = TypeExpr::new_atomic(_Builder);
  // auto Null = TypeExpr::new_atomic(_Null);
  auto Tuple = TypeExpr::new_atomic(_Tuple);
  auto Int2 = TypeExpr::new_tensor({Int, Int});
  auto Int3 = TypeExpr::new_tensor({Int, Int, Int});
  auto TupleInt = TypeExpr::new_tensor({Tuple, Int});
  auto SliceInt = TypeExpr::new_tensor({Slice, Int});
  auto X = TypeExpr::new_var();
  auto Y = TypeExpr::new_var();
  auto Z = TypeExpr::new_var();
  auto XY = TypeExpr::new_tensor({X, Y});
  // function types shared by several builtins
  auto arith_bin_op = TypeExpr::new_map(Int2, Int);
  auto arith_un_op = TypeExpr::new_map(Int, Int);
  auto impure_bin_op = TypeExpr::new_map(Int2, Unit);
  auto impure_un_op = TypeExpr::new_map(Int, Unit);
  auto fetch_int_op = TypeExpr::new_map(SliceInt, SliceInt);
  auto prefetch_int_op = TypeExpr::new_map(SliceInt, Int);
  auto store_int_op = TypeExpr::new_map(TypeExpr::new_tensor({Builder, Int, Int}), Builder);
  auto store_int_method =
      TypeExpr::new_map(TypeExpr::new_tensor({Builder, Int, Int}), TypeExpr::new_tensor({Builder, Unit}));
  auto fetch_slice_op = TypeExpr::new_map(SliceInt, TypeExpr::new_tensor({Slice, Slice}));
  auto prefetch_slice_op = TypeExpr::new_map(SliceInt, Slice);
  //auto arith_null_op = TypeExpr::new_map(TypeExpr::new_unit(), Int);
  auto throw_arg_op = TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_tensor({X, Int}), Unit));
  auto cond_throw_arg_op = TypeExpr::new_forall({X}, TypeExpr::new_map(TypeExpr::new_tensor({X, Int, Int}), Unit));
  // arithmetic operators; for the division family the bound last argument is the
  // rounding mode (-1, 0 or 1) passed to compile_div / compile_mod / compile_rshift
  define_builtin_func("_+_", arith_bin_op, compile_add);
  define_builtin_func("_-_", arith_bin_op, compile_sub);
  define_builtin_func("-_", arith_un_op, compile_negate);
  define_builtin_func("_*_", arith_bin_op, compile_mul);
  define_builtin_func("_/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, -1));
  define_builtin_func("_~/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 0));
  define_builtin_func("_^/_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 1));
  define_builtin_func("_%_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, -1));
  define_builtin_func("_~%_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, 0));
  define_builtin_func("_^%_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, 1));
  define_builtin_func("_/%_", TypeExpr::new_map(Int2, Int2), AsmOp::Custom("DIVMOD", 2, 2));
  define_builtin_func("divmod", TypeExpr::new_map(Int2, Int2), AsmOp::Custom("DIVMOD", 2, 2));
  define_builtin_func("~divmod", TypeExpr::new_map(Int2, Int2), AsmOp::Custom("DIVMOD", 2, 2));
  define_builtin_func("moddiv", TypeExpr::new_map(Int2, Int2), AsmOp::Custom("DIVMOD", 2, 2), {}, {1, 0});
  define_builtin_func("~moddiv", TypeExpr::new_map(Int2, Int2), AsmOp::Custom("DIVMOD", 2, 2), {}, {1, 0});
  define_builtin_func("_<<_", arith_bin_op, compile_lshift);
  define_builtin_func("_>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1));
  define_builtin_func("_~>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 0));
  define_builtin_func("_^>>_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 1));
  define_builtin_func("_&_", arith_bin_op, compile_and);
  define_builtin_func("_|_", arith_bin_op, compile_or);
  define_builtin_func("_^_", arith_bin_op, compile_xor);
  define_builtin_func("~_", arith_un_op, compile_not);
  // compound assignments reuse the compilers of the plain operators
  define_builtin_func("^_+=_", arith_bin_op, compile_add);
  define_builtin_func("^_-=_", arith_bin_op, compile_sub);
  define_builtin_func("^_*=_", arith_bin_op, compile_mul);
  define_builtin_func("^_/=_", arith_bin_op, std::bind(compile_div, _1, _2, _3, -1));
  define_builtin_func("^_~/=_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 0));
  define_builtin_func("^_^/=_", arith_bin_op, std::bind(compile_div, _1, _2, _3, 1));
  define_builtin_func("^_%=_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, -1));
  define_builtin_func("^_~%=_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, 0));
  define_builtin_func("^_^%=_", arith_bin_op, std::bind(compile_mod, _1, _2, _3, 1));
  define_builtin_func("^_<<=_", arith_bin_op, compile_lshift);
  define_builtin_func("^_>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, -1));
  define_builtin_func("^_~>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 0));
  define_builtin_func("^_^>>=_", arith_bin_op, std::bind(compile_rshift, _1, _2, _3, 1));
  define_builtin_func("^_&=_", arith_bin_op, compile_and);
  define_builtin_func("^_|=_", arith_bin_op, compile_or);
  define_builtin_func("^_^=_", arith_bin_op, compile_xor);
  define_builtin_func("muldiv", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, -1));
  define_builtin_func("muldivr", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 0));
  define_builtin_func("muldivc", TypeExpr::new_map(Int3, Int), std::bind(compile_muldiv, _1, _2, _3, 1));
  define_builtin_func("muldivmod", TypeExpr::new_map(Int3, Int2), AsmOp::Custom("MULDIVMOD", 3, 2));
  // comparisons: the bound integer is the `mode` accepted by compile_cmp_int (1..7)
  define_builtin_func("_==_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 2));
  define_builtin_func("_!=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 5));
  define_builtin_func("_<_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 4));
  define_builtin_func("_>_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 1));
  define_builtin_func("_<=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 6));
  define_builtin_func("_>=_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 3));
  define_builtin_func("_<=>_", arith_bin_op, std::bind(compile_cmp_int, _1, _2, 7));
  // constants
  define_builtin_const("true", Int, /* AsmOp::Const("TRUE") */ std::bind(compile_bool_const, _1, _2, true));
  define_builtin_const("false", Int, /* AsmOp::Const("FALSE") */ std::bind(compile_bool_const, _1, _2, false));
  // define_builtin_func("null", Null, AsmOp::Const("PUSHNULL"));
  define_builtin_const("nil", Tuple, AsmOp::Const("PUSHNULL"));
  define_builtin_const("Nil", Tuple, AsmOp::Const("NIL"));
  define_builtin_func("null?", TypeExpr::new_forall({X}, TypeExpr::new_map(X, Int)), compile_is_null);
  // exceptions: the bound bool selects the IF (true) or IFNOT (false) variant
  define_builtin_func("throw", impure_un_op, compile_throw, true);
  define_builtin_func("throw_if", impure_bin_op, std::bind(compile_cond_throw, _1, _2, true), true);
  define_builtin_func("throw_unless", impure_bin_op, std::bind(compile_cond_throw, _1, _2, false), true);
  define_builtin_func("throw_arg", throw_arg_op, compile_throw_arg, true);
  define_builtin_func("throw_arg_if", cond_throw_arg_op, std::bind(compile_cond_throw_arg, _1, _2, true), true);
  define_builtin_func("throw_arg_unless", cond_throw_arg_op, std::bind(compile_cond_throw_arg, _1, _2, false), true);
  // slice loads and builder stores: the bound bools are (fetch, sgnd) for
  // compile_fetch_int, sgnd for compile_store_int and fetch for compile_fetch_slice
  define_builtin_func("load_int", fetch_int_op, std::bind(compile_fetch_int, _1, _2, true, true), {}, {1, 0});
  define_builtin_func("load_uint", fetch_int_op, std::bind(compile_fetch_int, _1, _2, true, false), {}, {1, 0});
  define_builtin_func("preload_int", prefetch_int_op, std::bind(compile_fetch_int, _1, _2, false, true));
  define_builtin_func("preload_uint", prefetch_int_op, std::bind(compile_fetch_int, _1, _2, false, false));
  define_builtin_func("store_int", store_int_op, std::bind(compile_store_int, _1, _2, true), {1, 0, 2});
  define_builtin_func("store_uint", store_int_op, std::bind(compile_store_int, _1, _2, false), {1, 0, 2});
  define_builtin_func("~store_int", store_int_method, std::bind(compile_store_int, _1, _2, true), {1, 0, 2});
  define_builtin_func("~store_uint", store_int_method, std::bind(compile_store_int, _1, _2, false), {1, 0, 2});
  define_builtin_func("load_bits", fetch_slice_op, std::bind(compile_fetch_slice, _1, _2, true), {}, {1, 0});
  define_builtin_func("preload_bits", prefetch_slice_op, std::bind(compile_fetch_slice, _1, _2, false));
  // tuple access, no-op markers and debugging helpers
  define_builtin_func("at", TypeExpr::new_forall({X}, TypeExpr::new_map(TupleInt, X)), compile_tuple_at);
  define_builtin_func("touch", TypeExpr::new_forall({X}, TypeExpr::new_map(X, X)), AsmOp::Nop());
  define_builtin_func("~touch", TypeExpr::new_forall({X}, TypeExpr::new_map(X, TypeExpr::new_tensor({X, Unit}))),
                      AsmOp::Nop());
  define_builtin_func("~dump", TypeExpr::new_forall({X}, TypeExpr::new_map(X, TypeExpr::new_tensor({X, Unit}))),
                      AsmOp::Custom("s0 DUMP", 1, 1), true);
  define_builtin_func("~strdump", TypeExpr::new_forall({X}, TypeExpr::new_map(X, TypeExpr::new_tensor({X, Unit}))),
                      AsmOp::Custom("STRDUMP", 1, 1), true);
}

}  // namespace tolk
diff --git a/tolk/codegen.cpp b/tolk/codegen.cpp
new file mode 100644
index 00000000..504d0b21
--- /dev/null
+++ b/tolk/codegen.cpp
@@ -0,0 +1,910 @@
/*
    This file is part of TON Blockchain Library.

    TON Blockchain Library is free software: you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.

    TON Blockchain Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public License
    along with TON Blockchain Library.  If not, see <http://www.gnu.org/licenses/>.
*/
#include "tolk.h"

namespace tolk {

/*
 *
 *   GENERATE TVM STACK CODE
 *
 */

// Returns the variable index of every stack entry, in the storage order of `s`.
StackLayout Stack::vars() const {
  StackLayout res;
  res.reserve(s.size());
  for (auto x : s) {
    res.push_back(x.first);
  }
  return res;
}

// Returns the first position i >= from for which at(i) holds `var`, or -1 if there is none.
int Stack::find(var_idx_t var, int from) const {
  for (int i = from; i < depth(); i++) {
    if (at(i).first == var) {
      return i;
    }
  }
  return -1;
}

// finds var in [from .. 
to) +int Stack::find(var_idx_t var, int from, int to) const { + for (int i = from; i < depth() && i < to; i++) { + if (at(i).first == var) { + return i; + } + } + return -1; +} + +// finds var outside [from .. to) +int Stack::find_outside(var_idx_t var, int from, int to) const { + from = std::max(from, 0); + if (from >= to) { + return find(var); + } else { + int t = find(var, 0, from); + return t >= 0 ? t : find(var, to); + } +} + +int Stack::find_const(const_idx_t cst, int from) const { + for (int i = from; i < depth(); i++) { + if (at(i).second == cst) { + return i; + } + } + return -1; +} + +void Stack::forget_const() { + for (auto& vc : s) { + if (vc.second != not_const) { + vc.second = not_const; + } + } +} + +void Stack::issue_pop(int i) { + validate(i); + if (output_enabled()) { + o << AsmOp::Pop(i); + } + at(i) = get(0); + s.pop_back(); + modified(); +} + +void Stack::issue_push(int i) { + validate(i); + if (output_enabled()) { + o << AsmOp::Push(i); + } + s.push_back(get(i)); + modified(); +} + +void Stack::issue_xchg(int i, int j) { + validate(i); + validate(j); + if (i != j && get(i) != get(j)) { + if (output_enabled()) { + o << AsmOp::Xchg(i, j); + } + std::swap(at(i), at(j)); + modified(); + } +} + +int Stack::drop_vars_except(const VarDescrList& var_info, int excl_var) { + int dropped = 0, changes; + do { + changes = 0; + int n = depth(); + for (int i = 0; i < n; i++) { + var_idx_t idx = at(i).first; + if (((!var_info[idx] || var_info[idx]->is_unused()) && idx != excl_var) || find(idx, 0, i - 1) >= 0) { + // unneeded + issue_pop(i); + changes = 1; + break; + } + } + dropped += changes; + } while (changes); + return dropped; +} + +void Stack::show(int flags) { + std::ostringstream os; + for (auto i : s) { + os << ' '; + o.show_var_ext(os, i); + } + o << AsmOp::Comment(os.str()); + mode |= _Shown; +} + +void Stack::forget_var(var_idx_t idx) { + for (auto& x : s) { + if (x.first == idx) { + x = std::make_pair(_Garbage, not_const); + modified(); + } + } 
+} + +void Stack::push_new_var(var_idx_t idx) { + forget_var(idx); + s.emplace_back(idx, not_const); + modified(); +} + +void Stack::push_new_const(var_idx_t idx, const_idx_t cidx) { + forget_var(idx); + s.emplace_back(idx, cidx); + modified(); +} + +void Stack::assign_var(var_idx_t new_idx, var_idx_t old_idx) { + int i = find(old_idx); + tolk_assert(i >= 0 && "variable not found in stack"); + if (new_idx != old_idx) { + at(i).first = new_idx; + modified(); + } +} + +void Stack::do_copy_var(var_idx_t new_idx, var_idx_t old_idx) { + int i = find(old_idx); + tolk_assert(i >= 0 && "variable not found in stack"); + if (find(old_idx, i + 1) < 0) { + issue_push(i); + tolk_assert(at(0).first == old_idx); + } + assign_var(new_idx, old_idx); +} + +void Stack::enforce_state(const StackLayout& req_stack) { + int k = (int)req_stack.size(); + for (int i = 0; i < k; i++) { + var_idx_t x = req_stack[i]; + if (i < depth() && s[i].first == x) { + continue; + } + while (depth() > 0 && std::find(req_stack.cbegin(), req_stack.cend(), get(0).first) == req_stack.cend()) { + // current TOS entry is unused in req_stack, drop it + issue_pop(0); + } + int j = find(x); + if (j >= depth() - i) { + issue_push(j); + j = 0; + } + issue_xchg(j, depth() - i - 1); + tolk_assert(s[i].first == x); + } + while (depth() > k) { + issue_pop(0); + } + tolk_assert(depth() == k); + for (int i = 0; i < k; i++) { + tolk_assert(s[i].first == req_stack[i]); + } +} + +void Stack::merge_const(const Stack& req_stack) { + tolk_assert(s.size() == req_stack.s.size()); + for (std::size_t i = 0; i < s.size(); i++) { + tolk_assert(s[i].first == req_stack.s[i].first); + if (s[i].second != req_stack.s[i].second) { + s[i].second = not_const; + } + } +} + +void Stack::merge_state(const Stack& req_stack) { + enforce_state(req_stack.vars()); + merge_const(req_stack); +} + +void Stack::rearrange_top(const StackLayout& top, std::vector last) { + while (last.size() < top.size()) { + last.push_back(false); + } + int k = 
(int)top.size();
  for (int i = 0; i < k; i++) {
    for (int j = i + 1; j < k; j++) {
      if (top[i] == top[j]) {
        last[i] = false;
        break;
      }
    }
  }
  int ss = 0;
  for (int i = 0; i < k; i++) {
    if (last[i]) {
      ++ss;
    }
  }
  for (int i = 0; i < k; i++) {
    var_idx_t x = top[i];
    // find s(j) containing x with j not in [ss, ss+i)
    int j = find_outside(x, ss, ss + i);
    if (last[i]) {
      // rearrange x to be at s(ss-1)
      issue_xchg(--ss, j);
      tolk_assert(get(ss).first == x);
    } else {
      // create a new copy of x
      issue_push(j);
      issue_xchg(0, ss);
      tolk_assert(get(ss).first == x);
    }
  }
  tolk_assert(!ss);
}

// Brings variable `top` to the top of the stack: exchanged into place when this is
// its last use, duplicated otherwise.
void Stack::rearrange_top(var_idx_t top, bool last) {
  int i = find(top);
  if (last) {
    issue_xchg(0, i);
  } else {
    issue_push(i);
  }
  tolk_assert(get(0).first == top);
}

// Generates stack code for this single operation, updating `stack` to mirror the
// emitted instructions.
// Returns true when the caller should go on with `next`, false when this step has
// already handled (or cut off) the rest of the chain -- see generate_code_all().
// NOTE(review): several template arguments in this function (`dynamic_cast(...)`,
// `std::vector ...`) appear to have been stripped from this copy of the file;
// they are left exactly as found -- restore them from the upstream source.
bool Op::generate_code_step(Stack& stack) {
  stack.opt_show();
  stack.drop_vars_except(var_info);
  stack.opt_show();
  bool inline_func = stack.mode & Stack::_InlineFunc;
  switch (cl) {
    case _Nop:
    case _Import:
      return true;
    case _Return: {
      stack.enforce_state(left);
      if (stack.o.retalt_ && (stack.mode & Stack::_NeedRetAlt)) {
        stack.o << "RETALT";
      }
      stack.opt_show();
      return false;
    }
    case _IntConst: {
      // skip the constant entirely if its target variable is never used afterwards
      auto p = next->var_info[left[0]];
      if (!p || p->is_unused()) {
        return true;
      }
      auto cidx = stack.o.register_const(int_const);
      int i = stack.find_const(cidx);
      if (i < 0) {
        stack.o << push_const(int_const);
        stack.push_new_const(left[0], cidx);
      } else {
        // the same constant is already on the stack: copy it instead of pushing again
        tolk_assert(stack.at(i).second == cidx);
        stack.do_copy_var(left[0], stack[i]);
      }
      return true;
    }
    case _SliceConst: {
      auto p = next->var_info[left[0]];
      if (!p || p->is_unused()) {
        return true;
      }
      stack.o << AsmOp::Const("x{" + str_const + "} PUSHSLICE");
      stack.push_new_var(left[0]);
      return true;
    }
    case _GlobVar:
      if (dynamic_cast(fun_ref->value)) {
        // reading a global variable: emit GETGLOB only if some result is used
        bool used = false;
        for (auto i : left) {
          auto p = next->var_info[i];
          if (p && !p->is_unused()) {
            used = true;
          }
        }
        if (!used || disabled()) {
          return true;
        }
        std::string name = symbols.get_name(fun_ref->sym_idx);
        stack.o << AsmOp::Custom(name + " GETGLOB", 0, 1);
        if (left.size() != 1) {
          tolk_assert(left.size() <= 15);
          stack.o << AsmOp::UnTuple((int)left.size());
        }
        for (auto i : left) {
          stack.push_new_var(i);
        }
        return true;
      } else {
        // a function used as a value: wrap it into a continuation
        tolk_assert(left.size() == 1);
        auto p = next->var_info[left[0]];
        if (!p || p->is_unused() || disabled()) {
          return true;
        }
        stack.o << "CONT:<{";
        stack.o.indent();
        auto func = dynamic_cast(fun_ref->value);
        if (func) {
          // TODO: create and compile a true lambda instead of this (so that arg_order and ret_order would work correctly)
          std::vector args0, res;
          TypeExpr::remove_indirect(func->sym_type);
          tolk_assert(func->get_type()->is_map());
          auto wr = func->get_type()->args.at(0)->get_width();
          auto wl = func->get_type()->args.at(1)->get_width();
          tolk_assert(wl >= 0 && wr >= 0);
          for (int i = 0; i < wl; i++) {
            res.emplace_back(0);
          }
          for (int i = 0; i < wr; i++) {
            args0.emplace_back(0);
          }
          func->compile(stack.o, res, args0, where);  // compile res := f (args0)
        } else {
          std::string name = symbols.get_name(fun_ref->sym_idx);
          stack.o << AsmOp::Custom(name + " CALLDICT", (int)right.size(), (int)left.size());
        }
        stack.o.undent();
        stack.o << "}>";
        stack.push_new_var(left.at(0));
        return true;
      }
    case _Let: {
      // parallel assignment left[k] := right[k]; sources are first renamed/copied to
      // temporary negative indices so that overlapping sides cannot clobber each other
      tolk_assert(left.size() == right.size());
      int i = 0;
      std::vector active;
      active.reserve(left.size());
      for (std::size_t k = 0; k < left.size(); k++) {
        var_idx_t y = left[k];  // "y" = "x"
        auto p = next->var_info[y];
        active.push_back(p && !p->is_unused());
      }
      for (std::size_t k = 0; k < left.size(); k++) {
        if (!active[k]) {
          continue;
        }
        var_idx_t x = right[k];  // "y" = "x"
        bool is_last = true;
        for (std::size_t l = k + 1; l < right.size(); l++) {
          if (right[l] == x && active[l]) {
            is_last = false;
          }
        }
        if (is_last) {
          auto info = var_info[x];
          is_last = (info && info->is_last());
        }
        if (is_last) {
          stack.assign_var(--i, x);
        } else {
          stack.do_copy_var(--i, x);
        }
      }
      i = 0;
      for (std::size_t k = 0; k < left.size(); k++) {
        if (active[k]) {
          stack.assign_var(left[k], --i);
        }
      }
      return true;
    }
    case _Tuple:
    case _UnTuple: {
      if (disabled()) {
        return true;
      }
      std::vector last;
      for (var_idx_t x : right) {
        last.push_back(var_info[x] && var_info[x]->is_last());
      }
      stack.rearrange_top(right, std::move(last));
      stack.opt_show();
      int k = (int)stack.depth() - (int)right.size();
      tolk_assert(k >= 0);
      if (cl == _Tuple) {
        stack.o << AsmOp::Tuple((int)right.size());
        tolk_assert(left.size() == 1);
      } else {
        stack.o << AsmOp::UnTuple((int)left.size());
        tolk_assert(right.size() == 1);
      }
      stack.s.resize(k);
      for (int i = 0; i < (int)left.size(); i++) {
        stack.push_new_var(left.at(i));
      }
      return true;
    }
    case _Call:
    case _CallInd: {
      if (disabled()) {
        return true;
      }
      SymValFunc* func = (fun_ref ? dynamic_cast(fun_ref->value) : nullptr);
      auto arg_order = (func ? func->get_arg_order() : nullptr);
      auto ret_order = (func ? func->get_ret_order() : nullptr);
      tolk_assert(!arg_order || arg_order->size() == right.size());
      tolk_assert(!ret_order || ret_order->size() == left.size());
      // right1: the arguments that really have to be on the stack, in call order
      std::vector right1;
      if (args.size()) {
        tolk_assert(args.size() == right.size());
        for (int i = 0; i < (int)right.size(); i++) {
          int j = arg_order ? arg_order->at(i) : i;
          const VarDescr& arg = args.at(j);
          if (!arg.is_unused()) {
            tolk_assert(var_info[arg.idx] && !var_info[arg.idx]->is_unused());
            right1.push_back(arg.idx);
          }
        }
      } else if (arg_order) {
        for (int i = 0; i < (int)right.size(); i++) {
          right1.push_back(right.at(arg_order->at(i)));
        }
      } else {
        right1 = right;
      }
      std::vector last;
      for (var_idx_t x : right1) {
        last.push_back(var_info[x] && var_info[x]->is_last());
      }
      stack.rearrange_top(right1, std::move(last));
      stack.opt_show();
      int k = (int)stack.depth() - (int)right1.size();
      tolk_assert(k >= 0);
      for (int i = 0; i < (int)right1.size(); i++) {
        if (stack.s[k + i].first != right1[i]) {
          std::cerr << stack.o;
        }
        tolk_assert(stack.s[k + i].first == right1[i]);
      }
      // CALLXARGS is used while both counts are at most 15; otherwise the counts
      // are pushed as integers and CALLXVARARGS is emitted
      auto exec_callxargs = [&](int args, int ret) {
        if (args <= 15 && ret <= 15) {
          stack.o << exec_arg2_op("CALLXARGS", args, ret, args + 1, ret);
        } else {
          tolk_assert(args <= 254 && ret <= 254);
          stack.o << AsmOp::Const(PSTRING() << args << " PUSHINT");
          stack.o << AsmOp::Const(PSTRING() << ret << " PUSHINT");
          stack.o << AsmOp::Custom("CALLXVARARGS", args + 3, ret);
        }
      };
      if (cl == _CallInd) {
        exec_callxargs((int)right.size() - 1, (int)left.size());
      } else {
        auto func = dynamic_cast(fun_ref->value);
        if (func) {
          std::vector res;
          res.reserve(left.size());
          for (var_idx_t i : left) {
            res.emplace_back(i);
          }
          func->compile(stack.o, res, args, where);  // compile res := f (args)
        } else {
          auto fv = dynamic_cast(fun_ref->value);
          std::string name = symbols.get_name(fun_ref->sym_idx);
          bool is_inline = (fv && (fv->flags & 3));
          if (is_inline) {
            stack.o << AsmOp::Custom(name + " INLINECALLDICT", (int)right.size(), (int)left.size());
          } else if (fv && fv->code && fv->code->require_callxargs) {
            stack.o << AsmOp::Custom(name + (" PREPAREDICT"), 0, 2);
            exec_callxargs((int)right.size() + 1, (int)left.size());
          } else {
            stack.o << AsmOp::Custom(name + " CALLDICT", (int)right.size(), (int)left.size());
          }
        }
      }
      // the arguments are consumed; the results appear in ret_order
      stack.s.resize(k);
      for (int i = 0; i < (int)left.size(); i++) {
        int j = ret_order ? ret_order->at(i) : i;
        stack.push_new_var(left.at(j));
      }
      return true;
    }
    case _SetGlob: {
      tolk_assert(fun_ref && dynamic_cast(fun_ref->value));
      std::vector last;
      for (var_idx_t x : right) {
        last.push_back(var_info[x] && var_info[x]->is_last());
      }
      stack.rearrange_top(right, std::move(last));
      stack.opt_show();
      int k = (int)stack.depth() - (int)right.size();
      tolk_assert(k >= 0);
      for (int i = 0; i < (int)right.size(); i++) {
        if (stack.s[k + i].first != right[i]) {
          std::cerr << stack.o;
        }
        tolk_assert(stack.s[k + i].first == right[i]);
      }
      // several values are packed into one tuple before being stored
      if (right.size() > 1) {
        stack.o << AsmOp::Tuple((int)right.size());
      }
      if (!right.empty()) {
        std::string name = symbols.get_name(fun_ref->sym_idx);
        stack.o << AsmOp::Custom(name + " SETGLOB", 1, 0);
      }
      stack.s.resize(k);
      return true;
    }
    case _If: {
      if (block0->is_empty() && block1->is_empty()) {
        return true;
      }
      if (!next->noreturn() && (block0->noreturn() != block1->noreturn())) {
        stack.o.retalt_ = true;
      }
      var_idx_t x = left[0];
      stack.rearrange_top(x, var_info[x] && var_info[x]->is_last());
      tolk_assert(stack[0] == x);
      stack.opt_show();
      stack.s.pop_back();
      stack.modified();
      if (inline_func && (block0->noreturn() || block1->noreturn())) {
        // inside an inline function: the rest of the chain is generated inside the
        // ELSE branch, hence `return false`
        bool is0 = block0->noreturn();
        Op* block_noreturn = is0 ? block0.get() : block1.get();
        Op* block_other = is0 ? block1.get() : block0.get();
        stack.mode &= ~Stack::_InlineFunc;
        stack.o << (is0 ? "IF:<{" : "IFNOT:<{");
        stack.o.indent();
        Stack stack_copy{stack};
        block_noreturn->generate_code_all(stack_copy);
        stack.o.undent();
        stack.o << "}>ELSE<{";
        stack.o.indent();
        block_other->generate_code_all(stack);
        if (!block_other->noreturn()) {
          next->generate_code_all(stack);
        }
        stack.o.undent();
        stack.o << "}>";
        return false;
      }
      if (block1->is_empty() || block0->is_empty()) {
        // only one branch has code
        bool is0 = block1->is_empty();
        Op* block = is0 ? block0.get() : block1.get();
        // if (left) block0; ...
        // if (!left) block1; ...
        if (block->noreturn()) {
          stack.o << (is0 ? "IFJMP:<{" : "IFNOTJMP:<{");
          stack.o.indent();
          Stack stack_copy{stack};
          stack_copy.mode &= ~Stack::_InlineFunc;
          stack_copy.mode |= next->noreturn() ? 0 : Stack::_NeedRetAlt;
          block->generate_code_all(stack_copy);
          stack.o.undent();
          stack.o << "}>";
          return true;
        }
        stack.o << (is0 ? "IF:<{" : "IFNOT:<{");
        stack.o.indent();
        Stack stack_copy{stack}, stack_target{stack};
        stack_target.disable_output();
        stack_target.drop_vars_except(next->var_info);
        stack_copy.mode &= ~Stack::_InlineFunc;
        block->generate_code_all(stack_copy);
        stack_copy.drop_vars_except(var_info);
        stack_copy.opt_show();
        if ((is0 && stack_copy == stack) || (!is0 && stack_copy.vars() == stack.vars())) {
          stack.o.undent();
          stack.o << "}>";
          if (!is0) {
            stack.merge_const(stack_copy);
          }
          return true;
        }
        // stack_copy.drop_vars_except(next->var_info);
        stack_copy.enforce_state(stack_target.vars());
        stack_copy.opt_show();
        if (stack_copy.vars() == stack.vars()) {
          stack.o.undent();
          stack.o << "}>";
          stack.merge_const(stack_copy);
          return true;
        }
        // the layouts differ: emit an ELSE branch that only rearranges the stack
        stack.o.undent();
        stack.o << "}>ELSE<{";
        stack.o.indent();
        stack.merge_state(stack_copy);
        stack.opt_show();
        stack.o.undent();
        stack.o << "}>";
        return true;
      }
      if (block0->noreturn() || block1->noreturn()) {
        bool is0 = block0->noreturn();
        Op* block_noreturn = is0 ? block0.get() : block1.get();
        Op* block_other = is0 ? block1.get() : block0.get();
        stack.o << (is0 ? "IFJMP:<{" : "IFNOTJMP:<{");
        stack.o.indent();
        Stack stack_copy{stack};
        stack_copy.mode &= ~Stack::_InlineFunc;
        stack_copy.mode |= (block_other->noreturn() || next->noreturn()) ? 0 : Stack::_NeedRetAlt;
        block_noreturn->generate_code_all(stack_copy);
        stack.o.undent();
        stack.o << "}>";
        block_other->generate_code_all(stack);
        return !block_other->noreturn();
      }
      // general case: both branches have code and both may fall through
      stack.o << "IF:<{";
      stack.o.indent();
      Stack stack_copy{stack};
      stack_copy.mode &= ~Stack::_InlineFunc;
      block0->generate_code_all(stack_copy);
      stack_copy.drop_vars_except(next->var_info);
      stack_copy.opt_show();
      stack.o.undent();
      stack.o << "}>ELSE<{";
      stack.o.indent();
      stack.mode &= ~Stack::_InlineFunc;
      block1->generate_code_all(stack);
      stack.merge_state(stack_copy);
      stack.opt_show();
      stack.o.undent();
      stack.o << "}>";
      return true;
    }
    case _Repeat: {
      var_idx_t x = left[0];
      //stack.drop_vars_except(block0->var_info, x);
      stack.rearrange_top(x, var_info[x] && var_info[x]->is_last());
      tolk_assert(stack[0] == x);
      stack.opt_show();
      stack.s.pop_back();
      stack.modified();
      if (block0->noreturn()) {
        stack.o.retalt_ = true;
      }
      // NOTE: `true ||` makes the REPEATEND branch below unreachable
      if (true || !next->is_empty()) {
        stack.o << "REPEAT:<{";
        stack.o.indent();
        stack.forget_const();
        if (block0->noreturn()) {
          Stack stack_copy{stack};
          StackLayout layout1 = stack.vars();
          stack_copy.mode &= ~Stack::_InlineFunc;
          stack_copy.mode |= Stack::_NeedRetAlt;
          block0->generate_code_all(stack_copy);
        } else {
          // the body must leave the stack in the layout it started with
          StackLayout layout1 = stack.vars();
          stack.mode &= ~Stack::_InlineFunc;
          stack.mode |= Stack::_NeedRetAlt;
          block0->generate_code_all(stack);
          stack.enforce_state(std::move(layout1));
          stack.opt_show();
        }
        stack.o.undent();
        stack.o << "}>";
        return true;
      } else {
        stack.o << "REPEATEND";
        stack.forget_const();
        StackLayout layout1 = stack.vars();
        block0->generate_code_all(stack);
        stack.enforce_state(std::move(layout1));
        stack.opt_show();
        return false;
      }
    }
    case _Again: {
      stack.drop_vars_except(block0->var_info);
      stack.opt_show();
      if (block0->noreturn()) {
        stack.o.retalt_ = true;
      }
      if (!next->is_empty() || inline_func) {
        stack.o << "AGAIN:<{";
        stack.o.indent();
        stack.forget_const();
        StackLayout layout1 = stack.vars();
        stack.mode &= ~Stack::_InlineFunc;
        stack.mode |= Stack::_NeedRetAlt;
        block0->generate_code_all(stack);
        stack.enforce_state(std::move(layout1));
        stack.opt_show();
        stack.o.undent();
        stack.o << "}>";
        return true;
      } else {
        stack.o << "AGAINEND";
        stack.forget_const();
        StackLayout layout1 = stack.vars();
        block0->generate_code_all(stack);
        stack.enforce_state(std::move(layout1));
        stack.opt_show();
        return false;
      }
    }
    case _Until: {
      // stack.drop_vars_except(block0->var_info);
      // stack.opt_show();
      if (block0->noreturn()) {
        stack.o.retalt_ = true;
      }
      // NOTE: `true ||` makes the UNTILEND branch below unreachable
      if (true || !next->is_empty()) {
        stack.o << "UNTIL:<{";
        stack.o.indent();
        stack.forget_const();
        auto layout1 = stack.vars();
        stack.mode &= ~Stack::_InlineFunc;
        stack.mode |= Stack::_NeedRetAlt;
        block0->generate_code_all(stack);
        // the body must end with the original layout plus the condition left[0] on top
        layout1.push_back(left[0]);
        stack.enforce_state(std::move(layout1));
        stack.opt_show();
        stack.o.undent();
        stack.o << "}>";
        stack.s.pop_back();
        stack.modified();
        return true;
      } else {
        stack.o << "UNTILEND";
        stack.forget_const();
        StackLayout layout1 = stack.vars();
        block0->generate_code_all(stack);
        layout1.push_back(left[0]);
        stack.enforce_state(std::move(layout1));
        stack.opt_show();
        return false;
      }
    }
    case _While: {
      // while (block0 | left) block1; ...next
      var_idx_t x = left[0];
      stack.drop_vars_except(block0->var_info);
      stack.opt_show();
      StackLayout layout1 = stack.vars();
      // NOTE: `false &&` keeps next_empty false, so the "}>DO:" form is never emitted
      bool next_empty = false && next->is_empty();
      if (block0->noreturn()) {
        stack.o.retalt_ = true;
      }
      stack.o << "WHILE:<{";
      stack.o.indent();
      stack.forget_const();
      stack.mode &= ~Stack::_InlineFunc;
      stack.mode |= Stack::_NeedRetAlt;
      block0->generate_code_all(stack);
      stack.rearrange_top(x, !next->var_info[x] && !block1->var_info[x]);
      stack.opt_show();
      stack.s.pop_back();
      stack.modified();
      stack.o.undent();
      Stack stack_copy{stack};
      stack.o << (next_empty ? "}>DO:" : "}>DO<{");
      if (!next_empty) {
        stack.o.indent();
      }
      stack_copy.opt_show();
      block1->generate_code_all(stack_copy);
      // the loop body must restore the layout expected by the condition block
      stack_copy.enforce_state(std::move(layout1));
      stack_copy.opt_show();
      if (!next_empty) {
        stack.o.undent();
        stack.o << "}>";
        return true;
      } else {
        return false;
      }
    }
    case _TryCatch: {
      if (block0->is_empty() && block1->is_empty()) {
        return true;
      }
      if (block0->noreturn() || block1->noreturn()) {
        stack.o.retalt_ = true;
      }
      Stack catch_stack{stack.o};
      // variables listed in block1->var_info that are currently on the stack are
      // passed to the catch continuation
      std::vector catch_vars;
      std::vector catch_last;
      for (const VarDescr& var : block1->var_info.list) {
        if (stack.find(var.idx) >= 0) {
          catch_vars.push_back(var.idx);
          catch_last.push_back(!block0->var_info[var.idx]);
        }
      }
      // the variables are handed over in chunks of at most 255 (see SETCONTVARARGS below)
      const size_t block_size = 255;
      for (size_t begin = catch_vars.size(), end = begin; end > 0; end = begin) {
        begin = end >= block_size ? end - block_size : 0;
        for (size_t i = begin; i < end; ++i) {
          catch_stack.push_new_var(catch_vars[i]);
        }
      }
      catch_stack.push_new_var(left[0]);
      catch_stack.push_new_var(left[1]);
      stack.rearrange_top(catch_vars, catch_last);
      stack.opt_show();
      stack.o << "c4 PUSH";
      stack.o << "c5 PUSH";
      stack.o << "c7 PUSH";
      stack.o << "<{";
      stack.o.indent();
      if (block1->noreturn()) {
        catch_stack.mode |= Stack::_NeedRetAlt;
      }
      block1->generate_code_all(catch_stack);
      catch_stack.drop_vars_except(next->var_info);
      catch_stack.opt_show();
      stack.o.undent();
      stack.o << "}>CONT";
      stack.o << "c7 SETCONT";
      stack.o << "c5 SETCONT";
      stack.o << "c4 SETCONT";
      for (size_t begin = catch_vars.size(), end = begin; end > 0; end = begin) {
        begin = end >= block_size ? end - block_size : 0;
        stack.o << std::to_string(end - begin) + " PUSHINT";
        stack.o << "-1 PUSHINT";
        stack.o << "SETCONTVARARGS";
      }
      stack.s.erase(stack.s.end() - catch_vars.size(), stack.s.end());
      stack.modified();
      stack.o << "<{";
      stack.o.indent();
      if (block0->noreturn()) {
        stack.mode |= Stack::_NeedRetAlt;
      }
      block0->generate_code_all(stack);
      if (block0->noreturn()) {
        // only the catch branch can fall through: continue with its stack
        stack.s = std::move(catch_stack.s);
      } else if (!block1->noreturn()) {
        stack.merge_state(catch_stack);
      }
      stack.opt_show();
      stack.o.undent();
      stack.o << "}>CONT";
      stack.o << "c1 PUSH";
      stack.o << "COMPOSALT";
      stack.o << "SWAP";
      stack.o << "TRY";
      return true;
    }
    default:
      std::cerr << "fatal: unknown operation <??" << cl << ">\n";
      throw ParseError{where, "unknown operation in generate_code()"};
  }
}

// Generates code for this operation and, if the step asks for it, for the rest of
// the chain. The mode bits selected by Stack::_ModeSave are restored after the step.
void Op::generate_code_all(Stack& stack) {
  int saved_mode = stack.mode;
  auto cont = generate_code_step(stack);
  stack.mode = (stack.mode & ~Stack::_ModeSave) | (saved_mode & Stack::_ModeSave);
  if (cont && next) {
    next->generate_code_all(stack);
  }
}

void CodeBlob::generate_code(AsmOpList& out, int mode) {
  Stack stack{out, mode};
  tolk_assert(ops && ops->cl == Op::_Import);
auto args = (int)ops->left.size(); + for (var_idx_t x : ops->left) { + stack.push_new_var(x); + } + ops->generate_code_all(stack); + stack.apply_wrappers(require_callxargs && (mode & Stack::_InlineAny) ? args : -1); + if (!(mode & Stack::_DisableOpt)) { + optimize_code(out); + } +} + +void CodeBlob::generate_code(std::ostream& os, int mode, int indent) { + AsmOpList out_list(indent, &vars); + generate_code(out_list, mode); + out_list.out(os, mode); +} + +} // namespace tolk diff --git a/tolk/gen-abscode.cpp b/tolk/gen-abscode.cpp new file mode 100644 index 00000000..bfce6f0c --- /dev/null +++ b/tolk/gen-abscode.cpp @@ -0,0 +1,449 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#include +#include "tolk.h" + +using namespace std::literals::string_literals; + +namespace tolk { + +/* + * + * EXPRESSIONS + * + */ + +Expr* Expr::copy() const { + auto res = new Expr{*this}; + for (auto& arg : res->args) { + arg = arg->copy(); + } + return res; +} + +Expr::Expr(int c, sym_idx_t name_idx, std::initializer_list _arglist) : cls(c), args(std::move(_arglist)) { + sym = lookup_symbol(name_idx); + if (!sym) { + } +} + +void Expr::chk_rvalue(const Lexem& lem) const { + if (!is_rvalue()) { + lem.error_at("rvalue expected before `", "`"); + } +} + +void Expr::chk_lvalue(const Lexem& lem) const { + if (!is_lvalue()) { + lem.error_at("lvalue expected before `", "`"); + } +} + +void Expr::chk_type(const Lexem& lem) const { + if (!is_type()) { + lem.error_at("type expression expected before `", "`"); + } +} + +bool Expr::deduce_type(const Lexem& lem) { + if (e_type) { + return true; + } + switch (cls) { + case _Apply: { + if (!sym) { + return false; + } + SymVal* sym_val = dynamic_cast(sym->value); + if (!sym_val || !sym_val->get_type()) { + return false; + } + std::vector arg_types; + for (const auto& arg : args) { + arg_types.push_back(arg->e_type); + } + TypeExpr* fun_type = TypeExpr::new_map(TypeExpr::new_tensor(arg_types), TypeExpr::new_hole()); + try { + unify(fun_type, sym_val->sym_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot apply function " << sym->name() << " : " << sym_val->get_type() << " to arguments of type " + << fun_type->args[0] << ": " << ue; + lem.error(os.str()); + } + e_type = fun_type->args[1]; + TypeExpr::remove_indirect(e_type); + return true; + } + case _VarApply: { + tolk_assert(args.size() == 2); + TypeExpr* fun_type = TypeExpr::new_map(args[1]->e_type, TypeExpr::new_hole()); + try { + unify(fun_type, args[0]->e_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot apply expression of type " << args[0]->e_type << " to an expression of type " << args[1]->e_type + << ": " << 
ue; + lem.error(os.str()); + } + e_type = fun_type->args[1]; + TypeExpr::remove_indirect(e_type); + return true; + } + case _Letop: { + tolk_assert(args.size() == 2); + try { + // std::cerr << "in assignment: " << args[0]->e_type << " from " << args[1]->e_type << std::endl; + unify(args[0]->e_type, args[1]->e_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot assign an expression of type " << args[1]->e_type << " to a variable or pattern of type " + << args[0]->e_type << ": " << ue; + lem.error(os.str()); + } + e_type = args[0]->e_type; + TypeExpr::remove_indirect(e_type); + return true; + } + case _LetFirst: { + tolk_assert(args.size() == 2); + TypeExpr* rhs_type = TypeExpr::new_tensor({args[0]->e_type, TypeExpr::new_hole()}); + try { + // std::cerr << "in implicit assignment of a modifying method: " << rhs_type << " and " << args[1]->e_type << std::endl; + unify(rhs_type, args[1]->e_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot implicitly assign an expression of type " << args[1]->e_type + << " to a variable or pattern of type " << rhs_type << " in modifying method `" << symbols.get_name(val) + << "` : " << ue; + lem.error(os.str()); + } + e_type = rhs_type->args[1]; + TypeExpr::remove_indirect(e_type); + // std::cerr << "result type is " << e_type << std::endl; + return true; + } + case _CondExpr: { + tolk_assert(args.size() == 3); + auto flag_type = TypeExpr::new_atomic(_Int); + try { + unify(args[0]->e_type, flag_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "condition in a conditional expression has non-integer type " << args[0]->e_type << ": " << ue; + lem.error(os.str()); + } + try { + unify(args[1]->e_type, args[2]->e_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "the two variants in a conditional expression have different types " << args[1]->e_type << " and " + << args[2]->e_type << " : " << ue; + lem.error(os.str()); + } + e_type = args[1]->e_type; + 
TypeExpr::remove_indirect(e_type); + return true; + } + } + return false; +} + +int Expr::define_new_vars(CodeBlob& code) { + switch (cls) { + case _Tensor: + case _MkTuple: + case _TypeApply: { + int res = 0; + for (const auto& x : args) { + res += x->define_new_vars(code); + } + return res; + } + case _Var: + if (val < 0) { + val = code.create_var(TmpVar::_Named, e_type, sym, &here); + return 1; + } + break; + case _Hole: + if (val < 0) { + val = code.create_var(TmpVar::_Tmp, e_type, nullptr, &here); + } + break; + } + return 0; +} + +int Expr::predefine_vars() { + switch (cls) { + case _Tensor: + case _MkTuple: + case _TypeApply: { + int res = 0; + for (const auto& x : args) { + res += x->predefine_vars(); + } + return res; + } + case _Var: + if (!sym) { + tolk_assert(val < 0 && here.defined()); + if (prohibited_var_names.count(symbols.get_name(~val))) { + throw ParseError{ + here, PSTRING() << "symbol `" << symbols.get_name(~val) << "` cannot be redefined as a variable"}; + } + sym = define_symbol(~val, false, here); + // std::cerr << "predefining variable " << symbols.get_name(~val) << std::endl; + if (!sym) { + throw ParseError{here, std::string{"redefined variable `"} + symbols.get_name(~val) + "`"}; + } + sym->value = new SymVal{SymVal::_Var, -1, e_type}; + return 1; + } + break; + } + return 0; +} + +var_idx_t Expr::new_tmp(CodeBlob& code) const { + return code.create_tmp_var(e_type, &here); +} + +void add_set_globs(CodeBlob& code, std::vector>& globs, const SrcLocation& here) { + for (const auto& p : globs) { + auto& op = code.emplace_back(here, Op::_SetGlob, std::vector{}, std::vector{ p.second }, p.first); + op.flags |= Op::_Impure; + } +} + +std::vector Expr::pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, const SrcLocation& here) { + while (lhs->is_type_apply()) { + lhs = lhs->args.at(0); + } + while (rhs->is_type_apply()) { + rhs = rhs->args.at(0); + } + if (lhs->is_mktuple()) { + if (rhs->is_mktuple()) { + return pre_compile_let(code, 
lhs->args.at(0), rhs->args.at(0), here); + } + auto right = rhs->pre_compile(code); + TypeExpr::remove_indirect(rhs->e_type); + auto unpacked_type = rhs->e_type->args.at(0); + std::vector tmp{code.create_tmp_var(unpacked_type, &rhs->here)}; + code.emplace_back(lhs->here, Op::_UnTuple, tmp, std::move(right)); + auto tvar = new Expr{_Var}; + tvar->set_val(tmp[0]); + tvar->set_location(rhs->here); + tvar->e_type = unpacked_type; + pre_compile_let(code, lhs->args.at(0), tvar, here); + return tmp; + } + auto right = rhs->pre_compile(code); + std::vector> globs; + auto left = lhs->pre_compile(code, &globs); + for (var_idx_t v : left) { + code.on_var_modification(v, here); + } + code.emplace_back(here, Op::_Let, std::move(left), right); + add_set_globs(code, globs, here); + return right; +} + +std::vector pre_compile_tensor(const std::vector args, CodeBlob &code, + std::vector> *lval_globs, + std::vector arg_order) { + if (arg_order.empty()) { + arg_order.resize(args.size()); + std::iota(arg_order.begin(), arg_order.end(), 0); + } + tolk_assert(args.size() == arg_order.size()); + std::vector> res_lists(args.size()); + + struct ModifiedVar { + size_t i, j; + Op* op; + }; + auto modified_vars = std::make_shared>(); + for (size_t i : arg_order) { + res_lists[i] = args[i]->pre_compile(code, lval_globs); + for (size_t j = 0; j < res_lists[i].size(); ++j) { + TmpVar& var = code.vars.at(res_lists[i][j]); + if (code.flags & CodeBlob::_AllowPostModification) { + if (!lval_globs && (var.cls & TmpVar::_Named)) { + Op *op = &code.emplace_back(nullptr, Op::_Let, std::vector(), std::vector()); + op->flags |= Op::_Disabled; + var.on_modification.push_back([modified_vars, i, j, op, done = false](const SrcLocation &here) mutable { + if (!done) { + done = true; + modified_vars->push_back({i, j, op}); + } + }); + } else { + var.on_modification.push_back([](const SrcLocation &) { + }); + } + } else { + var.on_modification.push_back([name = var.to_string()](const SrcLocation &here) { + throw 
ParseError{here, PSTRING() << "Modifying local variable " << name + << " after using it in the same expression"}; + }); + } + } + } + for (const auto& list : res_lists) { + for (var_idx_t v : list) { + tolk_assert(!code.vars.at(v).on_modification.empty()); + code.vars.at(v).on_modification.pop_back(); + } + } + for (const ModifiedVar &m : *modified_vars) { + var_idx_t& v = res_lists[m.i][m.j]; + var_idx_t v2 = code.create_tmp_var(code.vars[v].v_type, code.vars[v].where.get()); + m.op->left = {v2}; + m.op->right = {v}; + m.op->flags &= ~Op::_Disabled; + v = v2; + } + std::vector res; + for (const auto& list : res_lists) { + res.insert(res.end(), list.cbegin(), list.cend()); + } + return res; +} + +std::vector Expr::pre_compile(CodeBlob& code, std::vector>* lval_globs) const { + if (lval_globs && !(cls == _Tensor || cls == _Var || cls == _Hole || cls == _TypeApply || cls == _GlobVar)) { + std::cerr << "lvalue expression constructor is " << cls << std::endl; + throw Fatal{"cannot compile lvalue expression with unknown constructor"}; + } + switch (cls) { + case _Tensor: { + return pre_compile_tensor(args, code, lval_globs, {}); + } + case _Apply: { + tolk_assert(sym); + auto func = dynamic_cast(sym->value); + std::vector res; + if (func && func->arg_order.size() == args.size() && !(code.flags & CodeBlob::_ComputeAsmLtr)) { + //std::cerr << "!!! 
reordering " << args.size() << " arguments of " << sym->name() << std::endl; + res = pre_compile_tensor(args, code, lval_globs, func->arg_order); + } else { + res = pre_compile_tensor(args, code, lval_globs, {}); + } + auto rvect = new_tmp_vect(code); + auto& op = code.emplace_back(here, Op::_Call, rvect, std::move(res), sym); + if (flags & _IsImpure) { + op.flags |= Op::_Impure; + } + return rvect; + } + case _TypeApply: + return args[0]->pre_compile(code, lval_globs); + case _Var: + case _Hole: + if (val < 0) { + throw ParseError{here, "unexpected variable definition"}; + } + return {val}; + case _VarApply: + if (args[0]->cls == _Glob) { + auto res = args[1]->pre_compile(code); + auto rvect = new_tmp_vect(code); + auto& op = code.emplace_back(here, Op::_Call, rvect, std::move(res), args[0]->sym); + if (args[0]->flags & _IsImpure) { + op.flags |= Op::_Impure; + } + return rvect; + } else { + auto res = args[1]->pre_compile(code); + auto tfunc = args[0]->pre_compile(code); + if (tfunc.size() != 1) { + throw Fatal{"stack tuple used as a function"}; + } + res.push_back(tfunc[0]); + auto rvect = new_tmp_vect(code); + code.emplace_back(here, Op::_CallInd, rvect, std::move(res)); + return rvect; + } + case _Const: { + auto rvect = new_tmp_vect(code); + code.emplace_back(here, Op::_IntConst, rvect, intval); + return rvect; + } + case _Glob: + case _GlobVar: { + auto rvect = new_tmp_vect(code); + if (lval_globs) { + lval_globs->push_back({ sym, rvect[0] }); + return rvect; + } else { + code.emplace_back(here, Op::_GlobVar, rvect, std::vector{}, sym); + return rvect; + } + } + case _Letop: { + return pre_compile_let(code, args.at(0), args.at(1), here); + } + case _LetFirst: { + auto rvect = new_tmp_vect(code); + auto right = args[1]->pre_compile(code); + std::vector> local_globs; + if (!lval_globs) { + lval_globs = &local_globs; + } + auto left = args[0]->pre_compile(code, lval_globs); + left.push_back(rvect[0]); + for (var_idx_t v : left) { + code.on_var_modification(v, 
here); + } + code.emplace_back(here, Op::_Let, std::move(left), std::move(right)); + add_set_globs(code, local_globs, here); + return rvect; + } + case _MkTuple: { + auto left = new_tmp_vect(code); + auto right = args[0]->pre_compile(code); + code.emplace_back(here, Op::_Tuple, left, std::move(right)); + return left; + } + case _CondExpr: { + auto cond = args[0]->pre_compile(code); + tolk_assert(cond.size() == 1); + auto rvect = new_tmp_vect(code); + Op& if_op = code.emplace_back(here, Op::_If, cond); + code.push_set_cur(if_op.block0); + code.emplace_back(here, Op::_Let, rvect, args[1]->pre_compile(code)); + code.close_pop_cur(args[1]->here); + code.push_set_cur(if_op.block1); + code.emplace_back(here, Op::_Let, rvect, args[2]->pre_compile(code)); + code.close_pop_cur(args[2]->here); + return rvect; + } + case _SliceConst: { + auto rvect = new_tmp_vect(code); + code.emplace_back(here, Op::_SliceConst, rvect, strval); + return rvect; + } + default: + std::cerr << "expression constructor is " << cls << std::endl; + throw Fatal{"cannot compile expression with unknown constructor"}; + } +} + +} // namespace tolk diff --git a/tolk/keywords.cpp b/tolk/keywords.cpp new file mode 100644 index 00000000..db193deb --- /dev/null +++ b/tolk/keywords.cpp @@ -0,0 +1,126 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#include "tolk.h" + +namespace tolk { + +/* + * + * KEYWORD DEFINITION + * + */ + +void define_keywords() { + symbols.add_kw_char('+') + .add_kw_char('-') + .add_kw_char('*') + .add_kw_char('/') + .add_kw_char('%') + .add_kw_char('?') + .add_kw_char(':') + .add_kw_char(',') + .add_kw_char(';') + .add_kw_char('(') + .add_kw_char(')') + .add_kw_char('[') + .add_kw_char(']') + .add_kw_char('{') + .add_kw_char('}') + .add_kw_char('=') + .add_kw_char('_') + .add_kw_char('<') + .add_kw_char('>') + .add_kw_char('&') + .add_kw_char('|') + .add_kw_char('^') + .add_kw_char('~'); + + symbols.add_keyword("==", Keyword::_Eq) + .add_keyword("!=", Keyword::_Neq) + .add_keyword("<=", Keyword::_Leq) + .add_keyword(">=", Keyword::_Geq) + .add_keyword("<=>", Keyword::_Spaceship) + .add_keyword("<<", Keyword::_Lshift) + .add_keyword(">>", Keyword::_Rshift) + .add_keyword("~>>", Keyword::_RshiftR) + .add_keyword("^>>", Keyword::_RshiftC) + .add_keyword("~/", Keyword::_DivR) + .add_keyword("^/", Keyword::_DivC) + .add_keyword("~%", Keyword::_ModR) + .add_keyword("^%", Keyword::_ModC) + .add_keyword("/%", Keyword::_DivMod) + .add_keyword("+=", Keyword::_PlusLet) + .add_keyword("-=", Keyword::_MinusLet) + .add_keyword("*=", Keyword::_TimesLet) + .add_keyword("/=", Keyword::_DivLet) + .add_keyword("~/=", Keyword::_DivRLet) + .add_keyword("^/=", Keyword::_DivCLet) + .add_keyword("%=", Keyword::_ModLet) + .add_keyword("~%=", Keyword::_ModRLet) + .add_keyword("^%=", Keyword::_ModCLet) + .add_keyword("<<=", Keyword::_LshiftLet) + .add_keyword(">>=", Keyword::_RshiftLet) + .add_keyword("~>>=", Keyword::_RshiftRLet) + .add_keyword("^>>=", Keyword::_RshiftCLet) + .add_keyword("&=", Keyword::_AndLet) + .add_keyword("|=", Keyword::_OrLet) + .add_keyword("^=", Keyword::_XorLet); + + symbols.add_keyword("return", Keyword::_Return) + .add_keyword("var", Keyword::_Var) + .add_keyword("repeat", Keyword::_Repeat) + .add_keyword("do", Keyword::_Do) + .add_keyword("while", Keyword::_While) + 
.add_keyword("until", Keyword::_Until) + .add_keyword("try", Keyword::_Try) + .add_keyword("catch", Keyword::_Catch) + .add_keyword("if", Keyword::_If) + .add_keyword("ifnot", Keyword::_Ifnot) + .add_keyword("then", Keyword::_Then) + .add_keyword("else", Keyword::_Else) + .add_keyword("elseif", Keyword::_Elseif) + .add_keyword("elseifnot", Keyword::_Elseifnot); + + symbols.add_keyword("int", Keyword::_Int) + .add_keyword("cell", Keyword::_Cell) + .add_keyword("slice", Keyword::_Slice) + .add_keyword("builder", Keyword::_Builder) + .add_keyword("cont", Keyword::_Cont) + .add_keyword("tuple", Keyword::_Tuple) + .add_keyword("type", Keyword::_Type) + .add_keyword("->", Keyword::_Mapsto) + .add_keyword("forall", Keyword::_Forall); + + symbols.add_keyword("extern", Keyword::_Extern) + .add_keyword("global", Keyword::_Global) + .add_keyword("asm", Keyword::_Asm) + .add_keyword("impure", Keyword::_Impure) + .add_keyword("inline", Keyword::_Inline) + .add_keyword("inline_ref", Keyword::_InlineRef) + .add_keyword("auto_apply", Keyword::_AutoApply) + .add_keyword("method_id", Keyword::_MethodId) + .add_keyword("operator", Keyword::_Operator) + .add_keyword("infix", Keyword::_Infix) + .add_keyword("infixl", Keyword::_Infixl) + .add_keyword("infixr", Keyword::_Infixr) + .add_keyword("const", Keyword::_Const); + + symbols.add_keyword("#pragma", Keyword::_PragmaHashtag) + .add_keyword("#include", Keyword::_IncludeHashtag); +} + +} // namespace tolk diff --git a/tolk/lexer.cpp b/tolk/lexer.cpp new file mode 100644 index 00000000..f0838f5a --- /dev/null +++ b/tolk/lexer.cpp @@ -0,0 +1,335 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. 
+ + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "lexer.h" +#include "symtable.h" +#include +#include + +namespace tolk { + +/* + * + * LEXER + * + */ + +std::string Lexem::lexem_name_str(int idx) { + if (idx == Eof) { + return "end of file"; + } else if (idx == Ident) { + return "identifier"; + } else if (idx == Number) { + return "number"; + } else if (idx == String) { + return "string"; + } else if (idx == Special) { + return "special"; + } else if (symbols.get_keyword(idx)) { + return "`" + symbols.get_keyword(idx)->str + "`"; + } else { + std::ostringstream os{""; + return os.str(); + } +} + +std::string Lexem::name_str() const { + if (tp == Ident) { + return std::string{"identifier `"} + symbols.get_name(val) + "`"; + } else if (tp == String) { + return std::string{"string \""} + str + '"'; + } else { + return lexem_name_str(tp); + } +} + +bool is_number(std::string str) { + auto st = str.begin(), en = str.end(); + if (st == en) { + return false; + } + if (*st == '-') { + st++; + } + bool hex = false; + if (st + 1 < en && *st == '0' && st[1] == 'x') { + st += 2; + hex = true; + } + if (st == en) { + return false; + } + while (st < en) { + int c = *st; + if (c >= '0' && c <= '9') { + ++st; + continue; + } + if (!hex) { + return false; + } + c |= 0x20; + if (c < 'a' || c > 'f') { + return false; + } + ++st; + } + return true; +} + +int Lexem::classify() { + if (tp != Unknown) { + return tp; + } + sym_idx_t i = symbols.lookup(str); + if (i) { + assert(str == symbols[i]->str); + str = symbols[i]->str; + sym_idx_t idx = symbols[i]->idx; + tp = (idx < 0 ? 
-idx : Ident); + val = i; + } else if (is_number(str)) { + tp = Number; + } else { + tp = 0; + } + if (tp == Unknown) { + tp = Ident; + val = symbols.lookup(str, 1); + } + return tp; +} + +int Lexem::set(std::string _str, const SrcLocation& _loc, int _tp, int _val) { + str = _str; + loc = _loc; + tp = _tp; + val = _val; + return classify(); +} + +Lexer::Lexer(SourceReader& _src, bool init, std::string active_chars, std::string eol_cmts, std::string open_cmts, + std::string close_cmts, std::string quote_chars, std::string multiline_quote) + : src(_src), eof(false), lexem("", src.here(), Lexem::Undefined), peek_lexem("", {}, Lexem::Undefined), + multiline_quote(std::move(multiline_quote)) { + std::memset(char_class, 0, sizeof(char_class)); + unsigned char activity = cc::active; + for (char c : active_chars) { + if (c == ' ') { + if (!--activity) { + activity = cc::allow_repeat; + } + } else if ((unsigned)c < 0x80) { + char_class[(unsigned)c] |= activity; + } + } + set_spec(eol_cmt, eol_cmts); + set_spec(cmt_op, open_cmts); + set_spec(cmt_cl, close_cmts); + for (int c : quote_chars) { + if (c > ' ' && c <= 0x7f) { + char_class[(unsigned)c] |= cc::quote_char; + } + } + if (init) { + next(); + } +} + +void Lexer::set_spec(std::array& arr, std::string setup) { + arr[0] = arr[1] = arr[2] = -0x100; + std::size_t n = setup.size(), i; + for (i = 0; i < n; i++) { + if (setup[i] == ' ') { + continue; + } + if (i == n - 1 || setup[i + 1] == ' ') { + arr[0] = setup[i]; + } else if (i == n - 2 || (i < n - 2 && setup[i + 2] == ' ')) { + arr[1] = setup[i]; + arr[2] = setup[++i]; + } else { + while (i < n && setup[i] != ' ') { + i++; + } + } + } +} + +bool Lexer::is_multiline_quote(const char* begin, const char* end) { + if (multiline_quote.empty()) { + return false; + } + for (const char& c : multiline_quote) { + if (begin == end || *begin != c) { + return false; + } + ++begin; + } + return true; +} + +void Lexer::expect(int exp_tp, const char* msg) { + if (tp() != exp_tp) { + 
throw ParseError{lexem.loc, (msg ? std::string{msg} : Lexem::lexem_name_str(exp_tp)) + " expected instead of " + + cur().name_str()}; + } + next(); +} + +const Lexem& Lexer::next() { + if (peek_lexem.valid()) { + lexem = std::move(peek_lexem); + peek_lexem.clear({}, Lexem::Undefined); + eof = (lexem.tp == Lexem::Eof); + return lexem; + } + if (eof) { + return lexem.clear(src.here(), Lexem::Eof); + } + long long comm = 1; + while (!src.seek_eof()) { + int cc = src.cur_char(), nc = src.next_char(); + if (cc == eol_cmt[0] || (cc == eol_cmt[1] && nc == eol_cmt[2])) { + src.load_line(); + } else if (cc == cmt_op[1] && nc == cmt_op[2]) { + src.advance(2); + comm = comm * 2 + 1; + } else if (cc == cmt_op[0]) { + src.advance(1); + comm *= 2; + } else if (comm == 1) { + break; + } else if (cc == cmt_cl[1] && nc == cmt_cl[2]) { + if (!(comm & 1)) { + src.error(std::string{"a `"} + (char)cmt_op[0] + "` comment closed by `" + (char)cmt_cl[1] + (char)cmt_cl[2] + + "`"); + } + comm >>= 1; + src.advance(2); + } else if (cc == cmt_cl[0]) { + if (!(comm & 1)) { + src.error(std::string{"a `"} + (char)cmt_op[1] + (char)cmt_op[2] + "` comment closed by `" + (char)cmt_cl[0] + + "`"); + } + comm >>= 1; + src.advance(1); + } else { + src.advance(1); + } + if (comm < 0) { + src.error("too many nested comments"); + } + } + if (src.seek_eof()) { + eof = true; + if (comm > 1) { + if (comm & 1) { + src.error(std::string{"`"} + (char)cmt_op[1] + (char)cmt_op[2] + "` comment extends past end of file"); + } else { + src.error(std::string{"`"} + (char)cmt_op[0] + "` comment extends past end of file"); + } + } + return lexem.clear(src.here(), Lexem::Eof); + } + if (is_multiline_quote(src.get_ptr(), src.get_end_ptr())) { + src.advance(multiline_quote.size()); + const char* end = nullptr; + SrcLocation here = src.here(); + std::string body; + while (!src.is_eof()) { + if (src.is_eoln()) { + body.push_back('\n'); + src.load_line(); + continue; + } + if (is_multiline_quote(src.get_ptr(), 
src.get_end_ptr())) { + end = src.get_ptr(); + src.advance(multiline_quote.size()); + break; + } + body.push_back(src.cur_char()); + src.advance(1); + } + if (!end) { + src.error("string extends past end of file"); + } + lexem.set(body, here, Lexem::String); + int c = src.cur_char(); + if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { + lexem.val = c; + src.advance(1); + } + return lexem; + } + int c = src.cur_char(); + const char* end = src.get_ptr(); + if (is_quote_char(c) || c == '`') { + int qc = c; + ++end; + while (end < src.get_end_ptr() && *end != qc) { + ++end; + } + if (*end != qc) { + src.error(qc == '`' ? "a `back-quoted` token extends past end of line" : "string extends past end of line"); + } + lexem.set(std::string{src.get_ptr() + 1, end}, src.here(), qc == '`' ? Lexem::Unknown : Lexem::String); + src.set_ptr(end + 1); + c = src.cur_char(); + if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { + lexem.val = c; + src.set_ptr(end + 2); + } + // std::cerr << lexem.name_str() << ' ' << lexem.str << std::endl; + return lexem; + } + int len = 0, pc = -0x100; + while (end < src.get_end_ptr()) { + c = *end; + bool repeated = (c == pc && is_repeatable(c)); + if (c == ' ' || c == 9 || (len && is_left_active(c) && !repeated)) { + break; + } + ++len; + ++end; + if (is_right_active(c) && !repeated) { + break; + } + pc = c; + } + lexem.set(std::string{src.get_ptr(), end}, src.here()); + src.set_ptr(end); + // std::cerr << lexem.name_str() << ' ' << lexem.str << std::endl; + return lexem; +} + +const Lexem& Lexer::peek() { + if (peek_lexem.valid()) { + return peek_lexem; + } + if (eof) { + return lexem.clear(src.here(), Lexem::Eof); + } + Lexem keep = std::move(lexem); + next(); + peek_lexem = std::move(lexem); + lexem = std::move(keep); + eof = false; + return peek_lexem; +} + +} // namespace tolk diff --git a/tolk/lexer.h b/tolk/lexer.h new file mode 100644 index 00000000..79d86906 --- /dev/null +++ b/tolk/lexer.h @@ -0,0 +1,113 @@ +/* + This file is 
part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once +#include "srcread.h" +#include +#include +#include + +namespace tolk { + +/* + * + * LEXER + * + */ + +struct Lexem { + enum { Undefined = -2, Eof = -1, Unknown = 0, Ident = 0, Number = 1, Special = 2, String = 3 }; + int tp; + int val; + std::string str; + SrcLocation loc; + int classify(); + Lexem(std::string _str = "", const SrcLocation& _loc = {}, int _tp = Unknown, int _val = 0) + : tp(_tp), val(_val), str(_str), loc(_loc) { + classify(); + } + int set(std::string _str = "", const SrcLocation& _loc = {}, int _tp = Unknown, int _val = 0); + Lexem& clear(const SrcLocation& _loc = {}, int _tp = Unknown, int _val = 0) { + tp = _tp; + val = _val; + loc = _loc; + str = ""; + return *this; + } + bool valid() const { + return tp != Undefined; + } + std::string name_str() const; + void error(std::string _str) const { + throw ParseError{loc, _str}; + } + void error_at(std::string str1, std::string str2) const { + error(str1 + str + str2); + } + + static std::string lexem_name_str(int idx); +}; + +class Lexer { + SourceReader& src; + bool eof; + Lexem lexem, peek_lexem; + unsigned char char_class[128]; + std::array eol_cmt, cmt_op, cmt_cl; + std::string multiline_quote; + enum cc { left_active = 2, right_active = 1, active = 3, allow_repeat = 4, quote_char = 8 }; + + public: + 
bool eof_found() const { + return eof; + } + /* All lexical conventions (active characters, end-of-line and block comment markers, quote characters, multiline quote) are passed as strings with the defaults shown; how init is used is not visible in this header. */ Lexer(SourceReader& _src, bool init = false, std::string active_chars = ";,() ~.", std::string eol_cmts = ";;", + std::string open_cmts = "{-", std::string close_cmts = "-}", std::string quote_chars = "\"", + std::string multiline_quote = "\"\"\""); + const Lexem& next(); + /* the lexem most recently produced */ const Lexem& cur() const { + return lexem; + } + const Lexem& peek(); + /* kind of the current lexem */ int tp() const { + return lexem.tp; + } + void expect(int exp_tp, const char* msg = 0); + /* class mask of c; every c >= 0x80 maps to 0, which also covers negative ints once converted to unsigned */ int classify_char(unsigned c) const { + return c < 0x80 ? char_class[c] : 0; + } + /* true only when both bits of cc::active are set for c */ bool is_active(int c) const { + return (classify_char(c) & cc::active) == cc::active; + } + bool is_left_active(int c) const { + return (classify_char(c) & cc::left_active); + } + bool is_right_active(int c) const { + return (classify_char(c) & cc::right_active); + } + bool is_repeatable(int c) const { + return (classify_char(c) & cc::allow_repeat); + } + bool is_quote_char(int c) const { + return (classify_char(c) & cc::quote_char); + } + + private: + /* NOTE(review): std::array has no template arguments in this text - presumably lost during extraction */ void set_spec(std::array& arr, std::string setup); + bool is_multiline_quote(const char* begin, const char* end); +}; + +} // namespace tolk diff --git a/tolk/optimize.cpp b/tolk/optimize.cpp new file mode 100644 index 00000000..64087032 --- /dev/null +++ b/tolk/optimize.cpp @@ -0,0 +1,652 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details.
+ + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "tolk.h" + +namespace tolk { + +/* + * + * PEEPHOLE OPTIMIZER + * + */ + +/* Takes ownership of a new instruction list and loads its head into the working window. */ void Optimizer::set_code(AsmOpConsList code) { + code_ = std::move(code); + unpack(); +} + +/* Fills the window op_[0..l_) with up to n leading ops of code_, moving each op out of its list cell. Comment ops are skipped but still counted in j, so offs_[i] is the list position of window entry i; scanning stops at the first op for which is_very_custom() holds. indent_ is taken from the first window op, or 0 when the window is empty. */ void Optimizer::unpack() { + int i = 0, j = 0; + for (AsmOpCons *p = code_.get(); p && i < n; p = p->cdr.get(), ++j) { + if (p->car->is_very_custom()) { + break; + } + if (p->car->is_comment()) { + continue; + } + op_cons_[i] = p; + op_[i] = std::move(p->car); + offs_[i] = j; + ++i; + } + l_ = i; + indent_ = (i ? op_[0]->indent : 0); +} + +/* Inverse of unpack(): moves every window op back into the list cell it came from and empties the window. */ void Optimizer::pack() { + for (int i = 0; i < l_; i++) { + op_cons_[i]->car = std::move(op_[i]); + op_cons_[i] = nullptr; + } + l_ = 0; +} + +/* Commits the pending rewrite: the first p_ window ops are replaced by the q_ ops stored in oq_. Does nothing when both p_ and q_ are zero. */ void Optimizer::apply() { + if (!p_ && !q_) { + return; + } + tolk_assert(p_ > 0 && p_ <= l_ && q_ >= 0 && q_ <= n && l_ <= n); + /* ops past the rewritten prefix go back into their cells unchanged */ for (int i = p_; i < l_; i++) { + tolk_assert(op_[i]); + op_cons_[i]->car = std::move(op_[i]); + op_cons_[i] = nullptr; + } + /* drop offs_[p_ - 1] + 1 cells from the head of the list: the rewritten ops and any comment cells that precede the last of them */ for (int c = offs_[p_ - 1]; c >= 0; --c) { + code_ = std::move(code_->cdr); + } + /* prepend the replacement ops back to front so that oq_[0] ends up first */ for (int j = q_ - 1; j >= 0; j--) { + tolk_assert(oq_[j]); + oq_[j]->indent = indent_; + code_ = AsmOpCons::cons(std::move(oq_[j]), std::move(code_)); + } + l_ = 0; +} + +/* Restores the window into the list and hands the whole list to the caller; code_ is moved from afterwards. */ AsmOpConsList Optimizer::extract_code() { + pack(); + return std::move(code_); +} + +/* Debug aid: prints the current window to std::cerr when debug_ is set. */ void Optimizer::show_head() const { + if (!debug_) { + return; + } + std::cerr << "optimizing"; + for (int i = 0; i < l_; i++) { + if (op_[i]) { + std::cerr << ' ' << *op_[i] << ' '; + } else { + std::cerr << " (null) "; + } + } + std::cerr << std::endl; +} + +/* Debug aid: prints the first p_ window ops (the left-hand side of a rewrite) without a trailing newline. */ void Optimizer::show_left() const { + if (!debug_) { + return; + } + std::cerr << "// *** rewriting"; + for (int i = 0; i < p_; i++) { + if (op_[i]) { + std::cerr << ' ' << *op_[i] << ' '; + } else { + std::cerr << " (null) "; + } + } +} + +/* Debug aid: prints the q_ replacement ops and ends the line started by show_left(). */ void Optimizer::show_right() const { + if (!debug_) { + return; + } + std::cerr << "->"; + for (int i = 0; i <
q_; i++) { + if (oq_[i]) { + std::cerr << ' ' << *oq_[i] << ' '; + } else { + std::cerr << " (null) "; + } + } + std::cerr << std::endl; +} + +/* Prints str to std::cerr when debug_ is set; always returns true. */ bool Optimizer::say(std::string str) const { + if (debug_) { + std::cerr << str << std::endl; + } + return true; +} + +/* Searches the first l2_ window ops for one with is_gconst() whose cumulative transform satisfies tr_[i].get(0) == cst; on success stores its index in *op_idx. */ bool Optimizer::find_const_op(int* op_idx, int cst) { + for (int i = 0; i < l2_; i++) { + if (op_[i]->is_gconst() && tr_[i].get(0) == cst) { + *op_idx = i; + return true; + } + } + return false; +} + +/* True when pb_ lies in [3, l2_] and the cumulative transform of the first pb_ ops passes StackTransform::is_push_const, which fills *i and *c. */ bool Optimizer::is_push_const(int* i, int* c) const { + return pb_ >= 3 && pb_ <= l2_ && tr_[pb_ - 1].is_push_const(i, c); +} + +// PUSHCONST c ; PUSH s(i+1) ; SWAP -> PUSH s(i) ; PUSHCONST c +bool Optimizer::rewrite_push_const(int i, int c) { + p_ = pb_; + q_ = 2; + int idx = -1; + if (!(p_ >= 2 && find_const_op(&idx, c) && idx < p_)) { + return false; + } + show_left(); + oq_[1] = std::move(op_[idx]); + /* !idx is 1 when idx == 0 and 0 otherwise: another op object of the window is reused as storage for the new PUSH */ oq_[0] = std::move(op_[!idx]); + *oq_[0] = AsmOp::Push(i); + show_right(); + return true; +} + +/* Same precondition shape as is_push_const, tested with StackTransform::is_const_rot. */ bool Optimizer::is_const_rot(int* c) const { + return pb_ >= 3 && pb_ <= l2_ && tr_[pb_ - 1].is_const_rot(c); +} + +/* Replaces the first pb_ ops by the constant op followed by ROT; the second slot reuses another window op as storage. */ bool Optimizer::rewrite_const_rot(int c) { + p_ = pb_; + q_ = 2; + int idx = -1; + if (!(p_ >= 2 && find_const_op(&idx, c) && idx < p_)) { + return false; + } + show_left(); + oq_[0] = std::move(op_[idx]); + oq_[1] = std::move(op_[!idx]); + *oq_[1] = AsmOp::Custom("ROT", 3, 3); + show_right(); + return true; +} + +/* Same precondition shape as is_push_const, tested with StackTransform::is_const_pop. */ bool Optimizer::is_const_pop(int* c, int* i) const { + return pb_ >= 3 && pb_ <= l2_ && tr_[pb_ - 1].is_const_pop(c, i); +} + +/* Replaces the first pb_ ops by the constant op followed by AsmOp::Pop(i). */ bool Optimizer::rewrite_const_pop(int c, int i) { + p_ = pb_; + q_ = 2; + int idx = -1; + if (!(p_ >= 2 && find_const_op(&idx, c) && idx < p_)) { + return false; + } + show_left(); + oq_[0] = std::move(op_[idx]); + oq_[1] = std::move(op_[!idx]); + *oq_[1] = AsmOp::Pop(i); + show_right(); + return true; +} + +/* Recognizes a window that starts with a constant op followed by pb_ - 1 ops that are each an XCHG or a PUSH. pos tracks the stack position of the constant; t accumulates the same ops with the constant removed from the stack. Succeeds, setting p_, only if the constant ends at position 0 and t with apply_push_newconst() applied compares <= to the recorded transform. */ bool Optimizer::is_const_push_xchgs() { + if (!(pb_ >= 2 && pb_ <= l2_ && op_[0]->is_gconst())) { + return false; + } + StackTransform t; + int pos = 0, i; + for
(i = 1; i < pb_; i++) { + int a, b; + if (op_[i]->is_xchg(&a, &b)) { + if (pos == a) { + pos = b; + } else if (pos == b) { + pos = a; + } else { + t.apply_xchg(a - (a > pos), b - (b > pos)); + } + } else if (op_[i]->is_push(&a)) { + if (pos == a) { + return false; + } + t.apply_push(a - (a > pos)); + ++pos; + } else { + return false; + } + } + if (pos) { + return false; + } + t.apply_push_newconst(); + if (t <= tr_[i - 1]) { + p_ = i; + return true; + } else { + return false; + } +} + +/* Builds the replacement for the pattern accepted by is_const_push_xchgs() (p_ must already be set). The XCHG and PUSH ops are re-emitted with every stack index above the constant's position decremented by one, exchanges that only move the constant are dropped, and the constant op is emitted last. The asserts re-check the conditions the matcher tested. */ bool Optimizer::rewrite_const_push_xchgs() { + if (!p_) { + return false; + } + show_left(); + auto c_op = std::move(op_[0]); + tolk_assert(c_op->is_gconst()); + StackTransform t; + q_ = 0; + int pos = 0; + for (int i = 1; i < p_; i++) { + int a, b; + if (op_[i]->is_xchg(&a, &b)) { + if (a == pos) { + pos = b; + } else if (b == pos) { + pos = a; + } else { + oq_[q_] = std::move(op_[i]); + if (a > pos) { + oq_[q_]->a = a - 1; + } + if (b > pos) { + oq_[q_]->b = b - 1; + } + tolk_assert(apply_op(t, *oq_[q_])); + ++q_; + } + } else { + tolk_assert(op_[i]->is_push(&a)); + tolk_assert(a != pos); + oq_[q_] = std::move(op_[i]); + if (a > pos) { + oq_[q_]->a = a - 1; + } + tolk_assert(apply_op(t, *oq_[q_])); + ++q_; + ++pos; + } + } + tolk_assert(!pos); + t.apply_push_newconst(); + tolk_assert(t <= tr_[p_ - 1]); + oq_[q_++] = std::move(c_op); + show_right(); + return true; +} + +/* Schedules replacing the first p window ops by the single op new_op; the object held in op_[0] is reused as storage. new_op is a named reference here, so the assignment below copies from it. */ bool Optimizer::rewrite(int p, AsmOp&& new_op) { + tolk_assert(p > 0 && p <= l_); + p_ = p; + q_ = 1; + show_left(); + oq_[0] = std::move(op_[0]); + *oq_[0] = new_op; + show_right(); + return true; +} + +/* Two-op variant: requires p > 1 and reuses op_[0] and op_[1] as storage. */ bool Optimizer::rewrite(int p, AsmOp&& new_op1, AsmOp&& new_op2) { + tolk_assert(p > 1 && p <= l_); + p_ = p; + q_ = 2; + show_left(); + oq_[0] = std::move(op_[0]); + *oq_[0] = new_op1; + oq_[1] = std::move(op_[1]); + *oq_[1] = new_op2; + show_right(); + return true; +} + +/* Three-op variant: requires p > 2 and reuses op_[0], op_[1] and op_[2] as storage. */ bool Optimizer::rewrite(int p, AsmOp&& new_op1, AsmOp&& new_op2, AsmOp&& new_op3) { + tolk_assert(p > 2 && p <= l_); + p_ = p; + q_ = 3; +
show_left(); + oq_[0] = std::move(op_[0]); + *oq_[0] = new_op1; + oq_[1] = std::move(op_[1]); + *oq_[1] = new_op2; + oq_[2] = std::move(op_[2]); + *oq_[2] = new_op3; + show_right(); + return true; +} + +/* Schedules deleting the first p_ window ops (p_ already set by the matcher): the replacement is empty. */ bool Optimizer::rewrite_nop() { + tolk_assert(p_ > 0 && p_ <= l_); + q_ = 0; + show_left(); + show_right(); + return true; +} + +/* Core matcher: tries prefix lengths p from l2_ down to max(min_p, pb_) and accepts the first, i.e. longest, prefix whose cumulative transform tr_[p - 1] satisfies pred; the accepted length is stored in p_. NOTE(review): std::function has no template arguments in this text - presumably lost during extraction. */ bool Optimizer::is_pred(const std::function& pred, int min_p) { + min_p = std::max(min_p, pb_); + for (int p = l2_; p >= min_p; p--) { + if (pred(tr_[p - 1])) { + p_ = p; + return true; + } + } + return false; +} + +/* Matches a prefix whose cumulative transform t satisfies t >= trans. */ bool Optimizer::is_same_as(const StackTransform& trans, int min_p) { + return is_pred([&trans](const auto& t) { return t >= trans; }, min_p); +} + +// s1 s3 XCHG ; s0 s2 XCHG -> 2SWAP +bool Optimizer::is_2swap() { + static const StackTransform t_2swap{2, 3, 0, 1, 4}; + return is_same_as(t_2swap); +} + +// s3 PUSH ; s3 PUSH -> 2OVER +bool Optimizer::is_2over() { + static const StackTransform t_2over{2, 3, 0}; + return is_same_as(t_2over); +} + +/* Matches the fixed transform {0, 1, 0}; find_at_least pairs it with 2DUP. */ bool Optimizer::is_2dup() { + static const StackTransform t_2dup{0, 1, 0}; + return is_same_as(t_2dup); +} + +/* Matches the fixed transform {0, 1, 0, 2}; find_at_least pairs it with TUCK. */ bool Optimizer::is_tuck() { + static const StackTransform t_tuck{0, 1, 0, 2}; + return is_same_as(t_tuck); +} + +/* Matches the fixed transform {2}; find_at_least pairs it with 2DROP. */ bool Optimizer::is_2drop() { + static const StackTransform t_2drop{2}; + return is_same_as(t_2drop); +} + +bool Optimizer::is_rot() { + return is_pred([](const auto& t) { return t.is_rot(); }); +} + +bool Optimizer::is_rotrev() { + return is_pred([](const auto& t) { return t.is_rotrev(); }); +} + +/* Matches a prefix whose net effect is the identity; min_p is 1 here, so a single op may match provided pb_ allows it. */ bool Optimizer::is_nop() { + return is_pred([](const auto& t) { return t.is_id(); }, 1); +} + +/* Matches a prefix equivalent to one exchange; accepted only if both indices are below 16, or the first is 0 and the second is below 256. */ bool Optimizer::is_xchg(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_xchg(i, j) && ((*i < 16 && *j < 16) || (!*i && *j < 256)); }); +} + +/* Matches a prefix equivalent to two exchanges with bounded indices, but rejects a two-op window that already consists of exactly those two exchanges. */ bool Optimizer::is_xchg_xchg(int* i, int* j, int* k, int* l) { + return is_pred([i, j, k, l](const auto& t) { + return t.is_xchg_xchg(i, j, k, l) && (*i < 2 && *j < (*i ?
16 : 256)); + }) && + (!(p_ == 2 && op_[0]->is_xchg(*i, *j) && op_[1]->is_xchg(*k, *l))); +} + +bool Optimizer::is_push(int* i) { + return is_pred([i](const auto& t) { return t.is_push(i) && *i < 256; }); +} + +bool Optimizer::is_pop(int* i) { + return is_pred([i](const auto& t) { return t.is_pop(i) && *i < 256; }); +} + +bool Optimizer::is_pop_pop(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_pop_pop(i, j) && *i < 256 && *j < 256; }, 3); +} + +bool Optimizer::is_push_rot(int* i) { + return is_pred([i](const auto& t) { return t.is_push_rot(i) && *i < 16; }, 3); +} + +bool Optimizer::is_push_rotrev(int* i) { + return is_pred([i](const auto& t) { return t.is_push_rotrev(i) && *i < 16; }, 3); +} + +bool Optimizer::is_push_xchg(int* i, int* j, int* k) { + return is_pred([i, j, k](const auto& t) { return t.is_push_xchg(i, j, k) && *i < 16 && *j < 16 && *k < 16; }) && + !(p_ == 2 && op_[0]->is_push() && op_[1]->is_xchg()); +} + +bool Optimizer::is_xchg2(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_xchg2(i, j) && *i < 16 && *j < 16; }); +} + +bool Optimizer::is_xcpu(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_xcpu(i, j) && *i < 16 && *j < 16; }); +} + +bool Optimizer::is_puxc(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_puxc(i, j) && *i < 16 && *j < 15; }); +} + +bool Optimizer::is_push2(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_push2(i, j) && *i < 16 && *j < 16; }); +} + +bool Optimizer::is_xchg3(int* i, int* j, int* k) { + return is_pred([i, j, k](const auto& t) { return t.is_xchg3(i, j, k) && *i < 16 && *j < 16 && *k < 16; }); +} + +bool Optimizer::is_xc2pu(int* i, int* j, int* k) { + return is_pred([i, j, k](const auto& t) { return t.is_xc2pu(i, j, k) && *i < 16 && *j < 16 && *k < 16; }); +} + +bool Optimizer::is_xcpuxc(int* i, int* j, int* k) { + return is_pred([i, j, k](const auto& t) { return t.is_xcpuxc(i, j, k) && *i < 16 && *j < 16 
&& *k < 15; }); +} + +/* Like the neighbouring matchers: is_pred finds the longest prefix whose cumulative transform passes the StackTransform test of the same name; the comparisons bound the extracted operands - presumably to encodable ranges; confirm against the TVM instruction set. */ bool Optimizer::is_xcpu2(int* i, int* j, int* k) { + return is_pred([i, j, k](const auto& t) { return t.is_xcpu2(i, j, k) && *i < 16 && *j < 16 && *k < 16; }); +} + +/* additionally requires *j + *k != -1 */ bool Optimizer::is_puxc2(int* i, int* j, int* k) { + return is_pred( + [i, j, k](const auto& t) { return t.is_puxc2(i, j, k) && *i < 16 && *j < 15 && *k < 15 && *j + *k != -1; }); +} + +bool Optimizer::is_puxcpu(int* i, int* j, int* k) { + return is_pred([i, j, k](const auto& t) { return t.is_puxcpu(i, j, k) && *i < 16 && *j < 15 && *k < 15; }); +} + +bool Optimizer::is_pu2xc(int* i, int* j, int* k) { + return is_pred([i, j, k](const auto& t) { return t.is_pu2xc(i, j, k) && *i < 16 && *j < 15 && *k < 14; }); +} + +bool Optimizer::is_push3(int* i, int* j, int* k) { + return is_pred([i, j, k](const auto& t) { return t.is_push3(i, j, k) && *i < 16 && *j < 16 && *k < 16; }); +} + +/* both block sizes must lie in 1..16 */ bool Optimizer::is_blkswap(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_blkswap(i, j) && *i > 0 && *j > 0 && *i <= 16 && *j <= 16; }); +} + +bool Optimizer::is_blkpush(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_blkpush(i, j) && *i > 0 && *i < 16 && *j < 16; }); +} + +bool Optimizer::is_blkdrop(int* i) { + return is_pred([i](const auto& t) { return t.is_blkdrop(i) && *i > 0 && *i < 16; }); +} + +bool Optimizer::is_blkdrop2(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_blkdrop2(i, j) && *i > 0 && *i < 16 && *j > 0 && *j < 16; }); +} + +/* the first operand must lie in 2..17, the second below 16 */ bool Optimizer::is_reverse(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_reverse(i, j) && *i >= 2 && *i <= 17 && *j < 16; }); +} + +/* only *i is range-checked here (3..15); *j is passed through as reported by the transform */ bool Optimizer::is_nip_seq(int* i, int* j) { + return is_pred([i, j](const auto& t) { return t.is_nip_seq(i, j) && *i >= 3 && *i <= 15; }); +} + +/* min_p is 3: only prefixes of at least three ops are considered */ bool Optimizer::is_pop_blkdrop(int* i, int* k) { + return is_pred([i, k](const auto& t) { return t.is_pop_blkdrop(i, k) && *i >= *k && *k >= 2 && *k <= 15; }, 3); +} + +bool
Optimizer::is_2pop_blkdrop(int* i, int* j, int* k) { + return is_pred( + [i, j, k](const auto& t) { return t.is_2pop_blkdrop(i, j, k) && *i >= *k && *j >= *k && *k >= 2 && *k <= 15; }, + 3); +} + +/* Computes tr_[i], the cumulative stack transform after window ops 0..i, stopping at the first op that apply_op rejects. l2_ becomes the number of leading ops that were accepted. Always returns true. */ bool Optimizer::compute_stack_transforms() { + StackTransform trans; + for (int i = 0; i < l_; i++) { + if (!apply_op(trans, *op_[i])) { + l2_ = i; + return true; + } + tr_[i] = trans; + } + l2_ = l_; + return true; +} + +/* Debug aid: prints the window, then the running transform before and after each op; stops at the first op that apply_op rejects. Unlike show_head(), the loop below writes to std::cerr without checking debug_. Always returns true. */ bool Optimizer::show_stack_transforms() const { + show_head(); + // slow version + /* + StackTransform trans2; + std::cerr << "id = " << trans2 << std::endl; + for (int i = 0; i < l_; i++) { + StackTransform op; + if (!apply_op(op, *op_[i])) { + std::cerr << "* (" << *op_[i] << " = invalid)\n"; + break; + } + trans2 *= op; + std::cerr << "* " << *op_[i] << " = " << op << " -> " << trans2 << std::endl; + } + */ + // fast version + StackTransform trans; + for (int i = 0; i < l_; i++) { + std::cerr << trans << std::endl << *op_[i] << " -> "; + if (!apply_op(trans, *op_[i])) { + std::cerr << " " << std::endl; + return true; + } + } + std::cerr << trans << std::endl; + return true; +} + +/* Tries the rewrite rules in a fixed order for prefixes of at least pb ops and returns true as soon as one matcher and its rewrite both succeed; p_ and q_ then describe the pending rewrite for apply(). The || chain short-circuits, so rule order is the priority order. Rules guarded by !(mode_ & 1) are skipped when bit 0 of mode_ is set. */ bool Optimizer::find_at_least(int pb) { + p_ = q_ = 0; + pb_ = pb; + // show_stack_transforms(); + int i, j, k, l, c; + return (is_push_const(&i, &c) && rewrite_push_const(i, c)) || (is_nop() && rewrite_nop()) || + (!(mode_ & 1) && is_const_rot(&c) && rewrite_const_rot(c)) || + (is_const_push_xchgs() && rewrite_const_push_xchgs()) || (is_const_pop(&c, &i) && rewrite_const_pop(c, i)) || + (is_xchg(&i, &j) && rewrite(AsmOp::Xchg(i, j))) || (is_push(&i) && rewrite(AsmOp::Push(i))) || + (is_pop(&i) && rewrite(AsmOp::Pop(i))) || (is_pop_pop(&i, &j) && rewrite(AsmOp::Pop(i), AsmOp::Pop(j))) || + (is_xchg_xchg(&i, &j, &k, &l) && rewrite(AsmOp::Xchg(i, j), AsmOp::Xchg(k, l))) || + (!(mode_ & 1) && + ((is_rot() && rewrite(AsmOp::Custom("ROT", 3, 3))) || (is_rotrev() && rewrite(AsmOp::Custom("-ROT", 3, 3))) || + (is_2dup() && rewrite(AsmOp::Custom("2DUP", 2, 4))) || + (is_2swap() &&
rewrite(AsmOp::Custom("2SWAP", 2, 4))) || + (is_2over() && rewrite(AsmOp::Custom("2OVER", 2, 4))) || + (is_tuck() && rewrite(AsmOp::Custom("TUCK", 2, 3))) || + (is_2drop() && rewrite(AsmOp::Custom("2DROP", 2, 0))) || (is_xchg2(&i, &j) && rewrite(AsmOp::Xchg2(i, j))) || + (is_xcpu(&i, &j) && rewrite(AsmOp::XcPu(i, j))) || (is_puxc(&i, &j) && rewrite(AsmOp::PuXc(i, j))) || + (is_push2(&i, &j) && rewrite(AsmOp::Push2(i, j))) || (is_blkswap(&i, &j) && rewrite(AsmOp::BlkSwap(i, j))) || + (is_blkpush(&i, &j) && rewrite(AsmOp::BlkPush(i, j))) || (is_blkdrop(&i) && rewrite(AsmOp::BlkDrop(i))) || + (is_push_rot(&i) && rewrite(AsmOp::Push(i), AsmOp::Custom("ROT"))) || + (is_push_rotrev(&i) && rewrite(AsmOp::Push(i), AsmOp::Custom("-ROT"))) || + (is_push_xchg(&i, &j, &k) && rewrite(AsmOp::Push(i), AsmOp::Xchg(j, k))) || + (is_reverse(&i, &j) && rewrite(AsmOp::BlkReverse(i, j))) || + (is_blkdrop2(&i, &j) && rewrite(AsmOp::BlkDrop2(i, j))) || + (is_nip_seq(&i, &j) && rewrite(AsmOp::Xchg(i, j), AsmOp::BlkDrop(i))) || + (is_pop_blkdrop(&i, &k) && rewrite(AsmOp::Pop(i), AsmOp::BlkDrop(k))) || + (is_2pop_blkdrop(&i, &j, &k) && (k >= 3 && k <= 13 && i != j + 1 && i <= 15 && j <= 14 + ?
rewrite(AsmOp::Xchg2(j + 1, i), AsmOp::BlkDrop(k + 2)) + : rewrite(AsmOp::Pop(i), AsmOp::Pop(j), AsmOp::BlkDrop(k)))) || + (is_xchg3(&i, &j, &k) && rewrite(AsmOp::Xchg3(i, j, k))) || + (is_xc2pu(&i, &j, &k) && rewrite(AsmOp::Xc2Pu(i, j, k))) || + (is_xcpuxc(&i, &j, &k) && rewrite(AsmOp::XcPuXc(i, j, k))) || + (is_xcpu2(&i, &j, &k) && rewrite(AsmOp::XcPu2(i, j, k))) || + (is_puxc2(&i, &j, &k) && rewrite(AsmOp::PuXc2(i, j, k))) || + (is_puxcpu(&i, &j, &k) && rewrite(AsmOp::PuXcPu(i, j, k))) || + (is_pu2xc(&i, &j, &k) && rewrite(AsmOp::Pu2Xc(i, j, k))) || + (is_push3(&i, &j, &k) && rewrite(AsmOp::Push3(i, j, k))))); +} + +/* Recomputes the stack transforms, then calls find_at_least with pb from l_ down to 1, so rewrites covering a longer minimum prefix are preferred. */ bool Optimizer::find() { + if (!compute_stack_transforms()) { + return false; + } + for (int pb = l_; pb > 0; --pb) { + if (find_at_least(pb)) { + return true; + } + } + return false; +} + +/* Applies rewrites to the head of the list until no rule matches; returns whether at least one rewrite was applied. */ bool Optimizer::optimize() { + bool f = false; + while (find()) { + f = true; + apply(); + unpack(); + } + return f; +} + +/* Optimizes only the ops at the head of op_list and returns the resulting list. */ AsmOpConsList optimize_code_head(AsmOpConsList op_list, int mode) { + Optimizer opt(std::move(op_list), op_rewrite_comments, mode); + opt.optimize(); + return opt.extract_code(); +} + +/* One pass over the whole list: at each position whose head is not a comment the head is optimized, then one op is moved to v and the scan continues on the tail; the list is rebuilt from v at the end. NOTE(review): the element type of v is garbled in this text - presumably a vector of owning pointers to AsmOp, matching op_list->car. */ AsmOpConsList optimize_code(AsmOpConsList op_list, int mode) { + std::vector> v; + while (op_list) { + if (!op_list->car->is_comment()) { + op_list = optimize_code_head(std::move(op_list), mode); + } + if (op_list) { + v.push_back(std::move(op_list->car)); + op_list = std::move(op_list->cdr); + } + } + /* cons in reverse so that the first element of v becomes the head again */ for (auto it = v.rbegin(); it < v.rend(); ++it) { + op_list = AsmOpCons::cons(std::move(*it), std::move(op_list)); + } + return std::move(op_list); +} + +/* Entry point for an AsmOpList: converts ops.list_ into a cons list, runs four passes with mode 1 followed by four with mode 0, then moves the ops back into ops.list_. NOTE(review): std::make_unique has no template argument in this text - presumably lost during extraction. */ void optimize_code(AsmOpList& ops) { + AsmOpConsList op_list; + for (auto it = ops.list_.rbegin(); it < ops.list_.rend(); ++it) { + op_list = AsmOpCons::cons(std::make_unique(std::move(*it)), std::move(op_list)); + } + for (int mode : {1, 1, 1, 1, 0, 0, 0, 0}) { + op_list = optimize_code(std::move(op_list), mode); + } + ops.list_.clear(); + while (op_list) { + ops.list_.push_back(std::move(*(op_list->car))); +
op_list = std::move(op_list->cdr); + } +} + +} // namespace tolk diff --git a/tolk/parse-tolk.cpp b/tolk/parse-tolk.cpp new file mode 100644 index 00000000..7fffb15a --- /dev/null +++ b/tolk/parse-tolk.cpp @@ -0,0 +1,1809 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "tolk.h" +#include "td/utils/crypto.h" +#include "common/refint.h" +#include "openssl/digest.hpp" +#include "block/block.h" +#include "block-parse.h" + +namespace tolk { +using namespace std::literals::string_literals; + +/* Classifies an identifier by its first character: a name of at least two characters starting with '.' is IdSc::dotid, one starting with '~' is IdSc::tildeid; everything else, including any name shorter than two characters, is IdSc::undef. */ int compute_symbol_subclass(std::string str) { + if (str.size() < 2) { + return IdSc::undef; + } else if (str[0] == '.') { + return IdSc::dotid; + } else if (str[0] == '~') { + return IdSc::tildeid; + } else { + return IdSc::undef; + } +} + +/* true when the symbol's stored subclass is IdSc::dotid */ inline bool is_dot_ident(sym_idx_t idx) { + return symbols.get_subclass(idx) == IdSc::dotid; +} + +/* true when the symbol's stored subclass is IdSc::tildeid */ inline bool is_tilde_ident(sym_idx_t idx) { + return symbols.get_subclass(idx) == IdSc::tildeid; +} + +/* true for any subclass other than IdSc::undef */ inline bool is_special_ident(sym_idx_t idx) { + return symbols.get_subclass(idx) != IdSc::undef; +} + +/* + * + * PARSE SOURCE + * + */ + +// TE ::= TA | TA -> TE +// TA ::= int | ...
| cont | var | _ | () | ( TE { , TE } ) | [ TE { , TE } ] +TypeExpr* parse_type(Lexer& lex); + +/* Parses one TA production of the grammar above: an atomic type keyword, a hole (var or _), an identifier naming a type, or a parenthesized / bracketed list of types. NOTE(review): the dynamic_cast calls and the std::vector below have no template arguments in this text - presumably lost during extraction. */ TypeExpr* parse_type1(Lexer& lex) { + switch (lex.tp()) { + case _Int: + lex.next(); + return TypeExpr::new_atomic(_Int); + case _Cell: + lex.next(); + return TypeExpr::new_atomic(_Cell); + case _Slice: + lex.next(); + return TypeExpr::new_atomic(_Slice); + case _Builder: + lex.next(); + return TypeExpr::new_atomic(_Builder); + case _Cont: + lex.next(); + return TypeExpr::new_atomic(_Cont); + case _Tuple: + lex.next(); + return TypeExpr::new_atomic(_Tuple); + case _Var: + case '_': + lex.next(); + return TypeExpr::new_hole(); + case _Ident: { + auto sym = lookup_symbol(lex.cur().val); + if (sym && dynamic_cast(sym->value)) { + auto val = dynamic_cast(sym->value); + lex.next(); + return val->get_type(); + } + /* error_at throws, so control does not fall out of this case */ lex.cur().error_at("`", "` is not a type identifier"); + } + } + /* c is the closing bracket that must match the opening one */ int c; + if (lex.tp() == '[') { + lex.next(); + c = ']'; + } else { + lex.expect('('); + c = ')'; + } + /* empty list: () is the unit type, [] an empty tuple type */ if (lex.tp() == c) { + lex.next(); + return c == ')' ? TypeExpr::new_unit() : TypeExpr::new_tuple({}); + } + auto t1 = parse_type(lex); + /* a single parenthesized type is returned as is; after an opening '[' the expect(c) call asks for ']' although the current token is ')' */ if (lex.tp() == ')') { + lex.expect(c); + return t1; + } + /* presumably a vector holding the single element t1 (count 1, value t1) - confirm once the element type is restored */ std::vector tlist{1, t1}; + while (lex.tp() == ',') { + lex.next(); + tlist.push_back(parse_type(lex)); + } + lex.expect(c); + return c == ')' ?
TypeExpr::new_tensor(std::move(tlist)) : TypeExpr::new_tuple(std::move(tlist)); +} + +/* Parses TE ::= TA | TA -> TE; the recursive call on the right-hand side makes the arrow right-associative. */ TypeExpr* parse_type(Lexer& lex) { + auto res = parse_type1(lex); + if (lex.tp() == _Mapsto) { + lex.next(); + auto to = parse_type(lex); + return TypeExpr::new_map(res, to); + } else { + return res; + } +} + +/* Parses one formal parameter: an optional type followed by an optional name. Returns (type, symbol, location); the symbol is null for an unnamed parameter. fa_idx is stored in the SymVal created for a named parameter. NOTE(review): the dynamic_cast calls below have no template argument in this text - presumably lost during extraction. */ FormalArg parse_formal_arg(Lexer& lex, int fa_idx) { + TypeExpr* arg_type = 0; + SrcLocation loc = lex.cur().loc; + if (lex.tp() == '_') { + lex.next(); + /* a lone _ is an unnamed parameter of unknown type */ if (lex.tp() == ',' || lex.tp() == ')') { + return std::make_tuple(TypeExpr::new_hole(), (SymDef*)nullptr, loc); + } + arg_type = TypeExpr::new_hole(); + loc = lex.cur().loc; + } else if (lex.tp() != _Ident) { + arg_type = parse_type(lex); + } else { + /* an identifier is consumed as a type only if it resolves to a type symbol; otherwise it is left in place to be read as the parameter name and the type becomes a hole */ auto sym = lookup_symbol(lex.cur().val); + if (sym && dynamic_cast(sym->value)) { + auto val = dynamic_cast(sym->value); + lex.next(); + arg_type = val->get_type(); + } else { + arg_type = TypeExpr::new_hole(); + } + } + /* type given but no name: unnamed parameter, with an optional _ placeholder */ if (lex.tp() == '_' || lex.tp() == ',' || lex.tp() == ')') { + if (lex.tp() == '_') { + loc = lex.cur().loc; + lex.next(); + } + return std::make_tuple(arg_type, (SymDef*)nullptr, loc); + } + if (lex.tp() != _Ident) { + lex.expect(_Ident, "formal parameter name"); + } + loc = lex.cur().loc; + if (prohibited_var_names.count(symbols.get_name(lex.cur().val))) { + throw ParseError{ + loc, PSTRING() << "symbol `" << symbols.get_name(lex.cur().val) << "` cannot be redefined as a variable"}; + } + SymDef* new_sym_def = define_symbol(lex.cur().val, true, loc); + if (!new_sym_def) { + lex.cur().error_at("cannot define symbol `", "`"); + } + if (new_sym_def->value) { + lex.cur().error_at("redefined formal parameter `", "`"); + } + new_sym_def->value = new SymVal{SymVal::_Param, fa_idx, arg_type}; + lex.next(); + return std::make_tuple(arg_type, new_sym_def, loc); +} + +/* Parses one global variable declaration: an optional type followed by a name. A name that already has a value must be a global variable whose type unifies with the new one; otherwise a new global variable is registered. */ void parse_global_var_decl(Lexer& lex) { + TypeExpr* var_type = 0; + SrcLocation loc = lex.cur().loc; + if (lex.tp() == '_') { + lex.next(); + var_type = TypeExpr::new_hole(); + loc = lex.cur().loc; + }
else if (lex.tp() != _Ident) { + var_type = parse_type(lex); + } else { + auto sym = lookup_symbol(lex.cur().val); + if (sym && dynamic_cast(sym->value)) { + auto val = dynamic_cast(sym->value); + lex.next(); + var_type = val->get_type(); + } else { + var_type = TypeExpr::new_hole(); + } + } + if (lex.tp() != _Ident) { + lex.expect(_Ident, "global variable name"); + } + loc = lex.cur().loc; + SymDef* sym_def = define_global_symbol(lex.cur().val, false, loc); + if (!sym_def) { + lex.cur().error_at("cannot define global symbol `", "`"); + } + if (sym_def->value) { + auto val = dynamic_cast(sym_def->value); + if (!val) { + lex.cur().error_at("symbol `", "` cannot be redefined as a global variable"); + } + try { + unify(var_type, val->sym_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot unify new type " << var_type << " of global variable `" << sym_def->name() + << "` with its previous type " << val->sym_type << ": " << ue; + lex.cur().error(os.str()); + } + } else { + sym_def->value = new SymValGlobVar{glob_var_cnt++, var_type}; + glob_vars.push_back(sym_def); + } + lex.next(); +} + +extern int const_cnt; +Expr* parse_expr(Lexer& lex, CodeBlob& code, bool nv = false); + +/* Parses one constant definition: an optional int or slice type keyword, a name, '=' and an initializer. The initializer must be an integer constant, a slice constant, or a function application that compiles down to a single constant-producing instruction. A name that already has a value is accepted only if the old value is a constant of the same type and value. NOTE(review): the std::vector and dynamic_cast uses below have no template arguments in this text - presumably lost during extraction. */ void parse_const_decl(Lexer& lex) { + SrcLocation loc = lex.cur().loc; + int wanted_type = Expr::_None; + if (lex.tp() == _Int) { + wanted_type = Expr::_Const; + lex.next(); + } else if (lex.tp() == _Slice) { + wanted_type = Expr::_SliceConst; + lex.next(); + } + if (lex.tp() != _Ident) { + lex.expect(_Ident, "constant name"); + } + loc = lex.cur().loc; + SymDef* sym_def = define_global_symbol(lex.cur().val, false, loc); + if (!sym_def) { + lex.cur().error_at("cannot define global symbol `", "`"); + } + /* keep a copy of the name lexem for the redefinition error reported at the end */ Lexem ident = lex.cur(); + lex.next(); + if (lex.tp() != '=') { + lex.cur().error_at("expected = instead of ", ""); + } + lex.next(); + CodeBlob code; + if (pragma_allow_post_modification.enabled()) { + code.flags |= CodeBlob::_AllowPostModification; + } + if
(pragma_compute_asm_ltr.enabled()) { + code.flags |= CodeBlob::_ComputeAsmLtr; + } + // Handles processing and resolution of literals and consts + auto x = parse_expr(lex, code, false); // also does lex.next() ! + if (x->flags != Expr::_IsRvalue) { + lex.cur().error("expression is not strictly Rvalue"); + } + if ((wanted_type == Expr::_Const) && (x->cls == Expr::_Apply)) + wanted_type = Expr::_None; // Apply is additionally checked to result in an integer + if ((wanted_type != Expr::_None) && (x->cls != wanted_type)) { + lex.cur().error("expression type does not match wanted type"); + } + SymValConst* new_value = nullptr; + if (x->cls == Expr::_Const) { // Integer constant + new_value = new SymValConst{const_cnt++, x->intval}; + } else if (x->cls == Expr::_SliceConst) { // Slice constant (string) + new_value = new SymValConst{const_cnt++, x->strval}; + } else if (x->cls == Expr::_Apply) { + code.emplace_back(loc, Op::_Import, std::vector()); + auto tmp_vars = x->pre_compile(code); + code.emplace_back(loc, Op::_Return, std::move(tmp_vars)); + code.emplace_back(loc, Op::_Nop); // This is necessary to prevent SIGSEGV!
+ // It is REQUIRED to execute "optimizations" as in tolk.cpp + code.simplify_var_types(); + code.prune_unreachable_code(); + code.split_vars(true); + for (int i = 0; i < 16; i++) { + code.compute_used_code_vars(); + code.fwd_analyze(); + code.prune_unreachable_code(); + } + code.mark_noreturn(); + AsmOpList out_list(0, &code.vars); + code.generate_code(out_list); + /* the generated code must be exactly one instruction, and that instruction a constant load with a valid integer origin */ if (out_list.list_.size() != 1) { + lex.cur().error("precompiled expression must result in single operation"); + } + auto op = out_list.list_[0]; + if (!op.is_const()) { + lex.cur().error("precompiled expression must result in compilation time constant"); + } + if (op.origin.is_null() || !op.origin->is_valid()) { + lex.cur().error("precompiled expression did not result in a valid integer constant"); + } + new_value = new SymValConst{const_cnt++, op.origin}; + } else { + lex.cur().error("integer or slice literal or constant expected"); + } + /* redefinition is tolerated only for an identical constant; the stored value is replaced by new_value in either case */ if (sym_def->value) { + SymValConst* old_value = dynamic_cast(sym_def->value); + Keyword new_type = new_value->get_type(); + if (!old_value || old_value->get_type() != new_type || + (new_type == _Int && *old_value->get_int_value() != *new_value->get_int_value()) || + (new_type == _Slice && old_value->get_str_value() != new_value->get_str_value())) { + ident.error_at("global symbol `", "` already exists"); + } + } + sym_def->value = new_value; +} + +/* Parses a parenthesized, comma-separated formal parameter list; each parameter receives its zero-based position as fa_idx. */ FormalArgList parse_formal_args(Lexer& lex) { + FormalArgList args; + lex.expect('(', "formal argument list"); + if (lex.tp() == ')') { + lex.next(); + return args; + } + int fa_idx = 0; + args.push_back(parse_formal_arg(lex, fa_idx++)); + while (lex.tp() == ',') { + lex.next(); + args.push_back(parse_formal_arg(lex, fa_idx++)); + } + lex.expect(')'); + return args; +} + +/* Parses the const keyword, one or more comma-separated constant definitions, and the closing ';'. */ void parse_const_decls(Lexer& lex) { + lex.expect(_Const); + while (true) { + parse_const_decl(lex); + if (lex.tp() != ',') { + break; + } + lex.expect(','); + } + lex.expect(';'); +} + +/* Combines the parameter types of arg_list into one type: unit for no parameters, the type itself for a single parameter, a tensor otherwise. */ TypeExpr* extract_total_arg_type(const FormalArgList& arg_list) { +
if (arg_list.empty()) { + return TypeExpr::new_unit(); + } + if (arg_list.size() == 1) { + return std::get<0>(arg_list[0]); + } + /* NOTE(review): std::vector has no template argument in this text - presumably lost during extraction */ std::vector type_list; + for (auto& x : arg_list) { + type_list.push_back(std::get<0>(x)); + } + return TypeExpr::new_tensor(std::move(type_list)); +} + +/* Parses the global keyword, one or more comma-separated global variable declarations, and the closing ';'. */ void parse_global_var_decls(Lexer& lex) { + lex.expect(_Global); + while (true) { + parse_global_var_decl(lex); + if (lex.tp() != ',') { + break; + } + lex.expect(','); + } + lex.expect(';'); +} + +/* Creates the function value for func_sym, using the current glob_func_cnt as its index, attaches it to the symbol, appends the symbol to glob_func and increments the counter. */ SymValCodeFunc* make_new_glob_func(SymDef* func_sym, TypeExpr* func_type, bool impure = false) { + SymValCodeFunc* res = new SymValCodeFunc{glob_func_cnt, func_type, impure}; + func_sym->value = res; + glob_func.push_back(func_sym); + glob_func_cnt++; + return res; +} + +/* Checks that func_name (or, when it is 0, the symbol of lexem cur) names something with a type. An unknown name is reported via show_error and then defined on the spot as a global function of unknown type, counted in undef_func_cnt. */ bool check_global_func(const Lexem& cur, sym_idx_t func_name = 0) { + if (!func_name) { + func_name = cur.val; + } + SymDef* def = lookup_symbol(func_name); + if (!def) { + cur.loc.show_error(std::string{"undefined function `"} + symbols.get_name(func_name) + + "`, defining a global function of unknown type"); + def = define_global_symbol(func_name, 0, cur.loc); + tolk_assert(def && "cannot define global function"); + ++undef_func_cnt; + make_new_glob_func(def, TypeExpr::new_func()); // was: ...
::new_func() + return true; + } + SymVal* val = dynamic_cast(def->value); + if (!val) { + cur.error(std::string{"symbol `"} + symbols.get_name(func_name) + "` has no value and no type"); + return false; + } else if (!val->get_type()) { + cur.error(std::string{"symbol `"} + symbols.get_name(func_name) + "` has no type, possibly not a function"); + return false; + } else { + return true; + } +} + +/* Builds the expression node for applying fun to x. For a global function (cls _Glob) the result is an _Apply node whose arguments are the elements of x when x is a tensor, or x itself otherwise; the impure flag of fun is carried over. For any other callee a _VarApply node over (fun, x) is built. The result is always flagged as an rvalue. */ Expr* make_func_apply(Expr* fun, Expr* x) { + Expr* res; + if (fun->cls == Expr::_Glob) { + if (x->cls == Expr::_Tensor) { + res = new Expr{Expr::_Apply, fun->sym, x->args}; + } else { + res = new Expr{Expr::_Apply, fun->sym, {x}}; + } + res->flags = Expr::_IsRvalue | (fun->flags & Expr::_IsImpure); + } else { + res = new Expr{Expr::_VarApply, {fun, x}}; + res->flags = Expr::_IsRvalue; + } + return res; +} + +// parse ( E { , E } ) | () | [ E { , E } ] | [] | id | num | _ +Expr* parse_expr100(Lexer& lex, CodeBlob& code, bool nv) { + if (lex.tp() == '(' || lex.tp() == '[') { + bool tf = (lex.tp() == '['); + int clbr = (tf ?
']' : ')'); + SrcLocation loc{lex.cur().loc}; + lex.next(); + if (lex.tp() == clbr) { + lex.next(); + Expr* res = new Expr{Expr::_Tensor, {}}; + res->flags = Expr::_IsRvalue; + res->here = loc; + res->e_type = TypeExpr::new_unit(); + if (tf) { + res = new Expr{Expr::_MkTuple, {res}}; + res->flags = Expr::_IsRvalue; + res->here = loc; + res->e_type = TypeExpr::new_tuple(res->args.at(0)->e_type); + } + return res; + } + Expr* res = parse_expr(lex, code, nv); + if (lex.tp() == ')') { + lex.expect(clbr); + return res; + } + std::vector type_list; + type_list.push_back(res->e_type); + int f = res->flags; + res = new Expr{Expr::_Tensor, {res}}; + while (lex.tp() == ',') { + lex.next(); + auto x = parse_expr(lex, code, nv); + res->pb_arg(x); + if ((f ^ x->flags) & Expr::_IsType) { + lex.cur().error("mixing type and non-type expressions inside the same tuple"); + } + f &= x->flags; + type_list.push_back(x->e_type); + } + res->here = loc; + res->flags = f; + res->e_type = TypeExpr::new_tensor(std::move(type_list), !tf); + if (tf) { + res = new Expr{Expr::_MkTuple, {res}}; + res->flags = f; + res->here = loc; + res->e_type = TypeExpr::new_tuple(res->args.at(0)->e_type); + } + lex.expect(clbr); + return res; + } + int t = lex.tp(); + if (t == Lexem::Number) { + Expr* res = new Expr{Expr::_Const, lex.cur().loc}; + res->flags = Expr::_IsRvalue; + res->intval = td::string_to_int256(lex.cur().str); + if (res->intval.is_null() || !res->intval->signed_fits_bits(257)) { + lex.cur().error_at("invalid integer constant `", "`"); + } + res->e_type = TypeExpr::new_atomic(_Int); + lex.next(); + return res; + } + if (t == Lexem::String) { + std::string str = lex.cur().str; + int str_type = lex.cur().val; + Expr* res; + switch (str_type) { + case 0: + case 's': + case 'a': + { + res = new Expr{Expr::_SliceConst, lex.cur().loc}; + res->e_type = TypeExpr::new_atomic(_Slice); + break; + } + case 'u': + case 'h': + case 'H': + case 'c': + { + res = new Expr{Expr::_Const, lex.cur().loc}; + 
res->e_type = TypeExpr::new_atomic(_Int); + break; + } + default: + { + res = new Expr{Expr::_Const, lex.cur().loc}; + res->e_type = TypeExpr::new_atomic(_Int); + lex.cur().error("invalid string type `" + std::string(1, static_cast(str_type)) + "`"); + return res; + } + } + res->flags = Expr::_IsRvalue; + switch (str_type) { + case 0: { + res->strval = td::hex_encode(str); + break; + } + case 's': { + res->strval = str; + unsigned char buff[128]; + int bits = (int)td::bitstring::parse_bitstring_hex_literal(buff, sizeof(buff), str.data(), str.data() + str.size()); + if (bits < 0) { + lex.cur().error_at("Invalid hex bitstring constant `", "`"); + } + break; + } + case 'a': { // MsgAddressInt + // todo rewrite stdaddress parsing (if done, CMake dep "ton_crypto" can be replaced with "ton_crypto_core") + block::StdAddress a; + if (a.parse_addr(str)) { + res->strval = block::tlb::MsgAddressInt().pack_std_address(a)->as_bitslice().to_hex(); + } else { + lex.cur().error_at("invalid standard address `", "`"); + } + break; + } + case 'u': { + res->intval = td::hex_string_to_int256(td::hex_encode(str)); + if (!str.size()) { + lex.cur().error("empty integer ascii-constant"); + } + if (res->intval.is_null()) { + lex.cur().error_at("too long integer ascii-constant `", "`"); + } + break; + } + case 'h': + case 'H': + { + unsigned char hash[32]; + digest::hash_str(hash, str.data(), str.size()); + res->intval = td::bits_to_refint(hash, (str_type == 'h') ? 
32 : 256, false); + break; + } + case 'c': + { + res->intval = td::make_refint(td::crc32(td::Slice{str})); + break; + } + } + lex.next(); + return res; + } + if (t == '_') { + Expr* res = new Expr{Expr::_Hole, lex.cur().loc}; + res->val = -1; + res->flags = (Expr::_IsLvalue | Expr::_IsHole | Expr::_IsNewVar); + res->e_type = TypeExpr::new_hole(); + lex.next(); + return res; + } + if (t == _Var) { + Expr* res = new Expr{Expr::_Type, lex.cur().loc}; + res->flags = Expr::_IsType; + res->e_type = TypeExpr::new_hole(); + lex.next(); + return res; + } + if (t == _Int || t == _Cell || t == _Slice || t == _Builder || t == _Cont || t == _Type || t == _Tuple) { + Expr* res = new Expr{Expr::_Type, lex.cur().loc}; + res->flags = Expr::_IsType; + res->e_type = TypeExpr::new_atomic(t); + lex.next(); + return res; + } + if (t == _Ident) { + auto sym = lookup_symbol(lex.cur().val); + if (sym && dynamic_cast(sym->value)) { + auto val = dynamic_cast(sym->value); + Expr* res = new Expr{Expr::_Type, lex.cur().loc}; + res->flags = Expr::_IsType; + res->e_type = val->get_type(); + lex.next(); + return res; + } + if (sym && dynamic_cast(sym->value)) { + auto val = dynamic_cast(sym->value); + Expr* res = new Expr{Expr::_GlobVar, lex.cur().loc}; + res->e_type = val->get_type(); + res->sym = sym; + res->flags = Expr::_IsLvalue | Expr::_IsRvalue | Expr::_IsImpure; + lex.next(); + return res; + } + if (sym && dynamic_cast(sym->value)) { + auto val = dynamic_cast(sym->value); + Expr* res = new Expr{Expr::_None, lex.cur().loc}; + res->flags = Expr::_IsRvalue; + if (val->type == _Int) { + res->cls = Expr::_Const; + res->intval = val->get_int_value(); + } + else if (val->type == _Slice) { + res->cls = Expr::_SliceConst; + res->strval = val->get_str_value(); + } + else { + lex.cur().error("Invalid symbolic constant type"); + } + res->e_type = TypeExpr::new_atomic(val->type); + lex.next(); + return res; + } + bool auto_apply = false; + Expr* res = new Expr{Expr::_Var, lex.cur().loc}; + if (nv) { + 
res->val = ~lex.cur().val; + res->e_type = TypeExpr::new_hole(); + res->flags = Expr::_IsLvalue | Expr::_IsNewVar; + // std::cerr << "defined new variable " << lex.cur().str << " : " << res->e_type << std::endl; + } else { + if (!sym) { + check_global_func(lex.cur()); + sym = lookup_symbol(lex.cur().val); + } + res->sym = sym; + SymVal* val = nullptr; + if (sym) { + val = dynamic_cast(sym->value); + } + if (!val) { + lex.cur().error_at("undefined identifier `", "`"); + } else if (val->type == SymVal::_Func) { + res->e_type = val->get_type(); + res->cls = Expr::_Glob; + auto_apply = val->auto_apply; + } else if (val->idx < 0) { + lex.cur().error_at("accessing variable `", "` being defined"); + } else { + res->val = val->idx; + res->e_type = val->get_type(); + // std::cerr << "accessing variable " << lex.cur().str << " : " << res->e_type << std::endl; + } + // std::cerr << "accessing symbol " << lex.cur().str << " : " << res->e_type << (val->impure ? " (impure)" : " (pure)") << std::endl; + res->flags = Expr::_IsLvalue | Expr::_IsRvalue | (val->impure ? Expr::_IsImpure : 0); + } + if (auto_apply) { + int impure = res->flags & Expr::_IsImpure; + delete res; + res = new Expr{Expr::_Apply, sym, {}}; + res->flags = Expr::_IsRvalue | impure; + } + res->deduce_type(lex.cur()); + lex.next(); + return res; + } + lex.expect(Lexem::Ident); + return nullptr; +} + +// parse E { E } +Expr* parse_expr90(Lexer& lex, CodeBlob& code, bool nv) { + Expr* res = parse_expr100(lex, code, nv); + while (lex.tp() == '(' || lex.tp() == '[' || (lex.tp() == _Ident && !is_special_ident(lex.cur().val))) { + if (res->is_type()) { + Expr* x = parse_expr100(lex, code, true); + x->chk_lvalue(lex.cur()); // chk_lrvalue() ? 
+ TypeExpr* tp = res->e_type; + delete res; + res = new Expr{Expr::_TypeApply, {x}}; + res->e_type = tp; + res->here = lex.cur().loc; + try { + unify(res->e_type, x->e_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "cannot transform expression of type " << x->e_type << " to explicitly requested type " << res->e_type + << ": " << ue; + lex.cur().error(os.str()); + } + res->flags = x->flags; + } else { + Expr* x = parse_expr100(lex, code, false); + x->chk_rvalue(lex.cur()); + res = make_func_apply(res, x); + res->here = lex.cur().loc; + res->deduce_type(lex.cur()); + } + } + return res; +} + +// parse E { .method E | ~method E } +Expr* parse_expr80(Lexer& lex, CodeBlob& code, bool nv) { + Expr* res = parse_expr90(lex, code, nv); + while (lex.tp() == _Ident && is_special_ident(lex.cur().val)) { + auto modify = is_tilde_ident(lex.cur().val); + auto obj = res; + if (modify) { + obj->chk_lvalue(lex.cur()); + } else { + obj->chk_rvalue(lex.cur()); + } + auto loc = lex.cur().loc; + auto name = lex.cur().val; + auto sym = lookup_symbol(name); + if (!sym || !dynamic_cast(sym->value)) { + auto name1 = symbols.lookup(lex.cur().str.substr(1)); + if (name1) { + auto sym1 = lookup_symbol(name1); + if (sym1 && dynamic_cast(sym1->value)) { + name = name1; + sym = sym1; + } + } + } + check_global_func(lex.cur(), name); + if (verbosity >= 2) { + std::cerr << "using symbol `" << symbols.get_name(name) << "` for method call of " << lex.cur().str << std::endl; + } + sym = lookup_symbol(name); + SymValFunc* val = sym ? 
dynamic_cast(sym->value) : nullptr; + if (!val) { + lex.cur().error_at("undefined method identifier `", "`"); + } + lex.next(); + auto x = parse_expr100(lex, code, false); + x->chk_rvalue(lex.cur()); + if (x->cls == Expr::_Tensor) { + res = new Expr{Expr::_Apply, name, {obj}}; + res->args.insert(res->args.end(), x->args.begin(), x->args.end()); + } else { + res = new Expr{Expr::_Apply, name, {obj, x}}; + } + res->here = loc; + res->flags = Expr::_IsRvalue | (val->impure ? Expr::_IsImpure : 0); + res->deduce_type(lex.cur()); + if (modify) { + auto tmp = res; + res = new Expr{Expr::_LetFirst, {obj->copy(), tmp}}; + res->here = loc; + res->flags = tmp->flags; + res->set_val(name); + res->deduce_type(lex.cur()); + } + } + return res; +} + +// parse [ ~ ] E +Expr* parse_expr75(Lexer& lex, CodeBlob& code, bool nv) { + if (lex.tp() == '~') { + sym_idx_t name = symbols.lookup_add("~_"); + check_global_func(lex.cur(), name); + SrcLocation loc{lex.cur().loc}; + lex.next(); + auto x = parse_expr80(lex, code, false); + x->chk_rvalue(lex.cur()); + auto res = new Expr{Expr::_Apply, name, {x}}; + res->here = loc; + res->set_val('~'); + res->flags = Expr::_IsRvalue; + res->deduce_type(lex.cur()); + return res; + } else { + return parse_expr80(lex, code, nv); + } +} + +// parse E { (* | / | % | /% ) E } +Expr* parse_expr30(Lexer& lex, CodeBlob& code, bool nv) { + Expr* res = parse_expr75(lex, code, nv); + while (lex.tp() == '*' || lex.tp() == '/' || lex.tp() == '%' || lex.tp() == _DivMod || lex.tp() == _DivC || + lex.tp() == _DivR || lex.tp() == _ModC || lex.tp() == _ModR || lex.tp() == '&') { + res->chk_rvalue(lex.cur()); + int t = lex.tp(); + sym_idx_t name = symbols.lookup_add(std::string{"_"} + lex.cur().str + "_"); + SrcLocation loc{lex.cur().loc}; + check_global_func(lex.cur(), name); + lex.next(); + auto x = parse_expr75(lex, code, false); + x->chk_rvalue(lex.cur()); + res = new Expr{Expr::_Apply, name, {res, x}}; + res->here = loc; + res->set_val(t); + res->flags = 
Expr::_IsRvalue; + res->deduce_type(lex.cur()); + } + return res; +} + +// parse [-] E { (+ | - | `|` | ^) E } +Expr* parse_expr20(Lexer& lex, CodeBlob& code, bool nv) { + Expr* res; + int t = lex.tp(); + if (t == '-') { + sym_idx_t name = symbols.lookup_add("-_"); + check_global_func(lex.cur(), name); + SrcLocation loc{lex.cur().loc}; + lex.next(); + auto x = parse_expr30(lex, code, false); + x->chk_rvalue(lex.cur()); + res = new Expr{Expr::_Apply, name, {x}}; + res->here = loc; + res->set_val(t); + res->flags = Expr::_IsRvalue; + res->deduce_type(lex.cur()); + } else { + res = parse_expr30(lex, code, nv); + } + while (lex.tp() == '-' || lex.tp() == '+' || lex.tp() == '|' || lex.tp() == '^') { + res->chk_rvalue(lex.cur()); + t = lex.tp(); + sym_idx_t name = symbols.lookup_add(std::string{"_"} + lex.cur().str + "_"); + check_global_func(lex.cur(), name); + SrcLocation loc{lex.cur().loc}; + lex.next(); + auto x = parse_expr30(lex, code, false); + x->chk_rvalue(lex.cur()); + res = new Expr{Expr::_Apply, name, {res, x}}; + res->here = loc; + res->set_val(t); + res->flags = Expr::_IsRvalue; + res->deduce_type(lex.cur()); + } + return res; +} + +// parse E { ( << | >> | >>~ | >>^ ) E } +Expr* parse_expr17(Lexer& lex, CodeBlob& code, bool nv) { + Expr* res = parse_expr20(lex, code, nv); + while (lex.tp() == _Lshift || lex.tp() == _Rshift || lex.tp() == _RshiftC || lex.tp() == _RshiftR) { + res->chk_rvalue(lex.cur()); + int t = lex.tp(); + sym_idx_t name = symbols.lookup_add(std::string{"_"} + lex.cur().str + "_"); + check_global_func(lex.cur(), name); + SrcLocation loc{lex.cur().loc}; + lex.next(); + auto x = parse_expr20(lex, code, false); + x->chk_rvalue(lex.cur()); + res = new Expr{Expr::_Apply, name, {res, x}}; + res->here = loc; + res->set_val(t); + res->flags = Expr::_IsRvalue; + res->deduce_type(lex.cur()); + } + return res; +} + +// parse E [ (== | < | > | <= | >= | != | <=> ) E ] +Expr* parse_expr15(Lexer& lex, CodeBlob& code, bool nv) { + Expr* res = 
parse_expr17(lex, code, nv); + if (lex.tp() == _Eq || lex.tp() == '<' || lex.tp() == '>' || lex.tp() == _Leq || lex.tp() == _Geq || + lex.tp() == _Neq || lex.tp() == _Spaceship) { + res->chk_rvalue(lex.cur()); + int t = lex.tp(); + sym_idx_t name = symbols.lookup_add(std::string{"_"} + lex.cur().str + "_"); + check_global_func(lex.cur(), name); + SrcLocation loc{lex.cur().loc}; + lex.next(); + auto x = parse_expr17(lex, code, false); + x->chk_rvalue(lex.cur()); + res = new Expr{Expr::_Apply, name, {res, x}}; + res->here = loc; + res->set_val(t); + res->flags = Expr::_IsRvalue; + res->deduce_type(lex.cur()); + } + return res; +} + +// parse E [ ? E : E ] +Expr* parse_expr13(Lexer& lex, CodeBlob& code, bool nv) { + Expr* res = parse_expr15(lex, code, nv); + if (lex.tp() == '?') { + res->chk_rvalue(lex.cur()); + SrcLocation loc{lex.cur().loc}; + lex.next(); + auto x = parse_expr(lex, code, false); + x->chk_rvalue(lex.cur()); + lex.expect(':'); + auto y = parse_expr13(lex, code, false); + y->chk_rvalue(lex.cur()); + res = new Expr{Expr::_CondExpr, {res, x, y}}; + res->here = loc; + res->flags = Expr::_IsRvalue; + res->deduce_type(lex.cur()); + } + return res; +} + +// parse LE1 (= | += | -= | ... 
) E2 +Expr* parse_expr10(Lexer& lex, CodeBlob& code, bool nv) { + auto x = parse_expr13(lex, code, nv); + int t = lex.tp(); + if (t == _PlusLet || t == _MinusLet || t == _TimesLet || t == _DivLet || t == _DivRLet || t == _DivCLet || + t == _ModLet || t == _ModCLet || t == _ModRLet || t == _LshiftLet || t == _RshiftLet || t == _RshiftCLet || + t == _RshiftRLet || t == _AndLet || t == _OrLet || t == _XorLet) { + x->chk_lvalue(lex.cur()); + x->chk_rvalue(lex.cur()); + sym_idx_t name = symbols.lookup_add(std::string{"^_"} + lex.cur().str + "_"); + check_global_func(lex.cur(), name); + SrcLocation loc{lex.cur().loc}; + lex.next(); + auto y = parse_expr10(lex, code, false); + y->chk_rvalue(lex.cur()); + Expr* z = new Expr{Expr::_Apply, name, {x, y}}; + z->here = loc; + z->set_val(t); + z->flags = Expr::_IsRvalue; + z->deduce_type(lex.cur()); + Expr* res = new Expr{Expr::_Letop, {x->copy(), z}}; + res->here = loc; + res->flags = (x->flags & ~Expr::_IsType) | Expr::_IsRvalue; + res->set_val(t); + res->deduce_type(lex.cur()); + return res; + } else if (t == '=') { + x->chk_lvalue(lex.cur()); + SrcLocation loc{lex.cur().loc}; + lex.next(); + auto y = parse_expr10(lex, code, false); + y->chk_rvalue(lex.cur()); + x->predefine_vars(); + x->define_new_vars(code); + Expr* res = new Expr{Expr::_Letop, {x, y}}; + res->here = loc; + res->flags = (x->flags & ~Expr::_IsType) | Expr::_IsRvalue; + res->set_val(t); + res->deduce_type(lex.cur()); + return res; + } else { + return x; + } +} + +Expr* parse_expr(Lexer& lex, CodeBlob& code, bool nv) { + return parse_expr10(lex, code, nv); +} + +namespace blk_fl { +enum { end = 1, ret = 2, empty = 4 }; +typedef int val; +constexpr val init = end | empty; +void combine(val& x, const val y) { + x |= y & ret; + x &= y | ~(end | empty); +} +void combine_parallel(val& x, const val y) { + x &= y | ~(ret | empty); + x |= y & end; +} +} // namespace blk_fl + +blk_fl::val parse_return_stmt(Lexer& lex, CodeBlob& code) { + auto expr = parse_expr(lex, 
code); + expr->chk_rvalue(lex.cur()); + try { + // std::cerr << "in return: "; + unify(expr->e_type, code.ret_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "previous function return type " << code.ret_type + << " cannot be unified with return statement expression type " << expr->e_type << ": " << ue; + lex.cur().error(os.str()); + } + std::vector tmp_vars = expr->pre_compile(code); + code.emplace_back(lex.cur().loc, Op::_Return, std::move(tmp_vars)); + lex.expect(';'); + return blk_fl::ret; +} + +blk_fl::val parse_implicit_ret_stmt(Lexer& lex, CodeBlob& code) { + auto ret_type = TypeExpr::new_unit(); + try { + // std::cerr << "in implicit return: "; + unify(ret_type, code.ret_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "previous function return type " << code.ret_type + << " cannot be unified with implicit end-of-block return type " << ret_type << ": " << ue; + lex.cur().error(os.str()); + } + code.emplace_back(lex.cur().loc, Op::_Return); + return blk_fl::ret; +} + +blk_fl::val parse_stmt(Lexer& lex, CodeBlob& code); + +blk_fl::val parse_block_stmt(Lexer& lex, CodeBlob& code, bool no_new_scope = false) { + lex.expect('{'); + if (!no_new_scope) { + open_scope(lex); + } + blk_fl::val res = blk_fl::init; + bool warned = false; + while (lex.tp() != '}') { + if (!(res & blk_fl::end) && !warned) { + lex.cur().loc.show_warning("unreachable code"); + warned = true; + } + blk_fl::combine(res, parse_stmt(lex, code)); + } + if (!no_new_scope) { + close_scope(lex); + } + lex.expect('}'); + return res; +} + +blk_fl::val parse_repeat_stmt(Lexer& lex, CodeBlob& code) { + SrcLocation loc{lex.cur().loc}; + lex.expect(_Repeat); + auto expr = parse_expr(lex, code); + expr->chk_rvalue(lex.cur()); + auto cnt_type = TypeExpr::new_atomic(_Int); + try { + unify(expr->e_type, cnt_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "repeat count value of type " << expr->e_type << " is not an integer: " << ue; + 
lex.cur().error(os.str()); + } + std::vector tmp_vars = expr->pre_compile(code); + if (tmp_vars.size() != 1) { + lex.cur().error("repeat count value is not a singleton"); + } + Op& repeat_op = code.emplace_back(loc, Op::_Repeat, tmp_vars); + code.push_set_cur(repeat_op.block0); + blk_fl::val res = parse_block_stmt(lex, code); + code.close_pop_cur(lex.cur().loc); + return res | blk_fl::end; +} + +blk_fl::val parse_while_stmt(Lexer& lex, CodeBlob& code) { + SrcLocation loc{lex.cur().loc}; + lex.expect(_While); + auto expr = parse_expr(lex, code); + expr->chk_rvalue(lex.cur()); + auto cnt_type = TypeExpr::new_atomic(_Int); + try { + unify(expr->e_type, cnt_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "while condition value of type " << expr->e_type << " is not an integer: " << ue; + lex.cur().error(os.str()); + } + Op& while_op = code.emplace_back(loc, Op::_While); + code.push_set_cur(while_op.block0); + while_op.left = expr->pre_compile(code); + code.close_pop_cur(lex.cur().loc); + if (while_op.left.size() != 1) { + lex.cur().error("while condition value is not a singleton"); + } + code.push_set_cur(while_op.block1); + blk_fl::val res1 = parse_block_stmt(lex, code); + code.close_pop_cur(lex.cur().loc); + return res1 | blk_fl::end; +} + +blk_fl::val parse_do_stmt(Lexer& lex, CodeBlob& code) { + Op& while_op = code.emplace_back(lex.cur().loc, Op::_Until); + lex.expect(_Do); + code.push_set_cur(while_op.block0); + open_scope(lex); + blk_fl::val res = parse_block_stmt(lex, code, true); + lex.expect(_Until); + auto expr = parse_expr(lex, code); + expr->chk_rvalue(lex.cur()); + close_scope(lex); + auto cnt_type = TypeExpr::new_atomic(_Int); + try { + unify(expr->e_type, cnt_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "`until` condition value of type " << expr->e_type << " is not an integer: " << ue; + lex.cur().error(os.str()); + } + while_op.left = expr->pre_compile(code); + code.close_pop_cur(lex.cur().loc); + if 
(while_op.left.size() != 1) { + lex.cur().error("`until` condition value is not a singleton"); + } + return res & ~blk_fl::empty; +} + +blk_fl::val parse_try_catch_stmt(Lexer& lex, CodeBlob& code) { + code.require_callxargs = true; + lex.expect(_Try); + Op& try_catch_op = code.emplace_back(lex.cur().loc, Op::_TryCatch); + code.push_set_cur(try_catch_op.block0); + blk_fl::val res0 = parse_block_stmt(lex, code); + code.close_pop_cur(lex.cur().loc); + lex.expect(_Catch); + code.push_set_cur(try_catch_op.block1); + open_scope(lex); + Expr* expr = parse_expr(lex, code, true); + expr->chk_lvalue(lex.cur()); + TypeExpr* tvm_error_type = TypeExpr::new_tensor(TypeExpr::new_var(), TypeExpr::new_atomic(_Int)); + try { + unify(expr->e_type, tvm_error_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "`catch` arguments have incorrect type " << expr->e_type << ": " << ue; + lex.cur().error(os.str()); + } + expr->predefine_vars(); + expr->define_new_vars(code); + try_catch_op.left = expr->pre_compile(code); + tolk_assert(try_catch_op.left.size() == 2 || try_catch_op.left.size() == 1); + blk_fl::val res1 = parse_block_stmt(lex, code); + close_scope(lex); + code.close_pop_cur(lex.cur().loc); + blk_fl::combine_parallel(res0, res1); + return res0; +} + +blk_fl::val parse_if_stmt(Lexer& lex, CodeBlob& code, int first_lex = _If) { + SrcLocation loc{lex.cur().loc}; + lex.expect(first_lex); + auto expr = parse_expr(lex, code); + expr->chk_rvalue(lex.cur()); + auto flag_type = TypeExpr::new_atomic(_Int); + try { + unify(expr->e_type, flag_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "`if` condition value of type " << expr->e_type << " is not an integer: " << ue; + lex.cur().error(os.str()); + } + std::vector tmp_vars = expr->pre_compile(code); + if (tmp_vars.size() != 1) { + lex.cur().error("condition value is not a singleton"); + } + Op& if_op = code.emplace_back(loc, Op::_If, tmp_vars); + code.push_set_cur(if_op.block0); + blk_fl::val res1 = 
parse_block_stmt(lex, code); + blk_fl::val res2 = blk_fl::init; + code.close_pop_cur(lex.cur().loc); + if (lex.tp() == _Else) { + lex.expect(_Else); + code.push_set_cur(if_op.block1); + res2 = parse_block_stmt(lex, code); + code.close_pop_cur(lex.cur().loc); + } else if (lex.tp() == _Elseif || lex.tp() == _Elseifnot) { + code.push_set_cur(if_op.block1); + res2 = parse_if_stmt(lex, code, lex.tp()); + code.close_pop_cur(lex.cur().loc); + } else { + if_op.block1 = std::make_unique(lex.cur().loc, Op::_Nop); + } + if (first_lex == _Ifnot || first_lex == _Elseifnot) { + std::swap(if_op.block0, if_op.block1); + } + blk_fl::combine_parallel(res1, res2); + return res1; +} + +blk_fl::val parse_stmt(Lexer& lex, CodeBlob& code) { + switch (lex.tp()) { + case _Return: { + lex.next(); + return parse_return_stmt(lex, code); + } + case '{': { + return parse_block_stmt(lex, code); + } + case ';': { + lex.next(); + return blk_fl::init; + } + case _Repeat: + return parse_repeat_stmt(lex, code); + case _If: + case _Ifnot: + return parse_if_stmt(lex, code, lex.tp()); + case _Do: + return parse_do_stmt(lex, code); + case _While: + return parse_while_stmt(lex, code); + case _Try: + return parse_try_catch_stmt(lex, code); + default: { + auto expr = parse_expr(lex, code); + expr->chk_rvalue(lex.cur()); + expr->pre_compile(code); + lex.expect(';'); + return blk_fl::end; + } + } +} + +CodeBlob* parse_func_body(Lexer& lex, FormalArgList arg_list, TypeExpr* ret_type) { + lex.expect('{'); + CodeBlob* blob = new CodeBlob{ret_type}; + if (pragma_allow_post_modification.enabled()) { + blob->flags |= CodeBlob::_AllowPostModification; + } + if (pragma_compute_asm_ltr.enabled()) { + blob->flags |= CodeBlob::_ComputeAsmLtr; + } + blob->import_params(std::move(arg_list)); + blk_fl::val res = blk_fl::init; + bool warned = false; + while (lex.tp() != '}') { + if (!(res & blk_fl::end) && !warned) { + lex.cur().loc.show_warning("unreachable code"); + warned = true; + } + blk_fl::combine(res, 
parse_stmt(lex, *blob)); + } + if (res & blk_fl::end) { + parse_implicit_ret_stmt(lex, *blob); + } + blob->close_blk(lex.cur().loc); + lex.expect('}'); + return blob; +} + +SymValAsmFunc* parse_asm_func_body(Lexer& lex, TypeExpr* func_type, const FormalArgList& arg_list, TypeExpr* ret_type, + bool impure = false) { + auto loc = lex.cur().loc; + lex.expect(_Asm); + int cnt = (int)arg_list.size(); + int width = ret_type->get_width(); + if (width < 0 || width > 16) { + throw ParseError{loc, "return type of an assembler built-in function must have a well-defined fixed width"}; + } + if (arg_list.size() > 16) { + throw ParseError{loc, "assembler built-in function must have at most 16 arguments"}; + } + std::vector cum_arg_width; + cum_arg_width.push_back(0); + int tot_width = 0; + for (auto& arg : arg_list) { + int arg_width = std::get(arg)->get_width(); + if (arg_width < 0 || arg_width > 16) { + throw ParseError{std::get(arg), + "parameters of an assembler built-in function must have a well-defined fixed width"}; + } + cum_arg_width.push_back(tot_width += arg_width); + } + std::vector asm_ops; + std::vector arg_order, ret_order; + if (lex.tp() == '(') { + lex.expect('('); + if (lex.tp() != _Mapsto) { + std::vector visited(cnt, false); + for (int i = 0; i < cnt; i++) { + if (lex.tp() != _Ident) { + lex.expect(_Ident); + } + auto sym = lookup_symbol(lex.cur().val); + int j; + for (j = 0; j < cnt; j++) { + if (std::get(arg_list[j]) == sym) { + break; + } + } + if (j == cnt) { + lex.cur().error("formal argument name expected"); + } + if (visited[j]) { + lex.cur().error("formal argument listed twice"); + } + visited[j] = true; + int c1 = cum_arg_width[j], c2 = cum_arg_width[j + 1]; + while (c1 < c2) { + arg_order.push_back(c1++); + } + lex.next(); + } + tolk_assert(arg_order.size() == (unsigned)tot_width); + } + if (lex.tp() == _Mapsto) { + lex.expect(_Mapsto); + std::vector visited(width, false); + for (int i = 0; i < width; i++) { + if (lex.tp() != Lexem::Number || 
lex.cur().str.size() > 3) { + lex.expect(Lexem::Number); + } + int j = atoi(lex.cur().str.c_str()); + if (j < 0 || j >= width || visited[j]) { + lex.cur().error("expected integer return value index 0 .. width-1"); + } + visited[j] = true; + ret_order.push_back(j); + lex.next(); + } + } + lex.expect(')'); + } + while (lex.tp() == _String) { + std::string ops = lex.cur().str; // \n\n... + std::string op; + for (const char& c : ops) { + if (c == '\n') { + if (!op.empty()) { + asm_ops.push_back(AsmOp::Parse(op, cnt, width)); + if (asm_ops.back().is_custom()) { + cnt = width; + } + op.clear(); + } + } else { + op.push_back(c); + } + } + if (!op.empty()) { + asm_ops.push_back(AsmOp::Parse(op, cnt, width)); + if (asm_ops.back().is_custom()) { + cnt = width; + } + } + lex.next(); + } + if (asm_ops.empty()) { + throw ParseError{lex.cur().loc, "string with assembler instruction expected"}; + } + lex.expect(';'); + std::string crc_s; + for (const AsmOp& asm_op : asm_ops) { + crc_s += asm_op.op; + } + crc_s.push_back(impure); + for (const int& x : arg_order) { + crc_s += std::string((const char*) (&x), (const char*) (&x + 1)); + } + for (const int& x : ret_order) { + crc_s += std::string((const char*) (&x), (const char*) (&x + 1)); + } + auto res = new SymValAsmFunc{func_type, asm_ops, impure}; + res->arg_order = std::move(arg_order); + res->ret_order = std::move(ret_order); + res->crc = td::crc64(crc_s); + return res; +} + +std::vector parse_type_var_list(Lexer& lex) { + std::vector res; + lex.expect(_Forall); + int idx = 0; + while (true) { + if (lex.tp() == _Type) { + lex.next(); + } + if (lex.tp() != _Ident) { + throw ParseError{lex.cur().loc, "free type identifier expected"}; + } + auto loc = lex.cur().loc; + if (prohibited_var_names.count(symbols.get_name(lex.cur().val))) { + throw ParseError{loc, PSTRING() << "symbol `" << symbols.get_name(lex.cur().val) + << "` cannot be redefined as a variable"}; + } + SymDef* new_sym_def = define_symbol(lex.cur().val, true, loc); + 
if (!new_sym_def || new_sym_def->value) { + lex.cur().error_at("redefined type variable `", "`"); + } + auto var = TypeExpr::new_var(idx); + new_sym_def->value = new SymValType{SymVal::_Typename, idx++, var}; + res.push_back(var); + lex.next(); + if (lex.tp() != ',') { + break; + } + lex.next(); + } + lex.expect(_Mapsto); + return res; +} + +void type_var_usage(TypeExpr* expr, const std::vector& typevars, std::vector& used) { + if (expr->constr != TypeExpr::te_Var) { + for (auto arg : expr->args) { + type_var_usage(arg, typevars, used); + } + return; + } + for (std::size_t i = 0; i < typevars.size(); i++) { + if (typevars[i] == expr) { + used.at(i) = true; + return; + } + } + return; +} + +TypeExpr* compute_type_closure(TypeExpr* expr, const std::vector& typevars) { + if (typevars.empty()) { + return expr; + } + std::vector used(typevars.size(), false); + type_var_usage(expr, typevars, used); + std::vector used_vars; + for (std::size_t i = 0; i < typevars.size(); i++) { + if (used.at(i)) { + used_vars.push_back(typevars[i]); + } + } + if (!used_vars.empty()) { + expr = TypeExpr::new_forall(std::move(used_vars), expr); + } + return expr; +} + +void parse_func_def(Lexer& lex) { + SrcLocation loc{lex.cur().loc}; + open_scope(lex); + std::vector type_vars; + if (lex.tp() == _Forall) { + type_vars = parse_type_var_list(lex); + } + auto ret_type = parse_type(lex); + if (lex.tp() != _Ident) { + throw ParseError{lex.cur().loc, "function name identifier expected"}; + } + Lexem func_name = lex.cur(); + lex.next(); + FormalArgList arg_list = parse_formal_args(lex); + bool impure = (lex.tp() == _Impure); + if (impure) { + lex.next(); + } + int f = 0; + if (lex.tp() == _Inline || lex.tp() == _InlineRef) { + f = (lex.tp() == _Inline) ? 
1 : 2; + lex.next(); + } + td::RefInt256 method_id; + std::string method_name; + if (lex.tp() == _MethodId) { + lex.next(); + if (lex.tp() == '(') { + lex.expect('('); + if (lex.tp() == Lexem::String) { + method_name = lex.cur().str; + } else if (lex.tp() == Lexem::Number) { + method_name = lex.cur().str; + method_id = td::string_to_int256(method_name); + if (method_id.is_null()) { + lex.cur().error_at("invalid integer constant `", "`"); + } + } else { + throw ParseError{lex.cur().loc, "integer or string method identifier expected"}; + } + lex.next(); + lex.expect(')'); + } else { + method_name = func_name.str; + } + if (method_id.is_null()) { + unsigned crc = td::crc16(method_name); + method_id = td::make_refint((crc & 0xffff) | 0x10000); + } + } + if (lex.tp() != ';' && lex.tp() != '{' && lex.tp() != _Asm) { + lex.expect('{', "function body block expected"); + } + TypeExpr* func_type = TypeExpr::new_map(extract_total_arg_type(arg_list), ret_type); + func_type = compute_type_closure(func_type, type_vars); + if (verbosity >= 1) { + std::cerr << "function " << func_name.str << " : " << func_type << std::endl; + } + SymDef* func_sym = define_global_symbol(func_name.val, 0, loc); + tolk_assert(func_sym); + SymValFunc* func_sym_val = dynamic_cast(func_sym->value); + if (func_sym->value) { + if (func_sym->value->type != SymVal::_Func || !func_sym_val) { + lex.cur().error("was not defined as a function before"); + } + try { + unify(func_sym_val->sym_type, func_type); + } catch (UnifyError& ue) { + std::ostringstream os; + os << "previous type of function " << func_name.str << " : " << func_sym_val->sym_type + << " cannot be unified with new type " << func_type << ": " << ue; + lex.cur().error(os.str()); + } + } + if (lex.tp() == ';') { + make_new_glob_func(func_sym, func_type, impure); + lex.next(); + } else if (lex.tp() == '{') { + if (dynamic_cast(func_sym_val)) { + lex.cur().error("function `"s + func_name.str + "` has been already defined as an assembler built-in"); 
+ } + SymValCodeFunc* func_sym_code; + if (func_sym_val) { + func_sym_code = dynamic_cast(func_sym_val); + if (!func_sym_code) { + lex.cur().error("function `"s + func_name.str + "` has been already defined in an yet-unknown way"); + } + } else { + func_sym_code = make_new_glob_func(func_sym, func_type, impure); + } + if (func_sym_code->code) { + lex.cur().error("redefinition of function `"s + func_name.str + "`"); + } + CodeBlob* code = parse_func_body(lex, arg_list, ret_type); + code->name = func_name.str; + code->loc = loc; + // code->print(std::cerr); // !!!DEBUG!!! + func_sym_code->code = code; + } else { + Lexem asm_lexem = lex.cur(); + SymValAsmFunc* asm_func = parse_asm_func_body(lex, func_type, arg_list, ret_type, impure); + if (func_sym_val) { + if (dynamic_cast(func_sym_val)) { + asm_lexem.error("function `"s + func_name.str + "` was already declared as an ordinary function"); + } + SymValAsmFunc* asm_func_old = dynamic_cast(func_sym_val); + if (asm_func_old) { + if (asm_func->crc != asm_func_old->crc) { + asm_lexem.error("redefinition of built-in assembler function `"s + func_name.str + "`"); + } + } else { + asm_lexem.error("redefinition of previously (somehow) defined function `"s + func_name.str + "`"); + } + } + func_sym->value = asm_func; + } + if (method_id.not_null()) { + auto val = dynamic_cast(func_sym->value); + if (!val) { + lex.cur().error("cannot set method id for unknown function `"s + func_name.str + "`"); + } + if (val->method_id.is_null()) { + val->method_id = std::move(method_id); + } else if (td::cmp(val->method_id, method_id) != 0) { + lex.cur().error("integer method identifier for `"s + func_name.str + "` changed from " + + val->method_id->to_dec_string() + " to a different value " + method_id->to_dec_string()); + } + } + if (f) { + auto val = dynamic_cast(func_sym->value); + if (!val) { + lex.cur().error("cannot set unknown function `"s + func_name.str + "` as an inline"); + } + if (!(val->flags & 3)) { + val->flags = 
(short)(val->flags | f); + } else if ((val->flags & 3) != f) { + lex.cur().error("inline mode for `"s + func_name.str + "` changed with respect to a previous declaration"); + } + } + if (verbosity >= 1) { + std::cerr << "new type of function " << func_name.str << " : " << func_type << std::endl; + } + close_scope(lex); +} + +std::string tolk_ver_test = tolk_version; + +void parse_pragma(Lexer& lex) { + auto pragma = lex.cur(); + lex.next(); + if (lex.tp() != _Ident) { + lex.expect(_Ident, "pragma name expected"); + } + auto pragma_name = lex.cur().str; + lex.next(); + if (!pragma_name.compare("version") || !pragma_name.compare("not-version")) { + bool negate = !pragma_name.compare("not-version"); + char op = '='; bool eq = false; + int sem_ver[3] = {0, 0, 0}; + char segs = 1; + auto stoi = [&](const std::string& s) { + auto R = td::to_integer_safe(s); + if (R.is_error()) { + lex.cur().error("invalid semver format"); + } + return R.move_as_ok(); + }; + if (lex.tp() == _Number) { + sem_ver[0] = stoi(lex.cur().str); + } else if (lex.tp() == _Ident) { + auto id1 = lex.cur().str; + char ch1 = id1[0]; + if ((ch1 == '>') || (ch1 == '<') || (ch1 == '=') || (ch1 == '^')) { + op = ch1; + } else { + lex.cur().error("unexpected comparator operation"); + } + if (id1.length() < 2) { + lex.cur().error("expected number after comparator"); + } + if (id1[1] == '=') { + eq = true; + if (id1.length() < 3) { + lex.cur().error("expected number after comparator"); + } + sem_ver[0] = stoi(id1.substr(2)); + } else { + sem_ver[0] = stoi(id1.substr(1)); + } + } else { + lex.cur().error("expected semver with optional comparator"); + } + lex.next(); + if (lex.tp() != ';') { + if (lex.tp() != _Ident || lex.cur().str[0] != '.') { + lex.cur().error("invalid semver format"); + } + sem_ver[1] = stoi(lex.cur().str.substr(1)); + segs = 2; + lex.next(); + } + if (lex.tp() != ';') { + if (lex.tp() != _Ident || lex.cur().str[0] != '.') { + lex.cur().error("invalid semver format"); + } + sem_ver[2] = 
stoi(lex.cur().str.substr(1)); + segs = 3; + lex.next(); + } + // End reading semver from source code + int tolk_ver[3] = {0, 0, 0}; + std::istringstream iss(tolk_ver_test); + std::string s; + for (int idx = 0; idx < 3; idx++) { + std::getline(iss, s, '.'); + tolk_ver[idx] = stoi(s); + } + // End parsing embedded semver + std::string semver_expr; + if (negate) { + semver_expr += '!'; + } + semver_expr += op; + if (eq) { + semver_expr += '='; + } + for (int idx = 0; idx < 3; idx++) { + semver_expr += std::to_string(sem_ver[idx]); + if (idx < 2) + semver_expr += '.'; + } + bool match = true; + switch (op) { + case '=': + if ((tolk_ver[0] != sem_ver[0]) || + (tolk_ver[1] != sem_ver[1]) || + (tolk_ver[2] != sem_ver[2])) { + match = false; + } + break; + case '>': + if ( ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] == sem_ver[2]) && !eq) || + ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] < sem_ver[2])) || + ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] < sem_ver[1])) || + ((tolk_ver[0] < sem_ver[0])) ) { + match = false; + } + break; + case '<': + if ( ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] == sem_ver[2]) && !eq) || + ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] == sem_ver[1]) && (tolk_ver[2] > sem_ver[2])) || + ((tolk_ver[0] == sem_ver[0]) && (tolk_ver[1] > sem_ver[1])) || + ((tolk_ver[0] > sem_ver[0])) ) { + match = false; + } + break; + case '^': + if ( ((segs == 3) && ((tolk_ver[0] != sem_ver[0]) || (tolk_ver[1] != sem_ver[1]) || (tolk_ver[2] < sem_ver[2]))) + || ((segs == 2) && ((tolk_ver[0] != sem_ver[0]) || (tolk_ver[1] < sem_ver[1]))) + || ((segs == 1) && ((tolk_ver[0] < sem_ver[0]))) ) { + match = false; + } + break; + } + if ((match && negate) || (!match && !negate)) { + pragma.error(std::string("Tolk version ") + tolk_ver_test + " does not satisfy condition " + semver_expr); + } + } else if (!pragma_name.compare("test-version-set")) { + if (lex.tp() != _String) { 
+ lex.cur().error("version string expected"); + } + tolk_ver_test = lex.cur().str; + lex.next(); + } else if (pragma_name == pragma_allow_post_modification.name()) { + pragma_allow_post_modification.enable(lex.cur().loc); + } else if (pragma_name == pragma_compute_asm_ltr.name()) { + pragma_compute_asm_ltr.enable(lex.cur().loc); + } else { + lex.cur().error(std::string{"unknown pragma `"} + pragma_name + "`"); + } + lex.expect(';'); +} + +std::vector source_fdescr; + +std::map source_files; +std::stack inclusion_locations; + +void parse_include(Lexer& lex, const FileDescr* fdescr) { + auto include = lex.cur(); + lex.expect(_IncludeHashtag); + if (lex.tp() != _String) { + lex.expect(_String, "source file name"); + } + std::string val = lex.cur().str; + std::string parent_dir = fdescr->filename; + if (parent_dir.rfind('/') != std::string::npos) { + val = parent_dir.substr(0, parent_dir.rfind('/') + 1) + val; + } + lex.next(); + lex.expect(';'); + if (!parse_source_file(val.c_str(), include, false)) { + include.error(std::string{"failed parsing included file `"} + val + "`"); + } +} + +bool parse_source(std::istream* is, FileDescr* fdescr) { + SourceReader reader{is, fdescr}; + Lexer lex{reader, true, ";,()[] ~."}; + while (lex.tp() != _Eof) { + if (lex.tp() == _PragmaHashtag) { + parse_pragma(lex); + } else if (lex.tp() == _IncludeHashtag) { + parse_include(lex, fdescr); + } else if (lex.tp() == _Global) { + parse_global_var_decls(lex); + } else if (lex.tp() == _Const) { + parse_const_decls(lex); + } else { + parse_func_def(lex); + } + } + return true; +} + +bool parse_source_file(const char* filename, Lexem lex, bool is_main) { + if (!filename || !*filename) { + auto msg = "source file name is an empty string"; + if (lex.tp) { + lex.error(msg); + } else { + throw Fatal{msg}; + } + } + + auto path_res = read_callback(ReadCallback::Kind::Realpath, filename); + if (path_res.is_error()) { + auto error = path_res.move_as_error(); + lex.error(error.message().c_str()); + 
return false; + } + std::string real_filename = path_res.move_as_ok(); + auto it = source_files.find(real_filename); + if (it != source_files.end()) { + it->second->is_main |= is_main; + if (verbosity >= 2) { + if (lex.tp) { + lex.loc.show_warning(std::string{"skipping file "} + real_filename + " because it was already included"); + } else { + std::cerr << "warning: skipping file " << real_filename << " because it was already included" << std::endl; + } + } + return true; + } + if (lex.tp) { // included + generated_from += std::string{"incl:"}; + } + generated_from += std::string{"`"} + filename + "` "; + FileDescr* cur_source = new FileDescr{filename}; + source_files[real_filename] = cur_source; + cur_source->is_main = is_main; + source_fdescr.push_back(cur_source); + auto file_res = read_callback(ReadCallback::Kind::ReadFile, filename); + if (file_res.is_error()) { + auto msg = file_res.move_as_error().message().str(); + if (lex.tp) { + lex.error(msg); + } else { + throw Fatal{msg}; + } + } + auto file_str = file_res.move_as_ok(); + std::stringstream ss{file_str}; + inclusion_locations.push(lex.loc); + bool res = parse_source(&ss, cur_source); + inclusion_locations.pop(); + return res; +} + +bool parse_source_stdin() { + FileDescr* cur_source = new FileDescr{"stdin", true}; + cur_source->is_main = true; + source_fdescr.push_back(cur_source); + return parse_source(&std::cin, cur_source); +} + +} // namespace tolk diff --git a/tolk/srcread.cpp b/tolk/srcread.cpp new file mode 100644 index 00000000..c71f498d --- /dev/null +++ b/tolk/srcread.cpp @@ -0,0 +1,228 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. 
+ + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "srcread.h" +#include + +namespace tolk { + +/* + * + * SOURCE FILE READER + * + */ + +std::ostream& operator<<(std::ostream& os, const FileDescr* fdescr) { + return os << (fdescr ? (fdescr->is_stdin ? "stdin" : fdescr->filename) : "unknown-location"); +} + +std::ostream& operator<<(std::ostream& os, const Fatal& fatal) { + return os << fatal.get_msg(); +} + +const char* FileDescr::convert_offset(long offset, long* line_no, long* line_pos, long* line_size) const { + long lno = 0, lpos = -1, lsize = 0; + const char* lstart = nullptr; + if (offset >= 0 && offset < (long)text.size()) { + auto it = std::upper_bound(line_offs.begin(), line_offs.end(), offset); + lno = it - line_offs.begin(); + if (lno && it != line_offs.end()) { + lsize = it[0] - it[-1]; + lpos = offset - it[-1]; + lstart = text.data() + it[-1]; + } + } else { + lno = (long)line_offs.size(); + } + if (line_no) { + *line_no = lno; + } + if (line_pos) { + *line_pos = lpos; + } + if (line_size) { + *line_size = lsize; + } + return lstart; +} + +const char* FileDescr::push_line(std::string new_line) { + if (line_offs.empty()) { + line_offs.push_back(0); + } + std::size_t cur_size = text.size(); + text += new_line; + text += '\0'; + line_offs.push_back((long)text.size()); + return text.data() + cur_size; +} + +void SrcLocation::show(std::ostream& os) const { + os << fdescr; + long line_no, line_pos; + if (fdescr && convert_pos(&line_no, &line_pos)) { + os << ':' << line_no; + if (line_pos >= 0) { + os << ':' << (line_pos + 1); + } + } +} + +bool SrcLocation::show_context(std::ostream& os) const { + long line_no, 
line_pos, line_size; + if (!fdescr || !convert_pos(&line_no, &line_pos, &line_size)) { + return false; + } + bool skip_left = (line_pos > 200), skip_right = (line_pos + 200u < line_size); + const char* here = fdescr->text.data() + char_offs; + const char* base = here - line_pos; + const char* start = skip_left ? here - 100 : base; + const char* end = skip_right ? here + 100 : base + line_size; + os << " "; + if (skip_left) { + os << "... "; + } + for (const char* ptr = start; ptr < end; ptr++) { + os << (char)*ptr; + } + if (skip_right) { + os << " ..."; + } + os << std::endl; + os << " "; + if (skip_left) { + os << "... "; + } + for (const char* ptr = start; ptr < here; ptr++) { + char c = *ptr; + os << (c == 9 || c == 10 ? c : ' '); + } + os << '^' << std::endl; + return true; +} + +std::ostream& operator<<(std::ostream& os, const SrcLocation& loc) { + loc.show(os); + return os; +} + +void SrcLocation::show_gen_error(std::ostream& os, std::string message, std::string err_type) const { + show(os); + if (!err_type.empty()) { + os << ": " << err_type; + } + os << ": " << message << std::endl; + show_context(os); +} + +std::ostream& operator<<(std::ostream& os, const Error& error) { + error.show(os); + return os; +} + +void ParseError::show(std::ostream& os) const { + os << where << ": error: " << message << std::endl; + where.show_context(os); +} + +SourceReader::SourceReader(std::istream* _is, FileDescr* _fdescr) + : ifs(_is), fdescr(_fdescr), loc(_fdescr), eof(false), cur_line_len(0), start(0), cur(0), end(0) { + load_line(); +} + +void SourceReader::set_eof() { + if (!eof) { + eof = true; + start = cur = end = 0; + } +} + +int SourceReader::skip_spc() { + if (!cur) { + return 0; + } + const char* ptr = cur; + int res = 0; + while (*ptr == ' ' || *ptr == 9) { + ++ptr; + ++res; + } + set_ptr(ptr); + return res; +} + +bool SourceReader::seek_eof() { + while (seek_eoln()) { + if (!load_line()) { + return true; + } + } + return false; +} + +const char* 
SourceReader::set_ptr(const char* ptr) { + if (ptr != cur) { + if (ptr < cur || ptr > end) { + error("parsing position went outside of line"); + } + loc.char_offs += ptr - cur; + cur = ptr; + } + return ptr; +} + +bool SourceReader::load_line() { + if (eof) { + return false; + } + loc.set_eof(); + if (ifs->eof()) { + set_eof(); + return false; + } + std::getline(*ifs, cur_line); + if (ifs->fail()) { + set_eof(); + if (!ifs->eof()) { + error("cannot read line from source stream"); + } + return false; + } + std::size_t len = cur_line.size(); + if (len > 0xffffff) { + set_eof(); + error("line too long"); + return false; + } + if (len && cur_line.back() == '\r') { + // CP/M line breaks support + cur_line.pop_back(); + --len; + } + cur_line_len = (int)len; + if (fdescr) { + cur = start = fdescr->push_line(std::move(cur_line)); + end = start + len; + loc.char_offs = (std::size_t)(cur - fdescr->text.data()); + cur_line.clear(); + } else { + cur = start = cur_line.c_str(); + end = start + cur_line_len; + } + return true; +} + +} // namespace tolk diff --git a/tolk/srcread.h b/tolk/srcread.h new file mode 100644 index 00000000..3731a5ca --- /dev/null +++ b/tolk/srcread.h @@ -0,0 +1,162 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#pragma once + +#include +#include +#include + +namespace tolk { + +/* + * + * SOURCE FILE READER + * + */ + +struct FileDescr { + std::string filename; + std::string text; + std::vector line_offs; + bool is_stdin; + bool is_main = false; + FileDescr(std::string _fname, bool _stdin = false) : filename(std::move(_fname)), is_stdin(_stdin) { + } + const char* push_line(std::string new_line); + const char* convert_offset(long offset, long* line_no, long* line_pos, long* line_size = nullptr) const; +}; + +struct Fatal { + std::string message; + Fatal(std::string _msg) : message(std::move(_msg)) { + } + std::string get_msg() const { + return message; + } +}; + +std::ostream& operator<<(std::ostream& os, const Fatal& fatal); + +struct SrcLocation { + const FileDescr* fdescr; + long char_offs; + SrcLocation() : fdescr(nullptr), char_offs(-1) { + } + SrcLocation(const FileDescr* _fdescr, long offs = -1) : fdescr(_fdescr), char_offs(-1) { + } + bool defined() const { + return fdescr; + } + bool eof() const { + return char_offs == -1; + } + void set_eof() { + char_offs = -1; + } + const char* convert_pos(long* line_no, long* line_pos, long* line_size = nullptr) const { + return defined() ? 
fdescr->convert_offset(char_offs, line_no, line_pos, line_size) : nullptr; + } + void show(std::ostream& os) const; + bool show_context(std::ostream& os) const; + void show_gen_error(std::ostream& os, std::string message, std::string err_type = "") const; + void show_note(std::string err_msg) const { + show_gen_error(std::cerr, err_msg, "note"); + } + void show_warning(std::string err_msg) const { + show_gen_error(std::cerr, err_msg, "warning"); + } + void show_error(std::string err_msg) const { + show_gen_error(std::cerr, err_msg, "error"); + } +}; + +std::ostream& operator<<(std::ostream& os, const SrcLocation& loc); + +struct Error { + virtual ~Error() = default; + virtual void show(std::ostream& os) const = 0; +}; + +std::ostream& operator<<(std::ostream& os, const Error& error); + +struct ParseError : Error { + SrcLocation where; + std::string message; + ParseError(const SrcLocation& _where, std::string _msg) : where(_where), message(_msg) { + } + ParseError(const SrcLocation* _where, std::string _msg) : message(_msg) { + if (_where) { + where = *_where; + } + } + ~ParseError() override = default; + void show(std::ostream& os) const override; +}; + +class SourceReader { + std::istream* ifs; + FileDescr* fdescr; + SrcLocation loc; + bool eof; + std::string cur_line; + int cur_line_len; + void set_eof(); + const char *start, *cur, *end; + + public: + SourceReader(std::istream* _is, FileDescr* _fdescr); + bool load_line(); + bool is_eof() const { + return eof; + } + int is_eoln() const { + return cur == end; + } + int skip_spc(); + bool seek_eoln() { + skip_spc(); + return is_eoln(); + } + bool seek_eof(); + const char* cur_line_cstr() const { + return cur_line.c_str(); + } + const SrcLocation& here() const { + return loc; + } + char cur_char() const { + return *cur; + } + char next_char() const { + return cur[1]; + } + const char* get_ptr() const { + return cur; + } + const char* get_end_ptr() const { + return end; + } + const char* set_ptr(const char* ptr); + 
void advance(int n) { + set_ptr(get_ptr() + n); + } + void error(std::string err_msg) { + throw ParseError{loc, err_msg}; + } +}; + +} // namespace tolk diff --git a/tolk/stack-transform.cpp b/tolk/stack-transform.cpp new file mode 100644 index 00000000..fe5735e5 --- /dev/null +++ b/tolk/stack-transform.cpp @@ -0,0 +1,1054 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . 
+*/ +#include "tolk.h" + +namespace tolk { + +/* + * + * GENERIC STACK TRANSFORMATIONS + * + */ + +StackTransform::StackTransform(std::initializer_list list) { + *this = list; +} + +StackTransform &StackTransform::operator=(std::initializer_list list) { + if (list.size() > 255) { + invalidate(); + return *this; + } + set_id(); + if (!list.size()) { + return *this; + } + int m = (int)list.size(); + d = list.begin()[m - 1] - (m - 1); + if (d >= 128 || d < -128) { + invalidate(); + return *this; + } + for (int i = 0; i < m - 1; i++) { + int x = d + i; + int y = list.begin()[i]; + if (y != x) { + if (x != (short)x || y != (short)y || n == max_n) { + invalidate(); + return *this; + } + dp = std::max(dp, std::max(x, y) + 1); + A[n++] = std::make_pair((short)x, (short)y); + } + } + return *this; +} + +bool StackTransform::assign(const StackTransform &other) { + if (!other.is_valid() || (unsigned)other.n > max_n) { + return invalidate(); + } + d = other.d; + n = other.n; + dp = other.dp; + c = other.c; + invalid = false; + for (int i = 0; i < n; i++) { + A[i] = other.A[i]; + } + return true; +} + +int StackTransform::get(int x) const { + if (!is_valid()) { + return -1; + } + if (x <= c_start) { + return x - c; + } + x += d; + int i; + for (i = 0; i < n && A[i].first < x; i++) { + } + if (i < n && A[i].first == x) { + return A[i].second; + } else { + return x; + } +} + +bool StackTransform::set(int x, int y, bool relaxed) { + if (!is_valid()) { + return false; + } + if (x < 0) { + return (relaxed && y == x + d) || invalidate(); + } + if (!relaxed) { + touch(x); + } + x += d; + int i; + for (i = 0; i < n && A[i].first < x; i++) { + } + if (i < n && A[i].first == x) { + if (x != y) { + if (y != (short)y) { + return invalidate(); + } + A[i].second = (short)y; + } else { + --n; + for (; i < n; i++) { + A[i] = A[i + 1]; + } + } + } else { + if (x != y) { + if (x != (short)x || y != (short)y || n == max_n) { + return invalidate(); + } + for (int j = n++; j > i; j--) { + A[j] = 
A[j - 1]; + } + A[i].first = (short)x; + A[i].second = (short)y; + touch(x - d); + touch(y); + } + } + return true; +} + +// f(x') = x' + d for all x' >= x ? +bool StackTransform::is_trivial_after(int x) const { + return is_valid() && (!n || A[n - 1].first < x + d); +} + +// card f^{-1}(y) +int StackTransform::preimage_count(int y) const { + if (!is_valid()) { + return -1; + } + int count = (y >= d); + for (const auto &pair : A) { + if (pair.second == y) { + ++count; + } else if (pair.first == y) { + --count; + } + } + return count; +} + +// f^{-1}(y) +std::vector StackTransform::preimage(int y) const { + if (!is_valid()) { + return {}; + } + std::vector res; + bool f = (y >= d); + for (const auto &pair : A) { + if (pair.first > y && f) { + res.push_back(y - d); + f = false; + } + if (pair.first == y) { + f = false; + } else if (pair.second == y) { + res.push_back(pair.first - d); + } + } + return res; +} + +// is f:N->N bijective ? +bool StackTransform::is_permutation() const { + if (!is_valid() || d) { + return false; + } + tolk_assert(n <= max_n); + std::array X, Y; + for (int i = 0; i < n; i++) { + X[i] = A[i].first; + Y[i] = A[i].second; + if (Y[i] < 0) { + return false; + } + } + std::sort(Y.begin(), Y.begin() + n); + for (int i = 0; i < n; i++) { + if (X[i] != Y[i]) { + return false; + } + } + return true; +} + +bool StackTransform::remove_negative() { + int s = 0; + while (s < n && A[s].first < d) { + ++s; + } + if (s) { + n -= s; + for (int i = 0; i < n; i++) { + A[i] = A[i + s]; + } + } + return true; +} + +int StackTransform::try_load(int &i, int offs) const { + return i < n ? 
A[i++].first + offs : inf_x; +} + +bool StackTransform::try_store(int x, int y) { + if (x == y || x < d) { + return true; + } + if (n == max_n || x != (short)x || y != (short)y) { + return invalidate(); + } + A[n].first = (short)x; + A[n++].second = (short)y; + return true; +} + +// c := a * b +bool StackTransform::compose(const StackTransform &a, const StackTransform &b, StackTransform &c) { + if (!a.is_valid() || !b.is_valid()) { + return c.invalidate(); + } + c.d = a.d + b.d; + c.n = 0; + c.dp = std::max(a.dp, b.dp + a.d); + c.c = a.c + b.c; + c.invalid = false; + int i = 0, j = 0; + int x1 = a.try_load(i); + int x2 = b.try_load(j, a.d); + while (true) { + if (x1 < x2) { + int y = a.A[i - 1].second; + if (!c.try_store(x1, y)) { + return false; + } + x1 = a.try_load(i); + } else if (x2 < inf_x) { + if (x1 == x2) { + x1 = a.try_load(i); + } + int y = b.A[j - 1].second; + if (!c.try_store(x2, a(y))) { + return false; + } + x2 = b.try_load(j, a.d); + } else { + return true; + } + } +} + +// this = this * other +bool StackTransform::apply(const StackTransform &other) { + StackTransform res; + if (!compose(*this, other, res)) { + return invalidate(); + } + return assign(res); +} + +// this = other * this +bool StackTransform::preapply(const StackTransform &other) { + StackTransform res; + if (!compose(other, *this, res)) { + return invalidate(); + } + return assign(res); +} + +StackTransform StackTransform::operator*(const StackTransform &b) const & { + StackTransform res; + compose(*this, b, res); + return res; +} + +// this = this * other +StackTransform &StackTransform::operator*=(const StackTransform &other) { + StackTransform res; + (compose(*this, other, res) && assign(res)) || invalidate(); + return *this; +} + +bool StackTransform::apply_xchg(int i, int j, bool relaxed) { + if (!is_valid() || i < 0 || j < 0) { + return invalidate(); + } + if (i == j) { + return relaxed || touch(i); + } + int u = touch_get(i), v = touch_get(j); + return set(i, v) && set(j, u); 
+} + +bool StackTransform::apply_push(int i) { + if (!is_valid() || i < 0) { + return invalidate(); + } + int u = touch_get(i); + return shift(-1) && set(0, u); +} + +bool StackTransform::apply_push_newconst() { + if (!is_valid()) { + return false; + } + return shift(-1) && set(0, c_start - c++); +} + +bool StackTransform::apply_pop(int i) { + if (!is_valid() || i < 0) { + return invalidate(); + } + if (!i) { + return touch(0) && shift(1); + } else { + return set(i, get(0)) && shift(1); + } +} + +bool StackTransform::apply_blkpop(int k) { + if (!is_valid() || k < 0) { + return invalidate(); + } + return !k || (touch(k - 1) && shift(k)); +} + +bool StackTransform::equal(const StackTransform &other, bool relaxed) const { + if (!is_valid() || !other.is_valid()) { + return false; + } + if (!(n == other.n && d == other.d)) { + return false; + } + for (int i = 0; i < n; i++) { + if (A[i] != other.A[i]) { + return false; + } + } + return relaxed || dp == other.dp; +} + +StackTransform StackTransform::Xchg(int i, int j, bool relaxed) { + StackTransform t; + t.apply_xchg(i, j, relaxed); + return t; +} + +StackTransform StackTransform::Push(int i) { + StackTransform t; + t.apply_push(i); + return t; +} + +StackTransform StackTransform::Pop(int i) { + StackTransform t; + t.apply_pop(i); + return t; +} + +bool StackTransform::is_xchg(int i, int j) const { + if (i == j) { + return is_id(); + } + return is_valid() && !d && n == 2 && i >= 0 && j >= 0 && get(i) == j && get(j) == i; +} + +bool StackTransform::is_xchg(int *i, int *j) const { + if (!is_valid() || d || n > 2 || !dp) { + return false; + } + if (!n) { + *i = *j = 0; + return true; + } + if (n != 2) { + return false; + } + int a = A[0].first, b = A[1].first; + if (A[0].second != b || A[1].second != a) { + return false; + } + *i = std::min(a, b); + *j = std::max(a, b); + return true; +} + +bool StackTransform::is_xchg_xchg(int i, int j, int k, int l) const { + if (is_valid() && !d && n <= 4 && (i | j | k | l) >= 0) { + 
StackTransform t; + return t.apply_xchg(i, j) && t.apply_xchg(k, l) && t <= *this; + } else { + return false; + } +} + +bool StackTransform::is_xchg_xchg(int *i, int *j, int *k, int *l) const { + if (!is_valid() || d || n > 4 || !dp || !is_permutation()) { + return false; + } + if (!n) { + *i = *j = *k = *l = 0; + return true; + } + if (n <= 2) { + *k = *l = 0; + return is_xchg(i, j); + } + if (n == 3) { + // rotation: a -> b -> c -> a + int a = A[0].first; + int b = A[0].second; + int s = (b == A[2].first ? 2 : 1); + int c = A[s].second; + if (b != A[s].first || c != A[3 - s].first || a != A[3 - s].second) { + return false; + } + // implement as XCHG s(a),s(c) ; XCHG s(a),s(b) + *i = *k = a; + *j = c; + *l = b; + return is_xchg_xchg(*i, *j, *k, *l); + } + *i = A[0].first; + *j = A[0].second; + if (get(*j) != *i) { + return false; + } + for (int s = 1; s < 4; s++) { + if (A[s].first != *j) { + *k = A[s].first; + *l = A[s].second; + return get(*l) == *k && is_xchg_xchg(*i, *j, *k, *l); + } + } + return false; +} + +bool StackTransform::is_push(int i) const { + return is_valid() && d == -1 && n == 1 && A[0].first == -1 && A[0].second == i; +} + +bool StackTransform::is_push(int *i) const { + if (is_valid() && d == -1 && n == 1 && A[0].first == -1 && A[0].second >= 0) { + *i = A[0].second; + return true; + } else { + return false; + } +} + +// 1 2 3 4 .. = pop0 +// 0 2 3 4 .. = pop1 +// 1 0 3 4 .. = pop2 +// 1 2 0 4 .. = pop3 +// POP s(i) : 1 2 ... i-1 0 i+1 ... ; d=1, n=1, {(i,0)} +bool StackTransform::is_pop(int i) const { + if (!is_valid() || d != 1 || n > 1 || i < 0) { + return false; + } + if (!i) { + return !n; + } + return n == 1 && A[0].first == i && !A[0].second; +} + +bool StackTransform::is_pop(int *i) const { + if (!is_valid() || d != 1 || n > 1) { + return false; + } + if (!n) { + *i = 0; + return true; + } + if (n == 1 && !A[0].second) { + *i = A[0].first; + return true; + } + return false; +} + +// POP s(i) ; POP s(j) : 2 ... i-1 0 i+1 ... j 1 j+2 ... 
; d=2, n=2, {(i,0),(j+1,1)} if i <> j+1 +bool StackTransform::is_pop_pop(int i, int j) const { + if (is_valid() && d == 2 && n <= 2 && i >= 0 && j >= 0) { + StackTransform t; + return t.apply_pop(i) && t.apply_pop(j) && t <= *this; + } else { + return false; + } +} + +bool StackTransform::is_pop_pop(int *i, int *j) const { + if (!is_valid() || d != 2 || n > 2) { + return false; + } + if (!n) { + *i = *j = 0; // 2DROP + } else if (n == 2) { + *i = A[0].first - A[0].second; + *j = A[1].first - A[1].second; + if (A[0].second > A[1].second) { + std::swap(*i, *j); + } + } else if (!A[0].second) { + *i = A[0].first; + *j = 0; + } else { + *i = 0; + *j = A[0].first - 1; + } + return is_pop_pop(*i, *j); +} + +const StackTransform StackTransform::rot{2, 0, 1, 3}; +const StackTransform StackTransform::rot_rev{1, 2, 0, 3}; + +bool StackTransform::is_rot() const { + return equal(rot, true); +} + +bool StackTransform::is_rotrev() const { + return equal(rot_rev, true); +} + +// PUSH i ; ROT == 1 i 0 2 3 +bool StackTransform::is_push_rot(int i) const { + return is_valid() && d == -1 && i >= 0 && is_trivial_after(3) && get(0) == 1 && get(1) == i && get(2) == 0; +} + +bool StackTransform::is_push_rot(int *i) const { + return is_valid() && (*i = get(1)) >= 0 && is_push_rot(*i); +} + +// PUSH i ; -ROT == 0 1 i 2 3 +bool StackTransform::is_push_rotrev(int i) const { + return is_valid() && d == -1 && i >= 0 && is_trivial_after(3) && get(0) == 0 && get(1) == 1 && get(2) == i; +} + +bool StackTransform::is_push_rotrev(int *i) const { + return is_valid() && (*i = get(2)) >= 0 && is_push_rotrev(*i); +} + +// PUSH s(i) ; XCHG s(j),s(k) --> i 0 1 .. i .. +// PUSH s(i) ; XCHG s(0),s(k) --> k-1 0 1 .. k-2 i k .. 
+bool StackTransform::is_push_xchg(int i, int j, int k) const { + StackTransform t; + return is_valid() && d == -1 && n <= 3 && t.apply_push(i) && t.apply_xchg(j, k) && t <= *this; +} + +bool StackTransform::is_push_xchg(int *i, int *j, int *k) const { + if (!(is_valid() && d == -1 && n <= 3 && n > 0)) { + return false; + } + int s = get(0); + if (s < 0) { + return false; + } + *i = s; + *j = 0; + if (n == 1) { + *k = 0; + } else if (n == 2) { + *k = s + 1; + *i = get(s + 1); + } else { + *j = A[1].first + 1; + *k = A[2].first + 1; + } + return is_push_xchg(*i, *j, *k); +} + +// XCHG s1,s(i) ; XCHG s0,s(j) +bool StackTransform::is_xchg2(int i, int j) const { + StackTransform t; + return is_valid() && !d && t.apply_xchg(1, i) && t.apply_xchg(0, j) && t <= *this; +} + +bool StackTransform::is_xchg2(int *i, int *j) const { + if (!is_valid() || d || n > 4 || n == 1 || dp < 2) { + return false; + } + *i = get(1); + *j = get(0); + if (!n) { + return true; + } + if (*i < 0 || *j < 0) { + return false; + } + if (n == 2 && !*i) { + *j = *i; // XCHG s0,s1 = XCHG2 s0,s0 + } else if (n == 3 && *i) { + // XCHG2 s(i),s(i) = XCHG s1,s(i) ; XCHG s0,s(i) : 0->1, 1->i + *j = *i; + } // XCHG2 s0,s(i) = XCHG s0,s1 ; XCHG s0,s(i) : 0->i, 1->0 + return is_xchg2(*i, *j); +} + +// XCHG s0,s(i) ; PUSH s(j) = PUSH s(j') ; XCHG s1,s(i+1) +// j'=j if j!=0, j!=i +// j'=0 if j=i +// j'=i if j=0 +bool StackTransform::is_xcpu(int i, int j) const { + StackTransform t; + return is_valid() && d == -1 && t.apply_xchg(0, i) && t.apply_push(j) && t <= *this; +} + +bool StackTransform::is_xcpu(int *i, int *j) const { + if (!is_valid() || d != -1 || n > 3 || dp < 1) { + return false; + } + *i = get(1); + *j = get(0); + if (!*j) { + *j = *i; + } else if (*j == *i) { + *j = 0; + } + return is_xcpu(*i, *j); +} + +// PUSH s(i) ; XCHG s0, s1 ; XCHG s0, s(j+1) +bool StackTransform::is_puxc(int i, int j) const { + StackTransform t; + return is_valid() && d == -1 && t.apply_push(i) && t.apply_xchg(0, 1) && 
t.apply_xchg(0, j + 1) && t <= *this; +} + +// j > 0 : 0 -> j, 1 -> i +// j = 0 : 0 -> i, 1 -> 0 ( PUSH s(i) ) +// j = -1 : 0 -> 0, 1 -> i ( PUSH s(i) ; XCHG s0, s1 ) +bool StackTransform::is_puxc(int *i, int *j) const { + if (!is_valid() || d != -1 || n > 3) { + return false; + } + *i = get(1); + *j = get(0); + if (!*i && is_push(*j)) { + std::swap(*i, *j); + return is_puxc(*i, *j); + } + if (!*j) { + --*j; + } + return is_puxc(*i, *j); +} + +// PUSH s(i) ; PUSH s(j+1) +bool StackTransform::is_push2(int i, int j) const { + StackTransform t; + return is_valid() && d == -2 && t.apply_push(i) && t.apply_push(j + 1) && t <= *this; +} + +bool StackTransform::is_push2(int *i, int *j) const { + if (!is_valid() || d != -2 || n > 2) { + return false; + } + *i = get(1); + *j = get(0); + return is_push2(*i, *j); +} + +// XCHG s2,s(i) ; XCHG s1,s(j) ; XCHG s0,s(k) +bool StackTransform::is_xchg3(int *i, int *j, int *k) const { + if (!is_valid() || d || dp < 3 || !is_permutation()) { + return false; + } + for (int s = 2; s >= 0; s--) { + *i = get(s); + StackTransform t = Xchg(2, *i) * *this; + if (t.is_xchg2(j, k)) { + return true; + } + } + return false; +} + +// XCHG s1,s(i) ; XCHG s0,s(j) ; PUSH s(k) +bool StackTransform::is_xc2pu(int *i, int *j, int *k) const { + if (!is_valid() || d != -1 || dp < 2) { + return false; + } + for (int s = 2; s >= 1; s--) { + *i = get(s); + StackTransform t = Xchg(1, *i) * *this; + if (t.is_xcpu(j, k)) { + return true; + } + } + return false; +} + +// XCHG s1,s(i) ; PUSH s(j) ; XCHG s0,s1 ; XCHG s0,s(k+1) +bool StackTransform::is_xcpuxc(int *i, int *j, int *k) const { + if (!is_valid() || d != -1 || dp < 2) { + return false; + } + for (int s = 2; s >= 0; s--) { + *i = get(s); + StackTransform t = Xchg(1, *i) * *this; + if (t.is_puxc(j, k)) { + return true; + } + } + return false; +} + +// XCHG s0,s(i) ; PUSH s(j) ; PUSH s(k+1) +bool StackTransform::is_xcpu2(int *i, int *j, int *k) const { + if (!is_valid() || d != -2 || dp < 1) { + return 
false;
+  }
+  *i = get(2);
+  StackTransform t = Xchg(0, *i) * *this;
+  return t.is_push2(j, k);
+}
+
+// PUSH s(i) ; XCHG s0,s2 ; XCHG s1,s(j+1) ; XCHG s0,s(k+1)
+// 0 -> i or 1 -> i or 2 -> i ; i has two preimages
+// 0 -> k if k >= 2, k != j
+// 1 -> j=k if j = k >= 2
+// 1 -> j if j >= 2, k != 0
+// 0 -> j if j >= 2, k = 0
+// => i in {f(0), f(1), f(2)} ; j in {-1, 0, 1, f(0), f(1)} ; k in {-1, 0, 1, f(0), f(1)}
+//
+// Searches for parameters (i, j, k) under which this transform matches the sequence above.
+// Candidates are enumerated exactly as listed in the last comment line and each one is
+// confirmed by the value overload. *i, *j and *k are overwritten while searching, so they
+// are meaningful only when true is returned.
+bool StackTransform::is_puxc2(int *i, int *j, int *k) const {
+  if (!is_valid() || d != -1 || dp < 2) {
+    return false;
+  }
+  for (int s = 2; s >= 0; s--) {
+    *i = get(s);
+    // only a value with exactly two preimages is accepted as the pushed one
+    if (preimage_count(*i) != 2) {
+      continue;
+    }
+    for (int u = -1; u <= 3; u++) {
+      *j = (u >= 2 ? get(u - 2) : u);  // one of -1, 0, 1, get(0), get(1)
+      for (int v = -1; v <= 3; v++) {
+        *k = (v >= 2 ? get(v - 2) : v);  // one of -1, 0, 1, get(0), get(1)
+        if (is_puxc2(*i, *j, *k)) {
+          return true;
+        }
+      }
+    }
+  }
+  return false;
+}
+
+// PUSH s(i) ; XCHG s0,s2 ; XCHG s1,s(j+1) ; XCHG s0,s(k+1)
+// Builds the sequence on a fresh transform `t` and compares it with *this via `t <= *this`.
+// The && chain short-circuits, so a failing apply_* step stops the construction.
+bool StackTransform::is_puxc2(int i, int j, int k) const {
+  StackTransform t;
+  return is_valid() && d == -1 && dp >= 2          // basic checks
+         && t.apply_push(i) && t.apply_xchg(0, 2)  // PUSH s(i) ; XCHG s0,s2
+         && t.apply_xchg(1, j + 1)                 // XCHG s1,s(j+1)
+         && t.apply_xchg(0, k + 1) && t <= *this;  // XCHG s0,s(k+1)
+}
+
+// PUSH s(i) ; XCHG s0,s1 ; XCHG s0,s(j+1) ; PUSH s(k+1)
+// Works on a copy of *this: applies a pop, lets is_puxc() fill *i and *j, then derives *k
+// from the first preimage of get(0) and re-applies the final PUSH before comparing.
+bool StackTransform::is_puxcpu(int *i, int *j, int *k) const {
+  if (!is_valid() || d != -2 || dp < 1) {
+    return false;
+  }
+  StackTransform t = *this;
+  if (t.apply_pop() && t.is_puxc(i, j)) {
+    int y = get(0);
+    auto v = t.preimage(y);
+    if (!v.empty()) {
+      *k = v[0] - 1;
+      t.apply_push(*k + 1);
+      return t <= *this;
+    }
+  }
+  return false;
+}
+
+// PUSH s(i) ; XCHG s0,s1 ; PUSH s(j+1) ; XCHG s0,s1 ; XCHG s0,s(k+2)
+// 2 -> i; 1 -> j (if j >= 1, k != -1), 1 -> i (if j = 0, k != -1), 1 -> 0 (if j = -1, k != -1)
+// 0 -> k (if k >= 1), 0 -> i (if k = 0), 0 -> j (if k = -1, j >= 1)
+//
+// *i is fixed to get(2); (j, k) candidates are enumerated and checked with the value
+// overload. The out-parameters are written even when no candidate matches.
+bool StackTransform::is_pu2xc(int *i, int *j, int *k) const {
+  if (!is_valid() || d != -2 || dp < 1) {
+    return false;
+  }
+  *i = get(2);
+  for (int v = -2; v <= 1; v++) {
+    *k = (v <= 0 ? v : get(0));  // one of -2, -1, 0, get(0)
+    for (int u = -1; u <= 1; u++) {
+      *j = (u <= 0 ? u : get(v != -1));  // one of -1, 0, get(0), get(1)
+      if (is_pu2xc(*i, *j, *k)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+// Value overload: replays the five-instruction sequence on a fresh transform and compares.
+bool StackTransform::is_pu2xc(int i, int j, int k) const {
+  StackTransform t;
+  return is_valid() && d == -2 && dp >= 1              // basic checks
+         && t.apply_push(i) && t.apply_xchg(0, 1)      // PUSH s(i) ; XCHG s0,s1
+         && t.apply_push(j + 1) && t.apply_xchg(0, 1)  // PUSH s(j+1) ; XCHG s0,s1
+         && t.apply_xchg(0, k + 2) && t <= *this;      // XCHG s0,s(k+2)
+}
+
+// PUSH s(i) ; PUSH s(j+1) ; PUSH s(k+2)
+bool StackTransform::is_push3(int i, int j, int k) const {
+  StackTransform t;
+  return is_valid() && d == -3 && t.apply_push(i) && t.apply_push(j + 1) && t.apply_push(k + 2) && t <= *this;
+}
+
+// Pointer overload: reads the candidate parameters from get(2), get(1), get(0) and verifies them.
+bool StackTransform::is_push3(int *i, int *j, int *k) const {
+  if (!is_valid() || d != -3 || n > 3) {
+    return false;
+  }
+  *i = get(2);
+  *j = get(1);
+  *k = get(0);
+  return is_push3(*i, *j, *k);
+}
+
+// Pointer overload of the block-swap matcher: *j is get(0), *i is the single preimage of 0.
+// Both must be positive for the value overload to be consulted.
+bool StackTransform::is_blkswap(int *i, int *j) const {
+  if (!is_valid() || d || !is_permutation()) {
+    return false;
+  }
+  *j = get(0);
+  if (*j <= 0) {
+    return false;
+  }
+  auto v = preimage(0);
+  if (v.size() != 1) {
+    return false;
+  }
+  *i = v[0];
+  return *i > 0 && is_blkswap(*i, *j);
+}
+
+// Value overload: true when positions 0..i-1 map to j..j+i-1, positions i..i+j-1 map to
+// 0..j-1, and the transform is trivial after i+j.
+bool StackTransform::is_blkswap(int i, int j) const {
+  if (!is_valid() || d || i <= 0 || j <= 0 || dp < i + j || !is_trivial_after(i + j)) {
+    return false;
+  }
+  for (int s = 0; s < i; s++) {
+    if (get(s) != s + j) {
+      return false;
+    }
+  }
+  for (int s = 0; s < j; s++) {
+    if (get(s + i) != s) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// equivalent to i times DROP
+// Matches when the transform is valid, d is positive and n is zero; *i receives d.
+bool StackTransform::is_blkdrop(int *i) const {
+  if (is_valid() && d > 0 && !n) {
+    *i = d;
+    return true;
+  }
+  return false;
+}
+
+// 0 1 .. j-1 j+i j+i+1 ...
+// Value overload: requires d == i > 0, j >= 0, dp >= i + j, n == j, triviality after j,
+// and get(s) == s for every s below j.
+bool StackTransform::is_blkdrop2(int i, int j) const {
+  if (!is_valid() || d != i || i <= 0 || j < 0 || dp < i + j || n != j || !is_trivial_after(j)) {
+    return false;
+  }
+  for (int s = 0; s < j; s++) {
+    if (get(s) != s) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Pointer overload: the only candidate is (d, n); the outputs are written only on success.
+bool StackTransform::is_blkdrop2(int *i, int *j) const {
+  if (is_valid() && is_blkdrop2(d, n)) {
+    *i = d;
+    *j = n;
+    return true;
+  }
+  return false;
+}
+
+// equivalent to i times PUSH s(j)
+// Pointer overload: *i is -d and *j is get(*i - 1); both are written before verification.
+bool StackTransform::is_blkpush(int *i, int *j) const {
+  if (!is_valid() || d >= 0) {
+    return false;
+  }
+  *i = -d;
+  *j = get(*i - 1);
+  return is_blkpush(*i, *j);
+}
+
+// Value overload: applies PUSH s(j) i times to a fresh transform and compares with *this.
+bool StackTransform::is_blkpush(int i, int j) const {
+  if (!is_valid() || d >= 0 || d != -i || j < 0 || dp < i + j || !is_trivial_after(i)) {
+    return false;
+  }
+  StackTransform t;
+  for (int s = 0; s < i; s++) {
+    if (!t.apply_push(j)) {
+      return false;
+    }
+  }
+  return t <= *this;
+}
+
+// Pointer overload of the reversal matcher: *j is the first stored position A[0].first and
+// *i is the length of the range up to the last stored position A[n - 1].first.
+bool StackTransform::is_reverse(int *i, int *j) const {
+  if (!is_valid() || d || !is_permutation() || n < 2) {
+    return false;
+  }
+  *j = A[0].first;
+  *i = A[n - 1].first - A[0].first + 1;
+  return is_reverse(*i, *j);
+}
+
+// Value overload: true when positions j..j+i-1 are mapped in reverse order
+// (get(j + s) == j + i - 1 - s) and the transform is trivial after i + j.
+bool StackTransform::is_reverse(int i, int j) const {
+  if (!is_valid() || d || !is_trivial_after(i + j) || n < 2 || A[0].first != j || A[n - 1].first != j + i - 1) {
+    return false;
+  }
+  for (int s = 0; s < i; s++) {
+    if (get(j + s) != j + i - 1 - s) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// 0 i+1 i+2 ... == i*NIP
+// j i+1 i+2 ...
== XCHG s(i),s(j) ; BLKDROP i
+// Value overload: exactly one stored pair, (i, j), with d == i > j >= 0.
+bool StackTransform::is_nip_seq(int i, int j) const {
+  return is_valid() && d == i && i > j && j >= 0 && n == 1 && A[0].first == i && A[0].second == j;
+}
+
+// Single-parameter form: *i is set to d unconditionally, then checked through the
+// one-argument call (NOTE(review): relies on a default for j declared elsewhere — confirm).
+bool StackTransform::is_nip_seq(int *i) const {
+  *i = d;
+  return is_nip_seq(*i);
+}
+
+// Two-parameter pointer form: candidates are d and the target of the first stored pair.
+bool StackTransform::is_nip_seq(int *i, int *j) const {
+  if (is_valid() && n > 0) {
+    *i = d;
+    *j = A[0].second;
+    return is_nip_seq(*i, *j);
+  } else {
+    return false;
+  }
+}
+
+// POP s(i); BLKDROP k  (usually for i >= k >= 0)
+bool StackTransform::is_pop_blkdrop(int i, int k) const {
+  StackTransform t;
+  return is_valid() && d == k + 1 && t.apply_pop(i) && t.apply_blkpop(k) && t <= *this;
+}
+
+// POP s(i); BLKDROP k == XCHG s0,s(i); BLKDROP k+1  for i >= k >= 0
+// k+1 k+2 .. i-1 0 i+1 ..
+// Pointer overload: needs a single stored pair whose target is 0; *k is d - 1 and *i is the
+// pair's source position.
+bool StackTransform::is_pop_blkdrop(int *i, int *k) const {
+  if (is_valid() && n == 1 && d > 0 && !A[0].second) {
+    *k = d - 1;
+    *i = A[0].first;
+    return is_pop_blkdrop(*i, *k);
+  } else {
+    return false;
+  }
+}
+
+// POP s(i); POP s(j); BLKDROP k  (usually for i<>j >= k >= 0)
+bool StackTransform::is_2pop_blkdrop(int i, int j, int k) const {
+  StackTransform t;
+  return is_valid() && d == k + 2 && t.apply_pop(i) && t.apply_pop(j) && t.apply_blkpop(k) && t <= *this;
+}
+
+// POP s(i); POP s(j); BLKDROP k == XCHG s0,s(i); XCHG s1,s(j+1); BLKDROP k+2 (usually for i<>j >= k >= 2)
+// k+2 k+3 .. i-1 0 i+1 ... j 1 j+2 ...
+bool StackTransform::is_2pop_blkdrop(int *i, int *j, int *k) const { + if (is_valid() && n == 2 && d >= 2 && A[0].second + A[1].second == 1) { + *k = d - 2; + int t = (A[0].second > 0); + *i = A[t].first; + *j = A[1 - t].first - 1; + return is_2pop_blkdrop(*i, *j, *k); + } else { + return false; + } +} + +// PUSHCONST c ; ROT == 1 -1000 0 2 3 +bool StackTransform::is_const_rot(int c) const { + return is_valid() && d == -1 && is_trivial_after(3) && get(0) == 1 && c <= c_start && get(1) == c && get(2) == 0; +} + +bool StackTransform::is_const_rot(int *c) const { + return is_valid() && (*c = get(1)) <= c_start && is_const_rot(*c); +} + +// PUSHCONST c ; POP s(i) == 0 1 .. i-1 -1000 i+1 ... +bool StackTransform::is_const_pop(int c, int i) const { + return is_valid() && !d && n == 1 && i > 0 && c <= c_start && get(i - 1) == c; +} + +bool StackTransform::is_const_pop(int *c, int *i) const { + if (is_valid() && !d && n == 1 && A[0].second <= c_start) { + *i = A[0].first + 1; + *c = A[0].second; + return is_const_pop(*c, *i); + } else { + return false; + } +} + +// PUSH i ; PUSHCONST c == c i 0 1 2 ... +bool StackTransform::is_push_const(int i, int c) const { + return is_valid() && d == -2 && c <= c_start && i >= 0 && is_trivial_after(2) && get(0) == c && get(1) == i; +} + +bool StackTransform::is_push_const(int *i, int *c) const { + return is_valid() && d == -2 && n == 2 && is_push_const(*i = get(1), *c = get(0)); +} + +void StackTransform::show(std::ostream &os, int mode) const { + if (!is_valid()) { + os << ""; + return; + } + int mi = 0, ma = 0; + if (n > 0 && A[0].first < d) { + mi = A[0].first - d; + } + if (n > 0) { + ma = std::max(ma, A[n - 1].first - d + 1); + } + ma = std::max(ma + 1, dp - d); + os << '{'; + if (dp == d) { + os << '|'; + } + for (int i = mi; i < ma; i++) { + os << get(i) << (i == -1 ? '?' : (i == dp - d - 1 ? 
'|' : ' ')); + } + os << get(ma) << "..}"; +} + +} // namespace tolk diff --git a/tolk/symtable.cpp b/tolk/symtable.cpp new file mode 100644 index 00000000..ea2a1f91 --- /dev/null +++ b/tolk/symtable.cpp @@ -0,0 +1,179 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "symtable.h" +#include +#include + +namespace tolk { + +/* + * + * SYMBOL VALUES (DECLARED) + * + */ + +int scope_level; + +SymTable<100003> symbols; + +SymDef* sym_def[symbols.hprime + 1]; +SymDef* global_sym_def[symbols.hprime + 1]; +std::vector> symbol_stack; +std::vector scope_opened_at; + +std::string Symbol::unknown_symbol_name(sym_idx_t i) { + if (!i) { + return "_"; + } else { + std::ostringstream os; + os << "SYM#" << i; + return os.str(); + } +} + +sym_idx_t SymTableBase::gen_lookup(std::string str, int mode, sym_idx_t idx) { + unsigned long long h1 = 1, h2 = 1; + for (char c : str) { + h1 = ((h1 * 239) + (unsigned char)(c)) % p; + h2 = ((h2 * 17) + (unsigned char)(c)) % (p - 1); + } + ++h2; + ++h1; + while (true) { + if (sym_table[h1]) { + if (sym_table[h1]->str == str) { + return (mode & 2) ? 
not_found : sym_idx_t(h1); + } + h1 += h2; + if (h1 > p) { + h1 -= p; + } + } else { + if (!(mode & 1)) { + return not_found; + } + if (def_sym >= ((long long)p * 3) / 4) { + throw SymTableOverflow{def_sym}; + } + sym_table[h1] = std::make_unique(str, idx <= 0 ? sym_idx_t(h1) : -idx); + ++def_sym; + return sym_idx_t(h1); + } + } +} + +SymTableBase& SymTableBase::add_keyword(std::string str, sym_idx_t idx) { + if (idx <= 0) { + idx = ++def_kw; + } + sym_idx_t res = gen_lookup(str, -1, idx); + if (!res) { + throw SymTableKwRedef{str}; + } + if (idx < max_kw_idx) { + keywords[idx] = res; + } + return *this; +} + +void open_scope(Lexer& lex) { + ++scope_level; + scope_opened_at.push_back(lex.cur().loc); +} + +void close_scope(Lexer& lex) { + if (!scope_level) { + throw Fatal{"cannot close the outer scope"}; + } + while (!symbol_stack.empty() && symbol_stack.back().first == scope_level) { + SymDef old_def = symbol_stack.back().second; + auto idx = old_def.sym_idx; + symbol_stack.pop_back(); + SymDef* cur_def = sym_def[idx]; + assert(cur_def); + assert(cur_def->level == scope_level && cur_def->sym_idx == idx); + //std::cerr << "restoring local symbol `" << old_def.name << "` of level " << scope_level << " to its previous level " << old_def.level << std::endl; + if (cur_def->value) { + //std::cerr << "deleting value of symbol " << old_def.name << ":" << old_def.level << " at " << (const void*) it->second.value << std::endl; + delete cur_def->value; + } + if (!old_def.level && !old_def.value) { + delete cur_def; // ??? keep the definition always? 
+ sym_def[idx] = nullptr; + } else { + cur_def->value = std::move(old_def.value); + cur_def->level = old_def.level; + } + old_def.value = nullptr; + } + --scope_level; + scope_opened_at.pop_back(); +} + +SymDef* lookup_symbol(sym_idx_t idx, int flags) { + if (!idx) { + return nullptr; + } + if ((flags & 1) && sym_def[idx]) { + return sym_def[idx]; + } + if ((flags & 2) && global_sym_def[idx]) { + return global_sym_def[idx]; + } + return nullptr; +} + +SymDef* lookup_symbol(std::string name, int flags) { + return lookup_symbol(symbols.lookup(name), flags); +} + +SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new, const SrcLocation& loc) { + if (!name_idx) { + return nullptr; + } + auto found = global_sym_def[name_idx]; + if (found) { + return force_new && found->value ? nullptr : found; + } + return global_sym_def[name_idx] = new SymDef(0, name_idx, loc); +} + +SymDef* define_symbol(sym_idx_t name_idx, bool force_new, const SrcLocation& loc) { + if (!name_idx) { + return nullptr; + } + if (!scope_level) { + return define_global_symbol(name_idx, force_new, loc); + } + auto found = sym_def[name_idx]; + if (found) { + if (found->level < scope_level) { + symbol_stack.push_back(std::make_pair(scope_level, *found)); + found->level = scope_level; + } else if (found->value && force_new) { + return nullptr; + } + found->value = 0; + found->loc = loc; + return found; + } + found = sym_def[name_idx] = new SymDef(scope_level, name_idx, loc); + symbol_stack.push_back(std::make_pair(scope_level, SymDef{0, name_idx})); + return found; +} + +} // namespace tolk diff --git a/tolk/symtable.h b/tolk/symtable.h new file mode 100644 index 00000000..c0a0912a --- /dev/null +++ b/tolk/symtable.h @@ -0,0 +1,175 @@ +/* + This file is part of TON Blockchain Library. 
+ + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once +#include "srcread.h" +#include "lexer.h" +#include + +namespace tolk { + +/* + * + * SYMBOL VALUES (DECLARED) + * + */ + +typedef int var_idx_t; + +struct SymValBase { + enum { _Param, _Var, _Func, _Typename, _GlobVar, _Const }; + int type; + int idx; + SymValBase(int _type, int _idx) : type(_type), idx(_idx) { + } + virtual ~SymValBase() = default; +}; + +/* + * + * SYMBOL TABLE + * + */ + +// defined outside this module (by the end user) +int compute_symbol_subclass(std::string str); // return 0 if unneeded + +typedef int sym_idx_t; + +struct Symbol { + std::string str; + sym_idx_t idx; + int subclass; + Symbol(std::string _str, sym_idx_t _idx, int _sc) : str(_str), idx(_idx), subclass(_sc) { + } + Symbol(std::string _str, sym_idx_t _idx) : str(_str), idx(_idx) { + subclass = compute_symbol_subclass(std::move(_str)); + } + static std::string unknown_symbol_name(sym_idx_t i); +}; + +class SymTableBase { + unsigned p; + std::unique_ptr* sym_table; + sym_idx_t def_kw, def_sym; + static constexpr int max_kw_idx = 10000; + sym_idx_t keywords[max_kw_idx]; + + public: + SymTableBase(unsigned p_, std::unique_ptr* sym_table_) + : p(p_), sym_table(sym_table_), def_kw(0x100), def_sym(0) { + std::memset(keywords, 0, sizeof(keywords)); + } + static constexpr sym_idx_t not_found = 0; + SymTableBase& 
add_keyword(std::string str, sym_idx_t idx = 0); + SymTableBase& add_kw_char(char c) { + return add_keyword(std::string{c}, c); + } + sym_idx_t lookup(std::string str, int mode = 0) { + return gen_lookup(str, mode); + } + sym_idx_t lookup_add(std::string str) { + return gen_lookup(str, 1); + } + Symbol* operator[](sym_idx_t i) const { + return sym_table[i].get(); + } + bool is_keyword(sym_idx_t i) const { + return sym_table[i] && sym_table[i]->idx < 0; + } + std::string get_name(sym_idx_t i) const { + return sym_table[i] ? sym_table[i]->str : Symbol::unknown_symbol_name(i); + } + int get_subclass(sym_idx_t i) const { + return sym_table[i] ? sym_table[i]->subclass : 0; + } + Symbol* get_keyword(int i) const { + return ((unsigned)i < (unsigned)max_kw_idx) ? sym_table[keywords[i]].get() : nullptr; + } + + protected: + sym_idx_t gen_lookup(std::string str, int mode = 0, sym_idx_t idx = 0); +}; + +template +class SymTable : public SymTableBase { + public: + static constexpr int hprime = pp; + static int size() { + return pp + 1; + } + + private: + std::unique_ptr sym[pp + 1]; + + public: + SymTable() : SymTableBase(pp, sym) { + } + SymTable& add_keyword(std::string str, sym_idx_t idx = 0) { + SymTableBase::add_keyword(str, idx); + return *this; + } + SymTable& add_kw_char(char c) { + return add_keyword(std::string{c}, c); + } +}; + +struct SymTableOverflow { + int sym_def; + SymTableOverflow(int x) : sym_def(x) { + } +}; + +struct SymTableKwRedef { + std::string kw; + SymTableKwRedef(std::string _kw) : kw(_kw) { + } +}; + +extern SymTable<100003> symbols; + +extern int scope_level; + +struct SymDef { + int level; + sym_idx_t sym_idx; + SymValBase* value; + SrcLocation loc; + SymDef(int lvl, sym_idx_t idx, const SrcLocation& _loc = {}, SymValBase* val = 0) + : level(lvl), sym_idx(idx), value(val), loc(_loc) { + } + bool has_name() const { + return sym_idx; + } + std::string name() const { + return symbols.get_name(sym_idx); + } +}; + +extern SymDef* 
sym_def[symbols.hprime + 1]; +extern SymDef* global_sym_def[symbols.hprime + 1]; +extern std::vector> symbol_stack; +extern std::vector scope_opened_at; + +void open_scope(Lexer& lex); +void close_scope(Lexer& lex); +SymDef* lookup_symbol(sym_idx_t idx, int flags = 3); +SymDef* lookup_symbol(std::string name, int flags = 3); + +SymDef* define_global_symbol(sym_idx_t name_idx, bool force_new = false, const SrcLocation& loc = {}); +SymDef* define_symbol(sym_idx_t name_idx, bool force_new = false, const SrcLocation& loc = {}); + +} // namespace tolk diff --git a/tolk/tolk-main.cpp b/tolk/tolk-main.cpp new file mode 100644 index 00000000..e4b6ebdb --- /dev/null +++ b/tolk/tolk-main.cpp @@ -0,0 +1,122 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . + + In addition, as a special exception, the copyright holders give permission + to link the code of portions of this program with the OpenSSL library. + You must obey the GNU General Public License in all respects for all + of the code used other than OpenSSL. If you modify file(s) with this + exception, you may extend this exception to your version of the file(s), + but you are not obligated to do so. If you do not wish to do so, delete this + exception statement from your version. If you delete this exception statement + from all source files in the program, then also delete it here. 
+*/ +#include "tolk.h" +#include +#include +#include "git.h" + +void usage(const char* progname) { + std::cerr + << "usage: " << progname + << " [-vIAPSR][-O][-i][-o][-W] { ...}\n" + "\tGenerates Fift TVM assembler code from a Tolk source\n" + "-I\tEnables interactive mode (parse stdin)\n" + "-o\tWrites generated code into specified file instead of stdout\n" + "-v\tIncreases verbosity level (extra information output into stderr)\n" + "-i\tSets indentation for the output code (in two-space units)\n" + "-A\tPrefix code with `\"Asm.fif\" include` preamble\n" + "-O\tSets optimization level (2 by default)\n" + "-P\tEnvelope code into PROGRAM{ ... }END>c\n" + "-S\tInclude stack layout comments in the output code\n" + "-R\tInclude operation rewrite comments in the output code\n" + "-W\tInclude Fift code to serialize and save generated code into specified BoC file. Enables " + "-A and -P.\n" + "\t-s\tOutput semantic version of Tolk and exit\n" + "\t-V\tShow Tolk build information\n"; + std::exit(2); +} + +int main(int argc, char* const argv[]) { + int i; + std::string output_filename; + while ((i = getopt(argc, argv, "Ahi:Io:O:PRsSvW:V")) != -1) { + switch (i) { + case 'A': + tolk::asm_preamble = true; + break; + case 'I': + tolk::interactive = true; + break; + case 'i': + tolk::indent = std::max(0, atoi(optarg)); + break; + case 'o': + output_filename = optarg; + break; + case 'O': + tolk::opt_level = std::max(0, atoi(optarg)); + break; + case 'P': + tolk::program_envelope = true; + break; + case 'R': + tolk::op_rewrite_comments = true; + break; + case 'S': + tolk::stack_layout_comments = true; + break; + case 'v': + ++tolk::verbosity; + break; + case 'W': + tolk::boc_output_filename = optarg; + tolk::asm_preamble = tolk::program_envelope = true; + break; + case 's': + std::cout << tolk::tolk_version << "\n"; + std::exit(0); + case 'V': + std::cout << "Tolk semantic version: v" << tolk::tolk_version << "\n"; + std::cout << "Build information: [ Commit: " << 
GitMetadata::CommitSHA1() << ", Date: " << GitMetadata::CommitDate() << "]\n"; + std::exit(0); + case 'h': + default: + usage(argv[0]); + } + } + + std::ostream *outs = &std::cout; + + std::unique_ptr fs; + if (!output_filename.empty()) { + fs = std::make_unique(output_filename, std::fstream::trunc | std::fstream::out); + if (!fs->is_open()) { + std::cerr << "failed to create output file " << output_filename << '\n'; + return 2; + } + outs = fs.get(); + } + + std::vector sources; + + while (optind < argc) { + sources.push_back(std::string(argv[optind++])); + } + + tolk::read_callback = tolk::fs_read_callback; + + return tolk::tolk_proceed(sources, *outs, std::cerr); +} diff --git a/tolk/tolk-wasm.cpp b/tolk/tolk-wasm.cpp new file mode 100644 index 00000000..a7ca37b4 --- /dev/null +++ b/tolk/tolk-wasm.cpp @@ -0,0 +1,148 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . + + In addition, as a special exception, the copyright holders give permission + to link the code of portions of this program with the OpenSSL library. + You must obey the GNU General Public License in all respects for all + of the code used other than OpenSSL. If you modify file(s) with this + exception, you may extend this exception to your version of the file(s), + but you are not obligated to do so. If you do not wish to do so, delete this + exception statement from your version. 
If you delete this exception statement + from all source files in the program, then also delete it here. +*/ +#include "tolk.h" +#include "git.h" +#include "td/utils/JsonBuilder.h" +#include "fift/utils.h" +#include "td/utils/base64.h" +#include "td/utils/Status.h" +#include +#include + +td::Result compile_internal(char *config_json) { + TRY_RESULT(input_json, td::json_decode(td::MutableSlice(config_json))) + auto &obj = input_json.get_object(); + + TRY_RESULT(opt_level, td::get_json_object_int_field(obj, "optLevel", false)); + TRY_RESULT(sources_obj, td::get_json_object_field(obj, "sources", td::JsonValue::Type::Array, false)); + + auto &sources_arr = sources_obj.get_array(); + + std::vector sources; + + for (auto &item : sources_arr) { + sources.push_back(item.get_string().str()); + } + + tolk::opt_level = std::max(0, opt_level); + tolk::program_envelope = true; + tolk::verbosity = 0; + tolk::indent = 1; + + std::ostringstream outs, errs; + auto compile_res = tolk::tolk_proceed(sources, outs, errs); + + if (compile_res != 0) { + return td::Status::Error(std::string("Tolk compilation error: ") + errs.str()); + } + + TRY_RESULT(code_cell, fift::compile_asm(outs.str(), "/fiftlib/", false)); + TRY_RESULT(boc, vm::std_boc_serialize(code_cell)); + + td::JsonBuilder result_json; + auto result_obj = result_json.enter_object(); + result_obj("status", "ok"); + result_obj("codeBoc", td::base64_encode(boc)); + result_obj("fiftCode", outs.str()); + result_obj("codeHashHex", code_cell->get_hash().to_hex()); + result_obj.leave(); + + outs.clear(); + errs.clear(); + + return result_json.string_builder().as_cslice().str(); +} + +/// Callback used to retrieve additional source files or data. +/// +/// @param _kind The kind of callback (a string). +/// @param _data The data for the callback (a string). +/// @param o_contents A pointer to the contents of the file, if found. Allocated via malloc(). +/// @param o_error A pointer to an error message, if there is one. 
Allocated via malloc(). +/// +/// The callback implementor must use malloc() to allocate storage for +/// contents or error. The callback implementor must use free() to free +/// said storage after tolk_compile returns. +/// +/// If the callback is not supported, *o_contents and *o_error must be set to NULL. +typedef void (*CStyleReadFileCallback)(char const* _kind, char const* _data, char** o_contents, char** o_error); + +tolk::ReadCallback::Callback wrapReadCallback(CStyleReadFileCallback _readCallback) +{ + tolk::ReadCallback::Callback readCallback; + if (_readCallback) { + readCallback = [=](tolk::ReadCallback::Kind _kind, char const* _data) -> td::Result { + char* contents_c = nullptr; + char* error_c = nullptr; + _readCallback(tolk::ReadCallback::kindString(_kind).data(), _data, &contents_c, &error_c); + if (!contents_c && !error_c) { + return td::Status::Error("Callback not supported"); + } + if (contents_c) { + return contents_c; + } + return td::Status::Error(std::string(error_c)); + }; + } + return readCallback; +} + +extern "C" { + +const char* version() { + auto version_json = td::JsonBuilder(); + auto obj = version_json.enter_object(); + obj("tolkVersion", tolk::tolk_version); + obj("tolkFiftLibCommitHash", GitMetadata::CommitSHA1()); + obj("tolkFiftLibCommitDate", GitMetadata::CommitDate()); + obj.leave(); + return strdup(version_json.string_builder().as_cslice().c_str()); +} + +const char *tolk_compile(char *config_json, CStyleReadFileCallback callback) { + if (callback) { + tolk::read_callback = wrapReadCallback(callback); + } else { + tolk::read_callback = tolk::fs_read_callback; + } + + auto res = compile_internal(config_json); + + if (res.is_error()) { + auto result = res.move_as_error(); + auto error_res = td::JsonBuilder(); + auto error_o = error_res.enter_object(); + error_o("status", "error"); + error_o("message", result.message().str()); + error_o.leave(); + return strdup(error_res.string_builder().as_cslice().c_str()); + } + + auto 
res_string = res.move_as_ok(); + + return strdup(res_string.c_str()); +} +} diff --git a/tolk/tolk.cpp b/tolk/tolk.cpp new file mode 100644 index 00000000..eb15155a --- /dev/null +++ b/tolk/tolk.cpp @@ -0,0 +1,260 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see . + + In addition, as a special exception, the copyright holders give permission + to link the code of portions of this program with the OpenSSL library. + You must obey the GNU General Public License in all respects for all + of the code used other than OpenSSL. If you modify file(s) with this + exception, you may extend this exception to your version of the file(s), + but you are not obligated to do so. If you do not wish to do so, delete this + exception statement from your version. If you delete this exception statement + from all source files in the program, then also delete it here. 
+*/ +#include "tolk.h" +#include "srcread.h" +#include "lexer.h" +#include +#include "git.h" +#include +#include "td/utils/port/path.h" + +namespace tolk { + +int verbosity, indent, opt_level = 2; +bool stack_layout_comments, op_rewrite_comments, program_envelope, asm_preamble; +bool interactive = false; +GlobalPragma pragma_allow_post_modification{"allow-post-modification"}; +GlobalPragma pragma_compute_asm_ltr{"compute-asm-ltr"}; +std::string generated_from, boc_output_filename; +ReadCallback::Callback read_callback; + +td::Result fs_read_callback(ReadCallback::Kind kind, const char* query) { + switch (kind) { + case ReadCallback::Kind::ReadFile: { + std::ifstream ifs{query}; + if (ifs.fail()) { + auto msg = std::string{"cannot open source file `"} + query + "`"; + return td::Status::Error(msg); + } + std::stringstream ss; + ss << ifs.rdbuf(); + return ss.str(); + } + case ReadCallback::Kind::Realpath: { + return td::realpath(td::CSlice(query)); + } + default: { + return td::Status::Error("Unknown query kind"); + } + } +} + +/* + * + * OUTPUT CODE GENERATOR + * + */ + +void generate_output_func(SymDef* func_sym, std::ostream &outs, std::ostream &errs) { + SymValCodeFunc* func_val = dynamic_cast(func_sym->value); + tolk_assert(func_val); + std::string name = symbols.get_name(func_sym->sym_idx); + if (verbosity >= 2) { + errs << "\n\n=========================\nfunction " << name << " : " << func_val->get_type() << std::endl; + } + if (!func_val->code) { + errs << "( function `" << name << "` undefined )\n"; + throw ParseError(func_sym->loc, name); + } else { + CodeBlob& code = *(func_val->code); + if (verbosity >= 3) { + code.print(errs, 9); + } + code.simplify_var_types(); + if (verbosity >= 5) { + errs << "after simplify_var_types: \n"; + code.print(errs, 0); + } + code.prune_unreachable_code(); + if (verbosity >= 5) { + errs << "after prune_unreachable: \n"; + code.print(errs, 0); + } + code.split_vars(true); + if (verbosity >= 5) { + errs << "after split_vars: 
\n"; + code.print(errs, 0); + } + for (int i = 0; i < 8; i++) { + code.compute_used_code_vars(); + if (verbosity >= 4) { + errs << "after compute_used_vars: \n"; + code.print(errs, 6); + } + code.fwd_analyze(); + if (verbosity >= 5) { + errs << "after fwd_analyze: \n"; + code.print(errs, 6); + } + code.prune_unreachable_code(); + if (verbosity >= 5) { + errs << "after prune_unreachable: \n"; + code.print(errs, 6); + } + } + code.mark_noreturn(); + if (verbosity >= 3) { + code.print(errs, 15); + } + if (verbosity >= 2) { + errs << "\n---------- resulting code for " << name << " -------------\n"; + } + bool inline_func = (func_val->flags & 1); + bool inline_ref = (func_val->flags & 2); + const char* modifier = ""; + if (inline_func) { + modifier = "INLINE"; + } else if (inline_ref) { + modifier = "REF"; + } + outs << std::string(indent * 2, ' ') << name << " PROC" << modifier << ":<{\n"; + int mode = 0; + if (stack_layout_comments) { + mode |= Stack::_StkCmt | Stack::_CptStkCmt; + } + if (opt_level < 2) { + mode |= Stack::_DisableOpt; + } + auto fv = dynamic_cast(func_sym->value); + // Flags: 1 - inline, 2 - inline_ref + if (fv && (fv->flags & 1) && code.ops->noreturn()) { + mode |= Stack::_InlineFunc; + } + if (fv && (fv->flags & 3)) { + mode |= Stack::_InlineAny; + } + code.generate_code(outs, mode, indent + 1); + outs << std::string(indent * 2, ' ') << "}>\n"; + if (verbosity >= 2) { + errs << "--------------\n"; + } + } +} + +int generate_output(std::ostream &outs, std::ostream &errs) { + if (asm_preamble) { + outs << "\"Asm.fif\" include\n"; + } + outs << "// automatically generated from " << generated_from << std::endl; + if (program_envelope) { + outs << "PROGRAM{\n"; + } + for (SymDef* func_sym : glob_func) { + SymValCodeFunc* func_val = dynamic_cast(func_sym->value); + tolk_assert(func_val); + std::string name = symbols.get_name(func_sym->sym_idx); + outs << std::string(indent * 2, ' '); + if (func_val->method_id.is_null()) { + outs << "DECLPROC " << name << 
"\n"; + } else { + outs << func_val->method_id << " DECLMETHOD " << name << "\n"; + } + } + for (SymDef* gvar_sym : glob_vars) { + tolk_assert(dynamic_cast(gvar_sym->value)); + std::string name = symbols.get_name(gvar_sym->sym_idx); + outs << std::string(indent * 2, ' ') << "DECLGLOBVAR " << name << "\n"; + } + int errors = 0; + for (SymDef* func_sym : glob_func) { + try { + generate_output_func(func_sym, outs, errs); + } catch (Error& err) { + errs << "cannot generate code for function `" << symbols.get_name(func_sym->sym_idx) << "`:\n" + << err << std::endl; + ++errors; + } + } + if (program_envelope) { + outs << "}END>c\n"; + } + if (!boc_output_filename.empty()) { + outs << "2 boc+>B \"" << boc_output_filename << "\" B>file\n"; + } + return errors; +} + +void output_inclusion_stack(std::ostream &errs) { + while (!inclusion_locations.empty()) { + SrcLocation loc = inclusion_locations.top(); + inclusion_locations.pop(); + if (loc.fdescr) { + errs << "note: included from "; + loc.show(errs); + errs << std::endl; + } + } +} + + +int tolk_proceed(const std::vector &sources, std::ostream &outs, std::ostream &errs) { + if (program_envelope && !indent) { + indent = 1; + } + + define_keywords(); + define_builtins(); + + int ok = 0, proc = 0; + try { + for (auto src : sources) { + ok += parse_source_file(src.c_str(), {}, true); + proc++; + } + if (interactive) { + generated_from += "stdin "; + ok += parse_source_stdin(); + proc++; + } + if (ok < proc) { + throw Fatal{"output code generation omitted because of errors"}; + } + if (!proc) { + throw Fatal{"no source files, no output"}; + } + pragma_allow_post_modification.check_enable_in_libs(); + pragma_compute_asm_ltr.check_enable_in_libs(); + return generate_output(outs, errs); + } catch (Fatal& fatal) { + errs << "fatal: " << fatal << std::endl; + output_inclusion_stack(errs); + return 2; + } catch (Error& error) { + errs << error << std::endl; + output_inclusion_stack(errs); + return 2; + } catch (UnifyError& unif_err) 
{ + errs << "fatal: "; + unif_err.print_message(errs); + errs << std::endl; + output_inclusion_stack(errs); + return 2; + } + + return 0; +} + +} // namespace tolk diff --git a/tolk/tolk.h b/tolk/tolk.h new file mode 100644 index 00000000..66441099 --- /dev/null +++ b/tolk/tolk.h @@ -0,0 +1,1785 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include "common/refcnt.hpp" +#include "common/bigint.hpp" +#include "common/refint.h" +#include "srcread.h" +#include "lexer.h" +#include "symtable.h" +#include "td/utils/Status.h" + +#define tolk_assert(expr) \ + (bool(expr) ? 
void(0) \ + : throw Fatal(PSTRING() << "Assertion failed at " << __FILE__ << ":" << __LINE__ << ": " << #expr)) + +namespace tolk { + +extern int verbosity; +extern bool op_rewrite_comments; +extern std::string generated_from; + +constexpr int optimize_depth = 20; + +const std::string tolk_version{"0.4.5"}; + +enum Keyword { + _Eof = -1, + _Ident = 0, + _Number, + _Special, + _String, + _Return = 0x80, + _Var, + _Repeat, + _Do, + _While, + _Until, + _Try, + _Catch, + _If, + _Ifnot, + _Then, + _Else, + _Elseif, + _Elseifnot, + _Eq, + _Neq, + _Leq, + _Geq, + _Spaceship, + _Lshift, + _Rshift, + _RshiftR, + _RshiftC, + _DivR, + _DivC, + _ModR, + _ModC, + _DivMod, + _PlusLet, + _MinusLet, + _TimesLet, + _DivLet, + _DivRLet, + _DivCLet, + _ModLet, + _ModRLet, + _ModCLet, + _LshiftLet, + _RshiftLet, + _RshiftRLet, + _RshiftCLet, + _AndLet, + _OrLet, + _XorLet, + _Int, + _Cell, + _Slice, + _Builder, + _Cont, + _Tuple, + _Type, + _Mapsto, + _Forall, + _Asm, + _Impure, + _Global, + _Extern, + _Inline, + _InlineRef, + _AutoApply, + _MethodId, + _Operator, + _Infix, + _Infixl, + _Infixr, + _Const, + _PragmaHashtag, + _IncludeHashtag +}; + +void define_keywords(); + +class IdSc { + int cls; + + public: + enum { undef = 0, dotid = 1, tildeid = 2 }; + IdSc(int _cls = undef) : cls(_cls) { + } + operator int() { + return cls; + } +}; + +// symbol subclass: +// 1 = begins with . 
(a const method) +// 2 = begins with ~ (a non-const method) +// 0 = else + +/* + * + * TYPE EXPRESSIONS + * + */ + +struct TypeExpr { + enum te_type { te_Unknown, te_Var, te_Indirect, te_Atomic, te_Tensor, te_Tuple, te_Map, te_Type, te_ForAll } constr; + enum { + _Int = Keyword::_Int, + _Cell = Keyword::_Cell, + _Slice = Keyword::_Slice, + _Builder = Keyword::_Builder, + _Cont = Keyword::_Cont, + _Tuple = Keyword::_Tuple, + _Type = Keyword::_Type + }; + int value; + int minw, maxw; + static constexpr int w_inf = 1023; + std::vector args; + bool was_forall_var = false; + TypeExpr(te_type _constr, int _val = 0) : constr(_constr), value(_val), minw(0), maxw(w_inf) { + } + TypeExpr(te_type _constr, int _val, int width) : constr(_constr), value(_val), minw(width), maxw(width) { + } + TypeExpr(te_type _constr, std::vector list) + : constr(_constr), value((int)list.size()), args(std::move(list)) { + compute_width(); + } + TypeExpr(te_type _constr, std::initializer_list list) + : constr(_constr), value((int)list.size()), args(std::move(list)) { + compute_width(); + } + TypeExpr(te_type _constr, TypeExpr* elem0) : constr(_constr), value(1), args{elem0} { + compute_width(); + } + TypeExpr(te_type _constr, TypeExpr* elem0, std::vector list) + : constr(_constr), value((int)list.size() + 1), args{elem0} { + args.insert(args.end(), list.begin(), list.end()); + compute_width(); + } + TypeExpr(te_type _constr, TypeExpr* elem0, std::initializer_list list) + : constr(_constr), value((int)list.size() + 1), args{elem0} { + args.insert(args.end(), list.begin(), list.end()); + compute_width(); + } + bool is_atomic() const { + return constr == te_Atomic; + } + bool is_atomic(int v) const { + return constr == te_Atomic && value == v; + } + bool is_int() const { + return is_atomic(_Int); + } + bool is_var() const { + return constr == te_Var; + } + bool is_map() const { + return constr == te_Map; + } + bool is_tuple() const { + return constr == te_Tuple; + } + bool has_fixed_width() const { 
+ return minw == maxw; + } + int get_width() const { + return has_fixed_width() ? minw : -1; + } + void compute_width(); + bool recompute_width(); + void show_width(std::ostream& os); + std::ostream& print(std::ostream& os, int prio = 0); + void replace_with(TypeExpr* te2); + int extract_components(std::vector& comp_list); + static int holes, type_vars; + static TypeExpr* new_hole() { + return new TypeExpr{te_Unknown, ++holes}; + } + static TypeExpr* new_hole(int width) { + return new TypeExpr{te_Unknown, ++holes, width}; + } + static TypeExpr* new_unit() { + return new TypeExpr{te_Tensor, 0, 0}; + } + static TypeExpr* new_atomic(int value) { + return new TypeExpr{te_Atomic, value, 1}; + } + static TypeExpr* new_map(TypeExpr* from, TypeExpr* to); + static TypeExpr* new_func() { + return new_map(new_hole(), new_hole()); + } + static TypeExpr* new_tensor(std::vector list, bool red = true) { + return red && list.size() == 1 ? list[0] : new TypeExpr{te_Tensor, std::move(list)}; + } + static TypeExpr* new_tensor(std::initializer_list list) { + return new TypeExpr{te_Tensor, std::move(list)}; + } + static TypeExpr* new_tensor(TypeExpr* te1, TypeExpr* te2) { + return new_tensor({te1, te2}); + } + static TypeExpr* new_tensor(TypeExpr* te1, TypeExpr* te2, TypeExpr* te3) { + return new_tensor({te1, te2, te3}); + } + static TypeExpr* new_tuple(TypeExpr* arg0) { + return new TypeExpr{te_Tuple, arg0}; + } + static TypeExpr* new_tuple(std::vector list, bool red = false) { + return new_tuple(new_tensor(std::move(list), red)); + } + static TypeExpr* new_tuple(std::initializer_list list) { + return new_tuple(new_tensor(std::move(list))); + } + static TypeExpr* new_var() { + return new TypeExpr{te_Var, --type_vars, 1}; + } + static TypeExpr* new_var(int idx) { + return new TypeExpr{te_Var, idx, 1}; + } + static TypeExpr* new_forall(std::vector list, TypeExpr* body) { + return new TypeExpr{te_ForAll, body, std::move(list)}; + } + static TypeExpr* new_forall(std::initializer_list 
list, TypeExpr* body) { + return new TypeExpr{te_ForAll, body, std::move(list)}; + } + static bool remove_indirect(TypeExpr*& te, TypeExpr* forbidden = nullptr); + static std::vector remove_forall(TypeExpr*& te); + static bool remove_forall_in(TypeExpr*& te, TypeExpr* te2, const std::vector& new_vars); +}; + +std::ostream& operator<<(std::ostream& os, TypeExpr* type_expr); + +struct UnifyError { + TypeExpr* te1; + TypeExpr* te2; + std::string msg; + UnifyError(TypeExpr* _te1, TypeExpr* _te2, std::string _msg = "") : te1(_te1), te2(_te2), msg(_msg) { + } + void print_message(std::ostream& os) const; + std::string message() const; +}; + +std::ostream& operator<<(std::ostream& os, const UnifyError& ue); + +void unify(TypeExpr*& te1, TypeExpr*& te2); + +// extern int TypeExpr::holes; + +/* + * + * ABSTRACT CODE + * + */ + +using const_idx_t = int; + +struct TmpVar { + TypeExpr* v_type; + var_idx_t idx; + enum { _In = 1, _Named = 2, _Tmp = 4, _UniqueName = 0x20 }; + int cls; + sym_idx_t name; + int coord; + std::unique_ptr where; + std::vector> on_modification; + bool undefined = false; + TmpVar(var_idx_t _idx, int _cls, TypeExpr* _type = 0, SymDef* sym = 0, const SrcLocation* loc = 0); + void show(std::ostream& os, int omit_idx = 0) const; + void dump(std::ostream& os) const; + void set_location(const SrcLocation& loc); + std::string to_string() const { + std::ostringstream s; + show(s, 2); + return s.str(); + } +}; + +struct VarDescr { + var_idx_t idx; + enum { _Last = 1, _Unused = 2 }; + int flags; + enum { + _Const = 16, + _Int = 32, + _Zero = 64, + _NonZero = 128, + _Pos = 256, + _Neg = 512, + _Bool = 1024, + _Bit = 2048, + _Finite = 4096, + _Nan = 8192, + _Even = 16384, + _Odd = 32768, + _Null = (1 << 16), + _NotNull = (1 << 17) + }; + static constexpr int ConstZero = _Int | _Zero | _Pos | _Neg | _Bool | _Bit | _Finite | _Even | _NotNull; + static constexpr int ConstOne = _Int | _NonZero | _Pos | _Bit | _Finite | _Odd | _NotNull; + static constexpr int ConstTrue = 
_Int | _NonZero | _Neg | _Bool | _Finite | _Odd | _NotNull; + static constexpr int ValBit = ConstZero & ConstOne; + static constexpr int ValBool = ConstZero & ConstTrue; + static constexpr int FiniteInt = _Int | _Finite | _NotNull; + static constexpr int FiniteUInt = FiniteInt | _Pos; + int val; + td::RefInt256 int_const; + std::string str_const; + + VarDescr(var_idx_t _idx = -1, int _flags = 0, int _val = 0) : idx(_idx), flags(_flags), val(_val) { + } + bool operator<(var_idx_t other_idx) const { + return idx < other_idx; + } + bool is_unused() const { + return flags & _Unused; + } + bool is_last() const { + return flags & _Last; + } + bool always_true() const { + return val & _NonZero; + } + bool always_false() const { + return val & _Zero; + } + bool always_nonzero() const { + return val & _NonZero; + } + bool always_zero() const { + return val & _Zero; + } + bool always_even() const { + return val & _Even; + } + bool always_odd() const { + return val & _Odd; + } + bool always_null() const { + return val & _Null; + } + bool always_not_null() const { + return val & _NotNull; + } + bool is_const() const { + return val & _Const; + } + bool is_int_const() const { + return (val & (_Int | _Const)) == (_Int | _Const) && int_const.not_null(); + } + bool always_nonpos() const { + return val & _Neg; + } + bool always_nonneg() const { + return val & _Pos; + } + bool always_pos() const { + return (val & (_Pos | _NonZero)) == (_Pos | _NonZero); + } + bool always_neg() const { + return (val & (_Neg | _NonZero)) == (_Neg | _NonZero); + } + bool always_finite() const { + return val & _Finite; + } + bool always_less(const VarDescr& other) const; + bool always_leq(const VarDescr& other) const; + bool always_greater(const VarDescr& other) const; + bool always_geq(const VarDescr& other) const; + bool always_equal(const VarDescr& other) const; + bool always_neq(const VarDescr& other) const; + void unused() { + flags |= _Unused; + } + void clear_unused() { + flags &= ~_Unused; + } + 
void set_const(long long value); + void set_const(td::RefInt256 value); + void set_const(std::string value); + void set_const_nan(); + void operator+=(const VarDescr& y) { + flags &= y.flags; + } + void operator|=(const VarDescr& y); + void operator&=(const VarDescr& y); + void set_value(const VarDescr& y); + void set_value(VarDescr&& y); + void set_value(const VarDescr* y) { + if (y) { + set_value(*y); + } + } + void clear_value(); + void show_value(std::ostream& os) const; + void show(std::ostream& os, const char* var_name = nullptr) const; +}; + +inline std::ostream& operator<<(std::ostream& os, const VarDescr& vd) { + vd.show(os); + return os; +} + +struct VarDescrList { + std::vector list; + bool unreachable{false}; + VarDescrList() : list() { + } + VarDescrList(const std::vector& _list) : list(_list) { + } + VarDescrList(std::vector&& _list) : list(std::move(_list)) { + } + std::size_t size() const { + return list.size(); + } + VarDescr* operator[](var_idx_t idx); + const VarDescr* operator[](var_idx_t idx) const; + VarDescrList operator+(const VarDescrList& y) const; + VarDescrList& operator+=(const VarDescrList& y); + VarDescrList& clear_last(); + VarDescrList& operator+=(var_idx_t idx) { + return add_var(idx); + } + VarDescrList& operator+=(const std::vector& idx_list) { + return add_vars(idx_list); + } + VarDescrList& add_var(var_idx_t idx, bool unused = false); + VarDescrList& add_vars(const std::vector& idx_list, bool unused = false); + VarDescrList& operator-=(const std::vector& idx_list); + VarDescrList& operator-=(var_idx_t idx); + std::size_t count(const std::vector idx_list) const; + std::size_t count_used(const std::vector idx_list) const; + VarDescr& add(var_idx_t idx); + VarDescr& add_newval(var_idx_t idx); + VarDescrList& operator&=(const VarDescrList& values); + VarDescrList& import_values(const VarDescrList& values); + VarDescrList operator|(const VarDescrList& y) const; + VarDescrList& operator|=(const VarDescrList& values); + void 
show(std::ostream& os) const; + void set_unreachable() { + list.clear(); + unreachable = true; + } +}; + +inline std::ostream& operator<<(std::ostream& os, const VarDescrList& values) { + values.show(os); + return os; +} + +struct CodeBlob; + +template +class ListIterator { + T* ptr; + + public: + ListIterator() : ptr(nullptr) { + } + ListIterator(T* _ptr) : ptr(_ptr) { + } + ListIterator& operator++() { + ptr = ptr->next.get(); + return *this; + } + ListIterator operator++(int) { + T* z = ptr; + ptr = ptr->next.get(); + return ListIterator{z}; + } + T& operator*() const { + return *ptr; + } + T* operator->() const { + return ptr; + } + bool operator==(const ListIterator& y) const { + return ptr == y.ptr; + } + bool operator!=(const ListIterator& y) const { + return ptr != y.ptr; + } +}; + +struct Stack; + +struct Op { + enum { + _Undef, + _Nop, + _Call, + _CallInd, + _Let, + _IntConst, + _GlobVar, + _SetGlob, + _Import, + _Return, + _Tuple, + _UnTuple, + _If, + _While, + _Until, + _Repeat, + _Again, + _TryCatch, + _SliceConst + }; + int cl; + enum { _Disabled = 1, _Reachable = 2, _NoReturn = 4, _ImpureR = 8, _ImpureW = 16, _Impure = 24 }; + int flags; + std::unique_ptr next; + SymDef* fun_ref; + SrcLocation where; + VarDescrList var_info; + std::vector args; + std::vector left, right; + std::unique_ptr block0, block1; + td::RefInt256 int_const; + std::string str_const; + Op(const SrcLocation& _where = {}, int _cl = _Undef) : cl(_cl), flags(0), fun_ref(nullptr), where(_where) { + } + Op(const SrcLocation& _where, int _cl, const std::vector& _left) + : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left) { + } + Op(const SrcLocation& _where, int _cl, std::vector&& _left) + : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(std::move(_left)) { + } + Op(const SrcLocation& _where, int _cl, const std::vector& _left, td::RefInt256 _const) + : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left), int_const(_const) { + } + Op(const 
SrcLocation& _where, int _cl, const std::vector& _left, std::string _const) + : cl(_cl), flags(0), fun_ref(nullptr), where(_where), left(_left), str_const(_const) { + } + Op(const SrcLocation& _where, int _cl, const std::vector& _left, const std::vector& _right, + SymDef* _fun = nullptr) + : cl(_cl), flags(0), fun_ref(_fun), where(_where), left(_left), right(_right) { + } + Op(const SrcLocation& _where, int _cl, std::vector&& _left, std::vector&& _right, + SymDef* _fun = nullptr) + : cl(_cl), flags(0), fun_ref(_fun), where(_where), left(std::move(_left)), right(std::move(_right)) { + } + bool disabled() const { + return flags & _Disabled; + } + bool enabled() const { + return !disabled(); + } + void disable() { + flags |= _Disabled; + } + bool unreachable() { + return !(flags & _Reachable); + } + void flags_set_clear(int set, int clear); + void show(std::ostream& os, const std::vector& vars, std::string pfx = "", int mode = 0) const; + void show_var_list(std::ostream& os, const std::vector& idx_list, const std::vector& vars) const; + void show_var_list(std::ostream& os, const std::vector& list, const std::vector& vars) const; + static void show_block(std::ostream& os, const Op* block, const std::vector& vars, std::string pfx = "", + int mode = 0); + void split_vars(const std::vector& vars); + static void split_var_list(std::vector& var_list, const std::vector& vars); + bool compute_used_vars(const CodeBlob& code, bool edit); + bool std_compute_used_vars(bool disabled = false); + bool set_var_info(const VarDescrList& new_var_info); + bool set_var_info(VarDescrList&& new_var_info); + bool set_var_info_except(const VarDescrList& new_var_info, const std::vector& var_list); + bool set_var_info_except(VarDescrList&& new_var_info, const std::vector& var_list); + void prepare_args(VarDescrList values); + VarDescrList fwd_analyze(VarDescrList values); + bool set_noreturn(bool nr); + bool mark_noreturn(); + bool noreturn() const { + return flags & _NoReturn; + } + bool 
is_empty() const { + return cl == _Nop && !next; + } + bool is_pure() const { + return !(flags & _Impure); + } + bool generate_code_step(Stack& stack); + void generate_code_all(Stack& stack); + Op& last() { + return next ? next->last() : *this; + } + const Op& last() const { + return next ? next->last() : *this; + } + ListIterator begin() { + return ListIterator{this}; + } + ListIterator end() const { + return ListIterator{}; + } + ListIterator cbegin() { + return ListIterator{this}; + } + ListIterator cend() const { + return ListIterator{}; + } +}; + +inline ListIterator begin(const std::unique_ptr& op_list) { + return ListIterator{op_list.get()}; +} + +inline ListIterator end(const std::unique_ptr& op_list) { + return ListIterator{}; +} + +inline ListIterator cbegin(const Op* op_list) { + return ListIterator{op_list}; +} + +inline ListIterator cend(const Op* op_list) { + return ListIterator{}; +} + +inline ListIterator begin(const Op* op_list) { + return ListIterator{op_list}; +} + +inline ListIterator end(const Op* op_list) { + return ListIterator{}; +} + +inline ListIterator begin(Op* op_list) { + return ListIterator{op_list}; +} + +inline ListIterator end(Op* op_list) { + return ListIterator{}; +} + +typedef std::tuple FormalArg; +typedef std::vector FormalArgList; + +struct AsmOpList; + +struct CodeBlob { + enum { _AllowPostModification = 1, _ComputeAsmLtr = 2 }; + int var_cnt, in_var_cnt, op_cnt; + TypeExpr* ret_type; + std::string name; + SrcLocation loc; + std::vector vars; + std::unique_ptr ops; + std::unique_ptr* cur_ops; + std::stack*> cur_ops_stack; + int flags = 0; + bool require_callxargs = false; + CodeBlob(TypeExpr* ret = nullptr) : var_cnt(0), in_var_cnt(0), op_cnt(0), ret_type(ret), cur_ops(&ops) { + } + template + Op& emplace_back(const Args&... 
args) { + Op& res = *(*cur_ops = std::make_unique(args...)); + cur_ops = &(res.next); + return res; + } + bool import_params(FormalArgList arg_list); + var_idx_t create_var(int cls, TypeExpr* var_type = 0, SymDef* sym = 0, const SrcLocation* loc = 0); + var_idx_t create_tmp_var(TypeExpr* var_type = 0, const SrcLocation* loc = 0) { + return create_var(TmpVar::_Tmp, var_type, nullptr, loc); + } + int split_vars(bool strict = false); + bool compute_used_code_vars(); + bool compute_used_code_vars(std::unique_ptr& ops, const VarDescrList& var_info, bool edit) const; + void print(std::ostream& os, int flags = 0) const; + void push_set_cur(std::unique_ptr& new_cur_ops) { + cur_ops_stack.push(cur_ops); + cur_ops = &new_cur_ops; + } + void close_blk(const SrcLocation& location) { + *cur_ops = std::make_unique(location, Op::_Nop); + } + void pop_cur() { + cur_ops = cur_ops_stack.top(); + cur_ops_stack.pop(); + } + void close_pop_cur(const SrcLocation& location) { + close_blk(location); + pop_cur(); + } + void simplify_var_types(); + void flags_set_clear(int set, int clear); + void prune_unreachable_code(); + void fwd_analyze(); + void mark_noreturn(); + void generate_code(AsmOpList& out_list, int mode = 0); + void generate_code(std::ostream& os, int mode = 0, int indent = 0); + + void on_var_modification(var_idx_t idx, const SrcLocation& here) const { + for (auto& f : vars.at(idx).on_modification) { + f(here); + } + } +}; + +/* + * + * SYMBOL VALUES + * + */ + +struct SymVal : SymValBase { + TypeExpr* sym_type; + td::RefInt256 method_id; + bool impure; + bool auto_apply{false}; + short flags; // +1 = inline, +2 = inline_ref + SymVal(int _type, int _idx, TypeExpr* _stype = nullptr, bool _impure = false) + : SymValBase(_type, _idx), sym_type(_stype), impure(_impure), flags(0) { + } + ~SymVal() override = default; + TypeExpr* get_type() const { + return sym_type; + } + virtual const std::vector* get_arg_order() const { + return nullptr; + } + virtual const std::vector* 
get_ret_order() const { + return nullptr; + } +}; + +struct SymValFunc : SymVal { + std::vector arg_order, ret_order; + ~SymValFunc() override = default; + SymValFunc(int val, TypeExpr* _ft, bool _impure = false) : SymVal(_Func, val, _ft, _impure) { + } + SymValFunc(int val, TypeExpr* _ft, std::initializer_list _arg_order, std::initializer_list _ret_order = {}, + bool _impure = false) + : SymVal(_Func, val, _ft, _impure), arg_order(_arg_order), ret_order(_ret_order) { + } + + const std::vector* get_arg_order() const override { + return arg_order.empty() ? nullptr : &arg_order; + } + const std::vector* get_ret_order() const override { + return ret_order.empty() ? nullptr : &ret_order; + } +}; + +struct SymValCodeFunc : SymValFunc { + CodeBlob* code; + ~SymValCodeFunc() override = default; + SymValCodeFunc(int val, TypeExpr* _ft, bool _impure = false) : SymValFunc(val, _ft, _impure), code(nullptr) { + } +}; + +struct SymValType : SymValBase { + TypeExpr* sym_type; + SymValType(int _type, int _idx, TypeExpr* _stype = nullptr) : SymValBase(_type, _idx), sym_type(_stype) { + } + ~SymValType() override = default; + TypeExpr* get_type() const { + return sym_type; + } +}; + +struct SymValGlobVar : SymValBase { + TypeExpr* sym_type; + int out_idx{0}; + SymValGlobVar(int val, TypeExpr* gvtype, int oidx = 0) + : SymValBase(_GlobVar, val), sym_type(gvtype), out_idx(oidx) { + } + ~SymValGlobVar() override = default; + TypeExpr* get_type() const { + return sym_type; + } +}; + +struct SymValConst : SymValBase { + td::RefInt256 intval; + std::string strval; + Keyword type; + SymValConst(int idx, td::RefInt256 value) + : SymValBase(_Const, idx), intval(value) { + type = _Int; + } + SymValConst(int idx, std::string value) + : SymValBase(_Const, idx), strval(value) { + type = _Slice; + } + ~SymValConst() override = default; + td::RefInt256 get_int_value() const { + return intval; + } + std::string get_str_value() const { + return strval; + } + Keyword get_type() const { + return 
type; + } +}; + +extern int glob_func_cnt, undef_func_cnt, glob_var_cnt; +extern std::vector glob_func, glob_vars; +extern std::set prohibited_var_names; + +/* + * + * PARSE SOURCE + * + */ + +class ReadCallback { +public: + /// Noncopyable. + ReadCallback(ReadCallback const&) = delete; + ReadCallback& operator=(ReadCallback const&) = delete; + + enum class Kind + { + ReadFile, + Realpath + }; + + static std::string kindString(Kind _kind) + { + switch (_kind) + { + case Kind::ReadFile: + return "source"; + case Kind::Realpath: + return "realpath"; + default: + throw ""; // todo ? + } + } + + /// File reading or generic query callback. + using Callback = std::function(ReadCallback::Kind, const char*)>; +}; + +// defined in parse-tolk.cpp +bool parse_source(std::istream* is, const FileDescr* fdescr); +bool parse_source_file(const char* filename, Lexem lex = {}, bool is_main = false); +bool parse_source_stdin(); + +extern std::stack inclusion_locations; + +/* + * + * EXPRESSIONS + * + */ + +struct Expr { + enum { + _None, + _Apply, + _VarApply, + _TypeApply, + _MkTuple, + _Tensor, + _Const, + _Var, + _Glob, + _GlobVar, + _Letop, + _LetFirst, + _Hole, + _Type, + _CondExpr, + _SliceConst + }; + int cls; + int val{0}; + enum { _IsType = 1, _IsRvalue = 2, _IsLvalue = 4, _IsHole = 8, _IsNewVar = 16, _IsImpure = 32 }; + int flags{0}; + SrcLocation here; + td::RefInt256 intval; + std::string strval; + SymDef* sym{nullptr}; + TypeExpr* e_type{nullptr}; + std::vector args; + Expr(int c = _None) : cls(c) { + } + Expr(int c, const SrcLocation& loc) : cls(c), here(loc) { + } + Expr(int c, std::vector _args) : cls(c), args(std::move(_args)) { + } + Expr(int c, std::initializer_list _arglist) : cls(c), args(std::move(_arglist)) { + } + Expr(int c, SymDef* _sym, std::initializer_list _arglist) : cls(c), sym(_sym), args(std::move(_arglist)) { + } + Expr(int c, SymDef* _sym, std::vector _arglist) : cls(c), sym(_sym), args(std::move(_arglist)) { + } + Expr(int c, sym_idx_t name_idx, 
std::initializer_list _arglist); + ~Expr() { + for (auto& arg_ptr : args) { + delete arg_ptr; + } + } + Expr* copy() const; + void pb_arg(Expr* expr) { + args.push_back(expr); + } + void set_val(int _val) { + val = _val; + } + bool is_rvalue() const { + return flags & _IsRvalue; + } + bool is_lvalue() const { + return flags & _IsLvalue; + } + bool is_type() const { + return flags & _IsType; + } + bool is_type_apply() const { + return cls == _TypeApply; + } + bool is_mktuple() const { + return cls == _MkTuple; + } + void chk_rvalue(const Lexem& lem) const; + void chk_lvalue(const Lexem& lem) const; + void chk_type(const Lexem& lem) const; + bool deduce_type(const Lexem& lem); + void set_location(const SrcLocation& loc) { + here = loc; + } + const SrcLocation& get_location() const { + return here; + } + int define_new_vars(CodeBlob& code); + int predefine_vars(); + std::vector pre_compile(CodeBlob& code, std::vector>* lval_globs = nullptr) const; + static std::vector pre_compile_let(CodeBlob& code, Expr* lhs, Expr* rhs, const SrcLocation& here); + var_idx_t new_tmp(CodeBlob& code) const; + std::vector new_tmp_vect(CodeBlob& code) const { + return {new_tmp(code)}; + } +}; + +/* + * + * GENERATE CODE + * + */ + +typedef std::vector StackLayout; +typedef std::pair var_const_idx_t; +typedef std::vector StackLayoutExt; +constexpr const_idx_t not_const = -1; +using Const = td::RefInt256; + +struct AsmOp { + enum Type { a_none, a_xchg, a_push, a_pop, a_const, a_custom, a_magic }; + int t{a_none}; + int indent{0}; + int a, b, c; + bool gconst{false}; + std::string op; + td::RefInt256 origin; + struct SReg { + int idx; + SReg(int _idx) : idx(_idx) { + } + }; + AsmOp() = default; + AsmOp(int _t) : t(_t) { + } + AsmOp(int _t, std::string _op) : t(_t), op(std::move(_op)) { + } + AsmOp(int _t, int _a) : t(_t), a(_a) { + } + AsmOp(int _t, int _a, std::string _op) : t(_t), a(_a), op(std::move(_op)) { + } + AsmOp(int _t, int _a, int _b) : t(_t), a(_a), b(_b) { + } + AsmOp(int _t, 
int _a, int _b, std::string _op) : t(_t), a(_a), b(_b), op(std::move(_op)) { + compute_gconst(); + } + AsmOp(int _t, int _a, int _b, std::string _op, td::RefInt256 x) : t(_t), a(_a), b(_b), op(std::move(_op)), origin(x) { + compute_gconst(); + } + AsmOp(int _t, int _a, int _b, int _c) : t(_t), a(_a), b(_b), c(_c) { + } + AsmOp(int _t, int _a, int _b, int _c, std::string _op) : t(_t), a(_a), b(_b), c(_c), op(std::move(_op)) { + } + void out(std::ostream& os) const; + void out_indent_nl(std::ostream& os, bool no_nl = false) const; + std::string to_string() const; + void compute_gconst() { + gconst = (is_custom() && (op == "PUSHNULL" || op == "NEWC" || op == "NEWB" || op == "TRUE" || op == "FALSE" || op == "NOW")); + } + bool is_nop() const { + return t == a_none && op.empty(); + } + bool is_comment() const { + return t == a_none && !op.empty(); + } + bool is_custom() const { + return t == a_custom; + } + bool is_very_custom() const { + return is_custom() && a >= 255; + } + bool is_push() const { + return t == a_push; + } + bool is_push(int x) const { + return is_push() && a == x; + } + bool is_push(int* x) const { + *x = a; + return is_push(); + } + bool is_pop() const { + return t == a_pop; + } + bool is_pop(int x) const { + return is_pop() && a == x; + } + bool is_xchg() const { + return t == a_xchg; + } + bool is_xchg(int x, int y) const { + return is_xchg() && b == y && a == x; + } + bool is_xchg(int* x, int* y) const { + *x = a; + *y = b; + return is_xchg(); + } + bool is_xchg_short() const { + return is_xchg() && (a <= 1 || b <= 1); + } + bool is_swap() const { + return is_xchg(0, 1); + } + bool is_const() const { + return t == a_const && !a && b == 1; + } + bool is_gconst() const { + return !a && b == 1 && (t == a_const || gconst); + } + static AsmOp Nop() { + return AsmOp(a_none); + } + static AsmOp Xchg(int a, int b = 0) { + return a == b ? AsmOp(a_none) : (a < b ? 
AsmOp(a_xchg, a, b) : AsmOp(a_xchg, b, a)); + } + static AsmOp Push(int a) { + return AsmOp(a_push, a); + } + static AsmOp Pop(int a = 0) { + return AsmOp(a_pop, a); + } + static AsmOp Xchg2(int a, int b) { + return make_stk2(a, b, "XCHG2", 0); + } + static AsmOp XcPu(int a, int b) { + return make_stk2(a, b, "XCPU", 1); + } + static AsmOp PuXc(int a, int b) { + return make_stk2(a, b, "PUXC", 1); + } + static AsmOp Push2(int a, int b) { + return make_stk2(a, b, "PUSH2", 2); + } + static AsmOp Xchg3(int a, int b, int c) { + return make_stk3(a, b, c, "XCHG3", 0); + } + static AsmOp Xc2Pu(int a, int b, int c) { + return make_stk3(a, b, c, "XC2PU", 1); + } + static AsmOp XcPuXc(int a, int b, int c) { + return make_stk3(a, b, c, "XCPUXC", 1); + } + static AsmOp XcPu2(int a, int b, int c) { + return make_stk3(a, b, c, "XCPU2", 3); + } + static AsmOp PuXc2(int a, int b, int c) { + return make_stk3(a, b, c, "PUXC2", 3); + } + static AsmOp PuXcPu(int a, int b, int c) { + return make_stk3(a, b, c, "PUXCPU", 3); + } + static AsmOp Pu2Xc(int a, int b, int c) { + return make_stk3(a, b, c, "PU2XC", 3); + } + static AsmOp Push3(int a, int b, int c) { + return make_stk3(a, b, c, "PUSH3", 3); + } + static AsmOp BlkSwap(int a, int b); + static AsmOp BlkPush(int a, int b); + static AsmOp BlkDrop(int a); + static AsmOp BlkDrop2(int a, int b); + static AsmOp BlkReverse(int a, int b); + static AsmOp make_stk2(int a, int b, const char* str, int delta); + static AsmOp make_stk3(int a, int b, int c, const char* str, int delta); + static AsmOp IntConst(td::RefInt256 value); + static AsmOp BoolConst(bool f); + static AsmOp Const(std::string push_op, td::RefInt256 origin = {}) { + return AsmOp(a_const, 0, 1, std::move(push_op), origin); + } + static AsmOp Const(int arg, std::string push_op, td::RefInt256 origin = {}); + static AsmOp Comment(std::string comment) { + return AsmOp(a_none, std::string{"// "} + comment); + } + static AsmOp Custom(std::string custom_op) { + return AsmOp(a_custom, 
255, 255, custom_op); + } + static AsmOp Parse(std::string custom_op); + static AsmOp Custom(std::string custom_op, int args, int retv = 1) { + return AsmOp(a_custom, args, retv, custom_op); + } + static AsmOp Parse(std::string custom_op, int args, int retv = 1); + static AsmOp Tuple(int a); + static AsmOp UnTuple(int a); +}; + +inline std::ostream& operator<<(std::ostream& os, const AsmOp& op) { + op.out(os); + return os; +} + +std::ostream& operator<<(std::ostream& os, AsmOp::SReg stack_reg); + +struct AsmOpList { + std::vector list_; + int indent_{0}; + const std::vector* var_names_{nullptr}; + std::vector constants_; + bool retalt_{false}; + void out(std::ostream& os, int mode = 0) const; + AsmOpList(int indent = 0, const std::vector* var_names = nullptr) : indent_(indent), var_names_(var_names) { + } + template + AsmOpList& add(Args&&... args) { + append(AsmOp(std::forward(args)...)); + adjust_last(); + return *this; + } + bool append(const AsmOp& op) { + list_.push_back(op); + adjust_last(); + return true; + } + bool append(const std::vector& ops); + bool append(std::initializer_list ops) { + return append(std::vector(std::move(ops))); + } + AsmOpList& operator<<(const AsmOp& op) { + return add(op); + } + AsmOpList& operator<<(AsmOp&& op) { + return add(std::move(op)); + } + AsmOpList& operator<<(std::string str) { + return add(AsmOp::Type::a_custom, 255, 255, str); + } + const_idx_t register_const(Const new_const); + Const get_const(const_idx_t idx); + void show_var(std::ostream& os, var_idx_t idx) const; + void show_var_ext(std::ostream& os, std::pair idx_pair) const; + void adjust_last() { + if (list_.back().is_nop()) { + list_.pop_back(); + } else { + list_.back().indent = indent_; + } + } + void indent() { + ++indent_; + } + void undent() { + --indent_; + } + void set_indent(int new_indent) { + indent_ = new_indent; + } + void insert(size_t pos, std::string str) { + insert(pos, AsmOp(AsmOp::a_custom, 255, 255, str)); + } + void insert(size_t pos, const 
AsmOp& op) { + auto ip = list_.begin() + pos; + ip = list_.insert(ip, op); + ip->indent = (ip == list_.begin()) ? indent_ : (ip - 1)->indent; + } + void indent_all() { + for (auto &op : list_) { + ++op.indent; + } + } +}; + +inline std::ostream& operator<<(std::ostream& os, const AsmOpList& op_list) { + op_list.out(os); + return os; +} + +class IndentGuard { + AsmOpList& aol_; + + public: + IndentGuard(AsmOpList& aol) : aol_(aol) { + aol.indent(); + } + ~IndentGuard() { + aol_.undent(); + } +}; + +struct AsmOpCons { + std::unique_ptr car; + std::unique_ptr cdr; + AsmOpCons(std::unique_ptr head, std::unique_ptr tail) : car(std::move(head)), cdr(std::move(tail)) { + } + static std::unique_ptr cons(std::unique_ptr head, std::unique_ptr tail) { + return std::make_unique(std::move(head), std::move(tail)); + } +}; + +using AsmOpConsList = std::unique_ptr; + +int is_pos_pow2(td::RefInt256 x); +int is_neg_pow2(td::RefInt256 x); + +/* + * + * STACK TRANSFORMS + * + */ + +/* +A stack transform is a map f:N={0,1,...} -> N, such that f(x) = x + d_f for almost all x:N and for a fixed d_f:N. +They form a monoid under composition: (fg)(x)=f(g(x)). +They act on stacks S on the right: Sf=S', such that S'[n]=S[f(n)]. + +A stack transform f is determined by d_f and the finite set A of all pairs (x,y), such that x>=d_f, f(x-d_f) = y and y<>x. They are listed in increasing order by x. +*/ +struct StackTransform { + enum { max_n = 16, inf_x = 0x7fffffff, c_start = -1000 }; + int d{0}, n{0}, dp{0}, c{0}; + bool invalid{false}; + std::array, max_n> A; + StackTransform() = default; + // list of f(0),f(1),...,f(s); assumes next values are f(s)+1,f(s)+2,... 
+ StackTransform(std::initializer_list list); + StackTransform& operator=(std::initializer_list list); + bool assign(const StackTransform& other); + static StackTransform id() { + return {}; + } + bool invalidate() { + invalid = true; + return false; + } + bool is_valid() const { + return !invalid; + } + bool set_id() { + d = n = dp = c = 0; + invalid = false; + return true; + } + bool shift(int offs) { // post-composes with x -> x + offs + d += offs; + return offs <= 0 || remove_negative(); + } + bool remove_negative(); + bool touch(int i) { + dp = std::max(dp, i + d + 1); + return true; + } + bool is_permutation() const; // is f:N->N bijective ? + bool is_trivial_after(int x) const; // f(x') = x' + d for all x' >= x + int preimage_count(int y) const; // card f^{-1}(y) + std::vector preimage(int y) const; + bool apply_xchg(int i, int j, bool relaxed = false); + bool apply_push(int i); + bool apply_pop(int i = 0); + bool apply_push_newconst(); + bool apply_blkpop(int k); + bool apply(const StackTransform& other); // this = this * other + bool preapply(const StackTransform& other); // this = other * this + // c := a * b + static bool compose(const StackTransform& a, const StackTransform& b, StackTransform& c); + StackTransform& operator*=(const StackTransform& other); + StackTransform operator*(const StackTransform& b) const &; + bool equal(const StackTransform& other, bool relaxed = false) const; + bool almost_equal(const StackTransform& other) const { + return equal(other, true); + } + bool operator==(const StackTransform& other) const { + return dp == other.dp && almost_equal(other); + } + bool operator<=(const StackTransform& other) const { + return dp <= other.dp && almost_equal(other); + } + bool operator>=(const StackTransform& other) const { + return dp >= other.dp && almost_equal(other); + } + int get(int i) const; + int touch_get(int i, bool relaxed = false) { + if (!relaxed) { + touch(i); + } + return get(i); + } + bool set(int i, int v, bool relaxed = 
false); + int operator()(int i) const { + return get(i); + } + class Pos { + StackTransform& t_; + int p_; + + public: + Pos(StackTransform& t, int p) : t_(t), p_(p) { + } + Pos& operator=(const Pos& other) = delete; + operator int() const { + return t_.get(p_); + } + const Pos& operator=(int v) const { + t_.set(p_, v); + return *this; + } + }; + Pos operator[](int i) { + return Pos(*this, i); + } + static const StackTransform rot; + static const StackTransform rot_rev; + bool is_id() const { + return is_valid() && !d && !n; + } + bool is_xchg(int i, int j) const; + bool is_xchg(int* i, int* j) const; + bool is_xchg_xchg(int i, int j, int k, int l) const; + bool is_xchg_xchg(int* i, int* j, int* k, int* l) const; + bool is_push(int i) const; + bool is_push(int* i) const; + bool is_pop(int i) const; + bool is_pop(int* i) const; + bool is_pop_pop(int i, int j) const; + bool is_pop_pop(int* i, int* j) const; + bool is_rot() const; + bool is_rotrev() const; + bool is_push_rot(int i) const; + bool is_push_rot(int* i) const; + bool is_push_rotrev(int i) const; + bool is_push_rotrev(int* i) const; + bool is_push_xchg(int i, int j, int k) const; + bool is_push_xchg(int* i, int* j, int* k) const; + bool is_xchg2(int i, int j) const; + bool is_xchg2(int* i, int* j) const; + bool is_xcpu(int i, int j) const; + bool is_xcpu(int* i, int* j) const; + bool is_puxc(int i, int j) const; + bool is_puxc(int* i, int* j) const; + bool is_push2(int i, int j) const; + bool is_push2(int* i, int* j) const; + bool is_xchg3(int* i, int* j, int* k) const; + bool is_xc2pu(int* i, int* j, int* k) const; + bool is_xcpuxc(int* i, int* j, int* k) const; + bool is_xcpu2(int* i, int* j, int* k) const; + bool is_puxc2(int i, int j, int k) const; + bool is_puxc2(int* i, int* j, int* k) const; + bool is_puxcpu(int* i, int* j, int* k) const; + bool is_pu2xc(int i, int j, int k) const; + bool is_pu2xc(int* i, int* j, int* k) const; + bool is_push3(int i, int j, int k) const; + bool is_push3(int* i, int* 
j, int* k) const; + bool is_blkswap(int i, int j) const; + bool is_blkswap(int* i, int* j) const; + bool is_blkpush(int i, int j) const; + bool is_blkpush(int* i, int* j) const; + bool is_blkdrop(int* i) const; + bool is_blkdrop2(int i, int j) const; + bool is_blkdrop2(int* i, int* j) const; + bool is_reverse(int i, int j) const; + bool is_reverse(int* i, int* j) const; + bool is_nip_seq(int i, int j = 0) const; + bool is_nip_seq(int* i) const; + bool is_nip_seq(int* i, int* j) const; + bool is_pop_blkdrop(int i, int k) const; + bool is_pop_blkdrop(int* i, int* k) const; + bool is_2pop_blkdrop(int i, int j, int k) const; + bool is_2pop_blkdrop(int* i, int* j, int* k) const; + bool is_const_rot(int c) const; + bool is_const_rot(int* c) const; + bool is_const_pop(int c, int i) const; + bool is_const_pop(int* c, int* i) const; + bool is_push_const(int i, int c) const; + bool is_push_const(int* i, int* c) const; + + void show(std::ostream& os, int mode = 0) const; + + static StackTransform Xchg(int i, int j, bool relaxed = false); + static StackTransform Push(int i); + static StackTransform Pop(int i); + + private: + int try_load(int& i, int offs = 0) const; // returns A[i++].first + offs or inf_x + bool try_store(int x, int y); // appends (x,y) to A +}; + +//extern const StackTransform StackTransform::rot, StackTransform::rot_rev; + +inline std::ostream& operator<<(std::ostream& os, const StackTransform& trans) { + trans.show(os); + return os; +} + +bool apply_op(StackTransform& trans, const AsmOp& op); + +/* + * + * STACK OPERATION OPTIMIZER + * + */ + +struct Optimizer { + enum { n = optimize_depth }; + AsmOpConsList code_; + int l_{0}, l2_{0}, p_, pb_, q_, indent_; + bool debug_{false}; + std::unique_ptr op_[n], oq_[n]; + AsmOpCons* op_cons_[n]; + int offs_[n]; + StackTransform tr_[n]; + int mode_{0}; + Optimizer() { + } + Optimizer(bool debug, int mode = 0) : debug_(debug), mode_(mode) { + } + Optimizer(AsmOpConsList code, bool debug = false, int mode = 0) : 
Optimizer(debug, mode) { + set_code(std::move(code)); + } + void set_code(AsmOpConsList code_); + void unpack(); + void pack(); + void apply(); + bool find_at_least(int pb); + bool find(); + bool optimize(); + bool compute_stack_transforms(); + bool say(std::string str) const; + bool show_stack_transforms() const; + void show_head() const; + void show_left() const; + void show_right() const; + bool find_const_op(int* op_idx, int cst); + bool is_push_const(int* i, int* c) const; + bool rewrite_push_const(int i, int c); + bool is_const_push_xchgs(); + bool rewrite_const_push_xchgs(); + bool is_const_rot(int* c) const; + bool rewrite_const_rot(int c); + bool is_const_pop(int* c, int* i) const; + bool rewrite_const_pop(int c, int i); + bool rewrite(int p, AsmOp&& new_op); + bool rewrite(int p, AsmOp&& new_op1, AsmOp&& new_op2); + bool rewrite(int p, AsmOp&& new_op1, AsmOp&& new_op2, AsmOp&& new_op3); + bool rewrite(AsmOp&& new_op) { + return rewrite(p_, std::move(new_op)); + } + bool rewrite(AsmOp&& new_op1, AsmOp&& new_op2) { + return rewrite(p_, std::move(new_op1), std::move(new_op2)); + } + bool rewrite(AsmOp&& new_op1, AsmOp&& new_op2, AsmOp&& new_op3) { + return rewrite(p_, std::move(new_op1), std::move(new_op2), std::move(new_op3)); + } + bool rewrite_nop(); + bool is_pred(const std::function& pred, int min_p = 2); + bool is_same_as(const StackTransform& trans, int min_p = 2); + bool is_rot(); + bool is_rotrev(); + bool is_tuck(); + bool is_2dup(); + bool is_2drop(); + bool is_2swap(); + bool is_2over(); + bool is_xchg(int* i, int* j); + bool is_xchg_xchg(int* i, int* j, int* k, int* l); + bool is_push(int* i); + bool is_pop(int* i); + bool is_pop_pop(int* i, int* j); + bool is_nop(); + bool is_push_rot(int* i); + bool is_push_rotrev(int* i); + bool is_push_xchg(int* i, int* j, int* k); + bool is_xchg2(int* i, int* j); + bool is_xcpu(int* i, int* j); + bool is_puxc(int* i, int* j); + bool is_push2(int* i, int* j); + bool is_xchg3(int* i, int* j, int* k); + bool 
is_xc2pu(int* i, int* j, int* k); + bool is_xcpuxc(int* i, int* j, int* k); + bool is_xcpu2(int* i, int* j, int* k); + bool is_puxc2(int* i, int* j, int* k); + bool is_puxcpu(int* i, int* j, int* k); + bool is_pu2xc(int* i, int* j, int* k); + bool is_push3(int* i, int* j, int* k); + bool is_blkswap(int* i, int* j); + bool is_blkpush(int* i, int* j); + bool is_blkdrop(int* i); + bool is_blkdrop2(int* i, int* j); + bool is_reverse(int* i, int* j); + bool is_nip_seq(int* i, int* j); + bool is_pop_blkdrop(int* i, int* k); + bool is_2pop_blkdrop(int* i, int* j, int* k); + AsmOpConsList extract_code(); +}; + +AsmOpConsList optimize_code_head(AsmOpConsList op_list, int mode = 0); +AsmOpConsList optimize_code(AsmOpConsList op_list, int mode); +void optimize_code(AsmOpList& ops); + +struct Stack { + StackLayoutExt s; + AsmOpList& o; + enum { + _StkCmt = 1, _CptStkCmt = 2, _DisableOpt = 4, _DisableOut = 128, _Shown = 256, + _InlineFunc = 512, _NeedRetAlt = 1024, _InlineAny = 2048, + _ModeSave = _InlineFunc | _NeedRetAlt | _InlineAny, + _Garbage = -0x10000 + }; + int mode; + Stack(AsmOpList& _o, int _mode = 0) : o(_o), mode(_mode) { + } + Stack(AsmOpList& _o, const StackLayoutExt& _s, int _mode = 0) : s(_s), o(_o), mode(_mode) { + } + Stack(AsmOpList& _o, StackLayoutExt&& _s, int _mode = 0) : s(std::move(_s)), o(_o), mode(_mode) { + } + int depth() const { + return (int)s.size(); + } + var_idx_t operator[](int i) const { + validate(i); + return s[depth() - i - 1].first; + } + var_const_idx_t& at(int i) { + validate(i); + return s[depth() - i - 1]; + } + var_const_idx_t at(int i) const { + validate(i); + return s[depth() - i - 1]; + } + var_const_idx_t get(int i) const { + return at(i); + } + bool output_disabled() const { + return mode & _DisableOut; + } + bool output_enabled() const { + return !output_disabled(); + } + void disable_output() { + mode |= _DisableOut; + } + StackLayout vars() const; + int find(var_idx_t var, int from = 0) const; + int find(var_idx_t var, int 
from, int to) const; + int find_const(const_idx_t cst, int from = 0) const; + int find_outside(var_idx_t var, int from, int to) const; + void forget_const(); + void validate(int i) const { + if (i > 255) { + throw Fatal{"Too deep stack"}; + } + tolk_assert(i >= 0 && i < depth() && "invalid stack reference"); + } + void modified() { + mode &= ~_Shown; + } + void issue_pop(int i); + void issue_push(int i); + void issue_xchg(int i, int j); + int drop_vars_except(const VarDescrList& var_info, int excl_var = 0x80000000); + void forget_var(var_idx_t idx); + void push_new_var(var_idx_t idx); + void push_new_const(var_idx_t idx, const_idx_t cidx); + void assign_var(var_idx_t new_idx, var_idx_t old_idx); + void do_copy_var(var_idx_t new_idx, var_idx_t old_idx); + void enforce_state(const StackLayout& req_stack); + void rearrange_top(const StackLayout& top, std::vector last); + void rearrange_top(var_idx_t top, bool last); + void merge_const(const Stack& req_stack); + void merge_state(const Stack& req_stack); + void show(int _mode); + void show() { + show(mode); + } + void opt_show() { + if ((mode & (_StkCmt | _Shown)) == _StkCmt) { + show(mode); + } + } + bool operator==(const Stack& y) const & { + return s == y.s; + } + void apply_wrappers(int callxargs_count) { + bool is_inline = mode & _InlineFunc; + if (o.retalt_) { + o.insert(0, "SAMEALTSAVE"); + o.insert(0, "c2 SAVE"); + } + if (callxargs_count != -1 || (is_inline && o.retalt_)) { + o.indent_all(); + o.insert(0, "CONT:<{"); + o << "}>"; + if (callxargs_count != -1) { + if (callxargs_count <= 15) { + o << AsmOp::Custom(PSTRING() << callxargs_count << " -1 CALLXARGS"); + } else { + tolk_assert(callxargs_count <= 254); + o << AsmOp::Custom(PSTRING() << callxargs_count << " PUSHINT -1 PUSHINT CALLXVARARGS"); + } + } else { + o << "EXECUTE"; + } + } + } +}; + +/* + * + * SPECIFIC SYMBOL VALUES, + * BUILT-IN FUNCTIONS AND OPERATIONS + * + */ + +typedef std::function&, std::vector&, const SrcLocation)> simple_compile_func_t; 
+typedef std::function&, std::vector&)> compile_func_t; + +inline simple_compile_func_t make_simple_compile(AsmOp op) { + return [op](std::vector& out, std::vector& in, const SrcLocation&) -> AsmOp { return op; }; +} + +inline compile_func_t make_ext_compile(std::vector ops) { + return [ops = std::move(ops)](AsmOpList & dest, std::vector & out, std::vector & in)->bool { + return dest.append(ops); + }; +} + +inline compile_func_t make_ext_compile(AsmOp op) { + return + [op](AsmOpList& dest, std::vector& out, std::vector& in) -> bool { return dest.append(op); }; +} + +struct SymValAsmFunc : SymValFunc { + simple_compile_func_t simple_compile; + compile_func_t ext_compile; + td::uint64 crc; + ~SymValAsmFunc() override = default; + SymValAsmFunc(TypeExpr* ft, const AsmOp& _macro, bool impure = false) + : SymValFunc(-1, ft, impure), simple_compile(make_simple_compile(_macro)) { + } + SymValAsmFunc(TypeExpr* ft, std::vector _macro, bool impure = false) + : SymValFunc(-1, ft, impure), ext_compile(make_ext_compile(std::move(_macro))) { + } + SymValAsmFunc(TypeExpr* ft, simple_compile_func_t _compile, bool impure = false) + : SymValFunc(-1, ft, impure), simple_compile(std::move(_compile)) { + } + SymValAsmFunc(TypeExpr* ft, compile_func_t _compile, bool impure = false) + : SymValFunc(-1, ft, impure), ext_compile(std::move(_compile)) { + } + SymValAsmFunc(TypeExpr* ft, simple_compile_func_t _compile, std::initializer_list arg_order, + std::initializer_list ret_order = {}, bool impure = false) + : SymValFunc(-1, ft, arg_order, ret_order, impure), simple_compile(std::move(_compile)) { + } + SymValAsmFunc(TypeExpr* ft, compile_func_t _compile, std::initializer_list arg_order, + std::initializer_list ret_order = {}, bool impure = false) + : SymValFunc(-1, ft, arg_order, ret_order, impure), ext_compile(std::move(_compile)) { + } + bool compile(AsmOpList& dest, std::vector& out, std::vector& in, const SrcLocation& where) const; +}; + +// defined in builtins.cpp +AsmOp 
exec_arg_op(std::string op, long long arg); +AsmOp exec_arg_op(std::string op, long long arg, int args, int retv = 1); +AsmOp exec_arg_op(std::string op, td::RefInt256 arg); +AsmOp exec_arg_op(std::string op, td::RefInt256 arg, int args, int retv = 1); +AsmOp exec_arg2_op(std::string op, long long imm1, long long imm2, int args, int retv = 1); +AsmOp push_const(td::RefInt256 x); + +void define_builtins(); + + +extern int verbosity, indent, opt_level; +extern bool stack_layout_comments, op_rewrite_comments, program_envelope, asm_preamble, interactive; +extern std::string generated_from, boc_output_filename; +extern ReadCallback::Callback read_callback; + +td::Result fs_read_callback(ReadCallback::Kind kind, const char* query); + +class GlobalPragma { + public: + explicit GlobalPragma(std::string name) : name_(std::move(name)) { + } + const std::string& name() const { + return name_; + } + bool enabled() const { + return enabled_; + } + void enable(SrcLocation loc) { + enabled_ = true; + locs_.push_back(std::move(loc)); + } + void check_enable_in_libs() { + if (locs_.empty()) { + return; + } + for (const SrcLocation& loc : locs_) { + if (loc.fdescr->is_main) { + return; + } + } + locs_[0].show_warning(PSTRING() << "#pragma " << name_ + << " is enabled in included libraries, it may change the behavior of your code. " + << "Add this #pragma to the main source file to suppress this warning."); + } + + private: + std::string name_; + bool enabled_ = false; + std::vector locs_; +}; +extern GlobalPragma pragma_allow_post_modification, pragma_compute_asm_ltr; + +/* + * + * OUTPUT CODE GENERATOR + * + */ + +int tolk_proceed(const std::vector &sources, std::ostream &outs, std::ostream &errs); + +} // namespace tolk + + diff --git a/tolk/unify-types.cpp b/tolk/unify-types.cpp new file mode 100644 index 00000000..4e28dc83 --- /dev/null +++ b/tolk/unify-types.cpp @@ -0,0 +1,429 @@ +/* + This file is part of TON Blockchain Library. 
+ + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "tolk.h" + +namespace tolk { + +/* + * + * TYPE EXPRESSIONS + * + */ + +int TypeExpr::holes = 0, TypeExpr::type_vars = 0; // not thread safe, but it is ok for now + +void TypeExpr::compute_width() { + switch (constr) { + case te_Atomic: + case te_Map: + minw = maxw = 1; + break; + case te_Tensor: + minw = maxw = 0; + for (TypeExpr* arg : args) { + minw += arg->minw; + maxw += arg->maxw; + } + if (minw > w_inf) { + minw = w_inf; + } + if (maxw > w_inf) { + maxw = w_inf; + } + break; + case te_Tuple: + minw = maxw = 1; + for (TypeExpr* arg : args) { + arg->compute_width(); + } + break; + case te_Indirect: + minw = args[0]->minw; + maxw = args[0]->maxw; + break; + default: + minw = 0; + maxw = w_inf; + break; + } +} + +bool TypeExpr::recompute_width() { + switch (constr) { + case te_Tensor: + case te_Indirect: { + int min = 0, max = 0; + for (TypeExpr* arg : args) { + min += arg->minw; + max += arg->maxw; + } + if (min > maxw || max < minw) { + return false; + } + if (min > w_inf) { + min = w_inf; + } + if (max > w_inf) { + max = w_inf; + } + if (minw < min) { + minw = min; + } + if (maxw > max) { + maxw = max; + } + return true; + } + case te_Tuple: { + for (TypeExpr* arg : args) { + if (arg->minw > 1 || arg->maxw < 1 || arg->minw > arg->maxw) { + return false; + } + } + return true; + } + default: + return 
false; + } +} + +int TypeExpr::extract_components(std::vector& comp_list) { + if (constr != te_Indirect && constr != te_Tensor) { + comp_list.push_back(this); + return 1; + } + int res = 0; + for (TypeExpr* arg : args) { + res += arg->extract_components(comp_list); + } + return res; +} + +TypeExpr* TypeExpr::new_map(TypeExpr* from, TypeExpr* to) { + return new TypeExpr{te_Map, std::vector{from, to}}; +} + +void TypeExpr::replace_with(TypeExpr* te2) { + if (te2 == this) { + return; + } + constr = te_Indirect; + value = 0; + minw = te2->minw; + maxw = te2->maxw; + args.clear(); + args.push_back(te2); +} + +bool TypeExpr::remove_indirect(TypeExpr*& te, TypeExpr* forbidden) { + tolk_assert(te); + while (te->constr == te_Indirect) { + te = te->args[0]; + } + if (te->constr == te_Unknown) { + return te != forbidden; + } + bool res = true; + for (auto& x : te->args) { + res &= remove_indirect(x, forbidden); + } + return res; +} + +std::vector TypeExpr::remove_forall(TypeExpr*& te) { + tolk_assert(te && te->constr == te_ForAll); + tolk_assert(te->args.size() >= 1); + std::vector new_vars; + for (std::size_t i = 1; i < te->args.size(); i++) { + new_vars.push_back(new_hole(1)); + } + TypeExpr* te2 = te; + // std::cerr << "removing universal quantifier in " << te << std::endl; + te = te->args[0]; + remove_forall_in(te, te2, new_vars); + // std::cerr << "-> " << te << std::endl; + return new_vars; +} + +bool TypeExpr::remove_forall_in(TypeExpr*& te, TypeExpr* te2, const std::vector& new_vars) { + tolk_assert(te); + tolk_assert(te2 && te2->constr == te_ForAll); + if (te->constr == te_Var) { + for (std::size_t i = 0; i < new_vars.size(); i++) { + if (te == te2->args[i + 1]) { + te = new_vars[i]; + return true; + } + } + return false; + } + if (te->constr == te_ForAll) { + return false; + } + if (te->args.empty()) { + return false; + } + auto te1 = new TypeExpr(*te); + bool res = false; + for (auto& arg : te1->args) { + res |= remove_forall_in(arg, te2, new_vars); + } + if (res) 
{ + te = te1; + } else { + delete te1; + } + return res; +} + +void TypeExpr::show_width(std::ostream& os) { + os << minw; + if (maxw != minw) { + os << ".."; + if (maxw < w_inf) { + os << maxw; + } + } +} + +std::ostream& operator<<(std::ostream& os, TypeExpr* type_expr) { + if (!type_expr) { + return os << "(null-type-ptr)"; + } + return type_expr->print(os); +} + +std::ostream& TypeExpr::print(std::ostream& os, int lex_level) { + switch (constr) { + case te_Unknown: + return os << "??" << value; + case te_Var: + if (value >= -26 && value < 0) { + return os << "_" << (char)(91 + value); + } else if (value >= 0 && value < 26) { + return os << (char)(65 + value); + } else { + return os << "TVAR" << value; + } + case te_Indirect: + return os << args[0]; + case te_Atomic: { + switch (value) { + case _Int: + return os << "int"; + case _Cell: + return os << "cell"; + case _Slice: + return os << "slice"; + case _Builder: + return os << "builder"; + case _Cont: + return os << "cont"; + case _Tuple: + return os << "tuple"; + case _Type: + return os << "type"; + default: + return os << "atomic-type-" << value; + } + } + case te_Tensor: { + if (lex_level > -127) { + os << "("; + } + auto c = args.size(); + if (c) { + for (const auto& x : args) { + x->print(os); + if (--c) { + os << ", "; + } + } + } + if (lex_level > -127) { + os << ")"; + } + return os; + } + case te_Tuple: { + os << "["; + auto c = args.size(); + if (c == 1 && args[0]->constr == te_Tensor) { + args[0]->print(os, -127); + } else if (c) { + for (const auto& x : args) { + x->print(os); + if (--c) { + os << ", "; + } + } + } + return os << "]"; + } + case te_Map: { + tolk_assert(args.size() == 2); + if (lex_level > 0) { + os << "("; + } + args[0]->print(os, 1); + os << " -> "; + args[1]->print(os); + if (lex_level > 0) { + os << ")"; + } + return os; + } + case te_ForAll: { + tolk_assert(args.size() >= 1); + if (lex_level > 0) { + os << '('; + } + os << "Forall "; + for (std::size_t i = 1; i < args.size(); 
i++) { + os << (i > 1 ? ' ' : '('); + args[i]->print(os); + } + os << ") "; + args[0]->print(os); + if (lex_level > 0) { + os << ')'; + } + return os; + } + default: + return os << "unknown-type-expr-" << constr; + } +} + +void UnifyError::print_message(std::ostream& os) const { + os << "cannot unify type " << te1 << " with " << te2; + if (!msg.empty()) { + os << ": " << msg; + } +} + +std::ostream& operator<<(std::ostream& os, const UnifyError& ue) { + ue.print_message(os); + return os; +} + +std::string UnifyError::message() const { + std::ostringstream os; + print_message(os); + return os.str(); +} + +void check_width_compat(TypeExpr* te1, TypeExpr* te2) { + if (te1->minw > te2->maxw || te2->minw > te1->maxw) { + std::ostringstream os{"cannot unify types of widths ", std::ios_base::ate}; + te1->show_width(os); + os << " and "; + te2->show_width(os); + throw UnifyError{te1, te2, os.str()}; + } +} + +void check_update_widths(TypeExpr* te1, TypeExpr* te2) { + check_width_compat(te1, te2); + te1->minw = te2->minw = std::max(te1->minw, te2->minw); + te1->maxw = te2->maxw = std::min(te1->maxw, te2->maxw); + tolk_assert(te1->minw <= te1->maxw); +} + +void unify(TypeExpr*& te1, TypeExpr*& te2) { + tolk_assert(te1 && te2); + // std::cerr << "unify( " << te1 << " , " << te2 << " )\n"; + while (te1->constr == TypeExpr::te_Indirect) { + te1 = te1->args[0]; + } + while (te2->constr == TypeExpr::te_Indirect) { + te2 = te2->args[0]; + } + if (te1 == te2) { + return; + } + if (te1->constr == TypeExpr::te_ForAll) { + TypeExpr* te = te1; + std::vector new_vars = TypeExpr::remove_forall(te); + for (TypeExpr* t : new_vars) { + t->was_forall_var = true; + } + unify(te, te2); + for (TypeExpr* t : new_vars) { + t->was_forall_var = false; + } + return; + } + if (te2->constr == TypeExpr::te_ForAll) { + TypeExpr* te = te2; + std::vector new_vars = TypeExpr::remove_forall(te); + for (TypeExpr* t : new_vars) { + t->was_forall_var = true; + } + unify(te1, te); + for (TypeExpr* t : new_vars) 
{ + t->was_forall_var = false; + } + return; + } + if (te1->was_forall_var && te2->constr == TypeExpr::te_Tensor) { + throw UnifyError{te1, te2, "cannot unify generic type and tensor"}; + } + if (te2->was_forall_var && te1->constr == TypeExpr::te_Tensor) { + throw UnifyError{te2, te1, "cannot unify generic type and tensor"}; + } + if (te1->constr == TypeExpr::te_Unknown) { + if (te2->constr == TypeExpr::te_Unknown) { + tolk_assert(te1->value != te2->value); + } + if (!TypeExpr::remove_indirect(te2, te1)) { + throw UnifyError{te1, te2, "type unification results in an infinite cyclic type"}; + } + check_update_widths(te1, te2); + te1->replace_with(te2); + te1 = te2; + return; + } + if (te2->constr == TypeExpr::te_Unknown) { + if (!TypeExpr::remove_indirect(te1, te2)) { + throw UnifyError{te2, te1, "type unification results in an infinite cyclic type"}; + } + check_update_widths(te2, te1); + te2->replace_with(te1); + te2 = te1; + return; + } + if (te1->constr != te2->constr || te1->value != te2->value || te1->args.size() != te2->args.size()) { + throw UnifyError{te1, te2}; + } + for (std::size_t i = 0; i < te1->args.size(); i++) { + unify(te1->args[i], te2->args[i]); + } + if (te1->constr == TypeExpr::te_Tensor) { + if (!te1->recompute_width()) { + throw UnifyError{te1, te2, "type unification incompatible with known width of first type"}; + } + if (!te2->recompute_width()) { + throw UnifyError{te2, te1, "type unification incompatible with known width of first type"}; + } + check_update_widths(te1, te2); + } + te1->replace_with(te2); + te1 = te2; +} + +} // namespace tolk