diff --git a/src/main.tex b/src/main.tex index 120a68f..f63d3e0 100644 --- a/src/main.tex +++ b/src/main.tex @@ -113,7 +113,7 @@ %-------------SINGLE-AUTHOR HEADER STARTS (uncomment below if your paper has a single author)----------------------- \affiliation{ -\paperauthorA \,\sthanks{This work was supported by JSPS KAKENHI (Grant No. JP19K21615). }} +\paperauthorA} {\href{https://matsuuratomoya.com}{Tokyo University of the Arts} \\ Tokyo, Japan\\ {\tt \href{mailto:me@matsuuratomoya.com}{me@matsuuratomoya.com}} } @@ -251,11 +251,12 @@ mimium by the author has a keyword $self$ that can be used in function definitio Additional typing rules to the usual simply-typed lambda calculus are shown in Figure \ref{fig:typing}. +As primitive types, there are a real number type, used in most of the signal processing, and a natural number type, used as the index of delay. + In the W-calculus, a starting point for the design of \lambdammm, function types can take tuples of real numbers and return tuples of real numbers. This means that higher-order functions cannot be written. While this restriction is a reasonable design choice for a signal-processing language, since higher-order functions require dynamically allocated data structures such as closures for their implementation, it also sacrifices the generality of the lambda calculus. In \lambdammm, the problem of memory allocation for closures is left to the implementation of the runtime in Section \ref{sec:vm}, and higher-order functions are allowed. However, the $feed$ abstraction does not allow function types as its input and output. Allowing the return of function types in the $feed$ abstraction would make it possible to define functions whose processing contents change from time to time. While this may be interesting theoretically, there are currently no practical cases in real-world signal processing, and it is expected to further complicate implementations.
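As an intuition for the delay primitive and its natural-number index, the following is a hypothetical ring-buffer sketch in Rust (illustrative names only, not the mimium runtime): the maximum length is fixed as a natural number, and samples before time 0 read as the default value 0.0.

```rust
// Hypothetical sketch of a fixed-maximum-length delay line.
// `max_len` plays the role of the natural-number delay bound; reads that
// reach before time 0 yield 0.0, the default value of the real type.
struct Delay {
    buf: Vec<f64>,
    write: usize,
}

impl Delay {
    fn new(max_len: usize) -> Self {
        Delay { buf: vec![0.0; max_len], write: 0 }
    }

    // Write the current input, then read the sample `time` steps back
    // (`time` must be smaller than `max_len`).
    fn process(&mut self, input: f64, time: usize) -> f64 {
        self.buf[self.write] = input;
        let idx = (self.write + self.buf.len() - time) % self.buf.len();
        self.write = (self.write + 1) % self.buf.len();
        self.buf[idx]
    }
}
```

The bound being a static natural number is what allows the buffer to be allocated without dynamic memory management, matching the design constraint discussed above.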
- \input{typing.tex} @@ -264,19 +265,20 @@ In \lambdammm, the problem of memory allocation for closures is left to the impl \input{semantics.tex} -The operational semantics of the \lambdammm is shown in Figure \ref{fig:semantics}. This big-step semantics is a conceptual explanation of the evaluation that, when the current time is $n$, the previous evaluation environment $t$ samples before can be referred to as $E^{n-t}$ , and that when the time < 0, the evaluation of any term is evaluated to the default value of its type (0 for the numeric types). +An excerpt of the operational semantics of \lambdammm is shown in Figure \ref{fig:semantics}. This big-step semantics is a conceptual explanation of the evaluation: when the current time is $n$, the evaluation environment $t$ samples earlier can be referred to as $E^{n-t}$, and when the time is less than 0, any term evaluates to the default value of its type (0 for the numeric types). +Of course, if we tried to execute this semantics in a straightforward manner, we would have to redo the calculation from time 0 up to the current time at every sample, while saving all the variable environments at each sample. In practice, therefore, a virtual machine is defined that takes into account the internal memory space used by delay and feed, and the \lambdammm terms are converted into instructions for that machine before execution. \section{VM Model and Instruction Set} \label{sec:vm} -A model for the virtual machine and its instruction to run \lambdammm set is based on Lua 5.0\cite{ierusalimschy2005}. +The model for the virtual machine and its instruction set to run \\ \lambdammm is based on Lua 5.0\cite{ierusalimschy2005}. When executing a computational model based on the lambda calculus, the problem is how to handle the data structure called a closure, which captures the variable environment where an inner function is defined so that the inner function can refer to outer variables.
If the dictionary data of variable names and values is paired with the inner function, the implementation of the compiler (interpreter) is simple, but run-time performance is limited. On the contrary, runtime performance can be improved by performing a process called closure conversion (or lambda lifting), which analyses all the names of outer variables referred to by the inner function and transforms the inner function by adding arguments so that the variables can be referenced explicitly, but the compiler implementation of this transformation is relatively complex. -The Lua VM takes an intermediate approach between these two by adding the VM instructions \texttt{GETUPVALUE} / \texttt{SETUPVALUE}, which allows the outer variables to be referenced dynamically at runtime. The implementation of compiler and VM using \textit{Upvalue} is simpler than closure conversion, while at the same time preventing execution performance degradation, as outer variables can be referenced via the call stack rather than on the heap memory unless the closure object escapes from the context of the original function\cite{nystrom2021}. +The Lua VM takes an intermediate approach between these two by adding the VM instructions \texttt{GETUPVALUE} / \\ \texttt{SETUPVALUE}, which allow the outer variables to be referenced dynamically at runtime. The implementation of a compiler and VM using \textit{Upvalue}s is simpler than closure conversion, while at the same time preventing execution performance degradation, as outer variables can be referenced via the call stack rather than on the heap unless the closure object escapes from the context of the original function\cite{nystrom2021}. Also, Upvalues help interoperation with other programming languages: Lua can easily be embedded through its C language API, and when implementing external libraries in C, the programmer can access not only the stack values but also the Upvalues of the Lua runtime through the C API.
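The two strategies above can be contrasted in a small sketch (hypothetical Rust with illustrative names, not the mimium implementation): one function captures an outer variable in a closure environment, while its lambda-lifted counterpart receives the same variable as an explicit extra argument.

```rust
// 1. Closure: `g` is an outer variable captured in the environment of the
//    inner function; the runtime must keep it reachable.
fn make_scaler(g: f64) -> impl Fn(f64) -> f64 {
    move |x| x * g
}

// 2. After closure conversion (lambda lifting): the outer variable is
//    threaded through as an explicit argument, so no environment is needed,
//    but every call site must be rewritten to supply it.
fn scaler_lifted(x: f64, g: f64) -> f64 {
    x * g
}
```

Lua's Upvalue mechanism sits between these two points: the closure object still exists, but outer variables are reached through \texttt{GETUPVALUE} at runtime instead of being rewritten into extra arguments at compile time.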
@@ -342,34 +344,34 @@ ADDF & A B C & R(A) := R(B) as int + R(C) as in \\ \subsection{Compilation to the VM instructions} -Listing \ref{lst:bytecodes_onepole} shows an basic example when the mimium code in Listing \ref{lst:onepole} is compiled into VM bytecode. When \texttt{self} is referenced, the value is obtained with the \texttt{GETSTATE} instruction, and the internal state is updated by storing the return value with the \\ \texttt{SETSTATE} instruction before returning the value with \texttt{RETURN} from the function. Here, the actual return value is obtained by the second GETSTATE instruction in order to return the initial value of the internal state when time=0. - -For example, when a time counter is written as \texttt{| | \{self + 1\}}, it is the compiler's design choice whether the return value of time=0 should be 0 or 1 but at present, the author think it is more intuitive to return 0. If the design is to return 1 when time = 0, the second \texttt{GETSTATE} instruction can be removed and the value for the \texttt{RETURN} instruction should be \texttt{R(2)}. 
- \begin{lstlisting}[float,floatplacement=H,label=lst:bytecodes_onepole,caption=\it Compiled VM instructions of one-pole filter example in Listing \ref{lst:onepole}] -CONSTANTS:[1.0] -fn onepole(x,g) state_size:1 -MOVECONST 2 0 // load 1.0 -MOVE 3 1 // load g -SUBF 2 2 3 // 1.0 - g -MOVE 3 0 // load x -MULF 2 2 3 // x * (1.0-g) -GETSTATE 3 // load self -MOVE 4 1 // load g -MULF 3 3 4 // self * g -ADDF 2 2 3 // compute result -GETSTATE 3 // prepare return value -SETSTATE 2 // store to self -RETURN 3 1 + CONSTANTS:[1.0] + fn onepole(x,g) state_size:1 + MOVECONST 2 0 // load 1.0 + MOVE 3 1 // load g + SUBF 2 2 3 // 1.0 - g + MOVE 3 0 // load x + MULF 2 2 3 // x * (1.0-g) + GETSTATE 3 // load self + MOVE 4 1 // load g + MULF 3 3 4 // self * g + ADDF 2 2 3 // compute result + GETSTATE 3 // prepare return value + SETSTATE 2 // store to self + RETURN 3 1 \end{lstlisting} +Listing \ref{lst:bytecodes_onepole} shows a basic example in which the mimium code in Listing \ref{lst:onepole} is compiled into VM bytecode. When \texttt{self} is referenced, the value is obtained with the \texttt{GETSTATE} instruction, and the internal state is updated by storing the computed value with the \\ \texttt{SETSTATE} instruction before returning from the function with \texttt{RETURN}. Here, the actual return value is obtained by the second \texttt{GETSTATE} instruction in order to return the initial value of the internal state when time = 0. + +For example, when a time counter is written as \texttt{| | \{self + 1\}}, it is the compiler's design choice whether the return value at time = 0 should be 0 or 1, though the latter does not strictly follow the semantics in Figure \ref{fig:semantics}. If the design is to return 1 when time = 0, the second \texttt{GETSTATE} instruction can be removed and the value for the \texttt{RETURN} instruction should be \texttt{R(2)}.
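The ordering of the two \texttt{GETSTATE} instructions can be mirrored in a small sketch (hypothetical Rust, illustrative names, not the mimium VM): the newly computed value is stored into the state, but the \emph{previous} state is what gets returned, so the time counter yields 0 at time 0.

```rust
// Hypothetical sketch of the GETSTATE / SETSTATE / RETURN ordering for the
// counter `| | { self + 1 }`; the state starts at the default value 0.0.
struct FeedCell {
    state: f64,
}

impl FeedCell {
    fn new() -> Self {
        FeedCell { state: 0.0 }
    }

    // One sample step of the counter.
    fn tick(&mut self) -> f64 {
        let new = self.state + 1.0; // self + 1, using the previous state
        let out = self.state;       // second GETSTATE: prepare return value
        self.state = new;           // SETSTATE: store to self
        out                         // RETURN the previous state
    }
}
```

Swapping the last two steps (returning `new` instead of `out`) corresponds to the alternative design in which the counter returns 1 at time 0.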
+ \subsection{Overview of the VM structure} \label{sec:vmstructure} The overview of the data structures of the VM, the program, and an instantiated closure for \lambdammm is shown in Figure \ref{fig:vmstructure}. In addition to the normal call stack, the VM has a storage area for managing internal state data for feedback and delay. -This storage area is accompanied by data indicating the position from which the internal state is retrieved by the \texttt{GETSTATE} / \texttt{SETSTATE} instructions. This position is modified by \texttt{SHIFTSTATE} operation. The the actual data in the state storage memory are statically layed out at compile time by analyzing function calls that include references to \texttt{self}, call of \texttt{delay} and the functions which will call such statefull functions recursively. +This storage area is accompanied by data indicating the position from which the internal state is retrieved by the \texttt{GETSTATE} / \texttt{SETSTATE} instructions. This position is modified by the \\ \texttt{SHIFTSTATE} operation. The actual data in the state storage memory are statically laid out at compile time by analyzing function calls that include references to \texttt{self}, calls to \texttt{delay}, and, recursively, the functions which call such stateful functions. However, in the case of higher-order functions that receive a function as an argument and return another function, the layout of the internal state of the given function is unknown at compilation time, so an internal state storage area is created for each instantiated closure, separately from the global storage area held by the VM instance itself. At each closure call, the VM switches \texttt{State\_Ptr}, which points to the internal state storage in use, to the storage area on the closure, and restores it to point to the global storage area each time the closure context ends.
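A minimal sketch of the state storage and its moving position may clarify the mechanism (hypothetical Rust; field and method names are illustrative, not those of the actual mimium VM).

```rust
// Hypothetical sketch of the VM's internal state storage.
struct StateStorage {
    data: Vec<f64>, // statically laid-out state slots
    pos: usize,     // the State_Ptr position used by GETSTATE / SETSTATE
}

impl StateStorage {
    fn new(size: usize) -> Self {
        StateStorage { data: vec![0.0; size], pos: 0 }
    }

    fn get_state(&self) -> f64 {
        self.data[self.pos]
    }

    fn set_state(&mut self, v: f64) {
        self.data[self.pos] = v;
    }

    // SHIFTSTATE: move the position by a signed offset; within one
    // function the emitted offsets cancel out, restoring the position.
    fn shift_state(&mut self, offset: isize) {
        self.pos = (self.pos as isize + offset) as usize;
    }
}
```

Switching \texttt{State\_Ptr} at a closure call then amounts to temporarily pointing at a different `StateStorage` owned by the closure, rather than shifting within the global one.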
@@ -382,6 +384,75 @@ When the closure escapes from the original function with \\ \texttt{RETURN} inst \caption{\label{fig:vmstructure}{\it Overview of the virtual machine, program and instantiated closures for \lambdammm.}} \end{figure*} +\subsection{Example of the compilation of stateful functions} + +A more complex example code and its expected bytecode instructions are shown in Listing \ref{lst:fbdelay} and Listing \ref{lst:bytecodes_fbdelay}. The code defines a delay with feedback as \texttt{fbdelay}; another function, \texttt{twodelay}, uses two feedback delays with different parameters, and \texttt{dsp} finally uses two \texttt{twodelay} calls. + +After each reference to \texttt{self} through the \texttt{GETSTATE} instruction, or each call to another stateful function, a \\ \texttt{SHIFTSTATE} instruction is inserted to move \texttt{StatePtr} forward in preparation for the next non-closure function call. Before the function exits, \texttt{StatePtr} is reset by \texttt{SHIFTSTATE} to the same position it had when the current function context began (the operands of \texttt{SHIFTSTATE} within a function must always sum to zero). + +\begin{lstlisting}[label=lst:fbdelay,language=Rust,caption=\it Example code that combines self and delay without closure call.]
+fn fbdelay(x,fb,dtime){ + x + delay(1000,self,dtime)*fb +} +fn twodelay(x,dtime){ + fbdelay(x,0.7,dtime) + +fbdelay(x,0.8,dtime*2) +} +fn dsp(x){ + twodelay(x,400)+twodelay(x,800) +} +\end{lstlisting} + +\begin{lstlisting}[float,floatplacement=H,label=lst:bytecodes_fbdelay,caption=\it Compiled VM instructions of the feedback delay example in Listing \ref{lst:fbdelay}] +CONSTANTS:[0.7,2,0.8,400,800,0,1] +fn fbdelay(x,fb,dtime) state_size:2 +MOVE 3 0 //load x +GETSTATE 4 //load self +SHIFTSTATE 1 +DELAY 4 2 //delay(1000,self,dtime) +MOVE 5 1 //load fb +MULF 4 4 5 +ADDF 3 3 4 +SHIFTSTATE -1 +GETSTATE 4 //prepare return value +SETSTATE 3 //store to self +RETURN 4 1 + +fn twodelay(x,dtime) state_size:4 +MOVECONST 2 5 //load "fbdelay" prototype +MOVE 3 0 //load x +MOVECONST 4 0 //load 0.7 +MOVE 5 1 //load dtime +CALL 2 3 1 +SHIFTSTATE 2 //=state_size of fbdelay +MOVECONST 3 5 //load "fbdelay" prototype +MOVE 4 0 //load x +MOVECONST 5 2 //load 0.8 +MOVE 6 1 //load dtime +MOVECONST 7 1 //load 2 +MULF 6 6 7 //dtime*2 +CALL 3 3 1 +ADDF 2 2 3 +SHIFTSTATE -2 +RETURN 2 1 + +fn dsp(x) state_size:8 +MOVECONST 1 6 //load "twodelay" prototype +MOVE 2 0 //load x +MOVECONST 3 3 //load 400 +CALL 1 2 1 +SHIFTSTATE 4 //=state_size of twodelay +MOVECONST 2 6 //load "twodelay" prototype +MOVE 3 0 //load x +MOVECONST 4 4 //load 800 +CALL 2 2 1 +ADDF 1 1 2 +SHIFTSTATE -4 +RETURN 1 1 +\end{lstlisting} + + + \section{Discussion: Different behaviour depending on the position of let binding} \label{sec:discussion} @@ -440,8 +511,6 @@ fn dsp(){ \end{lstlisting} -\section{Conclusions} - This means that major compiler optimization techniques such as constant folding and function inlining cannot simply be applied. Those optimizations should be done after the evaluation of the global context and before evaluating the \texttt{dsp} function. To solve this situation, a distinction can be introduced in the type system between terms used in the global context evaluation (stage 0) and terms used in the actual signal processing (stage 1). This can be realized with Multi-Stage Computation\cite{Taha1997}.
Listing \ref{lst:filterbank_multi} is an example of the \texttt{filterbank} code using MetaOCaml's bracket syntax \texttt{.<e>.}, which generates a program to be evaluated in the next stage, and the escape syntax \texttt{.\textasciitilde term}, which embeds terms evaluated at the previous stage. @@ -451,6 +520,8 @@ To solve this situation, introducing distinction whether the term should be used This multi-stage computation code has the same semantics for both the generation of the signal graph and the execution of the signal processing, in contrast to Faust, which has two different semantics: the term-rewriting macro and the block diagram algebra (BDA). +\section{Conclusions} + @@ -460,7 +531,7 @@ \section{Acknowledgments} -Many thanks to the great number of anonymous reviewers! +This work was supported by JSPS KAKENHI (Grant No. JP19K21615). Also, many thanks to the anonymous reviewers.
%\newpage \nocite{*} diff --git a/src/semantics.tex b/src/semantics.tex index b83d514..b8e928b 100644 --- a/src/semantics.tex +++ b/src/semantics.tex @@ -2,36 +2,36 @@ \centering \begin{tabular}{ccc} - \begin{minipage}[b]{5cm} + \begin{minipage}[b]{5.5cm} \centering \begin{equation*} - \frac{E^n \vdash e_1 \Downarrow v_1\ E^{n-v_1} \vdash e_2 \Downarrow v_2}{E^n \vdash\ delay\ e_1\ e_2 \Downarrow v_2} + \frac{E^n \vdash e_1 \Downarrow v_1 \quad n>v_1 \quad E^{n-v_1} \vdash e_2 \Downarrow v_2}{E^n \vdash\ delay\ m\ e_1\ e_2 \Downarrow v_2} \end{equation*}\textrm{E-DELAY} \end{minipage} & - \begin{minipage}[b]{5cm} + \begin{minipage}[b]{5.5cm} \centering \begin{equation*} \frac{}{E^n \vdash\ \lambda x.e \Downarrow cls(\lambda x.e , E^n) } \end{equation*}\textrm{E-LAM} \end{minipage}& - \begin{minipage}[b]{5cm} + \begin{minipage}[b]{5.5cm} \centering \begin{equation*} - \frac{ E^{n-1} \vdash e \Downarrow v_1\ E^n, x \mapsto v_1 \vdash e \Downarrow v_2 }{E^n \vdash\ feed\ x\ e \Downarrow v_2} + \frac{ E^{n-1} \vdash e \Downarrow v_1\ E^n, x \mapsto v_1 \vdash e \Downarrow v_2 }{E^n, x \mapsto v_2\ \vdash\ feed\ x\ e \Downarrow v_1} \end{equation*}\textrm{E-FEED} \end{minipage} \\ - \begin{minipage}[b]{5cm} + \begin{minipage}[b]{5.5cm} \centering \begin{equation*} - \frac{E^n \vdash e_c \Downarrow n \quad n > 0\ E^n \vdash e_t\ \Downarrow v\ }{E^n \vdash\ if (e_c)\ e_t else e_t \Downarrow v } + \frac{E^n \vdash e_c \Downarrow v_c \quad v_c > 0 \quad E^n \vdash e_t \Downarrow v}{E^n \vdash\ if (e_c)\ e_t\ else\ e_e \Downarrow v } \end{equation*}\textrm{E-IFTRUE} \end{minipage} & - \begin{minipage}[b]{5cm} + \begin{minipage}[b]{5.5cm} \centering \begin{equation*} - \frac{E^n \vdash e_c \Downarrow n \quad n \leqq0\ E^n \vdash e_e\ \Downarrow v\ }{E^n \vdash\ if (e_c)\ e_t else e_t \Downarrow v } + \frac{E^n \vdash e_c \Downarrow v_c \quad v_c \leqq 0 \quad E^n \vdash e_e \Downarrow v}{E^n \vdash\ if (e_c)\ e_t\ else\ e_e \Downarrow v } \end{equation*}\textrm{E-IFFALSE} \end{minipage} &