Rules not working after upgrading versions

Jan 14, 2010 at 11:56 PM

Hi there. Firstly, I'm brand new to Irony so could be doing something really stupid. Nevertheless...

I've upgraded my Irony DLL from the one I was using in an example (http://www.codeproject.com/KB/recipes/YourFirstDSL.aspx?display=Print) to the most recent (8499). (The DLLs don't appear to be versioned?)

The main reason I upgraded was to take advantage of MarkTransient() to clean up my tree.

My basic grammar was (as far as I know) correct for all the use cases I had. It appears that even simple rules now no longer work. For example:

// Minimal repro: "alphas" is exactly two "alpha" symbols in sequence.
var alphas = new NonTerminal("alphas");
var alpha = new NonTerminal("alpha");

alphas.Rule = alpha + alpha;
// "alpha" is an alternation of the 26 single-letter terms "a" through "z".
alpha.Rule = ToTerm("a") | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z";

This should (I believe) allow me to enter in something trivial like "xy" (without quotes) however it's failing with the message: Invalid character: 'y'. 
This is a simplified version of the real problem I'm having which is that MakePlusRule() doesn't appear to be working.

Any ideas? Any help would be very much appreciated! It's a great product but I'm going bonkers! Cheers!

My full (existing and new) grammars are below:

EXISTING

 

        /// <summary>
        /// Builds the location-specification grammar as originally written against
        /// the older Irony build: a comma-separated list of statements, each of which
        /// is a <c>dist(...)</c> expression or a location, location set or location range.
        /// </summary>
        public LocSpecGrammar()
        {
            #region Initial setup of the grammar

            // the grammar is case insensitive
            CaseSensitive = false;

            // define all the terminals and non-terminals
            var program = new NonTerminal("program");

            var stmts = new NonTerminal("stmts");
            var stmt = new NonTerminal("stmt");

            var variableDist = new NonTerminal("variableDist");
            var valueDist = new NonTerminal("valueDist");

            var variable = new NonTerminal("variable");

            var loc_locSet_locRanges = new NonTerminal("loc_locSet_locRanges");
            var loc_locSet_locRange = new NonTerminal("loc_locSet_locRange");
            var locSet = new NonTerminal("locSet");
            var locRange = new NonTerminal("locRange");
            var loc = new NonTerminal("loc");

            var alphanumerics = new NonTerminal("alphanumerics");
            var alphanumeric = new NonTerminal("alphanumeric");
            var alpha = new NonTerminal("alpha");

            // integer literals default to Int32
            var number = new NumberLiteral("number") { DefaultIntTypes = new[] { TypeCode.Int32 } };

            // specify the non-terminal which is the root of the AST
            Root = program;

            #endregion

            #region Grammar definition

            program.Rule = stmts;

            // stmts: stmt entries delimited by "," (MakePlusRule presumably yields a one-or-more list; confirm against Irony)
            stmts.Rule = MakePlusRule(stmts, Symbol(","), stmt);
            stmt.Rule = variableDist | valueDist | loc_locSet_locRange;

            // both dist forms start with the single literal "dist(" (name and opening parenthesis together);
            // variableDist additionally carries ":" and a variable before the closing ")"
            variableDist.Rule = "dist(" + loc_locSet_locRanges + ":" + variable + ")";
            valueDist.Rule = "dist(" + loc_locSet_locRanges + ")";

            // '+' binds tighter than '|': a lone alpha, or an alpha followed by alphanumerics
            variable.Rule = alpha | alpha + alphanumerics;

            loc_locSet_locRanges.Rule = MakePlusRule(loc_locSet_locRanges, Symbol(","), loc_locSet_locRange);
            loc_locSet_locRange.Rule = loc | locSet | locRange;
            // a location set is "#" followed by alphanumerics
            locSet.Rule = "#" + alphanumerics;
            // a range is number "-" number, optionally preceded by an alphanumerics prefix
            locRange.Rule = number + "-" + number | alphanumerics + number + "-" + number;
            loc.Rule = alphanumerics;

            // null is passed as the delimiter: alphanumeric entries follow one another directly
            alphanumerics.Rule = MakePlusRule(alphanumerics, null, alphanumeric);
            alphanumeric.Rule = alpha | number;

            // every letter "a".."z" is declared as its own single-character term
            alpha.Rule = Symbol("a") | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z";

            #endregion
        }
    }

NEW

 

[Language("Qubit LocSpec", "1.0", "Grammar for the definition of a location specification within a dimension.")]
    public class LocSpecGrammar : Grammar
    {
        /// <summary>
        /// Builds the location-specification grammar as ported to the newer Irony build:
        /// a comma-separated list of statements, each of which is a <c>dist(...)</c>
        /// expression or a location, location set or location range.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the discussion that follows this listing reports that newer Irony
        /// builds do not allow a key term to be immediately followed by a letter or digit,
        /// which affects the "dist(" literal and the single-letter terms declared here.
        /// </remarks>
        public LocSpecGrammar()
            : base(false) // false means case insensitive
        {
            #region Initial setup of the grammar

            GrammarComments = @"Expressions for dimension location specifications.";

            var program = new NonTerminal("program");

            var stmts = new NonTerminal("stmts");
            var stmt = new NonTerminal("stmt");

            var variableDist = new NonTerminal("variableDist");
            var valueDist = new NonTerminal("valueDist");

            var variable = new NonTerminal("variable");

            var loc_locSet_locRanges = new NonTerminal("loc_locSet_locRanges");
            var loc_locSet_locRange = new NonTerminal("loc_locSet_locRange");
            var locSet = new NonTerminal("locSet");
            var locRange = new NonTerminal("locRange");
            var loc = new NonTerminal("loc");

            var alphanumerics = new NonTerminal("alphanumerics");
            var alphanumeric = new NonTerminal("alphanumeric");
            var alpha = new NonTerminal("alpha");

            // integer literals default to Int32
            var number = new NumberLiteral("number") { DefaultIntTypes = new[] { TypeCode.Int32 } };

            // specify the non-terminal which is the root of the AST
            Root = program;

            #endregion

            #region Grammar definition

            program.Rule = stmts;

            // stmts: stmt entries delimited by "," (MakePlusRule presumably yields a one-or-more list; confirm against Irony)
            stmts.Rule = MakePlusRule(stmts, ToTerm(","), stmt);
            stmt.Rule = variableDist | valueDist | loc_locSet_locRange;

            // both dist forms start with the single literal "dist(" (name and opening parenthesis together);
            // variableDist additionally carries ":" and a variable before the closing ")"
            variableDist.Rule = "dist(" + loc_locSet_locRanges + ":" + variable + ")";
            valueDist.Rule = "dist(" + loc_locSet_locRanges + ")";

            // '+' binds tighter than '|': a lone alpha, or an alpha followed by alphanumerics
            variable.Rule = alpha | alpha + alphanumerics;

            loc_locSet_locRanges.Rule = MakePlusRule(loc_locSet_locRanges, ToTerm(","), loc_locSet_locRange);
            loc_locSet_locRange.Rule = loc | locSet | locRange;
            // a location set is "#" followed by alphanumerics
            locSet.Rule = "#" + alphanumerics;
            // a range is number "-" number, optionally preceded by an alphanumerics prefix
            locRange.Rule = number + "-" + number | alphanumerics + number + "-" + number;
            loc.Rule = alphanumerics;

            // two-argument overload: no delimiter is supplied between alphanumeric entries
            alphanumerics.Rule = MakePlusRule(alphanumerics, alphanumeric);
            alphanumeric.Rule = alpha | number;

            // every letter "a".."z" is declared as its own single-character term
            alpha.Rule = ToTerm("a") | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z";

            #endregion
        }
Coordinator
Jan 15, 2010 at 3:24 AM

I think you are missing one important concept - how terminals work in Irony. Looks like you use an expression

variable.Rule = alpha | alpha + alphanumerics;

to define an identifier (starts with letter followed by letters or digits).

You should use IdentifierTerminal instead - look at how it is done in the sample grammars, the ExprEvaluator grammar in particular.

 

Jan 15, 2010 at 3:48 AM

Thanks for the quick reply. I didn't see IdentifierTerminal when I was looking around the types -- thanks for that.

Nevertheless, why is my grammar that was working now failing? Have the meanings changed?

I was going to start looking into the other stuff such as CompoundTerminalBase and cleaning up the grammar once I upgraded but it's a bit hard when it suddenly just broke!

To create a Terminal which allows any of the alphanumeric characters [A-Za-z0-9] (with no restriction on ordering) I'm guessing I'd have to do something very similar to IdentifierTerminal?

Sorry if these are stupid questions. I really appreciate your help.

Jan 15, 2010 at 4:19 AM

I guess a partial answer to my question is that IdentifierTerminal("identifier", String.Empty, Strings.DecimalDigits) would give me an alphanumeric with no ordering requirements? e.g. "1abc1".

Jan 15, 2010 at 4:49 AM

I'm still having troubles...

So take a very simple grammar:

 

// Minimal grammar: the root rule is the literal "dist(", then an identifier, then ")".
var identifier = new IdentifierTerminal("identifier");
var valueDist = new NonTerminal("valueDist");
// note that "dist(" is one string literal containing both the name and the opening parenthesis
valueDist.Rule = "dist(" + identifier + ")";
Root = valueDist;

I believe that

dist(ABC123)

should be valid, right? I get the error: Invalid character: 'A'.

What am I doing wrong? :(

 

 

 

 

Coordinator
Jan 15, 2010 at 1:33 PM

There's an additional check added lately in the KeyTerm code - that a keyword should not be followed by a letter or digit. In your case "dist(" is a keyword - it is a string constant used in a grammar rule and it starts with a letter.

This is logical. To fix this, separate opening brace from initial "dist" prefix:

valueDist.Rule = ToTerm("dist") + "(" + identifier + ")";

This additional check is probably the reason why your original grammar stopped working: all single-letter symbols in this huge OR expression are treated by Irony as keywords, so it won't allow them to be followed by a letter.

Jan 18, 2010 at 1:13 AM

Thanks a lot for that.

One last problem to tackle (I think)... I currently have to allow:

// locRange: an "alphanumerics" token, then a number, a "-" and a second number.
var locRange = new NonTerminal("locRange");
var number = new NumberLiteral("number") { DefaultIntTypes = new[] { TypeCode.Int32 } };
// NOTE(review): the extra arguments presumably add the digits to the characters an identifier
// may contain/start with - confirm against the IdentifierTerminal constructor.
var alphanumerics = new IdentifierTerminal("alphanumerics", String.Empty, "1234567890");
locRange.Rule = alphanumerics + number + "-" + number;

i.e. many numbers or letters followed by a number followed by a dash followed by a number, e.g. LEVEL1-5.

What's the best way to accomplish this? I believe it should be a terminal however I'm still feeling a little overwhelmed...should I create a CustomTerminal? Or try to reuse IdentifierTerminal in some way? An example to get me on the right track would be very much appreciated. Thanks again for all your help!

Jan 18, 2010 at 1:53 AM

I didn't mention it, but there's also a requirement that the 2nd number be greater than the first.

e.g. L1-5 is valid, L2-1 is invalid. I'm having trouble working out where this logic goes. (Should it go in the terminal or do a post-check of the tree?)

Coordinator
Jan 18, 2010 at 11:43 PM

Ok, looks like you have elements like "L1-5", and with these, I don't think that defining locRange as a non-terminal would work for you. You need a custom terminal for sure. Just create a class subclassed from Terminal, create an appropriate constructor, and then override the TryMatch method and implement custom code doing all this parsing and validation. Follow the existing terminal implementations as an example.

 

Jan 19, 2010 at 4:04 AM

Thanks for that. I've taken your suggestion and tried to create a subclassed Terminal, but am still having difficulties. I've based mine off RegexBasedTerminal (increasing source.PreviewPosition, and calling source.CreateToken(OutputTerminal) if successful). Nevertheless, it's still erroring (entering L1-5 gives the error: Invalid character: 'L'). Any ideas? Sorry it's in VB (the project is actually in VB, I was just writing a prototype in C#).

''' <summary>
''' Custom terminal intended to recognise a location range such as "L1-5":
''' a prefix, a start number, a dash and an end number.
''' </summary>
''' <remarks>
''' This is the first attempt as posted. The thread reports that it still produced
''' "Invalid character: 'L'"; the points raised in the replies are marked below.
''' </remarks>
Public Class LocRangeTerminal
    Inherits Terminal

    ' Named groups: Prefix (one [a-z0-9_] character, then lazily any characters),
    ' RangeStart and RangeEnd (digit runs separated by "-"). Compiled, case-insensitive.
    Private Shared _rgxLocRange As Regex = New Regex("(?<Prefix>[a-z0-9_].*?)(?<RangeStart>[0-9]+)-(?<RangeEnd>[0-9]+)", RegexOptions.Compiled Or RegexOptions.IgnoreCase)

    ''' <summary>Creates the terminal with the given name.</summary>
    Public Sub New(ByVal sName As String)
        MyBase.New(sName)
    End Sub

    ''' <summary>
    ''' Tries to match a location range and returns a token carrying the parsed
    ''' details, or an error token when the match or the range check fails.
    ''' </summary>
    Public Overrides Function TryMatch(ByVal context As ParsingContext, ByVal source As ISourceStream) As Irony.Parsing.Token
        ' The regex is run over the whole of source.Text and is not anchored, so it
        ' searches from the beginning of the input rather than from the current position.
        Dim match As Match = _rgxLocRange.Match(source.Text)

        If (match.Success) Then
            Dim sPrefix = match.Groups("Prefix").Value
            Dim iRangeStart = CInt(match.Groups("RangeStart").Value)
            Dim iRangeEnd = CInt(match.Groups("RangeEnd").Value)

            ' The end of the range must be strictly greater than the start.
            If (iRangeEnd <= iRangeStart) Then
                Return CreateErrorToken(source)
            End If

            ' The token is created before the preview position is advanced, and the
            ' position is advanced by the length of the entire text, not of the match.
            ' The poster later reports that advancing after CreateToken was the problem.
            Dim token As Token = source.CreateToken(OutputTerminal)
            token.Details = New LocRangeTokenDetails(sPrefix, iRangeStart, iRangeEnd)
            source.PreviewPosition += source.Text.Length

            Return token
        Else
            Return CreateErrorToken(source)
        End If
    End Function

    ''' <summary>Builds the error token used for both a failed match and an invalid range.</summary>
    Private Function CreateErrorToken(ByVal source As ISourceStream) As Token
        Return source.CreateErrorToken("Invalid format. Format for a location range is: Loc(number1)-(number2) where number2 > number1")
    End Function

End Class

''' <summary>
''' Immutable payload describing a parsed location range: the prefix text and
''' the start and end numbers of the range.
''' </summary>
Public Class LocRangeTokenDetails

    Private ReadOnly _sPrefix As String
    Private ReadOnly _iRangeStart As Integer
    Private ReadOnly _iRangeEnd As Integer

    ''' <summary>Creates the details; a prefix of Nothing is stored as an empty string.</summary>
    ''' <param name="sPrefix">Text preceding the range numbers; may be Nothing.</param>
    ''' <param name="iRangeStart">First number of the range.</param>
    ''' <param name="iRangeEnd">Second number of the range.</param>
    Public Sub New(ByVal sPrefix As String, ByVal iRangeStart As Integer, ByVal iRangeEnd As Integer)
        _sPrefix = If(sPrefix Is Nothing, String.Empty, sPrefix)
        _iRangeStart = iRangeStart
        _iRangeEnd = iRangeEnd
    End Sub

    ' The accessors below make the stored values readable; without them the
    ' details object could be written but never inspected by its consumer.

    ''' <summary>Text preceding the range numbers; never Nothing.</summary>
    Public ReadOnly Property Prefix() As String
        Get
            Return _sPrefix
        End Get
    End Property

    ''' <summary>First number of the range.</summary>
    Public ReadOnly Property RangeStart() As Integer
        Get
            Return _iRangeStart
        End Get
    End Property

    ''' <summary>Second number of the range.</summary>
    Public ReadOnly Property RangeEnd() As Integer
        Get
            Return _iRangeEnd
        End Get
    End Property

End Class

 

 

Jan 19, 2010 at 4:58 AM

By the way, `token` is being returned (the regex succeeds & is valid (enough for testing)).

Coordinator
Jan 19, 2010 at 5:26 AM

One problem I see is this statement:

Dim match As Match = _rgxLocRange.Match(source.Text)

The problem is that source.Text is the entire text, so this statement tries to match source text from the beginning. You should instead match starting with current position: Source.Location.Position

 

Jan 19, 2010 at 10:18 AM

Thanks for that -- I realised that that could cause problems pretty much straight after I posted. As I said though, I was giving it valid data which succeeded with the match (literally the text 'L1-5').

Nevertheless, even though a token was created and the preview position was increased, an error token still exists. Any ideas regarding that?

Coordinator
Jan 19, 2010 at 4:18 PM

Well, just step through in debugger and see what's going on...

Jan 20, 2010 at 12:39 AM

Ahh, I have been, it's just pretty complex for somebody that doesn't have a great understanding of parsers :)

It looks like 'L1-5' is first consuming 'L1' as alphanumerics, then consuming '-5' which doesn't match any rule. I'll keep investigating.

Jan 20, 2010 at 11:06 PM

Hi, I just thought I'd let you know I got it all working...

The problem was I was doing source.PreviewPosition += match.Length AFTER I'd created my new token, which didn't work.

I've attached the working code in case somebody has a similar problem. Thanks again for all your help and for creating and supporting a great tool.

        ''' <summary>
        ''' Terminal that recognises a location range such as "L1-5": an optional
        ''' prefix of letters, digits or underscores, a start number, a dash and an
        ''' end number, where the end number must be greater than the start number.
        ''' </summary>
        Public Class LocRangeTerminal
            Inherits Terminal

            Private Const PrefixGroupIdentifier As String = "Prefix"
            Private Const RangeStartGroupIdentifier As String = "RangeStart"
            Private Const RangeEndGroupIdentifier As String = "RangeEnd"

            ' \G anchors the match at the start index handed to Regex.Match, so the
            ' pattern can only match at the scanner's current preview position.
            Private Shared ReadOnly _sRegex As String = String.Format("\G((?<{0}>[a-z0-9_]*?)(?<{1}>[0-9]+)-(?<{2}>[0-9]+))", PrefixGroupIdentifier, RangeStartGroupIdentifier, RangeEndGroupIdentifier)
            ' CultureInvariant keeps the case-insensitive [a-z] independent of the current
            ' culture (e.g. "I" under Turkish casing rules); Compiled because the pattern
            ' is reused for every match attempt.
            Private Shared ReadOnly _rgxLocRange As Regex = New Regex(_sRegex, RegexOptions.IgnoreCase Or RegexOptions.CultureInvariant Or RegexOptions.Compiled)

            ''' <summary>Creates the terminal with the given name.</summary>
            Public Sub New(ByVal name As String)
                MyBase.New(name)
            End Sub

            ''' <summary>
            ''' Tries to match a location range at the current preview position.
            ''' </summary>
            ''' <returns>
            ''' Nothing when the input at the preview position is not a location range;
            ''' an error token when it has the right shape but a number does not fit an
            ''' Integer or the end is not greater than the start; otherwise a token whose
            ''' Details holds a <see cref="LocRangeTokenDetails"/>.
            ''' </returns>
            Public Overloads Overrides Function TryMatch(ByVal context As ParsingContext, ByVal source As ISourceStream) As Token
                Dim match As Match = _rgxLocRange.Match(source.Text, source.PreviewPosition)

                ' The index check is a defensive duplicate of the \G anchor.
                If Not match.Success OrElse match.Index <> source.PreviewPosition Then
                    Return Nothing
                End If

                Dim sPrefix = match.Groups(PrefixGroupIdentifier).Value
                Dim iRangeStart As Integer
                Dim iRangeEnd As Integer

                ' A digit run that does not fit an Integer is reported as an error token
                ' instead of letting an OverflowException escape from the scanner.
                If Not TryParseRangeNumber(match.Groups(RangeStartGroupIdentifier).Value, iRangeStart) Then
                    Return CreateErrorToken(source)
                End If

                If Not TryParseRangeNumber(match.Groups(RangeEndGroupIdentifier).Value, iRangeEnd) Then
                    Return CreateErrorToken(source)
                End If

                If (iRangeEnd <= iRangeStart) Then
                    Return CreateErrorToken(source)
                End If

                ' The preview position must be advanced BEFORE the token is created;
                ' advancing it afterwards did not work.
                source.PreviewPosition += match.Length

                Dim token As Token = source.CreateToken(OutputTerminal)
                token.Details = New LocRangeTokenDetails(sPrefix, iRangeStart, iRangeEnd)

                Return token
            End Function

            ''' <summary>Parses a run of ASCII digits into an Integer without throwing on overflow.</summary>
            Private Shared Function TryParseRangeNumber(ByVal text As String, ByRef value As Integer) As Boolean
                Return Integer.TryParse(text, System.Globalization.NumberStyles.None, System.Globalization.CultureInfo.InvariantCulture, value)
            End Function

            ''' <summary>Builds the error token describing the expected location range format.</summary>
            Private Function CreateErrorToken(ByVal source As ISourceStream) As Token
                Return source.CreateErrorToken("Location range format is: Loc(number1)-(number2) where number1 < number2.")
            End Function

            ''' <summary>
            ''' Immutable payload attached to a location range token: the prefix text and
            ''' the start and end numbers of the range.
            ''' </summary>
            Public Class LocRangeTokenDetails

                Private ReadOnly _sPrefix As String
                Private ReadOnly _iRangeStart As Integer
                Private ReadOnly _iRangeEnd As Integer

                ''' <summary>Creates the details; a prefix of Nothing is stored as an empty string.</summary>
                Public Sub New(ByVal sPrefix As String, ByVal iRangeStart As Integer, ByVal iRangeEnd As Integer)
                    _sPrefix = If(sPrefix Is Nothing, String.Empty, sPrefix)
                    _iRangeStart = iRangeStart
                    _iRangeEnd = iRangeEnd
                End Sub

                ''' <summary>Text preceding the range numbers; never Nothing.</summary>
                Public ReadOnly Property Prefix() As String
                    Get
                        Return _sPrefix
                    End Get
                End Property

                ''' <summary>First number of the range.</summary>
                Public ReadOnly Property RangeStart() As Integer
                    Get
                        Return _iRangeStart
                    End Get
                End Property

                ''' <summary>Second number of the range.</summary>
                Public ReadOnly Property RangeEnd() As Integer
                    Get
                        Return _iRangeEnd
                    End Get
                End Property

            End Class

        End Class
Coordinator
Jan 23, 2010 at 7:20 AM

Well, congratulations! You got it working. The code looks OK to me. My only comment would be that regexes are relatively slow compared to custom code going char-by-char. But if performance is not critical for you, then regexes work fine, and the code is shorter and simpler.