Generating SQL from a Data Structure

SERIES LINKS

How to Create a Query Language DSL in C#
Creating a Simple Tokenizer (Lexer) in C#
Understanding Grammars
Implementing a DSL Parser in C#

--> Generating SQL from a Data Structure

Super simple version on GitHub

We will continue with the same intermediate representation of our DSL and generate the artefacts required to perform a query: the query text and a collection of parameters. The query that will be generated is for a ranking query. It will list the top X errors between two dates.

We have the tables

  • ErrorDefinition - which is a de-duplicated list of errors, with each error being identified by an MD5 hash of the cleaned up error data
  • Timeline - The number of occurrences per fingerprint in 5 minute time buckets

To generate the SQL we have three classes

  • AdoQueryPayload - A class that contains a StringBuilder, a list of SqlParameters and a few methods for adding lines of text, adding parameters and such like
  • SqlGenerator - A class that instantiates an AdoQueryBuilder and builds a query based on the DslQueryModel class
  • SqlExecutor - A class that receives an AdoQueryPayload class and executes the query and returns the results

I won't go into a long winded explanation as the code explains itself.

AdoQueryPayload Class


public class AdoQueryPayload
{
    private int _paramCounter;
    private StringBuilder _sb;

    public AdoQueryPayload()
    {
        Parameters = new List<SqlParameter>();
        _sb = new StringBuilder();
    }

    public IList<SqlParameter> Parameters { get; private set; }

    public void Append(string queryText)
    {
        _sb.Append(queryText);
    }

    public void AppendLine(string queryText)
    {
        _sb.Append(queryText + Environment.NewLine);
    }

    public void AddNewLine()
    {
        _sb.Append(Environment.NewLine);
    }

    public void AppendColumnName(DslObject dslObject)
    {
        _sb.Append(GetColumnName(dslObject));
    }

    public void AddParameter(MatchCondition matchCondition)
    {
        if (matchCondition.Operator == DslOperator.In || matchCondition.Operator == DslOperator.NotIn)
        {
            AddInParameters(matchCondition.Object, matchCondition.Values);
        }
        else
        {
            AddParameter(matchCondition.Object, matchCondition.Value);
        }
    }

    public void AddFromDateParameter(DateTime dateValue)
    {
        AddDateParameter(dateValue, "@FromDate");
    }

    public void AddToDateParameter(DateTime dateValue)
    {
        AddDateParameter(dateValue, "@ToDate");
    }

    public string GetSqlText()
    {
        return _sb.ToString();
    }

    private void AddDateParameter(DateTime dateValue, string paramName)
    {
        _sb.Append(paramName);

        if (!Parameters.Any(x => x.ParameterName.Equals(paramName)))
        {
            var parameter = new SqlParameter(paramName, SqlDbType.DateTime);
            parameter.Value = dateValue;

            Parameters.Add(parameter);
        }
    }

    private void AddInParameters(DslObject dslObject, List<string> values)
    {
        int counter = 0;
        foreach (var value in values)
        {
            if (counter > 0)
                _sb.Append(",");

            IncrementParamCounter();
            var paramName = GetParameterName();
            var parameter = new SqlParameter(paramName, SqlDbType.VarChar, GetVarcharLength(dslObject));
            parameter.Value = value;
            Parameters.Add(parameter);
            _sb.Append(paramName);

            counter++;
        }
    }

    private void AddParameter(DslObject dslObject, string value)
    {
        IncrementParamCounter();
        var paramName = GetParameterName();
        _sb.Append(paramName);

        var parameter = new SqlParameter(paramName, SqlDbType.VarChar, GetVarcharLength(dslObject));
        parameter.Value = value;
        Parameters.Add(parameter);
    }

    private void IncrementParamCounter()
    {
        _paramCounter++;
    }

    private string GetParameterName()
    {
        return "@Param" + _paramCounter;
    }

    private string GetColumnName(DslObject dslObject)
    {
        switch (dslObject)
        {
            case DslObject.Application:
                return "ED.ApplicationId";
            case DslObject.Fingerprint:
                return "ED.FingerprintText";
            case DslObject.StackFrame:
                return "EB.StackFrame";
            case DslObject.ExceptionType:
                return "EB.ExceptionType";
            case DslObject.Message:
                return "T.MessageDetails";
            default:
                throw new Exception("LQL object not supported for SQL generation: " + dslObject.ToString());
        }
    }

    private int GetVarcharLength(DslObject dslObject)
    {
        switch (dslObject)
        {
            case DslObject.Application:
                return 200;
            case DslObject.StackFrame:
                return 1000;
            case DslObject.ExceptionType:
                return 200;
            case DslObject.Message:
                return 1000;
            case DslObject.Fingerprint:
                return 32;
            default:
                return 50;
        }
    }
}

 

SqlGenerator Class


public class SqlGenerator
{
    public AdoQueryPayload GenerateQueryPayload(DslQueryModel dslQueryModel)
    {
        var adoQueryPayload = new AdoQueryPayload();

        if(dslQueryModel.Limit.HasValue)
            adoQueryPayload.AppendLine("SELECT TOP " + dslQueryModel.Limit.Value);
        else
            adoQueryPayload.AppendLine("SELECT");

        adoQueryPayload.AppendLine(@"        ED.FingerprintText
    ,ED.ApplicationId
    ,ED.OriginExceptionType
    ,ED.OriginStackFrame
    ,ED.LowestAppStackFrame
    ,ED.HighestAppStackFrame
    ,SUM(T.Frequency) AS TotalErrors
FROM Timeline T
JOIN ErrorDefinition AS ED ON T.Fingerprint = ED.Fingerprint");

        adoQueryPayload.Append("WHERE T.ErrorDateTime BETWEEN ");
        adoQueryPayload.AddFromDateParameter(dslQueryModel.DateRange.From);
        adoQueryPayload.Append(" AND ");
        adoQueryPayload.AddToDateParameter(dslQueryModel.DateRange.To);
        adoQueryPayload.AddNewLine();

        for(int i=0; i<dslQueryModel.MatchConditions.Count; i++)
        {
            if (i == 0)
                AddLogicalOperator(adoQueryPayload, DslLogicalOperator.And); 
            else
                AddLogicalOperator(adoQueryPayload, dslQueryModel.MatchConditions[i-1].LogOpToNextCondition);

            AddClause(dslQueryModel.MatchConditions[i], adoQueryPayload);
        }
        
        adoQueryPayload.AppendLine(@"GROUP BY ED.FingerprintText
    ,ED.ApplicationId
    ,ED.OriginExceptionType
    ,ED.OriginStackFrame
    ,ED.LowestAppStackFrame
    ,ED.HighestAppStackFrame
ORDER BY TotalErrors DESC");

        return adoQueryPayload;
    }

    private void AddLogicalOperator(AdoQueryPayload queryPayload, DslLogicalOperator logicalOperator)
    {
        if (logicalOperator == DslLogicalOperator.And)
            queryPayload.Append("AND ");
        else if (logicalOperator == DslLogicalOperator.Or)
            queryPayload.Append("OR ");
    }

    private void AddClause(MatchCondition matchCondition, AdoQueryPayload queryPayload)
    {
        queryPayload.AppendColumnName(matchCondition.Object);

        switch (matchCondition.Operator)
        {
            case DslOperator.Equals:
                queryPayload.Append(" = ");
                queryPayload.AddParameter(matchCondition);
                break;
            case DslOperator.NotEquals:
                queryPayload.Append(" <> ");
                queryPayload.AddParameter(matchCondition);
                break;
            case DslOperator.Like:
                queryPayload.Append(" LIKE '%' + ");
                queryPayload.AddParameter(matchCondition);
                queryPayload.Append(" + '%'");
                break;
            case DslOperator.NotLike:
                queryPayload.Append(" NOT LIKE '%' + ");
                queryPayload.AddParameter(matchCondition);
                queryPayload.Append(" + '%'");
                break;
            case DslOperator.In:
                queryPayload.Append(" IN (");
                queryPayload.AddParameter(matchCondition);
                queryPayload.Append(")");
                break;
            case DslOperator.NotIn:
                queryPayload.Append(" NOT IN (");
                queryPayload.AddParameter(matchCondition);
                queryPayload.Append(")");
                break;
            default:
                throw new Exception("DSL Operator not supported for SQL query generation: " + matchCondition.Operator);
        }

        queryPayload.AddNewLine();
    }      
}

SqlExecutor Class

You can come up with a better name than this. An improvement to this code would to be to add a list of the column names and types to the AdoQueryPayload class so that the SqlExecutor could be completely generic. I'll leave that as an exercise for the reader.


public class SqlExecutor
{
    public IList<ErrorCountRecord> GetTopRankingErrors(AdoQueryPayload adoQueryPayload)
    {
        var results = new List<ErrorCountRecord>();

        using (var connection = new SqlConnection("Database=ErrorsDb;Server=(local);Trusted_Connection=true;"))
        {
            using (var command = new SqlCommand(adoQueryPayload.GetSqlText(), connection))
            {
                foreach (var parameter in adoQueryPayload.Parameters)
                    command.Parameters.Add(parameter);

                using (var reader = command.ExecuteReader())
                {
                    var record = new ErrorCountRecord();
                    record.ApplicationId = reader["ApplicationId"].ToString();
                    record.Count = (int)reader["Count"];
                    record.Fingerprint = reader["FingerprintText"].ToString();
                    record.HighestAppStackFrame = reader["HighestAppStackFrame"].ToString();
                    record.LowestAppStackFrame = reader["LowestAppStackFrame"].ToString();
                    record.OriginExceptionType = reader["OriginExceptionType"].ToString();
                    record.OriginStackFrame = reader["OriginStackFrame"].ToString();

                    results.Add(record);
                }
            }
        }

        return results;
    }
}

The Code in Action

Let's see the SQL that the combination of SqlGenerator and AdoQueryPayload can produce.


var dslModel = new DslQueryModel()
{
    DateRange = new DateRange() { From = DateTime.UtcNow.AddHours(-1), To = DateTime.UtcNow },
    Limit = 10,
    MatchConditions = new List<MatchCondition>()
    {
        new MatchCondition()
        {   Object = DslObject.Application,
            Operator = DslOperator.Equals,
            Value = "ApplicationX",
            LogOpToNextCondition = DslLogicalOperator.And
        },
        new MatchCondition()
        {   Object = DslObject.ExceptionType,
            Operator = DslOperator.In,
            Values = new List<string>() { "System.TypeLoadException", "System.MissingmMethodException" }
        }
    }
};

var adoQueryPayload = new SqlGenerator().GenerateQueryPayload(dslModel);
var text = adoQueryPayload.GetSqlText();

The text variable in this case contains


SELECT TOP 10
        ED.FingerprintText
        ,ED.ApplicationId
        ,ED.OriginExceptionType
        ,ED.OriginStackFrame
        ,ED.LowestAppStackFrame
        ,ED.HighestAppStackFrame
        ,SUM(T.Frequency) AS TotalErrors
FROM Timeline T
JOIN ErrorDefinition AS ED ON T.Fingerprint = ED.Fingerprint
WHERE T.ErrorDateTime BETWEEN @FromDate AND @ToDate
AND ED.ApplicationId = @Param1
AND EB.ExceptionType IN (@Param2,@Param3)
GROUP BY ED.FingerprintText
        ,ED.ApplicationId
        ,ED.OriginExceptionType
        ,ED.OriginStackFrame
        ,ED.LowestAppStackFrame
        ,ED.HighestAppStackFrame
ORDER BY TotalErrors DESC

The list of SqlParameter objects contains the five parameters: two dates, application name and two exception types.

All these examples might be quite tightly coupled to this very simplified version my Logging Query Language DSL but serves as an idea of how you can construct SQL queries from an intermediate representation of some DSL text. 

I have a super simple version on https://github.com/Vanlightly/DslParser and I also have the real, more complex, LQL-to-SQL code in a repository on Github. A while ago I started porting to Github the in-house log analytics solution I built that was the inspiration for these posts. But I got busy with other projects and I never finished the port. The query execution engine is mostly there though it is rather complex as the queries it generates are highly optimized and uses some full text search and so not easy to understand. Still check it out if you're interested.

The port also has two parsers that use the same approach to this blog post series:

I hope this series helped, feel free to contact me about any questions or other related posts that you would like to see. We have barely scratched the surface on language parsers, next may be we'll look abstract syntax trees.

SERIES LINKS

How to Create a Query Language DSL in C#
Creating a Simple Tokenizer (Lexer) in C#
Understanding Grammars
Implementing a DSL Parser in C#

--> Generating SQL from a Data Structure