Hacking with Winograd

Discussion of chess software programming and technical issues.

Moderators: hgm, Rebel, chrisw

Henk
Posts: 7216
Joined: Mon May 27, 2013 10:31 am

Hacking with Winograd

Post by Henk »

Tried to implement Winograd. Could not make it work. But after a lot of trial and error I get this ugly code which seems to work.
Don't know yet if it is already faster than without Winograd. To be optimized/rewritten.

Code: Select all

 
 ...
  		// Pre-compute, for slice d, one factor pair per run of three consecutive weights
  		// (taken in enumeration order). The pairs depend only on the weights, so they can
  		// be reused for every window position.
  		var winoGradSliceFactors = new List<(double, double)>();

                var wEnum = Weight[d].GetEnumerator();
                wEnum.Reset();
                for (int consumed = 0; consumed < Weight[d].nElements(); consumed += 3)
                {
                    // Pull the next three weights; the MoveNext() results are not checked here.
                    wEnum.MoveNext();
                    var first = wEnum.Current;
                    wEnum.MoveNext();
                    var second = wEnum.Current;
                    wEnum.MoveNext();
                    var third = wEnum.Current;

                    // Item1 = (w0 + w1 + w2) / 2, Item2 = (w0 - w1 + w2) / 2.
                    winoGradSliceFactors.Add(((first + second + third) / 2, (first - second + third) / 2));
                }
                winoGradBlockFactors.Add(winoGradSliceFactors);
 
 ….
 
 // Computes the inner products x * w for two horizontally adjacent windows x of one slice,
 // window(l, m) and window(l, m + 1), sharing multiplications between the two results.
 // l: row number of the windows' first row
 // m: column number of the first window's first column
 // d: depth, i.e. the position of the slice in the block/set of slices to be evaluated
 // winoGradFactors: one factor pair per group of three weights, consumed in the same order as the weights
 // Returns (s1, s2): the sum for window(l, m) and the sum for window(l, m + 1).
       (double s1, double s2) CombinedInnerProduct(int d, int l, int m, List<(double, double)> winoGradFactors)
        {
            // The second window starts at column m + 1, so that column must still be a valid start column.
            Debug.Assert(m + 1 < Start.Height - F + 1 + Padding);

            var slice = Start.SliceList[d];
            var w = Weight[d];
            var wEnum = w.GetEnumerator();

            var winoGradFactorsEnum = winoGradFactors.GetEnumerator();

            wEnum.Reset(); // essential (per the author) — presumably the enumerator may not start at the first weight; TODO confirm against the weight type

            double sum1 = 0;
            double sum2 = 0;
 
            for (int i = 0; i <= F - 1; i++)
            {
                // Rows outside the input contribute nothing (they are treated as zeros).
                if (l + i >= 0 && l + i < Start.Width)
                {
                    for (int j = 0; j <= F - 1; j += 3)
                    {
                        // Always consume three weights and one factor pair, even when the columns
                        // below are skipped, so that both enumerators stay in step.
                        var w0 = wEnum.MoveNext() ? wEnum.Current : 0;
                        var w1 = wEnum.MoveNext() ? wEnum.Current : 0;
                        var w2 = wEnum.MoveNext() ? wEnum.Current : 0;
                        var winoGradfactorPair = winoGradFactorsEnum.MoveNext() ? winoGradFactorsEnum.Current : (0, 0);

                        // Skip the group only when none of the four columns m+j .. m+j+3 lies inside the row.
                        if (m + j + 3 >= 0 && m + j < Start.Height)
                        {
                            var sliceRow = slice.Value[l + i];

                            // u0..u3: four consecutive inputs of this row; out-of-range columns read as 0.
                            var u0 = (m + j >= 0) ? sliceRow[m + j] : 0;
                            var u1 = (m + j + 1 >= 0 && m + j + 1 < Start.Height) ? sliceRow[m + j + 1] : 0;
                            var u2 = (m + j + 2 >= 0 && m + j + 2 < Start.Height) ? sliceRow[m + j + 2] : 0;
                            var u3 = (m + j + 3 < Start.Height) ? sliceRow[m + j + 3] : 0;

                            // Four multiplications shared by both windows (instead of 2 x 3 direct ones).
                            var m1 = (u0 - u2) * w0;
                            var m2 = (u1 + u2) * winoGradfactorPair.Item1;
                            var m3 = (u2 - u1) * winoGradfactorPair.Item2;
                            var m4 = (u1 - u3) * w2;

                            // Cross-check both combinations against the direct three-term products.
                            Debug.Assert(Math.Abs(m1 + m2 + m3 - (u0 * w0 + u1 * w1 + u2 * w2)) <= 1E-7);
                            Debug.Assert(Math.Abs(m2 - m3 - m4 - (u1 * w0 + u2 * w1 + u3 * w2)) <= 1E-7);

                            sum1 += m1 + m2 + m3;
                            sum2 += m2 - m3 - m4;
                        }
                    }
                }
                else
                {
                    // Row is outside the input: skip its weights and factor pairs so that the
                    // enumerators are positioned correctly for the next row.
                    for (int j = 0; j <= F - 1; j += 3)
                    {
                        wEnum.MoveNext();
                        wEnum.MoveNext();
                        wEnum.MoveNext();
                        winoGradFactorsEnum.MoveNext();
                    }
                }
            }
            return (sum1, sum2);
        }
        
By the way this paper may also be helpful but maths look quite complicated
https://arxiv.org/pdf/1509.09308.pdf
Daniel Shawul
Posts: 4185
Joined: Tue Mar 14, 2006 11:34 am
Location: Ethiopia

Re: Hacking with Winograd

Post by Daniel Shawul »

Henk is in love with his neurons :)

Why reinvent the wheel when there are ton of deep learning libraries to choose from.
Joost Buijs
Posts: 1563
Joined: Thu Jul 16, 2009 10:47 am
Location: Almere, The Netherlands

Re: Hacking with Winograd

Post by Joost Buijs »

Daniel Shawul wrote: Thu Oct 11, 2018 7:29 pm Henk is in love with his neurons :)

Why reinvent the wheel when there are ton of deep learning libraries to choose from.
Because these 'open source' libraries are never optimal for the things you want to do with it.
Most people are lazy, basically writing a script in Python to use with Tensorflow/Keras, but they have no idea what is actually going on underneath.
Daniel Shawul
Posts: 4185
Joined: Tue Mar 14, 2006 11:34 am
Location: Ethiopia

Re: Hacking with Winograd

Post by Daniel Shawul »

Joost Buijs wrote: Thu Oct 11, 2018 8:01 pm
Daniel Shawul wrote: Thu Oct 11, 2018 7:29 pm Henk is in love with his neurons :)

Why reinvent the wheel when there are ton of deep learning libraries to choose from.
Because these 'open source' libraries are never optimal for the things you want to do with it.
Most people are lazy, basically writing a script in Python to use with Tensorflow/Keras, but they have no idea what is actually going on underneath.
I don't disagree on the need to understand the inner workings but you will have a hard time
beating vendor supplied optimized libraries such as Intel MKL, CuDNN, TensorRT etc...
Lczero already tried the former approach first and eventually switched to cuDNN, MKL BLAS, etc.
I am sure GCP put a lot of effort into coding winograd etc but these AI libraries are used by a lot of industry
so NVIDIA/Intel have a lot to gain from offering highly optimized libraries.
Plus you would be spending your time on what really matters to your project.
User avatar
flok
Posts: 481
Joined: Tue Jul 03, 2018 10:19 am
Full name: Folkert van Heusden

Re: Hacking with Winograd

Post by flok »

Daniel Shawul wrote: Thu Oct 11, 2018 7:29 pm Henk is in love with his neurons :)

Why reinvent the wheel when there are ton of deep learning libraries to choose from.
what's the fun in using existing libraries?
Henk
Posts: 7216
Joined: Mon May 27, 2013 10:31 am

Re: Hacking with Winograd

Post by Henk »

When debugging I also found that many weights or inputs were zero. I don't know if these libraries make good use of sparsity.
Henk
Posts: 7216
Joined: Mon May 27, 2013 10:31 am

Re: Hacking with Winograd

Post by Henk »

After rewrite (1):

Code: Select all

 
     // Computes the inner products of the weights of slice d with two horizontally adjacent
     // windows, starting at (l, m) and (l, m + 1), sharing multiplications between them.
     // winoGradFactors is a flat list holding one factor pair per group of three weights,
     // indexed by (row * F + column) / 3. Positions outside the input are read as 0.
     // Returns (s1, s2): the result for the window at column m and at column m + 1.
     (double s1, double s2) CombinedInProduct(int d, int l, int m, List<(double, double)> winoGradFactors)
        {
            var columnCount = Start.Height;
            Debug.Assert(m + 1 < columnCount - F + 1 + Padding);

            var input = Start.SliceList[d];
            var kernel = Weight[d];

            double total1 = 0;
            double total2 = 0;

            for (int ky = 0; ky < F; ky++)
            {
                int row = l + ky;
                if (row < 0 || row >= Start.Width)
                {
                    // This window row lies outside the input and contributes nothing.
                    continue;
                }

                var inputRow = input.Value[row];
                var kernelRow = kernel[ky];
                int rowBase = ky * F;

                for (int kx = 0; kx < F; kx += 3)
                {
                    var factors = winoGradFactors[(rowBase + kx) / 3];
                    int col = m + kx;

                    if (col + 3 < 0 || col >= columnCount)
                    {
                        // None of the four columns col .. col + 3 is inside the row.
                        continue;
                    }

                    var w0 = kernelRow[kx];
                    var w1 = kernelRow[kx + 1];
                    var w2 = kernelRow[kx + 2];

                    // Four consecutive inputs; out-of-range columns read as 0.
                    var u0 = (col >= 0) ? inputRow[col] : 0;
                    var u1 = (col + 1 >= 0 && col + 1 < columnCount) ? inputRow[col + 1] : 0;
                    var u2 = (col + 2 >= 0 && col + 2 < columnCount) ? inputRow[col + 2] : 0;
                    var u3 = (col + 3 < columnCount) ? inputRow[col + 3] : 0;

                    var p1 = (u0 - u2) * w0;
                    var p2 = (u1 + u2) * factors.Item1;
                    var p3 = (u2 - u1) * factors.Item2;
                    var p4 = (u1 - u3) * w2;

                    // Cross-check against the direct three-term products.
                    Debug.Assert(Math.Abs(p1 + p2 + p3 - (u0 * w0 + u1 * w1 + u2 * w2)) <= 1E-7);
                    Debug.Assert(Math.Abs(p2 - p3 - p4 - (u1 * w0 + u2 * w1 + u3 * w2)) <= 1E-7);

                    total1 += p1 + p2 + p3;
                    total2 += p2 - p3 - p4;
                }
            }

            return (total1, total2);
        }
       
Henk
Posts: 7216
Joined: Mon May 27, 2013 10:31 am

Re: Hacking with Winograd

Post by Henk »

Rewrite(2):

Code: Select all

  	    // For every slice d and every weight row i: one factor pair per group of three
  	    // weights in that row. Weights past the end of the row are taken as 0.
  	    var winoGradBlockFactors = new List<List<List<(double, double)>>>();
            for (int d = 0; d < Start.Depth; d++)
            {
                var sliceFactors = new List<List<(double, double)>>();
                for (int i = 0; i < Weight[d].nRows(); i++)
                {
                    var rowFactors = new List<(double, double)>();
                    var columnCount = Weight[d].nColumns();
                    var row = Weight[d][i];

                    for (int j = 0; j < columnCount; j += 3)
                    {
                        var a = row[j];
                        var b = (j + 1 < columnCount) ? row[j + 1] : 0;
                        var c = (j + 2 < columnCount) ? row[j + 2] : 0;

                        // Item1 = (w0 + w1 + w2) / 2, Item2 = (w0 - w1 + w2) / 2.
                        rowFactors.Add(((a + b + c) / 2, (a - b + c) / 2));
                    }
                    sliceFactors.Add(rowFactors);
                }
                winoGradBlockFactors.Add(sliceFactors);
            }

….


   // Computes the inner products of the weights of slice d with two horizontally adjacent
   // windows, starting at (l, m) and (l, m + 1), sharing multiplications between them.
   // winoGradFactors[i][k] is the factor pair for the k-th group of three weights in weight row i.
   // Positions outside the input are read as 0.
   // Returns (s1, s2): the result for the window at column m and at column m + 1.
   (double s1, double s2) CombinedInProduct(int d, int l, int m, List<List<(double, double)>> winoGradFactors)
        {
            var columnCount = Start.Height;
            Debug.Assert(m + 1 < columnCount - F + 1 + Padding);

            var input = Start.SliceList[d];
            var kernel = Weight[d];

            double total1 = 0;
            double total2 = 0;

            for (int ky = 0; ky < F; ky++)
            {
                // Looked up before the range test, as in the original ordering.
                var factorRow = winoGradFactors[ky];

                int row = l + ky;
                if (row < 0 || row >= Start.Width)
                {
                    continue;
                }

                var inputRow = input.Value[row];
                var kernelRow = kernel[ky];

                // group counts the three-weight groups of this row and advances with kx.
                for (int kx = 0, group = 0; kx < F; kx += 3, group++)
                {
                    int col = m + kx;
                    if (col + 3 < 0 || col >= columnCount)
                    {
                        continue;
                    }

                    var w0 = kernelRow[kx];
                    var w1 = kernelRow[kx + 1];
                    var w2 = kernelRow[kx + 2];

                    // Four consecutive inputs; out-of-range columns read as 0.
                    var u0 = (col >= 0) ? inputRow[col] : 0;
                    var u1 = (col + 1 >= 0 && col + 1 < columnCount) ? inputRow[col + 1] : 0;
                    var u2 = (col + 2 >= 0 && col + 2 < columnCount) ? inputRow[col + 2] : 0;
                    var u3 = (col + 3 < columnCount) ? inputRow[col + 3] : 0;

                    var factors = factorRow[group];
                    var p1 = (u0 - u2) * w0;
                    var p2 = (u1 + u2) * factors.Item1;
                    var p3 = (u2 - u1) * factors.Item2;
                    var p4 = (u1 - u3) * w2;

                    // Cross-check against the direct three-term products.
                    Debug.Assert(Math.Abs(p1 + p2 + p3 - (u0 * w0 + u1 * w1 + u2 * w2)) <= 1E-7);
                    Debug.Assert(Math.Abs(p2 - p3 - p4 - (u1 * w0 + u2 * w1 + u3 * w2)) <= 1E-7);

                    total1 += p1 + p2 + p3;
                    total2 += p2 - p3 - p4;
                }
            }

            return (total1, total2);
        }
        
Henk
Posts: 7216
Joined: Mon May 27, 2013 10:31 am

Re: Hacking with Winograd

Post by Henk »

Rewrite(3). Computes four inner products x.w for four consecutive windows x of size 3x3 starting at slice[d][l][m].

Also assumes Padding is in the range [0 .. 1].

Code: Select all

 
  // Computes the inner products of the 3x3 weights of slice d with four horizontally
  // adjacent 3x3 windows whose top-left corners are (l, m) .. (l, m + 3).
  // Each weight row is processed as two pairs of windows (columns m, m + 1 and m + 2, m + 3)
  // that reuse the same weights and factor pair winoGradFactors[row][0].
  // Requires Padding <= 1; see the assertions for the accepted range of l and m.
  // Returns (s1, s2, s3, s4): the results for the windows at columns m, m + 1, m + 2, m + 3.
  (double s1, double s2, double s3, double s4) CombinedInProduct4(int d, int l, int m, List<List<(double, double)>> winoGradFactors)
        {
            var colCount = Start.Height;
            var rowCount = Start.Width;
            Debug.Assert(l <= rowCount + Padding - 3);
            Debug.Assert(m < colCount + Padding - 5);
            Debug.Assert(l >= -Padding);
            Debug.Assert(m >= -Padding);
            Debug.Assert(Padding <= 1);

            var input = Start.SliceList[d];
            var kernel = Weight[d];

            double total1 = 0;
            double total2 = 0;
            double total3 = 0;
            double total4 = 0;

            for (int r = 0; r < 3; r++)
            {
                // Same guards as the unrolled form: the first row is skipped when it lies above
                // the input, the last row when it lies below; the middle row is always read.
                bool rowPresent = r == 1 || (r == 0 ? l >= 0 : l + 2 < rowCount);
                if (!rowPresent)
                {
                    continue;
                }

                unchecked
                {
                    var inputRow = input.Value[l + r];
                    var kernelRow = kernel[r];
                    var w0 = kernelRow[0];
                    var w1 = kernelRow[1];
                    var w2 = kernelRow[2];
                    var factors = winoGradFactors[r][0];
                    var g1 = factors.Item1;
                    var g2 = factors.Item2;

                    // Six consecutive inputs; only the first and the last can be outside the row.
                    var u0 = (m >= 0) ? inputRow[m] : 0;
                    var u1 = inputRow[m + 1];
                    var u2 = inputRow[m + 2];
                    var u3 = inputRow[m + 3];
                    var u4 = inputRow[m + 4];
                    var u5 = (m + 5 < colCount) ? inputRow[m + 5] : 0;

                    // Windows at columns m and m + 1.
                    var a1 = (u0 - u2) * w0;
                    var a2 = (u1 + u2) * g1;
                    var a3 = (u2 - u1) * g2;
                    var a4 = (u1 - u3) * w2;

                    Debug.Assert(Math.Abs(a1 + a2 + a3 - (u0 * w0 + u1 * w1 + u2 * w2)) <= 1E-7);
                    Debug.Assert(Math.Abs(a2 - a3 - a4 - (u1 * w0 + u2 * w1 + u3 * w2)) <= 1E-7);

                    total1 += a1 + a2 + a3;
                    total2 += a2 - a3 - a4;

                    // Windows at columns m + 2 and m + 3.
                    var b1 = (u2 - u4) * w0;
                    var b2 = (u3 + u4) * g1;
                    var b3 = (u4 - u3) * g2;
                    var b4 = (u3 - u5) * w2;

                    Debug.Assert(Math.Abs(b1 + b2 + b3 - (u2 * w0 + u3 * w1 + u4 * w2)) <= 1E-7);
                    Debug.Assert(Math.Abs(b2 - b3 - b4 - (u3 * w0 + u4 * w1 + u5 * w2)) <= 1E-7);

                    total3 += b1 + b2 + b3;
                    total4 += b2 - b3 - b4;
                }
            }

            return (total1, total2, total3, total4);
        }
        
        // Evaluates the layer: for every window position (l, m) the output is Bias plus the
        // sum over all slices d of the inner product of that window with Weight[d].
        // Where possible, neighbouring output columns are computed together:
        //   - four at a time via CombinedInProduct4 (only when F == 3 and Padding <= 1),
        //   - two at a time via CombinedInProduct3 / CombinedInProduct,
        //   - one at a time via Matrix.InnerProduct for a trailing column, or for every
        //     column when Padding > 1.
        // In debug builds each combined result is checked against the direct inner product
        // and the whole result against EvalDebug().
        public override IMatrix Eval()
        {
            var result = new Matrix(Start.Width + 2 * Padding - F + 1, Start.Height + 2 * Padding - F + 1);

            // Per slice d and weight row i: one factor pair for each group of three weights.
            // Weights past the end of a row are taken as 0.
            var winoGradBlockFactors = new List<List<List<(double, double)>>>();
            for (int d = 0; d <= Start.Depth - 1; d++)
            {
                var winoGradSliceFactors = new List<List<(double, double)>>();
                for (int i = 0; i < Weight[d].nRows(); i++)
                {
                    var winoGradSliceRowFactors = new List<(double, double)>();
                    var nCol = Weight[d].nColumns();
                    var weightRow = Weight[d][i];

                    for (int j = 0; j < nCol; j += 3)
                    {
                        var w0 = weightRow[j];
                        var w1 = j + 1 < nCol ? weightRow[j + 1] : 0;
                        var w2 = j + 2 < nCol ? weightRow[j + 2] : 0;

                        var winGrad1Factor = (w0 + w1 + w2) / 2;
                        var winGrad2Factor = (w0 - w1 + w2) / 2;
                        winoGradSliceRowFactors.Add((winGrad1Factor, winGrad2Factor));
                    }
                    winoGradSliceFactors.Add(winoGradSliceRowFactors);
                }
                winoGradBlockFactors.Add(winoGradSliceFactors);
            }

            // Exclusive upper bounds for the window's top-left row and column.
            var rowEnd = Start.Width - F + 1 + Padding;
            var colEnd = Start.Height - F + 1 + Padding;
            bool fourWideAllowed = Padding <= 1 && F == 3;
            bool singleOnly = Padding > 1;

            for (int l = -Padding; l < rowEnd; l++)
            {
                // The column step depends on which path handled the current position, so m is
                // advanced explicitly: each path moves past exactly the columns it has written.
                int m = -Padding;
                while (m < colEnd)
                {
                    if (fourWideAllowed && m + 3 < colEnd)
                    {
                        double sum = Bias;
                        double sum2 = Bias;
                        double sum3 = Bias;
                        double sum4 = Bias;
                        for (int d = 0; d <= Start.Depth - 1; d++)
                        {
                            (double s1, double s2, double s3, double s4) = CombinedInProduct4(d, l, m, winoGradBlockFactors[d]);
                            sum += s1;
                            sum2 += s2;
                            sum3 += s3;
                            sum4 += s4;

                            Debug.Assert(Math.Abs(s1 - Matrix.InnerProduct(EvalSumMatrix(d, l, m), Weight[d])) < 1E-10);
                            Debug.Assert(Math.Abs(s2 - Matrix.InnerProduct(EvalSumMatrix(d, l, m + 1), Weight[d])) < 1E-10);
                            Debug.Assert(Math.Abs(s3 - Matrix.InnerProduct(EvalSumMatrix(d, l, m + 2), Weight[d])) < 1E-10);
                            Debug.Assert(Math.Abs(s4 - Matrix.InnerProduct(EvalSumMatrix(d, l, m + 3), Weight[d])) < 1E-10);
                        }
                        result[l + Padding][m + Padding] = sum;
                        result[l + Padding][m + Padding + 1] = sum2;
                        result[l + Padding][m + Padding + 2] = sum3;
                        result[l + Padding][m + Padding + 3] = sum4;

                        // All four columns are done; do not fall through and recompute
                        // columns m + 2 and m + 3 with the two-wide path.
                        m += 4;
                        continue;
                    }

                    if (singleOnly || m + 1 == colEnd)
                    {
                        double sum = Bias;
                        for (int d = 0; d <= Start.Depth - 1; d++)
                        {
                            sum += Matrix.InnerProduct(EvalSumMatrix(d, l, m), Weight[d]);
                        }
                        result[l + Padding][m + Padding] = sum;

                        // Only one column was written, so advance by one; stepping by two here
                        // would leave every second column unset when Padding > 1.
                        m += 1;
                        continue;
                    }

                    {
                        double sum = Bias;
                        double sum2 = Bias;
                        for (int d = 0; d <= Start.Depth - 1; d++)
                        {
                            (double s1, double s2) = F == 3 ? CombinedInProduct3(d, l, m, winoGradBlockFactors[d]) : CombinedInProduct(d, l, m, winoGradBlockFactors[d]);
                            sum += s1;
                            sum2 += s2;
                            Debug.Assert(Math.Abs(s1 - Matrix.InnerProduct(EvalSumMatrix(d, l, m), Weight[d])) < 1E-10);
                            Debug.Assert(Math.Abs(s2 - Matrix.InnerProduct(EvalSumMatrix(d, l, m + 1), Weight[d])) < 1E-10);
                        }
                        result[l + Padding][m + Padding] = sum;
                        result[l + Padding][m + Padding + 1] = sum2;
                        m += 2;
                    }
                }
            }

            Debug.Assert(result.Error(EvalDebug()) <= 1E-10);
            return result;
        }
        
        
   
Henk
Posts: 7216
Joined: Mon May 27, 2013 10:31 am

Re: Hacking with Winograd

Post by Henk »

rewrite(4). Renaming some variables

Code: Select all

// Computes the inner products of the 3x3 weights of slice d with four horizontally
// adjacent 3x3 windows whose top-left corners are (l, m) .. (l, m + 3).
// d: index of the slice in Start.SliceList and of its weights in Weight
// l: row of the windows' top-left corner (may be -Padding)
// m: column of the first window's top-left corner (may be -Padding)
// winoGradFactors: winoGradFactors[row][0] is the factor pair for weight row `row`
// Requires Padding <= 1; see the assertions for the accepted range of l and m.
// Input positions outside the slice are read as 0.
// Returns (s1, s2, s3, s4): the results for the windows at columns m, m + 1, m + 2, m + 3.
(double s1, double s2, double s3, double s4) CombinedInProduct4(int d, int l, int m, List<List<(double, double)>> winoGradFactors)
        {
            var startHeight = Start.Height;
            var startWidth = Start.Width;
            Debug.Assert(l <= startWidth + Padding - 3);
            Debug.Assert(m < startHeight + Padding - 5);
            Debug.Assert(l >= -Padding);
            Debug.Assert(m >= -Padding);
            Debug.Assert(Padding <= 1);

            var slice = Start.SliceList[d];
            var w = Weight[d];

            double sum1 = 0;
            double sum2 = 0;
            double sum3 = 0;
            double sum4 = 0;

            // The three weight rows are handled identically, so they share one loop body
            // instead of three hand-copied variants.
            for (int r = 0; r < 3; r++)
            {
                int row = l + r;

                // Under the assertions above only the first row can be above the input and
                // only the last row below it; such rows contribute nothing.
                if (row < 0 || row >= startWidth)
                {
                    continue;
                }

                var sliceRow = slice.Value[row];
                var weightRow = w[r];
                var w0 = weightRow[0];
                var w1 = weightRow[1]; // only needed by the debug cross-checks below
                var w2 = weightRow[2];

                var factorPair = winoGradFactors[r][0];
                var g1 = factorPair.Item1;
                var g2 = factorPair.Item2;

                // Six consecutive inputs. With Padding <= 1 only the first (m == -1) and the
                // last (m + 5 == startHeight) can fall outside the row.
                var u0 = (m >= 0) ? sliceRow[m] : 0;
                var u1 = sliceRow[m + 1];
                var u2 = sliceRow[m + 2];
                var u3 = sliceRow[m + 3];
                var u4 = sliceRow[m + 4];
                var u5 = (m + 5 < startHeight) ? sliceRow[m + 5] : 0;

                // Windows at columns m and m + 1: four multiplications for two results.
                var a1 = (u0 - u2) * w0;
                var a2 = (u1 + u2) * g1;
                var a3 = (u2 - u1) * g2;
                var a4 = (u1 - u3) * w2;

                Debug.Assert(Math.Abs(a1 + a2 + a3 - (u0 * w0 + u1 * w1 + u2 * w2)) <= 1E-7);
                Debug.Assert(Math.Abs(a2 - a3 - a4 - (u1 * w0 + u2 * w1 + u3 * w2)) <= 1E-7);

                sum1 += a1 + a2 + a3;
                sum2 += a2 - a3 - a4;

                // Windows at columns m + 2 and m + 3, reusing u2 and u3.
                var b1 = (u2 - u4) * w0;
                var b2 = (u3 + u4) * g1;
                var b3 = (u4 - u3) * g2;
                var b4 = (u3 - u5) * w2;

                Debug.Assert(Math.Abs(b1 + b2 + b3 - (u2 * w0 + u3 * w1 + u4 * w2)) <= 1E-7);
                Debug.Assert(Math.Abs(b2 - b3 - b4 - (u3 * w0 + u4 * w1 + u5 * w2)) <= 1E-7);

                sum3 += b1 + b2 + b3;
                sum4 += b2 - b3 - b4;
            }

            return (sum1, sum2, sum3, sum4);
        }