From 280636acaac4ff86aaab75b5e101b0e5b370298a Mon Sep 17 00:00:00 2001
From: krypdkat <cryptodkat@gmail.com>
Date: Sat, 30 Mar 2024 17:45:00 +0700
Subject: [PATCH 1/7] add thread tracker

---
 src/qubic.cpp | 57 ++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 54 insertions(+), 3 deletions(-)

diff --git a/src/qubic.cpp b/src/qubic.cpp
index 5c13a359..914f7fce 100644
--- a/src/qubic.cpp
+++ b/src/qubic.cpp
@@ -207,9 +207,12 @@ static struct
     RequestedTickTransactions requestedTickTransactions;
 } requestedTickTransactions;
 
-
-
-
+static struct { // most recent check-in timestamp of each worker thread, one slot per processor number
+    unsigned char day;    // copied from time.Day by checkinTime()
+    unsigned char hour;   // copied from time.Hour by checkinTime()
+    unsigned char minute; // copied from time.Minute by checkinTime()
+    unsigned char second; // copied from time.Second by checkinTime()
+} threadTimeCheckin[MAX_NUMBER_OF_PROCESSORS];
 
 static void logToConsole(const CHAR16* message)
 {
@@ -1229,6 +1232,15 @@ static void processSpecialCommand(Peer* peer, RequestResponseHeader* header)
     }
 }
 
+// Liveness tracker: copies the current global time into this processor's threadTimeCheckin slot so that a stalled or crashed thread can be detected later.
+static void checkinTime(unsigned long long processorNumber) // processorNumber indexes threadTimeCheckin without a range check
+{
+    threadTimeCheckin[processorNumber].second = time.Second;
+    threadTimeCheckin[processorNumber].minute = time.Minute;
+    threadTimeCheckin[processorNumber].hour = time.Hour;
+    threadTimeCheckin[processorNumber].day = time.Day;
+}
+
 static void requestProcessor(void* ProcedureArgument)
 {
     enableAVX();
@@ -1240,6 +1252,7 @@ static void requestProcessor(void* ProcedureArgument)
     RequestResponseHeader* header = (RequestResponseHeader*)processor->buffer;
     while (!shutDownNode)
     {
+        checkinTime(processorNumber);
         // in epoch transition, wait here
         if (epochTransitionState)
         {
@@ -3044,6 +3057,7 @@ static void tickProcessor(void*)
     unsigned int latestProcessedTick = 0;
     while (!shutDownNode)
     {
+        checkinTime(processorNumber);
         const unsigned long long curTimeTick = __rdtsc();
         const unsigned int nextTick = system.tick + 1;
 
@@ -4598,6 +4612,43 @@ static void processKeyPresses()
             appendText(message, L"&");
             appendText(message, (mainAuxStatus & 2) ? L"MAIN" : L"aux");
             logToConsole(message);
+
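+            // Report the liveness of the tick and request processor threads, based on the time of their last check-in.
+            // Caveat: the day of month is compared directly, so across a month rollover the difference is negative and a stalled thread is not reported.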
+            bool allThreadsAreGood = true;
+            setText(message, L"Thread status: ");
+            for (int i = 0; i < nTickProcessorIDs; i++)
+            {
+                unsigned long long tid = tickProcessorIDs[i];
+                long long diffInSecond = 86400 * (time.Day - threadTimeCheckin[tid].day) + 3600 * (time.Hour - threadTimeCheckin[tid].hour)
+                    + 60 * (time.Minute - threadTimeCheckin[tid].minute) + (time.Second - threadTimeCheckin[tid].second);
+                if (diffInSecond > 120) // if they don't check in in 2 minutes, we can assume the thread is already crashed
+                {
+                    allThreadsAreGood = false;
+                    appendText(message, L"Tick Processor #");
+                    appendNumber(message, tid, false);
+                    appendText(message, L" is not responsive | ");
+                }
+            }
+
+            for (int i = 0; i < nRequestProcessorIDs; i++)
+            {
+                unsigned long long tid = requestProcessorIDs[i];
+                long long diffInSecond = 86400 * (time.Day - threadTimeCheckin[tid].day) + 3600 * (time.Hour - threadTimeCheckin[tid].hour)
+                    + 60 * (time.Minute - threadTimeCheckin[tid].minute) + (time.Second - threadTimeCheckin[tid].second);
+                if (diffInSecond > 120) // if they don't check in in 2 minutes, we can assume the thread is already crashed
+                {
+                    allThreadsAreGood = false;
+                    appendText(message, L"Request Processor #");
+                    appendNumber(message, tid, false);
+                    appendText(message, L" is not responsive | ");
+                }
+            }
+            if (allThreadsAreGood)
+            {
+                appendText(message, L"All threads are healthy.");
+            }
+            logToConsole(message);
         }
         break;
 

From d8e802f1516125adceb9ec06ea8453eef65b19ae Mon Sep 17 00:00:00 2001
From: krypdkat <cryptodkat@gmail.com>
Date: Mon, 8 Apr 2024 13:57:10 +0700
Subject: [PATCH 2/7] update neuron computation

---
 src/score.h            | 31 +++++++++++++++++++------------
 test/score_reference.h | 14 +++++++++-----
 2 files changed, 28 insertions(+), 17 deletions(-)

diff --git a/src/score.h b/src/score.h
index 80b8c61a..62894163 100644
--- a/src/score.h
+++ b/src/score.h
@@ -28,6 +28,7 @@ struct ScoreFunction
     {
         long long input[dataLength + numberOfInputNeurons + infoLength];
         long long output[infoLength + numberOfOutputNeurons + dataLength];
+        long long buffer[dataLength + numberOfInputNeurons + infoLength];
     } _neurons[solutionBufferCount];
     struct
     {
@@ -328,6 +329,7 @@ struct ScoreFunction
     void computeInputNeuron(int solutionBufIdx)
     {
         auto& neurons = _neurons[solutionBufIdx];
+        auto& neuronBuffer = neurons.buffer;
         auto& synapses = _synapses[solutionBufIdx];
         auto& indicePosInput = _indicePosInput[solutionBufIdx];
         auto& bucketPosInput = _bucketPosInput[solutionBufIdx];
@@ -338,22 +340,23 @@ struct ScoreFunction
         copyMem(&neurons.input[0], miningData, sizeof(miningData));
         int totalIndice;
         for (int tick = 1; tick <= maxInputDuration; tick++) {
+            copyMem(neuronBuffer, &neurons.input[0], sizeof(neurons.input[0]) * (dataLength + numberOfInputNeurons + infoLength));
             for (unsigned int inputNeuronIndex = 0; inputNeuronIndex < numberOfInputNeurons + infoLength; inputNeuronIndex++) {
                 // pre scan for shortcut
                 if (tick > 3)
                 {
                     int elemCount = 0;
                     getLastNeurons(indicePosInput[inputNeuronIndex], bucketPosInput[inputNeuronIndex], _modNum[tick], indices,
-                        nSample, _totalModNum[tick], elemCount, neurons.input);
+                        nSample, _totalModNum[tick], elemCount, neuronBuffer);
                     for (int i = 0; i < elemCount; i++)
                     {
                         unsigned int anotherInputNeuronIndex = indices[i];
                         const unsigned int offset = inputNeuronIndex * (dataLength + numberOfInputNeurons + infoLength) + anotherInputNeuronIndex;
                         if (synapses.inputLength[offset] > 0) {
-                            sumBuffer[i] = neurons.input[anotherInputNeuronIndex];
+                            sumBuffer[i] = neuronBuffer[anotherInputNeuronIndex];
                         }
                         else {
-                            sumBuffer[i] = -neurons.input[anotherInputNeuronIndex];
+                            sumBuffer[i] = -neuronBuffer[anotherInputNeuronIndex];
                         }
                     }
                     int found = -1;
@@ -386,10 +389,10 @@ struct ScoreFunction
                     unsigned int anotherInputNeuronIndex = indices[i];
                     const unsigned int offset = inputNeuronIndex * (dataLength + numberOfInputNeurons + infoLength) + anotherInputNeuronIndex;
                     if (synapses.inputLength[offset] > 0) {
-                        sumBuffer[i] = neurons.input[anotherInputNeuronIndex];
+                        sumBuffer[i] = neuronBuffer[anotherInputNeuronIndex];
                     }
                     else {
-                        sumBuffer[i] = -neurons.input[anotherInputNeuronIndex];
+                        sumBuffer[i] = -neuronBuffer[anotherInputNeuronIndex];
                     }
                 }
                 for (int i = 0; i < totalIndice; i++)
@@ -443,31 +446,34 @@ struct ScoreFunction
     void computeOutputNeuron(int solutionBufIdx)
     {
         auto& neurons = _neurons[solutionBufIdx];
+        auto& neuronBuffer = neurons.buffer;
         auto& synapses = _synapses[solutionBufIdx];
-
         auto& indicePosOutput = _indicePosOutput[solutionBufIdx];
         auto& bucketPosOutput = _bucketPosOutput[solutionBufIdx];
         auto& bufferPosOutput = _bufferPosOutput[solutionBufIdx];
         auto& sumBuffer = _sumBuffer[solutionBufIdx];
         auto& indices = _indices[solutionBufIdx];
+
+        setMem(&neuronBuffer[0], sizeof(neuronBuffer), 0);
         int totalIndice;
         for (int tick = 1; tick <= maxOutputDuration; tick++) {
+            copyMem(neuronBuffer, &neurons.output[0], sizeof(neurons.output));
             for (unsigned int outputNeuronIndex = 0; outputNeuronIndex < numberOfOutputNeurons + dataLength; outputNeuronIndex++) {
                 // pre scan for shortcut
                 if (tick > 3)
                 {
                     int elemCount = 0;
                     getLastNeurons(indicePosOutput[outputNeuronIndex], bucketPosOutput[outputNeuronIndex], _modNum[tick], indices,
-                        nSample, _totalModNum[tick], elemCount, neurons.output);
+                        nSample, _totalModNum[tick], elemCount, neuronBuffer);
                     for (int i = 0; i < elemCount; i++)
                     {
                         unsigned int anotherOutputNeuronIndex = indices[i];
                         const unsigned int offset = outputNeuronIndex * (dataLength + numberOfOutputNeurons + infoLength) + anotherOutputNeuronIndex;
                         if (synapses.outputLength[offset] > 0) {
-                            sumBuffer[i] = neurons.output[anotherOutputNeuronIndex];
+                            sumBuffer[i] = neuronBuffer[anotherOutputNeuronIndex];
                         }
                         else {
-                            sumBuffer[i] = -neurons.output[anotherOutputNeuronIndex];
+                            sumBuffer[i] = -neuronBuffer[anotherOutputNeuronIndex];
                         }
                     }
                     int found = -1;
@@ -500,12 +506,12 @@ struct ScoreFunction
                         unsigned int anotherOutputNeuronIndex = indices[i];
                         const unsigned int offset = outputNeuronIndex * (dataLength + numberOfOutputNeurons + infoLength) + anotherOutputNeuronIndex;
                         if (synapses.outputLength[offset] > 0) {
-                            sumBuffer[i] = neurons.output[anotherOutputNeuronIndex];
+                            sumBuffer[i] = neuronBuffer[anotherOutputNeuronIndex];
                         }
                         else {
-                            sumBuffer[i] = -neurons.output[anotherOutputNeuronIndex];
+                            sumBuffer[i] = -neuronBuffer[anotherOutputNeuronIndex];
                         }
-                    }
+                    }                    
 
                     for (int i = 0; i < totalIndice; i++)
                     {
@@ -543,6 +549,7 @@ struct ScoreFunction
 
         computeInputNeuron(solutionBufIdx);
 
+        setMem(&neurons.output[0], sizeof(neurons.output), 0);
         for (unsigned int i = 0; i < infoLength; i++)
         {
             neurons.output[i] = (neurons.input[dataLength + numberOfInputNeurons + i] >= 0 ? 1 : -1);
diff --git a/test/score_reference.h b/test/score_reference.h
index 213786de..81f5b7cd 100644
--- a/test/score_reference.h
+++ b/test/score_reference.h
@@ -21,6 +21,7 @@ struct ScoreReferenceImplementation
     {
         long long input[dataLength + numberOfInputNeurons + infoLength];
         long long output[infoLength + numberOfOutputNeurons + dataLength];
+        long long neuronBuffer[dataLength + numberOfInputNeurons + infoLength];
     } _neurons[solutionBufferCount];
     struct
     {
@@ -54,6 +55,7 @@ struct ScoreReferenceImplementation
     {
         processorNumber %= solutionBufferCount;
         auto& neurons = _neurons[processorNumber];
+        auto& buffer = neurons.neuronBuffer;
         auto& synapses = _synapses[processorNumber];
         memset(&neurons, 0, sizeof(neurons));
         random(publicKey, nonce, (unsigned char*)&synapses, sizeof(synapses));
@@ -88,10 +90,11 @@ struct ScoreReferenceImplementation
             synapses.outputLength[outputNeuronIndex * (infoLength + numberOfOutputNeurons + dataLength) + (infoLength + outputNeuronIndex)] = 0;
         }
 
-        memcpy(&neurons.input[0], &miningData, sizeof(miningData));
+        memcpy(&neurons.input[0], &miningData, sizeof(miningData));        
 
         for (int tick = 1; tick <= maxInputDuration; tick++)
         {
+            memcpy(buffer, neurons.input, sizeof(neurons.input));
             for (unsigned int inputNeuronIndex = 0; inputNeuronIndex < numberOfInputNeurons + infoLength; inputNeuronIndex++)
             {
                 for (unsigned int anotherInputNeuronIndex = 0; anotherInputNeuronIndex < dataLength + numberOfInputNeurons + infoLength; anotherInputNeuronIndex++)
@@ -102,11 +105,11 @@ struct ScoreReferenceImplementation
                     {
                         if (synapses.inputLength[offset] > 0)
                         {
-                            neurons.input[dataLength + inputNeuronIndex] += neurons.input[anotherInputNeuronIndex];
+                            neurons.input[dataLength + inputNeuronIndex] += buffer[anotherInputNeuronIndex];
                         }
                         else
                         {
-                            neurons.input[dataLength + inputNeuronIndex] -= neurons.input[anotherInputNeuronIndex];
+                            neurons.input[dataLength + inputNeuronIndex] -= buffer[anotherInputNeuronIndex];
                         }
                         clampNeuron(neurons.input[dataLength + inputNeuronIndex]);
                     }
@@ -121,6 +124,7 @@ struct ScoreReferenceImplementation
 
         for (int tick = 1; tick <= maxOutputDuration; tick++)
         {
+            memcpy(buffer, neurons.output, sizeof(neurons.output));
             for (unsigned int outputNeuronIndex = 0; outputNeuronIndex < numberOfOutputNeurons + dataLength; outputNeuronIndex++)
             {
                 for (unsigned int anotherOutputNeuronIndex = 0; anotherOutputNeuronIndex < infoLength + numberOfOutputNeurons + dataLength; anotherOutputNeuronIndex++)
@@ -131,11 +135,11 @@ struct ScoreReferenceImplementation
                     {
                         if (synapses.outputLength[offset] > 0)
                         {
-                            neurons.output[infoLength + outputNeuronIndex] += neurons.output[anotherOutputNeuronIndex];
+                            neurons.output[infoLength + outputNeuronIndex] += buffer[anotherOutputNeuronIndex];
                         }
                         else
                         {
-                            neurons.output[infoLength + outputNeuronIndex] -= neurons.output[anotherOutputNeuronIndex];
+                            neurons.output[infoLength + outputNeuronIndex] -= buffer[anotherOutputNeuronIndex];
                         }
                         clampNeuron(neurons.output[infoLength + outputNeuronIndex]);
                     }

From 749cf2e2109f14c14729f5ce86c861fc3a4259b5 Mon Sep 17 00:00:00 2001
From: krypdkat <cryptodkat@gmail.com>
Date: Mon, 8 Apr 2024 15:45:12 +0700
Subject: [PATCH 3/7] improve score func

---
 src/score.h | 344 ++++++++++++++++++++--------------------------------
 1 file changed, 130 insertions(+), 214 deletions(-)

diff --git a/src/score.h b/src/score.h
index 62894163..97d083d3 100644
--- a/src/score.h
+++ b/src/score.h
@@ -42,17 +42,16 @@ struct ScoreFunction
     // i is divisible by _modNum[i][j], j < _totalModNum[i]
     int _modNum[257][129];
     // indice pos
-    unsigned short _indicePosInput[solutionBufferCount][numberOfInputNeurons + infoLength][dataLength + numberOfInputNeurons + infoLength];
-    unsigned short _indicePosOutput[solutionBufferCount][numberOfOutputNeurons + dataLength][infoLength + numberOfOutputNeurons + dataLength];
-    
+    // Row count of the tables shared by the input and output passes: the larger of the two neuron counts.
+    // Selected with a constant expression instead of #if: the preprocessor evaluates every identifier
+    // that is not a macro as 0, so an #if comparison is only meaningful for macro-defined sizes.
+    static constexpr unsigned int maxNeuronRowCount =
+        (numberOfInputNeurons + infoLength) > (numberOfOutputNeurons + dataLength)
+        ? (numberOfInputNeurons + infoLength) : (numberOfOutputNeurons + dataLength);
+    unsigned short _indicePos[solutionBufferCount][maxNeuronRowCount][dataLength + numberOfInputNeurons + infoLength];
+    int _bucketPos[solutionBufferCount][maxNeuronRowCount][129];
+    int _bufferPos[solutionBufferCount][maxNeuronRowCount][129];
     int nSample;
-
-    int _bucketPosInput[solutionBufferCount][numberOfInputNeurons + infoLength][129];
-    int _bufferPosInput[solutionBufferCount][numberOfInputNeurons + infoLength][129];
-
-    int _bucketPosOutput[solutionBufferCount][dataLength + numberOfOutputNeurons][129];
-    int _bufferPosOutput[solutionBufferCount][dataLength + numberOfOutputNeurons][129];
-
     long long _sumBuffer[solutionBufferCount][dataLength + numberOfInputNeurons + infoLength];
     unsigned short _indices[solutionBufferCount][dataLength + numberOfInputNeurons + infoLength];
 
@@ -112,8 +111,9 @@ struct ScoreFunction
     {
         return (a < 0) ? -a : a;
     }
-
-    void clampNeuron(long long& val)
+    
+    template  <typename T>
+    void clampNeuron(T& val)
     {
         if (val > NEURON_VALUE_LIMIT) {
             val = NEURON_VALUE_LIMIT;
@@ -261,11 +261,8 @@ struct ScoreFunction
     }
 
     void getLastNeurons(const unsigned short* indices, const int* bucket, const int* modNum, unsigned short* topMax,
-                        const int nMax, const int totalModNum, int& currentCount, long long* neuron)
+                        const int nMax, const int totalModNum, int& currentCount, long long* neuron, int* index)
     {
-        int index[64];
-        static_assert(MAX_INPUT_DURATION <= 256 && MAX_OUTPUT_DURATION <= 256, "Need to increase array length");
-        setMem(index, sizeof(index), 0);
         while (currentCount < nMax)
         {
             int current_max = -1;
@@ -282,241 +279,160 @@ struct ScoreFunction
                 }
             }
             if (current_max == -1) return;
-            if (neuron[current_max]) topMax[currentCount++] = current_max;
+            if (neuron[current_max>>1]) topMax[currentCount++] = current_max;
             index[max_id]++;
         }
     }
 
-    void computeInputBucket(int solutionBufIdx)
+    int getLastNeuronsIndex(const unsigned short* indices, const int* bucket, const int* modNum, // returns int, not short: a packed entry may be as large as 65535 and must stay distinct from the -1/-2 sentinels
+                            const int totalModNum, long long* neuron, int* index) // index[i]: number of entries already consumed from the tail of bucket slot i (updated here)
     {
-        auto& synapses = _synapses[solutionBufIdx];
-        auto& indicePosInput = _indicePosInput[solutionBufIdx];
-        auto& bucketPosInput = _bucketPosInput[solutionBufIdx];
-        auto& bufferPosInput = _bufferPosInput[solutionBufIdx];
-        // compute bucket for input
-        setMem(bucketPosInput, sizeof(bucketPosInput), 0);
-        setMem(bufferPosInput, sizeof(bufferPosInput), 0);
-        setMem(indicePosInput, sizeof(indicePosInput), 0);
-
-        for (int i = 0; i < numberOfInputNeurons + infoLength; i++) {
-            const unsigned int base = i * (infoLength + numberOfInputNeurons + dataLength);
-            for (int j = 0; j < dataLength + numberOfInputNeurons + infoLength; j++) {
-                int v = synapses.inputLength[base + j];
-                if (v == 0) continue;
-                v = abs(v);
-                bucketPosInput[i][v]++;
+        int current_max = -1; // largest unconsumed packed entry seen so far (-1: none found)
+        int max_id = -1;      // bucket slot that supplied current_max
+        for (int i = 0; i < totalModNum; i++) {
+            int mod = modNum[i];
+            int start = bucket[mod];   // entries of this bucket occupy indices[start .. end-1]
+            int end = bucket[mod + 1];
+            if (start + index[i] < end) { // this bucket still has unconsumed entries
+                if (indices[end - index[i] - 1] > current_max) { // candidate is the bucket's last unconsumed entry
+                    current_max = indices[end - index[i] - 1];
+                    max_id = i;
+                }
             }
         }
-        // do exclusive sum per row
-        for (int i = 0; i < numberOfInputNeurons + infoLength; i++) {
-            for (int j = 1; j <= 128; j++) {
-                bufferPosInput[i][j] = bufferPosInput[i][j - 1] + bucketPosInput[i][j - 1];
-            }
-        }
-        copyMem(bucketPosInput, bufferPosInput, sizeof(bucketPosInput));
-        // fill indices to index table
-        for (int i = 0; i < numberOfInputNeurons + infoLength; i++) {
-            const unsigned int base = i * (infoLength + numberOfInputNeurons + dataLength);
-            for (int j = 0; j < dataLength + numberOfInputNeurons + infoLength; j++) {
-                int v = synapses.inputLength[base + j];
-                if (v == 0) continue;
-                v = abs(v);
-                indicePosInput[i][bufferPosInput[i][v]++] = j;
-            }
+        if (current_max == -1) return -2; // every listed bucket is exhausted
+        index[max_id]++; // consume the chosen entry
+        if (neuron[current_max >> 1]) { // the packed entry shifted right by one selects the neuron; report it only if that neuron is non-zero
+            return current_max;
         }
+        return -1; // entry consumed, but its neuron value is zero
     }
 
-    void computeInputNeuron(int solutionBufIdx)
+    template <bool isInput, int beginLength, int neuronLength, int endLength>
+    void computeBucket(int solutionBufIdx)
     {
-        auto& neurons = _neurons[solutionBufIdx];
-        auto& neuronBuffer = neurons.buffer;
-        auto& synapses = _synapses[solutionBufIdx];
-        auto& indicePosInput = _indicePosInput[solutionBufIdx];
-        auto& bucketPosInput = _bucketPosInput[solutionBufIdx];
-        auto& bufferPosInput = _bufferPosInput[solutionBufIdx];
-        auto& sumBuffer = _sumBuffer[solutionBufIdx];
-        auto& indices = _indices[solutionBufIdx];
-
-        copyMem(&neurons.input[0], miningData, sizeof(miningData));
-        int totalIndice;
-        for (int tick = 1; tick <= maxInputDuration; tick++) {
-            copyMem(neuronBuffer, &neurons.input[0], sizeof(neurons.input[0]) * (dataLength + numberOfInputNeurons + infoLength));
-            for (unsigned int inputNeuronIndex = 0; inputNeuronIndex < numberOfInputNeurons + infoLength; inputNeuronIndex++) {
-                // pre scan for shortcut
-                if (tick > 3)
-                {
-                    int elemCount = 0;
-                    getLastNeurons(indicePosInput[inputNeuronIndex], bucketPosInput[inputNeuronIndex], _modNum[tick], indices,
-                        nSample, _totalModNum[tick], elemCount, neuronBuffer);
-                    for (int i = 0; i < elemCount; i++)
-                    {
-                        unsigned int anotherInputNeuronIndex = indices[i];
-                        const unsigned int offset = inputNeuronIndex * (dataLength + numberOfInputNeurons + infoLength) + anotherInputNeuronIndex;
-                        if (synapses.inputLength[offset] > 0) {
-                            sumBuffer[i] = neuronBuffer[anotherInputNeuronIndex];
-                        }
-                        else {
-                            sumBuffer[i] = -neuronBuffer[anotherInputNeuronIndex];
-                        }
-                    }
-                    int found = -1;
-                    long long s = 0;
-                    for (int i = 0; i < elemCount - 1; i++)
-                    {
-                        if ((sumBuffer[i] > 0) == (sumBuffer[i + 1] > 0))
-                        {
-                            found = i + 1;
-                            break;
-                        }
-                    }
-                    if (found != -1)
-                    {
-                        s = 0;
-                        for (int i = found; i >= 0; i--)
-                        {
-                            s += sumBuffer[i];
-                            clampNeuron(s);
-                        }
-                        neurons.input[dataLength + inputNeuronIndex] = s;
-                        continue;
-                    }
-                }
-                // full compute
-                totalIndice = mergeSortBucket(indicePosInput[inputNeuronIndex], bucketPosInput[inputNeuronIndex], _modNum[tick], indices, (unsigned short*)sumBuffer, _totalModNum[tick]);
-                if (totalIndice == 0) continue;
-
-                for (int i = 0; i < totalIndice; i++) {
-                    unsigned int anotherInputNeuronIndex = indices[i];
-                    const unsigned int offset = inputNeuronIndex * (dataLength + numberOfInputNeurons + infoLength) + anotherInputNeuronIndex;
-                    if (synapses.inputLength[offset] > 0) {
-                        sumBuffer[i] = neuronBuffer[anotherInputNeuronIndex];
-                    }
-                    else {
-                        sumBuffer[i] = -neuronBuffer[anotherInputNeuronIndex];
-                    }
-                }
-                for (int i = 0; i < totalIndice; i++)
-                {
-                    neurons.input[dataLength + inputNeuronIndex] += sumBuffer[i];
-                    clampNeuron(neurons.input[dataLength + inputNeuronIndex]);
-                }
-            }
+        char* synapses = nullptr;
+        if (isInput)
+        {
+            synapses = _synapses[solutionBufIdx].inputLength;
         }
-    }
-
-    void computeOutputBucket(int solutionBufIdx)
-    {
-        auto& synapses = _synapses[solutionBufIdx];
-        auto& indicePosOutput = _indicePosOutput[solutionBufIdx];
-        auto& bucketPosOutput = _bucketPosOutput[solutionBufIdx];
-        auto& bufferPosOutput = _bufferPosOutput[solutionBufIdx];
-        // compute bucket for output
-        setMem(bucketPosOutput, sizeof(bucketPosOutput), 0);
-        setMem(bufferPosOutput, sizeof(bufferPosOutput), 0);
-        setMem(indicePosOutput, sizeof(indicePosOutput), 0);
-
-        for (int i = 0; i < numberOfOutputNeurons + dataLength; i++) {
-            const unsigned int base = i * (infoLength + numberOfOutputNeurons + dataLength);
-            for (int j = 0; j < dataLength + numberOfOutputNeurons + infoLength; j++) {
-                int v = synapses.outputLength[base + j];
+        else
+        {
+            synapses = _synapses[solutionBufIdx].outputLength;
+        }
+        auto& indicePos = _indicePos[solutionBufIdx];
+        auto& bucketPos = _bucketPos[solutionBufIdx];
+        auto& bufferPos = _bufferPos[solutionBufIdx];
+        // compute bucket
+        setMem(bucketPos, sizeof(bucketPos), 0);
+        setMem(bufferPos, sizeof(bufferPos), 0);
+        setMem(indicePos, sizeof(indicePos), 0);
+
+        for (int i = 0; i < neuronLength + endLength; i++) {
+            const unsigned int base = i * (beginLength + neuronLength + endLength);
+            for (int j = 0; j < beginLength + neuronLength + endLength; j++) {
+                int v = synapses[base + j];
                 if (v == 0) continue;
                 v = abs(v);
-                bucketPosOutput[i][v]++;
+                bucketPos[i][v]++;
             }
         }
         // do exclusive sum per row
-        for (int i = 0; i < numberOfOutputNeurons + dataLength; i++) {
+        for (int i = 0; i < neuronLength + endLength; i++) {
             for (int j = 1; j <= 128; j++) {
-                bufferPosOutput[i][j] = bufferPosOutput[i][j - 1] + bucketPosOutput[i][j - 1];
+                bufferPos[i][j] = bufferPos[i][j - 1] + bucketPos[i][j - 1];
             }
         }
-        copyMem(bucketPosOutput, bufferPosOutput, sizeof(bucketPosOutput));
+        copyMem(bucketPos, bufferPos, sizeof(bucketPos));
         // fill indices to index table
-        for (int i = 0; i < numberOfOutputNeurons + dataLength; i++) {
-            const unsigned int base = i * (infoLength + numberOfOutputNeurons + dataLength);
-            for (int j = 0; j < dataLength + numberOfOutputNeurons + infoLength; j++) {
-                int v = synapses.outputLength[base + j];
+        for (int i = 0; i < neuronLength + endLength; i++) {
+            const unsigned int base = i * (beginLength + neuronLength + endLength);
+            for (int j = 0; j < beginLength + neuronLength + endLength; j++) {
+                int v = synapses[base + j];
                 if (v == 0) continue;
-                v = abs(v);
-                indicePosOutput[i][bufferPosOutput[i][v]++] = j;
+                unsigned short sign = (v > 0) ? 1 : 0;
+                indicePos[i][bufferPos[i][abs(v)]++] = (j<<1)|sign;
             }
         }
     }
 
-    void computeOutputNeuron(int solutionBufIdx)
+    template <bool isInput, int beginLength, int neuronLength, int endLength, int duration>
+    void computeNeuron(int solutionBufIdx)
     {
-        auto& neurons = _neurons[solutionBufIdx];
-        auto& neuronBuffer = neurons.buffer;
-        auto& synapses = _synapses[solutionBufIdx];
-        auto& indicePosOutput = _indicePosOutput[solutionBufIdx];
-        auto& bucketPosOutput = _bucketPosOutput[solutionBufIdx];
-        auto& bufferPosOutput = _bufferPosOutput[solutionBufIdx];
+        long long* neurons = nullptr;
+        char* synapses = nullptr;
+        if (isInput) {
+            neurons = _neurons[solutionBufIdx].input;
+            synapses = _synapses[solutionBufIdx].inputLength;
+        }
+        else {
+            neurons = _neurons[solutionBufIdx].output;
+            synapses = _synapses[solutionBufIdx].outputLength;
+        }
+        auto& neuronBuffer = _neurons[solutionBufIdx].buffer;
+        auto& indicePos = _indicePos[solutionBufIdx];
+        auto& bucketPos = _bucketPos[solutionBufIdx];
+        auto& bufferPos = _bufferPos[solutionBufIdx];
         auto& sumBuffer = _sumBuffer[solutionBufIdx];
         auto& indices = _indices[solutionBufIdx];
+        int index[64];
+        static_assert(MAX_INPUT_DURATION <= 256 && MAX_OUTPUT_DURATION <= 256, "Need to increase index array length");
 
-        setMem(&neuronBuffer[0], sizeof(neuronBuffer), 0);
-        int totalIndice;
-        for (int tick = 1; tick <= maxOutputDuration; tick++) {
-            copyMem(neuronBuffer, &neurons.output[0], sizeof(neurons.output));
-            for (unsigned int outputNeuronIndex = 0; outputNeuronIndex < numberOfOutputNeurons + dataLength; outputNeuronIndex++) {
+        for (int tick = 1; tick <= duration; tick++) {
+            copyMem(neuronBuffer, &neurons[0], sizeof(neurons[0]) * (beginLength + neuronLength + endLength));
+            for (unsigned int neuronIndex = 0; neuronIndex < neuronLength + endLength; neuronIndex++) {
                 // pre scan for shortcut
-                if (tick > 3)
+                bool found = false;
+                if (tick > 1)
                 {
+                    setMem(index, sizeof(index), 0);
+                    char sum0 = 0;
+                    char sum1 = 0;
+                    char sum2 = 0;
+                    char nSum = 0;
                     int elemCount = 0;
-                    getLastNeurons(indicePosOutput[outputNeuronIndex], bucketPosOutput[outputNeuronIndex], _modNum[tick], indices,
-                        nSample, _totalModNum[tick], elemCount, neuronBuffer);
-                    for (int i = 0; i < elemCount; i++)
+                    while (1)
                     {
-                        unsigned int anotherOutputNeuronIndex = indices[i];
-                        const unsigned int offset = outputNeuronIndex * (dataLength + numberOfOutputNeurons + infoLength) + anotherOutputNeuronIndex;
-                        if (synapses.outputLength[offset] > 0) {
-                            sumBuffer[i] = neuronBuffer[anotherOutputNeuronIndex];
-                        }
-                        else {
-                            sumBuffer[i] = -neuronBuffer[anotherOutputNeuronIndex];
-                        }
-                    }
-                    int found = -1;
-                    long long s = 0;
-                    for (int i = 0; i < elemCount - 1; i++)
-                    {
-                        if ((sumBuffer[i] > 0) == (sumBuffer[i + 1] > 0))
+                        int lastNrIndex = getLastNeuronsIndex(indicePos[neuronIndex], bucketPos[neuronIndex], _modNum[tick], _totalModNum[tick], neuronBuffer, index);
+                        if (lastNrIndex == -1) continue;
+                        if (lastNrIndex == -2) break;
+                        char sign = (lastNrIndex & 1) ? 1 : -1;
+                        int anotherNeuronIndex = lastNrIndex >> 1;
+                        char v = neuronBuffer[anotherNeuronIndex] * sign;
+                        sum2 = sum1;
+                        sum1 = sum0;
+                        sum0 = v;
+                        if (elemCount > 1)
                         {
-                            found = i + 1;
-                            break;
+                            if (sum2 > 0) nSum++;
+                            else nSum--;
                         }
-                    }
-                    if (found != -1)
-                    {
-                        s = 0;
-                        for (int i = found; i >= 0; i--)
+                        elemCount++;
+                        if (sum0 == sum1)
                         {
-                            s += sumBuffer[i];
-                            clampNeuron(s);
+                            char result = (sum0 > 0) ? 1 : -1;
+                            result += nSum;
+                            clampNeuron(result);
+                            neurons[beginLength + neuronIndex] = result;
+                            found = true;
+                            break;
                         }
-                        neurons.output[infoLength + outputNeuronIndex] = s;
-                        continue;
                     }
                 }
-                {
-                    totalIndice = mergeSortBucket(indicePosOutput[outputNeuronIndex], bucketPosOutput[outputNeuronIndex], _modNum[tick], indices, (unsigned short*)sumBuffer, _totalModNum[tick]);
+                // full compute
+                if (!found) {
+                    int totalIndice = mergeSortBucket(indicePos[neuronIndex], bucketPos[neuronIndex], _modNum[tick], indices, (unsigned short*)sumBuffer, _totalModNum[tick]);
                     if (totalIndice == 0) continue;
 
                     for (int i = 0; i < totalIndice; i++) {
-                        unsigned int anotherOutputNeuronIndex = indices[i];
-                        const unsigned int offset = outputNeuronIndex * (dataLength + numberOfOutputNeurons + infoLength) + anotherOutputNeuronIndex;
-                        if (synapses.outputLength[offset] > 0) {
-                            sumBuffer[i] = neuronBuffer[anotherOutputNeuronIndex];
-                        }
-                        else {
-                            sumBuffer[i] = -neuronBuffer[anotherOutputNeuronIndex];
-                        }
-                    }                    
-
+                        unsigned int anotherNeuronIndex = indices[i] >> 1;
+                        char sign = (indices[i] & 1) ? 1 : -1;
+                        sumBuffer[i] = neuronBuffer[anotherNeuronIndex] * sign;
+                    }
                     for (int i = 0; i < totalIndice; i++)
                     {
-                        neurons.output[infoLength + outputNeuronIndex] += sumBuffer[i];
-                        clampNeuron(neurons.output[infoLength + outputNeuronIndex]);
+                        neurons[beginLength + neuronIndex] += sumBuffer[i];
+                        clampNeuron(neurons[beginLength + neuronIndex]);
                     }
                 }
             }
@@ -542,22 +458,22 @@ struct ScoreFunction
         auto& neurons = _neurons[solutionBufIdx];
         auto& synapses = _synapses[solutionBufIdx];
 
-        setMem(&neurons.input[0], sizeof(neurons), 0);
         generateSynapse(solutionBufIdx, publicKey, nonce);
 
-        computeInputBucket(solutionBufIdx);
-
-        computeInputNeuron(solutionBufIdx);
+        // compute input
+        setMem(&neurons.input[0], sizeof(neurons.input), 0);
+        copyMem(&neurons.input[0], miningData, sizeof(miningData));
+        computeBucket<true, dataLength, numberOfInputNeurons, infoLength>(solutionBufIdx);
+        computeNeuron<true, dataLength, numberOfInputNeurons, infoLength, maxInputDuration>(solutionBufIdx);
 
+        // compute output
         setMem(&neurons.output[0], sizeof(neurons.output), 0);
         for (unsigned int i = 0; i < infoLength; i++)
         {
             neurons.output[i] = (neurons.input[dataLength + numberOfInputNeurons + i] >= 0 ? 1 : -1);
         }
-
-        computeOutputBucket(solutionBufIdx);
-
-        computeOutputNeuron(solutionBufIdx);
+        computeBucket<false, infoLength, numberOfOutputNeurons, dataLength>(solutionBufIdx);
+        computeNeuron<false, infoLength, numberOfOutputNeurons, dataLength, maxOutputDuration>(solutionBufIdx);
 
         for (unsigned int i = 0; i < dataLength; i++)
         {

From f06b4fa784aa74c5dfa83a1d3a6fc6b87f482d77 Mon Sep 17 00:00:00 2001
From: Philipp Werner <22914157+philippwerner@users.noreply.github.com>
Date: Thu, 4 Apr 2024 12:29:43 +0200
Subject: [PATCH 4/7] Correctly update computer digest when state of contract 0
 is changed (#84)

* When contract 0 state is changed, set flag for updating digest

* Wrap fee reserve change into function
---
 src/qubic.cpp | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/qubic.cpp b/src/qubic.cpp
index 914f7fce..3c5da20b 100644
--- a/src/qubic.cpp
+++ b/src/qubic.cpp
@@ -1434,6 +1434,13 @@ static void requestProcessor(void* ProcedureArgument)
     }
 }
 
+// Returns a mutable reference to the fee reserve of contract `contractIndex`, which is stored in the state of contract 0. Every call sets bit 0 of contractStateChangeFlags[0], even if the caller never writes through the reference.
+static long long & contractFeeReserve(unsigned int contractIndex)
+{
+    contractStateChangeFlags[0] |= 1ULL; // mark state of contract 0 as changed (NOTE(review): presumably so its digest is recomputed; the consumer of this flag is not visible here)
+    return ((Contract0State*)contractStates[0])->contractFeeReserves[contractIndex]; // NOTE(review): contractIndex is not range-checked here; callers are expected to pass a valid index
+}
+
 static void __beginFunctionOrProcedure(const unsigned int functionOrProcedureId)
 {
     // TODO
@@ -1486,8 +1493,7 @@ static long long __burn(long long amount)
 
     if (decreaseEnergy(index, amount))
     {
-        Contract0State* contract0State = (Contract0State*)contractStates[0];
-        contract0State->contractFeeReserves[executedContractIndex] += amount;
+        contractFeeReserve(executedContractIndex) += amount;
 
         const Burning burning = { currentContract , amount };
         logBurning(burning);
@@ -2756,7 +2762,6 @@ static void endEpoch()
     getUniverseDigest(etalonTick.prevUniverseDigest);
     getComputerDigest(etalonTick.prevComputerDigest);
 
-    Contract0State* contract0State = (Contract0State*)contractStates[0];
     for (unsigned int contractIndex = 1; contractIndex < sizeof(contractDescriptions) / sizeof(contractDescriptions[0]); contractIndex++)
     {
         if (system.epoch < contractDescriptions[contractIndex].constructionEpoch)
@@ -2807,7 +2812,7 @@ static void endEpoch()
                 logQuTransfer(quTransfer);
             }
 
-            contract0State->contractFeeReserves[contractIndex] = finalPrice * NUMBER_OF_COMPUTORS;
+            contractFeeReserve(contractIndex) = finalPrice * NUMBER_OF_COMPUTORS;
         }
     }
 

From 8d4920da7bdd90fd0c639cd943196ec0355bea13 Mon Sep 17 00:00:00 2001
From: J0ET0M <107187448+J0ET0M@users.noreply.github.com>
Date: Thu, 4 Apr 2024 13:04:41 +0000
Subject: [PATCH 5/7] Add solutionthreshold to system_info (#85)

* Add solutionthreshold to system_info

* Fix struct alignment and potential out-of-bounds access

---------

Co-authored-by: Philipp Werner <22914157+philippwerner@users.noreply.github.com>
---
 src/network_messages/system_info.h | 5 ++++-
 src/qubic.cpp                      | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/network_messages/system_info.h b/src/network_messages/system_info.h
index 9588c1d7..bb2e7ece 100644
--- a/src/network_messages/system_info.h
+++ b/src/network_messages/system_info.h
@@ -7,6 +7,7 @@
 
 #define RESPOND_SYSTEM_INFO 47
 
+#pragma pack(push, 1)
 struct RespondSystemInfo
 {
     short version;
@@ -27,6 +28,8 @@ struct RespondSystemInfo
     unsigned int numberOfTransactions;
 
     m256i randomMiningSeed;
+    int solutionThreshold;
 };
+#pragma pack(pop)
 
-static_assert(sizeof(RespondSystemInfo) == 2 + 2 + 4 + 4 + 4 + 2 + 1 + 1 + 1 + 1 + 1 + 1 + 4 + 32 + 4, "Something is wrong with the struct size of RespondSystemInfo.");
+static_assert(sizeof(RespondSystemInfo) == (2 + 2 + 4 + 4 + 4) + (2 + 1 + 1 + 1 + 1 + 1 + 1) + (4 + 4) + (32 + 4), "Something is wrong with the struct size of RespondSystemInfo.");
diff --git a/src/qubic.cpp b/src/qubic.cpp
index 3c5da20b..90758ffd 100644
--- a/src/qubic.cpp
+++ b/src/qubic.cpp
@@ -1101,6 +1101,7 @@ static void processRequestSystemInfo(Peer* peer, RequestResponseHeader* header)
     respondedSystemInfo.numberOfTransactions = numberOfTransactions;
 
     respondedSystemInfo.randomMiningSeed = score->initialRandomSeed;
+    respondedSystemInfo.solutionThreshold = (system.epoch < MAX_NUMBER_EPOCH) ? solutionThreshold[system.epoch] : SOLUTION_THRESHOLD_DEFAULT;
 
     enqueueResponse(peer, sizeof(respondedSystemInfo), RESPOND_SYSTEM_INFO, header->dejavu(), &respondedSystemInfo);
 }

From 002ca6efb2e0ef704f3ee0c6e209d739f05d2712 Mon Sep 17 00:00:00 2001
From: Philipp Werner <22914157+philippwerner@users.noreply.github.com>
Date: Tue, 9 Apr 2024 11:09:46 +0200
Subject: [PATCH 6/7] Update params for version 1.199.0 / epoch 104

---
 src/public_settings.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/public_settings.h b/src/public_settings.h
index 57ba8ff2..29f12ce3 100644
--- a/src/public_settings.h
+++ b/src/public_settings.h
@@ -24,17 +24,17 @@
 // Set START_NETWORK_FROM_SCRATCH to 0 if you start the node for syncing with the already ticking network.
 // If this flag is 1, it indicates that the whole network (all 676 IDs) will start from scratch and agree that the very first tick time will be set at (2022-04-13 Wed 12:00:00.000UTC).
 // If this flag is 0, the node will try to fetch the initial tick of the epoch from other nodes, because the tick's timestamp may differ from (2022-04-13 Wed 12:00:00.000UTC).
-#define START_NETWORK_FROM_SCRATCH 0
+#define START_NETWORK_FROM_SCRATCH 1
 
 //////////////////////////////////////////////////////////////////////////
 // Config options that should NOT be changed by operators
 
 #define VERSION_A 1
-#define VERSION_B 198
+#define VERSION_B 199
 #define VERSION_C 0
 
-#define EPOCH 103
-#define TICK 13218818
+#define EPOCH 104
+#define TICK 13360000
 
 #define ARBITRATOR "AFZPUAIYVPNUYGJRQVLUKOPPVLHAZQTGLYAAUUNBXFTVTAMSBKQBLEIEPCVJ"
 
@@ -47,8 +47,8 @@ static unsigned short CONTRACT_FILE_NAME[] = L"contract????.???";
 
 #define DATA_LENGTH 256
 #define INFO_LENGTH 128
-#define NUMBER_OF_INPUT_NEURONS 2048
-#define NUMBER_OF_OUTPUT_NEURONS 2048
+#define NUMBER_OF_INPUT_NEURONS 4096
+#define NUMBER_OF_OUTPUT_NEURONS 4096
 #define MAX_INPUT_DURATION 256
 #define MAX_OUTPUT_DURATION 256
 #define NEURON_VALUE_LIMIT 1LL

From 952654350600f6a867b4b3fedb32553401bf1bbc Mon Sep 17 00:00:00 2001
From: Philipp Werner <22914157+philippwerner@users.noreply.github.com>
Date: Wed, 10 Apr 2024 11:19:22 +0200
Subject: [PATCH 7/7] Quickfix to prevent race conditions in parallel
 processing of contracts

---
 src/qubic.cpp         | 13 +++++++++++--
 src/smart_contracts.h |  3 +++
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/qubic.cpp b/src/qubic.cpp
index 90758ffd..484c2a4a 100644
--- a/src/qubic.cpp
+++ b/src/qubic.cpp
@@ -1053,6 +1053,7 @@ static void processRequestContractFunction(Peer* peer, const unsigned long long
     RespondContractFunction* response = (RespondContractFunction*)contractFunctionOutputs[processorNumber];
 
     RequestContractFunction* request = header->getPayload<RequestContractFunction>();
+    ACQUIRE(executedContractIndexLock);
     executedContractIndex = request->contractIndex;
     if (header->size() != sizeof(RequestResponseHeader) + sizeof(RequestContractFunction) + request->inputSize
         || !executedContractIndex || executedContractIndex >= sizeof(contractDescriptions) / sizeof(contractDescriptions[0])
@@ -1077,6 +1078,7 @@ static void processRequestContractFunction(Peer* peer, const unsigned long long
 
         enqueueResponse(peer, contractUserFunctionOutputSizes[executedContractIndex][request->inputType], response->type, header->dejavu(), response);
     }
+    RELEASE(executedContractIndexLock);
 }
 
 static void processRequestSystemInfo(Peer* peer, RequestResponseHeader* header)
@@ -1911,6 +1913,8 @@ static void contractProcessor(void*)
     unsigned long long processorNumber;
     mpServicesProtocol->WhoAmI(mpServicesProtocol, &processorNumber);
 
+    ACQUIRE(executedContractIndexLock);
+
     switch (contractProcessorPhase)
     {
     case INITIALIZE:
@@ -2013,6 +2017,8 @@ static void contractProcessor(void*)
     }
     break;
     }
+
+    RELEASE(executedContractIndexLock);
 }
 
 static void processTick(unsigned long long processorNumber)
@@ -2167,10 +2173,12 @@ static void processTick(unsigned long long processorNumber)
                                 // only 32 bits are used for the contract index.
                                 m256i maskedDestinationPublicKey = transaction->destinationPublicKey;
                                 maskedDestinationPublicKey.m256i_u64[0] &= ~(MAX_NUMBER_OF_CONTRACTS - 1ULL);
-                                executedContractIndex = (unsigned int)transaction->destinationPublicKey.m256i_u64[0];
+                                unsigned int contractIndex = (unsigned int)transaction->destinationPublicKey.m256i_u64[0];
                                 if (isZero(maskedDestinationPublicKey)
-                                    && executedContractIndex < sizeof(contractDescriptions) / sizeof(contractDescriptions[0]))
+                                    && contractIndex < sizeof(contractDescriptions) / sizeof(contractDescriptions[0]))
                                 {
+                                    ACQUIRE(executedContractIndexLock);
+                                    executedContractIndex = contractIndex;
                                     if (system.epoch < contractDescriptions[executedContractIndex].constructionEpoch)
                                     {
                                         if (!transaction->amount
@@ -2273,6 +2281,7 @@ static void processTick(unsigned long long processorNumber)
                                             contractTotalExecutionTicks[executedContractIndex] += __rdtsc() - startTick;
                                         }
                                     }
+                                    RELEASE(executedContractIndexLock);
                                 }
                                 else
                                 {
diff --git a/src/smart_contracts.h b/src/smart_contracts.h
index 6b57aa71..5dfceccc 100644
--- a/src/smart_contracts.h
+++ b/src/smart_contracts.h
@@ -148,6 +148,9 @@ executedContractIndex = contractIndex;\
 _##contractName->__registerUserFunctions();\
 _##contractName->__registerUserProcedures();
 
+// Protect executedContractIndex and other global contract state variables to prevent race conditions in parallel processing
+static volatile char executedContractIndexLock = 0;
+
 static volatile unsigned int executedContractIndex;
 
 static void initializeContract(const unsigned int contractIndex, void* contractState)