error: can't find a register in class 'RL_REGS' while reloading 'asm' #7

mroavi · 2016-09-14T13:05:30Z

I found a very interesting I2C master implementation written in assembly language for the ESP8266 (https://github.com/pasko-zh/brzo_i2c). However, as the author mentions, the library is written for the Arduino toolchain.

I made some slight changes in order to compile it with the newest SDK from espressif: ESP8266_NONOS_SDK_V2.0.0_16_07_19.

However, I haven't been able to get past this compiler error:

brzo_i2c.c: In function 'brzo_i2c_read':
modules/brzo_i2c.c:413:2: error: can't find a register in class 'RL_REGS' while reloading 'asm'
  asm volatile (
  ^
modules/brzo_i2c.c:413:2: error: 'asm' operand has impossible constraints
make: *** [build/modules/brzo_i2c.o] Error 1

14:46:16 Build Finished (took 162ms)

The CFLAGS variable in my Makefile looks like this:

# compiler flags used during compilation of source files
CFLAGS      =   -g          \
                -Wpointer-arith     \
                -Wundef         \
                -Wl,-EL         \
                -fno-inline-functions   \
                -nostdlib       \
                -mlongcalls \
                -mtext-section-literals \
                -ffunction-sections \
                -fdata-sections \
                -fno-builtin-printf\
                -DICACHE_FLASH \
                -DBUID_TIME=\"$(DATETIME)\" \
                -std=gnu89

This is the inline assembler code that it points to:

// Bit-banged I2C transfer implemented as one GCC extended-asm statement (Xtensa).
//
// Phases, in order:
//   1. Mask interrupts (RSIL 15) and load the GPIO register base into r_set.
//   2. Check that SDA and SCL are both high (bus free), then send START.
//   3. Outer loop l_send_byte / inner loop l_send_bit: shift out r_byte_to_send MSB first,
//      polling SCL after every rising edge to honour clock stretching (with a timeout).
//   4. Release SDA during the 9th clock and sample it: low = ACK, high = NACK.
//   5. After an ACK, load the next byte from r_adr_array_element while r_no_of_bytes != 0.
//   6. Send STOP (or, if r_repeated != 0, leave SDA = SCL = 1 for a following START).
//   7. l_exit: set the interrupt level back to 0 (RSIL 0).
//
// GPIO access (addresses as given by the comments below): r_set = 0x60000304; a bitmask
// stored at offset 0 drives the line(s) high, at offset 4 (0x60000308) drives them low,
// and a load from offset 20 (0x60000318) reads the current pin levels.
//
// r_error on exit: 0 = no error, 1 = bus not free before START, 2 = NACK by slave,
//                  8 = clock stretch timeout (no STOP is sent in that case).
//
// Register pressure: the statement has 9 read-write and 5 input "r" operands, i.e. 14
// general registers must be live at the same time.
// NOTE(review): the surrounding discussion reports that this fails to build without
// optimization ("can't find a register in class 'RL_REGS'") and builds with -O1/-O2;
// -fomit-frame-pointer was suggested as a workaround but not confirmed.
// NOTE(review): the labels (l_exit, l_w01, ...) are plain assembler symbols; if this
// statement were ever emitted twice in one translation unit the names would presumably
// clash. Verify, or switch to unique/local labels.
asm volatile (
        // Disable all interrupts, i.e. interrupts up to the highest interrupt level of 15
        //   the current level is saved in %[r_temp1] but we will not use that value again,
        //   instead we will just enable all interrupt levels at the end of this routine
        "RSIL   %[r_temp1], 15;"
        "MOVI   %[r_set], 0x60000304;"

        // Check if bus is free and send START
        // r_temp1 = combined SDA|SCL mask, used to test that both lines are high
        "OR     %[r_temp1], %[r_sda_bitmask], %[r_scl_bitmask];"
        "L16UI  %[r_in_value], %[r_set], 20;" // offset is 20d = 14h => in: 0x60000318
        "MEMW;"
        // Preload error = 1 (bus not free); cleared again below if the bus turns out to be free
        "MOVI.N %[r_error], 1;"
        // If either SDA or SCL is low, then bus is not free and thus jump to l_exit
        "BNALL  %[r_in_value], %[r_temp1], l_exit;"
        // Bus is free, so we can send START
        "MOVI.N %[r_error], 0;"
        "MOV.N  %[r_temp1], %[r_iteration_scl_halfcycle];"
        // Set SCL = 1
        "S16I   %[r_scl_bitmask], %[r_set], 0;"
        "MEMW;"
        // Set SDA = 0
        // Delay for tHD;STA  >= 4.0 usec for standard mode, 0.6 usec for fast or 0.26 usec fast mode plus
        //  => a delay of one half cycle is enough to meet those timings
        "S16I   %[r_sda_bitmask], %[r_set], 4;"  // clear: 0x60000308
        // Busy-wait: count r_temp1 down to zero (one half cycle)
        "l_w01:"
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        "NOP;"
        "BNEZ   %[r_temp1], l_w01;"
        // Post Condition: SDA = 0, SCL = 1

        // The outer loop, sending 1...n data bytes
        "l_send_byte:"
        // select the MSB of byte_to_send
        "MOVI   %[r_bit_index], 128;"
        // The inner loop, sending 1...8 bits
        "l_send_bit:"
        "MOV.N  %[r_temp1], %[r_iteration_scl_halfcycle];"
        // check if the bit of byte_to_send at bit_index is 0 or 1
        "BALL   %[r_byte_to_send], %[r_bit_index], l_sda1_scl0;"
        // SDA = 0, SCL = 0
        "S16I   %[r_scl_bitmask], %[r_set], 4;" // clear: 0x60000308
        "MEMW;"
        "S16I   %[r_sda_bitmask], %[r_set], 4;" // clear: 0x60000308
        "l_w02:"
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        "NOP;"
        "BNEZ   %[r_temp1], l_w02;"
        "j l_sdax_scl1;"

        "l_sda1_scl0:"
        // SDA = 1, SCL = 0
        "S16I   %[r_scl_bitmask], %[r_set], 4;" // clear: 0x60000308
        "MEMW;"
        "S16I   %[r_sda_bitmask], %[r_set], 0;"
        "l_w03:"
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        "NOP;"
        "BNEZ   %[r_temp1], l_w03;"

        "l_sdax_scl1:"
        // SDA = leave unchanged and set SCL = 1
        // Check for clock stretching
        // Delay is a little bit shorter, i.e. half_cycle - delta
        "ADDI   %[r_temp1], %[r_iteration_scl_halfcycle], -5;"
        // Let SCL rise
        "S16I   %[r_scl_bitmask], %[r_set], 0;"
        "MEMW;"
        "l_w04:"
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        "NOP;"
        // Explicitly BGEZ instead of BNEZ
        // (the counter starts at half_cycle - 5, so it may already be zero or negative)
        "BGEZ   %[r_temp1], l_w04;"

        // Sample SCL value
        "L16UI  %[r_in_value], %[r_set], 20;" // offset is 20d = 14h => in: 0x60000318
        "MEMW;"
        // r_temp1 holds the number of iterations for clock stretch timeout
        "MOV.N  %[r_temp1], %[r_iteration_scl_clock_stretch];"
        // Branch if SCL = 1, i.e. no stretching
        "BALL   %[r_in_value], %[r_scl_bitmask], l_no_stretch;"
        // SCL = 0, i.e. stretching by the slave, i.e. it pulls SCL low
        "l_stretch:"
        // Sample SCL value
        "L16UI  %[r_in_value], %[r_set], 20;" // offset is 20d = 14h => in: 0x60000318
        "MEMW;"
        // Branch if SCL = 1, i.e. no more stretching
        "BALL   %[r_in_value], %[r_scl_bitmask], l_scl_high_by_slave;"
        // SCL is still low
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        // Did we reach the clock stretch timeout?
        // Branch if we have not yet reached the timeout
        "BNEZ   %[r_temp1], l_stretch;"
        // We have reached the clock stretch timeout, i.e. SCL is still pulled low by the slave
        // Error: Bus is not free, since SCL is still low AND clock stretch timeout reached
        "MOVI.N %[r_error], 8;"
        // We explicitly do not send a STOP instead we exit, i.e. jump to l_exit and not to l_send_stop
        "j l_exit;"

        "l_scl_high_by_slave:"
        // SCL was set high by the slave
        // We have to make sure that SCL = 1 for a complete half cycle
        "MOV.N  %[r_temp1], %[r_iteration_scl_halfcycle];"
        "l_w041:"
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        "NOP;"
        "BNEZ   %[r_temp1], l_w041;"

        "l_no_stretch:"
        // Postcondition: SCL = 1 for a half cycle
        // Are there bits left that we need to send?
        "SRLI   %[r_bit_index], %[r_bit_index], 1;"
        // When the LSB of the byte_to_send was sent, i.e. bit index was 1 before SRLI, it will now be zero
        // As long as the LSB was not sent keep on sending bits, i.e. jump
        "BNEZ   %[r_bit_index], l_send_bit;"
        // we have sent 8 Bits

        // check for ACK by slave
        // Precondition
        // SDA = LSB (i.e. SDA = 0, since we have an i2c write), SCL = 1
        // SCL = 0
        // Spike reducing waits here
        "S16I   %[r_scl_bitmask], %[r_set], 4;"  // clear : 0x60000308
        "MOV.N  %[r_temp1], %[r_iteration_minimize_spike];"
        "l_w05:"
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        "NOP;"
        "BNEZ   %[r_temp1], l_w05;"
        // Reduce number of iterations by the ones we've already used
        "SUB    %[r_temp1], %[r_iteration_scl_halfcycle], %[r_iteration_minimize_spike];"
        // Now we let SDA rise.
        // In case of an ACK the i2c slave is pulling SDA down
        // In case of a NACK, SDA rises
        "S16I   %[r_sda_bitmask], %[r_set], 0;"
        "MEMW;"
        "l_w06:"
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        "NOP;"
        "BGEZ   %[r_temp1], l_w06;"

        // Delay is a little bit shorter, i.e. half_cycle - delta
        // Because we will have a L16UI after in this half cycle
        "ADDI   %[r_temp1], %[r_iteration_scl_halfcycle], -5;"
        // Set SCL = 1, i.e. start of the second half cycle of the 9th SCL cycle
        "S16I   %[r_scl_bitmask], %[r_set], 0;"
        "MEMW;"
        // Delay for the second half cycle of the 9th SCL cycle
        "l_w07:"
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        "NOP;"
        "BGEZ   %[r_temp1], l_w07;"
        // Sample SDA at the end of the 9th clock cycle
        // In the case of a NACK we want to leave enough time that SDA can rise
        // If sda_value AND sda_bitmask == 0 => ACK else we have a NACK
        "L16UI  %[r_in_value], %[r_set], 20;" // offset is 20d = 14h => in: 0x60000318
        "BNALL  %[r_in_value], %[r_sda_bitmask], l_slave_ack;"
        "MOVI.N %[r_error], 2;"
        // NACK by slave
        // Postcondition:
        //   SDA = 1 (NACK) and SCL = 1
        //   9th Clock Cycle is finished
        "j l_send_stop;"

        "l_slave_ack:"
        // ACK
        // Precondition: SDA = 0 (still pulled low by the slave) and SCL = 1
        // The slave will pull SDA low as long as SCL = 1
        // We have to set SDA = 0 by the master
        // clear : 0x60000308
        "S16I   %[r_sda_bitmask], %[r_set], 4;"
        // Postcondition:
        //   SDA = 0 and SCL = 1
        //   9th Clock Cycle is finished

        // Branch to l_send_stop if there are no more data bytes to send
        "BEQZ   %[r_no_of_bytes], l_send_stop;"
        // Otherwise there is at least one more data byte:
        // Load the corresponding element of array data[.] into byte_to_send
        "L8UI   %[r_byte_to_send], %[r_adr_array_element], 0;"
        // Move the pointer to the next array element (since we have an array of bytes, the increment is 1)
        "ADDI.N %[r_adr_array_element], %[r_adr_array_element], 1;"
        // Decrement the number of bytes to send
        "ADDI.N %[r_no_of_bytes], %[r_no_of_bytes], -1;"
        "j l_send_byte;"

        "l_send_stop:"
        // Send Stop
        // We have to make sure that SDA = 0 and SCL = 1, before we send the STOP sequence,
        //   i.e. "A LOW to HIGH transition on the SDA line while SCL is HIGH"
        // In order to achieve this precondition, we have to distinguish between
        //   1) NACK: SDA = 1, SCL = 1
        //   2) ACK: SDA = 0, SCL = 1
        //      SDA is still pulled low by the slave, so we have to signal the slave to release it.
        //      We will do this by letting SCL go low.
        "MOV.N  %[r_temp1], %[r_iteration_scl_halfcycle];"
        // if we had a NACK then r_error = 2
        // if we had an ACK then r_error = 0
        "BNEZ.N %[r_error], l_stop_after_NACK;"
        // Send stop after ACK
        // Precondition: SDA = 0, SCL = 1

        // We are at the beginning of the 10th cycle (if there was no clock stretching)
        // Set SCL = 0
        // During the first half cycle the slave should release SDA...
        "S16I   %[r_scl_bitmask], %[r_set], 4;" // clear : 0x60000308
        "MEMW;"
        "l_w08:"
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        "NOP;"
        "BNEZ   %[r_temp1], l_w08;"

        // Check for a repeated start
        // Branch if r_repeated is 0, i.e. is no repeated start, just send stop
        "BEQZ.N %[r_repeated], l_no_repeated_start;"
        // Make sure that the precondition for the next command (i.e. the start) will be met
        // Currently, SCL = 0 and SDA is starting to rise, since the slave has released it
        // To be on the safe side, we set both SCL = 1 and SDA = 1
        // SDA  = 1
        "S16I   %[r_sda_bitmask], %[r_set], 0;"
        "MEMW;"
        // SCL = 1;
        "S16I   %[r_scl_bitmask], %[r_set], 0;"
        // Postcondition: SCL = 1 and SDA = 1, so the next i2c command can send its START
        "j l_exit;"

        "l_no_repeated_start:"
        // For the second half cycle, we set SDA = 0, SCL = 1
        "MOV.N  %[r_temp1], %[r_iteration_scl_halfcycle];"
        "S16I   %[r_sda_bitmask], %[r_set], 4;" // clear : 0x60000308
        "MEMW;"
        "S16I   %[r_scl_bitmask], %[r_set], 0;"
        "l_w09:"
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        "NOP;"
        "BNEZ   %[r_temp1], l_w09;"

        // For the first half cycle of the 11th cycle, we set SDA = 1 and leave SCL = 1
        "MOV.N  %[r_temp1], %[r_iteration_scl_halfcycle];"
        // SDA = 1 (SCL is already high, we don't need to change it)
        "S16I   %[r_sda_bitmask], %[r_set], 0;"
        "MEMW;"
        "l_w10:"
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        "NOP;"
        "BNEZ   %[r_temp1], l_w10;"
        "j l_exit;"

        "l_stop_after_NACK:"
        // Send stop after NACK
        // Precondition: SDA = 1, SCL = 1

        // SDA = 0
        // SCL = 1 : In "normal" cycles we would set SCL to 0
        "S16I   %[r_sda_bitmask], %[r_set], 4;"  // clear: 0x60000308
        "S16I   %[r_scl_bitmask], %[r_set], 0;"
        // Delay for the first half cycle of 10th cycle
        "MEMW;"
        "l_w11:"
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        "NOP;"
        "BNEZ   %[r_temp1], l_w11;"
        // Postcondition: SDA = 0 and SCL = 1

        // Now we set SDA = 1 and leave SCL = 1 : This is the STOP condition,
        //   i.e. the "A LOW to HIGH transition on the SDA line while SCL is HIGH"
        "MOV.N  %[r_temp1], %[r_iteration_scl_halfcycle];"
        "S16I   %[r_sda_bitmask], %[r_set], 0;"
        // SDA = 1 (SCL is already high, we don't need to change it)
        "MEMW;"
        "l_w12:"
        "ADDI.N %[r_temp1], %[r_temp1], -1;"
        "NOP;"
        "BNEZ   %[r_temp1], l_w12;"

        "l_exit:"
        // Enable all interrupts again, i.e. interrupts with interrupt level >= 1
        "RSIL   %[r_temp1], 0;"

        // Operands: 9 read-write ("+r") registers, then 5 read-only ("r") registers.
        // NOTE(review): [r_adr_array_element] is a read-write operand bound to the expression
        // &data[0], which is not an lvalue; the asm increments this register, so the updated
        // pointer cannot be written back to a C variable. Confirm the compiler accepts this,
        // or bind it to a local pointer variable instead.
        : [r_set] "+r" (a_set), [r_repeated] "+r" (a_repeated), [r_temp1] "+r" (a_temp1), [r_in_value] "+r" (a_in_value), [r_error] "+r" (i2c_error), [r_bit_index] "+r" (a_bit_index), [r_adr_array_element] "+r" (&data[0]), [r_byte_to_send] "+r" (byte_to_send), [r_no_of_bytes] "+r" (no_of_bytes)
        : [r_sda_bitmask] "r" (sda_bitmask), [r_scl_bitmask] "r" (scl_bitmask), [r_iteration_scl_halfcycle] "r" (iteration_scl_halfcycle), [r_iteration_minimize_spike] "r" (iteration_remove_spike), [r_iteration_scl_clock_stretch] "r" (iteration_scl_clock_stretch)
        // "memory" clobber: the asm reads data[] and accesses the GPIO registers
        : "memory"
    );

I would really appreciate it if you could help me understand what is going wrong and what options there are to solve it. Thanks in advance.

The text was updated successfully, but these errors were encountered:

jcmvbkbc · 2016-09-14T17:19:51Z

That looks interesting.
One workaround that I see is the switch -fomit-frame-pointer, which frees a15.
But the assembly only wants 14 registers, which we have even when the frame pointer is in use.
Let me look at it some more.

mroavi · 2016-09-14T20:01:28Z

I finally managed to compile it.

Adding the -O2 option to the CFLAGS did the trick:

# compiler flags used during compilation of source files
CFLAGS      =   -g -O2          \
                -Wpointer-arith     \
                -Wundef         \
                -Wl,-EL         \
                -fno-inline-functions   \
                -nostdlib       \
                -mlongcalls \
                -mtext-section-literals \
                -ffunction-sections \
                -fdata-sections \
                -fno-builtin-printf\
                -DICACHE_FLASH \
                -DBUID_TIME=\"$(DATETIME)\" \
                -std=gnu89

It also works with -g -O1 but not with -g -O0.

This PR points out that we don't detect long double -> double narrowing when long double happens to have the same precision as double; on x86_64 this can be achieved by -mlong-double-64. [dcl.init.list]#7.2 specifically says "from long double to double or float, or from double to float", but check_narrowing only checks TYPE_PRECISION (type) < TYPE_PRECISION (ftype) so we need to handle the other cases too, e.g. by same_type_p as in the following patch. PR c++/94590 - Detect long double -> double narrowing. * typeck2.c (check_narrowing): Detect long double -> double narrowing even when double and long double have the same precision. Make it handle conversions to float too. * g++.dg/cpp0x/Wnarrowing18.C: New test.

In plenty of image and video processing code it's common to modify pixel values by a widening operation and then scale them back into range by dividing by 255. This patch adds an named function to allow us to emit an optimized sequence when doing an unsigned division that is equivalent to: x = y / (2 ^ (bitsize (y)/2)-1) For SVE2 this means we generate for: void draw_bitmap1(uint8_t* restrict pixel, uint8_t level, int n) { for (int i = 0; i < (n & -16); i+=1) pixel[i] = (pixel[i] * level) / 0xff; } the following: mov z3.b, #1 .L3: ld1b z0.h, p0/z, [x0, x3] mul z0.h, p1/m, z0.h, z2.h addhnb z1.b, z0.h, z3.h addhnb z0.b, z0.h, z1.h st1b z0.h, p0, [x0, x3] inch x3 whilelo p0.h, w3, w2 b.any .L3 instead of: .L3: ld1b z0.h, p1/z, [x0, x3] mul z0.h, p0/m, z0.h, z1.h umulh z0.h, p0/m, z0.h, z2.h lsr z0.h, z0.h, #7 st1b z0.h, p1, [x0, x3] inch x3 whilelo p1.h, w3, w2 b.any .L3 Which results in significantly faster code. gcc/ChangeLog: * config/aarch64/aarch64-sve2.md (@aarch64_bitmask_udiv<mode>3): New. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve2/div-by-bitmask_1.c: New test.

mcspr mentioned this issue May 28, 2020

Upgrade to GCC 10.1 toolchain esp8266/Arduino#6294

Merged

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

error: can't find a register in class 'RL_REGS' while reloading 'asm' #7

error: can't find a register in class 'RL_REGS' while reloading 'asm' #7

mroavi commented Sep 14, 2016 •

edited

Loading

jcmvbkbc commented Sep 14, 2016

mroavi commented Sep 14, 2016 •

edited

Loading

error: can't find a register in class 'RL_REGS' while reloading 'asm' #7

error: can't find a register in class 'RL_REGS' while reloading 'asm' #7

Comments

mroavi commented Sep 14, 2016 • edited Loading

jcmvbkbc commented Sep 14, 2016

mroavi commented Sep 14, 2016 • edited Loading

mroavi commented Sep 14, 2016 •

edited

Loading

mroavi commented Sep 14, 2016 •

edited

Loading