From: Tianling Shen <cnsztl@immortalwrt.org>
Date: Sat, 10 Jun 2023 11:08:04 +0000 (+0800)
Subject: toolchain: gcc: backport inline subword atomic support for riscv
X-Git-Url: http://git.lede-project.org./?a=commitdiff_plain;h=7b4a966de88aa0e1f9b7faa62a4d6cb7b01e9f8f;p=openwrt%2Fstaging%2Fthess.git

toolchain: gcc: backport inline subword atomic support for riscv

RISC-V has no support for subword atomic operations; code currently
generates libatomic library calls.

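This patch backports the GCC change "RISCV: Inline subword atomic ops",
which makes the compiler emit inline LR/SC sequences for subword atomic
operations by default, so that they no longer require libatomic. The
previous behavior remains available via -mno-inline-atomics.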

Signed-off-by: Tianling Shen <cnsztl@immortalwrt.org>
---

diff --git a/toolchain/gcc/patches-11.x/700-RISCV-Inline-subword-atomic-ops.patch b/toolchain/gcc/patches-11.x/700-RISCV-Inline-subword-atomic-ops.patch
new file mode 100644
index 0000000000..0da7eb4af6
--- /dev/null
+++ b/toolchain/gcc/patches-11.x/700-RISCV-Inline-subword-atomic-ops.patch
@@ -0,0 +1,2021 @@
+From f797260adaf52bee0ec0e16190bbefbe1bfc3692 Mon Sep 17 00:00:00 2001
+From: Patrick O'Neill <patrick@rivosinc.com>
+Date: Tue, 18 Apr 2023 14:33:13 -0700
+Subject: [PATCH] RISCV: Inline subword atomic ops
+
+RISC-V has no support for subword atomic operations; code currently
+generates libatomic library calls.
+
+This patch changes the default behavior to inline subword atomic calls
+(using the same logic as the existing library call).
+Behavior can be specified using the -minline-atomics and
+-mno-inline-atomics command line flags.
+
+libgcc/config/riscv/atomic.c has the same logic implemented in asm.
+This will need to stay for backwards compatibility and the
+-mno-inline-atomics flag.
+
+2023-04-18 Patrick O'Neill <patrick@rivosinc.com>
+
+gcc/ChangeLog:
+	PR target/104338
+	* config/riscv/riscv-protos.h: Add helper function stubs.
+	* config/riscv/riscv.cc: Add helper functions for subword masking.
+	* config/riscv/riscv.opt: Add command-line flag.
+	* config/riscv/sync.md: Add masking logic and inline asm for fetch_and_op,
+	fetch_and_nand, CAS, and exchange ops.
+	* doc/invoke.texi: Add blurb regarding command-line flag.
+
+libgcc/ChangeLog:
+	PR target/104338
+	* config/riscv/atomic.c: Add reference to duplicate logic.
+
+gcc/testsuite/ChangeLog:
+	PR target/104338
+	* gcc.target/riscv/inline-atomics-1.c: New test.
+	* gcc.target/riscv/inline-atomics-2.c: New test.
+	* gcc.target/riscv/inline-atomics-3.c: New test.
+	* gcc.target/riscv/inline-atomics-4.c: New test.
+	* gcc.target/riscv/inline-atomics-5.c: New test.
+	* gcc.target/riscv/inline-atomics-6.c: New test.
+	* gcc.target/riscv/inline-atomics-7.c: New test.
+	* gcc.target/riscv/inline-atomics-8.c: New test.
+
+Signed-off-by: Patrick O'Neill <patrick@rivosinc.com>
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+---
+ gcc/config/riscv/riscv-protos.h               |   2 +
+ gcc/config/riscv/riscv.c                      |  49 ++
+ gcc/config/riscv/riscv.opt                    |   4 +
+ gcc/config/riscv/sync.md                      | 301 +++++++++
+ gcc/doc/invoke.texi                           |  10 +-
+ .../gcc.target/riscv/inline-atomics-1.c       |  18 +
+ .../gcc.target/riscv/inline-atomics-2.c       |   9 +
+ .../gcc.target/riscv/inline-atomics-3.c       | 569 ++++++++++++++++++
+ .../gcc.target/riscv/inline-atomics-4.c       | 566 +++++++++++++++++
+ .../gcc.target/riscv/inline-atomics-5.c       |  87 +++
+ .../gcc.target/riscv/inline-atomics-6.c       |  87 +++
+ .../gcc.target/riscv/inline-atomics-7.c       |  69 +++
+ .../gcc.target/riscv/inline-atomics-8.c       |  69 +++
+ libgcc/config/riscv/atomic.c                  |   2 +
+ 14 files changed, 1841 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-1.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-2.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-3.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-4.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-5.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-6.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-7.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-8.c
+
+--- a/gcc/config/riscv/riscv-protos.h
++++ b/gcc/config/riscv/riscv-protos.h
+@@ -74,6 +74,8 @@ extern bool riscv_expand_block_move (rtx
+ extern bool riscv_store_data_bypass_p (rtx_insn *, rtx_insn *);
+ extern rtx riscv_gen_gpr_save_insn (struct riscv_frame_info *);
+ extern bool riscv_gpr_save_operation_p (rtx);
++extern void riscv_subword_address (rtx, rtx *, rtx *, rtx *, rtx *);
++extern void riscv_lshift_subword (machine_mode, rtx, rtx, rtx *);
+ 
+ /* Routines implemented in riscv-c.c.  */
+ void riscv_cpu_cpp_builtins (cpp_reader *);
+--- a/gcc/config/riscv/riscv.c
++++ b/gcc/config/riscv/riscv.c
+@@ -5351,6 +5351,55 @@ riscv_asan_shadow_offset (void)
+   return TARGET_64BIT ? (HOST_WIDE_INT_1 << 29) : 0;
+ }
+ 
++/* Given subword memory reference MEM, emit code computing the aligned
++   SImode memory containing it, the subword's bit shift and its mask; the
++   results go into *ALIGNED_MEM, *SHIFT, *MASK and *NOT_MASK (~*MASK).  */
++
++void
++riscv_subword_address (rtx mem, rtx *aligned_mem, rtx *shift, rtx *mask,
++		       rtx *not_mask)
++{
++  /* Round the address down to a 4-byte boundary (ADDR & -4).  */
++  rtx addr = force_reg (Pmode, XEXP (mem, 0));
++
++  rtx addr_mask = gen_int_mode (-4, Pmode);
++
++  rtx aligned_addr = gen_reg_rtx (Pmode);
++  emit_move_insn (aligned_addr,  gen_rtx_AND (Pmode, addr, addr_mask));
++
++  *aligned_mem = change_address (mem, SImode, aligned_addr);
++
++  /* Shift amount in bits: (ADDR & 3) << 3, i.e. byte offset times 8.  */
++  emit_move_insn (*shift, gen_rtx_AND (SImode, gen_lowpart (SImode, addr),
++				       gen_int_mode (3, SImode)));
++  emit_move_insn (*shift, gen_rtx_ASHIFT (SImode, *shift,
++					  gen_int_mode (3, SImode)));
++
++  /* Mask of MEM's mode, shifted left by *SHIFT to the subword position.  */
++  int unshifted_mask = GET_MODE_MASK (GET_MODE (mem));
++
++  emit_move_insn (*mask, gen_int_mode (unshifted_mask, SImode));
++
++  emit_move_insn (*mask, gen_rtx_ASHIFT (SImode, *mask,
++					 gen_lowpart (QImode, *shift)));
++
++  emit_move_insn (*not_mask, gen_rtx_NOT(SImode, *mask));
++}
++
++/* Left-shift VALUE (of mode MODE) by SHIFT into SImode *SHIFTED_VALUE.  */
++
++void
++riscv_lshift_subword (machine_mode mode, rtx value, rtx shift,
++		      rtx *shifted_value)
++{
++  rtx value_reg = gen_reg_rtx (SImode);
++  emit_move_insn (value_reg, simplify_gen_subreg (SImode, value,
++						  mode, 0));
++
++  emit_move_insn(*shifted_value, gen_rtx_ASHIFT (SImode, value_reg,
++						 gen_lowpart (QImode, shift)));
++}
++
+ /* Initialize the GCC target structure.  */
+ #undef TARGET_ASM_ALIGNED_HI_OP
+ #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
+--- a/gcc/config/riscv/riscv.opt
++++ b/gcc/config/riscv/riscv.opt
+@@ -195,6 +195,10 @@ long riscv_stack_protector_guard_offset
+ TargetVariable
+ int riscv_zi_subext
+ 
++minline-atomics
++Target Var(TARGET_INLINE_SUBWORD_ATOMIC) Init(1)
++Always inline subword atomic operations.
++
+ Enum
+ Name(isa_spec_class) Type(enum riscv_isa_spec_class)
+ Supported ISA specs (for use with the -misa-spec= option):
+--- a/gcc/config/riscv/sync.md
++++ b/gcc/config/riscv/sync.md
+@@ -21,8 +21,11 @@
+ 
+ (define_c_enum "unspec" [
+   UNSPEC_COMPARE_AND_SWAP
++  UNSPEC_COMPARE_AND_SWAP_SUBWORD
+   UNSPEC_SYNC_OLD_OP
++  UNSPEC_SYNC_OLD_OP_SUBWORD
+   UNSPEC_SYNC_EXCHANGE
++  UNSPEC_SYNC_EXCHANGE_SUBWORD
+   UNSPEC_ATOMIC_STORE
+   UNSPEC_MEMORY_BARRIER
+ ])
+@@ -92,6 +95,135 @@
+   "%F3amo<insn>.<amo>%A3 %0,%z2,%1"
+   [(set (attr "length") (const_int 8))])
+ 
++(define_insn "subword_atomic_fetch_strong_<atomic_optab>"
++  [(set (match_operand:SI 0 "register_operand" "=&r")		   ;; old value at mem
++	(match_operand:SI 1 "memory_operand" "+A"))		   ;; mem location
++   (set (match_dup 1)
++	(unspec_volatile:SI
++	  [(any_atomic:SI (match_dup 1)
++		     (match_operand:SI 2 "register_operand" "rI")) ;; value for op
++	   (match_operand:SI 3 "register_operand" "rI")]	   ;; mask
++	 UNSPEC_SYNC_OLD_OP_SUBWORD))
++    (match_operand:SI 4 "register_operand" "rI")		   ;; not_mask
++    (clobber (match_scratch:SI 5 "=&r"))			   ;; tmp_1
++    (clobber (match_scratch:SI 6 "=&r"))]			   ;; tmp_2
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++  {
++    return "1:\;"
++	   "lr.w.aq\t%0, %1\;"
++	   "<insn>\t%5, %0, %2\;"
++	   "and\t%5, %5, %3\;"
++	   "and\t%6, %0, %4\;"
++	   "or\t%6, %6, %5\;"
++	   "sc.w.rl\t%5, %6, %1\;"
++	   "bnez\t%5, 1b";
++  }
++  [(set (attr "length") (const_int 28))])
++
++(define_expand "atomic_fetch_nand<mode>"
++  [(match_operand:SHORT 0 "register_operand")			      ;; old value at mem
++   (not:SHORT (and:SHORT (match_operand:SHORT 1 "memory_operand")     ;; mem location
++			 (match_operand:SHORT 2 "reg_or_0_operand"))) ;; value for op
++   (match_operand:SI 3 "const_int_operand")]			      ;; model
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++{
++  /* We have no QImode/HImode atomics, so form a mask, then use
++     subword_atomic_fetch_strong_nand to implement an LR/SC version of the
++     operation.  */
++
++  /* Logic duplicated in libgcc/config/riscv/atomic.c for use when inlining
++     is disabled.  */
++
++  rtx old = gen_reg_rtx (SImode);
++  rtx mem = operands[1];
++  rtx value = operands[2];
++  rtx aligned_mem = gen_reg_rtx (SImode);
++  rtx shift = gen_reg_rtx (SImode);
++  rtx mask = gen_reg_rtx (SImode);
++  rtx not_mask = gen_reg_rtx (SImode);
++
++  riscv_subword_address (mem, &aligned_mem, &shift, &mask, &not_mask);
++
++  rtx shifted_value = gen_reg_rtx (SImode);
++  riscv_lshift_subword (<MODE>mode, value, shift, &shifted_value);
++
++  emit_insn (gen_subword_atomic_fetch_strong_nand (old, aligned_mem,
++						   shifted_value,
++						   mask, not_mask));
++
++  emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old,
++					 gen_lowpart (QImode, shift)));
++
++  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, old));
++
++  DONE;
++})
++
++(define_insn "subword_atomic_fetch_strong_nand"
++  [(set (match_operand:SI 0 "register_operand" "=&r")			  ;; old value at mem
++	(match_operand:SI 1 "memory_operand" "+A"))			  ;; mem location
++   (set (match_dup 1)
++	(unspec_volatile:SI
++	  [(not:SI (and:SI (match_dup 1)
++			   (match_operand:SI 2 "register_operand" "rI"))) ;; value for op
++	   (match_operand:SI 3 "register_operand" "rI")]		  ;; mask
++	 UNSPEC_SYNC_OLD_OP_SUBWORD))
++    (match_operand:SI 4 "register_operand" "rI")			  ;; not_mask
++    (clobber (match_scratch:SI 5 "=&r"))				  ;; tmp_1
++    (clobber (match_scratch:SI 6 "=&r"))]				  ;; tmp_2
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++  {
++    return "1:\;"
++	   "lr.w.aq\t%0, %1\;"
++	   "and\t%5, %0, %2\;"
++	   "not\t%5, %5\;"
++	   "and\t%5, %5, %3\;"
++	   "and\t%6, %0, %4\;"
++	   "or\t%6, %6, %5\;"
++	   "sc.w.rl\t%5, %6, %1\;"
++	   "bnez\t%5, 1b";
++  }
++  [(set (attr "length") (const_int 32))])
++
++(define_expand "atomic_fetch_<atomic_optab><mode>"
++  [(match_operand:SHORT 0 "register_operand")			 ;; old value at mem
++   (any_atomic:SHORT (match_operand:SHORT 1 "memory_operand")	 ;; mem location
++		     (match_operand:SHORT 2 "reg_or_0_operand")) ;; value for op
++   (match_operand:SI 3 "const_int_operand")]			 ;; model
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++{
++  /* We have no QImode/HImode atomics, so form a mask, then use
++     subword_atomic_fetch_strong_<atomic_optab> to implement an LR/SC version
++     of the operation.  */
++
++  /* Logic duplicated in libgcc/config/riscv/atomic.c for use when inlining
++     is disabled.  */
++
++  rtx old = gen_reg_rtx (SImode);
++  rtx mem = operands[1];
++  rtx value = operands[2];
++  rtx aligned_mem = gen_reg_rtx (SImode);
++  rtx shift = gen_reg_rtx (SImode);
++  rtx mask = gen_reg_rtx (SImode);
++  rtx not_mask = gen_reg_rtx (SImode);
++
++  riscv_subword_address (mem, &aligned_mem, &shift, &mask, &not_mask);
++
++  rtx shifted_value = gen_reg_rtx (SImode);
++  riscv_lshift_subword (<MODE>mode, value, shift, &shifted_value);
++
++  emit_insn (gen_subword_atomic_fetch_strong_<atomic_optab> (old, aligned_mem,
++							     shifted_value,
++							     mask, not_mask));
++
++  emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old,
++					 gen_lowpart (QImode, shift)));
++
++  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, old));
++
++  DONE;
++})
++
+ (define_insn "atomic_exchange<mode>"
+   [(set (match_operand:GPR 0 "register_operand" "=&r")
+ 	(unspec_volatile:GPR
+@@ -104,6 +236,56 @@
+   "%F3amoswap.<amo>%A3 %0,%z2,%1"
+   [(set (attr "length") (const_int 8))])
+ 
++(define_expand "atomic_exchange<mode>"
++  [(match_operand:SHORT 0 "register_operand") ;; old value at mem
++   (match_operand:SHORT 1 "memory_operand")   ;; mem location
++   (match_operand:SHORT 2 "register_operand") ;; value
++   (match_operand:SI 3 "const_int_operand")]  ;; model
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++{
++  rtx old = gen_reg_rtx (SImode);
++  rtx mem = operands[1];
++  rtx value = operands[2];
++  rtx aligned_mem = gen_reg_rtx (SImode);
++  rtx shift = gen_reg_rtx (SImode);
++  rtx mask = gen_reg_rtx (SImode);
++  rtx not_mask = gen_reg_rtx (SImode);
++
++  riscv_subword_address (mem, &aligned_mem, &shift, &mask, &not_mask);
++
++  rtx shifted_value = gen_reg_rtx (SImode);
++  riscv_lshift_subword (<MODE>mode, value, shift, &shifted_value);
++
++  emit_insn (gen_subword_atomic_exchange_strong (old, aligned_mem,
++						 shifted_value, not_mask));
++
++  emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old,
++					 gen_lowpart (QImode, shift)));
++
++  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, old));
++  DONE;
++})
++
++(define_insn "subword_atomic_exchange_strong"
++  [(set (match_operand:SI 0 "register_operand" "=&r")	 ;; old value at mem
++	(match_operand:SI 1 "memory_operand" "+A"))	 ;; mem location
++   (set (match_dup 1)
++	(unspec_volatile:SI
++	  [(match_operand:SI 2 "reg_or_0_operand" "rI")  ;; value
++	   (match_operand:SI 3 "reg_or_0_operand" "rI")] ;; not_mask
++      UNSPEC_SYNC_EXCHANGE_SUBWORD))
++    (clobber (match_scratch:SI 4 "=&r"))]		 ;; tmp_1
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++  {
++    return "1:\;"
++	   "lr.w.aq\t%0, %1\;"
++	   "and\t%4, %0, %3\;"
++	   "or\t%4, %4, %2\;"
++	   "sc.w.rl\t%4, %4, %1\;"
++	   "bnez\t%4, 1b";
++  }
++  [(set (attr "length") (const_int 20))])
++
+ (define_insn "atomic_cas_value_strong<mode>"
+   [(set (match_operand:GPR 0 "register_operand" "=&r")
+ 	(match_operand:GPR 1 "memory_operand" "+A"))
+@@ -152,6 +334,125 @@
+   DONE;
+ })
+ 
++(define_expand "atomic_compare_and_swap<mode>"
++  [(match_operand:SI 0 "register_operand")    ;; bool output
++   (match_operand:SHORT 1 "register_operand") ;; val output
++   (match_operand:SHORT 2 "memory_operand")   ;; memory
++   (match_operand:SHORT 3 "reg_or_0_operand") ;; expected value
++   (match_operand:SHORT 4 "reg_or_0_operand") ;; desired value
++   (match_operand:SI 5 "const_int_operand")   ;; is_weak
++   (match_operand:SI 6 "const_int_operand")   ;; mod_s
++   (match_operand:SI 7 "const_int_operand")]  ;; mod_f
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++{
++  emit_insn (gen_atomic_cas_value_strong<mode> (operands[1], operands[2],
++						operands[3], operands[4],
++						operands[6], operands[7]));
++
++  rtx val = gen_reg_rtx (SImode);
++  if (operands[1] != const0_rtx)
++    emit_move_insn (val, gen_rtx_SIGN_EXTEND (SImode, operands[1]));
++  else
++    emit_move_insn (val, const0_rtx);
++
++  rtx exp = gen_reg_rtx (SImode);
++  if (operands[3] != const0_rtx)
++    emit_move_insn (exp, gen_rtx_SIGN_EXTEND (SImode, operands[3]));
++  else
++    emit_move_insn (exp, const0_rtx);
++
++  rtx compare = val;
++  if (exp != const0_rtx)
++    {
++      rtx difference = gen_rtx_MINUS (SImode, val, exp);
++      compare = gen_reg_rtx (SImode);
++      emit_move_insn  (compare, difference);
++    }
++
++  if (word_mode != SImode)
++    {
++      rtx reg = gen_reg_rtx (word_mode);
++      emit_move_insn (reg, gen_rtx_SIGN_EXTEND (word_mode, compare));
++      compare = reg;
++    }
++
++  emit_move_insn (operands[0], gen_rtx_EQ (SImode, compare, const0_rtx));
++  DONE;
++})
++
++(define_expand "atomic_cas_value_strong<mode>"
++  [(match_operand:SHORT 0 "register_operand") ;; val output
++   (match_operand:SHORT 1 "memory_operand")   ;; memory
++   (match_operand:SHORT 2 "reg_or_0_operand") ;; expected value
++   (match_operand:SHORT 3 "reg_or_0_operand") ;; desired value
++   (match_operand:SI 4 "const_int_operand")   ;; mod_s
++   (match_operand:SI 5 "const_int_operand")   ;; mod_f
++   (match_scratch:SHORT 6)]
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++{
++  /* We have no QImode/HImode atomics, so form a mask, then use
++     subword_atomic_cas_strong to implement an LR/SC version of the
++     operation.  */
++
++  /* Logic duplicated in libgcc/config/riscv/atomic.c for use when inlining
++     is disabled.  */
++
++  rtx old = gen_reg_rtx (SImode);
++  rtx mem = operands[1];
++  rtx aligned_mem = gen_reg_rtx (SImode);
++  rtx shift = gen_reg_rtx (SImode);
++  rtx mask = gen_reg_rtx (SImode);
++  rtx not_mask = gen_reg_rtx (SImode);
++
++  riscv_subword_address (mem, &aligned_mem, &shift, &mask, &not_mask);
++
++  rtx o = operands[2];
++  rtx n = operands[3];
++  rtx shifted_o = gen_reg_rtx (SImode);
++  rtx shifted_n = gen_reg_rtx (SImode);
++
++  riscv_lshift_subword (<MODE>mode, o, shift, &shifted_o);
++  riscv_lshift_subword (<MODE>mode, n, shift, &shifted_n);
++
++  emit_move_insn (shifted_o, gen_rtx_AND (SImode, shifted_o, mask));
++  emit_move_insn (shifted_n, gen_rtx_AND (SImode, shifted_n, mask));
++
++  emit_insn (gen_subword_atomic_cas_strong (old, aligned_mem,
++					    shifted_o, shifted_n,
++					    mask, not_mask));
++
++  emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old,
++					 gen_lowpart (QImode, shift)));
++
++  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, old));
++
++  DONE;
++})
++
++(define_insn "subword_atomic_cas_strong"
++  [(set (match_operand:SI 0 "register_operand" "=&r")			   ;; old value at mem
++	(match_operand:SI 1 "memory_operand" "+A"))			   ;; mem location
++   (set (match_dup 1)
++	(unspec_volatile:SI [(match_operand:SI 2 "reg_or_0_operand" "rJ")  ;; expected value
++			     (match_operand:SI 3 "reg_or_0_operand" "rJ")] ;; desired value
++	 UNSPEC_COMPARE_AND_SWAP_SUBWORD))
++	(match_operand:SI 4 "register_operand" "rI")			   ;; mask
++	(match_operand:SI 5 "register_operand" "rI")			   ;; not_mask
++	(clobber (match_scratch:SI 6 "=&r"))]				   ;; tmp_1
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++  {
++    return "1:\;"
++	   "lr.w.aq\t%0, %1\;"
++	   "and\t%6, %0, %4\;"
++	   "bne\t%6, %z2, 1f\;"
++	   "and\t%6, %0, %5\;"
++	   "or\t%6, %6, %3\;"
++	   "sc.w.rl\t%6, %6, %1\;"
++	   "bnez\t%6, 1b\;"
++	   "1:";
++  }
++  [(set (attr "length") (const_int 28))])
++
+ (define_expand "atomic_test_and_set"
+   [(match_operand:QI 0 "register_operand" "")     ;; bool output
+    (match_operand:QI 1 "memory_operand" "+A")    ;; memory
+--- a/gcc/doc/invoke.texi
++++ b/gcc/doc/invoke.texi
+@@ -734,7 +734,8 @@ Objective-C and Objective-C++ Dialects}.
+ -moverride=@var{string}  -mverbose-cost-dump @gol
+ -mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{sysreg} @gol
+ -mstack-protector-guard-offset=@var{offset} -mtrack-speculation @gol
+--moutline-atomics }
++-moutline-atomics @gol
++-minline-atomics  -mno-inline-atomics}
+ 
+ @emph{Adapteva Epiphany Options}
+ @gccoptlist{-mhalf-reg-file  -mprefer-short-insn-regs @gol
+@@ -26742,6 +26743,13 @@ Do or don't use smaller but slower prolo
+ library function calls.  The default is to use fast inline prologues and
+ epilogues.
+ 
++@opindex minline-atomics
++@item -minline-atomics
++@itemx -mno-inline-atomics
++Do or don't use smaller but slower subword atomic emulation code that uses
++libatomic function calls.  The default is to use fast inline subword atomics
++that do not require libatomic.
++
+ @item -mshorten-memrefs
+ @itemx -mno-shorten-memrefs
+ @opindex mshorten-memrefs
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-1.c
+@@ -0,0 +1,18 @@
++/* { dg-do compile } */
++/* { dg-options "-mno-inline-atomics" } */
++/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "fetch_and_nand" { target *-*-* } 0 } */
++/* { dg-final { scan-assembler "\tcall\t__sync_fetch_and_add_1" } } */
++/* { dg-final { scan-assembler "\tcall\t__sync_fetch_and_nand_1" } } */
++/* { dg-final { scan-assembler "\tcall\t__sync_bool_compare_and_swap_1" } } */
++
++char foo;
++char bar;
++char baz;
++
++int
++main ()
++{
++  __sync_fetch_and_add(&foo, 1);
++  __sync_fetch_and_nand(&bar, 1);
++  __sync_bool_compare_and_swap (&baz, 1, 2);
++}
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-2.c
+@@ -0,0 +1,9 @@
++/* { dg-do compile } */
++/* Verify that subword atomics do not generate calls.  */
++/* { dg-options "-minline-atomics" } */
++/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "fetch_and_nand" { target *-*-* } 0 } */
++/* { dg-final { scan-assembler-not "\tcall\t__sync_fetch_and_add_1" } } */
++/* { dg-final { scan-assembler-not "\tcall\t__sync_fetch_and_nand_1" } } */
++/* { dg-final { scan-assembler-not "\tcall\t__sync_bool_compare_and_swap_1" } } */
++
++#include "inline-atomics-1.c"
+\ No newline at end of file
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-3.c
+@@ -0,0 +1,569 @@
++/* Check all char alignments.  */
++/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-op-1.c */
++/* Test __atomic routines for existence and proper execution on 1 byte
++   values with each valid memory model.  */
++/* { dg-do run } */
++/* { dg-options "-minline-atomics -Wno-address-of-packed-member" } */
++
++/* Test the execution of the __atomic_*OP builtin routines for a char.  */
++
++extern void abort(void);
++
++char count, res;
++const char init = ~0;
++
++struct A
++{
++   char a;
++   char b;
++   char c;
++   char d;
++} __attribute__ ((packed)) A;
++
++/* The fetch_op routines return the original value before the operation.  */
++
++void
++test_fetch_add (char* v)
++{
++  *v = 0;
++  count = 1;
++
++  if (__atomic_fetch_add (v, count, __ATOMIC_RELAXED) != 0)
++    abort ();
++
++  if (__atomic_fetch_add (v, 1, __ATOMIC_CONSUME) != 1)
++    abort ();
++
++  if (__atomic_fetch_add (v, count, __ATOMIC_ACQUIRE) != 2)
++    abort ();
++
++  if (__atomic_fetch_add (v, 1, __ATOMIC_RELEASE) != 3)
++    abort ();
++
++  if (__atomic_fetch_add (v, count, __ATOMIC_ACQ_REL) != 4)
++    abort ();
++
++  if (__atomic_fetch_add (v, 1, __ATOMIC_SEQ_CST) != 5)
++    abort ();
++}
++
++
++void
++test_fetch_sub (char* v)
++{
++  *v = res = 20;
++  count = 0;
++
++  if (__atomic_fetch_sub (v, count + 1, __ATOMIC_RELAXED) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, 1, __ATOMIC_CONSUME) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, count + 1, __ATOMIC_ACQUIRE) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, 1, __ATOMIC_RELEASE) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, count + 1, __ATOMIC_ACQ_REL) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, 1, __ATOMIC_SEQ_CST) !=  res--)
++    abort ();
++}
++
++void
++test_fetch_and (char* v)
++{
++  *v = init;
++
++  if (__atomic_fetch_and (v, 0, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_fetch_and (v, init, __ATOMIC_CONSUME) !=  0)
++    abort ();
++
++  if (__atomic_fetch_and (v, 0, __ATOMIC_ACQUIRE) !=  0)
++    abort ();
++
++  *v = ~*v;
++  if (__atomic_fetch_and (v, init, __ATOMIC_RELEASE) !=  init)
++    abort ();
++
++  if (__atomic_fetch_and (v, 0, __ATOMIC_ACQ_REL) !=  init)
++    abort ();
++
++  if (__atomic_fetch_and (v, 0, __ATOMIC_SEQ_CST) !=  0)
++    abort ();
++}
++
++void
++test_fetch_nand (char* v)
++{
++  *v = init;
++
++  if (__atomic_fetch_nand (v, 0, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_fetch_nand (v, init, __ATOMIC_CONSUME) !=  init)
++    abort ();
++
++  if (__atomic_fetch_nand (v, 0, __ATOMIC_ACQUIRE) !=  0 )
++    abort ();
++
++  if (__atomic_fetch_nand (v, init, __ATOMIC_RELEASE) !=  init)
++    abort ();
++
++  if (__atomic_fetch_nand (v, init, __ATOMIC_ACQ_REL) !=  0)
++    abort ();
++
++  if (__atomic_fetch_nand (v, 0, __ATOMIC_SEQ_CST) !=  init)
++    abort ();
++}
++
++void
++test_fetch_xor (char* v)
++{
++  *v = init;
++  count = 0;
++
++  if (__atomic_fetch_xor (v, count, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_fetch_xor (v, ~count, __ATOMIC_CONSUME) !=  init)
++    abort ();
++
++  if (__atomic_fetch_xor (v, 0, __ATOMIC_ACQUIRE) !=  0)
++    abort ();
++
++  if (__atomic_fetch_xor (v, ~count, __ATOMIC_RELEASE) !=  0)
++    abort ();
++
++  if (__atomic_fetch_xor (v, 0, __ATOMIC_ACQ_REL) !=  init)
++    abort ();
++
++  if (__atomic_fetch_xor (v, ~count, __ATOMIC_SEQ_CST) !=  init)
++    abort ();
++}
++
++void
++test_fetch_or (char* v)
++{
++  *v = 0;
++  count = 1;
++
++  if (__atomic_fetch_or (v, count, __ATOMIC_RELAXED) !=  0)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, 2, __ATOMIC_CONSUME) !=  1)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, count, __ATOMIC_ACQUIRE) !=  3)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, 8, __ATOMIC_RELEASE) !=  7)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, count, __ATOMIC_ACQ_REL) !=  15)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, count, __ATOMIC_SEQ_CST) !=  31)
++    abort ();
++}
++
++/* The OP_fetch routines return the new value after the operation.  */
++
++void
++test_add_fetch (char* v)
++{
++  *v = 0;
++  count = 1;
++
++  if (__atomic_add_fetch (v, count, __ATOMIC_RELAXED) != 1)
++    abort ();
++
++  if (__atomic_add_fetch (v, 1, __ATOMIC_CONSUME) != 2)
++    abort ();
++
++  if (__atomic_add_fetch (v, count, __ATOMIC_ACQUIRE) != 3)
++    abort ();
++
++  if (__atomic_add_fetch (v, 1, __ATOMIC_RELEASE) != 4)
++    abort ();
++
++  if (__atomic_add_fetch (v, count, __ATOMIC_ACQ_REL) != 5)
++    abort ();
++
++  if (__atomic_add_fetch (v, count, __ATOMIC_SEQ_CST) != 6)
++    abort ();
++}
++
++
++void
++test_sub_fetch (char* v)
++{
++  *v = res = 20;
++  count = 0;
++
++  if (__atomic_sub_fetch (v, count + 1, __ATOMIC_RELAXED) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, 1, __ATOMIC_CONSUME) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, count + 1, __ATOMIC_ACQUIRE) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, 1, __ATOMIC_RELEASE) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, count + 1, __ATOMIC_ACQ_REL) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, count + 1, __ATOMIC_SEQ_CST) !=  --res)
++    abort ();
++}
++
++void
++test_and_fetch (char* v)
++{
++  *v = init;
++
++  if (__atomic_and_fetch (v, 0, __ATOMIC_RELAXED) !=  0)
++    abort ();
++
++  *v = init;
++  if (__atomic_and_fetch (v, init, __ATOMIC_CONSUME) !=  init)
++    abort ();
++
++  if (__atomic_and_fetch (v, 0, __ATOMIC_ACQUIRE) !=  0)
++    abort ();
++
++  *v = ~*v;
++  if (__atomic_and_fetch (v, init, __ATOMIC_RELEASE) !=  init)
++    abort ();
++
++  if (__atomic_and_fetch (v, 0, __ATOMIC_ACQ_REL) !=  0)
++    abort ();
++
++  *v = ~*v;
++  if (__atomic_and_fetch (v, 0, __ATOMIC_SEQ_CST) !=  0)
++    abort ();
++}
++
++void
++test_nand_fetch (char* v)
++{
++  *v = init;
++
++  if (__atomic_nand_fetch (v, 0, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_nand_fetch (v, init, __ATOMIC_CONSUME) !=  0)
++    abort ();
++
++  if (__atomic_nand_fetch (v, 0, __ATOMIC_ACQUIRE) !=  init)
++    abort ();
++
++  if (__atomic_nand_fetch (v, init, __ATOMIC_RELEASE) !=  0)
++    abort ();
++
++  if (__atomic_nand_fetch (v, init, __ATOMIC_ACQ_REL) !=  init)
++    abort ();
++
++  if (__atomic_nand_fetch (v, 0, __ATOMIC_SEQ_CST) !=  init)
++    abort ();
++}
++
++
++
++void
++test_xor_fetch (char* v)
++{
++  *v = init;
++  count = 0;
++
++  if (__atomic_xor_fetch (v, count, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_xor_fetch (v, ~count, __ATOMIC_CONSUME) !=  0)
++    abort ();
++
++  if (__atomic_xor_fetch (v, 0, __ATOMIC_ACQUIRE) !=  0)
++    abort ();
++
++  if (__atomic_xor_fetch (v, ~count, __ATOMIC_RELEASE) !=  init)
++    abort ();
++
++  if (__atomic_xor_fetch (v, 0, __ATOMIC_ACQ_REL) !=  init)
++    abort ();
++
++  if (__atomic_xor_fetch (v, ~count, __ATOMIC_SEQ_CST) !=  0)
++    abort ();
++}
++
++void
++test_or_fetch (char* v)
++{
++  *v = 0;
++  count = 1;
++
++  if (__atomic_or_fetch (v, count, __ATOMIC_RELAXED) !=  1)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, 2, __ATOMIC_CONSUME) !=  3)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, count, __ATOMIC_ACQUIRE) !=  7)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, 8, __ATOMIC_RELEASE) !=  15)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, count, __ATOMIC_ACQ_REL) !=  31)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, count, __ATOMIC_SEQ_CST) !=  63)
++    abort ();
++}
++
++
++/* Test the OP routines with a result which isn't used. Use both variations
++   within each function.  */
++
++void
++test_add (char* v)
++{
++  *v = 0;
++  count = 1;
++
++  __atomic_add_fetch (v, count, __ATOMIC_RELAXED);
++  if (*v != 1)
++    abort ();
++
++  __atomic_fetch_add (v, count, __ATOMIC_CONSUME);
++  if (*v != 2)
++    abort ();
++
++  __atomic_add_fetch (v, 1 , __ATOMIC_ACQUIRE);
++  if (*v != 3)
++    abort ();
++
++  __atomic_fetch_add (v, 1, __ATOMIC_RELEASE);
++  if (*v != 4)
++    abort ();
++
++  __atomic_add_fetch (v, count, __ATOMIC_ACQ_REL);
++  if (*v != 5)
++    abort ();
++
++  __atomic_fetch_add (v, count, __ATOMIC_SEQ_CST);
++  if (*v != 6)
++    abort ();
++}
++
++
++void
++test_sub (char* v)
++{
++  *v = res = 20;
++  count = 0;
++
++  __atomic_sub_fetch (v, count + 1, __ATOMIC_RELAXED);
++  if (*v != --res)
++    abort ();
++
++  __atomic_fetch_sub (v, count + 1, __ATOMIC_CONSUME);
++  if (*v != --res)
++    abort ();
++
++  __atomic_sub_fetch (v, 1, __ATOMIC_ACQUIRE);
++  if (*v != --res)
++    abort ();
++
++  __atomic_fetch_sub (v, 1, __ATOMIC_RELEASE);
++  if (*v != --res)
++    abort ();
++
++  __atomic_sub_fetch (v, count + 1, __ATOMIC_ACQ_REL);
++  if (*v != --res)
++    abort ();
++
++  __atomic_fetch_sub (v, count + 1, __ATOMIC_SEQ_CST);
++  if (*v != --res)
++    abort ();
++}
++
++void
++test_and (char* v)
++{
++  *v = init;
++
++  __atomic_and_fetch (v, 0, __ATOMIC_RELAXED);
++  if (*v != 0)
++    abort ();
++
++  *v = init;
++  __atomic_fetch_and (v, init, __ATOMIC_CONSUME);
++  if (*v != init)
++    abort ();
++
++  __atomic_and_fetch (v, 0, __ATOMIC_ACQUIRE);
++  if (*v != 0)
++    abort ();
++
++  *v = ~*v;
++  __atomic_fetch_and (v, init, __ATOMIC_RELEASE);
++  if (*v != init)
++    abort ();
++
++  __atomic_and_fetch (v, 0, __ATOMIC_ACQ_REL);
++  if (*v != 0)
++    abort ();
++
++  *v = ~*v;
++  __atomic_fetch_and (v, 0, __ATOMIC_SEQ_CST);
++  if (*v != 0)
++    abort ();
++}
++
++void
++test_nand (char* v)
++{
++  *v = init;
++
++  __atomic_fetch_nand (v, 0, __ATOMIC_RELAXED);
++  if (*v != init)
++    abort ();
++
++  __atomic_fetch_nand (v, init, __ATOMIC_CONSUME);
++  if (*v != 0)
++    abort ();
++
++  __atomic_nand_fetch (v, 0, __ATOMIC_ACQUIRE);
++  if (*v != init)
++    abort ();
++
++  __atomic_nand_fetch (v, init, __ATOMIC_RELEASE);
++  if (*v != 0)
++    abort ();
++
++  __atomic_fetch_nand (v, init, __ATOMIC_ACQ_REL);
++  if (*v != init)
++    abort ();
++
++  __atomic_nand_fetch (v, 0, __ATOMIC_SEQ_CST);
++  if (*v != init)
++    abort ();
++}
++
++
++
++void
++test_xor (char* v)
++{
++  *v = init;
++  count = 0;
++
++  __atomic_xor_fetch (v, count, __ATOMIC_RELAXED);
++  if (*v != init)
++    abort ();
++
++  __atomic_fetch_xor (v, ~count, __ATOMIC_CONSUME);
++  if (*v != 0)
++    abort ();
++
++  __atomic_xor_fetch (v, 0, __ATOMIC_ACQUIRE);
++  if (*v != 0)
++    abort ();
++
++  __atomic_fetch_xor (v, ~count, __ATOMIC_RELEASE);
++  if (*v != init)
++    abort ();
++
++  __atomic_fetch_xor (v, 0, __ATOMIC_ACQ_REL);
++  if (*v != init)
++    abort ();
++
++  __atomic_xor_fetch (v, ~count, __ATOMIC_SEQ_CST);
++  if (*v != 0)
++    abort ();
++}
++
++void
++test_or (char* v)
++{
++  *v = 0;
++  count = 1;
++
++  __atomic_or_fetch (v, count, __ATOMIC_RELAXED);
++  if (*v != 1)
++    abort ();
++
++  count *= 2;
++  __atomic_fetch_or (v, count, __ATOMIC_CONSUME);
++  if (*v != 3)
++    abort ();
++
++  count *= 2;
++  __atomic_or_fetch (v, 4, __ATOMIC_ACQUIRE);
++  if (*v != 7)
++    abort ();
++
++  count *= 2;
++  __atomic_fetch_or (v, 8, __ATOMIC_RELEASE);
++  if (*v != 15)
++    abort ();
++
++  count *= 2;
++  __atomic_or_fetch (v, count, __ATOMIC_ACQ_REL);
++  if (*v != 31)
++    abort ();
++
++  count *= 2;
++  __atomic_fetch_or (v, count, __ATOMIC_SEQ_CST);
++  if (*v != 63)
++    abort ();
++}
++
++int
++main ()
++{
++  char* V[] = {&A.a, &A.b, &A.c, &A.d};
++
++  for (int i = 0; i < 4; i++) {
++    test_fetch_add (V[i]);
++    test_fetch_sub (V[i]);
++    test_fetch_and (V[i]);
++    test_fetch_nand (V[i]);
++    test_fetch_xor (V[i]);
++    test_fetch_or (V[i]);
++
++    test_add_fetch (V[i]);
++    test_sub_fetch (V[i]);
++    test_and_fetch (V[i]);
++    test_nand_fetch (V[i]);
++    test_xor_fetch (V[i]);
++    test_or_fetch (V[i]);
++
++    test_add (V[i]);
++    test_sub (V[i]);
++    test_and (V[i]);
++    test_nand (V[i]);
++    test_xor (V[i]);
++    test_or (V[i]);
++  }
++
++  return 0;
++}
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-4.c
+@@ -0,0 +1,566 @@
++/* Check all short alignments.  */
++/* Same logic as libatomic/testsuite/libatomic.c/atomic-op-2.c */
++/* Test __atomic routines for existence and proper execution on 2 byte
++   values with each valid memory model.  */
++/* { dg-do run } */
++/* { dg-options "-minline-atomics -Wno-address-of-packed-member" } */
++
++/* Test the execution of the __atomic_*OP builtin routines for a short.  */
++
++extern void abort(void);
++
++short count, res;
++const short init = ~0;
++
++struct A
++{
++   short a;
++   short b;
++} __attribute__ ((packed)) A;
++
++/* The fetch_op routines return the original value before the operation.  */
++
++void
++test_fetch_add (short* v)
++{
++  *v = 0;
++  count = 1;
++
++  if (__atomic_fetch_add (v, count, __ATOMIC_RELAXED) != 0)
++    abort ();
++
++  if (__atomic_fetch_add (v, 1, __ATOMIC_CONSUME) != 1)
++    abort ();
++
++  if (__atomic_fetch_add (v, count, __ATOMIC_ACQUIRE) != 2)
++    abort ();
++
++  if (__atomic_fetch_add (v, 1, __ATOMIC_RELEASE) != 3)
++    abort ();
++
++  if (__atomic_fetch_add (v, count, __ATOMIC_ACQ_REL) != 4)
++    abort ();
++
++  if (__atomic_fetch_add (v, 1, __ATOMIC_SEQ_CST) != 5)
++    abort ();
++}
++
++
++void
++test_fetch_sub (short* v)
++{
++  *v = res = 20;
++  count = 0;
++
++  if (__atomic_fetch_sub (v, count + 1, __ATOMIC_RELAXED) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, 1, __ATOMIC_CONSUME) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, count + 1, __ATOMIC_ACQUIRE) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, 1, __ATOMIC_RELEASE) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, count + 1, __ATOMIC_ACQ_REL) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, 1, __ATOMIC_SEQ_CST) !=  res--)
++    abort ();
++}
++
++void
++test_fetch_and (short* v)
++{
++  *v = init;
++
++  if (__atomic_fetch_and (v, 0, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_fetch_and (v, init, __ATOMIC_CONSUME) !=  0)
++    abort ();
++
++  if (__atomic_fetch_and (v, 0, __ATOMIC_ACQUIRE) !=  0)
++    abort ();
++
++  *v = ~*v;
++  if (__atomic_fetch_and (v, init, __ATOMIC_RELEASE) !=  init)
++    abort ();
++
++  if (__atomic_fetch_and (v, 0, __ATOMIC_ACQ_REL) !=  init)
++    abort ();
++
++  if (__atomic_fetch_and (v, 0, __ATOMIC_SEQ_CST) !=  0)
++    abort ();
++}
++
++void
++test_fetch_nand (short* v)
++{
++  *v = init;
++
++  if (__atomic_fetch_nand (v, 0, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_fetch_nand (v, init, __ATOMIC_CONSUME) !=  init)
++    abort ();
++
++  if (__atomic_fetch_nand (v, 0, __ATOMIC_ACQUIRE) !=  0 )
++    abort ();
++
++  if (__atomic_fetch_nand (v, init, __ATOMIC_RELEASE) !=  init)
++    abort ();
++
++  if (__atomic_fetch_nand (v, init, __ATOMIC_ACQ_REL) !=  0)
++    abort ();
++
++  if (__atomic_fetch_nand (v, 0, __ATOMIC_SEQ_CST) !=  init)
++    abort ();
++}
++
++void
++test_fetch_xor (short* v)
++{
++  *v = init;
++  count = 0;
++
++  if (__atomic_fetch_xor (v, count, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_fetch_xor (v, ~count, __ATOMIC_CONSUME) !=  init)
++    abort ();
++
++  if (__atomic_fetch_xor (v, 0, __ATOMIC_ACQUIRE) !=  0)
++    abort ();
++
++  if (__atomic_fetch_xor (v, ~count, __ATOMIC_RELEASE) !=  0)
++    abort ();
++
++  if (__atomic_fetch_xor (v, 0, __ATOMIC_ACQ_REL) !=  init)
++    abort ();
++
++  if (__atomic_fetch_xor (v, ~count, __ATOMIC_SEQ_CST) !=  init)
++    abort ();
++}
++
++void
++test_fetch_or (short* v)
++{
++  *v = 0;
++  count = 1;
++
++  if (__atomic_fetch_or (v, count, __ATOMIC_RELAXED) !=  0)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, 2, __ATOMIC_CONSUME) !=  1)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, count, __ATOMIC_ACQUIRE) !=  3)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, 8, __ATOMIC_RELEASE) !=  7)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, count, __ATOMIC_ACQ_REL) !=  15)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, count, __ATOMIC_SEQ_CST) !=  31)
++    abort ();
++}
++
++/* The OP_fetch routines return the new value after the operation.  */
++
++void
++test_add_fetch (short* v)
++{
++  *v = 0;
++  count = 1;
++
++  if (__atomic_add_fetch (v, count, __ATOMIC_RELAXED) != 1)
++    abort ();
++
++  if (__atomic_add_fetch (v, 1, __ATOMIC_CONSUME) != 2)
++    abort ();
++
++  if (__atomic_add_fetch (v, count, __ATOMIC_ACQUIRE) != 3)
++    abort ();
++
++  if (__atomic_add_fetch (v, 1, __ATOMIC_RELEASE) != 4)
++    abort ();
++
++  if (__atomic_add_fetch (v, count, __ATOMIC_ACQ_REL) != 5)
++    abort ();
++
++  if (__atomic_add_fetch (v, count, __ATOMIC_SEQ_CST) != 6)
++    abort ();
++}
++
++
++void
++test_sub_fetch (short* v)
++{
++  *v = res = 20;
++  count = 0;
++
++  if (__atomic_sub_fetch (v, count + 1, __ATOMIC_RELAXED) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, 1, __ATOMIC_CONSUME) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, count + 1, __ATOMIC_ACQUIRE) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, 1, __ATOMIC_RELEASE) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, count + 1, __ATOMIC_ACQ_REL) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, count + 1, __ATOMIC_SEQ_CST) !=  --res)
++    abort ();
++}
++
++void
++test_and_fetch (short* v)
++{
++  *v = init;
++
++  if (__atomic_and_fetch (v, 0, __ATOMIC_RELAXED) !=  0)
++    abort ();
++
++  *v = init;
++  if (__atomic_and_fetch (v, init, __ATOMIC_CONSUME) !=  init)
++    abort ();
++
++  if (__atomic_and_fetch (v, 0, __ATOMIC_ACQUIRE) !=  0)
++    abort ();
++
++  *v = ~*v;
++  if (__atomic_and_fetch (v, init, __ATOMIC_RELEASE) !=  init)
++    abort ();
++
++  if (__atomic_and_fetch (v, 0, __ATOMIC_ACQ_REL) !=  0)
++    abort ();
++
++  *v = ~*v;
++  if (__atomic_and_fetch (v, 0, __ATOMIC_SEQ_CST) !=  0)
++    abort ();
++}
++
++void
++test_nand_fetch (short* v)
++{
++  *v = init;
++
++  if (__atomic_nand_fetch (v, 0, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_nand_fetch (v, init, __ATOMIC_CONSUME) !=  0)
++    abort ();
++
++  if (__atomic_nand_fetch (v, 0, __ATOMIC_ACQUIRE) !=  init)
++    abort ();
++
++  if (__atomic_nand_fetch (v, init, __ATOMIC_RELEASE) !=  0)
++    abort ();
++
++  if (__atomic_nand_fetch (v, init, __ATOMIC_ACQ_REL) !=  init)
++    abort ();
++
++  if (__atomic_nand_fetch (v, 0, __ATOMIC_SEQ_CST) !=  init)
++    abort ();
++}
++
++
++
++void
++test_xor_fetch (short* v)
++{
++  *v = init;
++  count = 0;
++
++  if (__atomic_xor_fetch (v, count, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_xor_fetch (v, ~count, __ATOMIC_CONSUME) !=  0)
++    abort ();
++
++  if (__atomic_xor_fetch (v, 0, __ATOMIC_ACQUIRE) !=  0)
++    abort ();
++
++  if (__atomic_xor_fetch (v, ~count, __ATOMIC_RELEASE) !=  init)
++    abort ();
++
++  if (__atomic_xor_fetch (v, 0, __ATOMIC_ACQ_REL) !=  init)
++    abort ();
++
++  if (__atomic_xor_fetch (v, ~count, __ATOMIC_SEQ_CST) !=  0)
++    abort ();
++}
++
++void
++test_or_fetch (short* v)
++{
++  *v = 0;
++  count = 1;
++
++  if (__atomic_or_fetch (v, count, __ATOMIC_RELAXED) !=  1)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, 2, __ATOMIC_CONSUME) !=  3)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, count, __ATOMIC_ACQUIRE) !=  7)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, 8, __ATOMIC_RELEASE) !=  15)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, count, __ATOMIC_ACQ_REL) !=  31)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, count, __ATOMIC_SEQ_CST) !=  63)
++    abort ();
++}
++
++
++/* Test the OP routines with a result which isn't used. Use both variations
++   within each function.  */
++
++void
++test_add (short* v)
++{
++  *v = 0;
++  count = 1;
++
++  __atomic_add_fetch (v, count, __ATOMIC_RELAXED);
++  if (*v != 1)
++    abort ();
++
++  __atomic_fetch_add (v, count, __ATOMIC_CONSUME);
++  if (*v != 2)
++    abort ();
++
++  __atomic_add_fetch (v, 1 , __ATOMIC_ACQUIRE);
++  if (*v != 3)
++    abort ();
++
++  __atomic_fetch_add (v, 1, __ATOMIC_RELEASE);
++  if (*v != 4)
++    abort ();
++
++  __atomic_add_fetch (v, count, __ATOMIC_ACQ_REL);
++  if (*v != 5)
++    abort ();
++
++  __atomic_fetch_add (v, count, __ATOMIC_SEQ_CST);
++  if (*v != 6)
++    abort ();
++}
++
++
++void
++test_sub (short* v)
++{
++  *v = res = 20;
++  count = 0;
++
++  __atomic_sub_fetch (v, count + 1, __ATOMIC_RELAXED);
++  if (*v != --res)
++    abort ();
++
++  __atomic_fetch_sub (v, count + 1, __ATOMIC_CONSUME);
++  if (*v != --res)
++    abort ();
++
++  __atomic_sub_fetch (v, 1, __ATOMIC_ACQUIRE);
++  if (*v != --res)
++    abort ();
++
++  __atomic_fetch_sub (v, 1, __ATOMIC_RELEASE);
++  if (*v != --res)
++    abort ();
++
++  __atomic_sub_fetch (v, count + 1, __ATOMIC_ACQ_REL);
++  if (*v != --res)
++    abort ();
++
++  __atomic_fetch_sub (v, count + 1, __ATOMIC_SEQ_CST);
++  if (*v != --res)
++    abort ();
++}
++
++void
++test_and (short* v)
++{
++  *v = init;
++
++  __atomic_and_fetch (v, 0, __ATOMIC_RELAXED);
++  if (*v != 0)
++    abort ();
++
++  *v = init;
++  __atomic_fetch_and (v, init, __ATOMIC_CONSUME);
++  if (*v != init)
++    abort ();
++
++  __atomic_and_fetch (v, 0, __ATOMIC_ACQUIRE);
++  if (*v != 0)
++    abort ();
++
++  *v = ~*v;
++  __atomic_fetch_and (v, init, __ATOMIC_RELEASE);
++  if (*v != init)
++    abort ();
++
++  __atomic_and_fetch (v, 0, __ATOMIC_ACQ_REL);
++  if (*v != 0)
++    abort ();
++
++  *v = ~*v;
++  __atomic_fetch_and (v, 0, __ATOMIC_SEQ_CST);
++  if (*v != 0)
++    abort ();
++}
++
++void
++test_nand (short* v)
++{
++  *v = init;
++
++  __atomic_fetch_nand (v, 0, __ATOMIC_RELAXED);
++  if (*v != init)
++    abort ();
++
++  __atomic_fetch_nand (v, init, __ATOMIC_CONSUME);
++  if (*v != 0)
++    abort ();
++
++  __atomic_nand_fetch (v, 0, __ATOMIC_ACQUIRE);
++  if (*v != init)
++    abort ();
++
++  __atomic_nand_fetch (v, init, __ATOMIC_RELEASE);
++  if (*v != 0)
++    abort ();
++
++  __atomic_fetch_nand (v, init, __ATOMIC_ACQ_REL);
++  if (*v != init)
++    abort ();
++
++  __atomic_nand_fetch (v, 0, __ATOMIC_SEQ_CST);
++  if (*v != init)
++    abort ();
++}
++
++
++
++void
++test_xor (short* v)
++{
++  *v = init;
++  count = 0;
++
++  __atomic_xor_fetch (v, count, __ATOMIC_RELAXED);
++  if (*v != init)
++    abort ();
++
++  __atomic_fetch_xor (v, ~count, __ATOMIC_CONSUME);
++  if (*v != 0)
++    abort ();
++
++  __atomic_xor_fetch (v, 0, __ATOMIC_ACQUIRE);
++  if (*v != 0)
++    abort ();
++
++  __atomic_fetch_xor (v, ~count, __ATOMIC_RELEASE);
++  if (*v != init)
++    abort ();
++
++  __atomic_fetch_xor (v, 0, __ATOMIC_ACQ_REL);
++  if (*v != init)
++    abort ();
++
++  __atomic_xor_fetch (v, ~count, __ATOMIC_SEQ_CST);
++  if (*v != 0)
++    abort ();
++}
++
++void
++test_or (short* v)
++{
++  *v = 0;
++  count = 1;
++
++  __atomic_or_fetch (v, count, __ATOMIC_RELAXED);
++  if (*v != 1)
++    abort ();
++
++  count *= 2;
++  __atomic_fetch_or (v, count, __ATOMIC_CONSUME);
++  if (*v != 3)
++    abort ();
++
++  count *= 2;
++  __atomic_or_fetch (v, 4, __ATOMIC_ACQUIRE);
++  if (*v != 7)
++    abort ();
++
++  count *= 2;
++  __atomic_fetch_or (v, 8, __ATOMIC_RELEASE);
++  if (*v != 15)
++    abort ();
++
++  count *= 2;
++  __atomic_or_fetch (v, count, __ATOMIC_ACQ_REL);
++  if (*v != 31)
++    abort ();
++
++  count *= 2;
++  __atomic_fetch_or (v, count, __ATOMIC_SEQ_CST);
++  if (*v != 63)
++    abort ();
++}
++
++int
++main () {
++  short* V[] = {&A.a, &A.b};
++
++  for (int i = 0; i < 2; i++) {
++    test_fetch_add (V[i]);
++    test_fetch_sub (V[i]);
++    test_fetch_and (V[i]);
++    test_fetch_nand (V[i]);
++    test_fetch_xor (V[i]);
++    test_fetch_or (V[i]);
++
++    test_add_fetch (V[i]);
++    test_sub_fetch (V[i]);
++    test_and_fetch (V[i]);
++    test_nand_fetch (V[i]);
++    test_xor_fetch (V[i]);
++    test_or_fetch (V[i]);
++
++    test_add (V[i]);
++    test_sub (V[i]);
++    test_and (V[i]);
++    test_nand (V[i]);
++    test_xor (V[i]);
++    test_or (V[i]);
++  }
++
++  return 0;
++}
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-5.c
+@@ -0,0 +1,87 @@
++/* Test __atomic routines for existence and proper execution on 1 byte
++   values with each valid memory model.  */
++/* Same logic as libatomic/testsuite/libatomic.c/atomic-compare-exchange-1.c */
++/* { dg-do run } */
++/* { dg-options "-minline-atomics" } */
++
++/* Test the execution of the __atomic_compare_exchange_n builtin for a char.  */
++
++extern void abort(void);
++
++char v = 0;
++char expected = 0;
++char max = ~0;
++char desired = ~0;
++char zero = 0;
++
++#define STRONG 0
++#define WEAK 1
++
++int
++main ()
++{
++
++  if (!__atomic_compare_exchange_n (&v, &expected, max, STRONG , __ATOMIC_RELAXED, __ATOMIC_RELAXED))
++    abort ();
++  if (expected != 0)
++    abort ();
++
++  if (__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
++    abort ();
++  if (expected != max)
++    abort ();
++
++  if (!__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
++    abort ();
++  if (expected != max)
++    abort ();
++  if (v != 0)
++    abort ();
++
++  if (__atomic_compare_exchange_n (&v, &expected, desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
++    abort ();
++  if (expected != 0)
++    abort ();
++
++  if (!__atomic_compare_exchange_n (&v, &expected, desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
++    abort ();
++  if (expected != 0)
++    abort ();
++  if (v != max)
++    abort ();
++
++  /* Now test the generic version.  */
++
++  v = 0;
++
++  if (!__atomic_compare_exchange (&v, &expected, &max, STRONG, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
++    abort ();
++  if (expected != 0)
++    abort ();
++
++  if (__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
++    abort ();
++  if (expected != max)
++    abort ();
++
++  if (!__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
++    abort ();
++  if (expected != max)
++    abort ();
++  if (v != 0)
++    abort ();
++
++  if (__atomic_compare_exchange (&v, &expected, &desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
++    abort ();
++  if (expected != 0)
++    abort ();
++
++  if (!__atomic_compare_exchange (&v, &expected, &desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
++    abort ();
++  if (expected != 0)
++    abort ();
++  if (v != max)
++    abort ();
++
++  return 0;
++}
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-6.c
+@@ -0,0 +1,87 @@
++/* Test __atomic routines for existence and proper execution on 2 byte
++   values with each valid memory model.  */
++/* Same logic as libatomic/testsuite/libatomic.c/atomic-compare-exchange-2.c */
++/* { dg-do run } */
++/* { dg-options "-minline-atomics" } */
++
++/* Test the execution of the __atomic_compare_exchange_n builtin for a short.  */
++
++extern void abort(void);
++
++short v = 0;
++short expected = 0;
++short max = ~0;
++short desired = ~0;
++short zero = 0;
++
++#define STRONG 0
++#define WEAK 1
++
++int
++main ()
++{
++
++  if (!__atomic_compare_exchange_n (&v, &expected, max, STRONG , __ATOMIC_RELAXED, __ATOMIC_RELAXED))
++    abort ();
++  if (expected != 0)
++    abort ();
++
++  if (__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
++    abort ();
++  if (expected != max)
++    abort ();
++
++  if (!__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
++    abort ();
++  if (expected != max)
++    abort ();
++  if (v != 0)
++    abort ();
++
++  if (__atomic_compare_exchange_n (&v, &expected, desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
++    abort ();
++  if (expected != 0)
++    abort ();
++
++  if (!__atomic_compare_exchange_n (&v, &expected, desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
++    abort ();
++  if (expected != 0)
++    abort ();
++  if (v != max)
++    abort ();
++
++  /* Now test the generic version.  */
++
++  v = 0;
++
++  if (!__atomic_compare_exchange (&v, &expected, &max, STRONG, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
++    abort ();
++  if (expected != 0)
++    abort ();
++
++  if (__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
++    abort ();
++  if (expected != max)
++    abort ();
++
++  if (!__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
++    abort ();
++  if (expected != max)
++    abort ();
++  if (v != 0)
++    abort ();
++
++  if (__atomic_compare_exchange (&v, &expected, &desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
++    abort ();
++  if (expected != 0)
++    abort ();
++
++  if (!__atomic_compare_exchange (&v, &expected, &desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
++    abort ();
++  if (expected != 0)
++    abort ();
++  if (v != max)
++    abort ();
++
++  return 0;
++}
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-7.c
+@@ -0,0 +1,69 @@
++/* Test __atomic routines for existence and proper execution on 1 byte
++   values with each valid memory model.  */
++/* Same logic as libatomic/testsuite/libatomic.c/atomic-exchange-1.c */
++/* { dg-do run } */
++/* { dg-options "-minline-atomics" } */
++
++/* Test the execution of the __atomic_exchange_n builtin for a char.  */
++
++extern void abort(void);
++
++char v, count, ret;
++
++int
++main ()
++{
++  v = 0;
++  count = 0;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELAXED) != count)
++    abort ();
++  count++;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQUIRE) != count)
++    abort ();
++  count++;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELEASE) != count)
++    abort ();
++  count++;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQ_REL) != count)
++    abort ();
++  count++;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_SEQ_CST) != count)
++    abort ();
++  count++;
++
++  /* Now test the generic version.  */
++
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_RELAXED);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQUIRE);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_RELEASE);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQ_REL);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_SEQ_CST);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  return 0;
++}
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-8.c
+@@ -0,0 +1,69 @@
++/* Test __atomic routines for existence and proper execution on 2 byte
++   values with each valid memory model.  */
++/* Same logic as libatomic/testsuite/libatomic.c/atomic-exchange-2.c */
++/* { dg-do run } */
++/* { dg-options "-minline-atomics" } */
++
++/* Test the execution of the __atomic_exchange_n builtin for a short.  */
++
++extern void abort(void);
++
++short v, count, ret;
++
++int
++main ()
++{
++  v = 0;
++  count = 0;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELAXED) != count)
++    abort ();
++  count++;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQUIRE) != count)
++    abort ();
++  count++;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELEASE) != count)
++    abort ();
++  count++;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQ_REL) != count)
++    abort ();
++  count++;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_SEQ_CST) != count)
++    abort ();
++  count++;
++
++  /* Now test the generic version.  */
++
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_RELAXED);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQUIRE);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_RELEASE);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQ_REL);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_SEQ_CST);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  return 0;
++}
+--- a/libgcc/config/riscv/atomic.c
++++ b/libgcc/config/riscv/atomic.c
+@@ -30,6 +30,8 @@ see the files COPYING3 and COPYING.RUNTI
+ #define INVERT		"not %[tmp1], %[tmp1]\n\t"
+ #define DONT_INVERT	""
+ 
++/* Logic duplicated in gcc/gcc/config/riscv/sync.md for use when inlining is enabled */
++
+ #define GENERATE_FETCH_AND_OP(type, size, opname, insn, invert, cop)	\
+   type __sync_fetch_and_ ## opname ## _ ## size (type *p, type v)	\
+   {									\
diff --git a/toolchain/gcc/patches-11.x/701-riscv-linux-Don-t-add-latomic-with-pthread.patch b/toolchain/gcc/patches-11.x/701-riscv-linux-Don-t-add-latomic-with-pthread.patch
new file mode 100644
index 0000000000..328c7be9ce
--- /dev/null
+++ b/toolchain/gcc/patches-11.x/701-riscv-linux-Don-t-add-latomic-with-pthread.patch
@@ -0,0 +1,36 @@
+From 203f3060dd363361b172f7295f42bb6bf5ac0b3b Mon Sep 17 00:00:00 2001
+From: Andreas Schwab <schwab@suse.de>
+Date: Sat, 23 Apr 2022 15:48:42 +0200
+Subject: [PATCH] riscv/linux: Don't add -latomic with -pthread
+
+Now that we have support for inline subword atomic operations, it is no
+longer necessary to link against libatomic.  This also fixes testsuite
+failures because the framework does not properly set up the linker flags
+for finding libatomic.
+The use of atomic operations is also independent of the use of libpthread.
+
+gcc/
+	* config/riscv/linux.h (LIB_SPEC): Don't redefine.
+---
+ gcc/config/riscv/linux.h | 10 ----------
+ 1 file changed, 10 deletions(-)
+
+--- a/gcc/config/riscv/linux.h
++++ b/gcc/config/riscv/linux.h
+@@ -35,16 +35,6 @@ along with GCC; see the file COPYING3.
+ #undef MUSL_DYNAMIC_LINKER
+ #define MUSL_DYNAMIC_LINKER "/lib/ld-musl-riscv" XLEN_SPEC MUSL_ABI_SUFFIX ".so.1"
+ 
+-/* Because RISC-V only has word-sized atomics, it requries libatomic where
+-   others do not.  So link libatomic by default, as needed.  */
+-#undef LIB_SPEC
+-#ifdef LD_AS_NEEDED_OPTION
+-#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC \
+-  " %{pthread:" LD_AS_NEEDED_OPTION " -latomic " LD_NO_AS_NEEDED_OPTION "}"
+-#else
+-#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC " -latomic "
+-#endif
+-
+ #define ICACHE_FLUSH_FUNC "__riscv_flush_icache"
+ 
+ #define CPP_SPEC "%{pthread:-D_REENTRANT}"
diff --git a/toolchain/gcc/patches-11.x/910-mbsd_multi.patch b/toolchain/gcc/patches-11.x/910-mbsd_multi.patch
index 21f532043e..2d1c3d1b80 100644
--- a/toolchain/gcc/patches-11.x/910-mbsd_multi.patch
+++ b/toolchain/gcc/patches-11.x/910-mbsd_multi.patch
@@ -114,7 +114,7 @@ Date:   Tue Jul 31 00:52:27 2007 +0000
  ; On SVR4 targets, it also controls whether or not to emit a
 --- a/gcc/doc/invoke.texi
 +++ b/gcc/doc/invoke.texi
-@@ -9058,6 +9058,17 @@ This option is only supported for C and
+@@ -9059,6 +9059,17 @@ This option is only supported for C and
  @option{-Wall} and by @option{-Wpedantic}, which can be disabled with
  @option{-Wno-pointer-sign}.
  
diff --git a/toolchain/gcc/patches-12.x/700-RISCV-Inline-subword-atomic-ops.patch b/toolchain/gcc/patches-12.x/700-RISCV-Inline-subword-atomic-ops.patch
new file mode 100644
index 0000000000..b164c76522
--- /dev/null
+++ b/toolchain/gcc/patches-12.x/700-RISCV-Inline-subword-atomic-ops.patch
@@ -0,0 +1,2021 @@
+From f797260adaf52bee0ec0e16190bbefbe1bfc3692 Mon Sep 17 00:00:00 2001
+From: Patrick O'Neill <patrick@rivosinc.com>
+Date: Tue, 18 Apr 2023 14:33:13 -0700
+Subject: [PATCH] RISCV: Inline subword atomic ops
+
+RISC-V has no support for subword atomic operations; code currently
+generates libatomic library calls.
+
+This patch changes the default behavior to inline subword atomic calls
+(using the same logic as the existing library call).
+Behavior can be specified using the -minline-atomics and
+-mno-inline-atomics command line flags.
+
+gcc/libgcc/config/riscv/atomic.c has the same logic implemented in asm.
+This will need to stay for backwards compatibility and the
+-mno-inline-atomics flag.
+
+2023-04-18 Patrick O'Neill <patrick@rivosinc.com>
+
+gcc/ChangeLog:
+	PR target/104338
+	* config/riscv/riscv-protos.h: Add helper function stubs.
+	* config/riscv/riscv.cc: Add helper functions for subword masking.
+	* config/riscv/riscv.opt: Add command-line flag.
+	* config/riscv/sync.md: Add masking logic and inline asm for fetch_and_op,
+	fetch_and_nand, CAS, and exchange ops.
+	* doc/invoke.texi: Add blurb regarding command-line flag.
+
+libgcc/ChangeLog:
+	PR target/104338
+	* config/riscv/atomic.c: Add reference to duplicate logic.
+
+gcc/testsuite/ChangeLog:
+	PR target/104338
+	* gcc.target/riscv/inline-atomics-1.c: New test.
+	* gcc.target/riscv/inline-atomics-2.c: New test.
+	* gcc.target/riscv/inline-atomics-3.c: New test.
+	* gcc.target/riscv/inline-atomics-4.c: New test.
+	* gcc.target/riscv/inline-atomics-5.c: New test.
+	* gcc.target/riscv/inline-atomics-6.c: New test.
+	* gcc.target/riscv/inline-atomics-7.c: New test.
+	* gcc.target/riscv/inline-atomics-8.c: New test.
+
+Signed-off-by: Patrick O'Neill <patrick@rivosinc.com>
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+---
+ gcc/config/riscv/riscv-protos.h               |   2 +
+ gcc/config/riscv/riscv.cc                     |  49 ++
+ gcc/config/riscv/riscv.opt                    |   4 +
+ gcc/config/riscv/sync.md                      | 301 +++++++++
+ gcc/doc/invoke.texi                           |  10 +-
+ .../gcc.target/riscv/inline-atomics-1.c       |  18 +
+ .../gcc.target/riscv/inline-atomics-2.c       |   9 +
+ .../gcc.target/riscv/inline-atomics-3.c       | 569 ++++++++++++++++++
+ .../gcc.target/riscv/inline-atomics-4.c       | 566 +++++++++++++++++
+ .../gcc.target/riscv/inline-atomics-5.c       |  87 +++
+ .../gcc.target/riscv/inline-atomics-6.c       |  87 +++
+ .../gcc.target/riscv/inline-atomics-7.c       |  69 +++
+ .../gcc.target/riscv/inline-atomics-8.c       |  69 +++
+ libgcc/config/riscv/atomic.c                  |   2 +
+ 14 files changed, 1841 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-1.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-2.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-3.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-4.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-5.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-6.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-7.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-8.c
+
+--- a/gcc/config/riscv/riscv-protos.h
++++ b/gcc/config/riscv/riscv-protos.h
+@@ -74,6 +74,8 @@ extern bool riscv_expand_block_move (rtx
+ extern bool riscv_store_data_bypass_p (rtx_insn *, rtx_insn *);
+ extern rtx riscv_gen_gpr_save_insn (struct riscv_frame_info *);
+ extern bool riscv_gpr_save_operation_p (rtx);
++extern void riscv_subword_address (rtx, rtx *, rtx *, rtx *, rtx *);
++extern void riscv_lshift_subword (machine_mode, rtx, rtx, rtx *);
+ 
+ /* Routines implemented in riscv-c.cc.  */
+ void riscv_cpu_cpp_builtins (cpp_reader *);
+--- a/gcc/config/riscv/riscv.cc
++++ b/gcc/config/riscv/riscv.cc
+@@ -5605,6 +5605,55 @@ riscv_asan_shadow_offset (void)
+   return TARGET_64BIT ? (HOST_WIDE_INT_1 << 29) : 0;
+ }
+ 
++/* Given memory reference MEM, expand code to compute the aligned
++   memory address, shift and mask values and store them into
++   *ALIGNED_MEM, *SHIFT, *MASK and *NOT_MASK.  */
++
++void
++riscv_subword_address (rtx mem, rtx *aligned_mem, rtx *shift, rtx *mask,
++		       rtx *not_mask)
++{
++  /* Align the memory address to a word.  */
++  rtx addr = force_reg (Pmode, XEXP (mem, 0));
++
++  rtx addr_mask = gen_int_mode (-4, Pmode);
++
++  rtx aligned_addr = gen_reg_rtx (Pmode);
++  emit_move_insn (aligned_addr,  gen_rtx_AND (Pmode, addr, addr_mask));
++
++  *aligned_mem = change_address (mem, SImode, aligned_addr);
++
++  /* Calculate the shift amount.  */
++  emit_move_insn (*shift, gen_rtx_AND (SImode, gen_lowpart (SImode, addr),
++				       gen_int_mode (3, SImode)));
++  emit_move_insn (*shift, gen_rtx_ASHIFT (SImode, *shift,
++					  gen_int_mode (3, SImode)));
++
++  /* Calculate the mask.  */
++  int unshifted_mask = GET_MODE_MASK (GET_MODE (mem));
++
++  emit_move_insn (*mask, gen_int_mode (unshifted_mask, SImode));
++
++  emit_move_insn (*mask, gen_rtx_ASHIFT (SImode, *mask,
++					 gen_lowpart (QImode, *shift)));
++
++  emit_move_insn (*not_mask, gen_rtx_NOT(SImode, *mask));
++}
++
++/* Leftshift a subword within an SImode register.  */
++
++void
++riscv_lshift_subword (machine_mode mode, rtx value, rtx shift,
++		      rtx *shifted_value)
++{
++  rtx value_reg = gen_reg_rtx (SImode);
++  emit_move_insn (value_reg, simplify_gen_subreg (SImode, value,
++						  mode, 0));
++
++  emit_move_insn(*shifted_value, gen_rtx_ASHIFT (SImode, value_reg,
++						 gen_lowpart (QImode, shift)));
++}
++
+ /* Initialize the GCC target structure.  */
+ #undef TARGET_ASM_ALIGNED_HI_OP
+ #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
+--- a/gcc/config/riscv/riscv.opt
++++ b/gcc/config/riscv/riscv.opt
+@@ -209,6 +209,10 @@ int riscv_vector_elen_flags
+ TargetVariable
+ int riscv_zvl_flags
+ 
++minline-atomics
++Target Var(TARGET_INLINE_SUBWORD_ATOMIC) Init(1)
++Always inline subword atomic operations.
++
+ Enum
+ Name(isa_spec_class) Type(enum riscv_isa_spec_class)
+ Supported ISA specs (for use with the -misa-spec= option):
+--- a/gcc/config/riscv/sync.md
++++ b/gcc/config/riscv/sync.md
+@@ -21,8 +21,11 @@
+ 
+ (define_c_enum "unspec" [
+   UNSPEC_COMPARE_AND_SWAP
++  UNSPEC_COMPARE_AND_SWAP_SUBWORD
+   UNSPEC_SYNC_OLD_OP
++  UNSPEC_SYNC_OLD_OP_SUBWORD
+   UNSPEC_SYNC_EXCHANGE
++  UNSPEC_SYNC_EXCHANGE_SUBWORD
+   UNSPEC_ATOMIC_STORE
+   UNSPEC_MEMORY_BARRIER
+ ])
+@@ -92,6 +95,135 @@
+   "%F3amo<insn>.<amo>%A3 %0,%z2,%1"
+   [(set (attr "length") (const_int 8))])
+ 
++(define_insn "subword_atomic_fetch_strong_<atomic_optab>"
++  [(set (match_operand:SI 0 "register_operand" "=&r")		   ;; old value at mem
++	(match_operand:SI 1 "memory_operand" "+A"))		   ;; mem location
++   (set (match_dup 1)
++	(unspec_volatile:SI
++	  [(any_atomic:SI (match_dup 1)
++		     (match_operand:SI 2 "register_operand" "rI")) ;; value for op
++	   (match_operand:SI 3 "register_operand" "rI")]	   ;; mask
++	 UNSPEC_SYNC_OLD_OP_SUBWORD))
++    (match_operand:SI 4 "register_operand" "rI")		   ;; not_mask
++    (clobber (match_scratch:SI 5 "=&r"))			   ;; tmp_1
++    (clobber (match_scratch:SI 6 "=&r"))]			   ;; tmp_2
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++  {
++    return "1:\;"
++	   "lr.w.aq\t%0, %1\;"
++	   "<insn>\t%5, %0, %2\;"
++	   "and\t%5, %5, %3\;"
++	   "and\t%6, %0, %4\;"
++	   "or\t%6, %6, %5\;"
++	   "sc.w.rl\t%5, %6, %1\;"
++	   "bnez\t%5, 1b";
++  }
++  [(set (attr "length") (const_int 28))])
++
++(define_expand "atomic_fetch_nand<mode>"
++  [(match_operand:SHORT 0 "register_operand")			      ;; old value at mem
++   (not:SHORT (and:SHORT (match_operand:SHORT 1 "memory_operand")     ;; mem location
++			 (match_operand:SHORT 2 "reg_or_0_operand"))) ;; value for op
++   (match_operand:SI 3 "const_int_operand")]			      ;; model
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++{
++  /* We have no QImode/HImode atomics, so form a mask, then use
++     subword_atomic_fetch_strong_nand to implement a LR/SC version of the
++     operation. */
++
++  /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when inlining
++     is disabled */
++
++  rtx old = gen_reg_rtx (SImode);
++  rtx mem = operands[1];
++  rtx value = operands[2];
++  rtx aligned_mem = gen_reg_rtx (SImode);
++  rtx shift = gen_reg_rtx (SImode);
++  rtx mask = gen_reg_rtx (SImode);
++  rtx not_mask = gen_reg_rtx (SImode);
++
++  riscv_subword_address (mem, &aligned_mem, &shift, &mask, &not_mask);
++
++  rtx shifted_value = gen_reg_rtx (SImode);
++  riscv_lshift_subword (<MODE>mode, value, shift, &shifted_value);
++
++  emit_insn (gen_subword_atomic_fetch_strong_nand (old, aligned_mem,
++						   shifted_value,
++						   mask, not_mask));
++
++  emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old,
++					 gen_lowpart (QImode, shift)));
++
++  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, old));
++
++  DONE;
++})
++
++(define_insn "subword_atomic_fetch_strong_nand"
++  [(set (match_operand:SI 0 "register_operand" "=&r")			  ;; old value at mem
++	(match_operand:SI 1 "memory_operand" "+A"))			  ;; mem location
++   (set (match_dup 1)
++	(unspec_volatile:SI
++	  [(not:SI (and:SI (match_dup 1)
++			   (match_operand:SI 2 "register_operand" "rI"))) ;; value for op
++	   (match_operand:SI 3 "register_operand" "rI")]		  ;; mask
++	 UNSPEC_SYNC_OLD_OP_SUBWORD))
++    (match_operand:SI 4 "register_operand" "rI")			  ;; not_mask
++    (clobber (match_scratch:SI 5 "=&r"))				  ;; tmp_1
++    (clobber (match_scratch:SI 6 "=&r"))]				  ;; tmp_2
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++  {
++    return "1:\;"
++	   "lr.w.aq\t%0, %1\;"
++	   "and\t%5, %0, %2\;"
++	   "not\t%5, %5\;"
++	   "and\t%5, %5, %3\;"
++	   "and\t%6, %0, %4\;"
++	   "or\t%6, %6, %5\;"
++	   "sc.w.rl\t%5, %6, %1\;"
++	   "bnez\t%5, 1b";
++  }
++  [(set (attr "length") (const_int 32))])
++
++(define_expand "atomic_fetch_<atomic_optab><mode>"
++  [(match_operand:SHORT 0 "register_operand")			 ;; old value at mem
++   (any_atomic:SHORT (match_operand:SHORT 1 "memory_operand")	 ;; mem location
++		     (match_operand:SHORT 2 "reg_or_0_operand")) ;; value for op
++   (match_operand:SI 3 "const_int_operand")]			 ;; model
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++{
++  /* We have no QImode/HImode atomics, so form a mask, then use
++     subword_atomic_fetch_strong_<atomic_optab> to implement a LR/SC
++     version of the operation. */
++
++  /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when inlining
++     is disabled */
++
++  rtx old = gen_reg_rtx (SImode);
++  rtx mem = operands[1];
++  rtx value = operands[2];
++  rtx aligned_mem = gen_reg_rtx (SImode);
++  rtx shift = gen_reg_rtx (SImode);
++  rtx mask = gen_reg_rtx (SImode);
++  rtx not_mask = gen_reg_rtx (SImode);
++
++  riscv_subword_address (mem, &aligned_mem, &shift, &mask, &not_mask);
++
++  rtx shifted_value = gen_reg_rtx (SImode);
++  riscv_lshift_subword (<MODE>mode, value, shift, &shifted_value);
++
++  emit_insn (gen_subword_atomic_fetch_strong_<atomic_optab> (old, aligned_mem,
++							     shifted_value,
++							     mask, not_mask));
++
++  emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old,
++					 gen_lowpart (QImode, shift)));
++
++  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, old));
++
++  DONE;
++})
++
+ (define_insn "atomic_exchange<mode>"
+   [(set (match_operand:GPR 0 "register_operand" "=&r")
+ 	(unspec_volatile:GPR
+@@ -104,6 +236,56 @@
+   "%F3amoswap.<amo>%A3 %0,%z2,%1"
+   [(set (attr "length") (const_int 8))])
+ 
++(define_expand "atomic_exchange<mode>"
++  [(match_operand:SHORT 0 "register_operand") ;; old value at mem
++   (match_operand:SHORT 1 "memory_operand")   ;; mem location
++   (match_operand:SHORT 2 "register_operand") ;; value
++   (match_operand:SI 3 "const_int_operand")]  ;; model
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++{
++  rtx old = gen_reg_rtx (SImode);
++  rtx mem = operands[1];
++  rtx value = operands[2];
++  rtx aligned_mem = gen_reg_rtx (SImode);
++  rtx shift = gen_reg_rtx (SImode);
++  rtx mask = gen_reg_rtx (SImode);
++  rtx not_mask = gen_reg_rtx (SImode);
++
++  riscv_subword_address (mem, &aligned_mem, &shift, &mask, &not_mask);
++
++  rtx shifted_value = gen_reg_rtx (SImode);
++  riscv_lshift_subword (<MODE>mode, value, shift, &shifted_value);
++
++  emit_insn (gen_subword_atomic_exchange_strong (old, aligned_mem,
++						 shifted_value, not_mask));
++
++  emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old,
++					 gen_lowpart (QImode, shift)));
++
++  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, old));
++  DONE;
++})
++
++(define_insn "subword_atomic_exchange_strong"
++  [(set (match_operand:SI 0 "register_operand" "=&r")	 ;; old value at mem
++	(match_operand:SI 1 "memory_operand" "+A"))	 ;; mem location
++   (set (match_dup 1)
++	(unspec_volatile:SI
++	  [(match_operand:SI 2 "reg_or_0_operand" "rI")  ;; value
++	   (match_operand:SI 3 "reg_or_0_operand" "rI")] ;; not_mask
++      UNSPEC_SYNC_EXCHANGE_SUBWORD))
++    (clobber (match_scratch:SI 4 "=&r"))]		 ;; tmp_1
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++  {
++    return "1:\;"
++	   "lr.w.aq\t%0, %1\;"
++	   "and\t%4, %0, %3\;"
++	   "or\t%4, %4, %2\;"
++	   "sc.w.rl\t%4, %4, %1\;"
++	   "bnez\t%4, 1b";
++  }
++  [(set (attr "length") (const_int 20))])
++
+ (define_insn "atomic_cas_value_strong<mode>"
+   [(set (match_operand:GPR 0 "register_operand" "=&r")
+ 	(match_operand:GPR 1 "memory_operand" "+A"))
+@@ -152,6 +334,125 @@
+   DONE;
+ })
+ 
++(define_expand "atomic_compare_and_swap<mode>"
++  [(match_operand:SI 0 "register_operand")    ;; bool output
++   (match_operand:SHORT 1 "register_operand") ;; val output
++   (match_operand:SHORT 2 "memory_operand")   ;; memory
++   (match_operand:SHORT 3 "reg_or_0_operand") ;; expected value
++   (match_operand:SHORT 4 "reg_or_0_operand") ;; desired value
++   (match_operand:SI 5 "const_int_operand")   ;; is_weak
++   (match_operand:SI 6 "const_int_operand")   ;; mod_s
++   (match_operand:SI 7 "const_int_operand")]  ;; mod_f
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++{
++  emit_insn (gen_atomic_cas_value_strong<mode> (operands[1], operands[2],
++						operands[3], operands[4],
++						operands[6], operands[7]));
++
++  rtx val = gen_reg_rtx (SImode);
++  if (operands[1] != const0_rtx)
++    emit_move_insn (val, gen_rtx_SIGN_EXTEND (SImode, operands[1]));
++  else
++    emit_move_insn (val, const0_rtx);
++
++  rtx exp = gen_reg_rtx (SImode);
++  if (operands[3] != const0_rtx)
++    emit_move_insn (exp, gen_rtx_SIGN_EXTEND (SImode, operands[3]));
++  else
++    emit_move_insn (exp, const0_rtx);
++
++  rtx compare = val;
++  if (exp != const0_rtx)
++    {
++      rtx difference = gen_rtx_MINUS (SImode, val, exp);
++      compare = gen_reg_rtx (SImode);
++      emit_move_insn  (compare, difference);
++    }
++
++  if (word_mode != SImode)
++    {
++      rtx reg = gen_reg_rtx (word_mode);
++      emit_move_insn (reg, gen_rtx_SIGN_EXTEND (word_mode, compare));
++      compare = reg;
++    }
++
++  emit_move_insn (operands[0], gen_rtx_EQ (SImode, compare, const0_rtx));
++  DONE;
++})
++
++(define_expand "atomic_cas_value_strong<mode>"
++  [(match_operand:SHORT 0 "register_operand") ;; val output
++   (match_operand:SHORT 1 "memory_operand")   ;; memory
++   (match_operand:SHORT 2 "reg_or_0_operand") ;; expected value
++   (match_operand:SHORT 3 "reg_or_0_operand") ;; desired value
++   (match_operand:SI 4 "const_int_operand")   ;; mod_s
++   (match_operand:SI 5 "const_int_operand")   ;; mod_f
++   (match_scratch:SHORT 6)]
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++{
++  /* We have no QImode/HImode atomics, so form a mask, then use
++     subword_atomic_cas_strong to implement a LR/SC version of the
++     operation. */
++
++  /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when inlining
++     is disabled */
++
++  rtx old = gen_reg_rtx (SImode);
++  rtx mem = operands[1];
++  rtx aligned_mem = gen_reg_rtx (SImode);
++  rtx shift = gen_reg_rtx (SImode);
++  rtx mask = gen_reg_rtx (SImode);
++  rtx not_mask = gen_reg_rtx (SImode);
++
++  riscv_subword_address (mem, &aligned_mem, &shift, &mask, &not_mask);
++
++  rtx o = operands[2];
++  rtx n = operands[3];
++  rtx shifted_o = gen_reg_rtx (SImode);
++  rtx shifted_n = gen_reg_rtx (SImode);
++
++  riscv_lshift_subword (<MODE>mode, o, shift, &shifted_o);
++  riscv_lshift_subword (<MODE>mode, n, shift, &shifted_n);
++
++  emit_move_insn (shifted_o, gen_rtx_AND (SImode, shifted_o, mask));
++  emit_move_insn (shifted_n, gen_rtx_AND (SImode, shifted_n, mask));
++
++  emit_insn (gen_subword_atomic_cas_strong (old, aligned_mem,
++					    shifted_o, shifted_n,
++					    mask, not_mask));
++
++  emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old,
++					 gen_lowpart (QImode, shift)));
++
++  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, old));
++
++  DONE;
++})
++
++(define_insn "subword_atomic_cas_strong"
++  [(set (match_operand:SI 0 "register_operand" "=&r")			   ;; old value at mem
++	(match_operand:SI 1 "memory_operand" "+A"))			   ;; mem location
++   (set (match_dup 1)
++	(unspec_volatile:SI [(match_operand:SI 2 "reg_or_0_operand" "rJ")  ;; expected value
++			     (match_operand:SI 3 "reg_or_0_operand" "rJ")] ;; desired value
++	 UNSPEC_COMPARE_AND_SWAP_SUBWORD))
++	(match_operand:SI 4 "register_operand" "rI")			   ;; mask
++	(match_operand:SI 5 "register_operand" "rI")			   ;; not_mask
++	(clobber (match_scratch:SI 6 "=&r"))]				   ;; tmp_1
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++  {
++    return "1:\;"
++	   "lr.w.aq\t%0, %1\;"
++	   "and\t%6, %0, %4\;"
++	   "bne\t%6, %z2, 1f\;"
++	   "and\t%6, %0, %5\;"
++	   "or\t%6, %6, %3\;"
++	   "sc.w.rl\t%6, %6, %1\;"
++	   "bnez\t%6, 1b\;"
++	   "1:";
++  }
++  [(set (attr "length") (const_int 28))])
++
+ (define_expand "atomic_test_and_set"
+   [(match_operand:QI 0 "register_operand" "")     ;; bool output
+    (match_operand:QI 1 "memory_operand" "+A")    ;; memory
+--- a/gcc/doc/invoke.texi
++++ b/gcc/doc/invoke.texi
+@@ -753,7 +753,8 @@ Objective-C and Objective-C++ Dialects}.
+ -moverride=@var{string}  -mverbose-cost-dump @gol
+ -mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{sysreg} @gol
+ -mstack-protector-guard-offset=@var{offset} -mtrack-speculation @gol
+--moutline-atomics }
++-moutline-atomics
++-minline-atomics  -mno-inline-atomics}
+ 
+ @emph{Adapteva Epiphany Options}
+ @gccoptlist{-mhalf-reg-file  -mprefer-short-insn-regs @gol
+@@ -28035,6 +28036,13 @@ Do or don't use smaller but slower prolo
+ library function calls.  The default is to use fast inline prologues and
+ epilogues.
+ 
++@opindex minline-atomics
++@item -minline-atomics
++@itemx -mno-inline-atomics
++Do or don't expand subword (8-bit and 16-bit) atomic operations inline.
++With @option{-mno-inline-atomics} they are emitted as library function
++calls, which may require libatomic.  The default is to expand them inline.
++
+ @item -mshorten-memrefs
+ @itemx -mno-shorten-memrefs
+ @opindex mshorten-memrefs
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-1.c
+@@ -0,0 +1,18 @@
++/* { dg-do compile } */
++/* { dg-options "-mno-inline-atomics" } */
++/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "fetch_and_nand" { target *-*-* } 0 } */
++/* { dg-final { scan-assembler "\tcall\t__sync_fetch_and_add_1" } } */
++/* { dg-final { scan-assembler "\tcall\t__sync_fetch_and_nand_1" } } */
++/* { dg-final { scan-assembler "\tcall\t__sync_bool_compare_and_swap_1" } } */
++
++char foo;
++char bar;
++char baz;
++
++int
++main ()
++{
++  __sync_fetch_and_add(&foo, 1);
++  __sync_fetch_and_nand(&bar, 1);
++  __sync_bool_compare_and_swap (&baz, 1, 2);
++}
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-2.c
+@@ -0,0 +1,9 @@
++/* { dg-do compile } */
++/* Verify that subword atomics do not generate calls.  */
++/* { dg-options "-minline-atomics" } */
++/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "fetch_and_nand" { target *-*-* } 0 } */
++/* { dg-final { scan-assembler-not "\tcall\t__sync_fetch_and_add_1" } } */
++/* { dg-final { scan-assembler-not "\tcall\t__sync_fetch_and_nand_1" } } */
++/* { dg-final { scan-assembler-not "\tcall\t__sync_bool_compare_and_swap_1" } } */
++
++#include "inline-atomics-1.c"
+\ No newline at end of file
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-3.c
+@@ -0,0 +1,569 @@
++/* Check all char alignments.  */
++/* Same logic as libatomic/testsuite/libatomic.c/atomic-op-1.c.  */
++/* Test __atomic routines for existence and proper execution on 1 byte
++   values with each valid memory model.  */
++/* { dg-do run } */
++/* { dg-options "-minline-atomics -Wno-address-of-packed-member" } */
++
++/* Test the execution of the __atomic_*OP builtin routines for a char.  */
++
++extern void abort(void);
++
++char count, res;
++const char init = ~0;
++
++struct A
++{
++   char a;
++   char b;
++   char c;
++   char d;
++} __attribute__ ((packed)) A;
++
++/* The fetch_op routines return the original value before the operation.  */
++
++void
++test_fetch_add (char* v)
++{
++  *v = 0;
++  count = 1;
++
++  if (__atomic_fetch_add (v, count, __ATOMIC_RELAXED) != 0)
++    abort ();
++
++  if (__atomic_fetch_add (v, 1, __ATOMIC_CONSUME) != 1)
++    abort ();
++
++  if (__atomic_fetch_add (v, count, __ATOMIC_ACQUIRE) != 2)
++    abort ();
++
++  if (__atomic_fetch_add (v, 1, __ATOMIC_RELEASE) != 3)
++    abort ();
++
++  if (__atomic_fetch_add (v, count, __ATOMIC_ACQ_REL) != 4)
++    abort ();
++
++  if (__atomic_fetch_add (v, 1, __ATOMIC_SEQ_CST) != 5)
++    abort ();
++}
++
++
++void
++test_fetch_sub (char* v)
++{
++  *v = res = 20;
++  count = 0;
++
++  if (__atomic_fetch_sub (v, count + 1, __ATOMIC_RELAXED) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, 1, __ATOMIC_CONSUME) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, count + 1, __ATOMIC_ACQUIRE) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, 1, __ATOMIC_RELEASE) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, count + 1, __ATOMIC_ACQ_REL) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, 1, __ATOMIC_SEQ_CST) !=  res--)
++    abort ();
++}
++
++void
++test_fetch_and (char* v)
++{
++  *v = init;
++
++  if (__atomic_fetch_and (v, 0, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_fetch_and (v, init, __ATOMIC_CONSUME) !=  0)
++    abort ();
++
++  if (__atomic_fetch_and (v, 0, __ATOMIC_ACQUIRE) !=  0)
++    abort ();
++
++  *v = ~*v;
++  if (__atomic_fetch_and (v, init, __ATOMIC_RELEASE) !=  init)
++    abort ();
++
++  if (__atomic_fetch_and (v, 0, __ATOMIC_ACQ_REL) !=  init)
++    abort ();
++
++  if (__atomic_fetch_and (v, 0, __ATOMIC_SEQ_CST) !=  0)
++    abort ();
++}
++
++void
++test_fetch_nand (char* v)
++{
++  *v = init;
++
++  if (__atomic_fetch_nand (v, 0, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_fetch_nand (v, init, __ATOMIC_CONSUME) !=  init)
++    abort ();
++
++  if (__atomic_fetch_nand (v, 0, __ATOMIC_ACQUIRE) !=  0 )
++    abort ();
++
++  if (__atomic_fetch_nand (v, init, __ATOMIC_RELEASE) !=  init)
++    abort ();
++
++  if (__atomic_fetch_nand (v, init, __ATOMIC_ACQ_REL) !=  0)
++    abort ();
++
++  if (__atomic_fetch_nand (v, 0, __ATOMIC_SEQ_CST) !=  init)
++    abort ();
++}
++
++void
++test_fetch_xor (char* v)
++{
++  *v = init;
++  count = 0;
++
++  if (__atomic_fetch_xor (v, count, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_fetch_xor (v, ~count, __ATOMIC_CONSUME) !=  init)
++    abort ();
++
++  if (__atomic_fetch_xor (v, 0, __ATOMIC_ACQUIRE) !=  0)
++    abort ();
++
++  if (__atomic_fetch_xor (v, ~count, __ATOMIC_RELEASE) !=  0)
++    abort ();
++
++  if (__atomic_fetch_xor (v, 0, __ATOMIC_ACQ_REL) !=  init)
++    abort ();
++
++  if (__atomic_fetch_xor (v, ~count, __ATOMIC_SEQ_CST) !=  init)
++    abort ();
++}
++
++void
++test_fetch_or (char* v)
++{
++  *v = 0;
++  count = 1;
++
++  if (__atomic_fetch_or (v, count, __ATOMIC_RELAXED) !=  0)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, 2, __ATOMIC_CONSUME) !=  1)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, count, __ATOMIC_ACQUIRE) !=  3)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, 8, __ATOMIC_RELEASE) !=  7)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, count, __ATOMIC_ACQ_REL) !=  15)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, count, __ATOMIC_SEQ_CST) !=  31)
++    abort ();
++}
++
++/* The OP_fetch routines return the new value after the operation.  */
++
++void
++test_add_fetch (char* v)
++{
++  *v = 0;
++  count = 1;
++
++  if (__atomic_add_fetch (v, count, __ATOMIC_RELAXED) != 1)
++    abort ();
++
++  if (__atomic_add_fetch (v, 1, __ATOMIC_CONSUME) != 2)
++    abort ();
++
++  if (__atomic_add_fetch (v, count, __ATOMIC_ACQUIRE) != 3)
++    abort ();
++
++  if (__atomic_add_fetch (v, 1, __ATOMIC_RELEASE) != 4)
++    abort ();
++
++  if (__atomic_add_fetch (v, count, __ATOMIC_ACQ_REL) != 5)
++    abort ();
++
++  if (__atomic_add_fetch (v, count, __ATOMIC_SEQ_CST) != 6)
++    abort ();
++}
++
++
++void
++test_sub_fetch (char* v)
++{
++  *v = res = 20;
++  count = 0;
++
++  if (__atomic_sub_fetch (v, count + 1, __ATOMIC_RELAXED) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, 1, __ATOMIC_CONSUME) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, count + 1, __ATOMIC_ACQUIRE) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, 1, __ATOMIC_RELEASE) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, count + 1, __ATOMIC_ACQ_REL) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, count + 1, __ATOMIC_SEQ_CST) !=  --res)
++    abort ();
++}
++
++void
++test_and_fetch (char* v)
++{
++  *v = init;
++
++  if (__atomic_and_fetch (v, 0, __ATOMIC_RELAXED) !=  0)
++    abort ();
++
++  *v = init;
++  if (__atomic_and_fetch (v, init, __ATOMIC_CONSUME) !=  init)
++    abort ();
++
++  if (__atomic_and_fetch (v, 0, __ATOMIC_ACQUIRE) !=  0)
++    abort ();
++
++  *v = ~*v;
++  if (__atomic_and_fetch (v, init, __ATOMIC_RELEASE) !=  init)
++    abort ();
++
++  if (__atomic_and_fetch (v, 0, __ATOMIC_ACQ_REL) !=  0)
++    abort ();
++
++  *v = ~*v;
++  if (__atomic_and_fetch (v, 0, __ATOMIC_SEQ_CST) !=  0)
++    abort ();
++}
++
++void
++test_nand_fetch (char* v)
++{
++  *v = init;
++
++  if (__atomic_nand_fetch (v, 0, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_nand_fetch (v, init, __ATOMIC_CONSUME) !=  0)
++    abort ();
++
++  if (__atomic_nand_fetch (v, 0, __ATOMIC_ACQUIRE) !=  init)
++    abort ();
++
++  if (__atomic_nand_fetch (v, init, __ATOMIC_RELEASE) !=  0)
++    abort ();
++
++  if (__atomic_nand_fetch (v, init, __ATOMIC_ACQ_REL) !=  init)
++    abort ();
++
++  if (__atomic_nand_fetch (v, 0, __ATOMIC_SEQ_CST) !=  init)
++    abort ();
++}
++
++
++
++void
++test_xor_fetch (char* v)
++{
++  *v = init;
++  count = 0;
++
++  if (__atomic_xor_fetch (v, count, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_xor_fetch (v, ~count, __ATOMIC_CONSUME) !=  0)
++    abort ();
++
++  if (__atomic_xor_fetch (v, 0, __ATOMIC_ACQUIRE) !=  0)
++    abort ();
++
++  if (__atomic_xor_fetch (v, ~count, __ATOMIC_RELEASE) !=  init)
++    abort ();
++
++  if (__atomic_xor_fetch (v, 0, __ATOMIC_ACQ_REL) !=  init)
++    abort ();
++
++  if (__atomic_xor_fetch (v, ~count, __ATOMIC_SEQ_CST) !=  0)
++    abort ();
++}
++
++void
++test_or_fetch (char* v)
++{
++  *v = 0;
++  count = 1;
++
++  if (__atomic_or_fetch (v, count, __ATOMIC_RELAXED) !=  1)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, 2, __ATOMIC_CONSUME) !=  3)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, count, __ATOMIC_ACQUIRE) !=  7)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, 8, __ATOMIC_RELEASE) !=  15)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, count, __ATOMIC_ACQ_REL) !=  31)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, count, __ATOMIC_SEQ_CST) !=  63)
++    abort ();
++}
++
++
++/* Test the OP routines with a result which isn't used. Use both variations
++   within each function.  */
++
++void
++test_add (char* v)
++{
++  *v = 0;
++  count = 1;
++
++  __atomic_add_fetch (v, count, __ATOMIC_RELAXED);
++  if (*v != 1)
++    abort ();
++
++  __atomic_fetch_add (v, count, __ATOMIC_CONSUME);
++  if (*v != 2)
++    abort ();
++
++  __atomic_add_fetch (v, 1 , __ATOMIC_ACQUIRE);
++  if (*v != 3)
++    abort ();
++
++  __atomic_fetch_add (v, 1, __ATOMIC_RELEASE);
++  if (*v != 4)
++    abort ();
++
++  __atomic_add_fetch (v, count, __ATOMIC_ACQ_REL);
++  if (*v != 5)
++    abort ();
++
++  __atomic_fetch_add (v, count, __ATOMIC_SEQ_CST);
++  if (*v != 6)
++    abort ();
++}
++
++
++void
++test_sub (char* v)
++{
++  *v = res = 20;
++  count = 0;
++
++  __atomic_sub_fetch (v, count + 1, __ATOMIC_RELAXED);
++  if (*v != --res)
++    abort ();
++
++  __atomic_fetch_sub (v, count + 1, __ATOMIC_CONSUME);
++  if (*v != --res)
++    abort ();
++
++  __atomic_sub_fetch (v, 1, __ATOMIC_ACQUIRE);
++  if (*v != --res)
++    abort ();
++
++  __atomic_fetch_sub (v, 1, __ATOMIC_RELEASE);
++  if (*v != --res)
++    abort ();
++
++  __atomic_sub_fetch (v, count + 1, __ATOMIC_ACQ_REL);
++  if (*v != --res)
++    abort ();
++
++  __atomic_fetch_sub (v, count + 1, __ATOMIC_SEQ_CST);
++  if (*v != --res)
++    abort ();
++}
++
++void
++test_and (char* v)
++{
++  *v = init;
++
++  __atomic_and_fetch (v, 0, __ATOMIC_RELAXED);
++  if (*v != 0)
++    abort ();
++
++  *v = init;
++  __atomic_fetch_and (v, init, __ATOMIC_CONSUME);
++  if (*v != init)
++    abort ();
++
++  __atomic_and_fetch (v, 0, __ATOMIC_ACQUIRE);
++  if (*v != 0)
++    abort ();
++
++  *v = ~*v;
++  __atomic_fetch_and (v, init, __ATOMIC_RELEASE);
++  if (*v != init)
++    abort ();
++
++  __atomic_and_fetch (v, 0, __ATOMIC_ACQ_REL);
++  if (*v != 0)
++    abort ();
++
++  *v = ~*v;
++  __atomic_fetch_and (v, 0, __ATOMIC_SEQ_CST);
++  if (*v != 0)
++    abort ();
++}
++
++void
++test_nand (char* v)
++{
++  *v = init;
++
++  __atomic_fetch_nand (v, 0, __ATOMIC_RELAXED);
++  if (*v != init)
++    abort ();
++
++  __atomic_fetch_nand (v, init, __ATOMIC_CONSUME);
++  if (*v != 0)
++    abort ();
++
++  __atomic_nand_fetch (v, 0, __ATOMIC_ACQUIRE);
++  if (*v != init)
++    abort ();
++
++  __atomic_nand_fetch (v, init, __ATOMIC_RELEASE);
++  if (*v != 0)
++    abort ();
++
++  __atomic_fetch_nand (v, init, __ATOMIC_ACQ_REL);
++  if (*v != init)
++    abort ();
++
++  __atomic_nand_fetch (v, 0, __ATOMIC_SEQ_CST);
++  if (*v != init)
++    abort ();
++}
++
++
++
++void
++test_xor (char* v)
++{
++  *v = init;
++  count = 0;
++
++  __atomic_xor_fetch (v, count, __ATOMIC_RELAXED);
++  if (*v != init)
++    abort ();
++
++  __atomic_fetch_xor (v, ~count, __ATOMIC_CONSUME);
++  if (*v != 0)
++    abort ();
++
++  __atomic_xor_fetch (v, 0, __ATOMIC_ACQUIRE);
++  if (*v != 0)
++    abort ();
++
++  __atomic_fetch_xor (v, ~count, __ATOMIC_RELEASE);
++  if (*v != init)
++    abort ();
++
++  __atomic_fetch_xor (v, 0, __ATOMIC_ACQ_REL);
++  if (*v != init)
++    abort ();
++
++  __atomic_xor_fetch (v, ~count, __ATOMIC_SEQ_CST);
++  if (*v != 0)
++    abort ();
++}
++
++void
++test_or (char* v)
++{
++  *v = 0;
++  count = 1;
++
++  __atomic_or_fetch (v, count, __ATOMIC_RELAXED);
++  if (*v != 1)
++    abort ();
++
++  count *= 2;
++  __atomic_fetch_or (v, count, __ATOMIC_CONSUME);
++  if (*v != 3)
++    abort ();
++
++  count *= 2;
++  __atomic_or_fetch (v, 4, __ATOMIC_ACQUIRE);
++  if (*v != 7)
++    abort ();
++
++  count *= 2;
++  __atomic_fetch_or (v, 8, __ATOMIC_RELEASE);
++  if (*v != 15)
++    abort ();
++
++  count *= 2;
++  __atomic_or_fetch (v, count, __ATOMIC_ACQ_REL);
++  if (*v != 31)
++    abort ();
++
++  count *= 2;
++  __atomic_fetch_or (v, count, __ATOMIC_SEQ_CST);
++  if (*v != 63)
++    abort ();
++}
++
++int
++main ()
++{
++  char* V[] = {&A.a, &A.b, &A.c, &A.d};
++
++  for (int i = 0; i < 4; i++) {
++    test_fetch_add (V[i]);
++    test_fetch_sub (V[i]);
++    test_fetch_and (V[i]);
++    test_fetch_nand (V[i]);
++    test_fetch_xor (V[i]);
++    test_fetch_or (V[i]);
++
++    test_add_fetch (V[i]);
++    test_sub_fetch (V[i]);
++    test_and_fetch (V[i]);
++    test_nand_fetch (V[i]);
++    test_xor_fetch (V[i]);
++    test_or_fetch (V[i]);
++
++    test_add (V[i]);
++    test_sub (V[i]);
++    test_and (V[i]);
++    test_nand (V[i]);
++    test_xor (V[i]);
++    test_or (V[i]);
++  }
++
++  return 0;
++}
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-4.c
+@@ -0,0 +1,566 @@
++/* Check all short alignments.  */
++/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-op-2.c */
++/* Test __atomic routines for existence and proper execution on 2 byte
++   values with each valid memory model.  */
++/* { dg-do run } */
++/* { dg-options "-minline-atomics -Wno-address-of-packed-member" } */
++
++/* Test the execution of the __atomic_*OP builtin routines for a short.  */
++
++extern void abort(void);
++
++short count, res;
++const short init = ~0;
++
++struct A
++{
++   short a;
++   short b;
++} __attribute__ ((packed)) A;
++
++/* The fetch_op routines return the original value before the operation.  */
++
++void
++test_fetch_add (short* v)
++{
++  *v = 0;
++  count = 1;
++
++  if (__atomic_fetch_add (v, count, __ATOMIC_RELAXED) != 0)
++    abort ();
++
++  if (__atomic_fetch_add (v, 1, __ATOMIC_CONSUME) != 1)
++    abort ();
++
++  if (__atomic_fetch_add (v, count, __ATOMIC_ACQUIRE) != 2)
++    abort ();
++
++  if (__atomic_fetch_add (v, 1, __ATOMIC_RELEASE) != 3)
++    abort ();
++
++  if (__atomic_fetch_add (v, count, __ATOMIC_ACQ_REL) != 4)
++    abort ();
++
++  if (__atomic_fetch_add (v, 1, __ATOMIC_SEQ_CST) != 5)
++    abort ();
++}
++
++
++void
++test_fetch_sub (short* v)
++{
++  *v = res = 20;
++  count = 0;
++
++  if (__atomic_fetch_sub (v, count + 1, __ATOMIC_RELAXED) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, 1, __ATOMIC_CONSUME) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, count + 1, __ATOMIC_ACQUIRE) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, 1, __ATOMIC_RELEASE) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, count + 1, __ATOMIC_ACQ_REL) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, 1, __ATOMIC_SEQ_CST) !=  res--)
++    abort ();
++}
++
++void
++test_fetch_and (short* v)
++{
++  *v = init;
++
++  if (__atomic_fetch_and (v, 0, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_fetch_and (v, init, __ATOMIC_CONSUME) !=  0)
++    abort ();
++
++  if (__atomic_fetch_and (v, 0, __ATOMIC_ACQUIRE) !=  0)
++    abort ();
++
++  *v = ~*v;
++  if (__atomic_fetch_and (v, init, __ATOMIC_RELEASE) !=  init)
++    abort ();
++
++  if (__atomic_fetch_and (v, 0, __ATOMIC_ACQ_REL) !=  init)
++    abort ();
++
++  if (__atomic_fetch_and (v, 0, __ATOMIC_SEQ_CST) !=  0)
++    abort ();
++}
++
++void
++test_fetch_nand (short* v)
++{
++  *v = init;
++
++  if (__atomic_fetch_nand (v, 0, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_fetch_nand (v, init, __ATOMIC_CONSUME) !=  init)
++    abort ();
++
++  if (__atomic_fetch_nand (v, 0, __ATOMIC_ACQUIRE) !=  0 )
++    abort ();
++
++  if (__atomic_fetch_nand (v, init, __ATOMIC_RELEASE) !=  init)
++    abort ();
++
++  if (__atomic_fetch_nand (v, init, __ATOMIC_ACQ_REL) !=  0)
++    abort ();
++
++  if (__atomic_fetch_nand (v, 0, __ATOMIC_SEQ_CST) !=  init)
++    abort ();
++}
++
++void
++test_fetch_xor (short* v)
++{
++  *v = init;
++  count = 0;
++
++  if (__atomic_fetch_xor (v, count, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_fetch_xor (v, ~count, __ATOMIC_CONSUME) !=  init)
++    abort ();
++
++  if (__atomic_fetch_xor (v, 0, __ATOMIC_ACQUIRE) !=  0)
++    abort ();
++
++  if (__atomic_fetch_xor (v, ~count, __ATOMIC_RELEASE) !=  0)
++    abort ();
++
++  if (__atomic_fetch_xor (v, 0, __ATOMIC_ACQ_REL) !=  init)
++    abort ();
++
++  if (__atomic_fetch_xor (v, ~count, __ATOMIC_SEQ_CST) !=  init)
++    abort ();
++}
++
++void
++test_fetch_or (short* v)
++{
++  *v = 0;
++  count = 1;
++
++  if (__atomic_fetch_or (v, count, __ATOMIC_RELAXED) !=  0)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, 2, __ATOMIC_CONSUME) !=  1)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, count, __ATOMIC_ACQUIRE) !=  3)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, 8, __ATOMIC_RELEASE) !=  7)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, count, __ATOMIC_ACQ_REL) !=  15)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, count, __ATOMIC_SEQ_CST) !=  31)
++    abort ();
++}
++
++/* The OP_fetch routines return the new value after the operation.  */
++
++void
++test_add_fetch (short* v)
++{
++  *v = 0;
++  count = 1;
++
++  if (__atomic_add_fetch (v, count, __ATOMIC_RELAXED) != 1)
++    abort ();
++
++  if (__atomic_add_fetch (v, 1, __ATOMIC_CONSUME) != 2)
++    abort ();
++
++  if (__atomic_add_fetch (v, count, __ATOMIC_ACQUIRE) != 3)
++    abort ();
++
++  if (__atomic_add_fetch (v, 1, __ATOMIC_RELEASE) != 4)
++    abort ();
++
++  if (__atomic_add_fetch (v, count, __ATOMIC_ACQ_REL) != 5)
++    abort ();
++
++  if (__atomic_add_fetch (v, count, __ATOMIC_SEQ_CST) != 6)
++    abort ();
++}
++
++
++void
++test_sub_fetch (short* v)
++{
++  *v = res = 20;
++  count = 0;
++
++  if (__atomic_sub_fetch (v, count + 1, __ATOMIC_RELAXED) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, 1, __ATOMIC_CONSUME) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, count + 1, __ATOMIC_ACQUIRE) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, 1, __ATOMIC_RELEASE) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, count + 1, __ATOMIC_ACQ_REL) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, count + 1, __ATOMIC_SEQ_CST) !=  --res)
++    abort ();
++}
++
++void
++test_and_fetch (short* v)
++{
++  *v = init;
++
++  if (__atomic_and_fetch (v, 0, __ATOMIC_RELAXED) !=  0)
++    abort ();
++
++  *v = init;
++  if (__atomic_and_fetch (v, init, __ATOMIC_CONSUME) !=  init)
++    abort ();
++
++  if (__atomic_and_fetch (v, 0, __ATOMIC_ACQUIRE) !=  0)
++    abort ();
++
++  *v = ~*v;
++  if (__atomic_and_fetch (v, init, __ATOMIC_RELEASE) !=  init)
++    abort ();
++
++  if (__atomic_and_fetch (v, 0, __ATOMIC_ACQ_REL) !=  0)
++    abort ();
++
++  *v = ~*v;
++  if (__atomic_and_fetch (v, 0, __ATOMIC_SEQ_CST) !=  0)
++    abort ();
++}
++
++void
++test_nand_fetch (short* v)
++{
++  *v = init;
++
++  if (__atomic_nand_fetch (v, 0, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_nand_fetch (v, init, __ATOMIC_CONSUME) !=  0)
++    abort ();
++
++  if (__atomic_nand_fetch (v, 0, __ATOMIC_ACQUIRE) !=  init)
++    abort ();
++
++  if (__atomic_nand_fetch (v, init, __ATOMIC_RELEASE) !=  0)
++    abort ();
++
++  if (__atomic_nand_fetch (v, init, __ATOMIC_ACQ_REL) !=  init)
++    abort ();
++
++  if (__atomic_nand_fetch (v, 0, __ATOMIC_SEQ_CST) !=  init)
++    abort ();
++}
++
++
++
++void
++test_xor_fetch (short* v)
++{
++  *v = init;
++  count = 0;
++
++  if (__atomic_xor_fetch (v, count, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_xor_fetch (v, ~count, __ATOMIC_CONSUME) !=  0)
++    abort ();
++
++  if (__atomic_xor_fetch (v, 0, __ATOMIC_ACQUIRE) !=  0)
++    abort ();
++
++  if (__atomic_xor_fetch (v, ~count, __ATOMIC_RELEASE) !=  init)
++    abort ();
++
++  if (__atomic_xor_fetch (v, 0, __ATOMIC_ACQ_REL) !=  init)
++    abort ();
++
++  if (__atomic_xor_fetch (v, ~count, __ATOMIC_SEQ_CST) !=  0)
++    abort ();
++}
++
++void
++test_or_fetch (short* v)
++{
++  *v = 0;
++  count = 1;
++
++  if (__atomic_or_fetch (v, count, __ATOMIC_RELAXED) !=  1)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, 2, __ATOMIC_CONSUME) !=  3)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, count, __ATOMIC_ACQUIRE) !=  7)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, 8, __ATOMIC_RELEASE) !=  15)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, count, __ATOMIC_ACQ_REL) !=  31)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, count, __ATOMIC_SEQ_CST) !=  63)
++    abort ();
++}
++
++
++/* Test the OP routines with a result which isn't used. Use both variations
++   within each function.  */
++
++void
++test_add (short* v)
++{
++  *v = 0;
++  count = 1;
++
++  __atomic_add_fetch (v, count, __ATOMIC_RELAXED);
++  if (*v != 1)
++    abort ();
++
++  __atomic_fetch_add (v, count, __ATOMIC_CONSUME);
++  if (*v != 2)
++    abort ();
++
++  __atomic_add_fetch (v, 1 , __ATOMIC_ACQUIRE);
++  if (*v != 3)
++    abort ();
++
++  __atomic_fetch_add (v, 1, __ATOMIC_RELEASE);
++  if (*v != 4)
++    abort ();
++
++  __atomic_add_fetch (v, count, __ATOMIC_ACQ_REL);
++  if (*v != 5)
++    abort ();
++
++  __atomic_fetch_add (v, count, __ATOMIC_SEQ_CST);
++  if (*v != 6)
++    abort ();
++}
++
++
++void
++test_sub (short* v)
++{
++  *v = res = 20;
++  count = 0;
++
++  __atomic_sub_fetch (v, count + 1, __ATOMIC_RELAXED);
++  if (*v != --res)
++    abort ();
++
++  __atomic_fetch_sub (v, count + 1, __ATOMIC_CONSUME);
++  if (*v != --res)
++    abort ();
++
++  __atomic_sub_fetch (v, 1, __ATOMIC_ACQUIRE);
++  if (*v != --res)
++    abort ();
++
++  __atomic_fetch_sub (v, 1, __ATOMIC_RELEASE);
++  if (*v != --res)
++    abort ();
++
++  __atomic_sub_fetch (v, count + 1, __ATOMIC_ACQ_REL);
++  if (*v != --res)
++    abort ();
++
++  __atomic_fetch_sub (v, count + 1, __ATOMIC_SEQ_CST);
++  if (*v != --res)
++    abort ();
++}
++
++void
++test_and (short* v)
++{
++  *v = init;
++
++  __atomic_and_fetch (v, 0, __ATOMIC_RELAXED);
++  if (*v != 0)
++    abort ();
++
++  *v = init;
++  __atomic_fetch_and (v, init, __ATOMIC_CONSUME);
++  if (*v != init)
++    abort ();
++
++  __atomic_and_fetch (v, 0, __ATOMIC_ACQUIRE);
++  if (*v != 0)
++    abort ();
++
++  *v = ~*v;
++  __atomic_fetch_and (v, init, __ATOMIC_RELEASE);
++  if (*v != init)
++    abort ();
++
++  __atomic_and_fetch (v, 0, __ATOMIC_ACQ_REL);
++  if (*v != 0)
++    abort ();
++
++  *v = ~*v;
++  __atomic_fetch_and (v, 0, __ATOMIC_SEQ_CST);
++  if (*v != 0)
++    abort ();
++}
++
++void
++test_nand (short* v)
++{
++  *v = init;
++
++  __atomic_fetch_nand (v, 0, __ATOMIC_RELAXED);
++  if (*v != init)
++    abort ();
++
++  __atomic_fetch_nand (v, init, __ATOMIC_CONSUME);
++  if (*v != 0)
++    abort ();
++
++  __atomic_nand_fetch (v, 0, __ATOMIC_ACQUIRE);
++  if (*v != init)
++    abort ();
++
++  __atomic_nand_fetch (v, init, __ATOMIC_RELEASE);
++  if (*v != 0)
++    abort ();
++
++  __atomic_fetch_nand (v, init, __ATOMIC_ACQ_REL);
++  if (*v != init)
++    abort ();
++
++  __atomic_nand_fetch (v, 0, __ATOMIC_SEQ_CST);
++  if (*v != init)
++    abort ();
++}
++
++
++
++void
++test_xor (short* v)
++{
++  *v = init;
++  count = 0;
++
++  __atomic_xor_fetch (v, count, __ATOMIC_RELAXED);
++  if (*v != init)
++    abort ();
++
++  __atomic_fetch_xor (v, ~count, __ATOMIC_CONSUME);
++  if (*v != 0)
++    abort ();
++
++  __atomic_xor_fetch (v, 0, __ATOMIC_ACQUIRE);
++  if (*v != 0)
++    abort ();
++
++  __atomic_fetch_xor (v, ~count, __ATOMIC_RELEASE);
++  if (*v != init)
++    abort ();
++
++  __atomic_fetch_xor (v, 0, __ATOMIC_ACQ_REL);
++  if (*v != init)
++    abort ();
++
++  __atomic_xor_fetch (v, ~count, __ATOMIC_SEQ_CST);
++  if (*v != 0)
++    abort ();
++}
++
++void
++test_or (short* v)
++{
++  *v = 0;
++  count = 1;
++
++  __atomic_or_fetch (v, count, __ATOMIC_RELAXED);
++  if (*v != 1)
++    abort ();
++
++  count *= 2;
++  __atomic_fetch_or (v, count, __ATOMIC_CONSUME);
++  if (*v != 3)
++    abort ();
++
++  count *= 2;
++  __atomic_or_fetch (v, 4, __ATOMIC_ACQUIRE);
++  if (*v != 7)
++    abort ();
++
++  count *= 2;
++  __atomic_fetch_or (v, 8, __ATOMIC_RELEASE);
++  if (*v != 15)
++    abort ();
++
++  count *= 2;
++  __atomic_or_fetch (v, count, __ATOMIC_ACQ_REL);
++  if (*v != 31)
++    abort ();
++
++  count *= 2;
++  __atomic_fetch_or (v, count, __ATOMIC_SEQ_CST);
++  if (*v != 63)
++    abort ();
++}
++
++int
++main () {
++  short* V[] = {&A.a, &A.b};
++
++  for (int i = 0; i < 2; i++) {
++    test_fetch_add (V[i]);
++    test_fetch_sub (V[i]);
++    test_fetch_and (V[i]);
++    test_fetch_nand (V[i]);
++    test_fetch_xor (V[i]);
++    test_fetch_or (V[i]);
++
++    test_add_fetch (V[i]);
++    test_sub_fetch (V[i]);
++    test_and_fetch (V[i]);
++    test_nand_fetch (V[i]);
++    test_xor_fetch (V[i]);
++    test_or_fetch (V[i]);
++
++    test_add (V[i]);
++    test_sub (V[i]);
++    test_and (V[i]);
++    test_nand (V[i]);
++    test_xor (V[i]);
++    test_or (V[i]);
++  }
++
++  return 0;
++}
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-5.c
+@@ -0,0 +1,87 @@
++/* Test __atomic routines for existence and proper execution on 1 byte
++   values with each valid memory model.  */
++/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-compare-exchange-1.c */
++/* { dg-do run } */
++/* { dg-options "-minline-atomics" } */
++
++/* Test the execution of the __atomic_compare_exchange_n builtin for a char.  */
++
++extern void abort(void);
++
++char v = 0;
++char expected = 0;
++char max = ~0;
++char desired = ~0;
++char zero = 0;
++
++#define STRONG 0
++#define WEAK 1
++
++int
++main ()
++{
++
++  if (!__atomic_compare_exchange_n (&v, &expected, max, STRONG , __ATOMIC_RELAXED, __ATOMIC_RELAXED))
++    abort ();
++  if (expected != 0)
++    abort ();
++
++  if (__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
++    abort ();
++  if (expected != max)
++    abort ();
++
++  if (!__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
++    abort ();
++  if (expected != max)
++    abort ();
++  if (v != 0)
++    abort ();
++
++  if (__atomic_compare_exchange_n (&v, &expected, desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
++    abort ();
++  if (expected != 0)
++    abort ();
++
++  if (!__atomic_compare_exchange_n (&v, &expected, desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
++    abort ();
++  if (expected != 0)
++    abort ();
++  if (v != max)
++    abort ();
++
++  /* Now test the generic version.  */
++
++  v = 0;
++
++  if (!__atomic_compare_exchange (&v, &expected, &max, STRONG, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
++    abort ();
++  if (expected != 0)
++    abort ();
++
++  if (__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
++    abort ();
++  if (expected != max)
++    abort ();
++
++  if (!__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
++    abort ();
++  if (expected != max)
++    abort ();
++  if (v != 0)
++    abort ();
++
++  if (__atomic_compare_exchange (&v, &expected, &desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
++    abort ();
++  if (expected != 0)
++    abort ();
++
++  if (!__atomic_compare_exchange (&v, &expected, &desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
++    abort ();
++  if (expected != 0)
++    abort ();
++  if (v != max)
++    abort ();
++
++  return 0;
++}
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-6.c
+@@ -0,0 +1,87 @@
++/* Test __atomic routines for existence and proper execution on 2 byte
++   values with each valid memory model.  */
++/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-compare-exchange-2.c */
++/* { dg-do run } */
++/* { dg-options "-minline-atomics" } */
++
++/* Test the execution of the __atomic_compare_exchange_n builtin for a short.  */
++
++extern void abort(void);
++
++short v = 0;
++short expected = 0;
++short max = ~0;
++short desired = ~0;
++short zero = 0;
++
++#define STRONG 0
++#define WEAK 1
++
++int
++main ()
++{
++
++  if (!__atomic_compare_exchange_n (&v, &expected, max, STRONG , __ATOMIC_RELAXED, __ATOMIC_RELAXED))
++    abort ();
++  if (expected != 0)
++    abort ();
++
++  if (__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
++    abort ();
++  if (expected != max)
++    abort ();
++
++  if (!__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
++    abort ();
++  if (expected != max)
++    abort ();
++  if (v != 0)
++    abort ();
++
++  if (__atomic_compare_exchange_n (&v, &expected, desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
++    abort ();
++  if (expected != 0)
++    abort ();
++
++  if (!__atomic_compare_exchange_n (&v, &expected, desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
++    abort ();
++  if (expected != 0)
++    abort ();
++  if (v != max)
++    abort ();
++
++  /* Now test the generic version.  */
++
++  v = 0;
++
++  if (!__atomic_compare_exchange (&v, &expected, &max, STRONG, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
++    abort ();
++  if (expected != 0)
++    abort ();
++
++  if (__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
++    abort ();
++  if (expected != max)
++    abort ();
++
++  if (!__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
++    abort ();
++  if (expected != max)
++    abort ();
++  if (v != 0)
++    abort ();
++
++  if (__atomic_compare_exchange (&v, &expected, &desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
++    abort ();
++  if (expected != 0)
++    abort ();
++
++  if (!__atomic_compare_exchange (&v, &expected, &desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
++    abort ();
++  if (expected != 0)
++    abort ();
++  if (v != max)
++    abort ();
++
++  return 0;
++}
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-7.c
+@@ -0,0 +1,69 @@
++/* Test __atomic routines for existence and proper execution on 1 byte
++   values with each valid memory model.  */
++/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-exchange-1.c */
++/* { dg-do run } */
++/* { dg-options "-minline-atomics" } */
++
++/* Test the execution of the __atomic_exchange_n builtin for a char.  */
++
++extern void abort(void);
++
++char v, count, ret;
++
++int
++main ()
++{
++  v = 0;
++  count = 0;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELAXED) != count)
++    abort ();
++  count++;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQUIRE) != count)
++    abort ();
++  count++;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELEASE) != count)
++    abort ();
++  count++;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQ_REL) != count)
++    abort ();
++  count++;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_SEQ_CST) != count)
++    abort ();
++  count++;
++
++  /* Now test the generic version.  */
++
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_RELAXED);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQUIRE);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_RELEASE);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQ_REL);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_SEQ_CST);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  return 0;
++}
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-8.c
+@@ -0,0 +1,69 @@
++/* Test __atomic routines for existence and proper execution on 2 byte
++   values with each valid memory model.  */
++/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-exchange-2.c */
++/* { dg-do run } */
++/* { dg-options "-minline-atomics" } */
++
++/* Test the execution of the __atomic_exchange_n builtin for a short.  */
++
++extern void abort(void);
++
++short v, count, ret;
++
++int
++main ()
++{
++  v = 0;
++  count = 0;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELAXED) != count)
++    abort ();
++  count++;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQUIRE) != count)
++    abort ();
++  count++;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELEASE) != count)
++    abort ();
++  count++;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQ_REL) != count)
++    abort ();
++  count++;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_SEQ_CST) != count)
++    abort ();
++  count++;
++
++  /* Now test the generic version.  */
++
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_RELAXED);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQUIRE);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_RELEASE);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQ_REL);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_SEQ_CST);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  return 0;
++}
+--- a/libgcc/config/riscv/atomic.c
++++ b/libgcc/config/riscv/atomic.c
+@@ -30,6 +30,8 @@ see the files COPYING3 and COPYING.RUNTI
+ #define INVERT		"not %[tmp1], %[tmp1]\n\t"
+ #define DONT_INVERT	""
+ 
++/* Logic duplicated in gcc/config/riscv/sync.md for use when inlining is enabled.  */
++
+ #define GENERATE_FETCH_AND_OP(type, size, opname, insn, invert, cop)	\
+   type __sync_fetch_and_ ## opname ## _ ## size (type *p, type v)	\
+   {									\
diff --git a/toolchain/gcc/patches-12.x/701-riscv-linux-Don-t-add-latomic-with-pthread.patch b/toolchain/gcc/patches-12.x/701-riscv-linux-Don-t-add-latomic-with-pthread.patch
new file mode 100644
index 0000000000..328c7be9ce
--- /dev/null
+++ b/toolchain/gcc/patches-12.x/701-riscv-linux-Don-t-add-latomic-with-pthread.patch
@@ -0,0 +1,36 @@
+From 203f3060dd363361b172f7295f42bb6bf5ac0b3b Mon Sep 17 00:00:00 2001
+From: Andreas Schwab <schwab@suse.de>
+Date: Sat, 23 Apr 2022 15:48:42 +0200
+Subject: [PATCH] riscv/linux: Don't add -latomic with -pthread
+
+Now that we have support for inline subword atomic operations, it is no
+longer necessary to link against libatomic.  This also fixes testsuite
+failures because the framework does not properly set up the linker flags
+for finding libatomic.
+The use of atomic operations is also independent of the use of libpthread.
+
+gcc/
+	* config/riscv/linux.h (LIB_SPEC): Don't redefine.
+---
+ gcc/config/riscv/linux.h | 10 ----------
+ 1 file changed, 10 deletions(-)
+
+--- a/gcc/config/riscv/linux.h
++++ b/gcc/config/riscv/linux.h
+@@ -35,16 +35,6 @@ along with GCC; see the file COPYING3.
+ #undef MUSL_DYNAMIC_LINKER
+ #define MUSL_DYNAMIC_LINKER "/lib/ld-musl-riscv" XLEN_SPEC MUSL_ABI_SUFFIX ".so.1"
+ 
+-/* Because RISC-V only has word-sized atomics, it requries libatomic where
+-   others do not.  So link libatomic by default, as needed.  */
+-#undef LIB_SPEC
+-#ifdef LD_AS_NEEDED_OPTION
+-#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC \
+-  " %{pthread:" LD_AS_NEEDED_OPTION " -latomic " LD_NO_AS_NEEDED_OPTION "}"
+-#else
+-#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC " -latomic "
+-#endif
+-
+ #define ICACHE_FLUSH_FUNC "__riscv_flush_icache"
+ 
+ #define CPP_SPEC "%{pthread:-D_REENTRANT}"
diff --git a/toolchain/gcc/patches-12.x/910-mbsd_multi.patch b/toolchain/gcc/patches-12.x/910-mbsd_multi.patch
index 0f75d0ce0e..9233c6a1d7 100644
--- a/toolchain/gcc/patches-12.x/910-mbsd_multi.patch
+++ b/toolchain/gcc/patches-12.x/910-mbsd_multi.patch
@@ -114,7 +114,7 @@ Date:   Tue Jul 31 00:52:27 2007 +0000
  ; On SVR4 targets, it also controls whether or not to emit a
 --- a/gcc/doc/invoke.texi
 +++ b/gcc/doc/invoke.texi
-@@ -9596,6 +9596,17 @@ This option is only supported for C and
+@@ -9597,6 +9597,17 @@ This option is only supported for C and
  @option{-Wall} and by @option{-Wpedantic}, which can be disabled with
  @option{-Wno-pointer-sign}.
  
diff --git a/toolchain/gcc/patches-13.x/700-RISCV-Inline-subword-atomic-ops.patch b/toolchain/gcc/patches-13.x/700-RISCV-Inline-subword-atomic-ops.patch
new file mode 100644
index 0000000000..752480bc4c
--- /dev/null
+++ b/toolchain/gcc/patches-13.x/700-RISCV-Inline-subword-atomic-ops.patch
@@ -0,0 +1,2021 @@
+From f797260adaf52bee0ec0e16190bbefbe1bfc3692 Mon Sep 17 00:00:00 2001
+From: Patrick O'Neill <patrick@rivosinc.com>
+Date: Tue, 18 Apr 2023 14:33:13 -0700
+Subject: [PATCH] RISCV: Inline subword atomic ops
+
+RISC-V has no support for subword atomic operations; code currently
+generates libatomic library calls.
+
+This patch changes the default behavior to inline subword atomic calls
+(using the same logic as the existing library call).
+Behavior can be specified using the -minline-atomics and
+-mno-inline-atomics command line flags.
+
+gcc/libgcc/config/riscv/atomic.c has the same logic implemented in asm.
+This will need to stay for backwards compatibility and the
+-mno-inline-atomics flag.
+
+2023-04-18 Patrick O'Neill <patrick@rivosinc.com>
+
+gcc/ChangeLog:
+	PR target/104338
+	* config/riscv/riscv-protos.h: Add helper function stubs.
+	* config/riscv/riscv.cc: Add helper functions for subword masking.
+	* config/riscv/riscv.opt: Add command-line flag.
+	* config/riscv/sync.md: Add masking logic and inline asm for fetch_and_op,
+	fetch_and_nand, CAS, and exchange ops.
+	* doc/invoke.texi: Add blurb regarding command-line flag.
+
+libgcc/ChangeLog:
+	PR target/104338
+	* config/riscv/atomic.c: Add reference to duplicate logic.
+
+gcc/testsuite/ChangeLog:
+	PR target/104338
+	* gcc.target/riscv/inline-atomics-1.c: New test.
+	* gcc.target/riscv/inline-atomics-2.c: New test.
+	* gcc.target/riscv/inline-atomics-3.c: New test.
+	* gcc.target/riscv/inline-atomics-4.c: New test.
+	* gcc.target/riscv/inline-atomics-5.c: New test.
+	* gcc.target/riscv/inline-atomics-6.c: New test.
+	* gcc.target/riscv/inline-atomics-7.c: New test.
+	* gcc.target/riscv/inline-atomics-8.c: New test.
+
+Signed-off-by: Patrick O'Neill <patrick@rivosinc.com>
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+---
+ gcc/config/riscv/riscv-protos.h               |   2 +
+ gcc/config/riscv/riscv.cc                     |  49 ++
+ gcc/config/riscv/riscv.opt                    |   4 +
+ gcc/config/riscv/sync.md                      | 301 +++++++++
+ gcc/doc/invoke.texi                           |  10 +-
+ .../gcc.target/riscv/inline-atomics-1.c       |  18 +
+ .../gcc.target/riscv/inline-atomics-2.c       |   9 +
+ .../gcc.target/riscv/inline-atomics-3.c       | 569 ++++++++++++++++++
+ .../gcc.target/riscv/inline-atomics-4.c       | 566 +++++++++++++++++
+ .../gcc.target/riscv/inline-atomics-5.c       |  87 +++
+ .../gcc.target/riscv/inline-atomics-6.c       |  87 +++
+ .../gcc.target/riscv/inline-atomics-7.c       |  69 +++
+ .../gcc.target/riscv/inline-atomics-8.c       |  69 +++
+ libgcc/config/riscv/atomic.c                  |   2 +
+ 14 files changed, 1841 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-1.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-2.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-3.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-4.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-5.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-6.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-7.c
+ create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-8.c
+
+--- a/gcc/config/riscv/riscv-protos.h
++++ b/gcc/config/riscv/riscv-protos.h
+@@ -79,6 +79,8 @@ extern void riscv_reinit (void);
+ extern poly_uint64 riscv_regmode_natural_size (machine_mode);
+ extern bool riscv_v_ext_vector_mode_p (machine_mode);
+ extern bool riscv_shamt_matches_mask_p (int, HOST_WIDE_INT);
++extern void riscv_subword_address (rtx, rtx *, rtx *, rtx *, rtx *);
++extern void riscv_lshift_subword (machine_mode, rtx, rtx, rtx *);
+ 
+ /* Routines implemented in riscv-c.cc.  */
+ void riscv_cpu_cpp_builtins (cpp_reader *);
+--- a/gcc/config/riscv/riscv.cc
++++ b/gcc/config/riscv/riscv.cc
+@@ -7143,6 +7143,55 @@ riscv_zero_call_used_regs (HARD_REG_SET
+ 							& ~zeroed_hardregs);
+ }
+ 
++/* Given memory reference MEM, expand code to compute the aligned
++   memory address, shift and mask values and store them into
++   *ALIGNED_MEM, *SHIFT, *MASK and *NOT_MASK.  */
++
++void
++riscv_subword_address (rtx mem, rtx *aligned_mem, rtx *shift, rtx *mask,
++		       rtx *not_mask)
++{
++  /* Align the memory address to a word.  */
++  rtx addr = force_reg (Pmode, XEXP (mem, 0));
++
++  rtx addr_mask = gen_int_mode (-4, Pmode);
++
++  rtx aligned_addr = gen_reg_rtx (Pmode);
++  emit_move_insn (aligned_addr,  gen_rtx_AND (Pmode, addr, addr_mask));
++
++  *aligned_mem = change_address (mem, SImode, aligned_addr);
++
++  /* Calculate the shift amount.  */
++  emit_move_insn (*shift, gen_rtx_AND (SImode, gen_lowpart (SImode, addr),
++				       gen_int_mode (3, SImode)));
++  emit_move_insn (*shift, gen_rtx_ASHIFT (SImode, *shift,
++					  gen_int_mode (3, SImode)));
++
++  /* Calculate the mask.  */
++  int unshifted_mask = GET_MODE_MASK (GET_MODE (mem));
++
++  emit_move_insn (*mask, gen_int_mode (unshifted_mask, SImode));
++
++  emit_move_insn (*mask, gen_rtx_ASHIFT (SImode, *mask,
++					 gen_lowpart (QImode, *shift)));
++
++  emit_move_insn (*not_mask, gen_rtx_NOT(SImode, *mask));
++}
++
++/* Leftshift a subword within an SImode register.  */
++
++void
++riscv_lshift_subword (machine_mode mode, rtx value, rtx shift,
++		      rtx *shifted_value)
++{
++  rtx value_reg = gen_reg_rtx (SImode);
++  emit_move_insn (value_reg, simplify_gen_subreg (SImode, value,
++						  mode, 0));
++
++  emit_move_insn(*shifted_value, gen_rtx_ASHIFT (SImode, value_reg,
++						 gen_lowpart (QImode, shift)));
++}
++
+ /* Initialize the GCC target structure.  */
+ #undef TARGET_ASM_ALIGNED_HI_OP
+ #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
+--- a/gcc/config/riscv/riscv.opt
++++ b/gcc/config/riscv/riscv.opt
+@@ -238,6 +238,10 @@ int riscv_sv_subext
+ TargetVariable
+ int riscv_xthead_subext
+ 
++minline-atomics
++Target Var(TARGET_INLINE_SUBWORD_ATOMIC) Init(1)
++Always inline subword atomic operations.
++
+ Enum
+ Name(isa_spec_class) Type(enum riscv_isa_spec_class)
+ Supported ISA specs (for use with the -misa-spec= option):
+--- a/gcc/config/riscv/sync.md
++++ b/gcc/config/riscv/sync.md
+@@ -21,8 +21,11 @@
+ 
+ (define_c_enum "unspec" [
+   UNSPEC_COMPARE_AND_SWAP
++  UNSPEC_COMPARE_AND_SWAP_SUBWORD
+   UNSPEC_SYNC_OLD_OP
++  UNSPEC_SYNC_OLD_OP_SUBWORD
+   UNSPEC_SYNC_EXCHANGE
++  UNSPEC_SYNC_EXCHANGE_SUBWORD
+   UNSPEC_ATOMIC_STORE
+   UNSPEC_MEMORY_BARRIER
+ ])
+@@ -91,6 +94,135 @@
+   [(set_attr "type" "atomic")
+    (set (attr "length") (const_int 8))])
+ 
++(define_insn "subword_atomic_fetch_strong_<atomic_optab>"
++  [(set (match_operand:SI 0 "register_operand" "=&r")		   ;; old value at mem
++	(match_operand:SI 1 "memory_operand" "+A"))		   ;; mem location
++   (set (match_dup 1)
++	(unspec_volatile:SI
++	  [(any_atomic:SI (match_dup 1)
++		     (match_operand:SI 2 "register_operand" "rI")) ;; value for op
++	   (match_operand:SI 3 "register_operand" "rI")]	   ;; mask
++	 UNSPEC_SYNC_OLD_OP_SUBWORD))
++    (match_operand:SI 4 "register_operand" "rI")		   ;; not_mask
++    (clobber (match_scratch:SI 5 "=&r"))			   ;; tmp_1
++    (clobber (match_scratch:SI 6 "=&r"))]			   ;; tmp_2
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++  {
++    return "1:\;"
++	   "lr.w.aq\t%0, %1\;"
++	   "<insn>\t%5, %0, %2\;"
++	   "and\t%5, %5, %3\;"
++	   "and\t%6, %0, %4\;"
++	   "or\t%6, %6, %5\;"
++	   "sc.w.rl\t%5, %6, %1\;"
++	   "bnez\t%5, 1b";
++  }
++  [(set (attr "length") (const_int 28))])
++
++(define_expand "atomic_fetch_nand<mode>"
++  [(match_operand:SHORT 0 "register_operand")			      ;; old value at mem
++   (not:SHORT (and:SHORT (match_operand:SHORT 1 "memory_operand")     ;; mem location
++			 (match_operand:SHORT 2 "reg_or_0_operand"))) ;; value for op
++   (match_operand:SI 3 "const_int_operand")]			      ;; model
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++{
++  /* We have no QImode/HImode atomics, so form a mask, then use
++     subword_atomic_fetch_strong_nand to implement a LR/SC version of the
++     operation. */
++
++  /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when inlining
++     is disabled */
++
++  rtx old = gen_reg_rtx (SImode);
++  rtx mem = operands[1];
++  rtx value = operands[2];
++  rtx aligned_mem = gen_reg_rtx (SImode);
++  rtx shift = gen_reg_rtx (SImode);
++  rtx mask = gen_reg_rtx (SImode);
++  rtx not_mask = gen_reg_rtx (SImode);
++
++  riscv_subword_address (mem, &aligned_mem, &shift, &mask, &not_mask);
++
++  rtx shifted_value = gen_reg_rtx (SImode);
++  riscv_lshift_subword (<MODE>mode, value, shift, &shifted_value);
++
++  emit_insn (gen_subword_atomic_fetch_strong_nand (old, aligned_mem,
++						   shifted_value,
++						   mask, not_mask));
++
++  emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old,
++					 gen_lowpart (QImode, shift)));
++
++  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, old));
++
++  DONE;
++})
++
++(define_insn "subword_atomic_fetch_strong_nand"
++  [(set (match_operand:SI 0 "register_operand" "=&r")			  ;; old value at mem
++	(match_operand:SI 1 "memory_operand" "+A"))			  ;; mem location
++   (set (match_dup 1)
++	(unspec_volatile:SI
++	  [(not:SI (and:SI (match_dup 1)
++			   (match_operand:SI 2 "register_operand" "rI"))) ;; value for op
++	   (match_operand:SI 3 "register_operand" "rI")]		  ;; mask
++	 UNSPEC_SYNC_OLD_OP_SUBWORD))
++    (match_operand:SI 4 "register_operand" "rI")			  ;; not_mask
++    (clobber (match_scratch:SI 5 "=&r"))				  ;; tmp_1
++    (clobber (match_scratch:SI 6 "=&r"))]				  ;; tmp_2
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++  {
++    return "1:\;"
++	   "lr.w.aq\t%0, %1\;"
++	   "and\t%5, %0, %2\;"
++	   "not\t%5, %5\;"
++	   "and\t%5, %5, %3\;"
++	   "and\t%6, %0, %4\;"
++	   "or\t%6, %6, %5\;"
++	   "sc.w.rl\t%5, %6, %1\;"
++	   "bnez\t%5, 1b";
++  }
++  [(set (attr "length") (const_int 32))])
++
++(define_expand "atomic_fetch_<atomic_optab><mode>"
++  [(match_operand:SHORT 0 "register_operand")			 ;; old value at mem
++   (any_atomic:SHORT (match_operand:SHORT 1 "memory_operand")	 ;; mem location
++		     (match_operand:SHORT 2 "reg_or_0_operand")) ;; value for op
++   (match_operand:SI 3 "const_int_operand")]			 ;; model
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++{
++  /* We have no QImode/HImode atomics, so form a mask, then use
++     subword_atomic_fetch_strong_<atomic_optab> to implement a LR/SC version
++     of the operation. */
++
++  /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when inlining
++     is disabled */
++
++  rtx old = gen_reg_rtx (SImode);
++  rtx mem = operands[1];
++  rtx value = operands[2];
++  rtx aligned_mem = gen_reg_rtx (SImode);
++  rtx shift = gen_reg_rtx (SImode);
++  rtx mask = gen_reg_rtx (SImode);
++  rtx not_mask = gen_reg_rtx (SImode);
++
++  riscv_subword_address (mem, &aligned_mem, &shift, &mask, &not_mask);
++
++  rtx shifted_value = gen_reg_rtx (SImode);
++  riscv_lshift_subword (<MODE>mode, value, shift, &shifted_value);
++
++  emit_insn (gen_subword_atomic_fetch_strong_<atomic_optab> (old, aligned_mem,
++							     shifted_value,
++							     mask, not_mask));
++
++  emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old,
++					 gen_lowpart (QImode, shift)));
++
++  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, old));
++
++  DONE;
++})
++
+ (define_insn "atomic_exchange<mode>"
+   [(set (match_operand:GPR 0 "register_operand" "=&r")
+ 	(unspec_volatile:GPR
+@@ -104,6 +236,56 @@
+   [(set_attr "type" "atomic")
+    (set (attr "length") (const_int 8))])
+ 
++(define_expand "atomic_exchange<mode>"
++  [(match_operand:SHORT 0 "register_operand") ;; old value at mem
++   (match_operand:SHORT 1 "memory_operand")   ;; mem location
++   (match_operand:SHORT 2 "register_operand") ;; value
++   (match_operand:SI 3 "const_int_operand")]  ;; model
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++{
++  rtx old = gen_reg_rtx (SImode);
++  rtx mem = operands[1];
++  rtx value = operands[2];
++  rtx aligned_mem = gen_reg_rtx (SImode);
++  rtx shift = gen_reg_rtx (SImode);
++  rtx mask = gen_reg_rtx (SImode);
++  rtx not_mask = gen_reg_rtx (SImode);
++
++  riscv_subword_address (mem, &aligned_mem, &shift, &mask, &not_mask);
++
++  rtx shifted_value = gen_reg_rtx (SImode);
++  riscv_lshift_subword (<MODE>mode, value, shift, &shifted_value);
++
++  emit_insn (gen_subword_atomic_exchange_strong (old, aligned_mem,
++						 shifted_value, not_mask));
++
++  emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old,
++					 gen_lowpart (QImode, shift)));
++
++  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, old));
++  DONE;
++})
++
++(define_insn "subword_atomic_exchange_strong"
++  [(set (match_operand:SI 0 "register_operand" "=&r")	 ;; old value at mem
++	(match_operand:SI 1 "memory_operand" "+A"))	 ;; mem location
++   (set (match_dup 1)
++	(unspec_volatile:SI
++	  [(match_operand:SI 2 "reg_or_0_operand" "rI")  ;; value
++	   (match_operand:SI 3 "reg_or_0_operand" "rI")] ;; not_mask
++      UNSPEC_SYNC_EXCHANGE_SUBWORD))
++    (clobber (match_scratch:SI 4 "=&r"))]		 ;; tmp_1
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++  {
++    return "1:\;"
++	   "lr.w.aq\t%0, %1\;"
++	   "and\t%4, %0, %3\;"
++	   "or\t%4, %4, %2\;"
++	   "sc.w.rl\t%4, %4, %1\;"
++	   "bnez\t%4, 1b";
++  }
++  [(set (attr "length") (const_int 20))])
++
+ (define_insn "atomic_cas_value_strong<mode>"
+   [(set (match_operand:GPR 0 "register_operand" "=&r")
+ 	(match_operand:GPR 1 "memory_operand" "+A"))
+@@ -153,6 +335,125 @@
+   DONE;
+ })
+ 
++(define_expand "atomic_compare_and_swap<mode>"
++  [(match_operand:SI 0 "register_operand")    ;; bool output
++   (match_operand:SHORT 1 "register_operand") ;; val output
++   (match_operand:SHORT 2 "memory_operand")   ;; memory
++   (match_operand:SHORT 3 "reg_or_0_operand") ;; expected value
++   (match_operand:SHORT 4 "reg_or_0_operand") ;; desired value
++   (match_operand:SI 5 "const_int_operand")   ;; is_weak
++   (match_operand:SI 6 "const_int_operand")   ;; mod_s
++   (match_operand:SI 7 "const_int_operand")]  ;; mod_f
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++{
++  emit_insn (gen_atomic_cas_value_strong<mode> (operands[1], operands[2],
++						operands[3], operands[4],
++						operands[6], operands[7]));
++
++  rtx val = gen_reg_rtx (SImode);
++  if (operands[1] != const0_rtx)
++    emit_move_insn (val, gen_rtx_SIGN_EXTEND (SImode, operands[1]));
++  else
++    emit_move_insn (val, const0_rtx);
++
++  rtx exp = gen_reg_rtx (SImode);
++  if (operands[3] != const0_rtx)
++    emit_move_insn (exp, gen_rtx_SIGN_EXTEND (SImode, operands[3]));
++  else
++    emit_move_insn (exp, const0_rtx);
++
++  rtx compare = val;
++  if (exp != const0_rtx)
++    {
++      rtx difference = gen_rtx_MINUS (SImode, val, exp);
++      compare = gen_reg_rtx (SImode);
++      emit_move_insn  (compare, difference);
++    }
++
++  if (word_mode != SImode)
++    {
++      rtx reg = gen_reg_rtx (word_mode);
++      emit_move_insn (reg, gen_rtx_SIGN_EXTEND (word_mode, compare));
++      compare = reg;
++    }
++
++  emit_move_insn (operands[0], gen_rtx_EQ (SImode, compare, const0_rtx));
++  DONE;
++})
++
++(define_expand "atomic_cas_value_strong<mode>"
++  [(match_operand:SHORT 0 "register_operand") ;; val output
++   (match_operand:SHORT 1 "memory_operand")   ;; memory
++   (match_operand:SHORT 2 "reg_or_0_operand") ;; expected value
++   (match_operand:SHORT 3 "reg_or_0_operand") ;; desired value
++   (match_operand:SI 4 "const_int_operand")   ;; mod_s
++   (match_operand:SI 5 "const_int_operand")   ;; mod_f
++   (match_scratch:SHORT 6)]
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++{
++  /* We have no QImode/HImode atomics, so form a mask, then use
++     subword_atomic_cas_strong to implement a LR/SC version of the
++     operation. */
++
++  /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when inlining
++     is disabled */
++
++  rtx old = gen_reg_rtx (SImode);
++  rtx mem = operands[1];
++  rtx aligned_mem = gen_reg_rtx (SImode);
++  rtx shift = gen_reg_rtx (SImode);
++  rtx mask = gen_reg_rtx (SImode);
++  rtx not_mask = gen_reg_rtx (SImode);
++
++  riscv_subword_address (mem, &aligned_mem, &shift, &mask, &not_mask);
++
++  rtx o = operands[2];
++  rtx n = operands[3];
++  rtx shifted_o = gen_reg_rtx (SImode);
++  rtx shifted_n = gen_reg_rtx (SImode);
++
++  riscv_lshift_subword (<MODE>mode, o, shift, &shifted_o);
++  riscv_lshift_subword (<MODE>mode, n, shift, &shifted_n);
++
++  emit_move_insn (shifted_o, gen_rtx_AND (SImode, shifted_o, mask));
++  emit_move_insn (shifted_n, gen_rtx_AND (SImode, shifted_n, mask));
++
++  emit_insn (gen_subword_atomic_cas_strong (old, aligned_mem,
++					    shifted_o, shifted_n,
++					    mask, not_mask));
++
++  emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old,
++					 gen_lowpart (QImode, shift)));
++
++  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, old));
++
++  DONE;
++})
++
++(define_insn "subword_atomic_cas_strong"
++  [(set (match_operand:SI 0 "register_operand" "=&r")			   ;; old value at mem
++	(match_operand:SI 1 "memory_operand" "+A"))			   ;; mem location
++   (set (match_dup 1)
++	(unspec_volatile:SI [(match_operand:SI 2 "reg_or_0_operand" "rJ")  ;; expected value
++			     (match_operand:SI 3 "reg_or_0_operand" "rJ")] ;; desired value
++	 UNSPEC_COMPARE_AND_SWAP_SUBWORD))
++	(match_operand:SI 4 "register_operand" "rI")			   ;; mask
++	(match_operand:SI 5 "register_operand" "rI")			   ;; not_mask
++	(clobber (match_scratch:SI 6 "=&r"))]				   ;; tmp_1
++  "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
++  {
++    return "1:\;"
++	   "lr.w.aq\t%0, %1\;"
++	   "and\t%6, %0, %4\;"
++	   "bne\t%6, %z2, 1f\;"
++	   "and\t%6, %0, %5\;"
++	   "or\t%6, %6, %3\;"
++	   "sc.w.rl\t%6, %6, %1\;"
++	   "bnez\t%6, 1b\;"
++	   "1:";
++  }
++  [(set (attr "length") (const_int 28))])
++
+ (define_expand "atomic_test_and_set"
+   [(match_operand:QI 0 "register_operand" "")     ;; bool output
+    (match_operand:QI 1 "memory_operand" "+A")    ;; memory
+--- a/gcc/doc/invoke.texi
++++ b/gcc/doc/invoke.texi
+@@ -1226,7 +1226,8 @@ See RS/6000 and PowerPC Options.
+ -mbig-endian  -mlittle-endian
+ -mstack-protector-guard=@var{guard}  -mstack-protector-guard-reg=@var{reg}
+ -mstack-protector-guard-offset=@var{offset}
+--mcsr-check -mno-csr-check}
++-mcsr-check -mno-csr-check
++-minline-atomics  -mno-inline-atomics}
+ 
+ @emph{RL78 Options}
+ @gccoptlist{-msim  -mmul=none  -mmul=g13  -mmul=g14  -mallregs
+@@ -29006,6 +29007,13 @@ Do or don't use smaller but slower prolo
+ library function calls.  The default is to use fast inline prologues and
+ epilogues.
+ 
++@opindex minline-atomics
++@item -minline-atomics
++@itemx -mno-inline-atomics
++Do or don't inline subword atomic operations.  When not inlined, smaller but
++slower emulation code that uses libatomic function calls is generated.  The
++default is to use fast inline subword atomics that do not require libatomic.
++
+ @opindex mshorten-memrefs
+ @item -mshorten-memrefs
+ @itemx -mno-shorten-memrefs
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-1.c
+@@ -0,0 +1,18 @@
++/* { dg-do compile } */
++/* { dg-options "-mno-inline-atomics" } */
++/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "fetch_and_nand" { target *-*-* } 0 } */
++/* { dg-final { scan-assembler "\tcall\t__sync_fetch_and_add_1" } } */
++/* { dg-final { scan-assembler "\tcall\t__sync_fetch_and_nand_1" } } */
++/* { dg-final { scan-assembler "\tcall\t__sync_bool_compare_and_swap_1" } } */
++
++char foo;
++char bar;
++char baz;
++
++int
++main ()
++{
++  __sync_fetch_and_add(&foo, 1);
++  __sync_fetch_and_nand(&bar, 1);
++  __sync_bool_compare_and_swap (&baz, 1, 2);
++}
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-2.c
+@@ -0,0 +1,9 @@
++/* { dg-do compile } */
++/* Verify that subword atomics do not generate calls.  */
++/* { dg-options "-minline-atomics" } */
++/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "fetch_and_nand" { target *-*-* } 0 } */
++/* { dg-final { scan-assembler-not "\tcall\t__sync_fetch_and_add_1" } } */
++/* { dg-final { scan-assembler-not "\tcall\t__sync_fetch_and_nand_1" } } */
++/* { dg-final { scan-assembler-not "\tcall\t__sync_bool_compare_and_swap_1" } } */
++
++#include "inline-atomics-1.c"
+\ No newline at end of file
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-3.c
+@@ -0,0 +1,569 @@
++/* Check all char alignments.  */
++/* Logic duplicated from libatomic/testsuite/libatomic.c/atomic-op-1.c.  */
++/* Test __atomic routines for existence and proper execution on 1 byte
++   values with each valid memory model.  */
++/* { dg-do run } */
++/* { dg-options "-minline-atomics -Wno-address-of-packed-member" } */
++
++/* Test the execution of the __atomic_*OP builtin routines for a char.  */
++
++extern void abort(void);
++
++char count, res;
++const char init = ~0;
++
++struct A
++{
++   char a;
++   char b;
++   char c;
++   char d;
++} __attribute__ ((packed)) A;
++
++/* The fetch_op routines return the original value before the operation.  */
++
++void
++test_fetch_add (char* v)
++{
++  *v = 0;
++  count = 1;
++
++  if (__atomic_fetch_add (v, count, __ATOMIC_RELAXED) != 0)
++    abort ();
++
++  if (__atomic_fetch_add (v, 1, __ATOMIC_CONSUME) != 1)
++    abort ();
++
++  if (__atomic_fetch_add (v, count, __ATOMIC_ACQUIRE) != 2)
++    abort ();
++
++  if (__atomic_fetch_add (v, 1, __ATOMIC_RELEASE) != 3)
++    abort ();
++
++  if (__atomic_fetch_add (v, count, __ATOMIC_ACQ_REL) != 4)
++    abort ();
++
++  if (__atomic_fetch_add (v, 1, __ATOMIC_SEQ_CST) != 5)
++    abort ();
++}
++
++
++void
++test_fetch_sub (char* v)
++{
++  *v = res = 20;
++  count = 0;
++
++  if (__atomic_fetch_sub (v, count + 1, __ATOMIC_RELAXED) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, 1, __ATOMIC_CONSUME) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, count + 1, __ATOMIC_ACQUIRE) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, 1, __ATOMIC_RELEASE) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, count + 1, __ATOMIC_ACQ_REL) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, 1, __ATOMIC_SEQ_CST) !=  res--)
++    abort ();
++}
++
++void
++test_fetch_and (char* v)
++{
++  *v = init;
++
++  if (__atomic_fetch_and (v, 0, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_fetch_and (v, init, __ATOMIC_CONSUME) !=  0)
++    abort ();
++
++  if (__atomic_fetch_and (v, 0, __ATOMIC_ACQUIRE) !=  0)
++    abort ();
++
++  *v = ~*v;
++  if (__atomic_fetch_and (v, init, __ATOMIC_RELEASE) !=  init)
++    abort ();
++
++  if (__atomic_fetch_and (v, 0, __ATOMIC_ACQ_REL) !=  init)
++    abort ();
++
++  if (__atomic_fetch_and (v, 0, __ATOMIC_SEQ_CST) !=  0)
++    abort ();
++}
++
++void
++test_fetch_nand (char* v)
++{
++  *v = init;
++
++  if (__atomic_fetch_nand (v, 0, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_fetch_nand (v, init, __ATOMIC_CONSUME) !=  init)
++    abort ();
++
++  if (__atomic_fetch_nand (v, 0, __ATOMIC_ACQUIRE) !=  0 )
++    abort ();
++
++  if (__atomic_fetch_nand (v, init, __ATOMIC_RELEASE) !=  init)
++    abort ();
++
++  if (__atomic_fetch_nand (v, init, __ATOMIC_ACQ_REL) !=  0)
++    abort ();
++
++  if (__atomic_fetch_nand (v, 0, __ATOMIC_SEQ_CST) !=  init)
++    abort ();
++}
++
++void
++test_fetch_xor (char* v)
++{
++  *v = init;
++  count = 0;
++
++  if (__atomic_fetch_xor (v, count, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_fetch_xor (v, ~count, __ATOMIC_CONSUME) !=  init)
++    abort ();
++
++  if (__atomic_fetch_xor (v, 0, __ATOMIC_ACQUIRE) !=  0)
++    abort ();
++
++  if (__atomic_fetch_xor (v, ~count, __ATOMIC_RELEASE) !=  0)
++    abort ();
++
++  if (__atomic_fetch_xor (v, 0, __ATOMIC_ACQ_REL) !=  init)
++    abort ();
++
++  if (__atomic_fetch_xor (v, ~count, __ATOMIC_SEQ_CST) !=  init)
++    abort ();
++}
++
++void
++test_fetch_or (char* v)
++{
++  *v = 0;
++  count = 1;
++
++  if (__atomic_fetch_or (v, count, __ATOMIC_RELAXED) !=  0)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, 2, __ATOMIC_CONSUME) !=  1)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, count, __ATOMIC_ACQUIRE) !=  3)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, 8, __ATOMIC_RELEASE) !=  7)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, count, __ATOMIC_ACQ_REL) !=  15)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, count, __ATOMIC_SEQ_CST) !=  31)
++    abort ();
++}
++
++/* The OP_fetch routines return the new value after the operation.  */
++
++void
++test_add_fetch (char* v)
++{
++  *v = 0;
++  count = 1;
++
++  if (__atomic_add_fetch (v, count, __ATOMIC_RELAXED) != 1)
++    abort ();
++
++  if (__atomic_add_fetch (v, 1, __ATOMIC_CONSUME) != 2)
++    abort ();
++
++  if (__atomic_add_fetch (v, count, __ATOMIC_ACQUIRE) != 3)
++    abort ();
++
++  if (__atomic_add_fetch (v, 1, __ATOMIC_RELEASE) != 4)
++    abort ();
++
++  if (__atomic_add_fetch (v, count, __ATOMIC_ACQ_REL) != 5)
++    abort ();
++
++  if (__atomic_add_fetch (v, count, __ATOMIC_SEQ_CST) != 6)
++    abort ();
++}
++
++
++void
++test_sub_fetch (char* v)
++{
++  *v = res = 20;
++  count = 0;
++
++  if (__atomic_sub_fetch (v, count + 1, __ATOMIC_RELAXED) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, 1, __ATOMIC_CONSUME) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, count + 1, __ATOMIC_ACQUIRE) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, 1, __ATOMIC_RELEASE) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, count + 1, __ATOMIC_ACQ_REL) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, count + 1, __ATOMIC_SEQ_CST) !=  --res)
++    abort ();
++}
++
++void
++test_and_fetch (char* v)
++{
++  *v = init;
++
++  if (__atomic_and_fetch (v, 0, __ATOMIC_RELAXED) !=  0)
++    abort ();
++
++  *v = init;
++  if (__atomic_and_fetch (v, init, __ATOMIC_CONSUME) !=  init)
++    abort ();
++
++  if (__atomic_and_fetch (v, 0, __ATOMIC_ACQUIRE) !=  0)
++    abort ();
++
++  *v = ~*v;
++  if (__atomic_and_fetch (v, init, __ATOMIC_RELEASE) !=  init)
++    abort ();
++
++  if (__atomic_and_fetch (v, 0, __ATOMIC_ACQ_REL) !=  0)
++    abort ();
++
++  *v = ~*v;
++  if (__atomic_and_fetch (v, 0, __ATOMIC_SEQ_CST) !=  0)
++    abort ();
++}
++
++void
++test_nand_fetch (char* v)
++{
++  *v = init;
++
++  if (__atomic_nand_fetch (v, 0, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_nand_fetch (v, init, __ATOMIC_CONSUME) !=  0)
++    abort ();
++
++  if (__atomic_nand_fetch (v, 0, __ATOMIC_ACQUIRE) !=  init)
++    abort ();
++
++  if (__atomic_nand_fetch (v, init, __ATOMIC_RELEASE) !=  0)
++    abort ();
++
++  if (__atomic_nand_fetch (v, init, __ATOMIC_ACQ_REL) !=  init)
++    abort ();
++
++  if (__atomic_nand_fetch (v, 0, __ATOMIC_SEQ_CST) !=  init)
++    abort ();
++}
++
++
++
++void
++test_xor_fetch (char* v)
++{
++  *v = init;
++  count = 0;
++
++  if (__atomic_xor_fetch (v, count, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_xor_fetch (v, ~count, __ATOMIC_CONSUME) !=  0)
++    abort ();
++
++  if (__atomic_xor_fetch (v, 0, __ATOMIC_ACQUIRE) !=  0)
++    abort ();
++
++  if (__atomic_xor_fetch (v, ~count, __ATOMIC_RELEASE) !=  init)
++    abort ();
++
++  if (__atomic_xor_fetch (v, 0, __ATOMIC_ACQ_REL) !=  init)
++    abort ();
++
++  if (__atomic_xor_fetch (v, ~count, __ATOMIC_SEQ_CST) !=  0)
++    abort ();
++}
++
++void
++test_or_fetch (char* v)
++{
++  *v = 0;
++  count = 1;
++
++  if (__atomic_or_fetch (v, count, __ATOMIC_RELAXED) !=  1)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, 2, __ATOMIC_CONSUME) !=  3)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, count, __ATOMIC_ACQUIRE) !=  7)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, 8, __ATOMIC_RELEASE) !=  15)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, count, __ATOMIC_ACQ_REL) !=  31)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, count, __ATOMIC_SEQ_CST) !=  63)
++    abort ();
++}
++
++
++/* Test the OP routines with a result which isn't used. Use both variations
++   within each function.  */
++
++void
++test_add (char* v)
++{
++  *v = 0;
++  count = 1;
++
++  __atomic_add_fetch (v, count, __ATOMIC_RELAXED);
++  if (*v != 1)
++    abort ();
++
++  __atomic_fetch_add (v, count, __ATOMIC_CONSUME);
++  if (*v != 2)
++    abort ();
++
++  __atomic_add_fetch (v, 1 , __ATOMIC_ACQUIRE);
++  if (*v != 3)
++    abort ();
++
++  __atomic_fetch_add (v, 1, __ATOMIC_RELEASE);
++  if (*v != 4)
++    abort ();
++
++  __atomic_add_fetch (v, count, __ATOMIC_ACQ_REL);
++  if (*v != 5)
++    abort ();
++
++  __atomic_fetch_add (v, count, __ATOMIC_SEQ_CST);
++  if (*v != 6)
++    abort ();
++}
++
++
++void
++test_sub (char* v)
++{
++  *v = res = 20;
++  count = 0;
++
++  __atomic_sub_fetch (v, count + 1, __ATOMIC_RELAXED);
++  if (*v != --res)
++    abort ();
++
++  __atomic_fetch_sub (v, count + 1, __ATOMIC_CONSUME);
++  if (*v != --res)
++    abort ();
++
++  __atomic_sub_fetch (v, 1, __ATOMIC_ACQUIRE);
++  if (*v != --res)
++    abort ();
++
++  __atomic_fetch_sub (v, 1, __ATOMIC_RELEASE);
++  if (*v != --res)
++    abort ();
++
++  __atomic_sub_fetch (v, count + 1, __ATOMIC_ACQ_REL);
++  if (*v != --res)
++    abort ();
++
++  __atomic_fetch_sub (v, count + 1, __ATOMIC_SEQ_CST);
++  if (*v != --res)
++    abort ();
++}
++
++void
++test_and (char* v)
++{
++  *v = init;
++
++  __atomic_and_fetch (v, 0, __ATOMIC_RELAXED);
++  if (*v != 0)
++    abort ();
++
++  *v = init;
++  __atomic_fetch_and (v, init, __ATOMIC_CONSUME);
++  if (*v != init)
++    abort ();
++
++  __atomic_and_fetch (v, 0, __ATOMIC_ACQUIRE);
++  if (*v != 0)
++    abort ();
++
++  *v = ~*v;
++  __atomic_fetch_and (v, init, __ATOMIC_RELEASE);
++  if (*v != init)
++    abort ();
++
++  __atomic_and_fetch (v, 0, __ATOMIC_ACQ_REL);
++  if (*v != 0)
++    abort ();
++
++  *v = ~*v;
++  __atomic_fetch_and (v, 0, __ATOMIC_SEQ_CST);
++  if (*v != 0)
++    abort ();
++}
++
++void
++test_nand (char* v)
++{
++  *v = init;
++
++  __atomic_fetch_nand (v, 0, __ATOMIC_RELAXED);
++  if (*v != init)
++    abort ();
++
++  __atomic_fetch_nand (v, init, __ATOMIC_CONSUME);
++  if (*v != 0)
++    abort ();
++
++  __atomic_nand_fetch (v, 0, __ATOMIC_ACQUIRE);
++  if (*v != init)
++    abort ();
++
++  __atomic_nand_fetch (v, init, __ATOMIC_RELEASE);
++  if (*v != 0)
++    abort ();
++
++  __atomic_fetch_nand (v, init, __ATOMIC_ACQ_REL);
++  if (*v != init)
++    abort ();
++
++  __atomic_nand_fetch (v, 0, __ATOMIC_SEQ_CST);
++  if (*v != init)
++    abort ();
++}
++
++
++
++void
++test_xor (char* v)
++{
++  *v = init;
++  count = 0;
++
++  __atomic_xor_fetch (v, count, __ATOMIC_RELAXED);
++  if (*v != init)
++    abort ();
++
++  __atomic_fetch_xor (v, ~count, __ATOMIC_CONSUME);
++  if (*v != 0)
++    abort ();
++
++  __atomic_xor_fetch (v, 0, __ATOMIC_ACQUIRE);
++  if (*v != 0)
++    abort ();
++
++  __atomic_fetch_xor (v, ~count, __ATOMIC_RELEASE);
++  if (*v != init)
++    abort ();
++
++  __atomic_fetch_xor (v, 0, __ATOMIC_ACQ_REL);
++  if (*v != init)
++    abort ();
++
++  __atomic_xor_fetch (v, ~count, __ATOMIC_SEQ_CST);
++  if (*v != 0)
++    abort ();
++}
++
++void
++test_or (char* v)
++{
++  *v = 0;
++  count = 1;
++
++  __atomic_or_fetch (v, count, __ATOMIC_RELAXED);
++  if (*v != 1)
++    abort ();
++
++  count *= 2;
++  __atomic_fetch_or (v, count, __ATOMIC_CONSUME);
++  if (*v != 3)
++    abort ();
++
++  count *= 2;
++  __atomic_or_fetch (v, 4, __ATOMIC_ACQUIRE);
++  if (*v != 7)
++    abort ();
++
++  count *= 2;
++  __atomic_fetch_or (v, 8, __ATOMIC_RELEASE);
++  if (*v != 15)
++    abort ();
++
++  count *= 2;
++  __atomic_or_fetch (v, count, __ATOMIC_ACQ_REL);
++  if (*v != 31)
++    abort ();
++
++  count *= 2;
++  __atomic_fetch_or (v, count, __ATOMIC_SEQ_CST);
++  if (*v != 63)
++    abort ();
++}
++
++int
++main ()
++{
++  char* V[] = {&A.a, &A.b, &A.c, &A.d};
++
++  for (int i = 0; i < 4; i++) {
++    test_fetch_add (V[i]);
++    test_fetch_sub (V[i]);
++    test_fetch_and (V[i]);
++    test_fetch_nand (V[i]);
++    test_fetch_xor (V[i]);
++    test_fetch_or (V[i]);
++
++    test_add_fetch (V[i]);
++    test_sub_fetch (V[i]);
++    test_and_fetch (V[i]);
++    test_nand_fetch (V[i]);
++    test_xor_fetch (V[i]);
++    test_or_fetch (V[i]);
++
++    test_add (V[i]);
++    test_sub (V[i]);
++    test_and (V[i]);
++    test_nand (V[i]);
++    test_xor (V[i]);
++    test_or (V[i]);
++  }
++
++  return 0;
++}
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-4.c
+@@ -0,0 +1,566 @@
++/* Check all short alignments.  */
++/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-op-2.c */
++/* Test __atomic routines for existence and proper execution on 2 byte
++   values with each valid memory model.  */
++/* { dg-do run } */
++/* { dg-options "-minline-atomics -Wno-address-of-packed-member" } */
++
++/* Test the execution of the __atomic_*OP builtin routines for a short.  */
++
++extern void abort(void);
++
++short count, res;
++const short init = ~0;
++
++struct A
++{
++   short a;
++   short b;
++} __attribute__ ((packed)) A;
++
++/* The fetch_op routines return the original value before the operation.  */
++
++void
++test_fetch_add (short* v)
++{
++  *v = 0;
++  count = 1;
++
++  if (__atomic_fetch_add (v, count, __ATOMIC_RELAXED) != 0)
++    abort ();
++
++  if (__atomic_fetch_add (v, 1, __ATOMIC_CONSUME) != 1)
++    abort ();
++
++  if (__atomic_fetch_add (v, count, __ATOMIC_ACQUIRE) != 2)
++    abort ();
++
++  if (__atomic_fetch_add (v, 1, __ATOMIC_RELEASE) != 3)
++    abort ();
++
++  if (__atomic_fetch_add (v, count, __ATOMIC_ACQ_REL) != 4)
++    abort ();
++
++  if (__atomic_fetch_add (v, 1, __ATOMIC_SEQ_CST) != 5)
++    abort ();
++}
++
++
++void
++test_fetch_sub (short* v)
++{
++  *v = res = 20;
++  count = 0;
++
++  if (__atomic_fetch_sub (v, count + 1, __ATOMIC_RELAXED) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, 1, __ATOMIC_CONSUME) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, count + 1, __ATOMIC_ACQUIRE) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, 1, __ATOMIC_RELEASE) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, count + 1, __ATOMIC_ACQ_REL) !=  res--)
++    abort ();
++
++  if (__atomic_fetch_sub (v, 1, __ATOMIC_SEQ_CST) !=  res--)
++    abort ();
++}
++
++void
++test_fetch_and (short* v)
++{
++  *v = init;
++
++  if (__atomic_fetch_and (v, 0, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_fetch_and (v, init, __ATOMIC_CONSUME) !=  0)
++    abort ();
++
++  if (__atomic_fetch_and (v, 0, __ATOMIC_ACQUIRE) !=  0)
++    abort ();
++
++  *v = ~*v;
++  if (__atomic_fetch_and (v, init, __ATOMIC_RELEASE) !=  init)
++    abort ();
++
++  if (__atomic_fetch_and (v, 0, __ATOMIC_ACQ_REL) !=  init)
++    abort ();
++
++  if (__atomic_fetch_and (v, 0, __ATOMIC_SEQ_CST) !=  0)
++    abort ();
++}
++
++void
++test_fetch_nand (short* v)
++{
++  *v = init;
++
++  if (__atomic_fetch_nand (v, 0, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_fetch_nand (v, init, __ATOMIC_CONSUME) !=  init)
++    abort ();
++
++  if (__atomic_fetch_nand (v, 0, __ATOMIC_ACQUIRE) !=  0 )
++    abort ();
++
++  if (__atomic_fetch_nand (v, init, __ATOMIC_RELEASE) !=  init)
++    abort ();
++
++  if (__atomic_fetch_nand (v, init, __ATOMIC_ACQ_REL) !=  0)
++    abort ();
++
++  if (__atomic_fetch_nand (v, 0, __ATOMIC_SEQ_CST) !=  init)
++    abort ();
++}
++
++void
++test_fetch_xor (short* v)
++{
++  *v = init;
++  count = 0;
++
++  if (__atomic_fetch_xor (v, count, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_fetch_xor (v, ~count, __ATOMIC_CONSUME) !=  init)
++    abort ();
++
++  if (__atomic_fetch_xor (v, 0, __ATOMIC_ACQUIRE) !=  0)
++    abort ();
++
++  if (__atomic_fetch_xor (v, ~count, __ATOMIC_RELEASE) !=  0)
++    abort ();
++
++  if (__atomic_fetch_xor (v, 0, __ATOMIC_ACQ_REL) !=  init)
++    abort ();
++
++  if (__atomic_fetch_xor (v, ~count, __ATOMIC_SEQ_CST) !=  init)
++    abort ();
++}
++
++void
++test_fetch_or (short* v)
++{
++  *v = 0;
++  count = 1;
++
++  if (__atomic_fetch_or (v, count, __ATOMIC_RELAXED) !=  0)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, 2, __ATOMIC_CONSUME) !=  1)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, count, __ATOMIC_ACQUIRE) !=  3)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, 8, __ATOMIC_RELEASE) !=  7)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, count, __ATOMIC_ACQ_REL) !=  15)
++    abort ();
++
++  count *= 2;
++  if (__atomic_fetch_or (v, count, __ATOMIC_SEQ_CST) !=  31)
++    abort ();
++}
++
++/* The OP_fetch routines return the new value after the operation.  */
++
++void
++test_add_fetch (short* v)
++{
++  *v = 0;
++  count = 1;
++
++  if (__atomic_add_fetch (v, count, __ATOMIC_RELAXED) != 1)
++    abort ();
++
++  if (__atomic_add_fetch (v, 1, __ATOMIC_CONSUME) != 2)
++    abort ();
++
++  if (__atomic_add_fetch (v, count, __ATOMIC_ACQUIRE) != 3)
++    abort ();
++
++  if (__atomic_add_fetch (v, 1, __ATOMIC_RELEASE) != 4)
++    abort ();
++
++  if (__atomic_add_fetch (v, count, __ATOMIC_ACQ_REL) != 5)
++    abort ();
++
++  if (__atomic_add_fetch (v, count, __ATOMIC_SEQ_CST) != 6)
++    abort ();
++}
++
++
++void
++test_sub_fetch (short* v)
++{
++  *v = res = 20;
++  count = 0;
++
++  if (__atomic_sub_fetch (v, count + 1, __ATOMIC_RELAXED) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, 1, __ATOMIC_CONSUME) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, count + 1, __ATOMIC_ACQUIRE) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, 1, __ATOMIC_RELEASE) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, count + 1, __ATOMIC_ACQ_REL) !=  --res)
++    abort ();
++
++  if (__atomic_sub_fetch (v, count + 1, __ATOMIC_SEQ_CST) !=  --res)
++    abort ();
++}
++
++void
++test_and_fetch (short* v)
++{
++  *v = init;
++
++  if (__atomic_and_fetch (v, 0, __ATOMIC_RELAXED) !=  0)
++    abort ();
++
++  *v = init;
++  if (__atomic_and_fetch (v, init, __ATOMIC_CONSUME) !=  init)
++    abort ();
++
++  if (__atomic_and_fetch (v, 0, __ATOMIC_ACQUIRE) !=  0)
++    abort ();
++
++  *v = ~*v;
++  if (__atomic_and_fetch (v, init, __ATOMIC_RELEASE) !=  init)
++    abort ();
++
++  if (__atomic_and_fetch (v, 0, __ATOMIC_ACQ_REL) !=  0)
++    abort ();
++
++  *v = ~*v;
++  if (__atomic_and_fetch (v, 0, __ATOMIC_SEQ_CST) !=  0)
++    abort ();
++}
++
++void
++test_nand_fetch (short* v)
++{
++  *v = init;
++
++  if (__atomic_nand_fetch (v, 0, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_nand_fetch (v, init, __ATOMIC_CONSUME) !=  0)
++    abort ();
++
++  if (__atomic_nand_fetch (v, 0, __ATOMIC_ACQUIRE) !=  init)
++    abort ();
++
++  if (__atomic_nand_fetch (v, init, __ATOMIC_RELEASE) !=  0)
++    abort ();
++
++  if (__atomic_nand_fetch (v, init, __ATOMIC_ACQ_REL) !=  init)
++    abort ();
++
++  if (__atomic_nand_fetch (v, 0, __ATOMIC_SEQ_CST) !=  init)
++    abort ();
++}
++
++
++
++void
++test_xor_fetch (short* v)
++{
++  *v = init;
++  count = 0;
++
++  if (__atomic_xor_fetch (v, count, __ATOMIC_RELAXED) !=  init)
++    abort ();
++
++  if (__atomic_xor_fetch (v, ~count, __ATOMIC_CONSUME) !=  0)
++    abort ();
++
++  if (__atomic_xor_fetch (v, 0, __ATOMIC_ACQUIRE) !=  0)
++    abort ();
++
++  if (__atomic_xor_fetch (v, ~count, __ATOMIC_RELEASE) !=  init)
++    abort ();
++
++  if (__atomic_xor_fetch (v, 0, __ATOMIC_ACQ_REL) !=  init)
++    abort ();
++
++  if (__atomic_xor_fetch (v, ~count, __ATOMIC_SEQ_CST) !=  0)
++    abort ();
++}
++
++void
++test_or_fetch (short* v)
++{
++  *v = 0;
++  count = 1;
++
++  if (__atomic_or_fetch (v, count, __ATOMIC_RELAXED) !=  1)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, 2, __ATOMIC_CONSUME) !=  3)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, count, __ATOMIC_ACQUIRE) !=  7)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, 8, __ATOMIC_RELEASE) !=  15)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, count, __ATOMIC_ACQ_REL) !=  31)
++    abort ();
++
++  count *= 2;
++  if (__atomic_or_fetch (v, count, __ATOMIC_SEQ_CST) !=  63)
++    abort ();
++}
++
++
++/* Test the OP routines with a result which isn't used. Use both variations
++   within each function.  */
++
++void
++test_add (short* v)
++{
++  *v = 0;
++  count = 1;
++
++  __atomic_add_fetch (v, count, __ATOMIC_RELAXED);
++  if (*v != 1)
++    abort ();
++
++  __atomic_fetch_add (v, count, __ATOMIC_CONSUME);
++  if (*v != 2)
++    abort ();
++
++  __atomic_add_fetch (v, 1 , __ATOMIC_ACQUIRE);
++  if (*v != 3)
++    abort ();
++
++  __atomic_fetch_add (v, 1, __ATOMIC_RELEASE);
++  if (*v != 4)
++    abort ();
++
++  __atomic_add_fetch (v, count, __ATOMIC_ACQ_REL);
++  if (*v != 5)
++    abort ();
++
++  __atomic_fetch_add (v, count, __ATOMIC_SEQ_CST);
++  if (*v != 6)
++    abort ();
++}
++
++
++void
++test_sub (short* v)
++{
++  *v = res = 20;
++  count = 0;
++
++  __atomic_sub_fetch (v, count + 1, __ATOMIC_RELAXED);
++  if (*v != --res)
++    abort ();
++
++  __atomic_fetch_sub (v, count + 1, __ATOMIC_CONSUME);
++  if (*v != --res)
++    abort ();
++
++  __atomic_sub_fetch (v, 1, __ATOMIC_ACQUIRE);
++  if (*v != --res)
++    abort ();
++
++  __atomic_fetch_sub (v, 1, __ATOMIC_RELEASE);
++  if (*v != --res)
++    abort ();
++
++  __atomic_sub_fetch (v, count + 1, __ATOMIC_ACQ_REL);
++  if (*v != --res)
++    abort ();
++
++  __atomic_fetch_sub (v, count + 1, __ATOMIC_SEQ_CST);
++  if (*v != --res)
++    abort ();
++}
++
++void
++test_and (short* v)
++{
++  *v = init;
++
++  __atomic_and_fetch (v, 0, __ATOMIC_RELAXED);
++  if (*v != 0)
++    abort ();
++
++  *v = init;
++  __atomic_fetch_and (v, init, __ATOMIC_CONSUME);
++  if (*v != init)
++    abort ();
++
++  __atomic_and_fetch (v, 0, __ATOMIC_ACQUIRE);
++  if (*v != 0)
++    abort ();
++
++  *v = ~*v;
++  __atomic_fetch_and (v, init, __ATOMIC_RELEASE);
++  if (*v != init)
++    abort ();
++
++  __atomic_and_fetch (v, 0, __ATOMIC_ACQ_REL);
++  if (*v != 0)
++    abort ();
++
++  *v = ~*v;
++  __atomic_fetch_and (v, 0, __ATOMIC_SEQ_CST);
++  if (*v != 0)
++    abort ();
++}
++
++void
++test_nand (short* v)
++{
++  *v = init;
++
++  __atomic_fetch_nand (v, 0, __ATOMIC_RELAXED);
++  if (*v != init)
++    abort ();
++
++  __atomic_fetch_nand (v, init, __ATOMIC_CONSUME);
++  if (*v != 0)
++    abort ();
++
++  __atomic_nand_fetch (v, 0, __ATOMIC_ACQUIRE);
++  if (*v != init)
++    abort ();
++
++  __atomic_nand_fetch (v, init, __ATOMIC_RELEASE);
++  if (*v != 0)
++    abort ();
++
++  __atomic_fetch_nand (v, init, __ATOMIC_ACQ_REL);
++  if (*v != init)
++    abort ();
++
++  __atomic_nand_fetch (v, 0, __ATOMIC_SEQ_CST);
++  if (*v != init)
++    abort ();
++}
++
++
++
++void
++test_xor (short* v)
++{
++  *v = init;
++  count = 0;
++
++  __atomic_xor_fetch (v, count, __ATOMIC_RELAXED);
++  if (*v != init)
++    abort ();
++
++  __atomic_fetch_xor (v, ~count, __ATOMIC_CONSUME);
++  if (*v != 0)
++    abort ();
++
++  __atomic_xor_fetch (v, 0, __ATOMIC_ACQUIRE);
++  if (*v != 0)
++    abort ();
++
++  __atomic_fetch_xor (v, ~count, __ATOMIC_RELEASE);
++  if (*v != init)
++    abort ();
++
++  __atomic_fetch_xor (v, 0, __ATOMIC_ACQ_REL);
++  if (*v != init)
++    abort ();
++
++  __atomic_xor_fetch (v, ~count, __ATOMIC_SEQ_CST);
++  if (*v != 0)
++    abort ();
++}
++
++void
++test_or (short* v)
++{
++  *v = 0;
++  count = 1;
++
++  __atomic_or_fetch (v, count, __ATOMIC_RELAXED);
++  if (*v != 1)
++    abort ();
++
++  count *= 2;
++  __atomic_fetch_or (v, count, __ATOMIC_CONSUME);
++  if (*v != 3)
++    abort ();
++
++  count *= 2;
++  __atomic_or_fetch (v, 4, __ATOMIC_ACQUIRE);
++  if (*v != 7)
++    abort ();
++
++  count *= 2;
++  __atomic_fetch_or (v, 8, __ATOMIC_RELEASE);
++  if (*v != 15)
++    abort ();
++
++  count *= 2;
++  __atomic_or_fetch (v, count, __ATOMIC_ACQ_REL);
++  if (*v != 31)
++    abort ();
++
++  count *= 2;
++  __atomic_fetch_or (v, count, __ATOMIC_SEQ_CST);
++  if (*v != 63)
++    abort ();
++}
++
++int
++main () {
++  short* V[] = {&A.a, &A.b};
++
++  for (int i = 0; i < 2; i++) {
++    test_fetch_add (V[i]);
++    test_fetch_sub (V[i]);
++    test_fetch_and (V[i]);
++    test_fetch_nand (V[i]);
++    test_fetch_xor (V[i]);
++    test_fetch_or (V[i]);
++
++    test_add_fetch (V[i]);
++    test_sub_fetch (V[i]);
++    test_and_fetch (V[i]);
++    test_nand_fetch (V[i]);
++    test_xor_fetch (V[i]);
++    test_or_fetch (V[i]);
++
++    test_add (V[i]);
++    test_sub (V[i]);
++    test_and (V[i]);
++    test_nand (V[i]);
++    test_xor (V[i]);
++    test_or (V[i]);
++  }
++
++  return 0;
++}
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-5.c
+@@ -0,0 +1,87 @@
++/* Test __atomic routines for existence and proper execution on 1 byte
++   values with each valid memory model.  */
++/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-compare-exchange-1.c */
++/* { dg-do run } */
++/* { dg-options "-minline-atomics" } */
++
++/* Test the execution of the __atomic_compare_exchange_n builtin for a char.  */
++
++extern void abort(void);
++
++char v = 0;
++char expected = 0;
++char max = ~0;
++char desired = ~0;
++char zero = 0;
++
++#define STRONG 0
++#define WEAK 1
++
++int
++main ()
++{
++
++  if (!__atomic_compare_exchange_n (&v, &expected, max, STRONG , __ATOMIC_RELAXED, __ATOMIC_RELAXED))
++    abort ();
++  if (expected != 0)
++    abort ();
++
++  if (__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
++    abort ();
++  if (expected != max)
++    abort ();
++
++  if (!__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
++    abort ();
++  if (expected != max)
++    abort ();
++  if (v != 0)
++    abort ();
++
++  if (__atomic_compare_exchange_n (&v, &expected, desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
++    abort ();
++  if (expected != 0)
++    abort ();
++
++  if (!__atomic_compare_exchange_n (&v, &expected, desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
++    abort ();
++  if (expected != 0)
++    abort ();
++  if (v != max)
++    abort ();
++
++  /* Now test the generic version.  */
++
++  v = 0;
++
++  if (!__atomic_compare_exchange (&v, &expected, &max, STRONG, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
++    abort ();
++  if (expected != 0)
++    abort ();
++
++  if (__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
++    abort ();
++  if (expected != max)
++    abort ();
++
++  if (!__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
++    abort ();
++  if (expected != max)
++    abort ();
++  if (v != 0)
++    abort ();
++
++  if (__atomic_compare_exchange (&v, &expected, &desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
++    abort ();
++  if (expected != 0)
++    abort ();
++
++  if (!__atomic_compare_exchange (&v, &expected, &desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
++    abort ();
++  if (expected != 0)
++    abort ();
++  if (v != max)
++    abort ();
++
++  return 0;
++}
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-6.c
+@@ -0,0 +1,87 @@
++/* Test __atomic routines for existence and proper execution on 2 byte
++   values with each valid memory model.  */
++/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-compare-exchange-2.c */
++/* { dg-do run } */
++/* { dg-options "-minline-atomics" } */
++
++/* Test the execution of the __atomic_compare_exchange_n builtin for a short.  */
++
++extern void abort(void);
++
++short v = 0;
++short expected = 0;
++short max = ~0;
++short desired = ~0;
++short zero = 0;
++
++#define STRONG 0
++#define WEAK 1
++
++int
++main ()
++{
++
++  if (!__atomic_compare_exchange_n (&v, &expected, max, STRONG , __ATOMIC_RELAXED, __ATOMIC_RELAXED))
++    abort ();
++  if (expected != 0)
++    abort ();
++
++  if (__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
++    abort ();
++  if (expected != max)
++    abort ();
++
++  if (!__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
++    abort ();
++  if (expected != max)
++    abort ();
++  if (v != 0)
++    abort ();
++
++  if (__atomic_compare_exchange_n (&v, &expected, desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
++    abort ();
++  if (expected != 0)
++    abort ();
++
++  if (!__atomic_compare_exchange_n (&v, &expected, desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
++    abort ();
++  if (expected != 0)
++    abort ();
++  if (v != max)
++    abort ();
++
++  /* Now test the generic version.  */
++
++  v = 0;
++
++  if (!__atomic_compare_exchange (&v, &expected, &max, STRONG, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
++    abort ();
++  if (expected != 0)
++    abort ();
++
++  if (__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
++    abort ();
++  if (expected != max)
++    abort ();
++
++  if (!__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
++    abort ();
++  if (expected != max)
++    abort ();
++  if (v != 0)
++    abort ();
++
++  if (__atomic_compare_exchange (&v, &expected, &desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
++    abort ();
++  if (expected != 0)
++    abort ();
++
++  if (!__atomic_compare_exchange (&v, &expected, &desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
++    abort ();
++  if (expected != 0)
++    abort ();
++  if (v != max)
++    abort ();
++
++  return 0;
++}
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-7.c
+@@ -0,0 +1,69 @@
++/* Test __atomic routines for existence and proper execution on 1 byte
++   values with each valid memory model.  */
++/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-exchange-1.c */
++/* { dg-do run } */
++/* { dg-options "-minline-atomics" } */
++
++/* Test the execution of the __atomic_exchange_n builtin for a char.  */
++
++extern void abort(void);
++
++char v, count, ret;
++
++int
++main ()
++{
++  v = 0;
++  count = 0;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELAXED) != count)
++    abort ();
++  count++;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQUIRE) != count)
++    abort ();
++  count++;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELEASE) != count)
++    abort ();
++  count++;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQ_REL) != count)
++    abort ();
++  count++;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_SEQ_CST) != count)
++    abort ();
++  count++;
++
++  /* Now test the generic version.  */
++
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_RELAXED);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQUIRE);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_RELEASE);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQ_REL);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_SEQ_CST);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  return 0;
++}
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-8.c
+@@ -0,0 +1,69 @@
++/* Test __atomic routines for existence and proper execution on 2 byte
++   values with each valid memory model.  */
++/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-exchange-2.c */
++/* { dg-do run } */
++/* { dg-options "-minline-atomics" } */
++
++/* Test the execution of the __atomic_exchange_n builtin for a short.  */
++
++extern void abort(void);
++
++short v, count, ret;
++
++int
++main ()
++{
++  v = 0;
++  count = 0;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELAXED) != count)
++    abort ();
++  count++;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQUIRE) != count)
++    abort ();
++  count++;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELEASE) != count)
++    abort ();
++  count++;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQ_REL) != count)
++    abort ();
++  count++;
++
++  if (__atomic_exchange_n (&v, count + 1, __ATOMIC_SEQ_CST) != count)
++    abort ();
++  count++;
++
++  /* Now test the generic version.  */
++
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_RELAXED);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQUIRE);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_RELEASE);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQ_REL);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  __atomic_exchange (&v, &count, &ret, __ATOMIC_SEQ_CST);
++  if (ret != count - 1 || v != count)
++    abort ();
++  count++;
++
++  return 0;
++}
+--- a/libgcc/config/riscv/atomic.c
++++ b/libgcc/config/riscv/atomic.c
+@@ -30,6 +30,8 @@ see the files COPYING3 and COPYING.RUNTI
+ #define INVERT		"not %[tmp1], %[tmp1]\n\t"
+ #define DONT_INVERT	""
+ 
++/* Logic duplicated in gcc/gcc/config/riscv/sync.md for use when inlining is enabled */
++
+ #define GENERATE_FETCH_AND_OP(type, size, opname, insn, invert, cop)	\
+   type __sync_fetch_and_ ## opname ## _ ## size (type *p, type v)	\
+   {									\
diff --git a/toolchain/gcc/patches-13.x/701-riscv-linux-Don-t-add-latomic-with-pthread.patch b/toolchain/gcc/patches-13.x/701-riscv-linux-Don-t-add-latomic-with-pthread.patch
new file mode 100644
index 0000000000..328c7be9ce
--- /dev/null
+++ b/toolchain/gcc/patches-13.x/701-riscv-linux-Don-t-add-latomic-with-pthread.patch
@@ -0,0 +1,36 @@
+From 203f3060dd363361b172f7295f42bb6bf5ac0b3b Mon Sep 17 00:00:00 2001
+From: Andreas Schwab <schwab@suse.de>
+Date: Sat, 23 Apr 2022 15:48:42 +0200
+Subject: [PATCH] riscv/linux: Don't add -latomic with -pthread
+
+Now that we have support for inline subword atomic operations, it is no
+longer necessary to link against libatomic.  This also fixes testsuite
+failures because the framework does not properly set up the linker flags
+for finding libatomic.
+The use of atomic operations is also independent of the use of libpthread.
+
+gcc/
+	* config/riscv/linux.h (LIB_SPEC): Don't redefine.
+---
+ gcc/config/riscv/linux.h | 10 ----------
+ 1 file changed, 10 deletions(-)
+
+--- a/gcc/config/riscv/linux.h
++++ b/gcc/config/riscv/linux.h
+@@ -35,16 +35,6 @@ along with GCC; see the file COPYING3.
+ #undef MUSL_DYNAMIC_LINKER
+ #define MUSL_DYNAMIC_LINKER "/lib/ld-musl-riscv" XLEN_SPEC MUSL_ABI_SUFFIX ".so.1"
+ 
+-/* Because RISC-V only has word-sized atomics, it requries libatomic where
+-   others do not.  So link libatomic by default, as needed.  */
+-#undef LIB_SPEC
+-#ifdef LD_AS_NEEDED_OPTION
+-#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC \
+-  " %{pthread:" LD_AS_NEEDED_OPTION " -latomic " LD_NO_AS_NEEDED_OPTION "}"
+-#else
+-#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC " -latomic "
+-#endif
+-
+ #define ICACHE_FLUSH_FUNC "__riscv_flush_icache"
+ 
+ #define CPP_SPEC "%{pthread:-D_REENTRANT}"
diff --git a/toolchain/gcc/patches-13.x/910-mbsd_multi.patch b/toolchain/gcc/patches-13.x/910-mbsd_multi.patch
index 095fc62f69..8af05c9994 100644
--- a/toolchain/gcc/patches-13.x/910-mbsd_multi.patch
+++ b/toolchain/gcc/patches-13.x/910-mbsd_multi.patch
@@ -114,7 +114,7 @@ Date:   Tue Jul 31 00:52:27 2007 +0000
  ; On SVR4 targets, it also controls whether or not to emit a
 --- a/gcc/doc/invoke.texi
 +++ b/gcc/doc/invoke.texi
-@@ -10061,6 +10061,17 @@ This option is only supported for C and
+@@ -10062,6 +10062,17 @@ This option is only supported for C and
  @option{-Wall} and by @option{-Wpedantic}, which can be disabled with
  @option{-Wno-pointer-sign}.