ubuntu-buildroot/output/build/host-gcc-initial-11.4.0/gcc/config/sparc/niagara7.md

;; Scheduling description for Niagara-7
;;   Copyright (C) 2016-2021 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Automaton that holds the Niagara-7 issue-slot units declared below.
(define_automaton "niagara7_0")

;; The S4 core has a dual-issue queue.  This queue is divided into two
;; slots.  One instruction can be issued each cycle to each slot, and
;; up to 2 instructions are committed each cycle.  Each slot serves
;; several execution units, as depicted below:
;;
;;
;;                 n7_slot0 - Integer unit.
;;                          - Load/Store unit.
;; === QUEUE ==>
;;
;;                 n7_slot1 - Integer unit.
;;                          - Branch unit.
;;                          - Floating-point and graphics unit.
;;                          - 3-cycles crypto unit.
;;
;; Only the two issue slots are modelled as cpu units; the execution
;; units behind them are not declared separately.

(define_cpu_unit "n7_slot0,n7_slot1" "niagara7_0")

;; Some instructions stall the pipeline and prevent any other
;; instruction from being issued in the same cycle.  We assume the
;; same for multi-instruction insns.
;;
;; This is modelled by reserving both slots at once ("+").

(define_reservation "n7_single_issue" "n7_slot0 + n7_slot1")

;; Insn types that are issued alone: they occupy both slots for one
;; cycle through the n7_single_issue reservation.

(define_insn_reservation "n7_single" 1
  (and (eq_attr "cpu" "niagara7")
       (eq_attr "type" "multi,savew,flushw,trap"))
  "n7_single_issue")

;; Most integer-unit instructions have a latency of 1 cycle.  Both
;; slots serve an integer unit, so either one may be used.

(define_insn_reservation "n7_integer" 1
  (and (eq_attr "cpu" "niagara7")
       (eq_attr "type" "ialu,ialuX,shift,cmove,compare"))
  "n7_slot0 | n7_slot1")

;; An instruction-memory flush has a latency of 27 cycles.  It takes
;; an issue slot (either one) in its first cycle only.

(define_insn_reservation "n7_iflush" 27
  (and (eq_attr "cpu" "niagara7") (eq_attr "type" "iflush"))
  "n7_slot0 | n7_slot1, nothing*26")

;; Integer multiplies, as well as the array*, edge* and pdistn
;; instructions, execute in the integer unit with a latency of 12
;; cycles.

(define_insn_reservation "n7_imul" 12
  (and (eq_attr "cpu" "niagara7")
       (eq_attr "type" "imul,array,edge,edgen,pdistn"))
  "n7_slot0 | n7_slot1, nothing*11")

;; Integer divides execute in the integer unit as well; their latency
;; is 35 cycles.

(define_insn_reservation "n7_idiv" 35
  (and (eq_attr "cpu" "niagara7") (eq_attr "type" "idiv"))
  "n7_slot0 | n7_slot1, nothing*34")

;; Integer and floating-point loads alike execute in slot0 with a
;; latency of 5 cycles.
;;
;; Prefetches go through the same load/store unit, but their latency
;; is only 1 cycle; they have their own reservation.

(define_insn_reservation "n7_load" 5
  (and (eq_attr "cpu" "niagara7")
       (ior (and (eq_attr "type" "load")
                 (eq_attr "subtype" "regular"))
            (eq_attr "type" "fpload,sload")))
  "n7_slot0, nothing*4")

;; The "prefetch" subtype of the "load" type: 1 cycle in slot0.

(define_insn_reservation "n7_prefetch" 1
  (and (eq_attr "cpu" "niagara7")
       (and (eq_attr "type" "load")
            (eq_attr "subtype" "prefetch")))
  "n7_slot0")

;; Integer and floating-point stores execute in the load/store unit in
;; slot0 and have a latency of 1 cycle.

(define_insn_reservation "n7_store" 1
  (and (eq_attr "cpu" "niagara7")
       (eq_attr "type" "store,fpstore"))
  "n7_slot0")

;; Control transfers are handled by the Branch Unit, which is served
;; by slot1.

(define_insn_reservation "n7_cti" 1
  (and (eq_attr "cpu" "niagara7")
       (eq_attr "type" "cbcond,uncond_cbcond,branch,call,sibcall,\
                        call_no_delay_slot,uncond_branch,return"))
  "n7_slot1")

;; Many of the instructions that execute in the Floating-point and
;; Graphics unit (slot1) have a latency of 11 cycles.

(define_insn_reservation "n7_fp" 11
  (and (eq_attr "cpu" "niagara7")
       (ior (and (eq_attr "type" "fga")
                 (eq_attr "subtype" "fpu,maxmin"))
            (eq_attr "type" "fpmove,fpcmove,fpcrmove,fp,fpcmp,fpmul,\
                             fgm_pack,fgm_mul,pdist")))
  "n7_slot1, nothing*10")

;; Floating-point divides and square roots have high latencies.  They
;; execute in the floating-point and graphics unit in slot1.
;;
;; The "fpdivs" and "fpsqrts" types are given a latency of 24 cycles.

(define_insn_reservation "n7_fpdivs" 24
  (and (eq_attr "cpu" "niagara7") (eq_attr "type" "fpdivs,fpsqrts"))
  "n7_slot1, nothing*23")

;; The "fpdivd" and "fpsqrtd" types are given a latency of 37 cycles.

(define_insn_reservation "n7_fpdivd" 37
  (and (eq_attr "cpu" "niagara7") (eq_attr "type" "fpdivd,fpsqrtd"))
  "n7_slot1, nothing*36")

;; SIMD VIS instructions run in the Floating-point and graphics unit
;; (FPG) in slot1, normally with a latency of 11 or 12 cycles.
;;
;; For many of them the latency drops to 3 cycles when the consumer
;; can also be executed in 3 cycles; the instructions then execute in
;; the 3-cycle crypto unit, which is served by slot1 too.  This is
;; modelled with a bypass between the "n7_vis_*" reservations.

(define_insn_reservation "n7_vis_11cycles" 11
  (and (eq_attr "cpu" "niagara7")
       (ior (and (eq_attr "type" "fga")
                 (eq_attr "subtype" "addsub64,other"))
            (and (eq_attr "type" "vismv,visl")
                 (eq_attr "subtype" "double,single"))))
  "n7_slot1, nothing*10")

;; VIS insns with a default latency of 12 cycles.  The reservation
;; name starts with "n7_vis_", so the "n7_vis_*" bypass covers it.

(define_insn_reservation "n7_vis_12cycles" 12
  (and (eq_attr "cpu" "niagara7")
       (ior (and (eq_attr "type" "vismv")
                 (eq_attr "subtype" "movstouw"))
            (and (eq_attr "type" "fga")
                 (eq_attr "subtype" "cmask"))
            (eq_attr "type" "bmask,viscmp")))
  "n7_slot1, nothing*11")

;; Between a producer and a consumer whose reservation names both
;; match "n7_vis_*" (n7_vis_11cycles and n7_vis_12cycles above), the
;; latency is 3 cycles instead of the default 11 or 12.
(define_bypass 3 "n7_vis_*" "n7_vis_*")

;; Some other VIS instructions have a latency of 12 cycles, and won't
;; be executed in the 3-cycle crypto pipe.
;;
;; These are lzd and the "alignaddr" subtype of the gsr insns.  The
;; reservation name deliberately does not start with "n7_vis_", so
;; the 3-cycle "n7_vis_*" bypass does not apply to it.

(define_insn_reservation "n7_lzd" 12
  (and (eq_attr "cpu" "niagara7")
       (ior (eq_attr "type" "lzd")
            (and (eq_attr "type" "gsr")
                 (eq_attr "subtype" "alignaddr"))))
  "n7_slot1, nothing*11")

;; A couple of VIS moves have very low latencies on the M7.  The
;; "movxtod" subtype of vismv takes a single cycle in slot1.

(define_insn_reservation "n7_single_vis" 1
  (and (eq_attr "cpu" "niagara7")
       (and (eq_attr "type" "vismv")
            (eq_attr "subtype" "movxtod")))
  "n7_slot1")

;; The "movdtox" subtype of vismv has a latency of 2 cycles; slot1 is
;; taken in the first cycle only.

(define_insn_reservation "n7_double_vis" 2
  (and (eq_attr "cpu" "niagara7")
       (and (eq_attr "type" "vismv")
            (eq_attr "subtype" "movdtox")))
  "n7_slot1, nothing")

;; Reading and writing to the gsr register takes a high number of
;; cycles that is not documented in the PRM.  Let's use the same value
;; as the M8.
;;
;; As in the other multi-cycle reservations above, slot1 is held for
;; the issue cycle only and the remaining latency - 1 cycles reserve
;; nothing.

(define_insn_reservation "n7_gsr_reg" 70
  (and (eq_attr "cpu" "niagara7")
       (eq_attr "type" "gsr")
       (eq_attr "subtype" "reg"))
  "n7_slot1, nothing*69")
1 2024-04-01 15:19:46 +00:00			`;; Scheduling description for Niagara-7`
			`;; Copyright (C) 2016-2021 Free Software Foundation, Inc.`
			`;;`
			`;; This file is part of GCC.`
			`;;`
			`;; GCC is free software; you can redistribute it and/or modify`
			`;; it under the terms of the GNU General Public License as published by`
			`;; the Free Software Foundation; either version 3, or (at your option)`
			`;; any later version.`
			`;;`
			`;; GCC is distributed in the hope that it will be useful,`
			`;; but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`;; GNU General Public License for more details.`
			`;;`
			`;; You should have received a copy of the GNU General Public License`
			`;; along with GCC; see the file COPYING3. If not see`
			`;; <http://www.gnu.org/licenses/>.`

			`(define_automaton "niagara7_0")`

			`;; The S4 core has a dual-issue queue. This queue is divided into two`
			`;; slots. One instruction can be issued each cycle to each slot, and`
			`;; up to 2 instructions are committed each cycle. Each slot serves`
			`;; several execution units, as depicted below:`
			`;;`
			`;;`
			`;; m7_slot0 - Integer unit.`
			`;; - Load/Store unit.`
			`;; === QUEUE ==>`
			`;;`
			`;; m7_slot1 - Integer unit.`
			`;; - Branch unit.`
			`;; - Floating-point and graphics unit.`
			`;; - 3-cycles crypto unit.`

			`(define_cpu_unit "n7_slot0,n7_slot1" "niagara7_0")`

			`;; Some instructions stall the pipeline and avoid any other`
			`;; instruction to be issued in the same cycle. We assume the same for`
			`;; multi-instruction insns.`

			`(define_reservation "n7_single_issue" "n7_slot0 + n7_slot1")`

			`(define_insn_reservation "n7_single" 1`
			`(and (eq_attr "cpu" "niagara7")`
			`(eq_attr "type" "multi,savew,flushw,trap"))`
			`"n7_single_issue")`

			`;; Most of the instructions executing in the integer unit have a`
			`;; latency of 1.`

			`(define_insn_reservation "n7_integer" 1`
			`(and (eq_attr "cpu" "niagara7")`
			`(eq_attr "type" "ialu,ialuX,shift,cmove,compare"))`
			`"(n7_slot0 \| n7_slot1)")`

			`;; Flushing the instruction memory takes 27 cycles.`

			`(define_insn_reservation "n7_iflush" 27`
			`(and (eq_attr "cpu" "niagara7")`
			`(eq_attr "type" "iflush"))`
			`"(n7_slot0 \| n7_slot1), nothing*26")`

			`;; The integer multiplication instructions have a latency of 12 cycles`
			`;; and execute in the integer unit.`
			`;;`
			`;; Likewise for array*, edge* and pdistn instructions.`

			`(define_insn_reservation "n7_imul" 12`
			`(and (eq_attr "cpu" "niagara7")`
			`(eq_attr "type" "imul,array,edge,edgen,pdistn"))`
			`"(n7_slot0 \| n7_slot1), nothing*11")`

			`;; The integer division instructions have a latency of 35 cycles and`
			`;; execute in the integer unit.`

			`(define_insn_reservation "n7_idiv" 35`
			`(and (eq_attr "cpu" "niagara7")`
			`(eq_attr "type" "idiv"))`
			`"(n7_slot0 \| n7_slot1), nothing*34")`

			`;; Both integer and floating-point load instructions have a latency of`
			`;; 5 cycles, and execute in the slot0.`
			`;;`
			`;; The prefetch instruction also executes in the load/store unit, but`
			`;; its latency is only 1 cycle.`

			`(define_insn_reservation "n7_load" 5`
			`(and (eq_attr "cpu" "niagara7")`
			`(ior (eq_attr "type" "fpload,sload")`
			`(and (eq_attr "type" "load")`
			`(eq_attr "subtype" "regular"))))`
			`"n7_slot0, nothing*4")`

			`(define_insn_reservation "n7_prefetch" 1`
			`(and (eq_attr "cpu" "niagara7")`
			`(eq_attr "type" "load")`
			`(eq_attr "subtype" "prefetch"))`
			`"n7_slot0")`

			`;; Both integer and floating-point store instructions have a latency`
			`;; of 1 cycle, and execute in the load/store unit in slot0.`

			`(define_insn_reservation "n7_store" 1`
			`(and (eq_attr "cpu" "niagara7")`
			`(eq_attr "type" "store,fpstore"))`
			`"n7_slot0")`

			`;; Control-transfer instructions execute in the Branch Unit in the`
			`;; slot1.`

			`(define_insn_reservation "n7_cti" 1`
			`(and (eq_attr "cpu" "niagara7")`
			`(eq_attr "type" "cbcond,uncond_cbcond,branch,call,sibcall,call_no_delay_slot,uncond_branch,return"))`
			`"n7_slot1")`

			`;; Many instructions executing in the Floating-point and Graphics unit`
			`;; in the slot1 feature a latency of 11 cycles.`

			`(define_insn_reservation "n7_fp" 11`
			`(and (eq_attr "cpu" "niagara7")`
			`(ior (eq_attr "type" "fpmove,fpcmove,fpcrmove,fp,fpcmp,fpmul,fgm_pack,fgm_mul,pdist")`
			`(and (eq_attr "type" "fga")`
			`(eq_attr "subtype" "fpu,maxmin"))))`
			`"n7_slot1, nothing*10")`

			`;; Floating-point division and floating-point square-root instructions`
			`;; have high latencies. They execute in the floating-point and`
			`;; graphics unit in the slot1.`


			`(define_insn_reservation "n7_fpdivs" 24`
			`(and (eq_attr "cpu" "niagara7")`
			`(eq_attr "type" "fpdivs,fpsqrts"))`
			`"n7_slot1, nothing*23")`

			`(define_insn_reservation "n7_fpdivd" 37`
			`(and (eq_attr "cpu" "niagara7")`
			`(eq_attr "type" "fpdivd,fpsqrtd"))`
			`"n7_slot1, nothing*36")`

			`;; SIMD VIS instructions executing in the Floating-point and graphics`
			`;; unit (FPG) in slot1 usually have a latency of either 11 or 12`
			`;; cycles.`
			`;;`
			`;; However, the latency for many instructions is only 3 cycles if the`
			`;; consumer can also be executed in 3 cycles. We model this with a`
			`;; bypass. In these cases the instructions are executed in the`
			`;; 3-cycle crypto unit which also serves slot1.`

			`(define_insn_reservation "n7_vis_11cycles" 11`
			`(and (eq_attr "cpu" "niagara7")`
			`(ior (and (eq_attr "type" "fga")`
			`(eq_attr "subtype" "addsub64,other"))`
			`(and (eq_attr "type" "vismv")`
			`(eq_attr "subtype" "double,single"))`
			`(and (eq_attr "type" "visl")`
			`(eq_attr "subtype" "double,single"))))`
			`"n7_slot1, nothing*10")`

			`(define_insn_reservation "n7_vis_12cycles" 12`
			`(and (eq_attr "cpu" "niagara7")`
			`(ior (eq_attr "type" "bmask,viscmp")`
			`(and (eq_attr "type" "fga")`
			`(eq_attr "subtype" "cmask"))`
			`(and (eq_attr "type" "vismv")`
			`(eq_attr "subtype" "movstouw"))))`
			`"n7_slot1, nothing*11")`

			`(define_bypass 3 "n7_vis_*" "n7_vis_*")`

			`;; Some other VIS instructions have a latency of 12 cycles, and won't`
			`;; be executed in the 3-cycle crypto pipe.`

			`(define_insn_reservation "n7_lzd" 12`
			`(and (eq_attr "cpu" "niagara7")`
			`(ior (eq_attr "type" "lzd,")`
			`(and (eq_attr "type" "gsr")`
			`(eq_attr "subtype" "alignaddr"))))`
			`"n7_slot1, nothing*11")`

			`;; A couple of VIS instructions feature very low latencies in the M7.`

			`(define_insn_reservation "n7_single_vis" 1`
			`(and (eq_attr "cpu" "niagara7")`
			`(eq_attr "type" "vismv")`
			`(eq_attr "subtype" "movxtod"))`
			`"n7_slot1")`

			`(define_insn_reservation "n7_double_vis" 2`
			`(and (eq_attr "cpu" "niagara7")`
			`(eq_attr "type" "vismv")`
			`(eq_attr "subtype" "movdtox"))`
			`"n7_slot1, nothing")`

			`;; Reading and writing to the gsr register takes a high number of`
			`;; cycles that is not documented in the PRM. Let's use the same value`
			`;; than the M8.`

			`(define_insn_reservation "n7_gsr_reg" 70`
			`(and (eq_attr "cpu" "niagara7")`
			`(eq_attr "type" "gsr")`
			`(eq_attr "subtype" "reg"))`
			`"n7_slot1, nothing*70")`