Source file src/simd/archsimd/_gen/simdgen/xed.go

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package main
     6  
     7  import (
     8  	"fmt"
     9  	"log"
    10  	"maps"
    11  	"reflect"
    12  	"regexp"
    13  	"slices"
    14  	"strconv"
    15  	"strings"
    16  
    17  	"simd/archsimd/_gen/unify"
    18  
    19  	"golang.org/x/arch/x86/xeddata"
    20  	"gopkg.in/yaml.v3"
    21  )
    22  
// Register classes returned by decodeReg.
const (
	NOT_REG_CLASS = iota // not a register
	VREG_CLASS           // classify as a vector register (XMM/YMM/ZMM)
	GREG_CLASS           // classify as a general-purpose register
)
    28  
// instVariant is a bitmap indicating a variant of an instruction that has
// optional parameters.
type instVariant uint8

const (
	// instVariantNone is the base variant with no optional parameters.
	instVariantNone instVariant = 0

	// instVariantMasked indicates that this is the masked variant of an
	// optionally-masked instruction.
	instVariantMasked instVariant = 1 << iota
)
    40  
    41  var operandRemarks int
    42  
    43  // TODO: Doc. Returns Values with Def domains.
    44  func loadXED(xedPath string) []*unify.Value {
    45  	// TODO: Obviously a bunch more to do here.
    46  
    47  	db, err := xeddata.NewDatabase(xedPath)
    48  	if err != nil {
    49  		log.Fatalf("open database: %v", err)
    50  	}
    51  
    52  	var defs []*unify.Value
    53  	type opData struct {
    54  		inst *xeddata.Inst
    55  		ops  []operand
    56  		mem  string
    57  	}
    58  	// Maps from opcode to opdata(s).
    59  	memOps := make(map[string][]opData, 0)
    60  	otherOps := make(map[string][]opData, 0)
    61  	appendDefs := func(inst *xeddata.Inst, ops []operand, addFields map[string]string) {
    62  		applyQuirks(inst, ops)
    63  
    64  		defsPos := len(defs)
    65  		defs = append(defs, instToUVal(inst, ops, addFields)...)
    66  
    67  		if *flagDebugXED {
    68  			for i := defsPos; i < len(defs); i++ {
    69  				y, _ := yaml.Marshal(defs[i])
    70  				fmt.Printf("==>\n%s\n", y)
    71  			}
    72  		}
    73  	}
    74  	err = xeddata.WalkInsts(xedPath, func(inst *xeddata.Inst) {
    75  		inst.Pattern = xeddata.ExpandStates(db, inst.Pattern)
    76  
    77  		switch {
    78  		case inst.RealOpcode == "N":
    79  			return // Skip unstable instructions
    80  		case !(strings.HasPrefix(inst.Extension, "AVX") || strings.HasPrefix(inst.Extension, "SHA") || inst.Extension == "FMA"):
    81  			// We're only interested in AVX and SHA instructions.
    82  			return
    83  		}
    84  
    85  		if *flagDebugXED {
    86  			fmt.Printf("%s:\n%+v\n", inst.Pos, inst)
    87  		}
    88  
    89  		ops, err := decodeOperands(db, strings.Fields(inst.Operands))
    90  		if err != nil {
    91  			operandRemarks++
    92  			if *Verbose {
    93  				log.Printf("%s: [%s] %s", inst.Pos, inst.Opcode(), err)
    94  			}
    95  			return
    96  		}
    97  		var data map[string][]opData
    98  		mem := checkMem(ops)
    99  		if mem == "vbcst" {
   100  			// A pure vreg variant might exist, wait for later to see if we can
   101  			// merge them
   102  			data = memOps
   103  		} else {
   104  			data = otherOps
   105  		}
   106  		opcode := inst.Opcode()
   107  		if _, ok := data[opcode]; !ok {
   108  			s := make([]opData, 1)
   109  			s[0] = opData{inst, ops, mem}
   110  			data[opcode] = s
   111  		} else {
   112  			data[opcode] = append(data[opcode], opData{inst, ops, mem})
   113  		}
   114  	})
   115  	for _, s := range otherOps {
   116  		for _, o := range s {
   117  			addFields := map[string]string{}
   118  			if o.mem == "noMem" {
   119  				opcode := o.inst.Opcode()
   120  				// Checking if there is a vbcst variant of this operation exist
   121  				// First check the opcode
   122  				// Keep this logic in sync with [decodeOperands]
   123  				if ms, ok := memOps[opcode]; ok {
   124  					feat1, ok1 := decodeCPUFeature(o.inst)
   125  					// Then check if there exist such an operation that for all vreg
   126  					// shapes they are the same at the same index
   127  					var feat1Match, feat2Match string
   128  					matchIdx := -1
   129  					var featMismatchCnt int
   130  				outer:
   131  					for i, m := range ms {
   132  						// Their CPU feature should match first
   133  						var featMismatch bool
   134  						feat2, ok2 := decodeCPUFeature(m.inst)
   135  						if !ok1 || !ok2 {
   136  							continue
   137  						}
   138  						if feat1 != feat2 {
   139  							featMismatch = true
   140  							featMismatchCnt++
   141  						}
   142  						if len(o.ops) == len(m.ops) {
   143  							for j := range o.ops {
   144  								if reflect.TypeOf(o.ops[j]) == reflect.TypeOf(m.ops[j]) {
   145  									v1, ok3 := o.ops[j].(operandVReg)
   146  									v2, _ := m.ops[j].(operandVReg)
   147  									if !ok3 {
   148  										continue
   149  									}
   150  									if v1.vecShape != v2.vecShape {
   151  										// A mismatch, skip this memOp
   152  										continue outer
   153  									}
   154  								} else {
   155  									_, ok3 := o.ops[j].(operandVReg)
   156  									_, ok4 := m.ops[j].(operandMem)
   157  									// The only difference must be the vreg and mem, no other cases.
   158  									if !ok3 || !ok4 {
   159  										// A mismatch, skip this memOp
   160  										continue outer
   161  									}
   162  								}
   163  							}
   164  							// Found a match, break early
   165  							matchIdx = i
   166  							feat1Match = feat1
   167  							feat2Match = feat2
   168  							if featMismatchCnt > 1 {
   169  								panic("multiple feature mismatch vbcst memops detected, simdgen failed to distinguish")
   170  							}
   171  							if !featMismatch {
   172  								// Mismatch feat is ok but should prioritize matching cases.
   173  								break
   174  							}
   175  						}
   176  					}
   177  					// Remove the match from memOps, it's now merged to this pure vreg operation
   178  					if matchIdx != -1 {
   179  						memOps[opcode] = append(memOps[opcode][:matchIdx], memOps[opcode][matchIdx+1:]...)
   180  						// Merge is done by adding a new field
   181  						// Right now we only have vbcst
   182  						addFields["memFeatures"] = "vbcst"
   183  						if feat1Match != feat2Match {
   184  							addFields["memFeaturesData"] = fmt.Sprintf("feat1=%s;feat2=%s", feat1Match, feat2Match)
   185  						}
   186  					}
   187  				}
   188  			}
   189  			appendDefs(o.inst, o.ops, addFields)
   190  		}
   191  	}
   192  	for _, ms := range memOps {
   193  		for _, m := range ms {
   194  			if *Verbose {
   195  				log.Printf("mem op not merged: %s, %v\n", m.inst.Opcode(), m)
   196  			}
   197  			appendDefs(m.inst, m.ops, nil)
   198  		}
   199  	}
   200  	if err != nil {
   201  		log.Fatalf("walk insts: %v", err)
   202  	}
   203  
   204  	if len(unknownFeatures) > 0 {
   205  		if !*Verbose {
   206  			nInst := 0
   207  			for _, insts := range unknownFeatures {
   208  				nInst += len(insts)
   209  			}
   210  			log.Printf("%d unhandled CPU features for %d instructions (use -v for details)", len(unknownFeatures), nInst)
   211  		} else {
   212  			keys := slices.Sorted(maps.Keys(unknownFeatures))
   213  			for _, key := range keys {
   214  				log.Printf("unhandled ISASet %s", key)
   215  				log.Printf("  opcodes: %s", slices.Sorted(maps.Keys(unknownFeatures[key])))
   216  			}
   217  		}
   218  	}
   219  
   220  	return defs
   221  }
   222  
var (
	// maskRequiredRe matches opcodes whose mask operand must be required
	// even though XED marks it optional; see applyQuirks.
	maskRequiredRe = regexp.MustCompile(`VPCOMPRESS[BWDQ]|VCOMPRESSP[SD]|VPEXPAND[BWDQ]|VEXPANDP[SD]`)
	// maskOptionalRe matches opcodes whose read mask should be optional
	// even though XED doesn't mark it so; see applyQuirks.
	maskOptionalRe = regexp.MustCompile(`VPCMP(EQ|GT|U)?[BWDQ]|VCMPP[SD]`)
)
   227  
   228  func applyQuirks(inst *xeddata.Inst, ops []operand) {
   229  	opc := inst.Opcode()
   230  	switch {
   231  	case maskRequiredRe.MatchString(opc):
   232  		// The mask on these instructions is marked optional, but the
   233  		// instruction is pointless without the mask.
   234  		for i, op := range ops {
   235  			if op, ok := op.(operandMask); ok {
   236  				op.optional = false
   237  				ops[i] = op
   238  			}
   239  		}
   240  
   241  	case maskOptionalRe.MatchString(opc):
   242  		// Conversely, these masks should be marked optional and aren't.
   243  		for i, op := range ops {
   244  			if op, ok := op.(operandMask); ok && op.action.r {
   245  				op.optional = true
   246  				ops[i] = op
   247  			}
   248  		}
   249  	}
   250  }
   251  
// operandCommon holds the fields shared by every operand kind.
type operandCommon struct {
	action operandAction
}

// operandAction defines whether this operand is read and/or written.
//
// TODO: Should this live in [xeddata.Operand]?
type operandAction struct {
	r  bool // Read
	w  bool // Written
	cr bool // Read is conditional (implies r==true)
	cw bool // Write is conditional (implies w==true)
}
   265  
// operandMem is a memory operand.
type operandMem struct {
	operandCommon
	vecShape
	elemBaseType scalarBaseType
	// The following fields are not flushed to the final output.
	// vbcst reports support for full-vector broadcasting; it implies the
	// operand has a "vv" (vector vector) type specified in width and the
	// instruction carries attribute TXT=BCASTSTR.
	vbcst   bool
	unknown bool // unknown kind
}

// vecShape describes the element and total widths of a vector operand.
type vecShape struct {
	elemBits  int    // Element size in bits
	bits      int    // Register width in bits (total vector bits)
	fixedName string // the fixed register name
}
   282  
type operandVReg struct { // Vector register
	operandCommon
	vecShape
	elemBaseType scalarBaseType
}

type operandGReg struct { // General-purpose register
	operandCommon
	vecShape
	elemBaseType scalarBaseType
}
   294  
// operandMask is a vector mask.
//
// Regardless of the actual mask representation, the [vecShape] of this operand
// corresponds to the "bit for bit" type of mask. That is, elemBits gives the
// element width covered by each mask element, and bits/elemBits gives the total
// number of mask elements. (bits gives the total number of bits as if this were
// a bit-for-bit mask, which may be meaningless on its own.)
type operandMask struct {
	operandCommon
	vecShape
	// Bits in the mask is w/bits.

	allMasks bool // If set, size cannot be inferred because all operands are masks.

	// Mask can be omitted, in which case it defaults to K0/"no mask"
	optional bool
}

// operandImm is an immediate operand.
type operandImm struct {
	operandCommon
	bits int // Immediate size in bits
}

// operand is implemented by every operand kind and exposes the shared
// fields plus the conversion into unify Def fields.
type operand interface {
	common() operandCommon
	addToDef(b *unify.DefBuilder)
}
   322  
   323  func strVal(s any) *unify.Value {
   324  	return unify.NewValue(unify.NewStringExact(fmt.Sprint(s)))
   325  }
   326  
// common returns the embedded operandCommon, satisfying the operand
// interface for every type that embeds operandCommon.
func (o operandCommon) common() operandCommon {
	return o
}
   330  
// addToDef records this memory operand's fields in b. Operands of unknown
// kind contribute only their class.
func (o operandMem) addToDef(b *unify.DefBuilder) {
	b.Add("class", strVal("memory"))
	if o.unknown {
		return
	}
	baseDomain, err := unify.NewStringRegex(o.elemBaseType.regex())
	if err != nil {
		panic("parsing baseRe: " + err.Error())
	}
	b.Add("base", unify.NewValue(baseDomain))
	b.Add("bits", strVal(o.bits))
	// elemBits == bits leaves the element width unconstrained.
	if o.elemBits != o.bits {
		b.Add("elemBits", strVal(o.elemBits))
	}
}
   346  
// addToDef records this vector-register operand's fields in b.
func (o operandVReg) addToDef(b *unify.DefBuilder) {
	baseDomain, err := unify.NewStringRegex(o.elemBaseType.regex())
	if err != nil {
		panic("parsing baseRe: " + err.Error())
	}
	b.Add("class", strVal("vreg"))
	b.Add("bits", strVal(o.bits))
	b.Add("base", unify.NewValue(baseDomain))
	// If elemBits == bits, then the vector can be ANY shape. This happens with,
	// for example, logical ops.
	if o.elemBits != o.bits {
		b.Add("elemBits", strVal(o.elemBits))
	}
	if o.fixedName != "" {
		b.Add("fixedReg", strVal(o.fixedName))
	}
}
   364  
// addToDef records this general-register operand's fields in b.
func (o operandGReg) addToDef(b *unify.DefBuilder) {
	baseDomain, err := unify.NewStringRegex(o.elemBaseType.regex())
	if err != nil {
		panic("parsing baseRe: " + err.Error())
	}
	b.Add("class", strVal("greg"))
	b.Add("bits", strVal(o.bits))
	b.Add("base", unify.NewValue(baseDomain))
	// elemBits == bits leaves the element width unconstrained.
	if o.elemBits != o.bits {
		b.Add("elemBits", strVal(o.elemBits))
	}
	if o.fixedName != "" {
		b.Add("fixedReg", strVal(o.fixedName))
	}
}
   380  
// addToDef records this mask operand's fields in b.
func (o operandMask) addToDef(b *unify.DefBuilder) {
	b.Add("class", strVal("mask"))
	if o.allMasks {
		// If all operands are masks, omit sizes and let unification determine mask sizes.
		return
	}
	b.Add("elemBits", strVal(o.elemBits))
	b.Add("bits", strVal(o.bits))
	if o.fixedName != "" {
		b.Add("fixedReg", strVal(o.fixedName))
	}
}
   393  
// addToDef records this immediate operand's class and bit width in b.
func (o operandImm) addToDef(b *unify.DefBuilder) {
	b.Add("class", strVal("immediate"))
	b.Add("bits", strVal(o.bits))
}
   398  
// actionEncoding maps XED operand action strings to their decoded form.
var actionEncoding = map[string]operandAction{
	"r":   {r: true},
	"cr":  {r: true, cr: true},
	"w":   {w: true},
	"cw":  {w: true, cw: true},
	"rw":  {r: true, w: true},
	"crw": {r: true, w: true, cr: true},
	"rcw": {r: true, w: true, cw: true},
}
   408  
   409  func decodeOperand(db *xeddata.Database, operand string) (operand, error) {
   410  	op, err := xeddata.NewOperand(db, operand)
   411  	if err != nil {
   412  		log.Fatalf("parsing operand %q: %v", operand, err)
   413  	}
   414  	if *flagDebugXED {
   415  		fmt.Printf("  %+v\n", op)
   416  	}
   417  
   418  	if strings.HasPrefix(op.Name, "EMX_BROADCAST") {
   419  		// This refers to a set of macros defined in all-state.txt that set a
   420  		// BCAST operand to various fixed values. But the BCAST operand is
   421  		// itself suppressed and "internal", so I think we can just ignore this
   422  		// operand.
   423  		return nil, nil
   424  	}
   425  
   426  	// TODO: See xed_decoded_inst_operand_action. This might need to be more
   427  	// complicated.
   428  	action, ok := actionEncoding[op.Action]
   429  	if !ok {
   430  		return nil, fmt.Errorf("unknown action %q", op.Action)
   431  	}
   432  	common := operandCommon{action: action}
   433  
   434  	lhs := op.NameLHS()
   435  	if strings.HasPrefix(lhs, "MEM") {
   436  		// looks like XED data has an inconsistency on VPADDD, marking attribute
   437  		// VPBROADCASTD instead of the canonical BCASTSTR.
   438  		if op.Width == "vv" && (op.Attributes["TXT=BCASTSTR"] ||
   439  			op.Attributes["TXT=VPBROADCASTD"]) {
   440  			baseType, elemBits, ok := decodeType(op)
   441  			if !ok {
   442  				return nil, fmt.Errorf("failed to decode memory width %q", operand)
   443  			}
   444  			// This operand has two possible width([bits]):
   445  			// 1. the same as the other operands
   446  			// 2. the element width as the other operands (broaccasting)
   447  			// left it default to 2, later we will set a new field in the operation
   448  			// to indicate this dual-width property.
   449  			shape := vecShape{elemBits: elemBits, bits: elemBits}
   450  			return operandMem{
   451  				operandCommon: common,
   452  				vecShape:      shape,
   453  				elemBaseType:  baseType,
   454  				vbcst:         true,
   455  				unknown:       false,
   456  			}, nil
   457  		}
   458  		// TODO: parse op.Width better to handle all cases
   459  		// Right now this will at least miss VPBROADCAST.
   460  		return operandMem{
   461  			operandCommon: common,
   462  			unknown:       true,
   463  		}, nil
   464  	} else if strings.HasPrefix(lhs, "REG") {
   465  		if op.Width == "mskw" {
   466  			// The mask operand doesn't specify a width. We have to infer it.
   467  			//
   468  			// XED uses the marker ZEROSTR to indicate that a mask operand is
   469  			// optional and, if omitted, implies K0, aka "no mask".
   470  			return operandMask{
   471  				operandCommon: common,
   472  				optional:      op.Attributes["TXT=ZEROSTR"],
   473  			}, nil
   474  		} else {
   475  			class, regBits, fixedReg := decodeReg(op)
   476  			if class == NOT_REG_CLASS {
   477  				return nil, fmt.Errorf("failed to decode register %q", operand)
   478  			}
   479  			baseType, elemBits, ok := decodeType(op)
   480  			if !ok {
   481  				return nil, fmt.Errorf("failed to decode register width %q", operand)
   482  			}
   483  			shape := vecShape{elemBits: elemBits, bits: regBits, fixedName: fixedReg}
   484  			if class == VREG_CLASS {
   485  				return operandVReg{
   486  					operandCommon: common,
   487  					vecShape:      shape,
   488  					elemBaseType:  baseType,
   489  				}, nil
   490  			}
   491  			// general register
   492  			m := min(shape.bits, shape.elemBits)
   493  			shape.bits, shape.elemBits = m, m
   494  			return operandGReg{
   495  				operandCommon: common,
   496  				vecShape:      shape,
   497  				elemBaseType:  baseType,
   498  			}, nil
   499  
   500  		}
   501  	} else if strings.HasPrefix(lhs, "IMM") {
   502  		_, bits, ok := decodeType(op)
   503  		if !ok {
   504  			return nil, fmt.Errorf("failed to decode register width %q", operand)
   505  		}
   506  		return operandImm{
   507  			operandCommon: common,
   508  			bits:          bits,
   509  		}, nil
   510  	}
   511  
   512  	// TODO: BASE and SEG
   513  	return nil, fmt.Errorf("unknown operand LHS %q in %q", lhs, operand)
   514  }
   515  
   516  func decodeOperands(db *xeddata.Database, operands []string) (ops []operand, err error) {
   517  	// Decode the XED operand descriptions.
   518  	for _, o := range operands {
   519  		op, err := decodeOperand(db, o)
   520  		if err != nil {
   521  			return nil, err
   522  		}
   523  		if op != nil {
   524  			ops = append(ops, op)
   525  		}
   526  	}
   527  
   528  	// XED doesn't encode the size of mask operands. If there are mask operands,
   529  	// try to infer their sizes from other operands.
   530  	if err := inferMaskSizes(ops); err != nil {
   531  		return nil, fmt.Errorf("%w in operands %+v", err, operands)
   532  	}
   533  
   534  	return ops, nil
   535  }
   536  
// inferMaskSizes fills in the vecShape of mask operands in ops, which XED
// does not encode, by inferring it from the vector-register operands.
// It runs the inference once for read masks and once for write masks.
func inferMaskSizes(ops []operand) error {
	// This is a heuristic and it falls apart in some cases:
	//
	// - Mask operations like KAND[BWDQ] have *nothing* in the XED to indicate
	// mask size.
	//
	// - VINSERT*, VPSLL*, VPSRA*, and VPSRL* and some others naturally have
	// mixed input sizes and the XED doesn't indicate which operands the mask
	// applies to.
	//
	// - VPDP* and VP4DP* have really complex mixed operand patterns.
	//
	// I think for these we may just have to hand-write a table of which
	// operands each mask applies to.
	inferMask := func(r, w bool) error {
		var masks []int
		var rSizes, wSizes, sizes []vecShape
		allMasks := true
		hasWMask := false
		// Collect the indexes of the masks to infer and the shapes of the
		// vector-register operands, bucketed by read/write direction.
		for i, op := range ops {
			action := op.common().action
			if _, ok := op.(operandMask); ok {
				if action.r && action.w {
					return fmt.Errorf("unexpected rw mask")
				}
				if action.r == r || action.w == w {
					masks = append(masks, i)
				}
				if action.w {
					hasWMask = true
				}
			} else {
				allMasks = false
				if reg, ok := op.(operandVReg); ok {
					if action.r {
						rSizes = append(rSizes, reg.vecShape)
					}
					if action.w {
						wSizes = append(wSizes, reg.vecShape)
					}
				}
			}
		}
		if len(masks) == 0 {
			return nil
		}

		// Prefer shapes from the same direction as the mask; fall back to
		// the opposite direction if there are none.
		if r {
			sizes = rSizes
			if len(sizes) == 0 {
				sizes = wSizes
			}
		}
		if w {
			sizes = wSizes
			if len(sizes) == 0 {
				sizes = rSizes
			}
		}

		if len(sizes) == 0 {
			// If all operands are masks, leave the mask inferrence to the users.
			if allMasks {
				for _, i := range masks {
					m := ops[i].(operandMask)
					m.allMasks = true
					ops[i] = m
				}
				return nil
			}
			return fmt.Errorf("cannot infer mask size: no register operands")
		}
		shape, ok := singular(sizes)
		if !ok {
			if !hasWMask && len(wSizes) == 1 && len(masks) == 1 {
				// This pattern looks like predicate mask, so its shape should align with the
				// output. TODO: verify this is a safe assumption.
				shape = wSizes[0]
			} else {
				return fmt.Errorf("cannot infer mask size: multiple register sizes %v", sizes)
			}
		}
		// Stamp the inferred shape onto every collected mask.
		for _, i := range masks {
			m := ops[i].(operandMask)
			m.vecShape = shape
			ops[i] = m
		}
		return nil
	}
	if err := inferMask(true, false); err != nil {
		return err
	}
	if err := inferMask(false, true); err != nil {
		return err
	}
	return nil
}
   634  
// addOperandsToDef adds "in", "inVariant", and "out" to an instruction Def.
//
// Optional mask input operands are added to the inVariant field if
// variant&instVariantMasked, and omitted otherwise.
func addOperandsToDef(ops []operand, instDB *unify.DefBuilder, variant instVariant) {
	var inVals, inVar, outVals []*unify.Value
	asmPos := 0
	for _, op := range ops {
		var db unify.DefBuilder
		op.addToDef(&db)
		db.Add("asmPos", unify.NewValue(unify.NewStringExact(fmt.Sprint(asmPos))))

		action := op.common().action
		asmCount := 1 // # of assembly operands; 0 or 1
		if action.r {
			inVal := unify.NewValue(db.Build())
			// If this is an optional mask, put it in the input variant tuple.
			if mask, ok := op.(operandMask); ok && mask.optional {
				if variant&instVariantMasked != 0 {
					inVar = append(inVar, inVal)
				} else {
					// This operand doesn't appear in the assembly at all.
					asmCount = 0
				}
			} else {
				// Just a regular input operand.
				inVals = append(inVals, inVal)
			}
		}
		// A read-write operand appears in both "in" and "out".
		if action.w {
			outVal := unify.NewValue(db.Build())
			outVals = append(outVals, outVal)
		}

		asmPos += asmCount
	}

	instDB.Add("in", unify.NewValue(unify.NewTuple(inVals...)))
	instDB.Add("inVariant", unify.NewValue(unify.NewTuple(inVar...)))
	instDB.Add("out", unify.NewValue(unify.NewTuple(outVals...)))
	memFeatures := checkMem(ops)
	if memFeatures != "noMem" {
		instDB.Add("memFeatures", unify.NewValue(unify.NewStringExact(memFeatures)))
	}
}
   680  
   681  // checkMem checks the shapes of memory operand in the operation and returns the shape.
   682  // Keep this function in sync with [decodeOperand].
   683  func checkMem(ops []operand) string {
   684  	memState := "noMem"
   685  	var mem *operandMem
   686  	memCnt := 0
   687  	for _, op := range ops {
   688  		if m, ok := op.(operandMem); ok {
   689  			mem = &m
   690  			memCnt++
   691  		}
   692  	}
   693  	if mem != nil {
   694  		if mem.unknown {
   695  			memState = "unknown"
   696  		} else if memCnt > 1 {
   697  			memState = "tooManyMem"
   698  		} else {
   699  			// We only have vbcst case as of now.
   700  			// This shape has an indication that [bits] fields has two possible value:
   701  			// 1. The element broadcast width, which is its peer vreg operand's [elemBits] (default val in the parsed XED data)
   702  			// 2. The full vector width, which is its peer vreg operand's [bits] (godefs should be aware of this)
   703  			memState = "vbcst"
   704  		}
   705  	}
   706  	return memState
   707  }
   708  
   709  func instToUVal(inst *xeddata.Inst, ops []operand, addFields map[string]string) []*unify.Value {
   710  	feature, ok := decodeCPUFeature(inst)
   711  	if !ok {
   712  		return nil
   713  	}
   714  
   715  	var vals []*unify.Value
   716  	vals = append(vals, instToUVal1(inst, ops, feature, instVariantNone, addFields))
   717  	if hasOptionalMask(ops) {
   718  		vals = append(vals, instToUVal1(inst, ops, feature, instVariantMasked, addFields))
   719  	}
   720  	return vals
   721  }
   722  
   723  func instToUVal1(inst *xeddata.Inst, ops []operand, feature string, variant instVariant, addFields map[string]string) *unify.Value {
   724  	var db unify.DefBuilder
   725  	db.Add("goarch", unify.NewValue(unify.NewStringExact("amd64")))
   726  	db.Add("asm", unify.NewValue(unify.NewStringExact(inst.Opcode())))
   727  	addOperandsToDef(ops, &db, variant)
   728  	db.Add("cpuFeature", unify.NewValue(unify.NewStringExact(feature)))
   729  	for k, v := range addFields {
   730  		db.Add(k, unify.NewValue(unify.NewStringExact(v)))
   731  	}
   732  
   733  	if strings.Contains(inst.Pattern, "ZEROING=0") {
   734  		// This is an EVEX instruction, but the ".Z" (zero-merging)
   735  		// instruction flag is NOT valid. EVEX.z must be zero.
   736  		//
   737  		// This can mean a few things:
   738  		//
   739  		// - The output of an instruction is a mask, so merging modes don't
   740  		// make any sense. E.g., VCMPPS.
   741  		//
   742  		// - There are no masks involved anywhere. (Maybe MASK=0 is also set
   743  		// in this case?) E.g., VINSERTPS.
   744  		//
   745  		// - The operation inherently performs merging. E.g., VCOMPRESSPS
   746  		// with a mem operand.
   747  		//
   748  		// There may be other reasons.
   749  		db.Add("zeroing", unify.NewValue(unify.NewStringExact("false")))
   750  	}
   751  	pos := unify.Pos{Path: inst.Pos.Path, Line: inst.Pos.Line}
   752  	return unify.NewValuePos(db.Build(), pos)
   753  }
   754  
   755  // decodeCPUFeature returns the CPU feature name required by inst. These match
   756  // the names of the "Has*" feature checks in the simd package.
   757  func decodeCPUFeature(inst *xeddata.Inst) (string, bool) {
   758  	isaSet := inst.ISASet
   759  	if isaSet == "" {
   760  		// Older instructions don't have an ISA set. Use their "extension"
   761  		// instead.
   762  		isaSet = inst.Extension
   763  	}
   764  	// We require AVX512VL to use AVX512 at all, so strip off the vector length
   765  	// suffixes.
   766  	if strings.HasPrefix(isaSet, "AVX512") {
   767  		isaSet = isaSetVL.ReplaceAllLiteralString(isaSet, "")
   768  	}
   769  
   770  	feat, ok := cpuFeatureMap[isaSet]
   771  	if !ok {
   772  		imap := unknownFeatures[isaSet]
   773  		if imap == nil {
   774  			imap = make(map[string]struct{})
   775  			unknownFeatures[isaSet] = imap
   776  		}
   777  		imap[inst.Opcode()] = struct{}{}
   778  		return "", false
   779  	}
   780  	if feat == "ignore" {
   781  		return "", false
   782  	}
   783  	return feat, true
   784  }
   785  
   786  var isaSetVL = regexp.MustCompile("_(128N?|256N?|512)$")
   787  
// cpuFeatureMap maps from XED's "ISA_SET" (or "EXTENSION") to a CPU feature
// name to expose in the SIMD feature check API. The special value "ignore"
// causes decodeCPUFeature to drop the instruction entirely.
//
// See XED's datafiles/*/cpuid.xed.txt for how ISA set names map to CPUID flags.
var cpuFeatureMap = map[string]string{
	"AVX":      "AVX",
	"AVX_VNNI": "AVXVNNI",
	"AVX2":     "AVX2",
	"AVXAES":   "AVXAES",
	"SHA":      "SHA",
	"FMA":      "FMA",

	// AVX-512 foundational features. We combine all of these into one "AVX512" feature.
	"AVX512F":  "AVX512",
	"AVX512BW": "AVX512",
	"AVX512CD": "AVX512",
	"AVX512DQ": "AVX512",
	// AVX512VL doesn't appear as its own ISASet; instead, the CPUID flag is
	// required by the *_128 and *_256 ISASets. We fold it into "AVX512" anyway.

	// AVX-512 extension features
	"AVX512_BITALG":     "AVX512BITALG",
	"AVX512_GFNI":       "AVX512GFNI",
	"AVX512_VBMI":       "AVX512VBMI",
	"AVX512_VBMI2":      "AVX512VBMI2",
	"AVX512_VNNI":       "AVX512VNNI",
	"AVX512_VPOPCNTDQ":  "AVX512VPOPCNTDQ",
	"AVX512_VAES":       "AVX512VAES",
	"AVX512_VPCLMULQDQ": "AVX512VPCLMULQDQ",

	// AVX 10.2 (not yet supported)
	"AVX10_2_RC": "ignore",
}
   821  
// init registers the amd64 CPU feature implication graph used by the
// generated feature checks. NOTE(review): the exact semantics of Implies/
// Virtual come from registerFeatureInfo, defined elsewhere in this package.
func init() {
	// TODO: In general, Intel doesn't make any guarantees about what flags are
	// set, so this means our feature checks need to ensure these, just to be
	// sure.
	var features = map[string]featureInfo{
		"AVX2":   {Implies: []string{"AVX"}},
		"AVX512": {Implies: []string{"AVX2"}},

		"AVXAES": {Virtual: true, Implies: []string{"AVX", "AES"}},
		"FMA":    {Implies: []string{"AVX"}},

		// AVX-512 subfeatures.
		"AVX512BITALG":    {Implies: []string{"AVX512"}},
		"AVX512GFNI":      {Implies: []string{"AVX512"}},
		"AVX512VBMI":      {Implies: []string{"AVX512"}},
		"AVX512VBMI2":     {Implies: []string{"AVX512"}},
		"AVX512VNNI":      {Implies: []string{"AVX512"}},
		"AVX512VPOPCNTDQ": {Implies: []string{"AVX512"}},
		"AVX512VAES":      {Implies: []string{"AVX512"}},

		// AVX-VNNI and AVX-IFMA are "backports" of the AVX512-VNNI/IFMA
		// instructions to VEX encoding, limited to 256 bit vectors. They're
		// intended for lower end CPUs that want to support VNNI/IFMA without
		// supporting AVX-512. As such, they're built on AVX2's VEX encoding.
		"AVXVNNI": {Implies: []string{"AVX2"}},
		"AVXIFMA": {Implies: []string{"AVX2"}},
	}
	registerFeatureInfo("amd64", goarchFeatures{
		featureVar: "X86",
		features:   features,
	})
}
   854  
   855  var unknownFeatures = map[string]map[string]struct{}{}
   856  
   857  // hasOptionalMask returns whether there is an optional mask operand in ops.
   858  func hasOptionalMask(ops []operand) bool {
   859  	for _, op := range ops {
   860  		if op, ok := op.(operandMask); ok && op.optional {
   861  			return true
   862  		}
   863  	}
   864  	return false
   865  }
   866  
   867  func singular[T comparable](xs []T) (T, bool) {
   868  	if len(xs) == 0 {
   869  		return *new(T), false
   870  	}
   871  	for _, x := range xs[1:] {
   872  		if x != xs[0] {
   873  			return *new(T), false
   874  		}
   875  	}
   876  	return xs[0], true
   877  }
   878  
// fixedReg describes a register operand that is pinned to one specific
// architectural register.
type fixedReg struct {
	class int    // one of NOT_REG_CLASS, VREG_CLASS, GREG_CLASS
	name  string // assembler name of the fixed register
	width int    // register width in bits
}

// fixedRegMap maps XED fixed-register operand RHS names to their decoded form.
var fixedRegMap = map[string]fixedReg{
	"XED_REG_XMM0": {VREG_CLASS, "x0", 128},
}
   888  
   889  // decodeReg returns class (NOT_REG_CLASS, VREG_CLASS, GREG_CLASS, VREG_CLASS_FIXED,
   890  // GREG_CLASS_FIXED), width in bits and reg name(if fixed).
   891  // If the operand cannot be decided as a register, then the clas is NOT_REG_CLASS.
   892  func decodeReg(op *xeddata.Operand) (class, width int, name string) {
   893  	// op.Width tells us the total width, e.g.,:
   894  	//
   895  	//    dq => 128 bits (XMM)
   896  	//    qq => 256 bits (YMM)
   897  	//    mskw => K
   898  	//    z[iuf?](8|16|32|...) => 512 bits (ZMM)
   899  	//
   900  	// But the encoding is really weird and it's not clear if these *always*
   901  	// mean XMM/YMM/ZMM or if other irregular things can use these large widths.
   902  	// Hence, we dig into the register sets themselves.
   903  
   904  	if !strings.HasPrefix(op.NameLHS(), "REG") {
   905  		return NOT_REG_CLASS, 0, ""
   906  	}
   907  	// TODO: We shouldn't be relying on the macro naming conventions. We should
   908  	// use all-dec-patterns.txt, but xeddata doesn't support that table right now.
   909  	rhs := op.NameRHS()
   910  	if !strings.HasSuffix(rhs, "()") {
   911  		if fixedReg, ok := fixedRegMap[rhs]; ok {
   912  			return fixedReg.class, fixedReg.width, fixedReg.name
   913  		}
   914  		return NOT_REG_CLASS, 0, ""
   915  	}
   916  	switch {
   917  	case strings.HasPrefix(rhs, "XMM_"):
   918  		return VREG_CLASS, 128, ""
   919  	case strings.HasPrefix(rhs, "YMM_"):
   920  		return VREG_CLASS, 256, ""
   921  	case strings.HasPrefix(rhs, "ZMM_"):
   922  		return VREG_CLASS, 512, ""
   923  	case strings.HasPrefix(rhs, "GPR64_"), strings.HasPrefix(rhs, "VGPR64_"):
   924  		return GREG_CLASS, 64, ""
   925  	case strings.HasPrefix(rhs, "GPR32_"), strings.HasPrefix(rhs, "VGPR32_"):
   926  		return GREG_CLASS, 32, ""
   927  	}
   928  	return NOT_REG_CLASS, 0, ""
   929  }
   930  
// xtypeRe matches the regular XED xtypes: a base-type letter (i=int, u=uint,
// f=float) followed by the element width in bits, e.g. "i8", "u32", "f64".
var xtypeRe = regexp.MustCompile(`^([iuf])([0-9]+)$`)
   932  
   933  // scalarBaseType describes the base type of a scalar element. This is a Go
   934  // type, but without the bit width suffix (with the exception of
   935  // scalarBaseIntOrUint).
   936  type scalarBaseType int
   937  
   938  const (
   939  	scalarBaseInt scalarBaseType = iota
   940  	scalarBaseUint
   941  	scalarBaseIntOrUint // Signed or unsigned is unspecified
   942  	scalarBaseFloat
   943  	scalarBaseComplex
   944  	scalarBaseBFloat
   945  	scalarBaseHFloat
   946  )
   947  
   948  func (s scalarBaseType) regex() string {
   949  	switch s {
   950  	case scalarBaseInt:
   951  		return "int"
   952  	case scalarBaseUint:
   953  		return "uint"
   954  	case scalarBaseIntOrUint:
   955  		return "int|uint"
   956  	case scalarBaseFloat:
   957  		return "float"
   958  	case scalarBaseComplex:
   959  		return "complex"
   960  	case scalarBaseBFloat:
   961  		return "BFloat"
   962  	case scalarBaseHFloat:
   963  		return "HFloat"
   964  	}
   965  	panic(fmt.Sprintf("unknown scalar base type %d", s))
   966  }
   967  
   968  func decodeType(op *xeddata.Operand) (base scalarBaseType, bits int, ok bool) {
   969  	// The xtype tells you the element type. i8, i16, i32, i64, f32, etc.
   970  	//
   971  	// TODO: Things like AVX2 VPAND have an xtype of u256 because they're
   972  	// element-width agnostic. Do I map that to all widths, or just omit the
   973  	// element width and let unification flesh it out? There's no u512
   974  	// (presumably those are all masked, so elem width matters). These are all
   975  	// Category: LOGICAL, so maybe we could use that info?
   976  
   977  	// Handle some weird ones.
   978  	switch op.Xtype {
   979  	// 8-bit float formats as defined by Open Compute Project "OCP 8-bit
   980  	// Floating Point Specification (OFP8)".
   981  	case "bf8": // E5M2 float
   982  		return scalarBaseBFloat, 8, true
   983  	case "hf8": // E4M3 float
   984  		return scalarBaseHFloat, 8, true
   985  	case "bf16": // bfloat16 float
   986  		return scalarBaseBFloat, 16, true
   987  	case "2f16":
   988  		// Complex consisting of 2 float16s. Doesn't exist in Go, but we can say
   989  		// what it would be.
   990  		return scalarBaseComplex, 32, true
   991  	case "2i8", "2I8":
   992  		// These just use the lower INT8 in each 16 bit field.
   993  		// As far as I can tell, "2I8" is a typo.
   994  		return scalarBaseInt, 8, true
   995  	case "2u16", "2U16":
   996  		// some VPDP* has it
   997  		// TODO: does "z" means it has zeroing?
   998  		return scalarBaseUint, 16, true
   999  	case "2i16", "2I16":
  1000  		// some VPDP* has it
  1001  		return scalarBaseInt, 16, true
  1002  	case "4u8", "4U8":
  1003  		// some VPDP* has it
  1004  		return scalarBaseUint, 8, true
  1005  	case "4i8", "4I8":
  1006  		// some VPDP* has it
  1007  		return scalarBaseInt, 8, true
  1008  	}
  1009  
  1010  	// The rest follow a simple pattern.
  1011  	m := xtypeRe.FindStringSubmatch(op.Xtype)
  1012  	if m == nil {
  1013  		// TODO: Report unrecognized xtype
  1014  		return 0, 0, false
  1015  	}
  1016  	bits, _ = strconv.Atoi(m[2])
  1017  	switch m[1] {
  1018  	case "i", "u":
  1019  		// XED is rather inconsistent about what's signed, unsigned, or doesn't
  1020  		// matter, so merge them together and let the Go definitions narrow as
  1021  		// appropriate. Maybe there's a better way to do this.
  1022  		return scalarBaseIntOrUint, bits, true
  1023  	case "f":
  1024  		return scalarBaseFloat, bits, true
  1025  	default:
  1026  		panic("unreachable")
  1027  	}
  1028  }
  1029  

View as plain text