LLVM学习笔记(44)

3.6.2.3.3. 处理器特征数据

TD文件里通过SubtargetFeature定义来描述处理器所支持的指令集。反映到LLVM上,就体现为类型X86Subtarget(是下面输出的X86GenSubtargetInfo类型的派生类)里的一系列开关。对每个处理器,显然会有一个对应的X86Subtarget实例。

SubtargetEmitter::run(续)

1468     OS << "\n#ifdef GET_SUBTARGETINFO_TARGET_DESC\n";

1469     OS << "#undef GET_SUBTARGETINFO_TARGET_DESC\n";

1470  

1471     OS << "#include \"llvm/Support/Debug.h\"\n";

1472     OS << "#include \"llvm/Support/raw_ostream.h\"\n";

1473     ParseFeaturesFunction(OS, NumFeatures, NumProcs);

1474  

1475     OS << "#endif // GET_SUBTARGETINFO_TARGET_DESC\n\n";

1476  

1477     // Create a TargetSubtargetInfo subclass to hide the MC layer initialization.

1478     OS << "\n#ifdef GET_SUBTARGETINFO_HEADER\n";

1479     OS << "#undef GET_SUBTARGETINFO_HEADER\n";

1480  

1481     std::string ClassName = Target + "GenSubtargetInfo";

1482     OS << "namespace llvm {\n";

1483     OS << "class DFAPacketizer;\n";

1484     OS << "struct " << ClassName << " : public TargetSubtargetInfo {\n"

1485        << "  explicit " << ClassName << "(const Triple &TT, StringRef CPU, "

1486        << "StringRef FS);\n"

1487        << "public:\n"

1488        << "  unsigned resolveSchedClass(unsigned SchedClass, "

1489        << " const MachineInstr *DefMI,"

1490        << " const TargetSchedModel *SchedModel) const override;\n"

1491        << "  DFAPacketizer *createDFAPacketizer(const InstrItineraryData *IID)"

1492        << " const;\n"

1493        << "};\n";

1494     OS << "} // End llvm namespace \n";

1495  

1496     OS << "#endif // GET_SUBTARGETINFO_HEADER\n\n";

1497  

1498     OS << "\n#ifdef GET_SUBTARGETINFO_CTOR\n";

1499     OS << "#undef GET_SUBTARGETINFO_CTOR\n";

1500  

1501     OS << "#include \"llvm/CodeGen/TargetSchedule.h\"\n";

1502     OS << "namespace llvm {\n";

1503     OS << "extern const llvm::SubtargetFeatureKV " << Target << "FeatureKV[];\n";

1504     OS << "extern const llvm::SubtargetFeatureKV " << Target << "SubTypeKV[];\n";

1505     OS << "extern const llvm::SubtargetInfoKV " << Target << "ProcSchedKV[];\n";

1506     OS << "extern const llvm::MCWriteProcResEntry "

1507        << Target << "WriteProcResTable[];\n";

1508     OS << "extern const llvm::MCWriteLatencyEntry "

1509        << Target << "WriteLatencyTable[];\n";

1510     OS << "extern const llvm::MCReadAdvanceEntry "

1511        << Target << "ReadAdvanceTable[];\n";

1512  

1513     if (SchedModels.hasItineraries()) {

1514       OS << "extern const llvm::InstrStage " << Target << "Stages[];\n";

1515       OS << "extern const unsigned " << Target << "OperandCycles[];\n";

1516       OS << "extern const unsigned " << Target << "ForwardingPaths[];\n";

1517     }

1518  

1519     OS << ClassName << "::" << ClassName << "(const Triple &TT, StringRef CPU, "

1520        << "StringRef FS)\n"

1521        << "  : TargetSubtargetInfo() {\n"

1522        << "  InitMCSubtargetInfo(TT, CPU, FS, ";

1523     if (NumFeatures)

1524       OS << "makeArrayRef(" << Target << "FeatureKV, " << NumFeatures << "), ";

1525     else

1526       OS << "None, ";

1527     if (NumProcs)

1528       OS << "makeArrayRef(" << Target << "SubTypeKV, " << NumProcs << "), ";

1529     else

1530      OS << "None, ";

1531     OS << '\n'; OS.indent(22);

1532     OS << Target << "ProcSchedKV, "

1533        << Target << "WriteProcResTable, "

1534        << Target << "WriteLatencyTable, "

1535        << Target << "ReadAdvanceTable, ";

1536     OS << '\n'; OS.indent(22);

1537     if (SchedModels.hasItineraries()) {

1538       OS << Target << "Stages, "

1539          << Target << "OperandCycles, "

1540          << Target << "ForwardingPaths";

1541     } else

1542       OS << "0, 0, 0";

1543     OS << ");\n}\n\n";

1544  

1545     EmitSchedModelHelpers(ClassName, OS);

1546  

1547     OS << "} // End llvm namespace \n";

1548  

1549     OS << "#endif // GET_SUBTARGETINFO_CTOR\n\n";

1550   }

毫无疑问,可以根据.td文件里SubtargetFeature定义与处理器定义间的关系,自动生成一个X86Subtarget的方法来根据处理器类型自动设置这些开关。

1362   void SubtargetEmitter::ParseFeaturesFunction(raw_ostream &OS,

1363                                                unsigned NumFeatures,

1364                                                unsigned NumProcs) {

1365     std::vector<Record*> Features =

1366                          Records.getAllDerivedDefinitions("SubtargetFeature");

1367     std::sort(Features.begin(), Features.end(), LessRecord());

1368  

1369     OS << "// ParseSubtargetFeatures - Parses features string setting specified\n"

1370        << "// subtarget options.\n"

1371        << "void llvm::";

1372     OS << Target;

1373     OS << "Subtarget::ParseSubtargetFeatures(StringRef CPU, StringRef FS) {\n"

1374        << "  DEBUG(dbgs() << \"\\nFeatures:\" << FS);\n"

1375        << "  DEBUG(dbgs() << \"\\nCPU:\" << CPU << \"\\n\\n\");\n";

1376  

1377     if (Features.empty()) {

1378       OS << "}\n";

1379       return;

1380     }

1381  

1382     OS << "  InitMCProcessorInfo(CPU, FS);\n"

1383        << "  const FeatureBitset& Bits = getFeatureBits();\n";

1384  

1385     for (unsigned i = 0; i < Features.size(); i++) {

1386       // Next record

1387       Record *R = Features[i];

1388       const std::string &Instance = R->getName();

1389       const std::string &Value = R->getValueAsString("Value");

1390       const std::string &Attribute = R->getValueAsString("Attribute");

1391  

1392       if (Value=="true" || Value=="false")

1393         OS << "  if (Bits[" << Target << "::"

1394            << Instance << "]) "

1395            << Attribute << " = " << Value << ";\n";

1396       else

1397         OS << "  if (Bits[" << Target << "::"

1398            << Instance << "] && "

1399            << Attribute << " < " << Value << ") "

1400            << Attribute << " = " << Value << ";\n";

1401     }

1402  

1403     OS << "}\n";

1404   }

方法ParseFeaturesFunction没有太特别的地方。所生成的X86Subtarget::ParseSubtargetFeatures方法定义如下:

#ifdef GET_SUBTARGETINFO_TARGET_DESC

#undef GET_SUBTARGETINFO_TARGET_DESC

#include "llvm/Support/Debug.h"

#include "llvm/Support/raw_ostream.h"

// ParseSubtargetFeatures - Parses features string setting specified

// subtarget options.

void llvm::X86Subtarget::ParseSubtargetFeatures(StringRef CPU, StringRef FS) {

  DEBUG(dbgs() << "\nFeatures:" << FS);

  DEBUG(dbgs() << "\nCPU:" << CPU << "\n\n");

  InitMCProcessorInfo(CPU, FS);

  const FeatureBitset& Bits = getFeatureBits();

  if (Bits[X86::Feature3DNow] && X863DNowLevel < ThreeDNow) X863DNowLevel = ThreeDNow;

  if (Bits[X86::Feature3DNowA] && X863DNowLevel < ThreeDNowA) X863DNowLevel = ThreeDNowA;

  if (Bits[X86::Feature64Bit]) HasX86_64 = true;

  if (Bits[X86::FeatureADX]) HasADX = true;

  if (Bits[X86::FeatureAES]) HasAES = true;

  if (Bits[X86::FeatureAVX] && X86SSELevel < AVX) X86SSELevel = AVX;

  if (Bits[X86::FeatureAVX2] && X86SSELevel < AVX2) X86SSELevel = AVX2;

  if (Bits[X86::FeatureAVX512] && X86SSELevel < AVX512F) X86SSELevel = AVX512F;

  if (Bits[X86::FeatureBMI]) HasBMI = true;

  if (Bits[X86::FeatureBMI2]) HasBMI2 = true;

  if (Bits[X86::FeatureBWI]) HasBWI = true;

  if (Bits[X86::FeatureCDI]) HasCDI = true;

  if (Bits[X86::FeatureCMOV]) HasCMov = true;

  if (Bits[X86::FeatureCMPXCHG16B]) HasCmpxchg16b = true;

  if (Bits[X86::FeatureCallRegIndirect]) CallRegIndirect = true;

  if (Bits[X86::FeatureDQI]) HasDQI = true;

  if (Bits[X86::FeatureERI]) HasERI = true;

  if (Bits[X86::FeatureF16C]) HasF16C = true;

  if (Bits[X86::FeatureFMA]) HasFMA = true;

  if (Bits[X86::FeatureFMA4]) HasFMA4 = true;

  if (Bits[X86::FeatureFSGSBase]) HasFSGSBase = true;

  if (Bits[X86::FeatureFastUAMem]) IsUAMemFast = true;

  if (Bits[X86::FeatureHLE]) HasHLE = true;

  if (Bits[X86::FeatureLEAUsesAG]) LEAUsesAG = true;

  if (Bits[X86::FeatureLZCNT]) HasLZCNT = true;

  if (Bits[X86::FeatureLeaForSP]) UseLeaForSP = true;

  if (Bits[X86::FeatureMMX] && X86SSELevel < MMX) X86SSELevel = MMX;

  if (Bits[X86::FeatureMOVBE]) HasMOVBE = true;

  if (Bits[X86::FeatureMPX]) HasMPX = true;

  if (Bits[X86::FeaturePCLMUL]) HasPCLMUL = true;

  if (Bits[X86::FeaturePFI]) HasPFI = true;

  if (Bits[X86::FeaturePOPCNT]) HasPOPCNT = true;

  if (Bits[X86::FeaturePRFCHW]) HasPRFCHW = true;

  if (Bits[X86::FeaturePadShortFunctions]) PadShortFunctions = true;

  if (Bits[X86::FeatureRDRAND]) HasRDRAND = true;

  if (Bits[X86::FeatureRDSEED]) HasRDSEED = true;

  if (Bits[X86::FeatureRTM]) HasRTM = true;

  if (Bits[X86::FeatureSHA]) HasSHA = true;

  if (Bits[X86::FeatureSSE1] && X86SSELevel < SSE1) X86SSELevel = SSE1;

  if (Bits[X86::FeatureSSE2] && X86SSELevel < SSE2) X86SSELevel = SSE2;

  if (Bits[X86::FeatureSSE3] && X86SSELevel < SSE3) X86SSELevel = SSE3;

  if (Bits[X86::FeatureSSE4A]) HasSSE4A = true;

  if (Bits[X86::FeatureSSE41] && X86SSELevel < SSE41) X86SSELevel = SSE41;

  if (Bits[X86::FeatureSSE42] && X86SSELevel < SSE42) X86SSELevel = SSE42;

  if (Bits[X86::FeatureSSEUnalignedMem]) HasSSEUnalignedMem = true;

  if (Bits[X86::FeatureSSSE3] && X86SSELevel < SSSE3) X86SSELevel = SSSE3;

  if (Bits[X86::FeatureSlowBTMem]) IsBTMemSlow = true;

  if (Bits[X86::FeatureSlowDivide32]) HasSlowDivide32 = true;

  if (Bits[X86::FeatureSlowDivide64]) HasSlowDivide64 = true;

  if (Bits[X86::FeatureSlowIncDec]) SlowIncDec = true;

  if (Bits[X86::FeatureSlowLEA]) SlowLEA = true;

  if (Bits[X86::FeatureSlowSHLD]) IsSHLDSlow = true;

  if (Bits[X86::FeatureSlowUAMem32]) IsUAMem32Slow = true;

  if (Bits[X86::FeatureSoftFloat]) UseSoftFloat = true;

  if (Bits[X86::FeatureTBM]) HasTBM = true;

  if (Bits[X86::FeatureVLX]) HasVLX = true;

  if (Bits[X86::FeatureXOP]) HasXOP = true;

  if (Bits[X86::Mode16Bit]) In16BitMode = true;

  if (Bits[X86::Mode32Bit]) In32BitMode = true;

  if (Bits[X86::Mode64Bit]) In64BitMode = true;

  if (Bits[X86::ProcIntelAtom] && X86ProcFamily < IntelAtom) X86ProcFamily = IntelAtom;

  if (Bits[X86::ProcIntelSLM] && X86ProcFamily < IntelSLM) X86ProcFamily = IntelSLM;

}

#endif // GET_SUBTARGETINFO_TARGET_DESC

X86Subtarget是MCSubtargetInfo的间接派生类,在调用ParseSubtargetFeatures方法之前,必须先通过InitMCSubtargetInfo方法(由上面生成的InitX86MCSubtargetInfo方法所调用)记录下上面的X86FeatureKV与X86SubTypeKV,接着通过InitMCProcessorInfo与getFeatureBits方法获取一个比特集,然后根据比特集里设置的比特位设置这些开关成员(布尔变量,以及X86SSELevel、X863DNowLevel、X86ProcFamily这样的枚举级别)。另外,InitMCProcessorInfo还会调用方法InitCPUSchedModel来设置目标机器使用的调度模型。

1478~1496行就是输出这个X86GenSubtargetInfo类型的声明:

#ifdef GET_SUBTARGETINFO_HEADER

#undef GET_SUBTARGETINFO_HEADER

namespace llvm {

class DFAPacketizer;

struct X86GenSubtargetInfo : public TargetSubtargetInfo {

  explicit X86GenSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS);

public:

  unsigned resolveSchedClass(unsigned SchedClass,  const MachineInstr *DefMI, const TargetSchedModel *SchedModel) const override;

  DFAPacketizer *createDFAPacketizer(const InstrItineraryData *IID) const;

};

} // End llvm namespace

#endif // GET_SUBTARGETINFO_HEADER

1498~1543行生成以下的代码片段:

#ifdef GET_SUBTARGETINFO_CTOR

#undef GET_SUBTARGETINFO_CTOR

#include "llvm/CodeGen/TargetSchedule.h"

namespace llvm {

extern const llvm::SubtargetFeatureKV X86FeatureKV[];

extern const llvm::SubtargetFeatureKV X86SubTypeKV[];

extern const llvm::SubtargetInfoKV X86ProcSchedKV[];

extern const llvm::MCWriteProcResEntry X86WriteProcResTable[];

extern const llvm::MCWriteLatencyEntry X86WriteLatencyTable[];

extern const llvm::MCReadAdvanceEntry X86ReadAdvanceTable[];

extern const llvm::InstrStage X86Stages[];

extern const unsigned X86OperandCycles[];

extern const unsigned X86ForwardingPaths[];

X86GenSubtargetInfo::X86GenSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS)

  : TargetSubtargetInfo() {

  InitMCSubtargetInfo(TT, CPU, FS, makeArrayRef(X86FeatureKV, 62), makeArrayRef(X86SubTypeKV, 65),

                      X86ProcSchedKV, X86WriteProcResTable, X86WriteLatencyTable, X86ReadAdvanceTable,

                      X86Stages, X86OperandCycles, X86ForwardingPaths);

}

因为这段代码被宏GET_SUBTARGETINFO_CTOR所选中,这时它所援引的X86FeatureKV数组等,在当前上下文不可见,因此需要extern声明。

前面我们看过,对于差异比较大的处理器,可以使用InstRW与ItinRW定义将特定的指令关联到新的SchedReadWrite。这种情况下会产生新的调度类型,而原有调度类型CodeGenSchedClass对象的Transitions容器会记录下到这个新类型的迁移。

在LLVM里,这个迁移是动态发生的。前面在生成CodeGenSchedClass对应的MCSchedClassDesc实例时,如果针对特定的处理器,该调度类型应该被新调度类型替代,这个MCSchedClassDesc的NumMicroOps被设置为MCSchedClassDesc::VariantNumMicroOps。在执行时一旦发现这种形式的MCSchedClassDesc对象,就会执行动态的解析。具体实现在下面这个方法(TargetSchedule.cpp):

101      const MCSchedClassDesc *TargetSchedModel::

102      resolveSchedClass(const MachineInstr *MI) const {

103     

104        // Get the definition's scheduling class descriptor from this machine model.

105        unsigned SchedClass = MI->getDesc().getSchedClass();

106        const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);

107        if (!SCDesc->isValid())

108          return SCDesc;

109     

110      #ifndef NDEBUG

111        unsigned NIter = 0;

112      #endif

113        while (SCDesc->isVariant()) {

114          assert(++NIter < 6 && "Variants are nested deeper than the magic number");

115     

116          SchedClass = STI->resolveSchedClass(SchedClass, MI, this);

117          SCDesc = SchedModel.getSchedClassDesc(SchedClass);

118        }

119        return SCDesc;

120      }

113行的isVariant方法就是检查NumMicroOps是否为MCSchedClassDesc::VariantNumMicroOps。显然调度类型之间如何替换,只能由具体的处理器给出,因此116行调用的resolveSchedClass方法就是由下面的EmitSchedModelHelpers方法生成的。

1284   void SubtargetEmitter::EmitSchedModelHelpers(std::string ClassName,

1285                                                raw_ostream &OS) {

1286     OS << "unsigned " << ClassName

1287        << "\n::resolveSchedClass(unsigned SchedClass, const MachineInstr *MI,"

1288        << " const TargetSchedModel *SchedModel) const {\n";

1289  

1290     std::vector<Record*> Prologs = Records.getAllDerivedDefinitions("PredicateProlog");

1291     std::sort(Prologs.begin(), Prologs.end(), LessRecord());

1292     for (std::vector<Record*>::const_iterator

1293            PI = Prologs.begin(), PE = Prologs.end(); PI != PE; ++PI) {

1294       OS << (*PI)->getValueAsString("Code") << '\n';

1295     }

1296     IdxVec VariantClasses;

1297     for (CodeGenSchedModels::SchedClassIter SCI = SchedModels.schedClassBegin(),

1298            SCE = SchedModels.schedClassEnd(); SCI != SCE; ++SCI) {

1299       if (SCI->Transitions.empty())

1300         continue;

1301       VariantClasses.push_back(SCI->Index);

1302     }

1303     if (!VariantClasses.empty()) {

1304       OS << "  switch (SchedClass) {\n";

1305       for (IdxIter VCI = VariantClasses.begin(), VCE = VariantClasses.end();

1306            VCI != VCE; ++VCI) {

1307         const CodeGenSchedClass &SC = SchedModels.getSchedClass(*VCI);

1308         OS << "  case " << *VCI << ": // " << SC.Name << '\n';

1309         IdxVec ProcIndices;

1310         for (std::vector<CodeGenSchedTransition>::const_iterator

1311                TI = SC.Transitions.begin(), TE = SC.Transitions.end();

1312              TI != TE; ++TI) {

1313           IdxVec PI;

1314           std::set_union(TI->ProcIndices.begin(), TI->ProcIndices.end(),

1315                          ProcIndices.begin(), ProcIndices.end(),

1316                          std::back_inserter(PI));

1317           ProcIndices.swap(PI);

1318         }

1319         for (IdxIter PI = ProcIndices.begin(), PE = ProcIndices.end();

1320              PI != PE; ++PI) {

1321           OS << "    ";

1322           if (*PI != 0)

1323             OS << "if (SchedModel->getProcessorID() == " << *PI << ") ";

1324           OS << "{ // " << (SchedModels.procModelBegin() + *PI)->ModelName

1325              << '\n';

1326           for (std::vector<CodeGenSchedTransition>::const_iterator

1327                  TI = SC.Transitions.begin(), TE = SC.Transitions.end();

1328                TI != TE; ++TI) {

1329             if (*PI != 0 && !std::count(TI->ProcIndices.begin(),

1330                                         TI->ProcIndices.end(), *PI)) {

1331                 continue;

1332             }

1333             OS << "      if (";

1334             for (RecIter RI = TI->PredTerm.begin(), RE = TI->PredTerm.end();

1335                  RI != RE; ++RI) {

1336               if (RI != TI->PredTerm.begin())

1337                 OS << "\n          && ";

1338               OS << "(" << (*RI)->getValueAsString("Predicate") << ")";

1339             }

1340             OS << ")\n"

1341                << "        return " << TI->ToClassIdx << "; // "

1342                << SchedModels.getSchedClass(TI->ToClassIdx).Name << '\n';

1343           }

1344          OS << "    }\n";

1345           if (*PI == 0)

1346             break;

1347         }

1348         if (SC.isInferred())

1349           OS << "    return " << SC.Index << ";\n";

1350         OS << "    break;\n";

1351       }

1352       OS << "  };\n";

1353     }

1354     OS << "  report_fatal_error(\"Expected a variant SchedClass\");\n"

1355        << "} // " << ClassName << "::resolveSchedClass\n";

1356   }

首先,处理器的resolveSchedClass方法最后参数的类型是const TargetSchedModel*,但是判定替换能否发生的谓词代码(来自SchedVar的Predicate,而SchedVar嵌入在SchedVariant中)却是来自XXXInstrInfo类的方法。为此,TableGen提供了一个PredicateProlog定义,允许目标机器提供从const TargetSchedModel*获取XXXInstrInfo实例的代码片段。

对于X86目标机器,resolveSchedClass是一个不应该调用的方法,因为X86不使用SchedVar,SchedWriteVariant以及SchedReadVariant定义。所生成的方法如下:

unsigned X86GenSubtargetInfo

::resolveSchedClass(unsigned SchedClass, const MachineInstr *MI, const TargetSchedModel *SchedModel) const {

  report_fatal_error("Expected a variant SchedClass");

} // X86GenSubtargetInfo::resolveSchedClass

下面是ARM这个方法的一个片段(ARM使用了大量的SchedVar,SchedWriteVariant以及SchedReadVariant定义,其resolveSchedClass超过1300行):

unsigned ARMGenSubtargetInfo

::resolveSchedClass(unsigned SchedClass, const MachineInstr *MI, const TargetSchedModel *SchedModel) const {

 

  const ARMBaseInstrInfo *TII =

    static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo());

  (void)TII;

 

  switch (SchedClass) {

  case 3: // IIC_iALUsr_WriteALUsi_ReadALU

    if (SchedModel->getProcessorID() == 4) { // SwiftModel

      if ((TII->isSwiftFastImmShift(MI)))

        return 591; // SwiftWriteP01TwoCycle_ReadALU

      if ((true))

        return 592; // WriteALU_ReadALU

    }

    break;

  case 4: // IIC_iALUsr_WriteALUsr_ReadALUsr

    if (SchedModel->getProcessorID() == 4) { // SwiftModel

      if ((TII->isPredicated(MI)))

        return 593; // SwiftWriteP01ThreeCycleTwoUops_anonymous_3760

      if ((true))

        return 594; // SwiftWriteP01TwoCycle_NoReadAdvance

    }

    break;

    …

  };

  report_fatal_error("Expected a variant SchedClass");

} // ARMGenSubtargetInfo::resolveSchedClass

在case语句中包含在“if (SchedModel->getProcessorID() == NUM)”块里的if语句是对应SchedVar的中选谓词(即Predicate成员)。从上面输出的代码结构,我们就能明白SchedVariant一定需要是这个格式,最后一项的谓词一定是NoSchedPred:

def A57WriteISReg : SchedWriteVariant<[

       SchedVar<RegShiftedPred, [WriteISReg]>,

       SchedVar<NoSchedPred, [WriteI]>]>;

从方法EmitSchedModelHelpers返回,SubtargetEmitter::run给X86GenSubtargetInfo.inc输出最后两行代码也就结束了。

猜你喜欢

转载自blog.csdn.net/wuhui_gdnt/article/details/83652842