LLVM学习笔记(43)

3.6.2.3. 输出代码与数据结构

3.6.2.3.1. 资源使用与时延

保存在SchedTables的WriteProcResources、WriteLatencies、ReadAdvanceEntries以及WriterNames容器里的数据是所有处理器公用的,因此下面的方法首先输出包含这些公用数据的数组。

1070   void SubtargetEmitter::EmitSchedClassTables(SchedClassTables &SchedTables,

1071                                               raw_ostream &OS) {

1072     // Emit global WriteProcResTable.

1073     OS << "\n// {ProcResourceIdx, Cycles}\n"

1074        << "extern const llvm::MCWriteProcResEntry "

1075        << Target << "WriteProcResTable[] = {\n"

1076        << "  { 0,  0}, // Invalid\n";

1077     for (unsigned WPRIdx = 1, WPREnd = SchedTables.WriteProcResources.size();

1078          WPRIdx != WPREnd; ++WPRIdx) {

1079       MCWriteProcResEntry &WPREntry = SchedTables.WriteProcResources[WPRIdx];

1080       OS << "  {" << format("%2d", WPREntry.ProcResourceIdx) << ", "

1081          << format("%2d", WPREntry.Cycles) << "}";

1082       if (WPRIdx + 1 < WPREnd)

1083         OS << ',';

1084       OS << " // #" << WPRIdx << '\n';

1085     }

1086     OS << "}; // " << Target << "WriteProcResTable\n";

1087  

1088     // Emit global WriteLatencyTable.

1089     OS << "\n// {Cycles, WriteResourceID}\n"

1090        << "extern const llvm::MCWriteLatencyEntry "

1091        << Target << "WriteLatencyTable[] = {\n"

1092        << "  { 0,  0}, // Invalid\n";

1093     for (unsigned WLIdx = 1, WLEnd = SchedTables.WriteLatencies.size();

1094          WLIdx != WLEnd; ++WLIdx) {

1095       MCWriteLatencyEntry &WLEntry = SchedTables.WriteLatencies[WLIdx];

1096       OS << "  {" << format("%2d", WLEntry.Cycles) << ", "

1097          << format("%2d", WLEntry.WriteResourceID) << "}";

1098       if (WLIdx + 1 < WLEnd)

1099         OS << ',';

1100       OS << " // #" << WLIdx << " " << SchedTables.WriterNames[WLIdx] << '\n';

1101     }

1102     OS << "}; // " << Target << "WriteLatencyTable\n";

1103  

1104     // Emit global ReadAdvanceTable.

1105     OS << "\n// {UseIdx, WriteResourceID, Cycles}\n"

1106        << "extern const llvm::MCReadAdvanceEntry "

1107        << Target << "ReadAdvanceTable[] = {\n"

1108        << "  {0,  0,  0}, // Invalid\n";

1109     for (unsigned RAIdx = 1, RAEnd = SchedTables.ReadAdvanceEntries.size();

1110          RAIdx != RAEnd; ++RAIdx) {

1111       MCReadAdvanceEntry &RAEntry = SchedTables.ReadAdvanceEntries[RAIdx];

1112       OS << "  {" << RAEntry.UseIdx << ", "

1113          << format("%2d", RAEntry.WriteResourceID) << ", "

1114          << format("%2d", RAEntry.Cycles) << "}";

1115       if (RAIdx + 1 < RAEnd)

1116         OS << ',';

1117       OS << " // #" << RAIdx << '\n';

1118     }

1119     OS << "}; // " << Target << "ReadAdvanceTable\n";

1120  

1121     // Emit a SchedClass table for each processor.

1122     for (CodeGenSchedModels::ProcIter PI = SchedModels.procModelBegin(),

1123            PE = SchedModels.procModelEnd(); PI != PE; ++PI) {

1124       if (!PI->hasInstrSchedModel())

1125         continue;

1126  

1127       std::vector<MCSchedClassDesc> &SCTab =

1128         SchedTables.ProcSchedClasses[1 + (PI - SchedModels.procModelBegin())];

1129  

1130       OS << "\n// {Name, NumMicroOps, BeginGroup, EndGroup,"

1131          << " WriteProcResIdx,#, WriteLatencyIdx,#, ReadAdvanceIdx,#}\n";

1132       OS << "static const llvm::MCSchedClassDesc "

1133          << PI->ModelName << "SchedClasses[] = {\n";

1134  

1135       // The first class is always invalid. We no way to distinguish it except by

1136       // name and position.

1137       assert(SchedModels.getSchedClass(0).Name == "NoInstrModel"

1138              && "invalid class not first");

1139       OS << "  {DBGFIELD(\"InvalidSchedClass\")  "

1140          << MCSchedClassDesc::InvalidNumMicroOps

1141          << ", 0, 0,  0, 0,  0, 0,  0, 0},\n";

1142  

1143       for (unsigned SCIdx = 1, SCEnd = SCTab.size(); SCIdx != SCEnd; ++SCIdx) {

1144         MCSchedClassDesc &MCDesc = SCTab[SCIdx];

1145         const CodeGenSchedClass &SchedClass = SchedModels.getSchedClass(SCIdx);

1146         OS << "  {DBGFIELD(\"" << SchedClass.Name << "\") ";

1147         if (SchedClass.Name.size() < 18)

1148           OS.indent(18 - SchedClass.Name.size());

1149         OS << MCDesc.NumMicroOps

1150            << ", " << MCDesc.BeginGroup << ", " << MCDesc.EndGroup

1151            << ", " << format("%2d", MCDesc.WriteProcResIdx)

1152            << ", " << MCDesc.NumWriteProcResEntries

1153            << ", " << format("%2d", MCDesc.WriteLatencyIdx)

1154            << ", " << MCDesc.NumWriteLatencyEntries

1155            << ", " << format("%2d", MCDesc.ReadAdvanceIdx)

1156            << ", " << MCDesc.NumReadAdvanceEntries << "}";

1157         if (SCIdx + 1 < SCEnd)

1158           OS << ',';

1159         OS << " // #" << SCIdx << '\n';

1160       }

1161       OS << "}; // " << PI->ModelName << "SchedClasses\n";

1162     }

1163   }

首先在1073~1086行输出X86家族处理器公用的MCWriteProcResEntry数组:

// {ProcResourceIdx, Cycles}

extern const llvm::MCWriteProcResEntry X86WriteProcResTable[] = {

  { 0,  0}, // Invalid

  {18,  1}, // #1

  {19,  1}, // #2

  { 4,  1}, // #3

  { 8,  1}, // #4

  …

  { 4,  1}, // #1217

  { 6,  1}, // #1218

  { 7,  2} // #1219

}; // X86WriteProcResTable

接着1088~1102行输出X86家族目标机器公用的MCWriteLatencyEntry数组:

// {Cycles, WriteResourceID}

extern const llvm::MCWriteLatencyEntry X86WriteLatencyTable[] = {

  { 0,  0}, // Invalid

  {100,  0}, // #1 WriteMicrocoded_WriteSystem

  { 1,  0}, // #2 WriteALU_WriteVecLogic_WriteZero_WriteFBlend_WriteShift_WriteJump_WriteFShuffle_WriteStore_WriteMove_WriteLEA_WriteFence_WriteShuffle_WriteVecALU_WriteVecShift_WriteNop_WriteBlend_Write2P237_P4_WritePushF_WritePushA_WritePopF_WritePopA_WriteP06_WriteBSwap32_WriteBSwap64_WriteMoveBE32rm_WriteMoveBE16mr_WriteMoveBE32mr_WriteMoveBE64mr_Write2P0156_2P237_P4_Write3P0156_2P237_P4_WriteP0156_2P237_P4_WriteShiftRMW_WriteShiftClLdRMW_Write2P06_WriteRotateRMW_WriteRotateRMWCL_WriteRCm1_WriteRCmi_WriteShDmr_WriteShDmrCL_WriteBTmr_WriteBTRSCmr_WriteSetCCm_WriteCldStd_WriteP15_WriteJCXZ_WriteLOOP_WriteCALLr_WriteCALLm_WriteRET_WriteRETI_WriteBOUND_WriteINTO_Write2P0156_P23_WriteP0156_P23_WriteSTOS_WriteXADD_WriteCMPXCHG_WriteCMPXCHG8B_WriteCMPXCHG16B_WritePAUSE_WriteXGETBV_WriteRDTSC_WriteRDPMC_WriteRDRAND_WriteST_FP80m_WriteFBSTP_WriteFNSTSW_WriteFNSTCW_WriteFNSAVE_WriteFRSTOR_WriteP1_P23_Write2P1_P23_Write5P0156_WriteFNINIT_WriteP5_WriteP01_P5_WritePBLENDWr_WriteVPBLENDDr_WritePEXTRm_WriteVPGATHERDD128_WriteVPGATHERDD256_WriteVPGATHERQD128_WriteVPGATHERQD256_WriteVPGATHERDQ128_WriteVPGATHERDQ256_WriteVPGATHERQQ128_WriteVPGATHERQQ256_WriteEXTRACTPSr_WriteVGATHERDPS128_WriteVGATHERDPS256_WriteVGATHERQPS128_WriteVGATHERQPS256_WriteVGATHERDPD128_WriteVGATHERDPD256_WriteVGATHERQPD128_WriteVGATHERQPD256_WriteVZEROUPPER_WriteVZEROALL_WriteFShuffle256_WriteShuffle256_WriteVarVecShift_WriteVarBlend_WriteFVarBlend

  { 5,  0}, // #3 WriteALULd_WriteVecLogicLd_WriteFBlendLd_WriteShiftLd_WriteJumpLd_WriteFShuffleLd_WriteVecALULd_WriteShuffleLd_WriteVecIMul_WriteVecShiftLd_WriteFMul_WriteBlendLd_WriteFRcp_WriteFRsqrt_WriteVPBROADCAST128Ld_WritePCMPGTQr_WritePCMPGTQm_WriteCVTPD2PSYrr_WriteCVTPS2PDrm_WriteVCVTPS2PDYrr_WriteCVTSS2SDrm_WriteHADDSUBPr_WriteMULr_WriteFMADDr_WriteRSQRTr_WriteP5Ld_WriteLoad_WriteShuffle256Ld_WriteFShuffle256Ld_WriteVarVecShiftLd

  { 1,  0}, // #4 WriteRMW

  …

  { 3,  0}, // #54 WriteIMul

  { 1,  0}, // #55 WriteIMulH

  {17,  0} // #56 WritePCmpEStrMLd_WritePCmpIStrILd

}; // X86WriteLatencyTable

旁边输出的注释是具有相同时延数据的SchedWrite定义(具体资源的使用则记录在调度类的MCSchedClassDesc对象里,GenSchedClassTables的938~940行)。另外,WriteResourceID域都是0,表明X86目标机器使用了适用于所有SchedWrite定义的ReadAdvance或SchedReadAdvance定义,或者没有使用ReadAdvance及SchedReadAdvance定义(因为随后输出了X86ReadAdvanceTable,因此显然是前者)。

1104~1119行输出X86家族目标机器公用的MCReadAdvanceEntry数组:

// {UseIdx, WriteResourceID, Cycles}

extern const llvm::MCReadAdvanceEntry X86ReadAdvanceTable[] = {

  {0,  0,  0}, // Invalid

  {0,  0,  4}, // #1

  {5,  0,  4}, // #2

  {6,  0,  4}, // #3

  {0,  0,  3}, // #4

  {5,  0,  3}, // #5

  {6,  0,  3} // #6

}; // X86ReadAdvanceTable

因为WriteResourceID域都是0,因此X86目标机器使用了适用于所有SchedWrite定义的ReadAdvance或SchedReadAdvance定义。另外,UseIdx是读操作数的索引,实际上是X86目标机器只定义了ReadAdvance<ReadAfterLd, 4>与ReadAdvance<ReadAfterLd, 3>,但由于ReadAfterLd在指令定义中可作为第0、5及6个读操作数出现,所以构建出上面的数组。

接下来的代码开始输出描述处理器调度类型的数组。

对X86目标机器,这些数组有:HaswellModelSchedClasses,BtVer2ModelSchedClasses,SandyBridgeModelSchedClasses,及SLMModelSchedClasses。其中SandyBridgeModelSchedClasses相关的数组是这样的:

// {Name, NumMicroOps, BeginGroup, EndGroup, WriteProcResIdx,#, WriteLatencyIdx,#, ReadAdvanceIdx,#}

static const llvm::MCSchedClassDesc SandyBridgeModelSchedClasses[] = {

  {DBGFIELD("InvalidSchedClass")  65535, 0, 0,  0, 0,  0, 0,  0, 0},

  {DBGFIELD("IIC_AAA_WriteMicrocoded") 1, 0, 0, 108, 2,  1, 1,  0, 0}, // #1

  {DBGFIELD("IIC_AAD_WriteMicrocoded") 1, 0, 0, 108, 2,  1, 1,  0, 0}, // #2

  {DBGFIELD("IIC_AAM_WriteMicrocoded") 1, 0, 0, 108, 2,  1, 1,  0, 0}, // #3

  {DBGFIELD("IIC_AAS_WriteMicrocoded") 1, 0, 0, 108, 2,  1, 1,  0, 0}, // #4

  …

  {DBGFIELD("VZEROALL")           0, 0, 0,  0, 0,  0, 0,  0, 0}, // #947

  {DBGFIELD("LDMXCSR_VLDMXCSR")   1, 0, 0, 279, 2,  9, 1,  0, 0}, // #948

  {DBGFIELD("STMXCSR_VSTMXCSR")   1, 0, 0, 439, 3,  2, 1,  0, 0} // #949

}; // SandyBridgeModelSchedClasses

DBGFIELD声明的域用于调试目的。所有的BeginGroup与EndGroup域都是0(false),表示这些调度类没有组成调度组(LLVM目前没有任何调度组)。剩下的(Idx,number)组给出了对前面生成数组的引用情况。以LDMXCSR_VLDMXCSR为例,它的WriteLatencyIdx为9、项数为1,即援引X86WriteLatencyTable中下标为9的项(连同Invalid项在内的第10项):{4,  0}——这个SchedWrite有4周期时延;它的WriteProcResIdx为279、项数为2,即援引X86WriteProcResTable中下标为279、280的两项(连同Invalid项在内的第280、281项):{8,  1}, {10,  1}——占用编号为8及10的资源各1周期。

3.6.2.3.2. 处理器资源模型

有了所有处理器调度类型的数组后,就该轮到输出描述处理器的数据结构了。同样,类似Atom的处理器与类似SandyBridge的处理器的处理方法是不一样的,因为在.td文件的处理器描述上,它们就有很大的区别。

1165   void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) {

1166     // For each processor model.

1167     for (CodeGenSchedModels::ProcIter PI = SchedModels.procModelBegin(),

1168            PE = SchedModels.procModelEnd(); PI != PE; ++PI) {

1169       // Emit processor resource table.

1170       if (PI->hasInstrSchedModel())

1171         EmitProcessorResources(*PI, OS);

1172       else if(!PI->ProcResourceDefs.empty())

1173         PrintFatalError(PI->ModelDef->getLoc(), "SchedMachineModel defines "

1174                       "ProcResources without defining WriteRes SchedWriteRes");

1175  

1176       // Begin processor itinerary properties

1177       OS << "\n";

1178       OS << "static const llvm::MCSchedModel " << PI->ModelName << " = {\n";

1179       EmitProcessorProp(OS, PI->ModelDef, "IssueWidth", ',');

1180       EmitProcessorProp(OS, PI->ModelDef, "MicroOpBufferSize", ',');

1181       EmitProcessorProp(OS, PI->ModelDef, "LoopMicroOpBufferSize", ',');

1182       EmitProcessorProp(OS, PI->ModelDef, "LoadLatency", ',');

1183       EmitProcessorProp(OS, PI->ModelDef, "HighLatency", ',');

1184       EmitProcessorProp(OS, PI->ModelDef, "MispredictPenalty", ',');

1185  

1186       OS << "  " << (bool)(PI->ModelDef ?

1187                            PI->ModelDef->getValueAsBit("PostRAScheduler") : 0)

1188          << ", // " << "PostRAScheduler\n";

1189  

1190       OS << "  " << (bool)(PI->ModelDef ?

1191                            PI->ModelDef->getValueAsBit("CompleteModel") : 0)

1192          << ", // " << "CompleteModel\n";

1193  

1194       OS << "  " << PI->Index << ", // Processor ID\n";

1195       if (PI->hasInstrSchedModel())

1196         OS << "  " << PI->ModelName << "ProcResources" << ",\n"

1197            << "  " << PI->ModelName << "SchedClasses" << ",\n"

1198            << "  " << PI->ProcResourceDefs.size()+1 << ",\n"

1199            << "  " << (SchedModels.schedClassEnd()

1200                        - SchedModels.schedClassBegin()) << ",\n";

1201       else

1202         OS << "  0, 0, 0, 0, // No instruction-level machine model.\n";

1203       if (PI->hasItineraries())

1204         OS << "  " << PI->ItinsDef->getName() << "};\n";

1205       else

1206         OS << "  nullptr}; // No Itinerary\n";

1207     }

1208   }

对类似SandyBridge的处理器,首先调用下面的EmitProcessorResources方法输出描述资源的数据结构,因为这样的处理器需要另外描述资源;而类似Atom的处理器则是在ProcessorItineraries派生定义里给出资源描述。

605      void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel,

606                                                    raw_ostream &OS) {

607        char Sep = ProcModel.ProcResourceDefs.empty() ? ' ' : ',';

608     

609        OS << "\n// {Name, NumUnits, SuperIdx, IsBuffered}\n";

610        OS << "static const llvm::MCProcResourceDesc "

611           << ProcModel.ModelName << "ProcResources" << "[] = {\n"

612           << "  {DBGFIELD(\"InvalidUnit\")     0, 0, 0}" << Sep << "\n";

613     

614        for (unsigned i = 0, e = ProcModel.ProcResourceDefs.size(); i < e; ++i) {

615          Record *PRDef = ProcModel.ProcResourceDefs[i];

616     

617          Record *SuperDef = nullptr;

618          unsigned SuperIdx = 0;

619          unsigned NumUnits = 0;

620          int BufferSize = PRDef->getValueAsInt("BufferSize");

621          if (PRDef->isSubClassOf("ProcResGroup")) {

622            RecVec ResUnits = PRDef->getValueAsListOfDefs("Resources");

623            for (RecIter RUI = ResUnits.begin(), RUE = ResUnits.end();

624                 RUI != RUE; ++RUI) {

625              NumUnits += (*RUI)->getValueAsInt("NumUnits");

626            }

627          }

628          else {

629            // Find the SuperIdx

630            if (PRDef->getValueInit("Super")->isComplete()) {

631              SuperDef = SchedModels.findProcResUnits(

632                PRDef->getValueAsDef("Super"), ProcModel);

633              SuperIdx = ProcModel.getProcResourceIdx(SuperDef);

634            }

635            NumUnits = PRDef->getValueAsInt("NumUnits");

636          }

637          // Emit the ProcResourceDesc

638          if (i+1 == e)

639            Sep = ' ';

640          OS << "  {DBGFIELD(\"" << PRDef->getName() << "\") ";

641          if (PRDef->getName().size() < 15)

642            OS.indent(15 - PRDef->getName().size());

643          OS << NumUnits << ", " << SuperIdx << ", "

644             << BufferSize << "}" << Sep << " // #" << i+1;

645          if (SuperDef)

646            OS << ", Super=" << SuperDef->getName();

647          OS << "\n";

648        }

649        OS << "};\n";

650      }

每个类似SandyBridge的处理器都要输出一个资源数组,描述SandyBridge处理器资源的数组是:

// {Name, NumUnits, SuperIdx, IsBuffered}

static const llvm::MCProcResourceDesc SandyBridgeModelProcResources[] = {

  {DBGFIELD("InvalidUnit")     0, 0, 0},

  {DBGFIELD("SBDivider")       1, 0, -1}, // #1

  {DBGFIELD("SBPort0")         1, 0, -1}, // #2

  {DBGFIELD("SBPort1")         1, 0, -1}, // #3

  {DBGFIELD("SBPort4")         1, 0, -1}, // #4

  {DBGFIELD("SBPort5")         1, 0, -1}, // #5

  {DBGFIELD("SBPort05")        2, 0, -1}, // #6

  {DBGFIELD("SBPort15")        2, 0, -1}, // #7

  {DBGFIELD("SBPort23")        2, 0, -1}, // #8

  {DBGFIELD("SBPort015")       3, 0, -1}, // #9

  {DBGFIELD("SBPortAny")       6, 0, 54}  // #10

};

MCProcResourceDesc的定义与ProcResourceUnits类似。其中SuperIdx为0,表示不存在上级资源。注释里称为IsBuffered的域,实际上是BufferSize,-1表示发布口由统一的保留站(即SandyBridge处理器的硬件调度器)来供给。最后一行的54是SBPortAny的BufferSize,即Sandy Bridge保留站的深度:硬件调度器最多可以同时缓存54条等待发布的微操作(而不是一个周期能发布54条微操作)。保留站(reservation station)的作用是排队微操作,直到所有的源操作数就绪,再将就绪的微操作调度并分发到可用的执行单元。

前面X86WriteProcResTable数组元素ProcResourceIdx成员的内容就是这个数组的索引。

方法EmitProcessorProp则对所有的处理器都适用。它辅助生成处理器的MCSchedModel实例。注意,对所有的目标机器家族,描述的第一个处理器总是NoSchedModel。

594      void SubtargetEmitter::EmitProcessorProp(raw_ostream &OS, const Record *R,

595                                               const char *Name, char Separator) {

596        OS << "  ";

597        int V = R ? R->getValueAsInt(Name) : -1;

598        if (V >= 0)

599          OS << V << Separator << " // " << Name;

600        else

601          OS << "MCSchedModel::Default" << Name << Separator;

602        OS << '\n';

603      }

参考在TargetSchedule.td中定义的SchedMachineModel,缺省的LoadLatency,MicroOpBufferSize,MinLatency,LoopMicroOpBufferSize,IssueWidth,HighLatency,MispredictPenalty都是-1,在输出数组时,这些缺省值被输出为MCSchedModel::DefaultXXX。因此,对X86目标机器,NoSchedModel的输出是这样的:

static const llvm::MCSchedModel NoSchedModel = {

  MCSchedModel::DefaultIssueWidth,

  MCSchedModel::DefaultMicroOpBufferSize,

  MCSchedModel::DefaultLoopMicroOpBufferSize,

  MCSchedModel::DefaultLoadLatency,

  MCSchedModel::DefaultHighLatency,

  MCSchedModel::DefaultMispredictPenalty,

  0, // PostRAScheduler

  1, // CompleteModel

  0, // Processor ID

  0, 0, 0, 0, // No instruction-level machine model.

  nullptr}; // No Itinerary

在X86家族里,指令调度得到良好描述的只有基于Atom,BtVer2,SLM,Haswell,SandyBridge架构的处理器,其他像i386,i686,pentium4m,k6,athlon系列,opteron系列等处理器LLVM并没有给出有关指令调度的细节(估计是找不到相关的文档)。对这些处理器,LLVM给出了一个通用的描述(当然也没有什么战力☺)——GenericModel,它的输出是这样的:

static const llvm::MCSchedModel GenericModel = {

  4, // IssueWidth

  32, // MicroOpBufferSize

  MCSchedModel::DefaultLoopMicroOpBufferSize,

  4, // LoadLatency

  10, // HighLatency

  MCSchedModel::DefaultMispredictPenalty,

  0, // PostRAScheduler

  1, // CompleteModel

  1, // Processor ID

  0, 0, 0, 0, // No instruction-level machine model.

  nullptr}; // No Itinerary

对于我们前面看过的Atom与SandyBridge处理器,它们的输出则是:

static const llvm::MCSchedModel AtomModel = {

  2, // IssueWidth

  0, // MicroOpBufferSize

  10, // LoopMicroOpBufferSize

  3, // LoadLatency

  30, // HighLatency

  MCSchedModel::DefaultMispredictPenalty,

  1, // PostRAScheduler

  1, // CompleteModel

  2, // Processor ID

  0, 0, 0, 0, // No instruction-level machine model.

  AtomItineraries};

 

static const llvm::MCSchedModel SandyBridgeModel = {

  4, // IssueWidth

  168, // MicroOpBufferSize

  28, // LoopMicroOpBufferSize

  4, // LoadLatency

  MCSchedModel::DefaultHighLatency,

  16, // MispredictPenalty

  0, // PostRAScheduler

  0, // CompleteModel

  5, // Processor ID

  SandyBridgeModelProcResources,

  SandyBridgeModelSchedClasses,

  11,

  950,

  nullptr}; // No Itinerary

对Atom处理器,MCSchedModel实例的ProcResourceTable(const MCProcResourceDesc*类型),SchedClassTable(const MCSchedClassDesc*类型),NumProcResourceKinds与NumSchedClasses都是0。而SandyBridge则指向前面生成的数组。

接下来,我们还要生成一个查找表,根据处理器的名字,给出对应的MCSchedModel实例。

1213   void SubtargetEmitter::EmitProcessorLookup(raw_ostream &OS) {

1214     // Gather and sort processor information

1215     std::vector<Record*> ProcessorList =

1216                             Records.getAllDerivedDefinitions("Processor");

1217     std::sort(ProcessorList.begin(), ProcessorList.end(), LessRecordFieldName());

1218  

1219     // Begin processor table

1220     OS << "\n";

1221     OS << "// Sorted (by key) array of itineraries for CPU subtype.\n"

1222        << "extern const llvm::SubtargetInfoKV "

1223        << Target << "ProcSchedKV[] = {\n";

1224  

1225     // For each processor

1226     for (unsigned i = 0, N = ProcessorList.size(); i < N;) {

1227       // Next processor

1228       Record *Processor = ProcessorList[i];

1229  

1230       const std::string &Name = Processor->getValueAsString("Name");

1231       const std::string &ProcModelName =

1232         SchedModels.getModelForProc(Processor).ModelName;

1233  

1234       // Emit as { "cpu", procinit },

1235       OS << "  { \"" << Name << "\", (const void *)&" << ProcModelName << " }";

1236  

1237       // Depending on ''if more in the list'' emit comma

1238       if (++i < N) OS << ",";

1239  

1240       OS << "\n";

1241     }

1242  

1243     // End processor table

1244     OS << "};\n";

1245   }

这个查找表的类型是SubtargetInfoKV:

69        struct SubtargetInfoKV {

70          const char *Key;                      // K-V key string

71          const void *Value;                    // K-V pointer value

72       

73          // Compare routine for std::lower_bound

74          bool operator<(StringRef S) const {

75            return StringRef(Key) < S;

76          }

77        };

X86家族的这张表不算太大,下面给出它完整的定义:

// Sorted (by key) array of itineraries for CPU subtype.

extern const llvm::SubtargetInfoKV X86ProcSchedKV[] = {

  { "amdfam10", (const void *)&GenericModel },

  { "athlon", (const void *)&GenericModel },

  { "athlon-4", (const void *)&GenericModel },

  { "athlon-fx", (const void *)&GenericModel },

  { "athlon-mp", (const void *)&GenericModel },

  { "athlon-tbird", (const void *)&GenericModel },

  { "athlon-xp", (const void *)&GenericModel },

  { "athlon64", (const void *)&GenericModel },

  { "athlon64-sse3", (const void *)&GenericModel },

  { "atom", (const void *)&AtomModel },

  { "barcelona", (const void *)&GenericModel },

  { "bdver1", (const void *)&GenericModel },

  { "bdver2", (const void *)&GenericModel },

  { "bdver3", (const void *)&GenericModel },

  { "bdver4", (const void *)&GenericModel },

  { "bonnell", (const void *)&AtomModel },

  { "broadwell", (const void *)&HaswellModel },

  { "btver1", (const void *)&GenericModel },

  { "btver2", (const void *)&BtVer2Model },

  { "c3", (const void *)&GenericModel },

  { "c3-2", (const void *)&GenericModel },

  { "core-avx-i", (const void *)&SandyBridgeModel },

  { "core-avx2", (const void *)&HaswellModel },

  { "core2", (const void *)&SandyBridgeModel },

  { "corei7", (const void *)&SandyBridgeModel },

  { "corei7-avx", (const void *)&SandyBridgeModel },

  { "generic", (const void *)&GenericModel },

  { "geode", (const void *)&GenericModel },

  { "haswell", (const void *)&HaswellModel },

  { "i386", (const void *)&GenericModel },

  { "i486", (const void *)&GenericModel },

  { "i586", (const void *)&GenericModel },

  { "i686", (const void *)&GenericModel },

  { "ivybridge", (const void *)&SandyBridgeModel },

  { "k6", (const void *)&GenericModel },

  { "k6-2", (const void *)&GenericModel },

  { "k6-3", (const void *)&GenericModel },

  { "k8", (const void *)&GenericModel },

  { "k8-sse3", (const void *)&GenericModel },

  { "knl", (const void *)&HaswellModel },

  { "nehalem", (const void *)&SandyBridgeModel },

  { "nocona", (const void *)&GenericModel },

  { "opteron", (const void *)&GenericModel },

  { "opteron-sse3", (const void *)&GenericModel },

  { "penryn", (const void *)&SandyBridgeModel },

  { "pentium", (const void *)&GenericModel },

  { "pentium-m", (const void *)&GenericModel },

  { "pentium-mmx", (const void *)&GenericModel },

  { "pentium2", (const void *)&GenericModel },

  { "pentium3", (const void *)&GenericModel },

  { "pentium3m", (const void *)&GenericModel },

  { "pentium4", (const void *)&GenericModel },

  { "pentium4m", (const void *)&GenericModel },

  { "pentiumpro", (const void *)&GenericModel },

  { "prescott", (const void *)&GenericModel },

  { "sandybridge", (const void *)&SandyBridgeModel },

  { "silvermont", (const void *)&SLMModel },

  { "skx", (const void *)&HaswellModel },

  { "skylake", (const void *)&HaswellModel },

  { "slm", (const void *)&SLMModel },

  { "westmere", (const void *)&SandyBridgeModel },

  { "winchip-c6", (const void *)&GenericModel },

  { "winchip2", (const void *)&GenericModel },

  { "x86-64", (const void *)&SandyBridgeModel },

  { "yonah", (const void *)&SandyBridgeModel }

};

因此,为了得到尽可能好的性能,应该通过命令行选项告诉LLVM目标机器是什么处理器。

回到SubtargetEmitter::run,下面的代码输出一个重要的方法:InitX86MCSubtargetInfo。

SubtargetEmitter::run(续)

1437     // MCInstrInfo initialization routine.

1438     OS << "static inline void Init" << Target

1439        << "MCSubtargetInfo(MCSubtargetInfo *II, "

1440        << "const Triple &TT, StringRef CPU, StringRef FS) {\n";

1441     OS << "  II->InitMCSubtargetInfo(TT, CPU, FS, ";

1442     if (NumFeatures)

1443       OS << Target << "FeatureKV, ";

1444     else

1445       OS << "None, ";

1446     if (NumProcs)

1447       OS << Target << "SubTypeKV, ";

1448     else

1449       OS << "None, ";

1450     OS << '\n'; OS.indent(22);

1451     OS << Target << "ProcSchedKV, "

1452        << Target << "WriteProcResTable, "

1453        << Target << "WriteLatencyTable, "

1454        << Target << "ReadAdvanceTable, ";

1455     if (SchedModels.hasItineraries()) {

1456       OS << '\n'; OS.indent(22);

1457       OS << Target << "Stages, "

1458          << Target << "OperandCycles, "

1459          << Target << "ForwardingPaths";

1460     } else

1461       OS << "0, 0, 0";

1462     OS << ");\n}\n\n";

1463  

1464     OS << "} // End llvm namespace \n";

1465  

1466     OS << "#endif // GET_SUBTARGETINFO_MC_DESC\n\n";

生成的InitX86MCSubtargetInfo方法的定义如下(连带收尾代码):

#undef DBGFIELD

static inline void InitX86MCSubtargetInfo(MCSubtargetInfo *II, const Triple &TT, StringRef CPU, StringRef FS) {

  II->InitMCSubtargetInfo(TT, CPU, FS, X86FeatureKV, X86SubTypeKV,

                      X86ProcSchedKV, X86WriteProcResTable, X86WriteLatencyTable, X86ReadAdvanceTable,

                      X86Stages, X86OperandCycles, X86ForwardingPaths);

}

} // End llvm namespace

#endif // GET_SUBTARGETINFO_MC_DESC

通过这个方法,X86目标机器的数据就与机器无关的MC框架挂上钩。

猜你喜欢

转载自blog.csdn.net/wuhui_gdnt/article/details/83412400