Linux内核深度解析之中断、异常和系统调用——中断

中断

中断是外围设备通知处理器的一种机制。

1. 中断控制器

外围设备不是把中断请求直接发送给处理器,而是发给中断控制器,由中断控制器转发给处理器。

不同种类的中断控制器的访问方法存在差异,为了屏蔽差异,内核定义了中断控制器描述符irq_chip,每种中断控制器自定义各种操作函数。GIC v2控制器的描述符如下:

drivers/irqchip/irq-gic.c

/*
 * irq_chip descriptor of the GIC v2 controller: it plugs the GIC-specific
 * mask/unmask/EOI/set-type/irqchip-state callbacks into the generic
 * interrupt controller interface.
 */
static const struct irq_chip gic_chip = {
	.irq_mask		= gic_mask_irq,
	.irq_unmask		= gic_unmask_irq,
	.irq_eoi		= gic_eoi_irq,
	.irq_set_type		= gic_set_type,
	.irq_get_irqchip_state	= gic_irq_get_irqchip_state,
	.irq_set_irqchip_state	= gic_irq_set_irqchip_state,
	.flags			= IRQCHIP_SET_TYPE_MASKED |
				  IRQCHIP_SKIP_SET_WAKE |
				  IRQCHIP_MASK_ON_SUSPEND,
};

2. 中断域

一个大型系统可能有多个中断控制器,这些中断控制器可以级联,一个中断控制器作为中断源连接到另一个中断控制器,但只有一个中断控制器作为根控制器直接连接到处理器。为了把每个中断控制器本地的硬件中断号映射到全局唯一的Linux中断号(也称为虚拟中断号),内核定义了中断域irq_domain,每个中断控制器有自己的中断域。

2.1. 创建中断域

中断控制器的驱动程序使用分配函数irq_domain_add_*()创建和注册中断域。

2.2. 创建映射

创建中断域以后,需要向中断域添加硬件中断号到Linux中断号的映射,内核提供了函数irq_create_mapping:

unsigned int irq_create_mapping(struct irq_domain *host, irq_hw_number_t hwirq);

输入参数是中断域和硬件中断号,返回Linux中断号。

该函数首先分配Linux中断号,然后把硬件中断号到Linux中断号的映射添加到中断域。

2.3. 查找映射

中断处理程序需要根据硬件中断号查找Linux中断号,内核提供了函数irq_find_mapping:

unsigned int irq_find_mapping(struct irq_domain *host, irq_hw_number_t hwirq);

输入参数是中断域和硬件中断号,返回Linux中断号。

3. 中断控制器驱动初始化

3.1. 设备树源文件

ARM64架构使用扁平设备树(Flattened Device Tree,FDT)描述板卡的硬件信息,好处是可以把板卡的特定的代码从内核中删除,编译生成通用的板卡无关的内核。

设备树源文件是文本文件,扩展名是“.dts”,需要在设备树源文件中描述中断的相关信息:

(1)中断控制器的信息

(2)对于作为中断源的外围设备,需要描述设备连接到哪个中断控制器,使用哪个硬件中断号

参考arch/arm64/boot/dts/arm/foundation-v8.dtsi、arch/arm64/boot/dts/arm/foundation-v8-gicv2.dtsi。

3.2. 中断控制器匹配表

在GIC v2控制器的驱动程序中,定义了多个类型为of_device_id的静态变量,成员compatible是驱动程序支持的设备的名称,成员data是初始化函数,编译器把这些静态变量放在专用的节“__irqchip_of_table”里面。

我们把节“__irqchip_of_table”称为中断控制器匹配表,里面每个表项的格式是结构体of_device_id。

drivers/irqchip/irq-gic.c

IRQCHIP_DECLARE(gic_400, "arm,gic-400", gic_of_init);
...
IRQCHIP_DECLARE(cortex_a15_gic, "arm,cortex-a15-gic", gic_of_init);
IRQCHIP_DECLARE(cortex_a9_gic, "arm,cortex-a9-gic", gic_of_init);
...

把宏IRQCHIP_DECLARE展开以后是:

/*
 * One match-table entry per IRQCHIP_DECLARE() use. The variable is named
 * __of_table_<first macro argument>, and .compatible is the string passed
 * as the second macro argument.
 */
static const struct of_device_id __of_table_gic_400
	__section(__irqchip_of_table)
		= { .compatible = "arm,gic-400",
		    .data = gic_of_init  }
...
static const struct of_device_id __of_table_cortex_a15_gic
	__section(__irqchip_of_table)
		= { .compatible = "arm,cortex-a15-gic",
		    .data = gic_of_init  }
static const struct of_device_id __of_table_cortex_a9_gic
	__section(__irqchip_of_table)
		= { .compatible = "arm,cortex-a9-gic",
		    .data = gic_of_init  }
...

3.3. 初始化

在内核初始化的时候,匹配设备树文件中的中断控制器的属性“compatible”和内核的中断控制器匹配表,找到合适的中断控制器驱动程序,执行驱动程序的初始化函数。

start_kernel()  ->  init_IRQ()  ->  irqchip_init()

drivers/irqchip/irqchip.c
/* Interrupt controller driver initialization: hands the match table to of_irq_init(). */
void __init irqchip_init(void)
{
	of_irq_init(__irqchip_of_table);    // the argument, __irqchip_of_table, is the start address of the interrupt controller match table
	...
}

(1)函数of_irq_init

drivers/of/irq.c
/**
 * of_irq_init - Scan and init matching interrupt controllers in DT
 * @matches: 0 terminated array of nodes to match and init function to call
 *
 * This function scans the device tree for matching interrupt controller nodes,
 * and calls their initialization functions in order with parents first.
 */
void __init of_irq_init(const struct of_device_id *matches)
{
	const struct of_device_id *match;
	struct device_node *np, *parent = NULL;
	struct of_intc_desc *desc, *temp_desc;
	struct list_head intc_desc_list, intc_parent_list;

	INIT_LIST_HEAD(&intc_desc_list);
	INIT_LIST_HEAD(&intc_parent_list);

	/*
	 * Walk the device nodes of the device tree. For every node whose
	 * "compatible" property matches the compatible field of any entry in
	 * the interrupt controller match table, do the following.
	 */
	for_each_matching_node_and_match(np, matches, &match) {
		/*
		 * A node without the "interrupt-controller" property is not an
		 * interrupt controller, so it is skipped; nodes that are not
		 * available are skipped as well.
		 */
		if (!of_property_read_bool(np, "interrupt-controller") ||
				!of_device_is_available(np))
			continue;

		if (WARN(!match->data, "of_irq_init: no init function for %s\n",
			 match->compatible))
			continue;

		/*
		 * Here, we allocate and populate an of_intc_desc with the node
		 * pointer, interrupt-parent device_node etc.
		 */
		desc = kzalloc(sizeof(*desc), GFP_KERNEL);		/* allocate an of_intc_desc instance */
		if (WARN_ON(!desc)) {
			of_node_put(np);
			goto err;
		}

		desc->irq_init_cb = match->data;		/* irq_init_cb holds the init function */
		desc->dev = of_node_get(np);		/* dev holds the device_node of this device */
		desc->interrupt_parent = of_irq_find_parent(np);		/* interrupt_parent holds the parent device */
		if (desc->interrupt_parent == np)
			desc->interrupt_parent = NULL;
		list_add_tail(&desc->list, &intc_desc_list);		/* append the of_intc_desc instance to intc_desc_list */
	}

	/*
	 * The root irq controller is the one without an interrupt-parent.
	 * That one goes first, followed by the controllers that reference it,
	 * followed by the ones that reference the 2nd level controllers, etc.
	 */
	/*
	 * Walk intc_desc_list starting from the root device: run the init
	 * function of a parent first, then the init functions of its children.
	 */
	while (!list_empty(&intc_desc_list)) {
		/*
		 * Process all controllers with the current 'parent'.
		 * First pass will be looking for NULL as the parent.
		 * The assumption is that NULL parent means a root controller.
		 */
		list_for_each_entry_safe(desc, temp_desc, &intc_desc_list, list) {
			int ret;

			if (desc->interrupt_parent != parent)
				continue;

			list_del(&desc->list);

			of_node_set_flag(desc->dev, OF_POPULATED);

			pr_debug("of_irq_init: init %pOF (%p), parent %p\n",
				 desc->dev,
				 desc->dev, desc->interrupt_parent);
			ret = desc->irq_init_cb(desc->dev,
						desc->interrupt_parent);
			if (ret) {
				/* init failed: undo the flag and drop this descriptor */
				of_node_clear_flag(desc->dev, OF_POPULATED);
				kfree(desc);
				continue;
			}

			/*
			 * This one is now set up; add it to the parent list so
			 * its children can get processed in a subsequent pass.
			 */
			list_add_tail(&desc->list, &intc_parent_list);
		}

		/* Get the next pending parent that might have children */
		desc = list_first_entry_or_null(&intc_parent_list,
						typeof(*desc), list);
		if (!desc) {
			pr_err("of_irq_init: children remain, but no parents\n");
			break;
		}
		list_del(&desc->list);
		parent = desc->dev;
		kfree(desc);
	}

	/* free the descriptors still queued as potential parents */
	list_for_each_entry_safe(desc, temp_desc, &intc_parent_list, list) {
		list_del(&desc->list);
		kfree(desc);
	}
err:
	/* free the descriptors that were never initialized and drop their node references */
	list_for_each_entry_safe(desc, temp_desc, &intc_desc_list, list) {
		list_del(&desc->list);
		of_node_put(desc->dev);
		kfree(desc);
	}
}

设备树文件“arch/arm64/boot/dts/arm/foundation-v8.dts”里面中断控制器的属性“compatible”是:

compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";

和中断控制器匹配表中的

{ .compatible = "arm,cortex-a15-gic", .data = gic_of_init  }
{ .compatible = "arm,cortex-a9-gic", .data = gic_of_init  }

匹配。

(2)gic_of_init

/*
 * Device-tree init function of the GIC v2 driver.
 * node is this interrupt controller, parent is its parent device.
 * Returns 0 on success or a negative error code.
 */
int __init
gic_of_init(struct device_node *node, struct device_node *parent)
{
	struct gic_chip_data *gic;
	int irq, ret;

	if (WARN_ON(!node))
		return -ENODEV;

	if (WARN_ON(gic_cnt >= CONFIG_ARM_GIC_MAX_NR))
		return -EINVAL;

	gic = &gic_data[gic_cnt];		/* take a free element of the global array gic_data to hold this controller's information */

	/*
	 * gic_of_setup: read the controller's "reg" property from the device
	 * tree to get the physical address ranges of the distributor and CPU
	 * interface registers, and map them into the kernel virtual address
	 * space.
	 */
	ret = gic_of_setup(gic, node);
	if (ret)
		return ret;

	/*
	 * Disable split EOI/Deactivate if either HYP is not available
	 * or the CPU interface is too small.
	 */
	if (gic_cnt == 0 && !gic_check_eoimode(node, &gic->raw_cpu_base))
		static_branch_disable(&supports_deactivate_key);

	ret = __gic_init_bases(gic, -1, &node->fwnode);		/* __gic_init_bases initializes the struct gic_chip_data */
	if (ret) {
		gic_teardown(gic);
		return ret;
	}

	if (!gic_cnt) {
		gic_init_physaddr(node);
		gic_of_setup_kvm_info(node);
	}

	if (parent) {		/* this controller has a parent, i.e. it is wired as an interrupt source to another interrupt controller */
		/*
		 * irq_of_parse_and_map: get the hardware interrupt number from
		 * the "interrupts" property of this device node and map it to
		 * Linux interrupt number n.
		 */
		irq = irq_of_parse_and_map(node, 0);
		/*
		 * gic_cascade_irq: set handle_irq() of the descriptor of Linux
		 * interrupt n to gic_handle_cascade_irq().
		 */
		gic_cascade_irq(gic_cnt, irq);
	}

	if (IS_ENABLED(CONFIG_ARM_GIC_V2M))
		gicv2m_init(&node->fwnode, gic_data[gic_cnt].domain);

	gic_cnt++;
	return 0;
}

(3)函数__gic_init_bases

/*
 * Initialize one struct gic_chip_data: global hooks for the first GIC
 * (gic_data[0]), the irq_chip descriptor, and then the bases via
 * gic_init_bases(). Returns 0 on success or a negative error code.
 */
static int __init __gic_init_bases(struct gic_chip_data *gic,
				   int irq_start,
				   struct fwnode_handle *handle)
{
	char *name;
	int i, ret;

	if (WARN_ON(!gic || gic->domain))
		return -EINVAL;

	if (gic == &gic_data[0]) {		/* this controller is the root controller */
		/*
		 * Initialize the CPU interface map to all CPUs.
		 * It will be refined as each CPU probes its ID.
		 * This is only necessary for the primary GIC.
		 */
		for (i = 0; i < NR_GIC_CPU_IF; i++)
			gic_cpu_map[i] = 0xff;
#ifdef CONFIG_SMP
		/*
		 * Point the global function pointer __smp_cross_call at
		 * gic_raise_softirq; it is used to send software generated
		 * interrupts, i.e. one CPU interrupting other CPUs.
		 */
		set_smp_cross_call(gic_raise_softirq);
#endif
		cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_GIC_STARTING,
					  "irqchip/arm/gic:starting",
					  gic_starting_cpu, NULL);
		/*
		 * Point the global function pointer handle_arch_irq at
		 * gic_handle_irq, the entry of the C part of interrupt handling.
		 */
		set_handle_irq(gic_handle_irq);
		if (static_branch_likely(&supports_deactivate_key))
			pr_info("GIC: Using split EOI/Deactivate mode\n");
	}
	/* gic_init_chip initializes the interrupt controller descriptor irq_chip */
	if (static_branch_likely(&supports_deactivate_key) && gic == &gic_data[0]) {
		name = kasprintf(GFP_KERNEL, "GICv2");
		gic_init_chip(gic, NULL, name, true);
	} else {
		name = kasprintf(GFP_KERNEL, "GIC-%d", (int)(gic-&gic_data[0]));
		gic_init_chip(gic, NULL, name, false);
	}

	/*
	 * gic_init_bases: allocate the interrupt domain of this controller and
	 * initialize the registers of its distributor and of its CPU interface.
	 */
	ret = gic_init_bases(gic, irq_start, handle);
	if (ret)
		kfree(name);

	return ret;
}

4. Linux中断处理

对于中断控制器的每个中断源,向中断域添加硬件中断号到Linux中断号的映射时,内核分配一个Linux中断号和一个中断描述符irq_desc,中断描述符有两个层次的中断处理函数:

(1)第一层处理函数是中断描述符的成员handle_irq()

(2)第二层处理函数是设备驱动程序注册的处理函数。中断描述符有一个中断处理链表(irq_desc.action),每个中断处理描述符(irqaction)保存设备驱动程序注册的处理函数。因为多个设备可以共享同一个硬件中断号,所以中断处理链表可能挂载多个中断处理描述符。

怎么存储Linux中断号到中断描述符的映射关系?

有两种实现方式:

(1)如果中断编号是稀疏的(即不连续),那么使用基数树(radix tree)存储。需要开启配置宏CONFIG_SPARSE_IRQ。

(2)如果中断编号是连续的,那么使用数组存储。

#ifdef CONFIG_SPARSE_IRQ

...
/* Sparse interrupt numbers: the descriptors are stored in a radix tree. */
static RADIX_TREE(irq_desc_tree, GFP_KERNEL);
...

#else /* !CONFIG_SPARSE_IRQ */

/*
 * Contiguous interrupt numbers: the descriptors are stored in a static array
 * of NR_IRQS entries. Every entry starts out with handle_bad_irq as its
 * handle_irq, depth 1 and an unlocked raw spinlock.
 */
struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
	[0 ... NR_IRQS-1] = {
		.handle_irq	= handle_bad_irq,
		.depth		= 1,
		.lock		= __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock),
	}
};
...

#endif /* !CONFIG_SPARSE_IRQ */

把硬件中断号映射到Linux中断号的时候,根据硬件中断的类型设置中断描述符的成员handle_irq(),以GIC v2控制器为例,函数gic_irq_domain_map所做的处理如下:

irq_create_mapping  ->  irq_domain_associate()  ->  domain->ops->map()  ->  gic_irq_domain_map()

drivers/irqchip/irq-gic.c
/*
 * irq_domain map callback of the GIC v2 driver: choose the first-level flow
 * handler (handle_irq) of Linux interrupt irq from the hardware interrupt
 * number hw. Always returns 0.
 */
static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
				irq_hw_number_t hw)
{
	struct gic_chip_data *gic = d->host_data;

	if (hw < 32) {
		/*
		 * Hardware interrupt numbers below 32 are software generated
		 * interrupts or private peripheral interrupts: handle_irq() of
		 * the interrupt descriptor becomes handle_percpu_devid_irq.
		 */
		irq_set_percpu_devid(irq);
		irq_domain_set_info(d, irq, hw, &gic->chip, d->host_data,
				    handle_percpu_devid_irq, NULL, NULL);
		irq_set_status_flags(irq, IRQ_NOAUTOEN);
	} else {
		/*
		 * Hardware interrupt numbers of 32 and above are shared
		 * peripheral interrupts: handle_irq() of the interrupt
		 * descriptor becomes handle_fasteoi_irq.
		 */
		irq_domain_set_info(d, irq, hw, &gic->chip, d->host_data,
				    handle_fasteoi_irq, NULL, NULL);
		irq_set_probe(irq);
		irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(irq)));
	}
	return 0;
}

在ARM64架构下,在异常级别1的异常向量表中,中断的入口有3个:

(1)如果处理器处在内核模式(异常级别1),中断的入口是el1_irq;

(2)如果处理器正在用户模式(异常级别0)下执行64位应用程序,中断的入口是el0_irq;

(3)如果处理器正在用户模式(异常级别0)下执行32位应用程序,中断的入口是el0_irq_compat。

假设处理器正在用户模式(异常级别0)下执行64位应用程序,中断控制器是GIC v2控制器,Linux中断处理流程如下:

下面列出宏irq_handler和内核模式中断入口el1_irq的代码,函数el0_irq同样通过宏irq_handler调用函数指针handle_arch_irq指向的函数:

arch/arm64/kernel/irq.c
/* Each CPU has its own dedicated interrupt stack. */
DEFINE_PER_CPU(unsigned long *, irq_stack_ptr);

arch/arm64/kernel/entry.S
/*
 * Interrupt handling.
 *
 * irq_handler loads the function pointer handle_arch_irq, passes the current
 * stack pointer in x0 and calls the function on the interrupt stack.
 */
	.macro	irq_handler
	ldr_l	x1, handle_arch_irq
	mov	x0, sp
	irq_stack_entry		// switch from the task's kernel stack to the interrupt stack
	blr	x1		// call the function that handle_arch_irq points to
	irq_stack_exit		// switch from the interrupt stack back to the task's kernel stack
	.endm

	.align	6
// Interrupt entry used when the interrupt arrives while the CPU runs in kernel mode (exception level 1).
el1_irq:
	kernel_entry 1
	enable_da_f
#ifdef CONFIG_TRACE_IRQFLAGS
	bl	trace_hardirqs_off
#endif

	irq_handler            // irq_handler is a macro

#ifdef CONFIG_PREEMPT
	ldr	x24, [tsk, #TSK_TI_PREEMPT]	// get preempt count
	cbnz	x24, 1f				// preempt count != 0: skip the call to el1_preempt
	bl	el1_preempt
1:
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
	bl	trace_hardirqs_on
#endif
	kernel_exit 1
ENDPROC(el1_irq)

在gic_of_init()  ->  __gic_init_bases()中设置了函数指针handle_arch_irq,GIC v2控制器把该函数指针设置为函数gic_handle_irq。

drivers/irqchip/irq-gic.c
/*
 * C entry point of interrupt handling for the GIC v2 controller. It keeps
 * reading the interrupt acknowledge register of gic_data[0] and dispatches
 * each interrupt until the returned number falls outside both handled ranges.
 */
static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
{
	u32 irqstat, irqnr;
	struct gic_chip_data *gic = &gic_data[0];
	void __iomem *cpu_base = gic_data_cpu_base(gic);

	do {
		irqstat = readl_relaxed(cpu_base + GIC_CPU_INTACK);		/* read the CPU interface interrupt acknowledge register to get the interrupt number */
		irqnr = irqstat & GICC_IAR_INT_ID_MASK;

		if (likely(irqnr > 15 && irqnr < 1020)) {		/* hardware interrupt number above 15 and below 1020: the interrupt was sent by a peripheral */
			/*
			 * In split EOI/Deactivate mode, write the value to the
			 * CPU interface end-of-interrupt register right away.
			 */
			if (static_branch_likely(&supports_deactivate_key))
				writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI);		
			isb();
			/*
			 * For a private peripheral interrupt, handle_irq() of
			 * the descriptor is handle_percpu_devid_irq; for a
			 * shared peripheral interrupt it is handle_fasteoi_irq.
			 */
			handle_domain_irq(gic->domain, irqnr, regs);
			continue;
		}
		if (irqnr < 16) {		/* hardware interrupt number below 16: a software generated interrupt */
			/* write the value to the CPU interface end-of-interrupt register */
			writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI);
			/* in split EOI/Deactivate mode, also write it to the deactivate register */
			if (static_branch_likely(&supports_deactivate_key))
				writel_relaxed(irqstat, cpu_base + GIC_CPU_DEACTIVATE);
#ifdef CONFIG_SMP
			/*
			 * Ensure any shared data written by the CPU sending
			 * the IPI is read after we've read the ACK register
			 * on the GIC.
			 *
			 * Pairs with the write barrier in gic_raise_softirq
			 */
			smp_rmb();
			handle_IPI(irqnr, regs);
#endif
			continue;
		}
		break;
	} while (1);
}

函数handle_domain_irq():

include/linux/irqdesc.h
/* Invoke the first-level flow handler (handle_irq) stored in the interrupt descriptor. */
static inline void generic_handle_irq_desc(struct irq_desc *desc)
{
	desc->handle_irq(desc);
}

kernel/irq/irqdesc.c
/*
 * Look up the descriptor of Linux interrupt irq and run its flow handler.
 * Returns 0 when a descriptor exists, -EINVAL otherwise.
 */
int generic_handle_irq(unsigned int irq)
{
	struct irq_desc *d = irq_to_desc(irq);

	if (d) {
		generic_handle_irq_desc(d);
		return 0;
	}

	return -EINVAL;
}

kernel/irq/irqdesc.c
#ifdef CONFIG_HANDLE_DOMAIN_IRQ
/*
 * Handle hardware interrupt hwirq of the given domain. When lookup is true
 * (and CONFIG_IRQ_DOMAIN is set) the hardware number is first translated to
 * a Linux interrupt number with irq_find_mapping(); otherwise hwirq is used
 * as the Linux interrupt number. Returns 0, or -EINVAL for a bad interrupt.
 */
int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
			bool lookup, struct pt_regs *regs)
{
	struct pt_regs *old_regs = set_irq_regs(regs);
	unsigned int irq = hwirq;
	int ret = 0;

	irq_enter();

#ifdef CONFIG_IRQ_DOMAIN
	if (lookup)
		irq = irq_find_mapping(domain, hwirq);
#endif

	/*
	 * Some hardware gives randomly wrong interrupts.  Rather
	 * than crashing, do something sensible.
	 */
	if (unlikely(!irq || irq >= nr_irqs)) {
		ack_bad_irq(irq);
		ret = -EINVAL;
	} else {
		generic_handle_irq(irq);
	}

	irq_exit();
	/* restore the register set that was current before this interrupt */
	set_irq_regs(old_regs);
	return ret;
}
#endif

include/linux/irqdesc.h
/* Wrapper around __handle_domain_irq() that always asks for the hwirq-to-Linux-irq lookup. */
static inline int handle_domain_irq(struct irq_domain *domain,
				    unsigned int hwirq, struct pt_regs *regs)
{
	return __handle_domain_irq(domain, hwirq, true, regs);
}

如果是私有外设中断,那么中断描述符的成员handle_irq()是函数handle_percpu_devid_irq,其代码如下:

kernel/irq/chip.c
/*
 * Flow handler installed for per-CPU interrupts (abridged excerpt; "..."
 * marks omitted code).
 */
void handle_percpu_devid_irq(struct irq_desc *desc)
{
	struct irq_chip *chip = irq_desc_get_chip(desc);
	struct irqaction *action = desc->action;
	unsigned int irq = irq_desc_get_irq(desc);
	irqreturn_t res;

	...
	/* acknowledge the interrupt if the chip provides an irq_ack callback */
	if (chip->irq_ack)
		chip->irq_ack(&desc->irq_data);

	if (likely(action)) {
		...
		/* run the registered handler with this CPU's instance of percpu_dev_id */
		res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id));
		...
	} else {
		...
	}

	/* signal end of interrupt if the chip provides an irq_eoi callback */
	if (chip->irq_eoi)
		chip->irq_eoi(&desc->irq_data);
}

如果是共享外设中断,那么中断描述符的成员handle_irq()是函数handle_fasteoi_irq,其代码如下:

kernel/irq/chip.c
/*
 * Flow handler installed for shared peripheral interrupts. It runs under
 * desc->lock and calls handle_irq_event() to execute the handlers registered
 * by device drivers.
 */
void handle_fasteoi_irq(struct irq_desc *desc)
{
	struct irq_chip *chip = desc->irq_data.chip;

	raw_spin_lock(&desc->lock);

	if (!irq_may_run(desc))
		goto out;

	desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);

	/*
	 * If its disabled or no action available
	 * then mask it and get out of here:
	 */
	if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) {
		desc->istate |= IRQS_PENDING;
		mask_irq(desc);
		goto out;
	}

	kstat_incr_irqs_this_cpu(desc);
	if (desc->istate & IRQS_ONESHOT)
		mask_irq(desc);

	preflow_handler(desc);
	handle_irq_event(desc);		/* handle_irq_event runs the handlers registered by device drivers */

	cond_unmask_eoi_irq(desc, chip);

	raw_spin_unlock(&desc->lock);
	return;
out:
	/* not handled: issue the EOI here unless the chip sets IRQCHIP_EOI_IF_HANDLED */
	if (!(chip->flags & IRQCHIP_EOI_IF_HANDLED))
		chip->irq_eoi(&desc->irq_data);
	raw_spin_unlock(&desc->lock);
}

函数handle_irq_event把主要工作委托给函数__handle_irq_event_percpu。函数__handle_irq_event_percpu遍历中断描述符的中断处理链表,执行每个中断处理描述符的处理函数,其代码如下:

handle_irq_event()  ->  handle_irq_event_percpu()  ->  __handle_irq_event_percpu()

kernel/irq/handle.c
/*
 * Walk the action list of the interrupt descriptor and run the handler of
 * every interrupt action descriptor. The flags of the actions that handled
 * the interrupt are OR-ed into *flags; the OR of all handler results is
 * returned.
 */
irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags)
{
	irqreturn_t retval = IRQ_NONE;
	unsigned int irq = desc->irq_data.irq;
	struct irqaction *action;

	record_irq_time(desc);
	
	/* iterate over the action list and call each action's handler */
	for_each_action_of_desc(desc, action) {		
		irqreturn_t res;

		trace_irq_handler_entry(irq, action);
		res = action->handler(irq, action->dev_id);
		trace_irq_handler_exit(irq, action, res);

		/* a handler must not return with interrupts enabled; warn once and disable them again */
		if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pF enabled interrupts\n",
			      irq, action->handler))
			local_irq_disable();

		switch (res) {
		case IRQ_WAKE_THREAD:
			/*
			 * Catch drivers which return WAKE_THREAD but
			 * did not set up a thread function
			 */
			if (unlikely(!action->thread_fn)) {
				warn_no_thread(irq, action);
				break;
			}

			__irq_wake_thread(desc, action);
			/* keep going: action->flags is used as one input for random number generation */
			/* Fall through to add to randomness */
		case IRQ_HANDLED:
			*flags |= action->flags;
			break;

		default:
			break;
		}

		retval |= res;
	}

	return retval;
}

5. 中断线程化

中断线程化就是使用内核线程处理中断,目的是减少系统关中断的时间,增强系统的实时性。内核提供的函数request_threaded_irq()用来注册线程化的中断:

int request_threaded_irq(unsigned int irq, irq_handler_t handler,
			 irq_handler_t thread_fn, unsigned long irqflags,
			 const char *devname, void *dev_id)

参数thread_fn是线程处理函数。

少数中断不能线程化,典型的例子是时钟中断,有些流氓进程不主动让出处理器,内核只能依靠周期性的时钟中断夺回处理器的控制权,时钟中断是调度器的脉搏。对于不能线程化的中断,注册处理函数的时候必须设置标志IRQF_NO_THREAD。

如果开启了强制中断线程化的配置宏CONFIG_IRQ_FORCED_THREADING,并且在引导内核的时候指定内核参数“threadirqs”,那么强制除了标记IRQF_NO_THREAD以外的所有中断线程化。

线程化中断的每个中断处理描述符(irqaction)对应一个内核线程,结构体irqaction的定义和创建内核线程的代码如下:

include/linux/interrupt.h
/* Interrupt action descriptor: one per handler registered by a device driver. */
struct irqaction {
	irq_handler_t		handler;
	void			*dev_id;
	void __percpu		*percpu_dev_id;
	struct irqaction	*next;		/* next descriptor in the interrupt action list */
	irq_handler_t		thread_fn;		/* thread handler function */
	struct task_struct	*thread;		/* task_struct of the kernel thread */
	struct irqaction	*secondary;
	unsigned int		irq;
	unsigned int		flags;
	unsigned long		thread_flags;
	unsigned long		thread_mask;
	const char		*name;
	struct proc_dir_entry	*dir;
} ____cacheline_internodealigned_in_smp;
request_threaded_irq()  ->  __setup_irq()  ->  setup_irq_thread()

kernel/irq/manage.c
/*
 * Create the kernel thread that serves the threaded interrupt action new of
 * Linux interrupt irq. secondary selects the "-s-" thread name and a
 * priority one lower. Returns 0, or the error from kthread_create().
 */
static int
setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary)
{
	struct task_struct *t;
	struct sched_param param = {
		.sched_priority = MAX_USER_RT_PRIO/2,		/* half of MAX_USER_RT_PRIO */
	};

	if (!secondary) {
		/*
		 * The thread name is "irq/" followed by the Linux interrupt
		 * number and the action name; the thread function is
		 * irq_thread().
		 */
		t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
				   new->name);
	} else {
		t = kthread_create(irq_thread, new, "irq/%d-s-%s", irq,
				   new->name);
		param.sched_priority -= 1;
	}

	if (IS_ERR(t))
		return PTR_ERR(t);

	/*
	 * The interrupt thread is a real-time kernel thread with policy
	 * SCHED_FIFO and priority MAX_USER_RT_PRIO/2 (50 when MAX_USER_RT_PRIO
	 * is 100).
	 */
	sched_setscheduler_nocheck(t, SCHED_FIFO, &param);

	/*
	 * We keep the reference to the task struct even if
	 * the thread dies to avoid that the interrupt code
	 * references an already freed task_struct.
	 */
	get_task_struct(t);
	new->thread = t;
	/*
	 * Tell the thread to set its affinity. This is
	 * important for shared interrupt handlers as we do
	 * not invoke setup_affinity() for the secondary
	 * handlers as everything is already set up. Even for
	 * interrupts marked with IRQF_NO_BALANCE this is
	 * correct as we want the thread to move to the cpu(s)
	 * on which the requesting code placed the interrupt.
	 */
	set_bit(IRQTF_AFFINITY, &new->thread_flags);
	return 0;
}

在中断处理程序中,函数__handle_irq_event_percpu遍历中断描述符的中断处理链表,执行每个中断处理描述符的处理函数。如果返回IRQ_WAKE_THREAD,说明是线程化的中断,那么唤醒中断处理线程。

中断处理线程的处理函数是irq_thread(),调用函数irq_thread_fn(),然后函数irq_thread_fn()调用注册的线程处理函数。

kernel/irq/manage.c

/*
 * Run the thread handler registered for this action, count the interrupt in
 * desc->threads_handled when the handler reports IRQ_HANDLED, then finish
 * one-shot handling. Returns the value of the thread handler.
 */
static irqreturn_t irq_thread_fn(struct irq_desc *desc,
		struct irqaction *action)
{
	irqreturn_t status = action->thread_fn(action->irq, action->dev_id);

	if (status == IRQ_HANDLED)
		atomic_inc(&desc->threads_handled);

	irq_finalize_oneshot(desc, action);

	return status;
}

/*
 * Thread function of an interrupt handler thread. data is the irqaction the
 * thread serves. The loop runs for as long as irq_wait_for_interrupt()
 * returns 0; the function then returns 0.
 */
static int irq_thread(void *data)
{
	struct callback_head on_exit_work;
	struct irqaction *action = data;
	struct irq_desc *desc = irq_to_desc(action->irq);
	irqreturn_t (*handler_fn)(struct irq_desc *desc,
			struct irqaction *action);

	/* pick the forced-threading variant only when forced threading applies to this action */
	if (force_irqthreads && test_bit(IRQTF_FORCED_THREAD,
					&action->thread_flags))
		handler_fn = irq_forced_thread_fn;
	else
		handler_fn = irq_thread_fn;

	/* register irq_thread_dtor as task work of the current thread */
	init_task_work(&on_exit_work, irq_thread_dtor);
	task_work_add(current, &on_exit_work, false);

	irq_thread_check_affinity(desc, action);

	while (!irq_wait_for_interrupt(action)) {
		irqreturn_t action_ret;

		irq_thread_check_affinity(desc, action);

		/* handler_fn (irq_thread_fn by default) calls the registered thread handler */
		action_ret = handler_fn(desc, action);
		if (action_ret == IRQ_WAKE_THREAD)
			irq_wake_secondary(desc, action);

		wake_threads_waitq(desc);
	}

	/*
	 * This is the regular exit path. __free_irq() is stopping the
	 * thread via kthread_stop() after calling
	 * synchronize_hardirq(). So neither IRQTF_RUNTHREAD nor the
	 * oneshot mask bit can be set.
	 */
	task_work_cancel(current, irq_thread_dtor);
	return 0;
}

6. 禁止/开启中断

禁止中断接口:

(1)local_irq_disable()

(2)local_irq_save(flags):首先把中断状态保存在参数flags中,然后禁止中断

这两个接口只能禁止本处理器的中断,不能禁止其他处理器的中断。

开启中断接口:

(1)local_irq_enable()

(2)local_irq_restore(flags):恢复本处理器的中断状态

local_irq_disable()和local_irq_enable()不能嵌套使用,local_irq_save(flags)和local_irq_restore(flags)可以嵌套使用。

7. 禁止/开启单个中断

禁止中断的函数是:void disable_irq(unsigned int irq),参数irq是Linux中断号

开启中断的函数是:void enable_irq(unsigned int irq),参数irq是Linux中断号

8. 中断亲和性

在多处理器系统中,管理员可以设置中断亲和性,允许中断控制器把某个中断转发给哪些处理器,有两种配置方法:

(1)写文件“/proc/irq/IRQ#/smp_affinity”,参数是位掩码

(2)写文件“/proc/irq/IRQ#/smp_affinity_list”,参数是处理器列表

内核提供了设置中断亲和性的函数:

int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)

参数irq是Linux中断号,参数cpumask是处理器位掩码。

9. 处理器间中断

处理器间中断(Inter-Processor Interrupt,IPI)是一种特殊的中断,在多处理器系统中,一个处理器可以向其他处理器发送中断,要求目标处理器执行某件事情。

常见的使用处理器间中断的函数如下:

(1)在所有其他处理器上执行同一个函数:

int smp_call_function(smp_call_func_t func, void *info, int wait);

(2)在指定的处理器上执行一个函数

int smp_call_function_single(int cpuid, smp_call_func_t func, void *info, int wait);

(3)要求指定的处理器重新调度进程

void smp_send_reschedule(int cpu);

对于ARM64架构的GIC控制器,把处理器间中断称为软件生成的中断(Software Generated Interrupt,SGI)。

函数handle_IPI负责处理处理器间中断,参数ipinr是硬件中断号,其代码如下:

arch/arm64/kernel/smp.c
/*
 * Handle an inter-processor interrupt on the current CPU. ipinr is the
 * hardware interrupt number of the IPI, regs the interrupted register set.
 */
void handle_IPI(int ipinr, struct pt_regs *regs)
{
	unsigned int cpu = smp_processor_id();
	struct pt_regs *old_regs = set_irq_regs(regs);

	if ((unsigned)ipinr < NR_IPI) {
		trace_ipi_entry_rcuidle(ipi_types[ipinr]);
		__inc_irq_stat(cpu, ipi_irqs[ipinr]);
	}
	/* seven kinds of inter-processor interrupt are currently supported */
	switch (ipinr) {
	case IPI_RESCHEDULE:		/* hardware interrupt 0: reschedule; raised by smp_send_reschedule() */
		scheduler_ipi();
		break;

	case IPI_CALL_FUNC:		/* hardware interrupt 1: run a function; raised by smp_call_function */
		irq_enter();
		generic_smp_call_function_interrupt();
		irq_exit();
		break;

	case IPI_CPU_STOP:		/* hardware interrupt 2: stop the CPU; raised by smp_send_stop() */
		irq_enter();
		ipi_cpu_stop(cpu);
		irq_exit();
		break;

	case IPI_CPU_CRASH_STOP:		/* hardware interrupt 3: stop the CPU; raised by smp_send_crash_stop() */
		if (IS_ENABLED(CONFIG_KEXEC_CORE)) {
			irq_enter();
			ipi_cpu_crash_stop(cpu, regs);

			unreachable();
		}
		break;

#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
	case IPI_TIMER:		/* hardware interrupt 4: broadcast clock event; raised by tick_broadcast() */
		irq_enter();
		tick_receive_broadcast();
		irq_exit();
		break;
#endif

#ifdef CONFIG_IRQ_WORK
	case IPI_IRQ_WORK:		/* hardware interrupt 5: run callbacks in hard interrupt context; raised by irq_work_queue() */
		irq_enter();
		irq_work_run();
		irq_exit();
		break;
#endif

#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
	case IPI_WAKEUP:		/* hardware interrupt 6: wake up a CPU; raised by acpi_parking_protocol_cpu_boot() */
		WARN_ONCE(!acpi_parking_protocol_valid(cpu),
			  "CPU%u: Wake-up IPI outside the ACPI parking protocol\n",
			  cpu);
		break;
#endif

	default:
		pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);
		break;
	}

	if ((unsigned)ipinr < NR_IPI)
		trace_ipi_exit_rcuidle(ipi_types[ipinr]);
	/* restore the register set that was current before this interrupt */
	set_irq_regs(old_regs);
}

猜你喜欢

转载自blog.csdn.net/linuxweiyh/article/details/106955157