1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
|
/* This file contains code for initialization of protected mode, to initialize
* code and data segment descriptors, and to initialize global descriptors
* for local descriptors in the process table.
*/
#include <assert.h>
#include <string.h>
#include <minix/cpufeature.h>
#include <sys/types.h>
#include "kernel/kernel.h"
#include "arch_proto.h"
#include <sys/exec.h>
#include <libexec.h>
/* Descriptor type values used in this file: the INT_286_GATE and AVL_286_TSS
 * type codes, each combined with DESC_386_BIT (presumably selecting the
 * 32-bit flavour of the descriptor - confirm against the header that
 * defines these constants).
 */
#define INT_GATE_TYPE (INT_286_GATE | DESC_386_BIT)
#define TSS_TYPE (AVL_286_TSS | DESC_386_BIT)
/* Pointer to the multiboot video buffer address.
 * This is OK initially, when the 1:1 mapping is still there.
 */
char *video_mem = (char *) MULTIBOOT_VIDEO_BUFFER;
/* Storage for gdt, idt and tss. The tss array holds one entry per CPU. */
struct segdesc_s gdt[GDT_SIZE] __aligned(DESC_SIZE);
struct gatedesc_s idt[IDT_SIZE] __aligned(DESC_SIZE);
struct tss_s tss[CONFIG_MAX_CPUS];
/* Per-CPU kernel stack pointer; tss_init() stores the sp0 value of each
 * CPU's TSS here.
 */
u32_t k_percpu_stacks[CONFIG_MAX_CPUS];
/* Becomes 1 once prot_init() has run to completion. */
int prot_init_done = 0;
/* Convert a kernel virtual address to the matching physical address by
 * subtracting the distance between the kernel's virtual and physical base
 * symbols.
 */
phys_bytes vir2phys(void *vir)
{
	extern char _kern_vir_base, _kern_phys_base;	/* in kernel.lds */
	u32_t delta;

	/* Distance between where the kernel is linked and where it is loaded. */
	delta = (vir_bytes) &_kern_vir_base - (vir_bytes) &_kern_phys_base;

	return (phys_bytes) vir - delta;
}
/*===========================================================================*
* enable_iop *
*===========================================================================*/
void enable_iop(struct proc *pp)
{
	/* Allow a user process to use I/O instructions by setting both I/O
	 * Privilege Level bits (mask 0x3000) in the saved psw of the process.
	 * The IOPL names the least-privileged Current Privilege Level that may
	 * still execute I/O instructions. Users and servers have CPL 3, which
	 * is the least privilege there is; the kernel has CPL 0, tasks CPL 1.
	 */
	const int iopl_bits = 0x3000;

	pp->p_reg.psw |= iopl_bits;
}
/*===========================================================================*
* sdesc *
*===========================================================================*/
void sdesc(struct segdesc_s *segdp, phys_bytes base, vir_bytes size)
{
	/* Fill in the base, limit and granularity fields of a descriptor.
	 * The access byte is left for the caller to set.
	 */
	vir_bytes limit = size - 1;	/* size to limit; 0 size means 4G */

	segdp->base_low = base;
	segdp->base_middle = base >> BASE_MIDDLE_SHIFT;
	segdp->base_high = base >> BASE_HIGH_SHIFT;

	if (limit <= BYTE_GRAN_MAX) {
		/* Small enough to be expressed with byte granularity. */
		segdp->limit_low = limit;
		segdp->granularity = limit >> GRANULARITY_SHIFT;
	} else {
		/* Too large for byte granularity: scale the limit down by
		 * PAGE_GRAN_SHIFT and flag the descriptor as GRANULAR.
		 */
		segdp->limit_low = limit >> PAGE_GRAN_SHIFT;
		segdp->granularity = GRANULAR |
			(limit >> (PAGE_GRAN_SHIFT + GRANULARITY_SHIFT));
	}

	segdp->granularity |= DEFAULT;	/* means BIG for data seg */
}
/*===========================================================================*
* init_dataseg *
*===========================================================================*/
void init_param_dataseg(register struct segdesc_s *segdp,
	phys_bytes base, vir_bytes size, const int privilege)
{
	/* Build a descriptor for a data segment with the given base, size and
	 * descriptor privilege level. The access byte gets the PRESENT,
	 * SEGMENT, WRITEABLE and ACCESSED bits; EXECUTABLE and EXPAND_DOWN
	 * stay clear.
	 */
	sdesc(segdp, base, size);

	segdp->access = (PRESENT | SEGMENT | WRITEABLE | ACCESSED)
		| (privilege << DPL_SHIFT);
}
void init_dataseg(int index, const int privilege)
{
	/* Build a data segment with base 0 and size 0xFFFFFFFF in the GDT
	 * slot 'index'.
	 */
	struct segdesc_s *slot = &gdt[index];

	init_param_dataseg(slot, 0, 0xFFFFFFFF, privilege);
}
/*===========================================================================*
* init_codeseg *
*===========================================================================*/
static void init_codeseg(int index, int privilege)
{
/* Build descriptor for a code segment. */
sdesc(&gdt[index], 0, 0xFFFFFFFF);
gdt[index].access = (privilege << DPL_SHIFT)
| (PRESENT | SEGMENT | EXECUTABLE | READABLE);
/* CONFORMING = 0, ACCESSED = 0 */
}
/* Gate table for the sixteen hardware interrupt handlers hwint00..hwint15.
 * Handler N is bound to VECTOR(N) and every entry uses INTR_PRIVILEGE.
 * The final entry with a NULL handler ends the table; idt_copy_vectors()
 * stops when it reaches it. Installed through idt_copy_vectors_pic().
 */
static struct gate_table_s gate_table_pic[] = {
{ hwint00, VECTOR( 0), INTR_PRIVILEGE },
{ hwint01, VECTOR( 1), INTR_PRIVILEGE },
{ hwint02, VECTOR( 2), INTR_PRIVILEGE },
{ hwint03, VECTOR( 3), INTR_PRIVILEGE },
{ hwint04, VECTOR( 4), INTR_PRIVILEGE },
{ hwint05, VECTOR( 5), INTR_PRIVILEGE },
{ hwint06, VECTOR( 6), INTR_PRIVILEGE },
{ hwint07, VECTOR( 7), INTR_PRIVILEGE },
{ hwint08, VECTOR( 8), INTR_PRIVILEGE },
{ hwint09, VECTOR( 9), INTR_PRIVILEGE },
{ hwint10, VECTOR(10), INTR_PRIVILEGE },
{ hwint11, VECTOR(11), INTR_PRIVILEGE },
{ hwint12, VECTOR(12), INTR_PRIVILEGE },
{ hwint13, VECTOR(13), INTR_PRIVILEGE },
{ hwint14, VECTOR(14), INTR_PRIVILEGE },
{ hwint15, VECTOR(15), INTR_PRIVILEGE },
{ NULL, 0, 0}
};
/* Gate table for the exception handlers and for the IPC and kernel call
 * entry points. Each entry holds the handler, its vector and the privilege
 * that idt_copy_vectors() shifts into the DPL field of the gate.
 * The breakpoint, overflow, IPC and kernel call entries use USER_PRIVILEGE;
 * all other entries use INTR_PRIVILEGE.
 * The final entry with a NULL handler ends the table.
 */
static struct gate_table_s gate_table_exceptions[] = {
{ divide_error, DIVIDE_VECTOR, INTR_PRIVILEGE },
{ single_step_exception, DEBUG_VECTOR, INTR_PRIVILEGE },
{ nmi, NMI_VECTOR, INTR_PRIVILEGE },
{ breakpoint_exception, BREAKPOINT_VECTOR, USER_PRIVILEGE },
{ overflow, OVERFLOW_VECTOR, USER_PRIVILEGE },
{ bounds_check, BOUNDS_VECTOR, INTR_PRIVILEGE },
{ inval_opcode, INVAL_OP_VECTOR, INTR_PRIVILEGE },
{ copr_not_available, COPROC_NOT_VECTOR, INTR_PRIVILEGE },
{ double_fault, DOUBLE_FAULT_VECTOR, INTR_PRIVILEGE },
{ copr_seg_overrun, COPROC_SEG_VECTOR, INTR_PRIVILEGE },
{ inval_tss, INVAL_TSS_VECTOR, INTR_PRIVILEGE },
{ segment_not_present, SEG_NOT_VECTOR, INTR_PRIVILEGE },
{ stack_exception, STACK_FAULT_VECTOR, INTR_PRIVILEGE },
{ general_protection, PROTECTION_VECTOR, INTR_PRIVILEGE },
{ page_fault, PAGE_FAULT_VECTOR, INTR_PRIVILEGE },
{ copr_error, COPROC_ERR_VECTOR, INTR_PRIVILEGE },
{ alignment_check, ALIGNMENT_CHECK_VECTOR, INTR_PRIVILEGE },
{ machine_check, MACHINE_CHECK_VECTOR, INTR_PRIVILEGE },
{ simd_exception, SIMD_EXCEPTION_VECTOR, INTR_PRIVILEGE },
{ ipc_entry_softint_orig, IPC_VECTOR_ORIG, USER_PRIVILEGE },
{ kernel_call_entry_orig, KERN_CALL_VECTOR_ORIG, USER_PRIVILEGE },
{ ipc_entry_softint_um, IPC_VECTOR_UM, USER_PRIVILEGE },
{ kernel_call_entry_um, KERN_CALL_VECTOR_UM, USER_PRIVILEGE },
{ NULL, 0, 0}
};
/* Set up the TSS of one CPU, its descriptor in the GDT and, when the
 * matching feature flag is set, the SYSENTER and SYSCALL MSRs.
 *
 * cpu:          index into tss[] and k_percpu_stacks[]; TSS_INDEX(cpu)
 *               selects the GDT slot that is filled in.
 * kernel_stack: top of the kernel stack to be used for this CPU.
 *
 * Returns SEG_SELECTOR() of the GDT slot that describes the TSS.
 *
 * NOTE(review): cpu is not range-checked against CONFIG_MAX_CPUS here -
 * confirm that every caller guarantees it.
 */
int tss_init(unsigned cpu, void * kernel_stack)
{
struct tss_s * t = &tss[cpu];
int index = TSS_INDEX(cpu);
struct segdesc_s *tssgdt;
tssgdt = &gdt[index];
/* Use the data segment helper to fill in base and limit, then overwrite
 * the access byte so that the descriptor is typed as a TSS.
 */
init_param_dataseg(tssgdt, (phys_bytes) t,
sizeof(struct tss_s), INTR_PRIVILEGE);
tssgdt->access = PRESENT | (INTR_PRIVILEGE << DPL_SHIFT) | TSS_TYPE;
/* Build TSS. */
memset(t, 0, sizeof(*t));
t->ds = t->es = t->fs = t->gs = t->ss0 = KERN_DS_SELECTOR;
t->cs = KERN_CS_SELECTOR;
t->iobase = sizeof(struct tss_s); /* empty i/o permissions map */
/*
 * make space for process pointer and cpu id and point to the first
 * usable word; the same value is recorded in k_percpu_stacks[]
 */
k_percpu_stacks[cpu] = t->sp0 = ((unsigned) kernel_stack) - X86_STACK_TOP_RESERVED;
/*
 * set the cpu id at the top of the stack so we know on which cpu is
 * this stack in use when we trap to kernel (it is stored one reg_t
 * above sp0, inside the reserved area)
 */
*((reg_t *)(t->sp0 + 1 * sizeof(reg_t))) = cpu;
/* Set up Intel SYSENTER support if available: code selector, kernel stack
 * pointer and entry point.
 */
if(minix_feature_flags & MKF_I386_INTEL_SYSENTER) {
ia32_msr_write(INTEL_MSR_SYSENTER_CS, 0, KERN_CS_SELECTOR);
ia32_msr_write(INTEL_MSR_SYSENTER_ESP, 0, t->sp0);
ia32_msr_write(INTEL_MSR_SYSENTER_EIP, 0, (u32_t) ipc_entry_sysenter);
}
/* Set up AMD SYSCALL support if available. */
if(minix_feature_flags & MKF_I386_AMD_SYSCALL) {
u32_t msr_lo, msr_hi;
/* set SYSCALL ENABLE bit in EFER MSR (read-modify-write, so the other
 * bits are preserved)
 */
ia32_msr_read(AMD_MSR_EFER, &msr_hi, &msr_lo);
msr_lo |= AMD_EFER_SCE;
ia32_msr_write(AMD_MSR_EFER, msr_hi, msr_lo);
/* set STAR register value: the user and kernel code selectors are passed
 * in the argument position used for msr_hi above, the per-cpu entry point
 * ipc_entry_syscall_cpuN in the position used for msr_lo.
 * NOTE(review): the helper macro is never #undef'd, so it stays defined
 * for the rest of the file.
 */
#define set_star_cpu(forcpu) if(cpu == forcpu) { \
ia32_msr_write(AMD_MSR_STAR, \
((u32_t)USER_CS_SELECTOR << 16) | (u32_t)KERN_CS_SELECTOR, \
(u32_t) ipc_entry_syscall_cpu ## forcpu); }
/* Only the invocation that matches this cpu writes the MSR. Entry points
 * are referenced for cpus 0..7 only, hence the assert below.
 */
set_star_cpu(0);
set_star_cpu(1);
set_star_cpu(2);
set_star_cpu(3);
set_star_cpu(4);
set_star_cpu(5);
set_star_cpu(6);
set_star_cpu(7);
assert(CONFIG_MAX_CPUS <= 8);
}
return SEG_SELECTOR(index);
}
phys_bytes init_segdesc(int gdt_index, void *base, int size)
{
struct desctableptr_s *dtp = (struct desctableptr_s *) &gdt[gdt_index];
dtp->limit = size - 1;
dtp->base = (phys_bytes) base;
return (phys_bytes) dtp;
}
void int_gate(struct gatedesc_s *tab,
	unsigned vec_nr, vir_bytes offset, unsigned dpl_type)
{
	/* Build the descriptor for an interrupt gate in entry 'vec_nr' of the
	 * table 'tab'. The gate always targets the kernel code segment; the
	 * handler address 'offset' is split over the low and high offset
	 * fields, and 'dpl_type' is stored as given.
	 */
	struct gatedesc_s *gate = tab + vec_nr;

	gate->selector = KERN_CS_SELECTOR;
	gate->p_dpl_type = dpl_type;
	gate->offset_low = offset;
	gate->offset_high = offset >> OFFSET_HIGH_SHIFT;
}
void int_gate_idt(unsigned vec_nr, vir_bytes offset, unsigned dpl_type)
{
	/* Same as int_gate(), but always operates on the kernel's idt. */
	int_gate(&idt[0], vec_nr, offset, dpl_type);
}
void idt_copy_vectors(struct gate_table_s * first)
{
struct gate_table_s *gtp;
for (gtp = first; gtp->gate; gtp++) {
int_gate(idt, gtp->vec_nr, (vir_bytes) gtp->gate,
PRESENT | INT_GATE_TYPE |
(gtp->privilege << DPL_SHIFT));
}
}
void idt_copy_vectors_pic(void)
{
idt_copy_vectors(gate_table_pic);
}
void idt_init(void)
{
	/* Fill the idt: first the hardware interrupt gates, then the gates
	 * from gate_table_exceptions.
	 */
	idt_copy_vectors_pic();
	idt_copy_vectors(&gate_table_exceptions[0]);
}
/* Descriptor table pointers (base and limit) for the gdt and the idt.
 * prot_init() fills them in; they are passed to x86_lgdt() and x86_lidt().
 */
struct desctableptr_s gdt_desc, idt_desc;
void idt_reload(void)
{
	/* Hand the idt table pointer to x86_lidt(). */
	struct desctableptr_s *desc = &idt_desc;

	x86_lidt(desc);
}
multiboot_module_t *bootmod(int pnr)
{
	/* Return the multiboot module that belongs to boot process 'pnr'.
	 * Panics when no boot image entry carries that process number.
	 */
	int slot;

	assert(pnr >= 0);

	/* Search for desired process in boot process
	 * list. The first NR_TASKS ones do not correspond
	 * to a module, however, so we don't search those.
	 */
	for (slot = NR_TASKS; slot < NR_BOOT_PROCS; slot++) {
		int p;

		if (image[slot].proc_nr != pnr)
			continue;

		/* Module index: position in the image table, not counting
		 * the leading task entries.
		 */
		p = slot - NR_TASKS;
		assert(p < MULTIBOOT_MAX_MODS);
		assert(p < kinfo.mbi.mi_mods_count);
		return &kinfo.module_list[p];
	}

	panic("boot module %d not found", pnr);
}
/* Number of the CPU whose TSS selector prot_load_selectors() loads into the
 * task register.
 */
int booting_cpu = 0;
/* Load this kernel's descriptor tables and segment selectors on the current
 * CPU: the gdt, a freshly filled idt, the ldt, the TSS of 'booting_cpu' and
 * finally the kernel code and data selectors in all segment registers.
 */
void prot_load_selectors(void)
{
/* this function is called by both prot_init by the BSP and
 * the early AP booting code in mpx.S by secondary CPU's.
 * everything is set up the same except for the TSS that is per-CPU.
 */
x86_lgdt(&gdt_desc); /* Load gdt */
idt_init(); /* (Re)build the idt entries from the gate tables */
idt_reload(); /* Load idt */
x86_lldt(LDT_SELECTOR); /* Load bogus ldt */
x86_ltr(TSS_SELECTOR(booting_cpu)); /* Load the TSS of the booting cpu */
/* Switch every segment register to the kernel selectors of our own gdt. */
x86_load_kerncs();
x86_load_ds(KERN_DS_SELECTOR);
x86_load_es(KERN_DS_SELECTOR);
x86_load_fs(KERN_DS_SELECTOR);
x86_load_gs(KERN_DS_SELECTOR);
x86_load_ss(KERN_DS_SELECTOR);
}
/*===========================================================================*
* prot_init *
*===========================================================================*/
/* Set up protected mode on the boot CPU: record the fast system call
 * features, build the gdt and idt with their table pointers, set up the TSS
 * of cpu 0, load the new selectors and install a new bootstrap page table.
 * Sets prot_init_done to 1 when finished.
 */
void prot_init(void)
{
extern char k_boot_stktop;
/* Remember which fast system call mechanisms the CPU reports. */
if(_cpufeature(_CPUF_I386_SYSENTER))
minix_feature_flags |= MKF_I386_INTEL_SYSENTER;
if(_cpufeature(_CPUF_I386_SYSCALL))
minix_feature_flags |= MKF_I386_AMD_SYSCALL;
/* Start from empty tables. */
memset(gdt, 0, sizeof(gdt));
memset(idt, 0, sizeof(idt));
/* Build GDT and IDT descriptors (table base and limit). */
gdt_desc.base = (u32_t) gdt;
gdt_desc.limit = sizeof(gdt)-1;
idt_desc.base = (u32_t) idt;
idt_desc.limit = sizeof(idt)-1;
/* TSS and its GDT slot for cpu 0, using the boot stack. */
tss_init(0, &k_boot_stktop);
/* Build GDT */
init_param_dataseg(&gdt[LDT_INDEX],
(phys_bytes) 0, 0, INTR_PRIVILEGE); /* unusable LDT */
gdt[LDT_INDEX].access = PRESENT | LDT;
init_codeseg(KERN_CS_INDEX, INTR_PRIVILEGE);
init_dataseg(KERN_DS_INDEX, INTR_PRIVILEGE);
init_codeseg(USER_CS_INDEX, USER_PRIVILEGE);
init_dataseg(USER_DS_INDEX, USER_PRIVILEGE);
/* Currently the multiboot segments are loaded; which is fine, but
 * let's replace them with the ones from our own GDT so we test
 * right away whether they work as expected.
 */
prot_load_selectors();
/* Set up a new post-relocate bootstrap pagetable so that
 * we can map in VM, and we no longer rely on pre-relocated
 * data.
 */
pg_clear();
pg_identity(&kinfo); /* Still need 1:1 for lapic and video mem and such. */
pg_mapkernel();
pg_load();
prot_init_done = 1;
}
/* Running total of the bytes mapped by libexec_pg_alloc(); arch_boot_proc()
 * copies it into kinfo.vm_allocated_bytes after loading VM.
 */
static int alloc_for_vm = 0;
void arch_post_init(void)
{
	/* Let memory mapping code know what's going on at bootstrap time:
	 * VM becomes this cpu's ptproc, and pg_info() fills in the cr3
	 * fields of VM's process slot.
	 */
	struct proc *vm_proc = proc_addr(VM_PROC_NR);

	get_cpulocal_var(ptproc) = vm_proc;
	pg_info(&vm_proc->p_seg.p_cr3, &vm_proc->p_seg.p_cr3_v);
}
static int libexec_pg_alloc(struct exec_info *execi, vir_bytes vaddr, size_t len)
{
	/* libexec allocation callback for loading VM inside the kernel: map
	 * the range [vaddr, vaddr+len) through pg_map(), reload the page
	 * table, zero the new memory and add its size to alloc_for_vm.
	 * 'execi' is not used. Always returns OK.
	 */
	pg_map(PG_ALLOCATEME, vaddr, vaddr+len, &kinfo);
	pg_load();

	memset((void *) vaddr, 0, len);

	alloc_for_vm += len;

	return OK;
}
/* Architecture-specific set-up of one boot image process.
 *
 * ip: boot image entry of the process (endpoint and name are used).
 * rp: process slot of the process.
 *
 * Processes with a negative process number are skipped. For all others the
 * multiboot module is looked up, and bootmod() panics if there is none.
 * Only VM gets further treatment: its ELF image is loaded into the
 * bootstrap page table, an initial stack is built and the memory of the
 * module is handed back to the memory map.
 */
void arch_boot_proc(struct boot_image *ip, struct proc *rp)
{
	multiboot_module_t *mod;
	struct ps_strings *psp;
	char *sp;

	if(rp->p_nr < 0) return;

	mod = bootmod(rp->p_nr);

	/* Important special case: we put VM in the bootstrap pagetable
	 * so it can run.
	 */
	if(rp->p_nr == VM_PROC_NR) {
		struct exec_info execi;

		memset(&execi, 0, sizeof(execi));

		/* exec parameters */
		execi.stack_high = kinfo.user_sp;
		execi.stack_size = 64 * 1024;	/* not too crazy as it must be preallocated */
		execi.proc_e = ip->endpoint;
		execi.hdr = (char *) mod->mod_start; /* phys mem direct */
		execi.filesize = execi.hdr_len = mod->mod_end - mod->mod_start;
		strlcpy(execi.progname, ip->proc_name, sizeof(execi.progname));
		execi.frame_len = 0;

		/* callbacks for use in the kernel */
		execi.copymem = libexec_copy_memcpy;
		execi.clearmem = libexec_clear_memset;
		execi.allocmem_prealloc_junk = libexec_pg_alloc;
		execi.allocmem_prealloc_cleared = libexec_pg_alloc;
		execi.allocmem_ondemand = libexec_pg_alloc;
		execi.clearproc = NULL;

		/* parse VM ELF binary and alloc/map it into bootstrap pagetable */
		if(libexec_load_elf(&execi) != OK)
			panic("VM loading failed");

		/* Setup a ps_strings struct on the stack, pointing to the
		 * following argv, envp. */
		sp = (char *)execi.stack_high;
		sp -= sizeof(struct ps_strings);
		psp = (struct ps_strings *) sp;

		/* Take the stack pointer down three words to give startup code
		 * something to use as "argc", "argv" and "envp". From sp
		 * upwards the layout is: one int (argc), one pointer (argv
		 * slot), one pointer (envp slot), then the ps_strings struct.
		 */
		sp -= (sizeof(void *) + sizeof(void *) + sizeof(int));

		/* The stack lives in VM's linear address space, which is
		 * mapped in the bootstrap pagetable, so it can be written
		 * directly from here.
		 */
		psp->ps_argvstr = (char **)(sp + sizeof(int));
		psp->ps_nargvstr = 0;
		/* The envp slot is exactly one pointer past the argv slot.
		 * ps_argvstr is a char **, so this must be "+ 1"; adding
		 * sizeof(void *) would skip that many pointers and end up
		 * beyond the three reserved words.
		 */
		psp->ps_envstr = psp->ps_argvstr + 1;
		psp->ps_nenvstr = 0;

		arch_proc_init(rp, execi.pc, (vir_bytes)sp,
			execi.stack_high - sizeof(struct ps_strings),
			ip->proc_name);

		/* Free VM blob that was just copied into existence. */
		add_memmap(&kinfo, mod->mod_start, mod->mod_end-mod->mod_start);
		mod->mod_end = mod->mod_start = 0;

		/* Remember how many bytes were allocated for VM. */
		kinfo.vm_allocated_bytes = alloc_for_vm;
	}
}
|