asm {
	ALIGN	16,OC_NOP
USE16
//See TempleOS MultiCore.

//This code gets copied to MP_VECT_ADDR.
//See MemCpy(MP_VECT_ADDR.

//Application-processor (AP) start-up trampoline.
//The bytes COREAP_16BIT_INIT..COREAP_16BIT_INIT_END are the 16-bit part;
//Core0StartMP() copies them to MP_VECT_ADDR and overwrites the
//ap_gdt_ptr field with the contents of SYS_GDT_PTR.
COREAP_16BIT_INIT::
	JMP	@@05		//Skip over the embedded GDT pointer data.

	ALIGN	4,OC_NOP
AP_GDT_PTR:
	DU16	sizeof(CGDT)-1;
	DU64	0;

@@05:
	CLI
	WBINVD
//DS is set to the segment of the copied block so the field offset
//CAP16BitInit.ap_gdt_ptr addresses the copy at MP_VECT_ADDR.
	MOV	AX,MP_VECT_ADDR/16
	MOV	DS,AX
	LGDT	U32 [CAP16BitInit.ap_gdt_ptr] //See mp->ap_gdt_ptr

	MOV	EAX,SYS_START_CR0
	MOV_CR0_EAX
//Hand-encoded far jump (0x66 operand-size prefix, 0xEA opcode,
//32-bit offset, 16-bit selector).
	DU8	0x66,0xEA;	//JMP CGDT.cs32:AP_32BIT_INIT
	DU32	AP_32BIT_INIT;
	DU16	CGDT.cs32;
COREAP_16BIT_INIT_END::

USE32
AP_32BIT_INIT:
//Load every data segment register with the CGDT.ds selector.
	MOV	AX,CGDT.ds
	MOV	DS,AX
	MOV	ES,AX
	MOV	FS,AX
	MOV	GS,AX
	MOV	SS,AX

//Spin until bit 0 of SYS_MP_CNT_LOCK is acquired, take the current
//value of SYS_MP_CNT_INITIAL as this core's index (ESI), bump the
//counter for the next core, then release the lock.
@@05:
	LOCK
	BTS	U32 [SYS_MP_CNT_LOCK],0
	JC	@@05

	MOV	ESI,U32 [SYS_MP_CNT_INITIAL]
	LOCK
	INC	U32 [SYS_MP_CNT_INITIAL]
	LOCK
	BTR	U32 [SYS_MP_CNT_LOCK],0

//Cores whose index does not fit in the CCPU array go to _SYS_HLT.
	CMP	ESI,MP_PROCESSORS_NUM
	JAE	I32 _SYS_HLT

//ESI = address of this core's CCPU: [SYS_CPU_STRUCTS]+index*sizeof(CCPU).
	IMUL2	ESI,sizeof(CCPU)
	ADD	ESI,U32 [SYS_CPU_STRUCTS]

//Stack pointer = end of this core's CCPU.start_stk.
	LEA	ESP,U32 CCPU.start_stk+sizeof(CCPU.start_stk)[ESI]
	PUSH	U32 RFLAGG_START
	POPFD
	PUSH	U32 0		//Return from next call will be 64-bit
	CALL	SYS_ENTER_LONG_MODE
USE64
	FNINIT
	MOV	RAX,RSI
	CALL	SET_GS_BASE	//RAX=this core's CCPU
//Spin until CCPU.seth_task is non-zero; CPUStructInit() stores it last.
@@10:
	MOV	RAX,U64 CCPU.seth_task[RSI]
	TEST	RAX,RAX
	JZ	@@10
	MOV	U64 CTask.gs[RAX],RSI
	CALL	SET_FS_BASE	//RAX=seth task (presumably becomes FS base -- confirm in SET_FS_BASE)

	JMP	I32 _TASK_CONTEXT_RESTORE
}

U0 TSSBusy(I64 tr,Bool val=OFF)
{//See ::/Demo/Lectures/Ring3.HC.
//Sets (val=ON) or clears (val=OFF) bit 9 of the data at byte offset
//tr+4 in sys_gdt, i.e. in the second 32-bit word of the descriptor
//selected by tr.  For a TSS descriptor that is the busy bit of the
//type field.
  LBEqu((&sys_gdt)(U8 *)+tr+4,9,val);
}

CTSS *TSSNew(I64 cpu_num)
{//Allocate and initialize a CTSS for core cpu_num and write its two
//descriptors (tr and tr_ring3) into sys_gdt.
//Returns the new CTSS.
  U32 *d,*d1;
  CTSS *tss=CAlloc(sizeof(CTSS));
  tss->io_map_offset=offset(CTSS.io_map);
  MemSet(tss->io_map,0xFF,0x10000/8);	//One bit per I/O port, all set.

  //Three stacks of MEM_INTERRUPT_STK bytes; each rspN points at the
  //end of the allocated block.
  tss->st0=MAlloc(MEM_INTERRUPT_STK);
  tss->rsp0=tss->st0(U8 *)+MSize(tss->st0);
  tss->st1=MAlloc(MEM_INTERRUPT_STK);
  tss->rsp1=tss->st1(U8 *)+MSize(tss->st1);
  tss->st2=MAlloc(MEM_INTERRUPT_STK);
  tss->rsp2=tss->st2(U8 *)+MSize(tss->st2);

  //Each core gets its own 16-byte descriptor slot.
  tss->tr	=offset(CGDT.tr)+cpu_num*16;
  tss->tr_ring3	=offset(CGDT.tr_ring3)+cpu_num*16;

  //Descriptor at tss->tr: low word 0xFFFF, flags/type word 0x008F8900,
  //then the CTSS address is merged in: low 24 bits at byte offset 2,
  //bits 31:24 in the second U32, bits 63:32 in the third U32, and the
  //fourth U32 is zero.
  d=(&sys_gdt)(U8 *)+tss->tr;
  d1=d(U8 *)+4;
  *d =0x0000FFFF;
  *d1=0x008F8900;
  d(U8 *)+=2;
  *d|=tss & 0x00FFFFFF;
  *d1++|=tss & 0xFF000000;
  *d1++=tss>>32;
  *d1=0;

  //Descriptor at tss->tr_ring3: same layout, only the flags/type word
  //differs (0x008FE900).
  d=(&sys_gdt)(U8 *)+tss->tr_ring3;
  d1=d(U8 *)+4;
  *d =0x0000FFFF;
  *d1=0x008FE900;
  d(U8 *)+=2;
  *d|=tss & 0x00FFFFFF;
  *d1++|=tss & 0xFF000000;
  *d1++=tss>>32;
  *d1=0;

  return tss;
}

CCPU *CPUStructInit(I64 num,CCPU *c,CTask *seth_task)
{//Seth is null when called by adam on CSysFixedArea.boot_cpu0
//Zero and fill in the CCPU c for core number num.  The idle task and
//TSS are only created once RLf_16MEG_ADAM_HEAP_CTRL is set in
//sys_run_level.  Returns c.
  MemSet(c,0,sizeof(CCPU));
  c->addr=c;
  c->num=num;
  c->idle_factor=0.01;
  QueInit(&c->next_dying);
  if (Bt(&sys_run_level,RLf_16MEG_ADAM_HEAP_CTRL)) {
    c->idle_task=Spawn(0,NULL,"Idle Task",,Fs,,0);
    LBts(&c->idle_task->task_flags,TASKf_IDLE);
    c->tss=TSSNew(num);
  }
  WBINVD
  //Stored last: the AP start-up code spins on this field being non-zero.
  c->seth_task=seth_task;// It waits for this to be filled-in: seth_task
  return c;
}

U0 MPInt(U8 num,I64 cpu_num=1)
{//Generate interrupt for specified core.
//num is the interrupt vector; cpu_num is the index of the target core.
//If cpu_num is outside 0..mp_cnt-1 this returns silently while RLf_MP
//is not yet set in sys_run_level, and throws 'MultCore' afterwards.
//The range check matches JobQue() and keeps a negative cpu_num from
//indexing dev.mp_apic_ids[] out of bounds.
  if (!(0<=cpu_num<mp_cnt)) {
    if (!Bt(&sys_run_level,RLf_MP))
      return;
    else
      throw('MultCore');
  }

  PUSHFD
  CLI //Multitasking safe because each core has a local apic and IRQ's are off
  //Wait while bit 0x1000 of ICR low is set before issuing a new command.
  while (*(LAPIC_ICR_LOW)(U32 *)&0x1000)
    PAUSE
  *(LAPIC_ICR_HIGH)(U32 *)=dev.mp_apic_ids[cpu_num]<<24;
  *(LAPIC_ICR_LOW)(U32 *)=0x4000+num;
  POPFD
}

U0 MPIntAll(U8 num)
{//Generate interrupt for all but own core.
//num is the interrupt vector.  Waits while bit 0x1000 of ICR low is
//set, then writes 0xC4800+num to ICR low.
  PUSHFD
  CLI //Multitasking safe because each core has a local apic and IRQ's are off
  while (*(LAPIC_ICR_LOW)(U32 *)&0x1000)
    PAUSE
  *(LAPIC_ICR_LOW)(U32 *)=0xC4800+num;
  POPFD
}

U0 MPNMInt()
{//Generate nonmaskable interrupt.
//Writes 0xC4400 to ICR low.  Unlike MPInt() and MPIntAll() it does not
//first wait on bit 0x1000 and does not disable interrupts.
  *(LAPIC_ICR_LOW)(U32 *)=0xC4400;
}

U0 MPHalt()
{//Halt all other cores.
//Sets mp_cnt to 1, sends the NMI via MPNMInt() and then delays
//with Busy(10000).
  mp_cnt=1;
  MPNMInt;
  Busy(10000);
}

U0 MPAPICInit()
{//Called by adam during start-up
//and other cores during initialization
//after Core0StartMP().
//Enables the local APIC, records this core's APIC id (bits 31:24 of
//the APIC ID register) in dev.mp_apic_ids[Gs->num], programs LDR and
//DFR, and loads the task register with Gs->tss->tr.
  *(LAPIC_SVR)(U32 *)|=LAPICF_APIC_ENABLED;
  dev.mp_apic_ids[Gs->num]=*(LAPIC_APIC_ID)(U32 *)>>24;
  *(LAPIC_LDR)(U32 *)=dev.mp_apic_ids[Gs->num]<<24;
  *(LAPIC_DFR)(U32 *)=0xF0000000;

  SetRAX(Gs->tss->tr);
  LTR	AX
  //Cores other than core 0 also run IntInit1 and switch to
  //RFLAGG_NORMAL here.
  if (Gs->num) {
    IntInit1;
    Busy(10);
    SetRFlags(RFLAGG_NORMAL);
  }
}

//next_waiting must be the first member of CJobCtrl.
#assert !offset(CJobCtrl.next_waiting)

U0 CoreAPSethTask()
{//Main loop of a non-zero core's seth task: run queued jobs, then
//mark the task as awaiting a message and idle until resumed.
  CJobCtrl *ctrl=&Fs->srv_ctrl;
  while (TRUE) {
    STI
    //Each pass reaps dying tasks, spins for the JOBCf_LOCKED bit, and
    //repeats while the waiting queue is non-empty and JobRunOne()
    //returns nonzero.  The lock is still held when the loop exits.
    do {
      TaskKillDying;
      do PAUSE
      while (LBts(&ctrl->flags,JOBCf_LOCKED));
    } while (ctrl->next_waiting!=ctrl && JobRunOne(GetRFlags,ctrl));
    CLI
    //AWAITING_MSG is set before the lock is released; JobQue() tests
    //and clears it while holding the same lock.
    LBts(&Fs->task_flags,TASKf_AWAITING_MSG);
    LBtr(&ctrl->flags,JOBCf_LOCKED);
    LBts(&Fs->task_flags,TASKf_IDLE);
    Yield;
    LBtr(&Fs->task_flags,TASKf_IDLE);
  }
}

CJob *JobQue(I64 (*fp_addr)(U8 *data),U8 *data=NULL,
	I64 target_cpu=1,I64 flags=1<<JOBf_FREE_ON_COMPLETE,
	I64 job_code=JOBT_CALL,U8 *aux_str=NULL,I64 aux1=0,I64 aux2=0)
{//Queue multicore jobs, handled by Seth tasks.
//Set flags to zero if you wish to get the res.
//See ::/Demo/MultiCore/Lock.HC
//Throws 'MultCore' if target_cpu is not in 0..mp_cnt-1.
//Returns the newly allocated CJob, which has been appended to the
//target seth task's waiting queue.  aux_str, if given, is copied.
  CJobCtrl *ctrl;
  CJob *tmpc;
  CTask *seth;
  if (!(0<=target_cpu<mp_cnt))
    throw('MultCore');
  tmpc=ACAlloc(sizeof(CJob));
  if (aux_str)
    tmpc->aux_str=AStrNew(aux_str);
  tmpc->job_code=job_code;
  tmpc->addr=fp_addr;
  tmpc->fun_arg=data;
  tmpc->flags=flags;
  tmpc->aux1=aux1;
  tmpc->aux2=aux2;
  seth=cpu_structs[target_cpu].seth_task;
  tmpc->ctrl=ctrl=&seth->srv_ctrl;
  PUSHFD
  CLI
  while (LBts(&ctrl->flags,JOBCf_LOCKED))
    Yield;
  //Send I_WAKE only when the queue was empty and the seth task had
  //flagged itself as awaiting a message (the flag is cleared here).
  if (ctrl->next_waiting==ctrl && LBtr(&seth->task_flags,TASKf_AWAITING_MSG))
    MPInt(I_WAKE,target_cpu);
  QueIns(tmpc,ctrl->last_waiting);
  LBtr(&ctrl->flags,JOBCf_LOCKED);
  POPFD
  return tmpc;
}

CTask *SpawnQue(U0 (*fp_addr)(U8 *data),U8 *data=NULL,U8 *task_name=NULL,
	I64 target_cpu, CTask *parent=NULL, //NULL means adam
	I64 stk_size=0,I64 flags=1<<JOBf_ADD_TO_QUE)
{//Queue a JOBT_SPAWN_TASK job on target_cpu, wait for it to be marked
//JOBf_DONE, then remove and delete the job.
//task_name, parent and stk_size are passed to JobQue() as aux_str,
//aux1 and aux2.  Returns the job's spawned_task.
  CTask *res;
  CJob *tmpc=JobQue(fp_addr,data,target_cpu,
	flags,JOBT_SPAWN_TASK,task_name,parent,stk_size);
  CJobCtrl *ctrl;

  //Yield, flagged idle, until the job is done.
  while (!Bt(&tmpc->flags,JOBf_DONE)) {
    LBts(&Fs->task_flags,TASKf_IDLE);
    Yield;
  }
  LBtr(&Fs->task_flags,TASKf_IDLE);

  res=tmpc->spawned_task;
  ctrl=tmpc->ctrl;
  PUSHFD
  CLI
  while (LBts(&ctrl->flags,JOBCf_LOCKED))
    Yield;
  QueRem(tmpc);
  LBtr(&ctrl->flags,JOBCf_LOCKED);
  POPFD
  JobDel(tmpc);
  return res;
}

U0 CoreAPSethInit()
{//Called by multicore's seth task after Core0StartMP()
//as the first thing a CPU does before waiting for jobs.
//Initializes the local APIC, then points this task's saved rip at
//CoreAPSethTask and restores the task context.
  Busy(10);
  MPAPICInit;
  Fs->rip=&CoreAPSethTask;
  TaskContextRestore;
}

U64 nextPow2(U64 x)
{//Round x up to a power of two: decrement, smear the highest set bit
//into all lower bits, then increment.  A value that is already a
//power of two is returned unchanged.  x==0 wraps around and yields 0,
//as does any x above 1<<63.
  x--;
  x |= x >> 1;
  x |= x >> 2;
  x |= x >> 4;
  x |= x >> 8;
  x |= x >> 16;
  x |= x >> 32;
  x++;
  return x;
}

U0 Core0StartMP()
{//Called by adam during start-up.
//Starts the other cores: copies the 16-bit start-up code to
//MP_VECT_ADDR, issues INIT and start-up commands through the local
//APIC ICR, creates a seth task and CCPU for every core that checked
//in, waits for them all to reach TASKf_AWAITING_MSG and finally
//publishes the core count in mp_cnt.
//Returns without doing anything if CPUID leaf 1 RDX bit 9 is clear.
  CTask *task;
  U8 buf[STR_LEN];
  CAP16BitInit *mp=MP_VECT_ADDR;
  CCPU *c;
  I64 i,my_mp_cnt;
  CRAXRBXRCXRDX ee;

  CPUId(0x1,&ee);
  if (!Bt(&ee.rdx,9)) { return; }

  PUSHFD
  CLI
  while (LBts(&sys_semas[SEMA_SYS_TIMER],0))
    PAUSE

  //If cores are already running, halt them and discard their queued
  //and finished jobs.
  if (mp_cnt>1) {
    my_mp_cnt=mp_cnt;
    MPHalt; //sets mp_cnt to 1
    for (i=1; i<my_mp_cnt; i++) {
      c=&cpu_structs[i];
      JobQueDel(&c->seth_task->srv_ctrl.next_waiting);
      JobQueDel(&c->seth_task->srv_ctrl.next_done);
    }
  }
  MemSet(&cpu_structs[1],0,sizeof(CCPU)*(MP_PROCESSORS_NUM-1));

  //When you start-up other cores, they jump to an addr
  //specified by a byte vect number, MPN_VECT which corresponds
  //to a location 4096*vect number, MP_VECT_ADDR.

  // Copy initial ASM code for CPU to execute
  MemCpy(mp,COREAP_16BIT_INIT,COREAP_16BIT_INIT_END-COREAP_16BIT_INIT);
  MemCpy(&mp->ap_gdt_ptr,SYS_GDT_PTR,sizeof(CSysLimitBase));
  mp_cnt_initial=mp_cnt=1;
  mp_cnt_lock=0;

  //HolyC gives & higher precedence than +: low byte is cleared, then
  //MPN_VECT is added.
  *(LAPIC_LVT_ERR)(U32 *)=
	*(LAPIC_LVT_ERR)(U32 *)&0xFFFFFF00+MPN_VECT;
  WBINVD //Not sure why this is needed. Might just need delay. MemCpy above?
  Busy(10000);

  *(LAPIC_ICR_LOW)(U32 *)=0xC4500; //assert init IPI
  Busy(10000);

  *(LAPIC_ICR_LOW)(U32 *)=0xC4600+MPN_VECT; //start-up
  Busy(200);
  *(LAPIC_ICR_LOW)(U32 *)=0xC4600+MPN_VECT; //start-up

  Busy(200000);
  // try delaying longer for more cores which take more time
  // This seems sketchy?
  for (i=0; i<10000; i++)
    LBts(&mp_cnt_lock,0); //Don't let more through
  Busy(200);
  my_mp_cnt=mp_cnt_initial;

  if (my_mp_cnt>MP_PROCESSORS_NUM)
    my_mp_cnt=MP_PROCESSORS_NUM;

  //Give each core that checked in a seth task; CPUStructInit() stores
  //the task pointer that the core's start-up code is spinning on.
  for (i=1; i<my_mp_cnt; i++) {
    StrPrint(buf,"Seth Task CPU%02X",i);
    task=Spawn(&CoreAPSethInit,NULL,buf,,,MEM_SETH_STK,0);
    task->rflags=RFLAGG_START;
    //CTask alloced off this core's seth_task's heap (Which is Adam)
    CPUStructInit(i,&cpu_structs[i],task);
    WBINVD //Not sure why this is needed. Might just need delay.
  }

  //Make sure they're all up-and-running
  for (i=1; i<my_mp_cnt; i++)
    while (!Bt(&cpu_structs[i].seth_task->task_flags,TASKf_AWAITING_MSG))
      PAUSE;

  lock mp_cnt=my_mp_cnt; //Finalize cnt
  WBINVD
  LBtr(&sys_semas[SEMA_SYS_TIMER],0);
  POPFD
}

U0 Core0Init()
{//Called by adam during start-up
//Resets the multicore counters, allocates the crash record and the
//CCPU array, initializes core 0's CCPU with adam_task as its seth
//task, sets the GS base, and calls MPAPICInit() when CPUID leaf 1
//RDX bit 9 is set.
  CRAXRBXRCXRDX ee;
  CPUId(0x1,&ee);

  mp_cnt_initial=mp_cnt=1;
  mp_cnt_lock=0;

  dbg.mp_crash=ACAlloc(sizeof(CMPCrash));

  //Must be in code heap because init code uses 32 bit addr of cpu_struct
  adam_task->gs=cpu_structs=
	CAlloc(sizeof(CCPU)*MP_PROCESSORS_NUM,Fs->code_heap);
  CPUStructInit(0,cpu_structs,adam_task);
  //Must directly follow CPUStructInit(): its return value (cpu_structs)
  //is expected to still be in RAX.
  asm //RAX has GS
  {
	IMPORT	SET_GS_BASE;
	CALL	SET_GS_BASE
  }
  if (Bt(&ee.rdx,9)) {
    MPAPICInit;
  }
}

interrupt U0 IntMPCrash()
{//Entering the debugger from another core causes an interrupt on Core0
//Which calls this routine.
//Writes 0 to the local APIC EOI register, drops mp_cnt to 1 and, once
//RLf_DOC is set in sys_run_level, prints the crash record from
//dbg.mp_crash and calls Panic().
  *(LAPIC_EOI)(U32 *)=0;
  mp_cnt=1;
  // For now ignore an MP crash during boot and
  // try to continue with a single core since
  // you probably can't debug it in an early
  // boot stage anyway
  if (Bt(&sys_run_level,RLf_DOC)) {
    Raw(ON);
    text.raw_flags|=RWF_SHOW_DOLLAR;
    "MP Crash CPU%02X Task:%08X\n"
	  "RIP:%P\n"
	  ,dbg.mp_crash->cpu_num,dbg.mp_crash->task,dbg.mp_crash->rip;
    Panic(dbg.mp_crash->msg,dbg.mp_crash->msg_num);
  }
}