Source file
src/runtime/os_linux.go
1
2
3
4
5 package runtime
6
7 import (
8 "internal/abi"
9 "internal/goarch"
10 "internal/runtime/atomic"
11 "internal/runtime/strconv"
12 "internal/runtime/syscall"
13 "unsafe"
14 )
15
16
17
18
// sigPerThreadSyscall is the signal that syscall_runtime_doAllThreadsSyscall
// sends (via signalM) to every other thread after setting that thread's
// needPerThreadSyscall flag. Its value is one above _SIGRTMIN.
const sigPerThreadSyscall = _SIGRTMIN + 1
20
// mOS holds the Linux-specific per-thread fields used in this file.
// NOTE(review): presumably embedded in the runtime's m struct, since these
// fields are reached through *m values below — confirm.
type mOS struct {
	// profileTimer is the timer id obtained from timer_create in
	// setThreadCPUProfiler. It is meaningful only while profileTimerValid
	// is true; setThreadCPUProfiler clears both before deleting the timer.
	profileTimer      int32
	profileTimerValid atomic.Bool

	// needPerThreadSyscall is set to 1 by
	// syscall_runtime_doAllThreadsSyscall before it signals the thread and
	// is reset to 0 by runPerThreadSyscall once the call has been made.
	needPerThreadSyscall atomic.Uint8

	// vgetrandomState is not referenced in this file.
	// NOTE(review): presumably belongs to the support set up by
	// vgetrandomInit — confirm.
	vgetrandomState uintptr

	// waitsema is not referenced in this file.
	waitsema uint32
}
42
43
// futex is declared without a body; its implementation lives outside this
// file (presumably a per-architecture wrapper around the futex system call —
// confirm). In this file it is called with _FUTEX_WAIT_PRIVATE and
// _FUTEX_WAKE_PRIVATE, and futexwakeup treats a negative result as failure.
func futex(addr unsafe.Pointer, op int32, val uint32, ts, addr2 unsafe.Pointer, val3 uint32) int32
45
46
47
48
49
50
51
52
53
54
// Operation codes passed as the op argument of futex. The *_PRIVATE values
// are the base operation (0 for wait, 1 for wake) OR-ed with
// _FUTEX_PRIVATE_FLAG.
const (
	_FUTEX_PRIVATE_FLAG = 128
	_FUTEX_WAIT_PRIVATE = 0 | _FUTEX_PRIVATE_FLAG
	_FUTEX_WAKE_PRIVATE = 1 | _FUTEX_PRIVATE_FLAG
)
60
61
62
63
64
65
66
67
68
69 func futexsleep(addr *uint32, val uint32, ns int64) {
70
71
72
73
74
75 if ns < 0 {
76 futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, nil, nil, 0)
77 return
78 }
79
80 var ts timespec
81 ts.setNsec(ns)
82 futex(unsafe.Pointer(addr), _FUTEX_WAIT_PRIVATE, val, unsafe.Pointer(&ts), nil, 0)
83 }
84
85
86
87
88 func futexwakeup(addr *uint32, cnt uint32) {
89 ret := futex(unsafe.Pointer(addr), _FUTEX_WAKE_PRIVATE, cnt, nil, nil, 0)
90 if ret >= 0 {
91 return
92 }
93
94
95
96
97 systemstack(func() {
98 print("futexwakeup addr=", addr, " returned ", ret, "\n")
99 })
100
101 *(*int32)(unsafe.Pointer(uintptr(0x1006))) = 0x1006
102 }
103
104 func getCPUCount() int32 {
105
106
107
108
109
110
111
112 const maxCPUs = 64 * 1024
113 var buf [maxCPUs / 8]byte
114 r := sched_getaffinity(0, unsafe.Sizeof(buf), &buf[0])
115 if r < 0 {
116 return 1
117 }
118 n := int32(0)
119 for _, v := range buf[:r] {
120 for v != 0 {
121 n += int32(v & 1)
122 v >>= 1
123 }
124 }
125 if n == 0 {
126 n = 1
127 }
128 return n
129 }
130
131
// Flag bits for the flags argument of clone, followed by cloneFlags, the
// combination this file passes to clone in newosproc and newosproc0.
const (
	_CLONE_VM             = 0x100
	_CLONE_FS             = 0x200
	_CLONE_FILES          = 0x400
	_CLONE_SIGHAND        = 0x800
	_CLONE_PTRACE         = 0x2000
	_CLONE_VFORK          = 0x4000
	_CLONE_PARENT         = 0x8000
	_CLONE_THREAD         = 0x10000
	_CLONE_NEWNS          = 0x20000
	_CLONE_SYSVSEM        = 0x40000
	_CLONE_SETTLS         = 0x80000
	_CLONE_PARENT_SETTID  = 0x100000
	_CLONE_CHILD_CLEARTID = 0x200000
	_CLONE_UNTRACED       = 0x800000
	_CLONE_CHILD_SETTID   = 0x1000000
	_CLONE_STOPPED        = 0x2000000
	_CLONE_NEWUTS         = 0x4000000
	_CLONE_NEWIPC         = 0x8000000

	// cloneFlags is the flag set used for every thread created here:
	// VM, FS, FILES, SIGHAND, SYSVSEM and THREAD.
	cloneFlags = _CLONE_VM |
		_CLONE_FS |
		_CLONE_FILES |
		_CLONE_SIGHAND |
		_CLONE_SYSVSEM |
		_CLONE_THREAD
)
166
167
// clone is declared without a body; its implementation lives outside this
// file. Callers here pass cloneFlags, the top of the new stack, the m and g
// for the new thread (nil in newosproc0) and the entry function, and treat a
// negative result as a negated errno.
func clone(flags int32, stk, mp, gp, fn unsafe.Pointer) int32
169
170
171
172
173 func newosproc(mp *m) {
174 stk := unsafe.Pointer(mp.g0.stack.hi)
175
178 if false {
179 print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " clone=", abi.FuncPCABI0(clone), " id=", mp.id, " ostk=", &mp, "\n")
180 }
181
182
183
184 var oset sigset
185 sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
186 ret := retryOnEAGAIN(func() int32 {
187 r := clone(cloneFlags, stk, unsafe.Pointer(mp), unsafe.Pointer(mp.g0), unsafe.Pointer(abi.FuncPCABI0(mstart)))
188
189
190 if r >= 0 {
191 return 0
192 }
193 return -r
194 })
195 sigprocmask(_SIG_SETMASK, &oset, nil)
196
197 if ret != 0 {
198 print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", ret, ")\n")
199 if ret == _EAGAIN {
200 println("runtime: may need to increase max user processes (ulimit -u)")
201 }
202 throw("newosproc")
203 }
204 }
205
206
207
208
209 func newosproc0(stacksize uintptr, fn unsafe.Pointer) {
210 stack := sysAlloc(stacksize, &memstats.stacks_sys, "OS thread stack")
211 if stack == nil {
212 writeErrStr(failallocatestack)
213 exit(1)
214 }
215 ret := clone(cloneFlags, unsafe.Pointer(uintptr(stack)+stacksize), nil, nil, fn)
216 if ret < 0 {
217 writeErrStr(failthreadcreate)
218 exit(1)
219 }
220 }
221
// Auxiliary vector tags. In this file sysauxv stops at _AT_NULL and acts on
// _AT_RANDOM, _AT_PAGESZ and _AT_SECURE; every tag is also forwarded to
// archauxv and vdsoauxv.
const (
	_AT_NULL     = 0
	_AT_PAGESZ   = 6
	_AT_PLATFORM = 15
	_AT_HWCAP    = 16
	_AT_SECURE   = 23
	_AT_RANDOM   = 25
	_AT_HWCAP2   = 26
)
231
// procAuxv is the NUL-terminated path that sysargs opens when the in-memory
// auxiliary vector yields no entries.
var procAuxv = []byte("/proc/self/auxv\x00")

// addrspace_vec is the one-byte result vector sysargs passes to mincore.
var addrspace_vec [1]byte

// mincore is declared without a body; its implementation lives outside this
// file. sysargs treats a zero result as success.
func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32

// auxvreadbuf receives the bytes sysargs reads from procAuxv; auxv is then
// set to a slice of it.
var auxvreadbuf [128]uintptr
239
// sysargs locates the auxiliary vector, processes it with sysauxv and
// records it in auxv.
//
// It first looks in memory after argv. If that yields no entries it reads
// procAuxv into auxvreadbuf instead. If procAuxv cannot be opened it only
// tries to determine physPageSize by probing with mincore.
func sysargs(argc int32, argv **byte) {
	// Start just past the argc entries of argv and the slot after them.
	n := argc + 1

	// Advance over the following pointer array (presumably envp — confirm)
	// until its nil terminator.
	for argv_index(argv, n) != nil {
		n++
	}

	// Step over that nil terminator.
	n++

	// argv+n is now treated as the start of the auxiliary vector.
	auxvp := (*[1 << 28]uintptr)(add(unsafe.Pointer(argv), uintptr(n)*goarch.PtrSize))

	if pairs := sysauxv(auxvp[:]); pairs != 0 {
		// Cap the slice at exactly the pairs that were processed.
		auxv = auxvp[: pairs*2 : pairs*2]
		return
	}

	// The in-memory vector was empty: fall back to reading procAuxv.
	fd := open(&procAuxv[0], 0 /* O_RDONLY */, 0)
	if fd < 0 {
		// The file is unavailable too. Map a scratch region and probe it
		// with mincore at doubling offsets; the first offset mincore
		// accepts is taken as the page size.
		// NOTE(review): this relies on mincore failing for addresses that
		// are not page-aligned — confirm.
		const size = 256 << 10
		p, err := mmap(nil, size, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
		if err != 0 {
			return
		}
		var n uintptr
		for n = 4 << 10; n < size; n <<= 1 {
			err := mincore(unsafe.Pointer(uintptr(p)+n), 1, &addrspace_vec[0])
			if err == 0 {
				physPageSize = n
				break
			}
		}
		if physPageSize == 0 {
			// No probe succeeded; use the region size itself.
			physPageSize = size
		}
		munmap(p, size)
		return
	}

	n = read(fd, noescape(unsafe.Pointer(&auxvreadbuf[0])), int32(unsafe.Sizeof(auxvreadbuf)))
	closefd(fd)
	if n < 0 {
		return
	}

	// Force a terminator into the last pair so sysauxv always stops inside
	// the buffer, even if the file filled it completely.
	auxvreadbuf[len(auxvreadbuf)-2] = _AT_NULL
	pairs := sysauxv(auxvreadbuf[:])
	auxv = auxvreadbuf[: pairs*2 : pairs*2]
}
297
298
299 var secureMode bool
300
301 func sysauxv(auxv []uintptr) (pairs int) {
302
303
304 var i int
305 for ; auxv[i] != _AT_NULL; i += 2 {
306 tag, val := auxv[i], auxv[i+1]
307 switch tag {
308 case _AT_RANDOM:
309
310
311
312
313
314
315 startupRand = (*[16]byte)(unsafe.Pointer(val))[:]
316
317 case _AT_PAGESZ:
318 physPageSize = val
319
320 case _AT_SECURE:
321 secureMode = val == 1
322 }
323
324 archauxv(tag, val)
325 vdsoauxv(tag, val)
326 }
327 return i / 2
328 }
329
330 var sysTHPSizePath = []byte("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size\x00")
331
332 func getHugePageSize() uintptr {
333 var numbuf [20]byte
334 fd := open(&sysTHPSizePath[0], 0 , 0)
335 if fd < 0 {
336 return 0
337 }
338 ptr := noescape(unsafe.Pointer(&numbuf[0]))
339 n := read(fd, ptr, int32(len(numbuf)))
340 closefd(fd)
341 if n <= 0 {
342 return 0
343 }
344 n--
345 v, ok := strconv.Atoi(slicebytetostringtmp((*byte)(ptr), int(n)))
346 if !ok || v < 0 {
347 v = 0
348 }
349 if v&(v-1) != 0 {
350
351 return 0
352 }
353 return uintptr(v)
354 }
355
// osinit performs the OS-level initialization in this file: it records the
// CPU count in numCPUStartup and the huge page size in physHugePageSize,
// then runs osArchInit and vgetrandomInit.
func osinit() {
	numCPUStartup = getCPUCount()
	physHugePageSize = getHugePageSize()
	osArchInit()
	vgetrandomInit()
}
362
363 var urandom_dev = []byte("/dev/urandom\x00")
364
365 func readRandom(r []byte) int {
366
367
368 fd := open(&urandom_dev[0], 0 , 0)
369 n := read(fd, unsafe.Pointer(&r[0]), int32(len(r)))
370 closefd(fd)
371 return int(n)
372 }
373
// goenvs delegates to goenvs_unix; Linux needs no extra handling here.
func goenvs() {
	goenvs_unix()
}
377
378
379
380
381
382
383
// libpreinit calls initsig with its argument set to true.
// NOTE(review): presumably the early signal setup used when the runtime is
// loaded as a library — confirm.
func libpreinit() {
	initsig(true)
}
387
388
389
// mpreinit allocates mp's signal-handling goroutine with a 32 KiB stack and
// links it back to mp.
func mpreinit(mp *m) {
	mp.gsignal = malg(32 * 1024)
	mp.gsignal.m = mp
}
394
// gettid is declared without a body; its implementation lives outside this
// file. minit stores its result as the current thread's procid.
func gettid() uint32

// minit initializes the current thread's m: it runs minitSignals and then
// records the thread id in procid. syscall_runtime_doAllThreadsSyscall
// waits for procid to become nonzero before signalling a thread.
func minit() {
	minitSignals()

	getg().m.procid = uint64(gettid())
}
407
408
409
410
// unminit undoes minit for the current thread: it runs unminitSignals and
// resets procid to 0.
func unminit() {
	unminitSignals()
	getg().m.procid = 0
}
415
416
417
418
419
420
421
// mdestroy is a no-op on Linux: there is nothing to release for mp here.
func mdestroy(mp *m) {
}
424
425
426
427
428
// The following are declared without bodies; their implementations live
// outside this file. setsig installs sigreturn__sigaction as sa_restorer on
// 386 and amd64, and substitutes sigtramp (or cgoSigtramp when iscgo is set)
// when asked to install sighandler.
func sigreturn__sigaction()
func sigtramp()
func cgoSigtramp()
432
433
// sigaltstack is declared without a body and is not called in this file.
func sigaltstack(new, old *stackt)

// setitimer is declared without a body and is not called in this file.
func setitimer(mode int32, new, old *itimerval)

// timer_create is declared without a body. setThreadCPUProfiler treats a
// nonzero result as failure and reads the new timer id from *timerid.
func timer_create(clockid int32, sevp *sigevent, timerid *int32) int32

// timer_settime is declared without a body. setThreadCPUProfiler treats a
// nonzero result as a negated errno.
func timer_settime(timerid int32, flags int32, new, old *itimerspec) int32

// timer_delete is declared without a body. setThreadCPUProfiler treats a
// nonzero result as a negated errno.
func timer_delete(timerid int32) int32

// rtsigprocmask is declared without a body. sigprocmask calls it with the
// size of a sigset as the final argument.
func rtsigprocmask(how int32, new, old *sigset, size int32)
450
451
452
// sigprocmask forwards to rtsigprocmask, supplying the size of a sigset as
// the size argument. unsafe.Sizeof(*new) only uses the static type of new,
// so a nil new is fine.
func sigprocmask(how int32, new, old *sigset) {
	rtsigprocmask(how, new, old, int32(unsafe.Sizeof(*new)))
}
456
// raise and raiseproc are declared without bodies and are not called in
// this file.
func raise(sig uint32)
func raiseproc(sig uint32)

// sched_getaffinity is declared without a body. getCPUCount passes it a
// buffer and its size and uses a non-negative result as the number of
// buffer bytes to examine.
func sched_getaffinity(pid, len uintptr, buf *byte) int32

// osyield is declared without a body. It is used in the wait loops of
// syscall_runtime_doAllThreadsSyscall.
func osyield()

// osyield_no_g simply calls osyield.
// NOTE(review): the name suggests it may be called without a g — confirm.
func osyield_no_g() {
	osyield()
}

// pipe2 is declared without a body and is not called in this file.
func pipe2(flags int32) (r, w int32, errno int32)
470
471
// fcntl invokes the fcntl system call with the given descriptor, command
// and argument. It returns the raw first result and the error number from
// Syscall6, each truncated to int32.
func fcntl(fd, cmd, arg int32) (ret int32, errno int32) {
	res, _, e := syscall.Syscall6(syscall.SYS_FCNTL, uintptr(fd), uintptr(cmd), uintptr(arg), 0, 0, 0)
	ret = int32(res)
	errno = int32(e)
	return ret, errno
}
476
// Size constants that are not referenced in this file.
// NOTE(review): presumably the byte sizes of the kernel's siginfo and
// sigevent structures — confirm.
const (
	_si_max_size    = 128
	_sigev_max_size = 64
)
481
482
483
484 func setsig(i uint32, fn uintptr) {
485 var sa sigactiont
486 sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK | _SA_RESTORER | _SA_RESTART
487 sigfillset(&sa.sa_mask)
488
489
490
491 if GOARCH == "386" || GOARCH == "amd64" {
492 sa.sa_restorer = abi.FuncPCABI0(sigreturn__sigaction)
493 }
494 if fn == abi.FuncPCABIInternal(sighandler) {
495 if iscgo {
496 fn = abi.FuncPCABI0(cgoSigtramp)
497 } else {
498 fn = abi.FuncPCABI0(sigtramp)
499 }
500 }
501 sa.sa_handler = fn
502 sigaction(i, &sa, nil)
503 }
504
505
506
507 func setsigstack(i uint32) {
508 var sa sigactiont
509 sigaction(i, nil, &sa)
510 if sa.sa_flags&_SA_ONSTACK != 0 {
511 return
512 }
513 sa.sa_flags |= _SA_ONSTACK
514 sigaction(i, &sa, nil)
515 }
516
517
518
// getsig returns the handler address currently installed for signal i,
// as reported by sigaction.
func getsig(i uint32) uintptr {
	var sa sigactiont
	sigaction(i, nil, &sa)
	return sa.sa_handler
}
524
525
526
527
// setSignalstackSP stores sp into s.ss_sp. The store goes through a
// *uintptr cast, so it works regardless of the declared type of ss_sp.
func setSignalstackSP(s *stackt, sp uintptr) {
	*(*uintptr)(unsafe.Pointer(&s.ss_sp)) = sp
}
531
532
// fixsigcode is a no-op on Linux: the signal code in c is left untouched.
func (c *sigctxt) fixsigcode(sig uint32) {
}
535
536
537
538
539 func sysSigaction(sig uint32, new, old *sigactiont) {
540 if rt_sigaction(uintptr(sig), new, old, unsafe.Sizeof(sigactiont{}.sa_mask)) != 0 {
541
542
543
544
545
546
547
548
549
550
551
552 if sig != 32 && sig != 33 && sig != 64 {
553
554 systemstack(func() {
555 throw("sigaction failed")
556 })
557 }
558 }
559 }
560
561
562
563
// rt_sigaction is declared without a body. sysSigaction treats a nonzero
// result as failure.
func rt_sigaction(sig uintptr, new, old *sigactiont, size uintptr) int32

// getpid and tgkill are declared without bodies; signalM uses them together.
func getpid() int
func tgkill(tgid, tid, sig int)

// signalM sends sig to the thread of mp by calling tgkill with this
// process's pid and mp.procid as the thread id.
func signalM(mp *m, sig int) {
	tgkill(getpid(), int(mp.procid), sig)
}
573
574
575
576
577
578
579
580
581 func validSIGPROF(mp *m, c *sigctxt) bool {
582 code := int32(c.sigcode())
583 setitimer := code == _SI_KERNEL
584 timer_create := code == _SI_TIMER
585
586 if !(setitimer || timer_create) {
587
588
589
590 return true
591 }
592
593 if mp == nil {
594
595
596
597
598
599
600
601
602
603
604
605
606 return setitimer
607 }
608
609
610
611 if mp.profileTimerValid.Load() {
612
613
614
615
616
617 return timer_create
618 }
619
620
621 return setitimer
622 }
623
// setProcessCPUProfiler forwards hz to setProcessCPUProfilerTimer.
func setProcessCPUProfiler(hz int32) {
	setProcessCPUProfilerTimer(hz)
}
627
628 func setThreadCPUProfiler(hz int32) {
629 mp := getg().m
630 mp.profilehz = hz
631
632
633 if mp.profileTimerValid.Load() {
634 timerid := mp.profileTimer
635 mp.profileTimerValid.Store(false)
636 mp.profileTimer = 0
637
638 ret := timer_delete(timerid)
639 if ret != 0 {
640 print("runtime: failed to disable profiling timer; timer_delete(", timerid, ") errno=", -ret, "\n")
641 throw("timer_delete")
642 }
643 }
644
645 if hz == 0 {
646
647 return
648 }
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669 spec := new(itimerspec)
670 spec.it_value.setNsec(1 + int64(cheaprandn(uint32(1e9/hz))))
671 spec.it_interval.setNsec(1e9 / int64(hz))
672
673 var timerid int32
674 var sevp sigevent
675 sevp.notify = _SIGEV_THREAD_ID
676 sevp.signo = _SIGPROF
677 sevp.sigev_notify_thread_id = int32(mp.procid)
678 ret := timer_create(_CLOCK_THREAD_CPUTIME_ID, &sevp, &timerid)
679 if ret != 0 {
680
681
682 return
683 }
684
685 ret = timer_settime(timerid, 0, spec, nil)
686 if ret != 0 {
687 print("runtime: failed to configure profiling timer; timer_settime(", timerid,
688 ", 0, {interval: {",
689 spec.it_interval.tv_sec, "s + ", spec.it_interval.tv_nsec, "ns} value: {",
690 spec.it_value.tv_sec, "s + ", spec.it_value.tv_nsec, "ns}}, nil) errno=", -ret, "\n")
691 throw("timer_settime")
692 }
693
694 mp.profileTimer = timerid
695 mp.profileTimerValid.Store(true)
696 }
697
698
699
// perThreadSyscallArgs describes one system call to be repeated on every
// thread: the trap number, its six arguments, and the results (r1, r2) that
// the initiating thread obtained and that every other thread must match.
type perThreadSyscallArgs struct {
	trap uintptr
	a1   uintptr
	a2   uintptr
	a3   uintptr
	a4   uintptr
	a5   uintptr
	a6   uintptr
	r1   uintptr
	r2   uintptr
}

// perThreadSyscall is the call currently being broadcast. It is filled in by
// syscall_runtime_doAllThreadsSyscall, read by runPerThreadSyscall, and
// reset to the zero value once every thread has finished.
var perThreadSyscall perThreadSyscallArgs
718
719
720
721
722
723
724
725
726
// syscall_runtime_doAllThreadsSyscall executes the system call (trap,
// a1..a6) on the calling thread and then has every other thread in allm
// execute the same call, returning the calling thread's results.
//
// If the call fails on the calling thread, nothing is broadcast and the
// error is returned directly. It panics if cgo is in use.
//
// NOTE(review): the name suggests this is exposed to package syscall through
// a linkname directive; no directive is visible here — confirm.
func syscall_runtime_doAllThreadsSyscall(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr) {
	if iscgo {
		// Not supported when cgo is enabled.
		panic("doAllThreadsSyscall not supported with cgo enabled")
	}

	// Stop the world for the duration of the broadcast.
	stw := stopTheWorld(stwAllThreadsSyscall)

	// Hold allocmLock while allm is walked below.
	// NOTE(review): presumably this keeps new Ms from being allocated in
	// the meantime — confirm.
	allocmLock.lock()

	// Pin this goroutine to its m; released with releasem on every exit.
	acquirem()

	// Run the call on this thread first. Its results become the reference
	// that every other thread is checked against.
	r1, r2, errno := syscall.Syscall6(trap, a1, a2, a3, a4, a5, a6)
	if GOARCH == "ppc64" || GOARCH == "ppc64le" {
		// r2 is forced to zero on ppc64 so it does not take part in the
		// comparison in runPerThreadSyscall.
		r2 = 0
	}
	if errno != 0 {
		// Failed locally: undo the setup in reverse order and report.
		releasem(getg().m)
		allocmLock.unlock()
		startTheWorld(stw)
		return r1, r2, errno
	}

	// Publish the call and its expected results for the other threads.
	perThreadSyscall = perThreadSyscallArgs{
		trap: trap,
		a1:   a1,
		a2:   a2,
		a3:   a3,
		a4:   a4,
		a5:   a5,
		a6:   a6,
		r1:   r1,
		r2:   r2,
	}

	// Wait until every m has a nonzero procid (minit sets it), so that each
	// thread can be addressed by signalM.
	for mp := allm; mp != nil; mp = mp.alllink {
		for atomic.Load64(&mp.procid) == 0 {
			// Not set yet; yield and retry.
			osyield()
		}
	}

	// Flag and signal every thread except this one.
	gp := getg()
	tid := gp.m.procid
	for mp := allm; mp != nil; mp = mp.alllink {
		if atomic.Load64(&mp.procid) == tid {
			// This thread has already made the call.
			continue
		}
		mp.needPerThreadSyscall.Store(1)
		signalM(mp, sigPerThreadSyscall)
	}

	// Wait for every other thread to clear its flag.
	for mp := allm; mp != nil; mp = mp.alllink {
		if mp.procid == tid {
			continue
		}
		for mp.needPerThreadSyscall.Load() != 0 {
			osyield()
		}
	}

	// Broadcast finished: clear the published call.
	perThreadSyscall = perThreadSyscallArgs{}

	releasem(getg().m)
	allocmLock.unlock()
	startTheWorld(stw)

	return r1, r2, errno
}
873
874
875
876
877
878
879
880 func runPerThreadSyscall() {
881 gp := getg()
882 if gp.m.needPerThreadSyscall.Load() == 0 {
883 return
884 }
885
886 args := perThreadSyscall
887 r1, r2, errno := syscall.Syscall6(args.trap, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6)
888 if GOARCH == "ppc64" || GOARCH == "ppc64le" {
889
890 r2 = 0
891 }
892 if errno != 0 || r1 != args.r1 || r2 != args.r2 {
893 print("trap:", args.trap, ", a123456=[", args.a1, ",", args.a2, ",", args.a3, ",", args.a4, ",", args.a5, ",", args.a6, "]\n")
894 print("results: got {r1=", r1, ",r2=", r2, ",errno=", errno, "}, want {r1=", args.r1, ",r2=", args.r2, ",errno=0}\n")
895 fatal("AllThreadsSyscall6 results differ between threads; runtime corrupted")
896 }
897
898 gp.m.needPerThreadSyscall.Store(0)
899 }
900
// Signal code values compared against sigctxt.sigcode: _SI_USER and
// _SI_TKILL by sigFromUser, _SYS_SECCOMP by sigFromSeccomp.
const (
	_SI_USER     = 0
	_SI_TKILL    = -6
	_SYS_SECCOMP = 1
)
906
907
908
909
910
911 func (c *sigctxt) sigFromUser() bool {
912 code := int32(c.sigcode())
913 return code == _SI_USER || code == _SI_TKILL
914 }
915
916
917
918
919 func (c *sigctxt) sigFromSeccomp() bool {
920 code := int32(c.sigcode())
921 return code == _SYS_SECCOMP
922 }
923
924
// mprotect invokes the mprotect system call on the n bytes starting at addr
// with protection prot. It returns the raw first result and the error
// number from Syscall6, each truncated to int32.
func mprotect(addr unsafe.Pointer, n uintptr, prot int32) (ret int32, errno int32) {
	res, _, e := syscall.Syscall6(syscall.SYS_MPROTECT, uintptr(addr), n, uintptr(prot), 0, 0, 0)
	ret = int32(res)
	errno = int32(e)
	return ret, errno
}
929
View as plain text