diff -urP linux-2.5.40/Documentation/coredump.txt linux-2.5.40-coredump-file-control/Documentation/coredump.txt --- linux-2.5.40/Documentation/coredump.txt Wed Dec 31 19:00:00 1969 +++ linux-2.5.40-coredump-file-control/Documentation/coredump.txt Thu Oct 3 08:18:28 2002 @@ -0,0 +1,110 @@ +Core dump file name control /proc/sys/kernel/core_name_format +=========================== + +Provides for a way to securely move where core files show up and to +set the name pattern for core files to include the UID, Program, +Hostname, and/or PID of the process that caused the core dump. +This is very handy for diskless clusters where all of the core +dumps go to the same disk and for production servers where core +dumps want to be segregated from the main production disks. This +also lets you get core dumps even when programs are running with +the current working directory being read-only. + + +-- Background and how it works -- + +The new /proc/sys/kernel/core_name_format sysctl provides you a way to +control the name of core files. This name is actually a format +string such that certain values from the process can be included. + +The default name format is set to "core" to match the previous +behavior of the kernel. Old behavior of appending the PID to +the "core" name is also preserved with added logic of only doing +so if the PID is not already part of the name format. This fully +preserves existing behaviors within the system while still providing +for the full control of the format of the core file name. The +previous behavior is not a special case but "falls out" of the +general case when the format is set to "core". 
+ + +-- The following format options are available in that string -- + + %P The Process ID (current->pid) + %U The UID of the process (current->uid) + %N The command name of the process (current->comm) + %H The nodename of the system (system_utsname.nodename) + %% A "%" + +For example, in my clusters, I have an NFS R/W mount at /coredumps +that all nodes have access to. The format string I use is: + + sysctl -w "kernel.core_name_format=/coredumps/%H-%N-%P.core" + +This then causes core dumps to be of the format: + + /coredumps/whale.sinz.org-badprogram-13917.core + +Another possible use is to collect core dumps for specific hosts or +programs in their own directories. For example: + + echo "/cores/%N/%H.core" > /proc/sys/kernel/core_name_format + +would put coredumps into something like: + + /cores/badprogram/whale.sinz.org.core + +Note that only programs that have directories already created in +/cores would get their cores saved as the coredump process will not +create a directory. + +Another form would be to just name the core dumps based on the program +name but still put them into the current working directory. For example: + + sysctl -w "kernel.core_name_format=%N-%P.core" + +would put, into CWD, files of the form: + + badprogram-1927.core + +The flexibility in the format string lets you define the behavior +you need in your environment. + + +-- Some notes on security -- + +This patch does add the ability of a system administrator to make a core +dump format string that could cause problems. If the format string is set +to be a fixed file name of say, "/bin/sh" it would be a "bad thing" to +have a core dump happen :-) + +There is always the problem of someone with root access making a bad +setting in the sysctl. But then, if they have root, they don't need to +set some sysctl in order to cause damage. + +However, I have also worked through the security and reliability of the +code assuming that the system administrator does not set a blatantly bad +pattern. 
In addition to the standard prevention of buffer over-runs and +the like, I also make sure that any user adjustable input gets filtered +to remove "/" characters such that directories can not be changed via a +program name of, say "../foo/x" (assuming that some program goes and +changes its process name to that) + +So it does not prevent someone from making a name format that would be +bad (such as "/bin/sh" or "/usr/bin/%N") but then "rm -rf" still works too :-) + +One thing that I do feel is very good about this is that you can now +segregate your core files to a different partition and thus prevent the +writing to and/or filling up of your important partitions. For the +diskless clusters that I am building, it also provides a way to track +who caused the core dump and, in our cluster, a place to write it +since all of the other disks are read-only or /dev/tmpfs devices. + + +-- Why upper case characters -- + +I used upper case characters to reduce the chance of getting confused +with format() characters and to be somewhat similar to the mechanism +that exists on FreeBSD. 
+ + +02/21/2002 -- Michael Sinz -- http://www.sinz.org/ diff -urP linux-2.5.40/fs/exec.c linux-2.5.40-coredump-file-control/fs/exec.c --- linux-2.5.40/fs/exec.c Tue Oct 1 03:06:31 2002 +++ linux-2.5.40-coredump-file-control/fs/exec.c Thu Oct 3 08:18:28 2002 @@ -37,6 +37,7 @@ #include #include #include +#include #define __NO_VERSION__ #include #include @@ -52,6 +53,7 @@ #endif int core_uses_pid; +char core_name_format[64] = {"core"}; static struct linux_binfmt *formats; static rwlock_t binfmt_lock = RW_LOCK_UNLOCKED; @@ -1093,13 +1095,21 @@ __MOD_DEC_USE_COUNT(old->module); } +#define MAX_CORE_NAME (160) int do_coredump(long signr, struct pt_regs * regs) { struct linux_binfmt * binfmt; - char corename[6+sizeof(current->comm)+10]; + + /* The 11 extra are for the support of the old "uses PID option" + and such that the PID option does not need to double-check size */ + char corename[MAX_CORE_NAME+1+11]; struct file * file; struct inode * inode; int retval = 0; + int fmt_i; + int name_n; + int addPID; + char *cname; lock_kernel(); binfmt = current->binfmt; @@ -1111,9 +1121,88 @@ if (current->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump) goto fail; - memcpy(corename,"core", 5); /* include trailing \0 */ - if (core_uses_pid || atomic_read(&current->mm->mm_users) != 1) - sprintf(&corename[4], ".%d", current->pid); + /* Set this to true if we are going to add the PID. If the PID + already is added in the format we will end up clearing this. + The purpose is to provide for the old behavior of adding the + PID to the core file name but to not add it if it already + was included via the file name format pattern. 
*/ + addPID = (core_uses_pid || atomic_read(&current->mm->mm_users) != 1); + + /* Build the core file name as needed from the format string */ + for (fmt_i=0, name_n=0; + (name_n < MAX_CORE_NAME) && core_name_format[fmt_i]; + fmt_i++) + { + switch (core_name_format[fmt_i]) + { + case '%': /* A format character */ + fmt_i++; + switch (core_name_format[fmt_i]) + { + case '%': /* The way we get this character */ + corename[name_n++] = '%'; + break; + + case 'N': /* Process name */ + cname=current->comm; + + /* Only copy as much as will fit within the MAX_CORE_NAME */ + while (*cname && (name_n < MAX_CORE_NAME)) { + if (*cname != '/') + corename[name_n++] = *cname; + cname++; + } + break; + + case 'H': /* Host name */ + down_read(&uts_sem); + cname=system_utsname.nodename; + + /* Only copy as much as will fit within the MAX_CORE_NAME */ + while (*cname && (name_n < MAX_CORE_NAME)) { + if (*cname != '/') + corename[name_n++] = *cname; + cname++; + } + up_read(&uts_sem); + break; + + case 'P': /* Process PID */ + /* Since we are adding it here, don't append at end */ + addPID=0; + + /* We don't need to pre-check that the number fits since we + added a padding of 11 characters to the end of the string + buffer just so that we don't need to do an extra check */ + name_n += sprintf(&corename[name_n],"%d",current->pid); + break; + + case 'U': /* UID of the process */ + /* We don't need to pre-check that the number fits since we + added a padding of 11 characters to the end of the string + buffer just so that we don't need to do an extra check */ + name_n += sprintf(&corename[name_n],"%d",current->uid); + break; + } + break; + + default: /* Anything else just pass along */ + corename[name_n++] = core_name_format[fmt_i]; + } + } + + /* If we still want to append the PID and there is room, do so */ + /* This is mainly for compatibility */ + if (addPID && (name_n < MAX_CORE_NAME)) { + name_n += sprintf(&corename[name_n],".%d",current->pid); + } + + corename[name_n]='\0'; + + /* Check 
that we actually got a name */ + if (name_n < 1) + goto fail; + file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW, 0600); if (IS_ERR(file)) goto fail; diff -urP linux-2.5.40/include/linux/sysctl.h linux-2.5.40-coredump-file-control/include/linux/sysctl.h --- linux-2.5.40/include/linux/sysctl.h Tue Oct 1 03:05:47 2002 +++ linux-2.5.40-coredump-file-control/include/linux/sysctl.h Thu Oct 3 08:18:28 2002 @@ -128,6 +128,7 @@ KERN_TAINTED=53, /* int: various kernel tainted flags */ KERN_CADPID=54, /* int: PID of the process to notify on CAD */ KERN_PIDMAX=55, /* int: PID # limit */ + KERN_CORE_NAME_FORMAT=56, /* string: core file name format string */ }; diff -urP linux-2.5.40/kernel/sysctl.c linux-2.5.40-coredump-file-control/kernel/sysctl.c --- linux-2.5.40/kernel/sysctl.c Tue Oct 1 03:06:17 2002 +++ linux-2.5.40-coredump-file-control/kernel/sysctl.c Thu Oct 3 08:18:28 2002 @@ -50,6 +50,7 @@ extern int max_queued_signals; extern int sysrq_enabled; extern int core_uses_pid; +extern char core_name_format []; extern int cad_pid; extern int pid_max; @@ -176,6 +177,8 @@ 0644, NULL, &proc_dointvec}, {KERN_CORE_USES_PID, "core_uses_pid", &core_uses_pid, sizeof(int), 0644, NULL, &proc_dointvec}, + {KERN_CORE_NAME_FORMAT, "core_name_format", core_name_format, 64, + 0644, NULL, &proc_doutsstring, &sysctl_string}, {KERN_TAINTED, "tainted", &tainted, sizeof(int), 0644, NULL, &proc_dointvec}, {KERN_CAP_BSET, "cap-bound", &cap_bset, sizeof(kernel_cap_t),