现在的工作可以说是一行代码也不写,WB 程序员,经历了才会懂。
为了避免手生以及进一步巩固基础知识,决定定期抄一抄代码。
从 GNU coreutils(Linux 核心工具集)开始,学习如何优雅地编程。
pwd.c
#include <config.h>
#include <getopt.h>
#include <stdio.h>
#include <sys/types.h>
#include "system.h"
#include "quote.h"
#include "root-dev-ino.h"
#include "xgetcwd.h"
/* Program name passed to the usage footer helpers and to the version
   option handler.  */
#define PROGRAM_NAME "pwd"
/* Author credit handed to case_GETOPT_VERSION_CHAR in main.  */
#define AUTHORS proper_name ("Jim Meyering")
/* A file name that is assembled back to front: the text occupies the tail
   of BUF, and each new component is written just before START.  */
struct file_name
{
/* Heap buffer holding the name; released by file_name_free.  */
char *buf;
/* Size of BUF in bytes.  */
size_t n_alloc;
/* First byte of the NUL-terminated name inside BUF.  */
char *start;
};
/* Long options handed to getopt_long in main.  --logical and --physical
   are reported as the short options 'L' and 'P'; the array ends with an
   all-zero sentinel entry.  */
static struct option const longopts[] =
{
{"logical", no_argument, nullptr, 'L'},
{"physical", no_argument, nullptr, 'P'},
{GETOPT_HELP_OPTION_DECL},
{GETOPT_VERSION_OPTION_DECL},
{nullptr, 0, nullptr, 0}
};
/* Print usage information and terminate the process with STATUS.
   When STATUS is EXIT_SUCCESS the full help text is written to stdout;
   for any other STATUS only emit_try_help is invoked.  Never returns.  */
void
usage (int status)
{
  if (status == EXIT_SUCCESS)
    {
      printf (_("Usage: %s [OPTION]...\n"), program_name);
      fputs (_("\
Print the full filename of current working directory.\n\
\n\
"), stdout);
      fputs (_("\
-L, --logical use PWD from environment, even if it contains symlinks\n\
-P, --physical avoid all symlinks\n\
"), stdout);
      fputs (HELP_OPTION_DESCRIPTION, stdout);
      fputs (VERSION_OPTION_DESCRIPTION, stdout);
      fputs (_("\n\
If no option is specified, -P is assumed.\n\
"), stdout);
      printf (USAGE_BUILTIN_WARNING, PROGRAM_NAME);
      emit_ancillary_info (PROGRAM_NAME);
    }
  else
    emit_try_help ();

  exit (status);
}
/* Release P together with the buffer it owns.  P must not be used
   afterwards.  */
static void
file_name_free (struct file_name *p)
{
  char *storage = p->buf;

  free (storage);
  free (p);
}
/* Allocate a file_name whose buffer initially holds only the empty string,
   stored in the final byte so that components can be prepended in front
   of it.  Release the result with file_name_free.  */
static struct file_name *
file_name_init (void)
{
  size_t const capacity = MIN (2 * PATH_MAX, 32 * 1024);
  struct file_name *fn = xmalloc (sizeof *fn);
  char *storage = xmalloc (capacity);
  char *last = storage + (capacity - 1);

  *last = '\0';
  fn->buf = storage;
  fn->n_alloc = capacity;
  fn->start = last;
  return fn;
}
/* Prepend '/' followed by the S_LEN bytes at S to the name held in P.
   When fewer than 1 + S_LEN bytes are free in front of P->start, the
   buffer is replaced by a larger one and the existing text is copied to
   the tail of the new buffer.  */
static void
file_name_prepend (struct file_name *p, char const *s, size_t s_len)
{
  size_t const needed = 1 + s_len;      /* the '/' plus the component */
  size_t const room = p->start - p->buf;

  if (room < needed)
    {
      /* A fresh allocation is filled with a single copy placed at its
         end, so all free space stays in front of the text.  */
      size_t const half = p->n_alloc + needed;
      size_t const new_size = 2 * half;
      size_t const used = p->n_alloc - room;
      char *fresh = xnmalloc (2, half);
      char *new_start = fresh + new_size - used;

      memcpy (new_start, p->buf + room, used);
      free (p->buf);
      p->buf = fresh;
      p->n_alloc = new_size;
      p->start = new_start;
    }

  char *dest = p->start - needed;
  dest[0] = '/';
  memcpy (dest + 1, s, s_len);
  p->start = dest;
}
/* Return a newly allocated string naming the N-th parent of the current
   directory: "..", "../..", and so on, with no trailing slash.
   N == 0 yields the empty string.  The caller owns the result.  */
static char *
nth_parent (size_t n)
{
  if (n == 0)
    {
      /* Without this guard the terminator below would be stored at
         buf[-1], one byte before the allocation.  */
      char *empty = xmalloc (1);
      empty[0] = '\0';
      return empty;
    }

  char *buf = xnmalloc (3, n);
  char *p = buf;
  for (size_t i = 0; i < n; i++)
    {
      memcpy (p, "../", 3);
      p += 3;
    }
  /* Overwrite the final '/' with the terminator.  */
  p[-1] = '\0';
  return buf;
}
/* Find, in the parent of the current directory, the entry whose inode
   (and, when it had to be lstat'ed, device) matches *DOT_SB, and prepend
   that entry's name to FILE_NAME.

   Side effects: the process's working directory is changed to "..", and
   on return *DOT_SB holds the stat data of that parent directory.

   PARENT_HEIGHT is the number of ".." components used to name the parent
   in diagnostics.  Every failure is reported through error with
   EXIT_FAILURE as its status argument.  */
static void
find_dir_entry (struct stat *dot_sb, struct file_name *file_name,
                size_t parent_height)
{
  DIR *dirp;
  int fd;
  struct stat parent_sb;
  bool use_lstat;
  bool found;

  dirp = opendir ("..");
  if (dirp == nullptr)
    error (EXIT_FAILURE, errno, _("cannot open directory %s"),
           quote (nth_parent (parent_height)));

  /* Prefer the descriptor of the already opened directory; fall back to
     the path-based calls when dirfd does not provide one.  */
  fd = dirfd (dirp);
  if ((0 <= fd ? fchdir (fd) : chdir ("..")) < 0)
    error (EXIT_FAILURE, errno, _("failed to chdir to %s"),
           quote (nth_parent (parent_height)));

  if ((0 <= fd ? fstat (fd, &parent_sb) : stat (".", &parent_sb)) < 0)
    error (EXIT_FAILURE, errno, _("failed to stat %s"),
           quote (nth_parent (parent_height)));

  /* When the parent lives on a different device than the child, every
     entry is lstat'ed instead of trusting the inode number taken from
     the directory entry.  */
  use_lstat = (parent_sb.st_dev != dot_sb->st_dev);

  found = false;
  while (true)
    {
      struct dirent const *dp;
      struct stat ent_sb;
      ino_t ino;

      /* Clear errno so that a null result can be told apart as either
         end-of-directory or a read failure.  */
      errno = 0;
      if ((dp = readdir_ignoring_dot_and_dotdot (dirp)) == nullptr)
        {
          if (errno)
            {
              /* Preserve the read error across closedir, and mark the
                 stream as gone so the check after the loop reports it.  */
              int e = errno;
              closedir (dirp);
              errno = e;
              dirp = nullptr;
            }
          break;
        }

      ino = D_INO (dp);

      if (ino == NOT_AN_INODE_NUMBER || use_lstat)
        {
          /* Entries that cannot be lstat'ed are skipped.  */
          if (lstat (dp->d_name, &ent_sb) < 0)
            continue;
          ino = ent_sb.st_ino;
        }

      if (ino != dot_sb->st_ino)
        continue;

      /* ENT_SB is only read when USE_LSTAT is true, in which case the
         lstat call above has filled it in.  */
      if (!use_lstat || ent_sb.st_dev == dot_sb->st_dev)
        {
          file_name_prepend (file_name, dp->d_name, _D_EXACT_NAMLEN (dp));
          found = true;
          break;
        }
    }

  /* This one diagnostic covers both a failed read (DIRP was reset to
     nullptr above) and a failed closedir.  */
  if (dirp == nullptr || closedir (dirp) != 0)
    {
      error (EXIT_FAILURE, errno, _("reading directory %s"),
             quote (nth_parent (parent_height)));
    }

  if (!found)
    error (EXIT_FAILURE, 0,
           _("couldn't find directory entry in %s with matching i-node"),
           quote (nth_parent (parent_height)));

  *dot_sb = parent_sb;
}
/* Build the absolute name of the current directory in FILE_NAME by
   climbing through ".." one level at a time (see find_dir_entry) until
   the directory matches the device/inode pair obtained from
   get_root_dev_ino.  The process's working directory changes as a side
   effect of the climb.  Failures are reported through error with
   EXIT_FAILURE as its status argument.  */
static void
robust_getcwd (struct file_name *file_name)
{
  size_t height = 1;
  struct dev_ino dev_ino_buf;
  struct dev_ino *root_dev_ino = get_root_dev_ino (&dev_ino_buf);
  struct stat dot_sb;

  if (root_dev_ino == nullptr)
    error (EXIT_FAILURE, errno, _("failed to get attributes of %s"),
           quoteaf ("/"));

  if (stat (".", &dot_sb) < 0)
    error (EXIT_FAILURE, errno, _("failed to stat %s"), quoteaf ("."));

  while (true)
    {
      /* Stop once the current directory is the root.  */
      if (PSAME_INODE (&dot_sb, root_dev_ino))
        break;

      find_dir_entry (&dot_sb, file_name, height++);
    }

  /* Nothing was prepended, i.e. the starting directory already was the
     root: prepending an empty component leaves just "/".  */
  if (file_name->start[0] == '\0')
    file_name_prepend (file_name, "", 0);
}
/* Return the value of the PWD environment variable when it can serve as
   the logical working directory, otherwise nullptr.  It qualifies when it
   starts with '/', contains no "." or ".." component, both it and "." can
   be stat'ed, and psame_inode accepts the two results.  The returned
   pointer is the getenv result itself, not a copy.  */
static char *
logical_getcwd (void)
{
  char *wd = getenv ("PWD");

  if (wd == nullptr || wd[0] != '/')
    return nullptr;

  /* Visit every "/." occurrence and reject the ones that form a whole
     "." or ".." component.  */
  for (char *p = strstr (wd, "/."); p != nullptr; p = strstr (p + 1, "/."))
    {
      bool dot = p[2] == '\0' || p[2] == '/';
      bool dotdot = p[2] == '.' && (p[3] == '\0' || p[3] == '/');

      if (dot || dotdot)
        return nullptr;
    }

  struct stat env_sb;
  struct stat dot_sb;

  if (stat (wd, &env_sb) != 0 || stat (".", &dot_sb) != 0)
    return nullptr;

  return psame_inode (&env_sb, &dot_sb) ? wd : nullptr;
}
/* Entry point: print the current working directory on stdout.

   -L/--logical prints $PWD when logical_getcwd accepts it;
   -P/--physical disables that.  Logical mode is the default only when
   the POSIXLY_CORRECT environment variable is set.  Non-option arguments
   are ignored with a warning.  When xgetcwd fails, the name is rebuilt
   with robust_getcwd instead.  */
int
main (int argc, char **argv)
{
  char *wd;
  bool logical = (getenv ("POSIXLY_CORRECT") != nullptr);

  initialize_main (&argc, &argv);
  set_program_name (argv[0]);
  setlocale (LC_ALL, "");
  bindtextdomain (PACKAGE, LOCALEDIR);
  textdomain (PACKAGE);

  atexit (close_stdout);

  while (true)
    {
      int c = getopt_long (argc, argv, "LP", longopts, nullptr);
      /* getopt_long returns -1 once all options have been consumed.  */
      if (c == -1)
        break;
      switch (c)
        {
        case 'L':
          logical = true;
          break;
        case 'P':
          logical = false;
          break;

        case_GETOPT_HELP_CHAR;

        case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);

        default:
          usage (EXIT_FAILURE);
        }
    }

  if (optind < argc)
    error (0, 0, _("ignoring non-option arguments"));

  if (logical)
    {
      wd = logical_getcwd ();
      if (wd)
        {
          /* WD is the getenv result here, so it is not freed.  */
          puts (wd);
          return EXIT_SUCCESS;
        }
    }

  wd = xgetcwd ();
  if (wd != nullptr)
    {
      puts (wd);
      free (wd);
    }
  else
    {
      /* xgetcwd gave no result: rebuild the name by walking up through
         the parent directories.  */
      struct file_name *file_name = file_name_init ();
      robust_getcwd (file_name);
      puts (file_name->start);
      file_name_free (file_name);
    }

  return EXIT_SUCCESS;
}
Reference
带注释的代码:https://github.com/coreutils/coreutils/blob/master/src/pwd.c
man getcwd
要想理解这段代码,需要首先了解 getcwd:它是一个 C 库函数(手册第 3 节),在 Linux 上由同名的系统调用实现。
以下是 getcwd 的手册:https://man7.org/linux/man-pages/man3/getcwd.3.html
GETCWD(3) Linux Programmer's Manual GETCWD(3)
NAME
getcwd, getwd, get_current_dir_name - get current work‐
ing directory
SYNOPSIS
#include <unistd.h>
char *getcwd(char *buf, size_t size);
char *getwd(char *buf);
char *get_current_dir_name(void);
Feature Test Macro Requirements for glibc (see fea‐
ture_test_macros(7)):
get_current_dir_name():
_GNU_SOURCE
getwd():
Since glibc 2.12:
(_XOPEN_SOURCE >= 500) && ! (_POSIX_C_SOURCE >= 200809L)
|| /* Glibc since 2.19: */ _DEFAULT_SOURCE
|| /* Glibc versions <= 2.19: */ _BSD_SOURCE
Before glibc 2.12:
_BSD_SOURCE || _XOPEN_SOURCE >= 500
DESCRIPTION
These functions return a null-terminated string contain‐
ing an absolute pathname that is the current working di‐
rectory of the calling process. The pathname is re‐
turned as the function result and via the argument buf,
if present.
The getcwd() function copies an absolute pathname of the
current working directory to the array pointed to by
buf, which is of length size.
If the length of the absolute pathname of the current
working directory, including the terminating null byte,
exceeds size bytes, NULL is returned, and errno is set
to ERANGE; an application should check for this error,
and allocate a larger buffer if necessary.
As an extension to the POSIX.1-2001 standard, glibc's
getcwd() allocates the buffer dynamically using mal‐
loc(3) if buf is NULL. In this case, the allocated buf‐
fer has the length size unless size is zero, when buf is
allocated as big as necessary. The caller should
free(3) the returned buffer.
get_current_dir_name() will malloc(3) an array big
enough to hold the absolute pathname of the current
working directory. If the environment variable PWD is
set, and its value is correct, then that value will be
returned. The caller should free(3) the returned buf‐
fer.
getwd() does not malloc(3) any memory. The buf argument
should be a pointer to an array at least PATH_MAX bytes
long. If the length of the absolute pathname of the
current working directory, including the terminating
null byte, exceeds PATH_MAX bytes, NULL is returned, and
errno is set to ENAMETOOLONG. (Note that on some sys‐
tems, PATH_MAX may not be a compile-time constant; fur‐
thermore, its value may depend on the filesystem, see
pathconf(3).) For portability and security reasons, use
of getwd() is deprecated.
RETURN VALUE
On success, these functions return a pointer to a string
containing the pathname of the current working direc‐
tory. In the case of getcwd() and getwd() this is the
same value as buf.
On failure, these functions return NULL, and errno is
set to indicate the error. The contents of the array
pointed to by buf are undefined on error.
ERRORS
EACCES Permission to read or search a component of the
filename was denied.
EFAULT buf points to a bad address.
EINVAL The size argument is zero and buf is not a null
pointer.
EINVAL getwd(): buf is NULL.
ENAMETOOLONG
getwd(): The size of the null-terminated absolute
pathname string exceeds PATH_MAX bytes.
ENOENT The current working directory has been unlinked.
ENOMEM Out of memory.
ERANGE The size argument is less than the length of the
absolute pathname of the working directory, in‐
cluding the terminating null byte. You need to
allocate a bigger array and try again.
ATTRIBUTES
For an explanation of the terms used in this section,
see attributes(7).
┌───────────────────────┬───────────────┬─────────────┐
│Interface │ Attribute │ Value │
├───────────────────────┼───────────────┼─────────────┤
│getcwd(), getwd() │ Thread safety │ MT-Safe │
├───────────────────────┼───────────────┼─────────────┤
│get_current_dir_name() │ Thread safety │ MT-Safe env │
└───────────────────────┴───────────────┴─────────────┘
CONFORMING TO
getcwd() conforms to POSIX.1-2001. Note however that
POSIX.1-2001 leaves the behavior of getcwd() unspecified
if buf is NULL.
getwd() is present in POSIX.1-2001, but marked LEGACY.
POSIX.1-2008 removes the specification of getwd(). Use
getcwd() instead. POSIX.1-2001 does not define any er‐
rors for getwd().
get_current_dir_name() is a GNU extension.
NOTES
Under Linux, these functions make use of the getcwd()
system call (available since Linux 2.1.92). On older
systems they would query /proc/self/cwd. If both system
call and proc filesystem are missing, a generic imple‐
mentation is called. Only in that case can these calls
fail under Linux with EACCES.
These functions are often used to save the location of
the current working directory for the purpose of return‐
ing to it later. Opening the current directory (".")
and calling fchdir(2) to return is usually a faster and
more reliable alternative when sufficiently many file
descriptors are available, especially on platforms other
than Linux.
C library/kernel differences
On Linux, the kernel provides a getcwd() system call,
which the functions described in this page will use if
possible. The system call takes the same arguments as
the library function of the same name, but is limited to
returning at most PATH_MAX bytes. (Before Linux 3.12,
the limit on the size of the returned pathname was the
system page size. On many architectures, PATH_MAX and
the system page size are both 4096 bytes, but a few ar‐
chitectures have a larger page size.) If the length of
the pathname of the current working directory exceeds
this limit, then the system call fails with the error
ENAMETOOLONG. In this case, the library functions fall
back to a (slower) alternative implementation that re‐
turns the full pathname.
Following a change in Linux 2.6.36, the pathname re‐
turned by the getcwd() system call will be prefixed with
the string "(unreachable)" if the current directory is
not below the root directory of the current process
(e.g., because the process set a new filesystem root us‐
ing chroot(2) without changing its current directory
into the new root). Such behavior can also be caused by
an unprivileged user by changing the current directory
into another mount namespace. When dealing with path‐
name from untrusted sources, callers of the functions
described in this page should consider checking whether
the returned pathname starts with '/' or '(' to avoid
misinterpreting an unreachable path as a relative path‐
name.
BUGS
Since the Linux 2.6.36 change that added "(unreachable)"
in the circumstances described above, the glibc imple‐
mentation of getcwd() has failed to conform to POSIX and
returned a relative pathname when the API contract re‐
quires an absolute pathname. With glibc 2.27 onwards
this is corrected; calling getcwd() from such a pathname
will now result in failure with ENOENT.
SEE ALSO
pwd(1), chdir(2), fchdir(2), open(2), unlink(2),
free(3), malloc(3)
COLOPHON
This page is part of release 5.10 of the Linux man-pages
project. A description of the project, information
about reporting bugs, and the latest version of this
page, can be found at
https://www.kernel.org/doc/man-pages/.
GNU 2018-04-30 GETCWD(3)
正如 DESCRIPTION 的第一句话所说,这些函数返回一个以空字符('\0')结尾的字符串,其内容是调用进程当前工作目录的绝对路径。
那么,似乎实现 pwd 工具就变得很简单了,我们只需要
#include <unistd.h>
#include <stdio.h>
/* Size in bytes of the buffer handed to getcwd in this example.  */
#define PATH_MAX 256

/* Minimal pwd: print the current working directory.
   Returns 0 on success and 1 when getcwd fails, for instance with ERANGE
   when the path does not fit in PATH_MAX bytes.  */
int main(void)
{
    char path[PATH_MAX];

    /* After a failed getcwd the contents of PATH are undefined, so the
       buffer must not be printed; report the error instead.  */
    if (getcwd(path, sizeof path) == NULL)
    {
        perror("getcwd");
        return 1;
    }
    printf("%s\n", path);
    return 0;
}