summaryrefslogtreecommitdiff
path: root/glob.c
diff options
context:
space:
mode:
Diffstat (limited to 'glob.c')
-rw-r--r--glob.c678
1 files changed, 537 insertions, 141 deletions
diff --git a/glob.c b/glob.c
index 74dfe52fdd..69af7b1618 100644
--- a/glob.c
+++ b/glob.c
@@ -1,184 +1,580 @@
-/************************************************
+/* File-name wildcard pattern matching for GNU.
+ Copyright (C) 1985, 1988, 1989 Free Software Foundation, Inc.
- glob.c -
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 1, or (at your option)
+ any later version.
- $Author: matz $
- $Date: 1994/12/09 09:47:52 $
- created at: Mon Sep 12 18:56:43 JST 1994
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
-************************************************/
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* To whomever it may concern: I have never seen the code which most
+ Unix programs use to perform this function. I wrote this from scratch
+ based on specifications for the pattern matching. --RMS. */
+
+#include "config.h"
+
+#if defined (SHELL)
+# if defined (HAVE_STDLIB_H)
+# include <stdlib.h>
+# else
+# include "ansi_stdlib.h"
+# endif /* HAVE_STDLIB_H */
+# include <config.h>
+#endif
+
+#include <sys/types.h>
+
+#if !defined (SHELL) && (defined (_POSIX_VERSION) || defined (USGr3))
+# if !defined (HAVE_DIRENT_H)
+# define HAVE_DIRENT_H
+# endif /* !HAVE_DIRENT_H */
+#endif /* !SHELL && (_POSIX_VERSION || USGr3) */
+
+#if defined (HAVE_DIRENT_H)
+# include <dirent.h>
+# if !defined (direct)
+# define direct dirent
+# endif /* !direct */
+# define D_NAMLEN(d) strlen ((d)->d_name)
+#else /* !HAVE_DIRENT_H */
+# define D_NAMLEN(d) ((d)->d_namlen)
+# if defined (USG)
+# if defined (Xenix)
+# include <sys/ndir.h>
+# else /* !Xenix (but USG...) */
+# include "ndir.h"
+# endif /* !Xenix */
+# else /* !USG */
+# include <sys/dir.h>
+# endif /* !USG */
+#endif /* !HAVE_DIRENT_H */
+
+#if defined (_POSIX_SOURCE)
+/* Posix does not require that the d_ino field be present, and some
+ systems do not provide it. */
+# define REAL_DIR_ENTRY(dp) 1
+#else
+# define REAL_DIR_ENTRY(dp) (dp->d_ino != 0)
+#endif /* _POSIX_SOURCE */
+
+#if defined (USG) || defined (NeXT)
+# if !defined (HAVE_STRING_H)
+# define HAVE_STRING_H
+# endif /* !HAVE_STRING_H */
+#endif /* USG || NeXT */
+
+#if defined (HAVE_STRING_H)
+# include <string.h>
+#else /* !HAVE_STRING_H */
+# include <strings.h>
+#endif /* !HAVE_STRING_H */
+
+#if defined (USG)
+# if !defined (isc386)
+# include <memory.h>
+# endif /* !isc386 */
+# if defined (RISC6000)
+extern void bcopy ();
+# else /* !RISC6000 */
+# define bcopy(s, d, n) ((void) memcpy ((d), (s), (n)))
+# endif /* !RISC6000 */
+#endif /* USG */
+
+#if defined(HAVE_ALLOCA_H) && !defined(__GNUC__)
+#include <alloca.h>
+#else
+char *alloca ();
+#endif
-#include <sys/param.h>
-#include "ruby.h"
#include "fnmatch.h"
-char *strdup();
+/* If the opendir () on your system lets you open non-directory files,
+ then we consider that not robust. Define OPENDIR_NOT_ROBUST in the
+ SYSDEP_CFLAGS for your machines entry in machines.h. */
+#if defined (OPENDIR_NOT_ROBUST)
+# if defined (SHELL)
+# include "posixstat.h"
+# else /* !SHELL */
+# include <sys/stat.h>
+# endif /* !SHELL */
+#endif /* OPENDIR_NOT_ROBUST */
+
+extern void *xmalloc (), *xrealloc ();
+#if !defined (HAVE_STDLIB_H)
+extern void free ();
+#endif /* !HAVE_STDLIB_H */
+
+#if !defined (NULL)
+# if defined (__STDC__)
+# define NULL ((void *) 0)
+# else
+# define NULL 0x0
+# endif /* __STDC__ */
+#endif /* !NULL */
-VALUE C_Glob;
+#if defined (SHELL)
+extern int interrupt_state;
+#endif /* SHELL */
-struct glob_data {
- char **globs;
-};
+/* Global variable which controls whether or not * matches .*.
+ Non-zero means don't match .*. */
+int noglob_dot_filenames = 1;
-static ID id_data;
+/* Global variable to return to signify an error in globbing. */
+char *glob_error_return;
+
+/* Return nonzero if PATTERN has any special globbing chars in it. */
+int
+glob_pattern_p (pattern)
+ char *pattern;
+{
+ register char *p = pattern;
+ register char c;
+ int open = 0;
+
+ while ((c = *p++) != '\0')
+ switch (c)
+ {
+ case '?':
+ case '*':
+ return (1);
+
+ case '[': /* Only accept an open brace if there is a close */
+ open++; /* brace to match it. Bracket expressions must be */
+ continue; /* complete, according to Posix.2 */
+ case ']':
+ if (open)
+ return (1);
+ continue;
+
+ case '\\':
+ if (*p++ == '\0')
+ return (0);
+ }
+
+ return (0);
+}
+
+/* Remove backslashes quoting characters in PATHNAME by modifying PATHNAME. */
static void
-glob_free(data)
- struct glob_data *data;
+dequote_pathname (pathname)
+ char *pathname;
{
- char **globs = data->globs;
- while (*globs) {
- free(*globs);
- globs++;
+ register int i, j;
+
+ for (i = j = 0; pathname && pathname[i]; )
+ {
+ if (pathname[i] == '\\')
+ i++;
+
+ pathname[j++] = pathname[i++];
+
+ if (!pathname[i - 1])
+ break;
}
- free(data->globs);
+ pathname[j] = '\0';
}
-#define isdelim(c) ((c)==' '||(c)=='\t'||(c)=='\n'||(c)=='\0')
+
+/* Return a vector of names of files in directory DIR
+ whose names match glob pattern PAT.
+ The names are not in any particular order.
+ Wildcards at the beginning of PAT do not match an initial period.
+
+ The vector is terminated by an element that is a null pointer.
-char *strchr();
-char *strdup();
+ To free the space allocated, first free the vector's elements,
+ then free the vector.
-static int
-expand_brace(s, data, len)
- char *s;
- struct glob_data *data;
- int len;
+ Return 0 if cannot get enough memory to hold the pointer
+ and the names.
+
+ Return -1 if cannot access directory DIR.
+ Look in errno for more information. */
+
+char **
+glob_vector (pat, dir)
+ char *pat;
+ char *dir;
{
- char org[MAXPATHLEN], path[MAXPATHLEN];
- char *pre, *post, *head, *p, *t;
-
- strcpy(org, s);
- pre = strchr(org, '{');
- if (pre) post = strchr(pre, '}');
- if (!pre || !post) {
- data->globs[len++] = strdup(s);
- REALLOC_N(data->globs, char*, len+1);
- return len;
+ struct globval
+ {
+ struct globval *next;
+ char *name;
+ };
+
+ DIR *d;
+ register struct direct *dp;
+ struct globval *lastlink;
+ register struct globval *nextlink;
+ register char *nextname;
+ unsigned int count;
+ int lose, skip;
+ register char **name_vector;
+ register unsigned int i;
+#if defined (OPENDIR_NOT_ROBUST)
+ struct stat finfo;
+
+ if (stat (dir, &finfo) < 0)
+ return ((char **) &glob_error_return);
+
+ if (!S_ISDIR (finfo.st_mode))
+ return ((char **) &glob_error_return);
+#endif /* OPENDIR_NOT_ROBUST */
+
+ d = opendir (dir);
+ if (d == NULL)
+ return ((char **) &glob_error_return);
+
+ lastlink = 0;
+ count = 0;
+ lose = 0;
+ skip = 0;
+
+ /* If PAT is empty, skip the loop, but return one (empty) filename. */
+ if (!pat || !*pat)
+ {
+ nextlink = (struct globval *)alloca (sizeof (struct globval));
+ nextlink->next = lastlink;
+ nextname = (char *) xmalloc (1);
+ if (!nextname)
+ lose = 1;
+ else
+ {
+ lastlink = nextlink;
+ nextlink->name = nextname;
+ nextname[0] = '\0';
+ count++;
+ }
+ skip = 1;
}
- memcpy(path, org, pre - org);
- p = org + (pre - org) + 1;
- head = path + (pre - org);
+ /* Scan the directory, finding all names that match.
+ For each name that matches, allocate a struct globval
+ on the stack and store the name in it.
+ Chain those structs together; lastlink is the front of the chain. */
+ while (!skip)
+ {
+ int flags; /* Flags passed to fnmatch (). */
+#if defined (SHELL)
+ /* Make globbing interruptible in the bash shell. */
+ if (interrupt_state)
+ {
+ closedir (d);
+ lose = 1;
+ goto lost;
+ }
+#endif /* SHELL */
+
+ dp = readdir (d);
+ if (dp == NULL)
+ break;
+
+ /* If this directory entry is not to be used, try again. */
+ if (!REAL_DIR_ENTRY (dp))
+ continue;
- while (p < post) {
- t = p;
- while (t < post) {
- if (*t == ',') break;
- t++;
+ /* If a dot must be explicity matched, check to see if they do. */
+ if (noglob_dot_filenames && dp->d_name[0] == '.' && pat[0] != '.')
+ continue;
+
+ flags = (noglob_dot_filenames ? FNM_PERIOD : 0) | FNM_PATHNAME;
+
+ if (fnmatch (pat, dp->d_name, flags) != FNM_NOMATCH)
+ {
+ nextlink = (struct globval *) alloca (sizeof (struct globval));
+ nextlink->next = lastlink;
+ nextname = (char *) xmalloc (D_NAMLEN (dp) + 1);
+ if (nextname == NULL)
+ {
+ lose = 1;
+ break;
+ }
+ lastlink = nextlink;
+ nextlink->name = nextname;
+ bcopy (dp->d_name, nextname, D_NAMLEN (dp) + 1);
+ ++count;
}
- memcpy(head, p, t-p);
- strcpy(head+(t-p), post+1);
- len = expand_brace(path, data, len);
- p = t + 1;
}
- return len;
-}
+ (void) closedir (d);
-static VALUE
-glob_new0(class, str)
- VALUE class;
- struct RString *str;
-{
- VALUE new;
- struct glob_data *data;
- char *p1, *p2, *pend, *s;
- int len = 0;
-
- new = obj_alloc(class);
- Make_Data_Struct(new, id_data, struct glob_data, Qnil, glob_free, data);
- data->globs = ALLOC_N(char*, 1);
-
- p1 = p2 = str->ptr;
- pend = p1 + str->len;
- while (p1 < pend) {
- char s[MAXPATHLEN];
- int d;
-
- while (isdelim(*p1)) p1++;
- p2 = p1;
- while (!isdelim(*p2)) p2++;
- d = p2 - p1;
- memcpy(s, p1, d);
- s[d] = '\0';
- len = expand_brace(s, data, len);
- p1 = p2;
+ if (!lose)
+ {
+ name_vector = (char **) xmalloc ((count + 1) * sizeof (char *));
+ lose |= name_vector == NULL;
}
- data->globs[len] = Qnil;
- return new;
-}
+ /* Have we run out of memory? */
+ lost:
+ if (lose)
+ {
+ /* Here free the strings we have got. */
+ while (lastlink)
+ {
+ free (lastlink->name);
+ lastlink = lastlink->next;
+ }
+#if defined (SHELL)
+ if (interrupt_state)
+ throw_to_top_level ();
+#endif /* SHELL */
+ return (NULL);
+ }
-VALUE
-glob_new(str)
- struct RString *str;
-{
- return glob_new0(C_Glob, str);
+ /* Copy the name pointers from the linked list into the vector. */
+ for (i = 0; i < count; ++i)
+ {
+ name_vector[i] = lastlink->name;
+ lastlink = lastlink->next;
+ }
+
+ name_vector[count] = NULL;
+ return (name_vector);
}
+
+/* Return a new array which is the concatenation of each string in ARRAY
+ to DIR. This function expects you to pass in an allocated ARRAY, and
+ it takes care of free()ing that array. Thus, you might think of this
+ function as side-effecting ARRAY. */
+static char **
+glob_dir_to_array (dir, array)
+ char *dir, **array;
+{
+ register unsigned int i, l;
+ int add_slash;
+ char **result;
+
+ l = strlen (dir);
+ if (l == 0)
+ return (array);
+
+ add_slash = dir[l - 1] != '/';
-char **glob_filename();
+ i = 0;
+ while (array[i] != NULL)
+ ++i;
+
+ result = (char **) xmalloc ((i + 1) * sizeof (char *));
+ if (result == NULL)
+ return (NULL);
+
+ for (i = 0; array[i] != NULL; i++)
+ {
+ result[i] = (char *) xmalloc (l + (add_slash ? 1 : 0)
+ + strlen (array[i]) + 1);
+ if (result[i] == NULL)
+ return (NULL);
+ sprintf (result[i], "%s%s%s", dir, add_slash ? "/" : "", array[i]);
+ }
+ result[i] = NULL;
-static VALUE
-Fglob_each(glob)
- VALUE glob;
+ /* Free the input array. */
+ for (i = 0; array[i] != NULL; i++)
+ free (array[i]);
+ free ((char *) array);
+
+ return (result);
+}
+
+/* Do globbing on PATHNAME. Return an array of pathnames that match,
+ marking the end of the array with a null-pointer as an element.
+ If no pathnames match, then the array is empty (first element is null).
+ If there isn't enough memory, then return NULL.
+ If a file system error occurs, return -1; `errno' has the error code. */
+char **
+glob_filename (pathname)
+ char *pathname;
{
- struct glob_data *data;
- char **patv, **fnames, **ff;
-
- Get_Data_Struct(glob, id_data, struct glob_data, data);
- for (patv = data->globs; *patv; patv++) {
- if (!glob_pattern_p(*patv)) {
- rb_yield(str_new2(*patv));
- continue;
- }
- fnames = glob_filename(*patv);
- if (fnames == (char**)-1) rb_sys_fail(*patv);
- if (fnames[0] == Qnil) {
- rb_yield(str_new2(*patv));
+ char *strrchr();
+
+ char **result;
+ unsigned int result_size;
+ char *directory_name, *filename;
+ unsigned int directory_len;
+
+ result = (char **) xmalloc (sizeof (char *));
+ result_size = 1;
+ if (result == NULL)
+ return (NULL);
+
+ result[0] = NULL;
+
+ /* Find the filename. */
+ filename = strrchr (pathname, '/');
+ if (filename == NULL)
+ {
+ filename = pathname;
+ directory_name = "";
+ directory_len = 0;
+ }
+ else
+ {
+ directory_len = (filename - pathname) + 1;
+ directory_name = (char *) alloca (directory_len + 1);
+
+ bcopy (pathname, directory_name, directory_len);
+ directory_name[directory_len] = '\0';
+ ++filename;
+ }
+
+ /* If directory_name contains globbing characters, then we
+ have to expand the previous levels. Just recurse. */
+ if (glob_pattern_p (directory_name))
+ {
+ char **directories;
+ register unsigned int i;
+
+ if (directory_name[directory_len - 1] == '/')
+ directory_name[directory_len - 1] = '\0';
+
+ directories = glob_filename (directory_name);
+
+ if (directories == NULL)
+ goto memory_error;
+ else if (directories == (char **)&glob_error_return)
+ return ((char **) &glob_error_return);
+ else if (*directories == NULL)
+ {
+ free ((char *) directories);
+ return ((char **) &glob_error_return);
}
- else {
- ff = fnames;
- while (*ff) {
- rb_yield(str_new2(*ff));
- free(*ff);
- ff++;
+
+ /* We have successfully globbed the preceding directory name.
+ For each name in DIRECTORIES, call glob_vector on it and
+ FILENAME. Concatenate the results together. */
+ for (i = 0; directories[i] != NULL; ++i)
+ {
+ char **temp_results;
+
+ /* Scan directory even on a NULL pathname. That way, `*h/'
+ returns only directories ending in `h', instead of all
+ files ending in `h' with a `/' appended. */
+ temp_results = glob_vector (filename, directories[i]);
+
+ /* Handle error cases. */
+ if (temp_results == NULL)
+ goto memory_error;
+ else if (temp_results == (char **)&glob_error_return)
+ /* This filename is probably not a directory. Ignore it. */
+ ;
+ else
+ {
+ char **array;
+ register unsigned int l;
+
+ array = glob_dir_to_array (directories[i], temp_results);
+ l = 0;
+ while (array[l] != NULL)
+ ++l;
+
+ result =
+ (char **)xrealloc(result, (result_size + l) * sizeof (char *));
+
+ if (result == NULL)
+ goto memory_error;
+
+ for (l = 0; array[l] != NULL; ++l)
+ result[result_size++ - 1] = array[l];
+
+ result[result_size - 1] = NULL;
+
+ /* Note that the elements of ARRAY are not freed. */
+ free ((char *) array);
}
}
- free(fnames);
+ /* Free the directories. */
+ for (i = 0; directories[i]; i++)
+ free (directories[i]);
+
+ free ((char *) directories);
+
+ return (result);
}
- return Qnil;
-}
-VALUE
-Fglob_match(glob, str)
- VALUE glob;
- struct RString *str;
-{
- struct glob_data *data;
- char **patv;
-
- Check_Type(str, T_STRING);
- Get_Data_Struct(glob, id_data, struct glob_data, data);
- patv = data->globs;
- while (*patv) {
- if (fnmatch(*patv, str->ptr, 0) != FNM_NOMATCH)
- return TRUE;
- patv++;
+ /* If there is only a directory name, return it. */
+ if (*filename == '\0')
+ {
+ result = (char **) xrealloc ((char *) result, 2 * sizeof (char *));
+ if (result == NULL)
+ return (NULL);
+ result[0] = (char *) xmalloc (directory_len + 1);
+ if (result[0] == NULL)
+ goto memory_error;
+ bcopy (directory_name, result[0], directory_len + 1);
+ result[1] = NULL;
+ return (result);
}
- return FALSE;
-}
+ else
+ {
+ char **temp_results;
-extern VALUE M_Enumerable;
+ /* There are no unquoted globbing characters in DIRECTORY_NAME.
+ Dequote it before we try to open the directory since there may
+ be quoted globbing characters which should be treated verbatim. */
+ if (directory_len > 0)
+ dequote_pathname (directory_name);
-Init_Glob()
-{
- C_Glob = rb_define_class("Glob", C_Object);
- rb_include_module(C_Glob, M_Enumerable);
+ /* We allocated a small array called RESULT, which we won't be using.
+ Free that memory now. */
+ free (result);
+
+ /* Just return what glob_vector () returns appended to the
+ directory name. */
+ temp_results =
+ glob_vector (filename, (directory_len == 0 ? "." : directory_name));
- rb_define_single_method(C_Glob, "new", glob_new0, 1);
+ if (temp_results == NULL || temp_results == (char **)&glob_error_return)
+ return (temp_results);
- rb_define_method(C_Glob, "each", Fglob_each, 0);
- rb_define_method(C_Glob, "=~", Fglob_match, 1);
+ return (glob_dir_to_array (directory_name, temp_results));
+ }
+
+ /* We get to memory_error if the program has run out of memory, or
+ if this is the shell, and we have been interrupted. */
+ memory_error:
+ if (result != NULL)
+ {
+ register unsigned int i;
+ for (i = 0; result[i] != NULL; ++i)
+ free (result[i]);
+ free ((char *) result);
+ }
+#if defined (SHELL)
+ if (interrupt_state)
+ throw_to_top_level ();
+#endif /* SHELL */
+ return (NULL);
+}
+
+#if defined (TEST)
+
+main (argc, argv)
+ int argc;
+ char **argv;
+{
+ unsigned int i;
+
+ for (i = 1; i < argc; ++i)
+ {
+ char **value = glob_filename (argv[i]);
+ if (value == NULL)
+ puts ("Out of memory.");
+ else if (value == &glob_error_return)
+ perror (argv[i]);
+ else
+ for (i = 0; value[i] != NULL; i++)
+ puts (value[i]);
+ }
- id_data = rb_intern("data");
+ exit (0);
}
+#endif /* TEST. */