From 6408f79cce401e1bfecf923e7156f84f96e021e3 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 23 Jun 2005 20:59:16 -0700 Subject: [LIB]: Naive finite state machine based textsearch A finite state machine consists of n states (struct ts_fsm_token) representing the pattern as a finite automation. The data is read sequentially on a octet basis. Every state token specifies the number of recurrences and the type of value accepted which can be either a specific character or ctype based set of characters. The available type of recurrences include 1, (0|1), [0 n], and [1 n]. The algorithm differs between strict/non-strict mode specyfing whether the pattern has to start at the first octect. Strict mode is enabled by default and can be disabled by inserting TS_FSM_HEAD_IGNORE as the first token in the chain. The runtime performance of the algorithm should be around O(n), however while in strict mode the average runtime can be better. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/textsearch_fsm.h | 48 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 include/linux/textsearch_fsm.h (limited to 'include') diff --git a/include/linux/textsearch_fsm.h b/include/linux/textsearch_fsm.h new file mode 100644 index 0000000..fdfa078 --- /dev/null +++ b/include/linux/textsearch_fsm.h @@ -0,0 +1,48 @@ +#ifndef __LINUX_TEXTSEARCH_FSM_H +#define __LINUX_TEXTSEARCH_FSM_H + +#include + +enum { + TS_FSM_SPECIFIC, /* specific character */ + TS_FSM_WILDCARD, /* any character */ + TS_FSM_DIGIT, /* isdigit() */ + TS_FSM_XDIGIT, /* isxdigit() */ + TS_FSM_PRINT, /* isprint() */ + TS_FSM_ALPHA, /* isalpha() */ + TS_FSM_ALNUM, /* isalnum() */ + TS_FSM_ASCII, /* isascii() */ + TS_FSM_CNTRL, /* iscntrl() */ + TS_FSM_GRAPH, /* isgraph() */ + TS_FSM_LOWER, /* islower() */ + TS_FSM_UPPER, /* isupper() */ + TS_FSM_PUNCT, /* ispunct() */ + TS_FSM_SPACE, /* isspace() */ + __TS_FSM_TYPE_MAX, +}; +#define TS_FSM_TYPE_MAX (__TS_FSM_TYPE_MAX - 1) + +enum { + TS_FSM_SINGLE, /* 1 occurrence */ + TS_FSM_PERHAPS, /* 1 or 0 occurrence */ + TS_FSM_ANY, /* 0..n occurrences */ + TS_FSM_MULTI, /* 1..n occurrences */ + TS_FSM_HEAD_IGNORE, /* 0..n ignored occurrences at head */ + __TS_FSM_RECUR_MAX, +}; +#define TS_FSM_RECUR_MAX (__TS_FSM_RECUR_MAX - 1) + +/** + * struct ts_fsm_token - state machine token (state) + * @type: type of token + * @recur: number of recurrences + * @value: character value for TS_FSM_SPECIFIC + */ +struct ts_fsm_token +{ + __u16 type; + __u8 recur; + __u8 value; +}; + +#endif -- cgit v1.1