summaryrefslogtreecommitdiffstats
path: root/branches/1.0/tinyHTTP/ragel/thttp_parser_url.rl
blob: 3d2dcb3edcb82604158796e50bd6cafb2d0b9312 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
/*
* Copyright (C) 2009-2010 Mamadou Diop.
*
* Contact: Mamadou Diop <diopmamadou(at)doubango.org>
*	
* This file is part of Open Source Doubango Framework.
*
* DOUBANGO is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*	
* DOUBANGO is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*	
* You should have received a copy of the GNU General Public License
* along with DOUBANGO.
*
*/

/**@file thttp_parser_url.c
 * @brief HTTP/HTTPS URL parser.
 *
 * @author Mamadou Diop <diopmamadou(at)doubango.org>
 *
 * @date Created: Sat Nov 8 16:54:58 2009 mdiop
 */
#include "tinyhttp/parsers/thttp_parser_url.h"

#include "tsk_string.h"
#include "tsk_memory.h"
#include "tsk_debug.h"

/***********************************
*	Ragel state machine.
*/
%%{
	machine thttp_machine_parser_url;

	# Includes
	include thttp_machine_utils "./ragel/thttp_machine_utils.rl";
			
	action tag{
		tag_start = p;
	}

	#/* Sets URL type */
	action is_http { url->scheme = tsk_strdup("http"), url->type = thttp_url_http; }
	action is_https { url->scheme = tsk_strdup("https"), url->type = thttp_url_https; }

	#/* Sets HOST type */
	action is_ipv4 { url->host_type = url->host_type = thttp_host_ipv4; }
	action is_ipv6 { url->host_type = url->host_type = thttp_host_ipv6; }
	action is_hostname { url->host_type = url->host_type = thttp_host_hostname; }

	action parse_host{
		TSK_PARSER_SET_STRING(url->host);
	}

	action parse_port{
		have_port = 1;
		TSK_PARSER_SET_INT(url->port);
	}

	action parse_hpath{
		TSK_PARSER_SET_STRING(url->hpath);
	}

	action parse_search{
		TSK_PARSER_SET_STRING(url->search);
	}

	action eob{
	}

	#// RFC 1738: "http://" hostport [ "/" hpath [ "?" search ]]
	#// FIXME: hpath is no optional (see above) but in my def. I use it as opt (any*).

	search = any* >tag %parse_search;
	hpath = any* >tag %parse_hpath;
	port = DIGIT+ >tag %parse_port;
	myhost = ((IPv6reference >is_ipv6) | (IPv4address >is_ipv4) | (hostname >is_hostname)) >tag %parse_host;
	hostport = myhost ( ":" port )?;
	main := (("http:"i>tag %is_http | "https:"i>tag %is_https) "//" hostport :>("/" hpath :>("?" search)?)? ) @eob;
	
}%%

/**@ingroup thttp_url_group
* Parses a HTTP/HTTPS url.
* @param urlstring A pointer to a valid url string. If the port is missing, then it's default value will be 443 if
* the scheme is 'https' and 80 otherwise.<br>
* @param length The length of the url string.
* @retval A well-defined object representing the url string.
*
* @code
* thttp_url_t* url = thttp_url_parse("http://www.google.com", tsk_strlen("http://www.google.com"));
* @endcode
*
* @sa @ref thttp_url_tostring<br>@ref thttp_url_serialize
**/
thttp_url_t *thttp_url_parse(const char *urlstring, tsk_size_t length)
{
	tsk_bool_t have_port = tsk_false;
	int cs = 0;
	const char *p = urlstring;
	const char *pe = p + length;
	const char *eof = pe;

	const char *ts = 0, *te = 0;
	int act =0;

	thttp_url_t *url = thttp_url_create(thttp_url_unknown);
	
	const char *tag_start = 0;
	
	%%write data;
	%%write init;
	%%write exec;
	
	if( cs < %%{ write first_final; }%% ){
		TSK_DEBUG_ERROR("Failed to parse HTTP/HTTPS URL.");
		TSK_OBJECT_SAFE_FREE(url);
	}
	else if(!have_port){
		if(url->type == thttp_url_https){
			url->port = 443;
		}
		else{
			url->port = 80;
		}
	}
	
	return url;
}
OpenPOWER on IntegriCloud