/* #line 1 "./ragel/thttp_parser_url.rl" */ /* * Copyright (C) 2010-2015 Mamadou Diop. * * This file is part of Open Source Doubango Framework. * * DOUBANGO is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * DOUBANGO is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with DOUBANGO. * */ /**@file thttp_parser_url.c * @brief HTTP/HTTPS URL parser. */ #include "tinyhttp/parsers/thttp_parser_url.h" #include "tsk_string.h" #include "tsk_memory.h" #include "tsk_debug.h" /*********************************** * Ragel state machine. */ /* #line 82 "./ragel/thttp_parser_url.rl" */ /**@ingroup thttp_url_group * Parses an HTTP/HTTPS url. * @param urlstring A pointer to a valid url string. If the port is missing, then its default value will be 443 if * the scheme is 'https' and 80 otherwise.
* @param length The length of the url string. * @retval A well-defined object representing the url string. * * @code * thttp_url_t* url = thttp_url_parse("http://www.google.com", tsk_strlen("http://www.google.com")); * @endcode * * @sa @ref thttp_url_tostring
@ref thttp_url_serialize **/ thttp_url_t *thttp_url_parse(const char *urlstring, tsk_size_t length) { tsk_bool_t have_port = tsk_false; int cs = 0; const char *p = urlstring; const char *pe = p + length; const char *eof = pe; const char *ts = 0, *te = 0; int act =0; thttp_url_t *url = thttp_url_create(thttp_url_unknown); const char *tag_start = 0; TSK_RAGEL_DISABLE_WARNINGS_BEGIN() /* #line 69 "./src/parsers/thttp_parser_url.c" */ static const char _thttp_machine_parser_url_actions[] = { 0, 1, 1, 1, 2, 1, 6, 1, 7, 1, 8, 1, 9, 1, 10, 2, 0, 4, 2, 0, 8, 2, 0, 9, 2, 0, 10, 2, 6, 10, 2, 7, 10, 2, 8, 10, 3, 0, 3, 5, 3, 0, 5, 10, 3, 0, 8, 10 }; static const short _thttp_machine_parser_url_key_offsets[] = { 0, 0, 8, 15, 23, 29, 36, 38, 44, 52, 58, 66, 72, 80, 88, 96, 104, 112, 120, 123, 124, 131, 139, 147, 155, 157, 164, 173, 175, 178, 180, 183, 185, 188, 191, 192, 195, 196, 199, 200, 209, 218, 226, 234, 242, 250, 252, 258, 267, 276, 285, 287, 290, 293, 294, 295, 298, 308, 309, 310, 310, 310, 320, 328, 331, 341, 351, 361, 373, 385, 397, 409, 417, 419 }; static const char _thttp_machine_parser_url_trans_keys[] = { 45, 46, 48, 57, 65, 90, 97, 122, 45, 48, 57, 65, 90, 97, 122, 45, 46, 48, 57, 65, 90, 97, 122, 48, 57, 65, 90, 97, 122, 45, 48, 57, 65, 90, 97, 122, 48, 57, 48, 57, 65, 90, 97, 122, 45, 46, 48, 57, 65, 90, 97, 122, 48, 57, 65, 90, 97, 122, 45, 46, 48, 57, 65, 90, 97, 122, 48, 57, 65, 90, 97, 122, 45, 46, 48, 57, 65, 90, 97, 122, 45, 46, 48, 57, 65, 90, 97, 122, 45, 46, 48, 57, 65, 90, 97, 122, 45, 46, 48, 57, 65, 90, 97, 122, 45, 46, 48, 57, 65, 90, 97, 122, 45, 46, 48, 57, 65, 90, 97, 122, 47, 48, 57, 47, 58, 48, 57, 65, 70, 97, 102, 58, 93, 48, 57, 65, 70, 97, 102, 58, 93, 48, 57, 65, 70, 97, 102, 58, 93, 48, 57, 65, 70, 97, 102, 58, 93, 58, 48, 57, 65, 70, 97, 102, 46, 58, 93, 48, 57, 65, 70, 97, 102, 48, 57, 46, 48, 57, 48, 57, 46, 48, 57, 48, 57, 93, 48, 57, 93, 48, 57, 93, 46, 48, 57, 46, 46, 48, 57, 46, 46, 58, 93, 48, 57, 65, 70, 97, 102, 46, 58, 93, 48, 
57, 65, 70, 97, 102, 58, 93, 48, 57, 65, 70, 97, 102, 58, 93, 48, 57, 65, 70, 97, 102, 58, 93, 48, 57, 65, 70, 97, 102, 58, 93, 48, 57, 65, 70, 97, 102, 58, 93, 48, 57, 65, 70, 97, 102, 46, 58, 93, 48, 57, 65, 70, 97, 102, 46, 58, 93, 48, 57, 65, 70, 97, 102, 46, 58, 93, 48, 57, 65, 70, 97, 102, 48, 57, 46, 48, 57, 46, 48, 57, 46, 58, 47, 48, 57, 47, 72, 91, 104, 48, 57, 65, 90, 97, 122, 63, 63, 45, 46, 47, 58, 48, 57, 65, 90, 97, 122, 47, 58, 48, 57, 65, 90, 97, 122, 47, 48, 57, 45, 46, 47, 58, 48, 57, 65, 90, 97, 122, 45, 46, 47, 58, 48, 57, 65, 90, 97, 122, 45, 46, 47, 58, 48, 57, 65, 90, 97, 122, 45, 46, 47, 58, 84, 116, 48, 57, 65, 90, 97, 122, 45, 46, 47, 58, 84, 116, 48, 57, 65, 90, 97, 122, 45, 46, 47, 58, 80, 112, 48, 57, 65, 90, 97, 122, 45, 46, 47, 58, 83, 115, 48, 57, 65, 90, 97, 122, 47, 91, 48, 57, 65, 90, 97, 122, 47, 58, 45, 46, 47, 58, 48, 57, 65, 90, 97, 122, 0 }; static const char _thttp_machine_parser_url_single_lengths[] = { 0, 2, 1, 2, 0, 1, 0, 0, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 1, 3, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 3, 3, 2, 2, 2, 2, 2, 0, 3, 3, 3, 0, 1, 1, 1, 1, 1, 4, 1, 1, 0, 0, 4, 2, 1, 4, 4, 4, 6, 6, 6, 6, 2, 2, 4 }; static const char _thttp_machine_parser_url_range_lengths[] = { 0, 3, 3, 3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 0, 3, 3, 3, 3, 0, 3, 3, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 1, 1, 1, 0, 0, 1, 3, 0, 0, 0, 0, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3 }; static const short _thttp_machine_parser_url_index_offsets[] = { 0, 0, 6, 11, 17, 21, 26, 28, 32, 38, 42, 48, 52, 58, 64, 70, 76, 82, 88, 91, 93, 98, 104, 110, 116, 119, 124, 131, 133, 136, 138, 141, 143, 146, 149, 151, 154, 156, 159, 161, 168, 175, 181, 187, 193, 199, 202, 206, 213, 220, 227, 229, 232, 235, 237, 239, 242, 250, 252, 254, 255, 256, 264, 270, 273, 281, 289, 297, 307, 317, 327, 337, 343, 346 }; static const char _thttp_machine_parser_url_indicies[] = { 0, 2, 3, 4, 4, 1, 0, 4, 4, 4, 1, 0, 5, 4, 4, 4, 1, 4, 6, 
6, 1, 7, 6, 6, 6, 1, 8, 1, 9, 6, 6, 1, 0, 10, 11, 4, 4, 1, 12, 6, 6, 1, 0, 13, 14, 4, 4, 1, 15, 6, 6, 1, 0, 13, 16, 4, 4, 1, 0, 13, 4, 4, 4, 1, 0, 10, 17, 4, 4, 1, 0, 10, 4, 4, 4, 1, 0, 2, 18, 4, 4, 1, 0, 2, 4, 4, 4, 1, 19, 8, 1, 20, 1, 22, 21, 21, 21, 1, 24, 25, 23, 23, 23, 1, 24, 25, 26, 26, 26, 1, 24, 25, 27, 27, 27, 1, 24, 25, 1, 29, 28, 21, 21, 1, 30, 24, 25, 31, 23, 23, 1, 32, 1, 33, 34, 1, 35, 1, 36, 37, 1, 38, 1, 25, 39, 1, 25, 40, 1, 25, 1, 36, 41, 1, 36, 1, 33, 42, 1, 33, 1, 30, 24, 25, 43, 26, 26, 1, 30, 24, 25, 27, 27, 27, 1, 45, 25, 44, 44, 44, 1, 47, 25, 46, 46, 46, 1, 47, 25, 48, 48, 48, 1, 47, 25, 49, 49, 49, 1, 47, 25, 1, 50, 44, 44, 1, 30, 47, 25, 51, 46, 46, 1, 30, 47, 25, 52, 48, 48, 1, 30, 47, 25, 49, 49, 49, 1, 53, 1, 30, 54, 1, 30, 55, 1, 30, 1, 29, 1, 56, 8, 1, 57, 60, 61, 60, 58, 59, 59, 1, 63, 62, 65, 64, 66, 67, 7, 68, 69, 70, 6, 6, 6, 1, 69, 70, 4, 6, 6, 1, 71, 72, 1, 0, 5, 69, 70, 73, 4, 4, 1, 0, 5, 69, 70, 74, 4, 4, 1, 0, 5, 69, 70, 4, 4, 4, 1, 7, 68, 69, 70, 75, 75, 6, 6, 6, 1, 7, 68, 69, 70, 76, 76, 6, 6, 6, 1, 7, 68, 69, 70, 77, 77, 6, 6, 6, 1, 7, 68, 69, 78, 79, 79, 6, 6, 6, 1, 57, 61, 58, 59, 59, 1, 69, 70, 1, 7, 68, 69, 80, 6, 6, 6, 1, 0 }; static const char _thttp_machine_parser_url_trans_targs[] = { 2, 0, 7, 16, 3, 4, 61, 5, 63, 8, 9, 14, 10, 11, 12, 64, 13, 15, 17, 19, 71, 21, 54, 22, 25, 72, 23, 24, 26, 41, 27, 39, 28, 29, 37, 30, 31, 35, 32, 33, 34, 36, 38, 40, 42, 50, 43, 46, 44, 45, 47, 48, 49, 51, 52, 53, 19, 57, 1, 61, 67, 20, 58, 59, 58, 59, 60, 60, 62, 57, 6, 57, 63, 65, 66, 68, 69, 70, 18, 73, 55 }; static const char _thttp_machine_parser_url_trans_actions[] = { 0, 0, 0, 0, 0, 0, 13, 0, 24, 0, 0, 0, 0, 0, 0, 13, 0, 0, 0, 1, 13, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 13, 36, 40, 40, 15, 24, 44, 13, 33, 24, 13, 13, 27, 5, 30, 13, 13, 13, 13, 13, 13, 5, 13, 5 }; static const char _thttp_machine_parser_url_eof_actions[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 9, 21, 11, 5, 5, 7, 5, 5, 5, 5, 5, 5, 5, 0, 5, 5 }; static const int thttp_machine_parser_url_start = 56; static const int thttp_machine_parser_url_first_final = 56; static const int thttp_machine_parser_url_error = 0; static const int thttp_machine_parser_url_en_main = 56; /* #line 114 "./ragel/thttp_parser_url.rl" */ (void)(ts); (void)(te); (void)(act); (void)(eof); (void)(thttp_machine_parser_url_first_final); (void)(thttp_machine_parser_url_error); (void)(thttp_machine_parser_url_en_main); /* #line 294 "./src/parsers/thttp_parser_url.c" */ { cs = thttp_machine_parser_url_start; } /* #line 122 "./ragel/thttp_parser_url.rl" */ /* #line 301 "./src/parsers/thttp_parser_url.c" */ { int _klen; unsigned int _trans; const char *_acts; unsigned int _nacts; const char *_keys; if ( p == pe ) { goto _test_eof; } if ( cs == 0 ) { goto _out; } _resume: _keys = _thttp_machine_parser_url_trans_keys + _thttp_machine_parser_url_key_offsets[cs]; _trans = _thttp_machine_parser_url_index_offsets[cs]; _klen = _thttp_machine_parser_url_single_lengths[cs]; if ( _klen > 0 ) { const char *_lower = _keys; const char *_mid; const char *_upper = _keys + _klen - 1; while (1) { if ( _upper < _lower ) { break; } _mid = _lower + ((_upper-_lower) >> 1); if ( (*p) < *_mid ) { _upper = _mid - 1; } else if ( (*p) > *_mid ) { _lower = _mid + 1; } else { _trans += (_mid - _keys); goto _match; } } _keys += _klen; _trans += _klen; } _klen = _thttp_machine_parser_url_range_lengths[cs]; if ( _klen > 0 ) { const char *_lower = _keys; const char *_mid; const char *_upper = _keys + (_klen<<1) - 2; while (1) { if ( _upper < _lower ) { break; } _mid = _lower + (((_upper-_lower) >> 1) & ~1); if ( (*p) < _mid[0] ) { _upper = _mid - 2; } else if ( (*p) > _mid[1] ) { _lower = _mid + 2; } else { _trans += ((_mid - _keys)>>1); goto _match; } } _trans += _klen; } _match: 
_trans = _thttp_machine_parser_url_indicies[_trans]; cs = _thttp_machine_parser_url_trans_targs[_trans]; if ( _thttp_machine_parser_url_trans_actions[_trans] == 0 ) { goto _again; } _acts = _thttp_machine_parser_url_actions + _thttp_machine_parser_url_trans_actions[_trans]; _nacts = (unsigned int) *_acts++; while ( _nacts-- > 0 ) { switch ( *_acts++ ) { case 0: /* #line 38 "./ragel/thttp_parser_url.rl" */ { tag_start = p; } break; case 1: /* #line 43 "./ragel/thttp_parser_url.rl" */ { url->scheme = tsk_strdup("http"), url->type = thttp_url_http; } break; case 2: /* #line 44 "./ragel/thttp_parser_url.rl" */ { url->scheme = tsk_strdup("https"), url->type = thttp_url_https; } break; case 3: /* #line 47 "./ragel/thttp_parser_url.rl" */ { url->host_type = url->host_type = thttp_host_ipv4; } break; case 4: /* #line 48 "./ragel/thttp_parser_url.rl" */ { url->host_type = url->host_type = thttp_host_ipv6; } break; case 5: /* #line 49 "./ragel/thttp_parser_url.rl" */ { url->host_type = url->host_type = thttp_host_hostname; } break; case 6: /* #line 51 "./ragel/thttp_parser_url.rl" */ { TSK_PARSER_SET_STRING(url->host); } break; case 7: /* #line 55 "./ragel/thttp_parser_url.rl" */ { have_port = 1; TSK_PARSER_SET_INT(url->port); } break; case 8: /* #line 60 "./ragel/thttp_parser_url.rl" */ { TSK_PARSER_SET_STRING(url->hpath); } break; case 10: /* #line 68 "./ragel/thttp_parser_url.rl" */ { } break; /* #line 425 "./src/parsers/thttp_parser_url.c" */ } } _again: if ( cs == 0 ) { goto _out; } if ( ++p != pe ) { goto _resume; } _test_eof: { } if ( p == eof ) { const char *__acts = _thttp_machine_parser_url_actions + _thttp_machine_parser_url_eof_actions[cs]; unsigned int __nacts = (unsigned int) *__acts++; while ( __nacts-- > 0 ) { switch ( *__acts++ ) { case 0: /* #line 38 "./ragel/thttp_parser_url.rl" */ { tag_start = p; } break; case 6: /* #line 51 "./ragel/thttp_parser_url.rl" */ { TSK_PARSER_SET_STRING(url->host); } break; case 7: /* #line 55 "./ragel/thttp_parser_url.rl" */ 
{ have_port = 1; TSK_PARSER_SET_INT(url->port); } break; case 8: /* #line 60 "./ragel/thttp_parser_url.rl" */ { TSK_PARSER_SET_STRING(url->hpath); } break; case 9: /* #line 64 "./ragel/thttp_parser_url.rl" */ { TSK_PARSER_SET_STRING(url->search); } break; /* #line 472 "./src/parsers/thttp_parser_url.c" */ } } } _out: { } } /* #line 123 "./ragel/thttp_parser_url.rl" */ TSK_RAGEL_DISABLE_WARNINGS_END() if( cs < /* #line 484 "./src/parsers/thttp_parser_url.c" */ 56 /* #line 125 "./ragel/thttp_parser_url.rl" */ ) { TSK_DEBUG_ERROR("Failed to parse HTTP/HTTPS URL: '%.*s'", length, urlstring); TSK_OBJECT_SAFE_FREE(url); } else if(!have_port) { if(url->type == thttp_url_https) { url->port = 443; } else { url->port = 80; } } return url; }