/**

* Copyright (c) 2009, 2010 Eric Wong (all bugs are Eric's fault)
* Copyright (c) 2005 Zed A. Shaw
* You can redistribute it and/or modify it under the same terms as Ruby 1.8
* or the GPLv2 or later.
*/

include “ruby.h” include “ext_help.h” include <assert.h> include <stdlib.h> include <string.h> include <sys/types.h> include <limits.h> include “c_util.h”

static VALUE eParserError;
static ID id_uminus, id_sq, id_sq_set;

/* frozen, GC-registered strings for common env keys/values (see Init) */
static VALUE g_rack_url_scheme,
  g_80, g_443, g_http, g_https,
  g_HTTP_HOST, g_HTTP_CONNECTION, g_HTTP_TRAILER, g_HTTP_TRANSFER_ENCODING,
  g_HTTP_VERSION,
  g_CONTENT_LENGTH, g_CONTENT_TYPE, g_FRAGMENT,
  g_PATH_INFO, g_QUERY_STRING,
  g_REQUEST_METHOD, g_REQUEST_PATH, g_REQUEST_URI,
  g_SERVER_NAME, g_SERVER_PORT, g_SERVER_PROTOCOL;

/* 413 Request Entity Too Large, 414 Request-URI Too Long */
static VALUE e413, e414;

/** Defines common length and error messages for input length validation. */
#define DEF_MAX_LENGTH(N, length) \
  static const size_t MAX_##N##_LENGTH = length; \
  static const char MAX_##N##_LENGTH_ERR[] = \
    "HTTP element " # N  " is longer than the " # length " allowed length."

/**
 * Validates the max length of given input and throws a ParserError
 * exception if over.
 */
#define VALIDATE_MAX_LENGTH(len, N) do { \
  if (len > MAX_##N##_LENGTH) \
    rb_raise(eParserError, MAX_##N##_LENGTH_ERR); \
} while (0)

/* like VALIDATE_MAX_LENGTH, but raises 414 (Request-URI Too Long) */
#define VALIDATE_MAX_URI_LENGTH(len, N) do { \
  if (len > MAX_##N##_LENGTH) \
    rb_raise(e414, MAX_##N##_LENGTH_ERR); \
} while (0)

/* Defines the maximum allowed lengths for various input elements. */
DEF_MAX_LENGTH(FIELD_NAME, 256);
DEF_MAX_LENGTH(FIELD_VALUE, 80 * 1024);
DEF_MAX_LENGTH(HEADER, (1024 * (80 + 32)));
DEF_MAX_LENGTH(REQUEST_URI, 1024 * 15);
DEF_MAX_LENGTH(REQUEST_PATH, 4096); /* common PATH_MAX on modern systems */
DEF_MAX_LENGTH(QUERY_STRING, (1024 * 10));

/*
 * Per-connection parser state.  One of these lives inside every
 * Kcar::Parser object; http_parser_init() zeroes it for reuse.
 */
struct http_parser {

int cs; /* Ragel internal state */
/* one-bit flags describing what has been seen/decided so far */
unsigned int is_request:1;
unsigned int has_query:1;
unsigned int has_scheme:1;
unsigned int chunked:1;
unsigned int has_body:1;
unsigned int in_body:1;
unsigned int has_trailer:1;
unsigned int in_trailer:1;
unsigned int in_chunk:1;
unsigned int persistent:1;
unsigned int has_header:1;
unsigned int body_eof_seen:1;
unsigned int is_https:1;
unsigned int padding:19; /* 13 flag bits above; pad to 32 bits */
/* byte offsets into the current buffer (32-bit; see ulong2uint) */
unsigned int mark;
unsigned int offset;
union { /* these 2 fields don't nest */
  unsigned int field;
  unsigned int query;
} start;
union {
  unsigned int field_len; /* only used during header processing */
  unsigned int dest_offset; /* only used during body processing */
} s;
VALUE cont; /* Qfalse: unset, Qnil: ignored header, T_STRING: append */
union {
  /* String or Qnil */
  VALUE status; /* status string for responses */
  VALUE host; /* Host: header for requests */
} v;
union {
  off_t content;
  off_t chunk;
} len;

};

/*
 * Narrows an unsigned long to unsigned int, raising RangeError when the
 * value does not fit (only possible where long is wider than int).
 */
static unsigned int ulong2uint(unsigned long n)
{
  unsigned int narrowed = (unsigned int)n;

  if (sizeof(unsigned int) != sizeof(unsigned long) &&
      (unsigned long)narrowed != n)
    rb_raise(rb_eRangeError, "too large to be 32-bit uint: %lu", n);

  return narrowed;
}

/* buffer-offset helpers shared by the Ragel actions and C callbacks */
#define REMAINING (unsigned long)(pe - p)
#define LEN(AT, FPC) (ulong2uint(FPC - buffer) - hp->AT)
#define MARK(M,FPC) (hp->M = ulong2uint((FPC) - buffer))
#define PTR_TO(F) (buffer + hp->F)
#define STR_NEW(M,FPC) rb_str_new(PTR_TO(M), LEN(M, FPC))
#define STRIPPED_STR_NEW(M,FPC) stripped_str_new(PTR_TO(M), LEN(M, FPC))

/* Downcases a single ASCII character. Locale-agnostic. */
static void downcase_char(char *c)
{
  if (*c >= 'A' && *c <= 'Z')
    *c |= 0x20;
}

/* Returns nonzero if +c+ is linear whitespace (space or horizontal tab). */
static int is_lws(char c)
{
  switch (c) {
  case ' ':
  case '\t':
    return 1;
  default:
    return 0;
  }
}

/* this will dedupe under Ruby 2.5+ (December 2017) */
static VALUE str_dd_freeze(VALUE str)
{
  if (STR_UMINUS_DEDUPE)
    return rb_funcall(str, id_uminus, 0);

  /* freeze, since it speeds up MRI slightly */
  OBJ_FREEZE(str);
  return str;
}

/* Allocates a Ruby string from +ptr+/+len+ and dedupe-freezes it. */
static VALUE str_new_dd_freeze(const char *ptr, long len)
{
  VALUE str = rb_str_new(ptr, len);

  return str_dd_freeze(str);
}

/*
 * Creates a Ruby string from +str+ with trailing linear whitespace
 * (spaces and tabs) removed; leading bytes are untouched.
 */
static VALUE stripped_str_new(const char *str, long len)
{
  long n = len;

  while (n > 0 && is_lws(str[n - 1]))
    --n;

  return rb_str_new(str, n);
}

/* Returns the saved Host: value for a request, or Qfalse if unset. */
static VALUE request_host_val(struct http_parser *hp)
{
  assert(hp->is_request == 1 && "not a request");

  if (NIL_P(hp->v.host))
    return Qfalse;
  return hp->v.host;
}

/*
 * Splits a Host: value into SERVER_NAME and SERVER_PORT env entries.
 * Handles bracketed IPv6 literals ("[::1]:8080"); when no port is
 * present, the scheme's default port is used (only if a scheme was
 * parsed from an absoluteURI).
 */
static void set_server_vars(struct http_parser *hp, VALUE env, VALUE host) {

char *host_ptr = RSTRING_PTR(host);
long host_len = RSTRING_LEN(host);
char *colon;
VALUE server_name = host;
/* Qfalse means "no port known": do not set SERVER_PORT at all */
VALUE server_port = hp->has_scheme ? (hp->is_https ? g_443 : g_80) : Qfalse;

if (*host_ptr == '[') { /* ipv6 address format */
  char *rbracket = memchr(host_ptr + 1, ']', host_len - 1);

  /* RSTRING_PTR is NUL-terminated, so rbracket[1] is safe at the end */
  if (rbracket)
    colon = (rbracket[1] == ':') ? rbracket + 1 : NULL;
  else
    colon = memchr(host_ptr + 1, ':', host_len - 1);
} else {
  colon = memchr(host_ptr, ':', host_len);
}

if (colon) {
  long port_start = colon - host_ptr + 1;
  long port_len = host_len - port_start;

  server_name = rb_str_substr(host, 0, colon - host_ptr);
  server_name = str_dd_freeze(server_name);
  /* a trailing colon with an empty port keeps the scheme default */
  if (port_len > 0) {
    server_port = rb_str_substr(host, port_start, port_len);
    server_port = str_dd_freeze(server_port);
  }
}
rb_hash_aset(env, g_SERVER_NAME, server_name);
if (server_port != Qfalse)
  rb_hash_aset(env, g_SERVER_PORT, server_port);

}

/*
 * Runs once the header block is fully parsed (header_done action):
 * validates chunked/Content-Length/Trailer interaction and fills in
 * the env entries Rack requires even when absent from the request.
 */
static void finalize_header(struct http_parser *hp, VALUE hdr) {

if (hp->has_trailer && !hp->chunked)
  rb_raise(eParserError, "trailer but not chunked");
if (hp->is_request) {
  if (hp->chunked) {
    /* len.chunk/len.content share a union; >= 0 means C-L was set */
    if (hp->len.chunk >= 0)
      rb_raise(eParserError, "Content-Length set with chunked encoding");
    else
      hp->len.chunk = 0; /* start counting chunk bytes from zero */
  } else if (hp->len.content < 0) {
      hp->len.content = 0; /* no Content-Length: request has no body */
  }

  /* Rack requires QUERY_STRING even when empty */
  if (!hp->has_query)
    rb_hash_aset(hdr, g_QUERY_STRING, rb_str_new(NULL, 0));
  if (hp->has_header) {
    VALUE host = request_host_val(hp);
    if (host != Qfalse)
      set_server_vars(hp, hdr, host);
  }
}

}

/*
 * handles values of the "Connection:" header, keepalive is implied
 * for HTTP/1.1 but needs to be explicitly enabled with HTTP/1.0
 * Additionally, we require GET/HEAD requests to support keepalive.
 */
static void hp_keepalive_connection(struct http_parser *hp, VALUE val) {

/* REQUEST_METHOD is always set before any headers */
if (STR_CSTR_CASE_EQ(val, "keep-alive")) {
  /* basically have HTTP/1.0 masquerade as HTTP/1.1+ */
  hp->persistent = 1;
} else if (STR_CSTR_CASE_EQ(val, "close")) {
  /*
   * it doesn't matter what HTTP version or request method we have,
   * if a server says "Connection: close", we disable keepalive
   */
  hp->persistent = 0;
} else {
  /*
   * server could've sent anything, ignore it for now.  Maybe
   * "hp->persistent = 0;" just in case?
   * Raising an exception might be too mean...
   */
}

}

/* Stores the deduped, frozen REQUEST_METHOD in the Rack env. */
static void request_method(VALUE env, const char *ptr, size_t len)
{
  VALUE method = str_new_dd_freeze(ptr, len);

  rb_hash_aset(env, g_REQUEST_METHOD, method);
}

/*
 * Records rack.url_scheme from an absoluteURI request line.  The Ragel
 * machine has already downcased the scheme and restricted it to
 * "http" or "https", so the length alone distinguishes the two.
 */
static void url_scheme(struct http_parser *hp, VALUE env, const char *ptr, size_t len)
{
  VALUE val;

  hp->has_scheme = 1;
  /* Ragel machine downcases and enforces this as "http" or "https" */
  if (len == 5) {
    hp->is_https = 1;
    assert(CONST_MEM_EQ("https", ptr, len) && "len == 5 but not 'https'");
    val = g_https;
  } else {
    /* original message said "len != 4"; len is 4 when we get here */
    assert(CONST_MEM_EQ("http", ptr, len) && "len == 4 but not 'http'");
    val = g_http;
  }
  rb_hash_aset(env, g_rack_url_scheme, val);
}

/* Records the host (from Host: or an absoluteURI) in env and parser. */
static void request_host(struct http_parser *hp, VALUE env, const char *ptr, size_t len)
{
  VALUE host = rb_str_new(ptr, len);

  rb_hash_aset(env, g_HTTP_HOST, host);
  hp->v.host = host;
}

/* Stores the URI fragment (text after '#') in the env. */
static void set_fragment(VALUE env, const char *ptr, size_t len)
{
  rb_hash_aset(env, g_FRAGMENT, rb_str_new(ptr, len));
}

/*
 * Stores REQUEST_URI (enforcing the URI length limit via e414).
 * An "OPTIONS *" request gets empty PATH_INFO/REQUEST_PATH strings.
 */
static void request_uri(VALUE env, const char *ptr, size_t len) {

VALUE val;

VALIDATE_MAX_URI_LENGTH(len, REQUEST_URI);
val = rb_str_new(ptr, len);
rb_hash_aset(env, g_REQUEST_URI, val);

/*
 * rack says PATH_INFO must start with "/" or be empty,
 * but "OPTIONS *" is a valid request
 */
if (CONST_MEM_EQ("*", ptr, len)) {
  val = rb_str_new(NULL, 0);
  rb_hash_aset(env, g_PATH_INFO, val);
  rb_hash_aset(env, g_REQUEST_PATH, val);
}

}

/* Stores QUERY_STRING, enforcing the query length limit (raises e414). */
static void query_string(struct http_parser *hp, VALUE env, const char *ptr, size_t len)
{
  VALUE qs;

  VALIDATE_MAX_URI_LENGTH(len, QUERY_STRING);
  qs = rb_str_new(ptr, len);
  hp->has_query = 1;
  rb_hash_aset(env, g_QUERY_STRING, qs);
}

/*
 * Stores REQUEST_PATH and PATH_INFO (the same string), enforcing the
 * path length limit (raises e414).
 */
static void request_path(VALUE env, const char *ptr, size_t len)
{
  VALUE path;

  VALIDATE_MAX_URI_LENGTH(len, REQUEST_PATH);
  path = rb_str_new(ptr, len);
  rb_hash_aset(env, g_REQUEST_PATH, path);
  rb_hash_aset(env, g_PATH_INFO, path);
}

/*
 * Records the protocol version.  HTTP/1.1 enables keepalive by
 * default; for requests this also sets SERVER_PROTOCOL/HTTP_VERSION
 * and marks that a usable request line was seen (has_header).
 */
static void http_version(struct http_parser *hp, VALUE env, const char *ptr, size_t len) {

if (CONST_MEM_EQ("HTTP/1.1", ptr, len)) {
  /* HTTP/1.1 implies keepalive unless "Connection: close" is set */
  hp->persistent = 1;
}
if (hp->is_request) {
  VALUE v = str_new_dd_freeze(ptr, len);
  hp->has_header = 1;

  rb_hash_aset(env, g_SERVER_PROTOCOL, v);
  rb_hash_aset(env, g_HTTP_VERSION, v);
}

}

/*
 * Saves the response status line and parses its numeric code.
 * Raises ParserError outside 100..999.  1xx, 204 and 304 responses
 * never carry a body; everything else may (has_body).
 */
static void status_phrase(struct http_parser *hp, VALUE hdr, const char *ptr, size_t len) {

long nr;

hp->v.status = str_new_dd_freeze(ptr, len);

/* RSTRING_PTR is null terminated, ptr is not */
nr = strtol(RSTRING_PTR(hp->v.status), NULL, 10);

if (nr < 100 || nr > 999)
  rb_raise(eParserError, "invalid status: %s", RSTRING_PTR(hp->v.status));

if ( !((nr >= 100 && nr <= 199) || nr == 204 || nr == 304) )
  hp->has_body = 1;

}

/* Raises ParserError for headers that may not appear in trailers. */
static inline void invalid_if_trailer(struct http_parser *hp)
{
  if (!hp->in_trailer)
    return;
  rb_raise(eParserError, "invalid Trailer");
}

/*
 * Appends a folded (continuation) header line to the value currently
 * collected in hp->cont.  Raises if no header is open (cont == Qfalse);
 * cont == Qnil means the current header is deliberately ignored.
 */
static void write_cont_value(struct http_parser *hp,

char *buffer, const char *p)

{

char *vptr;
long end;
long len = LEN(mark, p);
long cont_len;

if (hp->cont == Qfalse)
  rb_raise(eParserError, "invalid continuation line");

if (NIL_P(hp->cont))
  return; /* we're ignoring this header (probably Status:) */

assert(TYPE(hp->cont) == T_STRING && "continuation line is not a string");
assert(hp->mark > 0 && "impossible continuation line offset");

if (len == 0)
  return;

cont_len = RSTRING_LEN(hp->cont);
if (cont_len > 0) {
  /* rewind one byte so the leading LWS becomes the separator */
  --hp->mark;
  len = LEN(mark, p);
}

vptr = PTR_TO(mark);

/* normalize tab to space */
if (cont_len > 0) {
  assert(is_lws(*vptr) && "invalid leading white space");
  *vptr = ' ';
}
/* strip trailing LWS before appending */
for (end = len - 1; end >= 0 && is_lws(vptr[end]); end--);
rb_str_buf_cat(hp->cont, vptr, end + 1);

}

/*
 * Stores one complete response header into +hdr+ (an Array, Hash, or
 * hash-like object responding to []/[]=), special-casing the headers
 * that affect parser state (Connection, Content-Length,
 * Transfer-Encoding, Trailer).  hp->cont is left referencing the
 * stored value so continuation lines can append to it.
 */
static void write_response_value(struct http_parser *hp, VALUE hdr,

const char *buffer, const char *p)

{

VALUE f, v;
VALUE hclass;
const char *fptr = PTR_TO(start.field);
size_t flen = hp->s.field_len;
const char *vptr;
size_t vlen;

hp->has_header = 1;

/* Rack does not like Status headers, so we never send them */
if (CSTR_CASE_EQ(fptr, flen, "status")) {
  hp->cont = Qnil;
  return;
}

vptr = PTR_TO(mark);
vlen = LEN(mark, p);
VALIDATE_MAX_LENGTH(vlen, FIELD_VALUE);
VALIDATE_MAX_LENGTH(flen, FIELD_NAME);
f = str_new_dd_freeze(fptr, (long)flen);
v = stripped_str_new(vptr, (long)vlen);

/* needs more tests for error-checking here */
/*
 * TODO:
 * some of these tests might be too strict for real-world HTTP servers,
 * report real-world examples as we find them:
 */
if (STR_CSTR_CASE_EQ(f, "connection")) {
  hp_keepalive_connection(hp, v);
} else if (STR_CSTR_CASE_EQ(f, "content-length")) {
  if (!hp->has_body)
    rb_raise(eParserError, "Content-Length with no body expected");
  if (hp->chunked)
    rb_raise(eParserError,
             "Content-Length when chunked Transfer-Encoding is set");
  hp->len.content = parse_length(vptr, vlen);

  if (hp->len.content < 0)
    rb_raise(eParserError, "invalid Content-Length");

  invalid_if_trailer(hp);
} else if (STR_CSTR_CASE_EQ(f, "transfer-encoding")) {
  if (STR_CSTR_CASE_EQ(v, "chunked")) {
    if (!hp->has_body)
      rb_raise(eParserError,
               "chunked Transfer-Encoding with no body expected");
    if (hp->len.content >= 0)
      rb_raise(eParserError,
               "chunked Transfer-Encoding when Content-Length is set");

    hp->len.chunk = 0;
    hp->chunked = 1;
  }
  invalid_if_trailer(hp);
} else if (STR_CSTR_CASE_EQ(f, "trailer")) {
  if (!hp->has_body)
    rb_raise(eParserError, "trailer with no body");
  hp->has_trailer = 1;
  invalid_if_trailer(hp);
}

hclass = CLASS_OF(hdr);
if (hclass == rb_cArray) {
  /* Array destination: every header becomes a [field, value] pair */
  rb_ary_push(hdr, rb_ary_new3(2, f, v));
  hp->cont = v;
} else {
  /* hash-ish, try rb_hash_* first and fall back to slow rb_funcall */
  VALUE e;

  /* try to read the existing value */
  if (hclass == rb_cHash)
    e = rb_hash_aref(hdr, f);
  else
    e = rb_funcall(hdr, id_sq, 1, f);

  if (NIL_P(e)) {
    if (hclass == rb_cHash)
      rb_hash_aset(hdr, f, v);
    else
      rb_funcall(hdr, id_sq_set, 2, f, v);

    hp->cont = v;
  } else {
    /*
     * existing value, append to it, Rack 1.x uses newlines to represent
     * repeated cookies:
     *    { 'Set-Cookie' => "a=b\nc=d" }
     * becomes:
     *    "Set-Cookie: a=b\r\nSet-Cookie: c=d\r\n"
     */
    rb_str_buf_cat(e, "\n", 1);
    hp->cont = rb_str_buf_append(e, v);
  }
}

}

/*
 * Builds an "HTTP_"-prefixed Rack env key from an (already
 * snake_upcased) header field name.
 */
static VALUE req_field(const char *ptr, size_t len) {

size_t pfxlen = sizeof("HTTP_") - 1;
VALUE str = rb_str_new(NULL, pfxlen + len);
char *dst = RSTRING_PTR(str);

memcpy(dst, "HTTP_", pfxlen);
memcpy(dst + pfxlen, ptr, len);
assert(*(dst + RSTRING_LEN(str)) == '\0' &&
       "string didn't end with \\0"); /* paranoia */

return str;

}

/*
 * Converts a header field name in place to Rack env style:
 * ASCII lowercase becomes uppercase and '-' becomes '_'.
 */
static void snake_upcase(char *ptr, size_t len)
{
  size_t i;

  for (i = 0; i < len; i++) {
    char cur = ptr[i];

    if (cur >= 'a' && cur <= 'z')
      ptr[i] = cur & ~0x20;
    else if (cur == '-')
      ptr[i] = '_';
  }
}

/*
 * Stores one complete request header into the Rack +env+ hash.  The
 * field name is rewritten in place to CGI style (HTTP_FOO); headers
 * affecting parser state (Connection, Content-Length,
 * Transfer-Encoding, Trailer, Host) are special-cased.  Repeated
 * headers are comma-joined, except Host (first one wins).
 */
static void write_request_value(struct http_parser *hp, VALUE env,

char *buffer, const char *p)

{

char *fptr = PTR_TO(start.field);
size_t flen = hp->s.field_len;
char *vptr = PTR_TO(mark);
size_t vlen = LEN(mark, p);
VALUE key, val;
VALUE existing;

VALIDATE_MAX_LENGTH(flen, FIELD_NAME);
VALIDATE_MAX_LENGTH(LEN(mark, p), FIELD_VALUE);
/* mutates the input buffer in place: "Foo-Bar" -> "FOO_BAR" */
snake_upcase(fptr, flen);

/*
 * ignore "Version" headers since they conflict with the HTTP_VERSION
 * rack env variable.
 */
if (CONST_MEM_EQ("VERSION", fptr, flen)) {
  hp->cont = Qnil;
  return;
}
val = vlen == 0 ? rb_str_new(0, 0) : stripped_str_new(vptr, vlen);

if (CONST_MEM_EQ("CONNECTION", fptr, flen)) {
  key = g_HTTP_CONNECTION;
  hp_keepalive_connection(hp, val);
} else if (CONST_MEM_EQ("CONTENT_LENGTH", fptr, flen)) {
  key = g_CONTENT_LENGTH;
  hp->len.content = parse_length(vptr, vlen);
  if (hp->len.content < 0)
    rb_raise(eParserError, "invalid Content-Length");
  if (hp->len.content != 0)
    hp->has_body = 1;
  invalid_if_trailer(hp);
} else if (CONST_MEM_EQ("CONTENT_TYPE", fptr, flen)) {
  key = g_CONTENT_TYPE;
} else if (CONST_MEM_EQ("TRANSFER_ENCODING", fptr, flen)) {
  key = g_HTTP_TRANSFER_ENCODING;
  if (STR_CSTR_CASE_EQ(val, "chunked")) {
    hp->chunked = 1;
    hp->has_body = 1;
  }
  invalid_if_trailer(hp);
} else if (CONST_MEM_EQ("TRAILER", fptr, flen)) {
  key = g_HTTP_TRAILER;
  hp->has_trailer = 1;
  invalid_if_trailer(hp);
} else if (CONST_MEM_EQ("HOST", fptr, flen)) {
  key = g_HTTP_HOST;
  /* only record the first Host seen (v.host starts as Qnil) */
  if (NIL_P(hp->v.host))
    hp->v.host = val;
} else {
  key = req_field(fptr, flen);
  if (!HASH_ASET_DEDUPE)
    key = str_dd_freeze(key);
}
existing = rb_hash_aref(env, key);
if (NIL_P(existing)) {
  hp->cont = rb_hash_aset(env, key, val);
/*
 * Ignore repeated Host headers and favor host set by absolute URIs.
 * absoluteURI Request-URI takes precedence over
 * the Host: header (ref: rfc 2616, section 5.2.1)
 */
} else if (key == g_HTTP_HOST) {
   hp->cont = Qnil;
} else {
  /* repeated header: comma-join onto the existing value */
  rb_str_buf_cat(existing, ",", 1);
  hp->cont = rb_str_buf_append(existing, val);
}

}

/* Dispatches completed-header handling based on parser mode. */
static void write_value(struct http_parser *hp, VALUE hdr,

char *buf, const char *p)

{

if (hp->is_request)
  write_request_value(hp, hdr, buf, p);
else
  write_response_value(hp, hdr, buf, p);

}

/** Machine **/

%%{

machine http_parser;

# Actions invoked by the grammar included from kcar_http_common.rl.
# fpc/fc/cs are Ragel builtins; hp/hdr/buffer/p/pe come from the
# surrounding http_parser_execute() function.

action mark {MARK(mark, fpc); }

action snake_upcase_field { snake_upcase_char(deconst(fpc)); }
action downcase_char { downcase_char(deconst(fpc)); }
action request_method { request_method(hdr, PTR_TO(mark), LEN(mark, fpc)); }
action url_scheme { url_scheme(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
action host { request_host(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
action request_uri { request_uri(hdr, PTR_TO(mark), LEN(mark, fpc)); }
action fragment { set_fragment(hdr, PTR_TO(mark), LEN(mark, fpc)); }
action start_query { MARK(start.query, fpc); }
action query_string {
  query_string(hp, hdr, PTR_TO(start.query), LEN(start.query, fpc));
}
action request_path { request_path(hdr, PTR_TO(mark), LEN(mark, fpc)); }
action start_field { MARK(start.field, fpc); }
action write_field { hp->s.field_len = LEN(start.field, fpc); }
action start_value { MARK(mark, fpc); }
action write_value { write_value(hp, hdr, buffer, fpc); }
action write_cont_value { write_cont_value(hp, buffer, fpc); }
action http_version { http_version(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
action status_phrase { status_phrase(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }

action add_to_chunk_size {
  hp->len.chunk = step_incr(hp->len.chunk, fc, 16);
  if (hp->len.chunk < 0)
    rb_raise(eParserError, "invalid chunk size");
}
action header_done {
  finalize_header(hp, hdr);
  cs = http_parser_first_final;

  if (hp->chunked)
    cs = http_parser_en_ChunkedBody;

  /*
   * go back to Ruby so we can call the Rack application, we'll reenter
   * the parser iff the body needs to be processed.
   */
  goto post_exec;
}

action end_trailers {
  cs = http_parser_first_final;
  goto post_exec;
}

action end_chunked_body {
  hp->in_trailer = 1;
  cs = http_parser_en_Trailers;
  ++p;
  assert(p <= pe && "buffer overflow after chunked body");
  goto post_exec;
}

# copies dechunked payload directly into the destination string (hdr)
action skip_chunk_data {
skip_chunk_data_hack: {
  size_t nr = MIN((size_t)hp->len.chunk, REMAINING);
  memcpy(RSTRING_PTR(hdr) + hp->s.dest_offset, fpc, nr);
  hp->s.dest_offset += nr;
  hp->len.chunk -= nr;
  p += nr;
  assert(hp->len.chunk >= 0 && "negative chunk length");
  if ((size_t)hp->len.chunk > REMAINING) {
    /* more chunk data than buffer: return to Ruby for more input */
    hp->in_chunk = 1;
    goto post_exec;
  } else {
    fhold;
    fgoto chunk_end;
  }
}}

include kcar_http_common "kcar_http_common.rl";

}%%

/** Data **/

%% write data;

/*
 * Resets all parser state for (re)use.  cont and v.status are set
 * explicitly after the memset since Qfalse/Qnil need not be zero.
 */
static void http_parser_init(struct http_parser *hp) {

int cs = 0;
memset(hp, 0, sizeof(struct http_parser));
hp->cont = Qfalse; /* zero on MRI, should be optimized away by above */
hp->v.status = Qnil;
hp->len.content = -1; /* -1 == Content-Length not (yet) known */
%% write init;
hp->cs = cs;

}

/** exec **/
/*
 * Runs the Ragel machine over buffer[hp->offset, len).  May be called
 * repeatedly as more data arrives; cs/offset persist in hp between
 * calls.  During chunked body filtering, +hdr+ is the destination
 * string written to by the skip_chunk_data action.
 */
static void http_parser_execute(struct http_parser *hp,

VALUE hdr, char *buffer, size_t len)

{

const char *p, *pe;
int cs = hp->cs;
size_t off = hp->offset;

if (cs == http_parser_first_final)
  return;

assert(off <= len && "offset past end of buffer");

p = buffer+off;
pe = buffer+len;

assert((void *)(pe - p) == (void *)(len - off) &&
       "pointers aren't same distance");

/* resume copying chunk data without re-entering the machine */
if (hp->in_chunk) {
  hp->in_chunk = 0;
  goto skip_chunk_data_hack;
}
%% write exec;

post_exec: /* "_out:" also goes here */

if (hp->cs != http_parser_error)
  hp->cs = cs;
hp->offset = ulong2uint(p - buffer);

assert(p <= pe && "buffer overflow after parsing execute");
assert(hp->offset <= len && "offset longer than length");

}

/*
 * GC mark callback: cont and v are the only VALUEs held by the parser.
 * v.status and v.host share a union, so marking v.status covers both.
 */
static void kcar_mark(void *ptr) {

struct http_parser *hp = ptr;

rb_gc_mark(hp->cont);
rb_gc_mark(hp->v.status);

}

/* GC memsize callback: the parser struct has a fixed size. */
static size_t kcar_memsize(const void *ptr)
{
  (void)ptr; /* size does not depend on contents */
  return sizeof(struct http_parser);
}

/* TypedData descriptor so Ruby's GC can mark, free, and size parsers */
static const rb_data_type_t kcar_type = {

"kcar_parser",
{ kcar_mark, RUBY_TYPED_DEFAULT_FREE, kcar_memsize, /* reserved */ },
/* parent, data, [ flags ] */

};

/* allocation function for Kcar::Parser objects */
static VALUE kcar_alloc(VALUE klass) {

struct http_parser *hp;
return TypedData_Make_Struct(klass, struct http_parser, &kcar_type, hp);

}

/* extracts the http_parser struct from a Kcar::Parser instance */
static struct http_parser *data_get(VALUE self) {

struct http_parser *hp;

TypedData_Get_Struct(self, struct http_parser, &kcar_type, hp);
assert(hp && "failed to extract http_parser struct");
return hp;

}

/**

* call-seq:
*    Kcar::Parser.new => parser
*
* Creates a new parser.
*
* Document-method: reset
*
* call-seq:
*    parser.reset => parser
*
* Resets the parser so it can be reused by another client
*/

/* implements both Kcar::Parser#initialize and #reset (see rdoc above) */
static VALUE initialize(VALUE self) {

http_parser_init(data_get(self));

return self;

}

/*
 * Drops the first +nr+ bytes of +str+ in place, shifting any remaining
 * bytes to the front.  Used to advance a buffer past parsed headers or
 * filtered body data so the caller sees only unconsumed input.
 */
static void advance_str(VALUE str, off_t nr) {

long len = RSTRING_LEN(str);

if (len == 0)
  return;

rb_str_modify(str);

assert(nr <= len && "trying to advance past end of buffer");
len -= nr;
if (len > 0) /* unlikely, len is usually 0 */
  memmove(RSTRING_PTR(str), RSTRING_PTR(str) + nr, len);
rb_str_set_len(str, len);

}

/**

* call-seq:
*   parser.body_bytes_left => nil or Integer
*
* Returns the number of bytes left to run through Parser#filter_body.
* This will initially be the value of the "Content-Length" HTTP header
* after header parsing is complete and will decrease in value as
* Parser#filter_body is called for each chunk.  This should return
* zero for responses with no body.
*
* This will return nil on "Transfer-Encoding: chunked" responses as
* well as HTTP/1.0 responses where Content-Length is not set
*/

/* see rdoc above: remaining Content-Length bytes, or nil */
static VALUE body_bytes_left(VALUE self)
{
  struct http_parser *hp = data_get(self);

  /* nil for chunked responses and for unknown (unset) Content-Length */
  if (!hp->chunked && hp->len.content >= 0)
    return OFFT2NUM(hp->len.content);

  return Qnil;
}

/**

* call-seq:
*   parser.body_bytes_left = Integer
*
* Sets the number of bytes left to download for HTTP responses
* with "Content-Length".  This raises RuntimeError for chunked
* responses.
*/

/* see rdoc above: overrides the remaining Content-Length byte count */
static VALUE body_bytes_left_set(VALUE self, VALUE bytes) {

struct http_parser *hp = data_get(self);

if (hp->chunked)
  rb_raise(rb_eRuntimeError, "body_bytes_left= is not for chunked bodies");
hp->len.content = NUM2OFFT(bytes);
if (hp->len.content == 0)
    hp->body_eof_seen = 1; /* nothing left to read */
return bytes;

}

/**

* Document-method: chunked
* call-seq:
*    parser.chunked? => true or false
*
* This is used to detect if a response uses chunked Transfer-Encoding or not.
*/

/* Kcar::Parser#chunked?: chunked Transfer-Encoding detected? */
static VALUE chunked(VALUE self)
{
  if (data_get(self)->chunked)
    return Qtrue;
  return Qfalse;
}

static void check_buffer_size(long dlen) {

if ((uint64_t)dlen > UINT_MAX)
  rb_raise(rb_eRangeError, "headers too large to process (%ld bytes)", dlen);

}

/*
 * Common entry point for request/response parsing: validates the
 * buffer, runs the machine, raises ParserError on invalid input.
 */
static void parser_execute(struct http_parser *hp, VALUE hdr, VALUE buf) {

char *ptr;
long len;

Check_Type(buf, T_STRING);
rb_str_modify(buf); /* parsing may mutate buf (snake_upcase, LWS fixup) */
ptr = RSTRING_PTR(buf);
len = RSTRING_LEN(buf);
check_buffer_size(len);

http_parser_execute(hp, hdr, ptr, len);

if (hp->cs == http_parser_error)
  rb_raise(eParserError, "Invalid HTTP format, parsing fails.");

}

/**

* Document-method: headers
* call-seq:
*    parser.headers(hdr, data) => hdr or nil
*
* Takes a Hash and a String of data, parses the String of data filling
* in the Hash returning the Hash if parsing is finished, nil otherwise
* When returning the hdr Hash, it may modify data to point to where
* body processing should begin.
*
* Raises ParserError if there are parsing errors.
*/

/* response (and trailer) parsing entry point, see rdoc above */
static VALUE headers(VALUE self, VALUE hdr, VALUE data) {

struct http_parser *hp = data_get(self);

if (hp->is_request)
  rb_raise(rb_eRuntimeError, "parser is handling a request, not response");

parser_execute(hp, hdr, data);
VALIDATE_MAX_LENGTH(hp->offset, HEADER);

if (hp->cs == http_parser_first_final ||
    hp->cs == http_parser_en_ChunkedBody) {
  /* skip past the final header terminator so data starts at the body */
  advance_str(data, hp->offset + 1);
  hp->offset = 0;
  if (hp->in_trailer)
    return hdr; /* trailer parsing has no status line to report */
  else
    return rb_ary_new3(2, hp->v.status, hdr);
}

return Qnil; /* incomplete: need more data */

}

/*
 * Kcar::Parser#request: parses request headers from +buf+ into +env+,
 * returning env when the header (or trailer) block is complete, nil
 * otherwise.  On completion, +buf+ is advanced so any body data
 * starts at offset 0.
 */
static VALUE request(VALUE self, VALUE env, VALUE buf) {

struct http_parser *hp = data_get(self);

hp->is_request = 1;
Check_Type(buf, T_STRING);
parser_execute(hp, env, buf);

if (hp->cs == http_parser_first_final ||
    hp->cs == http_parser_en_ChunkedBody) {
  /* skip past the final header terminator so buf starts at the body */
  advance_str(buf, hp->offset + 1);
  hp->offset = 0;
  if (hp->in_trailer)
    hp->body_eof_seen = 1; /* trailers done implies body fully read */

  return env;
}
return Qnil; /* incomplete */

}

/* nonzero once the chunked body is fully parsed (or we hit trailers) */
static int chunked_eof(struct http_parser *hp)
{
  if (hp->cs == http_parser_first_final)
    return 1;
  return hp->in_trailer;
}

/**

* call-seq:
*    parser.body_eof? => true or false
*
* Detects if we're done filtering the body or not.  This can be used
* to detect when to stop calling Parser#filter_body.
*/

/* see rdoc above: have we consumed the entire message body? */
static VALUE body_eof(VALUE self) {

struct http_parser *hp = data_get(self);

/*
 * persistent without has_header: only the status line was parsed
 * (http_version sets persistent but not has_header for responses)
 */
if (!hp->has_header && hp->persistent)
  return Qtrue;

if (hp->chunked)
  return chunked_eof(hp) ? Qtrue : Qfalse;

if (!hp->has_body)
  return Qtrue;

return hp->len.content == 0 ? Qtrue : Qfalse;

}

/**

* call-seq:
*    parser.keepalive? => true or false
*
* This should be used to detect if a request can really handle
* keepalives and pipelining.  Currently, the rules are:
*
* 1. MUST be HTTP/1.1 +or+ HTTP/1.0 with "Connection: keep-alive"
* 2. MUST NOT have "Connection: close" set
* 3. If there is a response body, either a) Content-Length is set
*    or b) chunked encoding is used
*/

/* see rdoc above: may this connection be reused for another message? */
static VALUE keepalive(VALUE self) {

struct http_parser *hp = data_get(self);

if (hp->persistent) {
  if (hp->has_header && hp->has_body) {
    if (hp->chunked || (hp->len.content >= 0)) {
      /* requests additionally need their body to be fully consumed */
      if (!hp->is_request)
        return Qtrue;
      else
        return hp->body_eof_seen ? Qtrue : Qfalse;
    }

    /* unknown Content-Length and not chunked, we must assume close */
    return Qfalse;
  } else {
    /* 100 Continue, 304 Not Modified, etc... */
    return Qtrue;
  }
}
return Qfalse;

}

/**

* call-seq:
*    parser.filter_body(dst, src) => nil/dst
*
* Takes a String of +src+, will modify src if dechunking is done.
* Returns +nil+ if there is more +src+ left to process.  Returns
* +dst+ if body processing is complete. When returning +dst+,
* it may modify +src+ so the start of the string points to where
* the body ended so that trailer processing can begin.
*
* Raises ParserError if there are dechunking errors.
* Basically this is a glorified memcpy(3) that copies +src+
* into +dst+ while filtering it through the dechunker.
*/

/* see rdoc above: copies/dechunks body data from +src+ into +dst+ */
static VALUE filter_body(VALUE self, VALUE dst, VALUE src) {

struct http_parser *hp = data_get(self);
char *sptr;
long slen;

sptr = RSTRING_PTR(src);
slen = RSTRING_LEN(src);
check_buffer_size(slen);

StringValue(dst);
rb_str_modify(dst);
OBJ_TAINT(dst); /* keep weirdo $SAFE users happy */

/*
 * for now, only support filter_body for identity requests,
 * not responses; it's rather inefficient to blindly memcpy
 * giant request bodies; on the other hand, it simplifies
 * server-side code.
 */
if (hp->is_request && !hp->chunked) {
  /* no need to enter the Ragel machine for unchunked transfers */
  assert(hp->len.content >= 0 && "negative Content-Length");
  if (hp->len.content > 0) {
    long nr = MIN(slen, hp->len.content);

    rb_str_resize(dst, nr);
    memcpy(RSTRING_PTR(dst), sptr, nr);
    hp->len.content -= nr;
    if (hp->len.content == 0)
      hp->body_eof_seen = 1;
    advance_str(src, nr);
  }
  return dst;
}

if (!hp->chunked)
  rb_raise(rb_eRuntimeError, "filter_body is only for chunked bodies");

rb_str_resize(dst, slen); /* we can never copy more than slen bytes */
if (!chunked_eof(hp)) {
  hp->s.dest_offset = 0;
  /* the machine writes dechunked bytes into dst via skip_chunk_data */
  http_parser_execute(hp, dst, sptr, slen);
  if (hp->cs == http_parser_error)
    rb_raise(eParserError, "Invalid HTTP format, parsing fails.");

  assert(hp->s.dest_offset <= hp->offset &&
         "destination buffer overflow");
  advance_str(src, hp->offset);
  rb_str_set_len(dst, hp->s.dest_offset);

  if (RSTRING_LEN(dst) == 0 && chunked_eof(hp)) {
    assert(hp->len.chunk == 0 && "chunk at EOF but more to parse");
  } else {
    dst = Qnil; /* nil tells the caller to keep feeding us data */
  }
}
hp->offset = 0; /* for trailer parsing */
return dst;

}

/* extension entry point: defines Kcar::Parser and its methods */
void Init_kcar_ext(void)
{
  VALUE mKcar = rb_define_module("Kcar");
  VALUE cParser = rb_define_class_under(mKcar, "Parser", rb_cObject);

  /*
   * Document-class: Kcar::ParserError
   *
   * This is raised if there are parsing errors.
   */
  eParserError = rb_define_class_under(mKcar, "ParserError", rb_eIOError);
  e413 = rb_define_class_under(mKcar, "RequestEntityTooLargeError",
                               eParserError);
  e414 = rb_define_class_under(mKcar, "RequestURITooLongError",
                               eParserError);

  rb_define_alloc_func(cParser, kcar_alloc);
  rb_define_method(cParser, "initialize", initialize, 0);
  rb_define_method(cParser, "reset", initialize, 0);
  rb_define_method(cParser, "request", request, 2);
  rb_define_method(cParser, "headers", headers, 2);
  rb_define_method(cParser, "trailers", headers, 2);
  rb_define_method(cParser, "filter_body", filter_body, 2);
  rb_define_method(cParser, "body_bytes_left", body_bytes_left, 0);
  rb_define_method(cParser, "body_bytes_left=", body_bytes_left_set, 1);
  rb_define_method(cParser, "body_eof?", body_eof, 0);
  rb_define_method(cParser, "keepalive?", keepalive, 0);
  rb_define_method(cParser, "chunked?", chunked, 0);

  /*
   * The maximum size a single chunk when using chunked transfer encoding.
   * This is only a theoretical maximum used to detect errors in clients,
   * it is highly unlikely to encounter clients that send more than
   * several kilobytes at once.
   */
  rb_define_const(cParser, "CHUNK_MAX", OFFT2NUM(UH_OFF_T_MAX));

  /*
   * The maximum size of the body as specified by Content-Length.
   * This is only a theoretical maximum, the actual limit is subject
   * to the limits of the file system used for +Dir.tmpdir+.
   */
  rb_define_const(cParser, "LENGTH_MAX", OFFT2NUM(UH_OFF_T_MAX));
  id_sq = rb_intern("[]");
  id_sq_set = rb_intern("[]=");
  id_uminus = rb_intern("-@");

  /* TODO: gperf to make a perfect hash of common strings */

/* defines a frozen, deduped string constant and registers it with GC */
#define C(var, cstr) do { \
  var = str_new_dd_freeze((cstr), sizeof(cstr) - 1); \
  rb_gc_register_mark_object((var)); \
} while (0)

  C(g_CONTENT_LENGTH, "CONTENT_LENGTH");
  C(g_CONTENT_TYPE, "CONTENT_TYPE");
  C(g_FRAGMENT, "FRAGMENT");
  C(g_HTTP_HOST, "HTTP_HOST");
  C(g_HTTP_CONNECTION, "HTTP_CONNECTION");
  C(g_HTTP_TRAILER, "HTTP_TRAILER");
  C(g_HTTP_TRANSFER_ENCODING, "HTTP_TRANSFER_ENCODING");
  C(g_HTTP_VERSION, "HTTP_VERSION");
  C(g_PATH_INFO, "PATH_INFO");
  C(g_QUERY_STRING, "QUERY_STRING");
  C(g_REQUEST_METHOD, "REQUEST_METHOD");
  C(g_REQUEST_PATH, "REQUEST_PATH");
  C(g_REQUEST_URI, "REQUEST_URI");
  C(g_SERVER_NAME, "SERVER_NAME");
  C(g_SERVER_PORT, "SERVER_PORT");
  C(g_SERVER_PROTOCOL, "SERVER_PROTOCOL");
  C(g_rack_url_scheme, "rack.url_scheme");
  C(g_http, "http");
  C(g_https, "https");
  C(g_80, "80");
  C(g_443, "443");
#undef C
}