mirror of
https://github.com/golang/go.git
synced 2025-05-12 19:04:36 +00:00
HTML vs Html, URL vs Url, HTTP vs Http, current source is 6:1 in favor of the former, so change instances of the latter. R=r CC=go-dev http://go/go-review/1024026
655 lines
16 KiB
Go
655 lines
16 KiB
Go
// Copyright 2009 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// HTTP Request reading and parsing.
|
|
|
|
// The http package implements parsing of HTTP requests, replies,
|
|
// and URLs and provides an extensible HTTP server and a basic
|
|
// HTTP client.
|
|
package http
|
|
|
|
import (
|
|
"bufio";
|
|
"bytes";
|
|
"container/vector";
|
|
"fmt";
|
|
"io";
|
|
"os";
|
|
"strconv";
|
|
"strings";
|
|
)
|
|
|
|
// Limits enforced while reading a request, and the buffer size used
// when writing a chunked request body.
const (
	// Length at which readLineBytes rejects a line;
	// assumed < bufio.DefaultBufSize.
	maxLineLength	= 1 << 10;
	// Length at which readKeyValue rejects a value built from
	// continuation lines.
	maxValueLength	= 1 << 10;
	// Number of header lines at which ReadRequest gives up.
	maxHeaderLines	= 1 << 10;
	// Size of the buffer Request.Write fills for each chunk (4 KB).
	chunkSize	= 4 * 1024;
)
|
|
|
|
// HTTP request parsing errors.
|
|
type ProtocolError struct {
|
|
os.ErrorString;
|
|
}
|
|
|
|
var (
|
|
ErrLineTooLong = &ProtocolError{"header line too long"};
|
|
ErrHeaderTooLong = &ProtocolError{"header too long"};
|
|
ErrShortBody = &ProtocolError{"entity body too short"};
|
|
)
|
|
|
|
// badStringError describes a string that could not be accepted,
// pairing a short description of the problem with the string itself.
type badStringError struct {
	what	string;	// description of the problem
	str	string;	// the offending text
}

// String returns the description followed by the offending text in
// quoted form.
func (bad *badStringError) String() string {
	msg := fmt.Sprintf("%s %q", bad.what, bad.str);
	return msg;
}
|
|
|
|
// A Request represents a parsed HTTP request header.
|
|
type Request struct {
|
|
Method string; // GET, POST, PUT, etc.
|
|
RawURL string; // The raw URL given in the request.
|
|
URL *URL; // Parsed URL.
|
|
Proto string; // "HTTP/1.0"
|
|
ProtoMajor int; // 1
|
|
ProtoMinor int; // 0
|
|
|
|
// A header mapping request lines to their values.
|
|
// If the header says
|
|
//
|
|
// Accept-Language: en-us
|
|
// accept-encoding: gzip, deflate
|
|
// Connection: keep-alive
|
|
//
|
|
// then
|
|
//
|
|
// Header = map[string]string{
|
|
// "Accept-Encoding": "en-us",
|
|
// "Accept-Language": "gzip, deflate",
|
|
// "Connection": "keep-alive"
|
|
// }
|
|
//
|
|
// HTTP defines that header names are case-insensitive.
|
|
// The request parser implements this by canonicalizing the
|
|
// name, making the first character and any characters
|
|
// following a hyphen uppercase and the rest lowercase.
|
|
Header map[string]string;
|
|
|
|
// The message body.
|
|
Body io.Reader;
|
|
|
|
// Whether to close the connection after replying to this request.
|
|
Close bool;
|
|
|
|
// The host on which the URL is sought.
|
|
// Per RFC 2616, this is either the value of the Host: header
|
|
// or the host name given in the URL itself.
|
|
Host string;
|
|
|
|
// The referring URL, if sent in the request.
|
|
//
|
|
// Referer is misspelled as in the request itself,
|
|
// a mistake from the earliest days of HTTP.
|
|
// This value can also be fetched from the Header map
|
|
// as Header["Referer"]; the benefit of making it
|
|
// available as a structure field is that the compiler
|
|
// can diagnose programs that use the alternate
|
|
// (correct English) spelling req.Referrer but cannot
|
|
// diagnose programs that use Header["Referrer"].
|
|
Referer string;
|
|
|
|
// The User-Agent: header string, if sent in the request.
|
|
UserAgent string;
|
|
|
|
// The parsed form. Only available after ParseForm is called.
|
|
Form map[string][]string;
|
|
}
|
|
|
|
// ProtoAtLeast returns whether the HTTP protocol used
|
|
// in the request is at least major.minor.
|
|
func (r *Request) ProtoAtLeast(major, minor int) bool {
|
|
return r.ProtoMajor > major ||
|
|
r.ProtoMajor == major && r.ProtoMinor >= minor;
|
|
}
|
|
|
|
// valueOrDefault returns def when value is the empty string,
// and value itself otherwise.
func valueOrDefault(value, def string) string {
	if value == "" {
		return def;
	}
	return value;
}
|
|
|
|
// TODO(rsc): Change default UserAgent before open-source release.
|
|
const defaultUserAgent = "http.Client"
|
|
|
|
// Write writes an HTTP/1.1 request -- header and body -- in wire format.
|
|
// This method consults the following fields of req:
|
|
// URL
|
|
// Method (defaults to "GET")
|
|
// UserAgent (defaults to defaultUserAgent)
|
|
// Referer
|
|
// Header
|
|
// Body
|
|
//
|
|
// If Body is present, "Transfer-Encoding: chunked" is forced as a header.
|
|
func (req *Request) Write(w io.Writer) os.Error {
|
|
uri := URLEscape(req.URL.Path);
|
|
if req.URL.RawQuery != "" {
|
|
uri += "?" + req.URL.RawQuery;
|
|
}
|
|
|
|
fmt.Fprintf(w, "%s %s HTTP/1.1\r\n", valueOrDefault(req.Method, "GET"), uri);
|
|
fmt.Fprintf(w, "Host: %s\r\n", req.URL.Host);
|
|
fmt.Fprintf(w, "User-Agent: %s\r\n", valueOrDefault(req.UserAgent, defaultUserAgent));
|
|
|
|
if req.Referer != "" {
|
|
fmt.Fprintf(w, "Referer: %s\r\n", req.Referer);
|
|
}
|
|
|
|
if req.Body != nil {
|
|
// Force chunked encoding
|
|
req.Header["Transfer-Encoding"] = "chunked";
|
|
}
|
|
|
|
// TODO: split long values? (If so, should share code with Conn.Write)
|
|
// TODO: if Header includes values for Host, User-Agent, or Referer, this
|
|
// may conflict with the User-Agent or Referer headers we add manually.
|
|
// One solution would be to remove the Host, UserAgent, and Referer fields
|
|
// from Request, and introduce Request methods along the lines of
|
|
// Response.{GetHeader,AddHeader} and string constants for "Host",
|
|
// "User-Agent" and "Referer".
|
|
for k, v := range req.Header {
|
|
io.WriteString(w, k+": "+v+"\r\n");
|
|
}
|
|
|
|
io.WriteString(w, "\r\n");
|
|
|
|
if req.Body != nil {
|
|
buf := make([]byte, chunkSize);
|
|
Loop:
|
|
for {
|
|
var nr, nw int;
|
|
var er, ew os.Error;
|
|
if nr, er = req.Body.Read(buf); nr > 0 {
|
|
if er == nil || er == os.EOF {
|
|
fmt.Fprintf(w, "%x\r\n", nr);
|
|
nw, ew = w.Write(buf[0:nr]);
|
|
fmt.Fprint(w, "\r\n");
|
|
}
|
|
}
|
|
switch {
|
|
case er != nil:
|
|
if er == os.EOF {
|
|
break Loop;
|
|
}
|
|
return er;
|
|
case ew != nil:
|
|
return ew;
|
|
case nw < nr:
|
|
return io.ErrShortWrite;
|
|
}
|
|
}
|
|
// last-chunk CRLF
|
|
fmt.Fprint(w, "0\r\n\r\n");
|
|
}
|
|
|
|
return nil;
|
|
}
|
|
|
|
// Read a line of bytes (up to \n) from b.
|
|
// Give up if the line exceeds maxLineLength.
|
|
// The returned bytes are a pointer into storage in
|
|
// the bufio, so they are only valid until the next bufio read.
|
|
func readLineBytes(b *bufio.Reader) (p []byte, err os.Error) {
|
|
if p, err = b.ReadSlice('\n'); err != nil {
|
|
// We always know when EOF is coming.
|
|
// If the caller asked for a line, there should be a line.
|
|
if err == os.EOF {
|
|
err = io.ErrUnexpectedEOF;
|
|
}
|
|
return nil, err;
|
|
}
|
|
if len(p) >= maxLineLength {
|
|
return nil, ErrLineTooLong;
|
|
}
|
|
|
|
// Chop off trailing white space.
|
|
var i int;
|
|
for i = len(p); i > 0; i-- {
|
|
if c := p[i-1]; c != ' ' && c != '\r' && c != '\t' && c != '\n' {
|
|
break;
|
|
}
|
|
}
|
|
return p[0:i], nil;
|
|
}
|
|
|
|
// readLineBytes, but convert the bytes into a string.
|
|
func readLine(b *bufio.Reader) (s string, err os.Error) {
|
|
p, e := readLineBytes(b);
|
|
if e != nil {
|
|
return "", e;
|
|
}
|
|
return string(p), nil;
|
|
}
|
|
|
|
var colon = []byte{':'}
|
|
|
|
// Read a key/value pair from b.
|
|
// A key/value has the form Key: Value\r\n
|
|
// and the Value can continue on multiple lines if each continuation line
|
|
// starts with a space.
|
|
func readKeyValue(b *bufio.Reader) (key, value string, err os.Error) {
|
|
line, e := readLineBytes(b);
|
|
if e != nil {
|
|
return "", "", e;
|
|
}
|
|
if len(line) == 0 {
|
|
return "", "", nil;
|
|
}
|
|
|
|
// Scan first line for colon.
|
|
i := bytes.Index(line, colon);
|
|
if i < 0 {
|
|
goto Malformed;
|
|
}
|
|
|
|
key = string(line[0:i]);
|
|
if strings.Index(key, " ") >= 0 {
|
|
// Key field has space - no good.
|
|
goto Malformed;
|
|
}
|
|
|
|
// Skip initial space before value.
|
|
for i++; i < len(line); i++ {
|
|
if line[i] != ' ' {
|
|
break;
|
|
}
|
|
}
|
|
value = string(line[i:len(line)]);
|
|
|
|
// Look for extension lines, which must begin with space.
|
|
for {
|
|
c, e := b.ReadByte();
|
|
if c != ' ' {
|
|
if e != os.EOF {
|
|
b.UnreadByte();
|
|
}
|
|
break;
|
|
}
|
|
|
|
// Eat leading space.
|
|
for c == ' ' {
|
|
if c, e = b.ReadByte(); e != nil {
|
|
if e == os.EOF {
|
|
e = io.ErrUnexpectedEOF;
|
|
}
|
|
return "", "", e;
|
|
}
|
|
}
|
|
b.UnreadByte();
|
|
|
|
// Read the rest of the line and add to value.
|
|
if line, e = readLineBytes(b); e != nil {
|
|
return "", "", e;
|
|
}
|
|
value += " "+string(line);
|
|
|
|
if len(value) >= maxValueLength {
|
|
return "", "", &badStringError{"value too long for key", key};
|
|
}
|
|
}
|
|
return key, value, nil;
|
|
|
|
Malformed:
|
|
return "", "", &badStringError{"malformed header line", string(line)};
|
|
}
|
|
|
|
// atoi converts the run of decimal digits starting at s[i] to an integer.
// It returns the value, the position in s just past the last digit,
// and whether a valid number was found: there must be at least one
// digit and the value must not exceed 1000000.
// When ok is false the other two results are zero.
func atoi(s string, i int) (n, i1 int, ok bool) {
	const limit = 1000000;

	pos := i;
	for pos < len(s) {
		d := s[pos];
		if d < '0' || d > '9' {
			break;
		}
		n = 10*n + int(d-'0');
		if n > limit {
			return 0, 0, false;
		}
		pos++;
	}
	if pos == i {
		// No digit at s[i]; this includes i being past the end of s.
		return 0, 0, false;
	}
	return n, pos, true;
}
|
|
|
|
// Parse HTTP version: "HTTP/1.2" -> (1, 2, true).
|
|
func parseHTTPVersion(vers string) (int, int, bool) {
|
|
if vers[0:5] != "HTTP/" {
|
|
return 0, 0, false;
|
|
}
|
|
major, i, ok := atoi(vers, 5);
|
|
if !ok || i >= len(vers) || vers[i] != '.' {
|
|
return 0, 0, false;
|
|
}
|
|
var minor int;
|
|
minor, i, ok = atoi(vers, i+1);
|
|
if !ok || i != len(vers) {
|
|
return 0, 0, false;
|
|
}
|
|
return major, minor, true;
|
|
}
|
|
|
|
// cmap was the cache of CanonicalHeaderKey results.  It is no longer
// read or written by CanonicalHeaderKey: the cache was updated without
// any synchronization and gained an entry for every distinct header
// name a client chose to send.
// NOTE(review): kept only in case other code in the package refers to
// it; delete once that is confirmed not to be the case.
var cmap = make(map[string]string)

// CanonicalHeaderKey returns the canonical format of the
// HTTP header key s.  The canonicalization converts the first
// letter and any letter following a hyphen to upper case;
// the rest are converted to lowercase.  For example, the
// canonical key for "accept-encoding" is "Accept-Encoding".
func CanonicalHeaderKey(s string) string {
	// canonicalize: first letter upper case
	// and upper case after each dash.
	// (Host, User-Agent, If-Modified-Since).
	// HTTP headers are ASCII only, so no Unicode issues.
	a := make([]byte, len(s));
	upper := true;
	for i := 0; i < len(s); i++ {
		c := s[i];
		switch {
		case upper && 'a' <= c && c <= 'z':
			c -= 'a' - 'A';
		case !upper && 'A' <= c && c <= 'Z':
			c += 'a' - 'A';
		}
		a[i] = c;
		// Only the byte right after a hyphen is upper-cased.
		upper = c == '-';
	}
	return string(a);
}
|
|
|
|
type chunkedReader struct {
|
|
r *bufio.Reader;
|
|
n uint64; // unread bytes in chunk
|
|
err os.Error;
|
|
}
|
|
|
|
func newChunkedReader(r *bufio.Reader) *chunkedReader {
|
|
return &chunkedReader{r: r};
|
|
}
|
|
|
|
func (cr *chunkedReader) beginChunk() {
|
|
// chunk-size CRLF
|
|
var line string;
|
|
line, cr.err = readLine(cr.r);
|
|
if cr.err != nil {
|
|
return;
|
|
}
|
|
cr.n, cr.err = strconv.Btoui64(line, 16);
|
|
if cr.err != nil {
|
|
return;
|
|
}
|
|
if cr.n == 0 {
|
|
// trailer CRLF
|
|
for {
|
|
line, cr.err = readLine(cr.r);
|
|
if cr.err != nil {
|
|
return;
|
|
}
|
|
if line == "" {
|
|
break;
|
|
}
|
|
}
|
|
cr.err = os.EOF;
|
|
}
|
|
}
|
|
|
|
func (cr *chunkedReader) Read(b []uint8) (n int, err os.Error) {
|
|
if cr.err != nil {
|
|
return 0, cr.err;
|
|
}
|
|
if cr.n == 0 {
|
|
cr.beginChunk();
|
|
if cr.err != nil {
|
|
return 0, cr.err;
|
|
}
|
|
}
|
|
if uint64(len(b)) > cr.n {
|
|
b = b[0 : cr.n];
|
|
}
|
|
n, cr.err = cr.r.Read(b);
|
|
cr.n -= uint64(n);
|
|
if cr.n == 0 && cr.err == nil {
|
|
// end of chunk (CRLF)
|
|
b := make([]byte, 2);
|
|
if _, cr.err = io.ReadFull(cr.r, b); cr.err == nil {
|
|
if b[0] != '\r' || b[1] != '\n' {
|
|
cr.err = os.NewError("malformed chunked encoding");
|
|
}
|
|
}
|
|
}
|
|
return n, cr.err;
|
|
}
|
|
|
|
// ReadRequest reads and parses a request from b.
|
|
func ReadRequest(b *bufio.Reader) (req *Request, err os.Error) {
|
|
req = new(Request);
|
|
|
|
// First line: GET /index.html HTTP/1.0
|
|
var s string;
|
|
if s, err = readLine(b); err != nil {
|
|
return nil, err;
|
|
}
|
|
|
|
var f []string;
|
|
if f = strings.Split(s, " ", 3); len(f) < 3 {
|
|
return nil, &badStringError{"malformed HTTP request", s};
|
|
}
|
|
req.Method, req.RawURL, req.Proto = f[0], f[1], f[2];
|
|
var ok bool;
|
|
if req.ProtoMajor, req.ProtoMinor, ok = parseHTTPVersion(req.Proto); !ok {
|
|
return nil, &badStringError{"malformed HTTP version", req.Proto};
|
|
}
|
|
|
|
if req.URL, err = ParseURL(req.RawURL); err != nil {
|
|
return nil, err;
|
|
}
|
|
|
|
// Subsequent lines: Key: value.
|
|
nheader := 0;
|
|
req.Header = make(map[string]string);
|
|
for {
|
|
var key, value string;
|
|
if key, value, err = readKeyValue(b); err != nil {
|
|
return nil, err;
|
|
}
|
|
if key == "" {
|
|
break;
|
|
}
|
|
if nheader++; nheader >= maxHeaderLines {
|
|
return nil, ErrHeaderTooLong;
|
|
}
|
|
|
|
key = CanonicalHeaderKey(key);
|
|
|
|
// RFC 2616 says that if you send the same header key
|
|
// multiple times, it has to be semantically equivalent
|
|
// to concatenating the values separated by commas.
|
|
oldvalue, present := req.Header[key];
|
|
if present {
|
|
req.Header[key] = oldvalue+","+value;
|
|
} else {
|
|
req.Header[key] = value;
|
|
}
|
|
}
|
|
|
|
// RFC2616: Must treat
|
|
// GET /index.html HTTP/1.1
|
|
// Host: www.google.com
|
|
// and
|
|
// GET http://www.google.com/index.html HTTP/1.1
|
|
// Host: doesntmatter
|
|
// the same. In the second case, any Host line is ignored.
|
|
if v, present := req.Header["Host"]; present && req.URL.Host == "" {
|
|
req.Host = v;
|
|
}
|
|
|
|
// RFC2616: Should treat
|
|
// Pragma: no-cache
|
|
// like
|
|
// Cache-Control: no-cache
|
|
if v, present := req.Header["Pragma"]; present && v == "no-cache" {
|
|
if _, presentcc := req.Header["Cache-Control"]; !presentcc {
|
|
req.Header["Cache-Control"] = "no-cache";
|
|
}
|
|
}
|
|
|
|
// Determine whether to hang up after sending the reply.
|
|
if req.ProtoMajor < 1 || (req.ProtoMajor == 1 && req.ProtoMinor < 1) {
|
|
req.Close = true;
|
|
} else if v, present := req.Header["Connection"]; present {
|
|
// TODO: Should split on commas, toss surrounding white space,
|
|
// and check each field.
|
|
if v == "close" {
|
|
req.Close = true;
|
|
}
|
|
}
|
|
|
|
// Pull out useful fields as a convenience to clients.
|
|
if v, present := req.Header["Referer"]; present {
|
|
req.Referer = v;
|
|
}
|
|
if v, present := req.Header["User-Agent"]; present {
|
|
req.UserAgent = v;
|
|
}
|
|
|
|
// TODO: Parse specific header values:
|
|
// Accept
|
|
// Accept-Encoding
|
|
// Accept-Language
|
|
// Authorization
|
|
// Cache-Control
|
|
// Connection
|
|
// Date
|
|
// Expect
|
|
// From
|
|
// If-Match
|
|
// If-Modified-Since
|
|
// If-None-Match
|
|
// If-Range
|
|
// If-Unmodified-Since
|
|
// Max-Forwards
|
|
// Proxy-Authorization
|
|
// Referer [sic]
|
|
// TE (transfer-codings)
|
|
// Trailer
|
|
// Transfer-Encoding
|
|
// Upgrade
|
|
// User-Agent
|
|
// Via
|
|
// Warning
|
|
|
|
// A message body exists when either Content-Length or Transfer-Encoding
|
|
// headers are present. Transfer-Encoding trumps Content-Length.
|
|
if v, present := req.Header["Transfer-Encoding"]; present && v == "chunked" {
|
|
req.Body = newChunkedReader(b);
|
|
} else if v, present := req.Header["Content-Length"]; present {
|
|
length, err := strconv.Btoui64(v, 10);
|
|
if err != nil {
|
|
return nil, &badStringError{"invalid Content-Length", v};
|
|
}
|
|
// TODO: limit the Content-Length. This is an easy DoS vector.
|
|
raw := make([]byte, length);
|
|
n, err := b.Read(raw);
|
|
if err != nil || uint64(n) < length {
|
|
return nil, ErrShortBody;
|
|
}
|
|
req.Body = bytes.NewBuffer(raw);
|
|
}
|
|
|
|
return req, nil;
|
|
}
|
|
|
|
func parseForm(query string) (m map[string][]string, err os.Error) {
|
|
data := make(map[string]*vector.StringVector);
|
|
for _, kv := range strings.Split(query, "&", 0) {
|
|
kvPair := strings.Split(kv, "=", 2);
|
|
|
|
var key, value string;
|
|
var e os.Error;
|
|
key, e = URLUnescape(kvPair[0]);
|
|
if e == nil && len(kvPair) > 1 {
|
|
value, e = URLUnescape(kvPair[1]);
|
|
}
|
|
if e != nil {
|
|
err = e;
|
|
}
|
|
|
|
vec, ok := data[key];
|
|
if !ok {
|
|
vec = vector.NewStringVector(0);
|
|
data[key] = vec;
|
|
}
|
|
vec.Push(value);
|
|
}
|
|
|
|
m = make(map[string][]string);
|
|
for k, vec := range data {
|
|
m[k] = vec.Data();
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
// ParseForm parses the request body as a form for POST requests, or the raw query for GET requests.
|
|
// It is idempotent.
|
|
func (r *Request) ParseForm() (err os.Error) {
|
|
if r.Form != nil {
|
|
return;
|
|
}
|
|
|
|
var query string;
|
|
|
|
switch r.Method {
|
|
case "GET":
|
|
query = r.URL.RawQuery;
|
|
case "POST":
|
|
if r.Body == nil {
|
|
return os.ErrorString("missing form body");
|
|
}
|
|
ct, _ := r.Header["Content-Type"];
|
|
switch strings.Split(ct, ";", 2)[0] {
|
|
case "text/plain", "application/x-www-form-urlencoded", "":
|
|
var b []byte;
|
|
if b, err = io.ReadAll(r.Body); err != nil {
|
|
return;
|
|
}
|
|
query = string(b);
|
|
// TODO(dsymonds): Handle multipart/form-data
|
|
default:
|
|
return &badStringError{"unknown Content-Type", ct};
|
|
}
|
|
}
|
|
r.Form, err = parseForm(query);
|
|
return;
|
|
}
|
|
|
|
// FormValue returns the first value for the named component of the query.
|
|
// FormValue calls ParseForm if necessary.
|
|
func (r *Request) FormValue(key string) string {
|
|
if r.Form == nil {
|
|
r.ParseForm();
|
|
}
|
|
if vs, ok := r.Form[key]; ok && len(vs) > 0 {
|
|
return vs[0];
|
|
}
|
|
return "";
|
|
}
|