/* 
 * Copyright (C) 2000-2004 by Oswald Buddenhagen <puf@ossi.cjb.net>
 * based on puf 0.1.x (C) 1999,2000 by Anders Gavare <gavare@hotmail.com>
 *
 * You may modify and distribute this code under the terms of the GPL.
 * There is NO WARRANTY of any kind. See COPYING for details.
 *
 * puf.h - global definitions
 *
 */

#ifndef _PUF_H
#define _PUF_H

#include <config.h>                                       

#ifdef USE_LL
# define _FILE_OFFSET_BITS 64
# define SSOFFT "lli"
#else
# define SSOFFT "li"
#endif
#define SOFFT "%"SSOFFT

#ifndef _O_BINARY
# define _O_BINARY 0
#endif

#include <sys/types.h>                             
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <sys/socket.h>
#include <sys/utsname.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <stdio.h>
#include <ctype.h>
#include <strings.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <signal.h>
#include <time.h>
#include <utime.h>
#include <fcntl.h>
#include <errno.h>
#include <stdarg.h>
#include <stddef.h>
#include <limits.h>
#include <locale.h>

/*
 *  Compile-time switches and built-in defaults.
 */

/*  Undef this, if you don't want the -d switch  */
#define DEBUG

/*  Undef this, if you want no message about incorrect DNS resolver configs  */
#define CORRECT_DNS

/*  String length limit:  */
#define	SHORTSTR			512

/*  Max buf size when reading from the network.  */
#define	MAXBUFSIZE			0x8000
/*  Overlapping length of consecutive buffer scans  */
#define OVERLAPLEN			200
/*  Maximal allowed header length  */
#define MAXHEADERLEN			0x10000

/*  Max file size when using -xo/-xO or multi-src targets. In MB.  */
#define DEFAULT_MAX_BUFFER		10

/*  Filename of a url ending with a slash:  */
#define	DEFAULT_INDEX_FILE_NAME		"index.html"

/*  Extension to append to partial files  */
#define PART_EXT			".part"

/*  This could be set higher than 0 if you like the debug output...  */
#define	DEFAULT_VERBOSITY		0

/*  Default max nr of connections:  */
#define	DEFAULT_MAX_ACTIVE		20

/*  Default max of asynchronous dns lookups:  */
#define	DEFAULT_MAX_DNS_FORKS		10

/*  NOTE(review): the timeout units are not stated here - presumably seconds; confirm  */
/*  Default timeout when awaiting DNS lookup completion  */
#define	DEFAULT_TIMEOUT_DNS		60
/*  Default timeout when connecting a host  */
#define	DEFAULT_TIMEOUT_CONNECT		60
/*  Default timeout when awaiting http reply data  */
#define	DEFAULT_TIMEOUT_DATA		120

/*  How long the bandwidth averaging timeframe should be  */
#define AVERAGING_TIMEFRAME		100

/*  Default Max number of connection attempts per url:  */
#define	DEFAULT_MAX_ATTEMPTS		5

/*
 *  Declares strdup() when HAVE_STRDUP is not defined.
 *  NOTE(review): the definition is presumably supplied by one of the
 *  project's own source files in that case - confirm.
 */
#ifndef HAVE_STRDUP
char *strdup(const char *s);
#endif

/*
 *  RND(m) - pseudo-random integer in the range [0, m), derived from rand().
 *
 *  The integer variant scales the upper bits of rand() (everything above
 *  bit 14) by m; m must stay below 32768 so the product fits into an int.
 *  The argument is parenthesized so that expressions such as RND(a + b)
 *  scale the whole expression.
 *  NOTE(review): with RAND_MAX == 32767 both (rand() >> 15) and
 *  (RAND_MAX >> 15) are 0, so the integer variant always yields 0 there.
 *  The alternative variant avoids that at the price of floating point math.
 */
#if 1
# define RND(m) ((rand() >> 15) * (m) / ((RAND_MAX >> 15) + 1)) /* m < 32768 */
#else
# define RND(m) ((int)(rand() * 1.0 * (m) / (RAND_MAX + 1.0))) /* needs float */
#endif

/*  Structures for hostlist.c  */

/*
 *  One IP address of a host together with per-address retry state.
 *  NOTE(review): the field meanings below are inferred from the names only;
 *  confirm against hostlist.c / http_conn.c.
 */
typedef struct haddr_t {
    struct in_addr addr;	/*  the IPv4 address  */
    time_t retry_time;		/*  presumably: earliest time of the next attempt  */
    u_int
	attempt:8,		/*  presumably: attempt counter for this address  */
	last_errt:3,		/*  presumably: type of the last error seen  */
	err_wait:16;		/*  presumably: current wait interval after errors  */
} haddr_t;

/*
 *  Lookup result for a host: names, state flags and the address list.
 *  ips[] is a zero-length trailing array (GNU extension) that, per its
 *  comment, actually holds num_ips entries.
 *  NOTE(review): the struct is presumably allocated with room for those
 *  entries - confirm in hostlist.c.
 */
typedef struct hinfo_t {
    char *name;			/*  back link to primary name  */
    char *lname;		/*  back link to local storage name  */
    u_int
	is_http11:1,		/*  1 till opposite proven  */
	maybe:1;		/*  proxy finder state flag  */
    short num_ips;		/*  0 if hostname not found  */
    short cur_ip;		/*  for round-robin  */
    haddr_t ips[0];		/*  actually num_ips  */
} hinfo_t;

/*
 *  Host name entry, chained through next.
 *  The ready flag decides how info has to be interpreted: a hinfo_t pointer
 *  once ready is set, a whost_t pointer (stored through the hinfo_t * type)
 *  while it is not.
 */
typedef struct host_t {
    struct host_t *next;

    hinfo_t *info;		/*  whost_t* if !ready  */
    u_char ready:1;		/*  selects the interpretation of info  */
    char name[0];		/*  host name, stored inline after the struct (zero-length trailing array)  */
} host_t;


/*
 *  proxy spec.
 *  cgi_path[] is a zero-length trailing array; per the have_auth comment the
 *  BASE64 encoded "User:Password" follows the cgi path in the same storage
 *  when have_auth is set.
 */
typedef struct proxy_t {
    struct proxy_t *next;	/*  next proxy in chain  */

    host_t *host;		/*  Proxy host  */
    u_short port;		/*  Proxy port  */
    u_char have_auth:1;		/*  User:Password (BASE64) after cgi_path?  */
    char cgi_path[0];		/*  Only for "cgi-proxies"  */
} proxy_t;

/*  Entry of a proxy array: a proxy plus the bookkeeping used for balancing load across proxies  */
typedef struct proxyent_t {
    proxy_t *proxy;		/*  the proxy this entry refers to  */
    int score;			/*  Connection/byte count - for capacity-driven balancing  */
    u_short ratio;		/*  Requested load ratio  */
/*    u_short cur_conn;*/		/*  Currently open connections - for load-driven balancing - now unused */
} proxyent_t;

/*
 *  Array of proxy entries.  The header fields are declared exactly like
 *  those of ptrarr_t ("BC with ptrarr_t").
 *  NOTE(review): presumably nents = entries in use and rents = entries
 *  allocated; cow presumably marks the array as shared (copy before
 *  modifying) - confirm against the array helpers.
 */
typedef struct proxyarr_t {
    u_int nents, rents, spare:31, cow:1;	/* BC with ptrarr_t */
    proxyent_t ents[0];		/*  the entries, stored inline (zero-length trailing array)  */
} proxyarr_t;


/*
 *  fake user agent
 *  NOTE(review): ratio is presumably the relative weight with which this
 *  agent string gets picked - confirm in http_req.c / getopts.c.
 */
typedef struct agent_t {
    int ratio;
    char agent[0];		/*  agent string, stored inline (zero-length trailing array)  */
} agent_t;


/*
 *  generic array of pointers
 *  NOTE(review): presumably nents = entries in use and rents = entries
 *  allocated; cow presumably marks the array as shared (copy before
 *  modifying) - confirm against the array helpers.
 */
typedef struct ptrarr_t {
    u_int nents, rents, spare:31, cow:1;
    void *ents[0];		/*  the pointers, stored inline (zero-length trailing array)  */
} ptrarr_t;


/*  disposition path spec. for -P; the path text is stored inline after the struct  */
typedef struct disp_path_t {
    int file_num;		/* for -xE */
    char path[0];		/*  zero-length trailing array holding the path  */
} disp_path_t;


/*
 *  extension/pattern/mimetype spec. for -A/-R
 *  NOTE(review): the flag meanings are not visible in this header.
 *  Presumably acc distinguishes accept (-A) from reject (-R), while type and
 *  pat select between the extension / pattern / mimetype interpretations of
 *  data - confirm in getopts.c and url.c (test_pat).
 */
typedef struct filter_t {
    u_int acc:1;
    u_int type:1;
    u_int pat:1;
    char data[0];		/*  the spec text, stored inline (zero-length trailing array)  */
} filter_t;

/*
 *  command line options
 *
 *  Each field's comment names the switch(es) it belongs to.  The right-hand
 *  column (":N", "short") presumably records the value range the field
 *  needs - NOTE(review): confirm; all of these fields are declared as
 *  plain int here.
 */
typedef struct options_t {
    int follow_src;		/*  -p[r[+[+]]]		:3	*/
    int follow_href;		/*  -r[+[+]]		:3	*/
    int follows_max;		/*  [-{p,{{pr,r}{,+{,+}}}}]	:3	*/
    int dir_mode;		/*  -nd & -xd		:2	*/
    int max_depth;		/*  -ld			short	*/
    int max_recurse;		/*  -l			short	*/
    ptrarr_t *filter_list;	/*  -A/-R			*/
    ptrarr_t *ldom_list;	/*  -D/-Dl			*/
    ptrarr_t *rdom_list;	/*  -D/-Dr			*/
    off_t max_bytes;		/*  -lb				*/
    off_t buff_size;		/*  -xs				*/
    int inhibit_cgiget;		/*  -xg & -ng		:2	*/
    int force_html;		/*  -F			:1	*/
    int update_mode;		/*  -u, -c & -nc	:2	*/
    int ext_dump;		/*  -xo & -xO		:2	*/
    int dump_refs;		/*  -xr			:1	*/
    disp_path_t *disp_path;	/*  -P				*/
    char *index_filename;	/*  -xi				*/
#ifndef HAVE_CYGWIN
    int fat_quotes;		/*  -xq			:1	*/
#endif
    int no_touch;		/*  -nt			:1	*/
    int delete_broken;		/*  -nb			:1	*/
    int send_if_range;		/*  -ni			:1	*/
    int send_referer;		/*  -nR			:1	*/
    int uar_total;		/*  [-U & -iU]			*/
    ptrarr_t *user_agents;	/*  -U & -iU			*/
    ptrarr_t *aux_headers;	/*  -xH				*/
    ptrarr_t *save_headers;	/*  -xh				*/
    int timeout_connect;	/*  -Tc			short	*/
    int timeout_data;		/*  -Td			short	*/
    int max_attempts;		/*  -t			short	*/
    int fail_no_wait;		/*  -nw			:1	*/
    int http_err_trans;		/*  -xT			:1	*/
    ptrarr_t *bind_addrs;	/*  -xb & ib			*/
    proxyarr_t *proxies;	/*  -y & -iy			*/

    char *url_prefix;		/*  -B				*/
    int enumerate_urls;		/*  -xE			:1	*/

    u_int cow:1;		/*  NOTE(review): presumably "shared, copy before modifying" - confirm in getopts.c  */
} options_t;


/*
 *  Disposition (output target) shared by URLs; referenced from
 *  url_parm_t.disposition.
 *  NOTE(review): created and devnull presumably record whether the target
 *  has been created already and whether output is discarded - confirm in
 *  http_rsp.c.
 */
typedef struct disp_t {
    u_char multi:1;		/*  -r, -p or multiple URLs	*/
    u_char created:1;
    u_char devnull:1;
    char disp[0];		/*  target name, stored inline (zero-length trailing array)  */
} disp_t;

/*  shared url parameters  */
typedef struct url_parm_t {
    options_t *opt;
    char *http_auth;		/*  User:Password in BASE64 encoding  */
    disp_t *disposition;	/*  -O, [-xE]			*/
    proxy_t *proxy;		/*  -xy, -xyy			*/
    time_t time_stamp;		/*  -xu				*/
    uint ref_count:31;		/*  >1 -> actually copy in detach_parm() */
    uint strictproxy:1;		/*  -xyy			*/
} url_parm_t;

/*
 *  A URL known to the program: its links into the global chain and to its
 *  referer, the shared parameters, per-URL state bits, and the target
 *  (host, port, local part).  The local part is stored inline in the
 *  zero-length trailing array local_part[].
 */
typedef struct url_t {
    struct url_t *next;		/*  next url in global chain  */
    struct url_t *referer;	/*  Referer URL (or NULL)  */

    url_parm_t *parm;		/*  URL parameters  */
    u_int
	attempt:8,		/*  Nr of attempt to download this URL  */
	is_top_dir:1,		/*  Get disp_off from this url?  */
	is_requisite:1,		/*  1=comes from src=, otherwise href=  */
	save_content:2,		/*  May be unset by -A/-R */
	relocs:3;		/*  How often this URL was redirected  */

    /*  Url: host, port and local part:  */
    host_t *host;		/*  Host of URL  */
    u_short port;		/*  Host port  */
    short link_depth;		/*  For -l */
    short path_len;		/*  Length of the path in the local part  */
    short disp_pathoff;		/*  Disposition path; points into local_part  */
    char local_part[0];		/*  Variable length local part  */
} url_t;

/*
 *  queue element for waiting urls in download queues
 *  (queue_urls_connect, declared further down, is a queue of these)
 */
typedef struct wurl_t {
    struct wurl_t *next;	/*  next url in current queue  */

    url_t *url;			/*  the associated url  */
} wurl_t;


/*
 *  buffer element
 *  Chained through next; aurl_t.buff links these into a queue of data
 *  still to be written out.
 */
typedef struct buffe_t {
    struct buffe_t *next;
    size_t len;			/*  NOTE(review): presumably the number of bytes in data[] - confirm  */
    char data[0];		/*  payload, stored inline (zero-length trailing array)  */
} buffe_t;

/*
 *  queue element for active urls
 *
 *  Holds everything tied to one transfer in progress: size accounting,
 *  socket and file descriptors, the proxy in use, header/parse state and
 *  the buffers.  The local file name is stored inline in the zero-length
 *  trailing array disposition[]; displen is its length.
 */
typedef struct aurl_t {
    struct aurl_t *next;	/*  next url in current queue  */

    url_t *url;			/*  the associated url  */

    off_t file_off;		/*  Length of partial content  */
    off_t size_total;		/*  Size according to host (0=?)  */
    off_t size_fetched;		/*  Size fetched so far  */

    time_t file_time;		/*  Partial content creation time  */
    time_t timeout;		/*  Next timeout  */
    
    int socket;			/*  TCP socket  */
    int ipidx;			/*  Index of the used haddr_t  */
/*    int bipidx;	*/		/*  Index of the bound outgoing ip  */
    proxyent_t *proxye;		/*  Pointer to used proxy entry  */
    proxy_t *proxy;		/*  Pointer to used proxy  */
    int pipidx;			/*  Index of the proxy's used haddr_t  */
    
    int f;			/*  File descriptor (-1 if switched)  */

    u_int
	file_created:1,		/*  Did we already create the file?  */
	http_done_header:1,	/*  End of header reached?  */
	content_is_text:1,	/*  Dump to multi-dest?  */
	content_is_html:1,	/*  Try recursive search on file?  */
	reloc:1;		/*  Is this url being redirected?  */

    char *auth_chall;		/*  Challenge for auth_lev  */
    
    char *headers;		/*  HTTP headers for saving to file  */
    int hdrslen, hdrssiz;	/*  Actual and allocated length of above  */

    /*  Input data overlap buffer  */
    char *buffer;		/*  Overlap buffer  */
    short size;			/*  Current size of overlap buffer  */
    short offset;		/*  Current fill state of overlap buffer  */

    buffe_t *buff;		/*  Data to writeout; circular_queue  */
    size_t buff_len;		/*  To prevent chewing up all mem  */

    short http_result_code;	/*  200, 400 etc. 0 means no status code by now  */

    short displen;		/*  Length of >>  */
    char disposition[0];	/*  Local file name  */
} aurl_t;

/*
 *  parsed url
 *  The auth and lpart strings are described by pointer plus explicit length.
 *  NOTE(review): they presumably point into the string that was parsed and
 *  are not NUL-terminated on their own - confirm in url.c.
 */
typedef struct {
/*    const char *srct;*/		/*  only for error messages  */
    host_t *host;		/*  host the URL refers to  */
    const char *auth;		/*  authentication part; authlen bytes  */
    const char *lpart;		/*  local part; lpartlen bytes  */
    int authlen;
    int lpartlen;
    int pathlen;		/*  NOTE(review): presumably the length of the path inside lpart - confirm  */
    u_short port;
/* #define PR_HTTP	-1 */
/*    short proto; */		/*  >= 0 => strlen(proto_string)  */
} purl_t;

/*
 *  queue element for hosts waiting in lookup queues
 *  While a host is not ready, host_t.info refers to one of these (see the
 *  comment on host_t.info).
 */
typedef struct whost_t {
    struct whost_t *next;	/*  fork/read queue link  */
    host_t *host;		/*  host being looked up  */

    url_t *urls;		/*  urls depending on this lookup; circular queue  */
    int num_proxies;		/*  number of proxies depending on this lookup  */
} whost_t;

/*
 *  State of one DNS helper process; list_dns_busy and list_dns_idle
 *  (declared further down) are lists of these.
 */
typedef struct dnsproc_t {
    struct dnsproc_t *next;

    whost_t *whost;		/*  host being looked up  */
    int fd;			/*  control socket  */
    int pid;			/*  pid of the DNS helper process  */
    long timeout;		/*  lookup or idle timeout  */
} dnsproc_t;


/*
 * notes on the iterators:
 * - removal must be done before the set/queue is possibly extended
 * - new elements will be missed by the iterator
 */

/*
 * set: prepend, remove random
 *
 * Intrusive singly linked list; elements need a "next" member and q is the
 * head pointer (an lvalue).  Expression arguments are parenthesized so that
 * arguments like &node or *pp work; they may be evaluated more than once,
 * so they must be free of side effects.
 *
 *  ls_add(q, e)               push e onto the front of q
 *  ls_iterate(q, t, e, a)     run statement(s) a for each element; declares
 *                             "t *e" as the cursor, so e must be an identifier
 *  ls_iterate_rm(q, t, e, a)  same, but additionally declares e##p (pointer
 *                             to the link leading to e) so that a may call
 *                             ls_remove(e)
 *  ls_remove(e)               unlink the current element; only valid inside
 *                             ls_iterate_rm with the same cursor name.  The
 *                             loop still executes "e##p = &e->next" after a,
 *                             so a has to "continue" (or leave the loop)
 *                             right after removing.
 */
#define ls_add(q, e) do {(e)->next = (q); (q) = (e);} while(0)
#define ls_iterate(q, t, e, a) do {t *e; for (e = (q); e; e = e->next) a} while(0)
#define ls_iterate_rm(q, t, e, a) do {t *e, **e##p; for (e##p = &(q); (e = *e##p); ) {a e##p = &e->next; }} while(0)
#define ls_remove(e) (*e##p = (e)->next)

/*
 * circular queue: append, remove first, move first to end
 *
 * q points to the LAST element (or is 0 when the queue is empty); the first
 * element is q->next.
 *
 *  cq_append(q, e)            insert e after the last element and make it
 *                             the new last one
 *  cq_prepend(q, e)           insert e as the new first element
 *  cq_iterate(q, t, e, a)     run a for each element, first to last
 *  cq_consume(q, t, e, a)     run a with e = current first element until the
 *                             element that was last on entry has been
 *                             handled; a is expected to take the first
 *                             element out of the way (e.g. cq_rm1st)
 *  cq_rm1st(q)                unlink the first element; q must not be empty
 */
#define cq_append(q, e) do {if(q) {(e)->next = (q)->next; (q)->next = (e);} else (e)->next = (e); (q) = (e);} while(0)
#define cq_prepend(q, e) do {if(q) {(e)->next = (q)->next; (q)->next = (e);} else {(e)->next = (e); (q) = (e);}} while(0)
#define cq_iterate(q, t, e, a) do {if (q) {t *e = (q); do {e = e->next; a} while (e != (q));}} while(0)
#define cq_consume(q, t, e, a) do {t *e, *e##fp; if (q) {e##fp = (q); do {e = (q)->next; a} while (e != e##fp);}} while(0)
#define cq_rm1st(q) do {if((q)->next == (q)) (q) = 0; else (q)->next = (q)->next->next;} while(0)


/*  Update modes for already existing files  */
/*  NOTE(review): presumably the values of options_t.update_mode (-u, -c & -nc) - confirm  */
#define EX_CLOBBER	0
#define EX_UPDATE	1
#define EX_CONTINUE	2
#define EX_NO_CLOBBER	3

/*  Directory hierarchy creation modes  */
/*  NOTE(review): presumably the values of options_t.dir_mode (-nd & -xd) - confirm  */
#define DIRS_NONE	0
#define DIRS_NORMAL	1
#define DIRS_ALWAYS	2

/*  Levels of url recursion:  */
/*  NOTE(review): presumably the values of options_t.follow_src / follow_href - confirm  */
#define NOT_RECURSIVE		0
#define SAMEDIR_RECURSIVE	1
#define SUBDIR_RECURSIVE	2
#define HOST_RECURSIVE		3
#define GLOBAL_RECURSIVE	4

/*  Generic return codes  */
#define RT_OK		0	/* go on */
#define RT_SKIP		1	/* deny existence */
#define RT_AGAIN	2	/* non-error retry */
#define RT_RETRY	3	/* error retry */
#define RT_GIVEUP	4	/* fatal error */
#define RT_DONE		5	/* finished */
#define RT_TIMEOUT	6	/* proxy->server timeout */
#define RT_REFUSED	7	/* proxy->server refusal */
#define RT_RESTART	8	/* immediate retry */

/*  Functions:  */

/*  main.c:  */
extern char *progname;
extern int verbose;
/*  Hash helpers: calc_nhash() takes an explicit length, calc_hash() does not
 *  (NOTE(review): presumably it expects a NUL-terminated string - confirm)  */
int calc_nhash(const u_char *data, int len);
int calc_hash(const u_char *data);
/*  Allocation wrappers with malloc()/realloc()-style signatures.
 *  NOTE(review): their out-of-memory behavior is not visible from here;
 *  h_add() below does check mmalloc() for a null result.  */
void *mmalloc(size_t size);
void *mrealloc(void *ptr, size_t size);
/*  Message output.  NOTE(review): the (msg/txt, ...) pairs are presumably
 *  printf-style format plus arguments, and ret presumably becomes the exit
 *  status of die() - confirm in main.c.  */
void die(int ret, const char *msg, ...);
int errm(url_t *u, const char *txt, ...);
int verrm(url_t *u, const char *txt, va_list va);
void prx(int lev, const char *txt, ...);
void prxu(int lev, url_t *u, const char *txt, ...);

/*  Message levels.  NOTE(review): presumably the lev argument of prx()/prxu(),
 *  compared against verbose - confirm.  */
#define NFO 3
#define WRN 2
#define ERR 1

/*
 *  Debug output.
 *
 *  dbg(wht, tdo), dbge(), dbgu(), dbgeu(): when any bit of the category mask
 *  wht is set in the global debug word, call dbp / dbpe / dbpu / dbpeu with
 *  the parenthesized argument list tdo, e.g.
 *      dbg(DNS, ("looking up %s\n", name));
 *  wht is parenthesized so that combined masks such as URL|DNS are tested as
 *  a whole (unparenthesized, "URL|DNS & debug" would always be true).
 *  Without DEBUG the macros expand to an empty statement; tdo is not
 *  evaluated then.
 *  NOTE(review): the txt arguments are presumably printf-style formats -
 *  confirm in main.c.
 */
#ifdef DEBUG
extern int debug;
void dbp(const char *txt, ...);
void dbpe(const char *txt, ...);
void dbpu(url_t *u, const char *txt, ...);
void dbpeu(url_t *u, const char *txt, ...);
# define dbg(wht, tdo) do { if ((wht) & debug) dbp tdo; } while (0)
# define dbge(wht, tdo) do { if ((wht) & debug) dbpe tdo; } while (0)
# define dbgu(wht, tdo) do { if ((wht) & debug) dbpu tdo; } while (0)
# define dbgeu(wht, tdo) do { if ((wht) & debug) dbpeu tdo; } while (0)
#else
# define dbg(wht, tdo) do { } while (0)
# define dbge(wht, tdo) do { } while (0)
# define dbgu(wht, tdo) do { } while (0)
# define dbgeu(wht, tdo) do { } while (0)
#endif

/*  Debug categories: single-bit flags, tested against the global debug word
 *  by the dbg*() macros (first macro argument)  */
#define URL 1
#define DNS 2
#define QUE 4
#define CON 8
#define HDR 16
#define REF 32
#define MEM 64

/*
 *  ierr(m) - stop the program on an internal error.
 *
 *  The active variant terminates through abort(); the message m is not
 *  used by it.  It replaces a deliberate store through a null pointer:
 *  that is undefined behavior, and an optimizing compiler is free to drop
 *  the store instead of crashing.  The disabled variant reports m through
 *  die() with status 3.
 */
#if 0
# define ierr(m) die(3, m)
#else
# define ierr(m) abort()
#endif

/*  Match slen bytes of string against pattern.
 *  NOTE(review): the pattern syntax and the meaning of the return value are
 *  not visible from this header - confirm at the definition.  */
int patternMatch(const char *string, int slen, const char *pattern);

/*  hostlist.c:  */
extern int always_primary_name;
extern whost_t *queue_dns_lookup;
extern dnsproc_t *list_dns_busy, *list_dns_idle;
/*  Host lookup by name (namlen bytes).  The fast variant returns a host_t,
 *  the full variant a whost_t, i.e. a lookup queue element.  */
host_t *host_lookup_fast(const char *name, int namlen);
whost_t *host_lookup_full(const char *name, int namlen);
/*  Start / complete a lookup on the DNS helper process pr  */
int start_lookup(dnsproc_t *pr);
int finish_lookup(dnsproc_t *pr);
/*  Create / dispose of a DNS helper process  */
dnsproc_t *fork_dnsproc(void);
void reap_dnsproc(dnsproc_t *pr);

/*  getopts.c:  */
/*  Command line processing; takes main()'s argc/argv  */
void getopts(int argc, char *argv[]);

/*  fetch.c:  */
/*  Global limits, counters and queues of the fetch loop.
 *  NOTE(review): the meanings follow from the names only; see fetch.c.  */
extern off_t max_bytes, fetched_bytes, total_bytes;
extern int max_dnss_active;
extern int max_urls_active;
extern int timeout_dns;
extern int max_time;
extern int max_urls;
extern int num_urls;
extern int num_urls_active;
extern int num_urls_done;
extern int num_urls_fail;
extern int num_errors;
extern int show_stat;
extern int waiting_proxies;
extern int all_proxy_wait;	/* unused */
extern struct timeval cur_tv, throttle;
extern struct sockaddr_in bind_addr;
extern wurl_t *queue_urls_connect;
extern aurl_t *list_urls_request, *list_urls_reply;
int touch(aurl_t *au);
void byebye(const char *msg);
void fetch_all(void);

/*  url.c:  */
/*  NOTE(review): the contracts of these functions are not visible from this
 *  header; the remarks below only restate what the signatures show.  */
extern int economize_dns;
int same_host(url_t *referer, host_t *host, u_short port, int is_req);
int same_dir(const char *path, int len, url_t *referer, int is_req);
/*  Writes a hash value through hashp (NOTE(review): presumably the one
 *  add_url() expects as its hash argument - confirm)  */
int find_url(const char *path, int len, hinfo_t *hinfo, u_short port, int *hashp);
proxy_t *parse_add_proxy(const char *srct, const char *proxy);
int test_pat(const char *path, int len, int fp,
	     const char *mtype, int mtl, url_parm_t *parm);
/*  Formats u into buf, which is bufl bytes large  */
int print_url(char *buf, int bufl, url_t *u, int auth);
int parse_add_url(const char *srct, const char *url, int len, url_t *base,
		  url_t *referer, url_parm_t *parm,
		  int isreq, int relocs, int link_depth,
		  aurl_t *au);
void finish_whost(whost_t *wh);
int queue_url(url_t *u);
void add_url(url_t *u, int hash);
void free_url(url_t *u);
/*  See url_parm_t.ref_count: copies the parameter set when it is shared  */
int detach_parm(url_t *u);

/*  recurse.c:  */
/*  NOTE(review): presumably these scan fetched data for further URLs
 *  (cf. aurl_t.content_is_html, "Try recursive search on file?") - confirm
 *  in recurse.c.  */
int recurse_buff(url_t *u, const char *databuf, int len, int notlast, aurl_t *au);
void recurse_pfile(url_t *u, int fi, char **bupo, int *lepo, aurl_t *au);
void recurse_file(url_t *u, char *name);
int needs_recurse_u(url_t *u, int dump);
int needs_recurse_au(aurl_t *au, int dump);

/*  http_conn.c:  */
/*  activate_url() hands back an aurl_t for u through *au; the other three
 *  operate on such an active url  */
int activate_url(url_t *u, aurl_t **au);
int connect_url(aurl_t *au, struct timeval *next_conn_tv);
void disconnect_url(aurl_t *au);
void deactivate_url(aurl_t *au);

/*  http_req.c:  */
void init_user_agent(void);
/*  Buffer size for encoding x bytes: 4 bytes per started group of 3 input
 *  bytes, plus 1 (NOTE(review): presumably for the terminating NUL)  */
#define len_enc_auth(x) (4 * (((x) + 2) / 3) + 1)
void encode_auth(char *buf, const u_char *auth, int len);
/*  Buffer size for decoding x bytes: 3 bytes per started group of 4 input
 *  bytes, plus 1 (NOTE(review): presumably for the terminating NUL)  */
#define len_dec_auth(x) (3 * (((x) + 3) / 4) + 1)
int decode_auth(char *p, const u_char *s);
int send_http_get(aurl_t *au);

/*  http_rsp.c:  */
extern int economize_files;
extern size_t max_buffer;
int mmfopen(char *name, int flags, int *f);
int free_fd(int steallast);
int werrm(aurl_t *au, int sts, const char *msg, ...);
int uwerrm(url_t *u, int sts, const char *msg, ...);
void write_usts(url_t *u, int sts);
void write_psts(url_parm_t *parm, const char *u, int ul, int istopdir, int sts);
int handle_reply(aurl_t *au);

/*  util_date.c:  */
/*  NOTE(review): BAD_DATE is presumably what parseHTTPdate() returns for an
 *  unparsable date - confirm in util_date.c  */
#define BAD_DATE 0
time_t parseHTTPdate(const char *date);


/*
 *  String building helpers.  b is the buffer, l an int lvalue holding the
 *  current length; l is advanced by the amount appended.  All arguments are
 *  parenthesized in the expansions; b, l and bl may be evaluated more than
 *  once, so they must be free of side effects.  None of the macros writes a
 *  terminating NUL.
 *
 *  Unchecked variants - the caller guarantees that the data fits:
 *    cat_chr(b, l, c)       append the character c
 *    cat_mem(b, l, ml, m)   append ml bytes from m
 *    cat_str(b, l, s)       append the string s (without its NUL)
 */
#define cat_chr(b,l,c) ((b)[(l)++] = (c))
#define cat_mem(b,l,ml,m) do { int _ml = (ml); memcpy((b) + (l), (m), _ml); (l) += _ml; } while(0)
#define cat_str(b,l,s) do { const char *_s = (s); cat_mem(b, l, strlen(_s), _s); } while(0)

/*
 *  Length-limited variants; bl is the buffer size.  Data that does not fit
 *  is dropped as a whole (no partial copy), but l is advanced regardless, so
 *  after a sequence of calls l tells how much room would have been needed.
 *  NOTE(review): lcat_chr stores while l < bl, whereas lcat_mem demands
 *  l + ml < bl, i.e. it always leaves at least one byte free - kept as found.
 */
#define lcat_chr(b,bl,l,c) do { if ((l) < (bl)) (b)[l] = (c); (l)++; } while(0)
#define lcat_mem(b,bl,l,ml,m) do { int _ml = (ml); if ((l) + _ml < (bl)) memcpy((b) + (l), (m), _ml); (l) += _ml; } while(0)
#define lcat_str(b,bl,l,s) do { const char *_s = (s); lcat_mem(b, bl, l, strlen(_s), _s); } while(0)


/*  A hash table grows once it holds 1 << BUCKET_SHIFT entries per bucket on average  */
#define BUCKET_SHIFT 6

/*
 *  hash_shift - log2 of the bucket count to use for a table of num entries.
 *
 *  Returns the smallest i >= 0 for which num < (1 << (BUCKET_SHIFT + i)):
 *  0 for fewer than 64 entries, 1 for 64..127, 2 for 128..255, and so on.
 *  Negative counts yield 0.
 *
 *  This single portable loop replaces an i386 inline-asm fast path and a
 *  generic loop.  The asm declared its output without an early-clobber, so
 *  the compiler was allowed to place input and output in the same register,
 *  which the leading "xorl" would then have zeroed before "bsr" read it.
 *  The old generic loop shifted a signed int and overflowed for counts of
 *  2^30 and above.  The comparison is done in unsigned arithmetic here, so
 *  every non-negative int is handled.
 */
static inline int
hash_shift(int num)
{
    unsigned limit = 1u << BUCKET_SHIFT;
    int i = 0;

    if (num < 0)
	return 0;
    /*  limit tops out at 2^31 for num == INT_MAX, which still fits an unsigned  */
    while ((unsigned)num >= limit) {
	limit <<= 1;
	i++;
    }
    return i;
}


/*
 *  h_find - walk the bucket of a hash table that may hold a wanted element.
 *
 *    str     table root.  While hash_shift(strc) is 0 this is the head of
 *            one plain linked list; otherwise it is used (cast to strt **)
 *            as an array of 1 << hash_shift(strc) list heads.
 *    strc    number of elements currently stored
 *    strt    element type; needs a "next" member
 *    hash    hash value of the wanted key; its low bits select the bucket
 *    fcmp    macro or function, called as fcmp(s); non-zero means "matches"
 *    ffound  macro, expanded as ffound(s) for each matching element.  The
 *            loop itself does not stop at a match; ffound has to leave it
 *            (break, return, goto) if only the first match is wanted.
 *
 *  The cursor is declared inside the macro under the name "s".
 */
#define h_find(str, strc, strt, hash, fcmp, ffound) \
    do { \
	strt *s; \
	int osz = hash_shift(strc); \
	for (s = osz ? ((strt **)str)[hash & ((1 << osz) - 1)] : str; \
	     s; s = s->next) \
	{ \
	    if (fcmp(s)) { \
		ffound(s) \
	    } \
	} \
    } while(0)

/*
 *  h_add - insert element s into a hash table, growing the table if needed.
 *
 *    str     table root (lvalue); see h_find for its two representations
 *    strc    element count (lvalue); incremented by this macro
 *    strt    element type; needs a "next" member
 *    s       element to insert
 *    hash    hash value of s
 *    ferr    statement(s) executed when allocating the larger table fails
 *    fhash   macro or function, called as fhash(sp) to recompute the hash
 *            of an already stored element while rehashing
 *
 *  When incrementing strc changes hash_shift(strc), a new bucket array of
 *  1 << nsz pointers is obtained from mmalloc() and zeroed, every stored
 *  element is moved to its new bucket, the old array is freed (only if
 *  there was one, i.e. osz != 0) and str is pointed at the new array.
 *  Finally s is pushed onto the front of its bucket.
 *
 *  NOTE(review): if mmalloc() returns 0 and ferr does not leave the
 *  enclosing code, execution falls through to the final insertion, which
 *  then indexes the old, smaller table using the new size nsz.  ferr
 *  presumably has to return/abort - confirm at the call sites.
 *  The macro declares the locals sp, nsp, spp, nspp, i, osz, nsz and nhash;
 *  argument expressions must not rely on identically named outer variables.
 */
#define h_add(str, strc, strt, s, hash, ferr, fhash) \
    do { \
	strt *sp, *nsp, **spp, **nspp; \
	int i, osz, nsz, nhash; \
 \
	osz = hash_shift(strc); \
	strc++; \
	nsz = hash_shift(strc); \
	if (osz != nsz) { \
	    if (!(nspp = mmalloc((1 << nsz) * sizeof(strt *)))) { \
		ferr \
	    } else { \
		memset(nspp, 0, (1 << nsz) * sizeof(strt *)); \
		for (i = 0; i < (1 << osz); i++) \
		    for (sp = osz ? ((strt **)str)[i] : str; sp; sp = nsp) { \
			nsp = sp->next; \
			nhash = fhash(sp); \
			spp = nspp + (nhash & ((1 << nsz) - 1)); \
			sp->next = *spp; \
			*spp = sp; \
		    } \
		if (osz) \
		    free(str); \
		str = (strt *)nspp; \
	    } \
	} \
	spp = nsz ? ((strt **)str) + (hash & ((1 << nsz) - 1)) : &str; \
	s->next = *spp; \
	*spp = s; \
    } while(0)

#endif				/*  _PUF_H  */
