mirror of
https://github.com/vlang/v.git
synced 2025-09-10 07:47:20 -04:00
net.http: implement http.download_file_with_progress/2, saving each chunk, as it is received, without growing the memory usage (#21633)
This commit is contained in:
parent
2e567ff9e5
commit
8504beaaab
@ -9,6 +9,8 @@ import crypto.sha256
|
|||||||
struct Context {
|
struct Context {
|
||||||
mut:
|
mut:
|
||||||
show_help bool
|
show_help bool
|
||||||
|
show_sha1 bool
|
||||||
|
show_sha256 bool
|
||||||
target_folder string
|
target_folder string
|
||||||
continue_on_failure bool
|
continue_on_failure bool
|
||||||
retries int
|
retries int
|
||||||
@ -27,6 +29,8 @@ fn main() {
|
|||||||
fp.limit_free_args_to_at_least(1)!
|
fp.limit_free_args_to_at_least(1)!
|
||||||
ctx.show_help = fp.bool('help', `h`, false, 'Show this help screen.')
|
ctx.show_help = fp.bool('help', `h`, false, 'Show this help screen.')
|
||||||
ctx.target_folder = fp.string('target-folder', `t`, '.', 'The target folder, where the file will be stored. It will be created, if it does not exist. Default is current folder.')
|
ctx.target_folder = fp.string('target-folder', `t`, '.', 'The target folder, where the file will be stored. It will be created, if it does not exist. Default is current folder.')
|
||||||
|
ctx.show_sha1 = fp.bool('sha1', `1`, false, 'Show the SHA1 hash of the downloaded file.')
|
||||||
|
ctx.show_sha256 = fp.bool('sha256', `2`, false, 'Show the SHA256 hash of the downloaded file.')
|
||||||
ctx.continue_on_failure = fp.bool('continue', `c`, false, 'Continue on download failures. If you download 5 URLs, and several of them fail, continue without error. False by default.')
|
ctx.continue_on_failure = fp.bool('continue', `c`, false, 'Continue on download failures. If you download 5 URLs, and several of them fail, continue without error. False by default.')
|
||||||
ctx.retries = fp.int('retries', `r`, 10, 'Number of retries, when an URL fails to download.')
|
ctx.retries = fp.int('retries', `r`, 10, 'Number of retries, when an URL fails to download.')
|
||||||
ctx.delay = time.Duration(u64(fp.float('delay', `d`, 1.0, 'Delay in seconds, after each retry.') * time.second))
|
ctx.delay = time.Duration(u64(fp.float('delay', `d`, 1.0, 'Delay in seconds, after each retry.') * time.second))
|
||||||
@ -49,13 +53,18 @@ fn main() {
|
|||||||
sw := time.new_stopwatch()
|
sw := time.new_stopwatch()
|
||||||
mut errors := 0
|
mut errors := 0
|
||||||
mut downloaded := 0
|
mut downloaded := 0
|
||||||
|
downloader := if os.is_atty(1) > 0 {
|
||||||
|
&http.Downloader(http.TerminalStreamingDownloader{})
|
||||||
|
} else {
|
||||||
|
&http.Downloader(http.SilentStreamingDownloader{})
|
||||||
|
}
|
||||||
for idx, url in ctx.urls {
|
for idx, url in ctx.urls {
|
||||||
fname := url.all_after_last('/')
|
fname := url.all_after_last('/')
|
||||||
fpath := '${ctx.target_folder}/${fname}'
|
fpath := '${ctx.target_folder}/${fname}'
|
||||||
mut file_errors := 0
|
mut file_errors := 0
|
||||||
log.info('Downloading [${idx + 1}/${ctx.urls.len}] from url: ${url} to ${fpath} ...')
|
log.info('Downloading [${idx + 1}/${ctx.urls.len}] from url: ${url} to ${fpath} ...')
|
||||||
for retry in 0 .. ctx.retries {
|
for retry in 0 .. ctx.retries {
|
||||||
http.download_file(url, fname) or {
|
http.download_file_with_progress(url, fname, downloader: downloader) or {
|
||||||
log.error(' retry ${retry + 1}/${ctx.retries}, failed downloading from url: ${url}. Error: ${err}.')
|
log.error(' retry ${retry + 1}/${ctx.retries}, failed downloading from url: ${url}. Error: ${err}.')
|
||||||
file_errors++
|
file_errors++
|
||||||
time.sleep(ctx.delay)
|
time.sleep(ctx.delay)
|
||||||
@ -77,19 +86,27 @@ fn main() {
|
|||||||
log.info(' Finished downloading file: ${fpath} .')
|
log.info(' Finished downloading file: ${fpath} .')
|
||||||
log.info(' size: ${fstat.size} bytes')
|
log.info(' size: ${fstat.size} bytes')
|
||||||
|
|
||||||
fbytes := os.read_bytes(fname)!
|
if !ctx.show_sha256 && !ctx.show_sha1 {
|
||||||
mut digest256 := sha256.new()
|
continue
|
||||||
digest256.write(fbytes)!
|
}
|
||||||
mut sum256 := digest256.sum([])
|
|
||||||
hash256 := sum256.hex()
|
|
||||||
|
|
||||||
|
fbytes := os.read_bytes(fname)!
|
||||||
|
if ctx.show_sha1 {
|
||||||
mut digest1 := sha1.new()
|
mut digest1 := sha1.new()
|
||||||
digest1.write(fbytes)!
|
digest1.write(fbytes)!
|
||||||
mut sum1 := digest1.sum([])
|
mut sum1 := digest1.sum([])
|
||||||
hash1 := sum1.hex()
|
hash1 := sum1.hex()
|
||||||
log.info(' SHA1: ${hash1}')
|
log.info(' SHA1: ${hash1}')
|
||||||
|
}
|
||||||
|
|
||||||
|
if ctx.show_sha256 {
|
||||||
|
mut digest256 := sha256.new()
|
||||||
|
digest256.write(fbytes)!
|
||||||
|
mut sum256 := digest256.sum([])
|
||||||
|
hash256 := sum256.hex()
|
||||||
log.info(' SHA256: ${hash256}')
|
log.info(' SHA256: ${hash256}')
|
||||||
}
|
}
|
||||||
|
}
|
||||||
println('Downloaded: ${downloaded} file(s) . Elapsed time: ${sw.elapsed()} . Errors: ${errors} .')
|
println('Downloaded: ${downloaded} file(s) . Elapsed time: ${sw.elapsed()} . Errors: ${errors} .')
|
||||||
if !ctx.continue_on_failure && errors > 0 {
|
if !ctx.continue_on_failure && errors > 0 {
|
||||||
exit(1)
|
exit(1)
|
||||||
|
@ -35,40 +35,29 @@ fn net_ssl_do(req &Request, port int, method Method, host_name string, path stri
|
|||||||
|
|
||||||
req_headers := req.build_request_headers(method, host_name, path)
|
req_headers := req.build_request_headers(method, host_name, path)
|
||||||
$if trace_http_request ? {
|
$if trace_http_request ? {
|
||||||
eprintln('> ${req_headers}')
|
eprint('> ')
|
||||||
|
eprint(req_headers)
|
||||||
|
eprintln('')
|
||||||
}
|
}
|
||||||
|
|
||||||
return req.do_request(req_headers, mut ssl_conn)!
|
return req.do_request(req_headers, mut ssl_conn)!
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn read_from_ssl_connection_cb(con voidptr, buf &u8, bufsize int) !int {
|
||||||
|
mut ssl_conn := unsafe { &ssl.SSLConn(con) }
|
||||||
|
return ssl_conn.socket_read_into_ptr(buf, bufsize)
|
||||||
|
}
|
||||||
|
|
||||||
fn (req &Request) do_request(req_headers string, mut ssl_conn ssl.SSLConn) !Response {
|
fn (req &Request) do_request(req_headers string, mut ssl_conn ssl.SSLConn) !Response {
|
||||||
ssl_conn.write_string(req_headers) or { return err }
|
ssl_conn.write_string(req_headers) or { return err }
|
||||||
|
mut content := strings.new_builder(4096)
|
||||||
mut content := strings.new_builder(100)
|
req.receive_all_data_from_cb_in_builder(mut content, voidptr(ssl_conn), read_from_ssl_connection_cb)!
|
||||||
mut buff := [bufsize]u8{}
|
|
||||||
bp := unsafe { &buff[0] }
|
|
||||||
mut readcounter := 0
|
|
||||||
for {
|
|
||||||
readcounter++
|
|
||||||
len := ssl_conn.socket_read_into_ptr(bp, bufsize) or { break }
|
|
||||||
$if debug_http ? {
|
|
||||||
eprintln('ssl_do, read ${readcounter:4d} | len: ${len}')
|
|
||||||
eprintln('-'.repeat(20))
|
|
||||||
eprintln(unsafe { tos(bp, len) })
|
|
||||||
eprintln('-'.repeat(20))
|
|
||||||
}
|
|
||||||
if len <= 0 {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
unsafe { content.write_ptr(bp, len) }
|
|
||||||
if req.on_progress != unsafe { nil } {
|
|
||||||
req.on_progress(req, content[content.len - len..], u64(content.len))!
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ssl_conn.shutdown()!
|
ssl_conn.shutdown()!
|
||||||
response_text := content.str()
|
response_text := content.str()
|
||||||
$if trace_http_response ? {
|
$if trace_http_response ? {
|
||||||
eprintln('< ${response_text}')
|
eprint('< ')
|
||||||
|
eprint(response_text)
|
||||||
|
eprinln('')
|
||||||
}
|
}
|
||||||
if req.on_finish != unsafe { nil } {
|
if req.on_finish != unsafe { nil } {
|
||||||
req.on_finish(req, u64(response_text.len))!
|
req.on_finish(req, u64(response_text.len))!
|
||||||
|
@ -21,11 +21,6 @@ pub fn download_file(url string, out_file_path string) ! {
|
|||||||
os.write_file(out_file_path, s.body)!
|
os.write_file(out_file_path, s.body)!
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: implement download_file_with_progress
|
|
||||||
// type DownloadChunkFn = fn (written int)
|
|
||||||
// type DownloadFinishedFn = fn ()
|
|
||||||
// pub fn download_file_with_progress(url string, out_file_path string, cb_chunk DownloadChunkFn, cb_finished DownloadFinishedFn)
|
|
||||||
|
|
||||||
pub fn download_file_with_cookies(url string, out_file_path string, cookies map[string]string) ! {
|
pub fn download_file_with_cookies(url string, out_file_path string, cookies map[string]string) ! {
|
||||||
$if debug_http ? {
|
$if debug_http ? {
|
||||||
println('http.download_file url=${url} out_file_path=${out_file_path}')
|
println('http.download_file url=${url} out_file_path=${out_file_path}')
|
||||||
|
77
vlib/net/http/download_progress.v
Normal file
77
vlib/net/http/download_progress.v
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
module http
|
||||||
|
|
||||||
|
// Downloader is the interface that you have to implement, if you need to customise
|
||||||
|
// how download_file_with_progress works, and what output it produces while a file
|
||||||
|
// is downloaded.
|
||||||
|
pub interface Downloader {
|
||||||
|
mut:
|
||||||
|
// Called once, at the start of the streaming download. You can do setup here,
|
||||||
|
// like opening a target file, changing request.stop_copying_limit to a different value,
|
||||||
|
// if you need it.
|
||||||
|
on_start(mut request Request, path string) !
|
||||||
|
// Called many times, once a chunk of data is received
|
||||||
|
on_chunk(request &Request, chunk []u8, already_received u64, expected u64) !
|
||||||
|
// Called once, at the end of the streaming download. Do cleanup here,
|
||||||
|
// like closing a file (opened in on_start), reporting stats etc.
|
||||||
|
on_finish(request &Request, response &Response) !
|
||||||
|
}
|
||||||
|
|
||||||
|
// DownloaderParams is similar to FetchConfig, but it also allows you to pass
|
||||||
|
// a `downloader: your_downloader_instance` parameter.
|
||||||
|
// See also http.SilentStreamingDownloader, and http.TerminalStreamingDownloader .
|
||||||
|
@[params]
|
||||||
|
pub struct DownloaderParams {
|
||||||
|
FetchConfig
|
||||||
|
pub mut:
|
||||||
|
downloader &Downloader = TerminalStreamingDownloader{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// download_file_with_progress will save the URL `url` to the filepath `path` .
|
||||||
|
// Unlike download_file/2, it *does not* load the whole content in memory, but
|
||||||
|
// instead streams it chunk by chunk to the target `path`, as the chunks are received
|
||||||
|
// from the network. This makes it suitable for downloading big files, *without* increasing
|
||||||
|
// the memory consumption of your application.
|
||||||
|
//
|
||||||
|
// By default, it will also show a progress line, while the download happens.
|
||||||
|
// If you do not want a status line, you can call it like this:
|
||||||
|
// `http.download_file_with_progress(url, path, downloader: http.SilentStreamingDownloader{})`,
|
||||||
|
// or you can implement your own http.Downloader and pass that instead.
|
||||||
|
//
|
||||||
|
// Note: the returned response by this function, will have a truncated .body, after the first
|
||||||
|
// few KBs, because it does not accumulate all its data in memory, instead relying on the
|
||||||
|
// downloaders to save the received data chunk by chunk. You can parametrise this by
|
||||||
|
// using `stop_copying_limit:` but you need to pass a number that is big enough to fit
|
||||||
|
// at least all headers in the response, otherwise the parsing of the response at the end will
|
||||||
|
// fail, despite saving all the data in the file before that. The default is 65536 bytes.
|
||||||
|
pub fn download_file_with_progress(url string, path string, params DownloaderParams) !Response {
|
||||||
|
mut d := unsafe { params.downloader }
|
||||||
|
mut config := params.FetchConfig
|
||||||
|
config.url = url
|
||||||
|
config.user_ptr = voidptr(d)
|
||||||
|
config.on_progress_body = download_progres_cb
|
||||||
|
if config.stop_copying_limit == -1 {
|
||||||
|
// leave more than enough space for potential redirect headers
|
||||||
|
config.stop_copying_limit = 65536
|
||||||
|
}
|
||||||
|
mut req := prepare(config)!
|
||||||
|
d.on_start(mut req, path)!
|
||||||
|
response := req.do()!
|
||||||
|
d.on_finish(req, response)!
|
||||||
|
return response
|
||||||
|
}
|
||||||
|
|
||||||
|
const zz = &Downloader(unsafe { nil })
|
||||||
|
|
||||||
|
fn download_progres_cb(request &Request, chunk []u8, body_so_far u64, expected_size u64, status_code int) ! {
|
||||||
|
// TODO: remove this hack, when `unsafe { &Downloader( request.user_ptr ) }` works reliably,
|
||||||
|
// by just casting, without trying to promote the argument to the heap at all.
|
||||||
|
mut d := unsafe { http.zz }
|
||||||
|
pd := unsafe { &voidptr(&d) }
|
||||||
|
unsafe {
|
||||||
|
*pd = request.user_ptr
|
||||||
|
}
|
||||||
|
if status_code == 200 {
|
||||||
|
// ignore redirects, we are interested in the chunks of the final file:
|
||||||
|
d.on_chunk(request, chunk, body_so_far, expected_size)!
|
||||||
|
}
|
||||||
|
}
|
28
vlib/net/http/download_silent_downloader.v
Normal file
28
vlib/net/http/download_silent_downloader.v
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
module http
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
// SilentStreamingDownloader just saves the downloaded file chunks to the given path.
|
||||||
|
// It does *no reporting at all*.
|
||||||
|
// Note: the folder part of the path should already exist, and has to be writable.
|
||||||
|
pub struct SilentStreamingDownloader {
|
||||||
|
pub mut:
|
||||||
|
path string
|
||||||
|
f os.File
|
||||||
|
}
|
||||||
|
|
||||||
|
// on_start is called once at the start of the download.
|
||||||
|
pub fn (mut d SilentStreamingDownloader) on_start(mut request Request, path string) ! {
|
||||||
|
d.path = path
|
||||||
|
d.f = os.create(path)!
|
||||||
|
}
|
||||||
|
|
||||||
|
// on_chunk is called multiple times, once per chunk of received content.
|
||||||
|
pub fn (mut d SilentStreamingDownloader) on_chunk(request &Request, chunk []u8, already_received u64, expected u64) ! {
|
||||||
|
d.f.write(chunk)!
|
||||||
|
}
|
||||||
|
|
||||||
|
// on_finish is called once at the end of the download.
|
||||||
|
pub fn (mut d SilentStreamingDownloader) on_finish(request &Request, response &Response) ! {
|
||||||
|
d.f.close()
|
||||||
|
}
|
46
vlib/net/http/download_terminal_downloader.v
Normal file
46
vlib/net/http/download_terminal_downloader.v
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
module http
|
||||||
|
|
||||||
|
import time
|
||||||
|
|
||||||
|
// TerminalStreamingDownloader is the same as http.SilentStreamingDownloader, but produces a progress line on stdout.
|
||||||
|
pub struct TerminalStreamingDownloader {
|
||||||
|
SilentStreamingDownloader
|
||||||
|
mut:
|
||||||
|
start_time time.Time
|
||||||
|
past_time time.Time
|
||||||
|
past_received u64
|
||||||
|
}
|
||||||
|
|
||||||
|
// on_start is called once at the start of the download.
|
||||||
|
pub fn (mut d TerminalStreamingDownloader) on_start(mut request Request, path string) ! {
|
||||||
|
d.SilentStreamingDownloader.on_start(mut request, path)!
|
||||||
|
d.start_time = time.now()
|
||||||
|
d.past_time = time.now()
|
||||||
|
}
|
||||||
|
|
||||||
|
// on_chunk is called multiple times, once per chunk of received content.
|
||||||
|
pub fn (mut d TerminalStreamingDownloader) on_chunk(request &Request, chunk []u8, already_received u64, expected u64) ! {
|
||||||
|
now := time.now()
|
||||||
|
elapsed := now - d.start_time
|
||||||
|
// delta_elapsed := now - d.past_time
|
||||||
|
// delta_bytes := already_received - d.past_received
|
||||||
|
d.past_time = now
|
||||||
|
d.past_received = already_received
|
||||||
|
ratio := f64(already_received) / f64(expected)
|
||||||
|
estimated := time.Duration(i64(f64(elapsed) / ratio))
|
||||||
|
speed := f64(time.millisecond) * f64(already_received) / f64(elapsed)
|
||||||
|
elapsed_s := elapsed.seconds()
|
||||||
|
estimated_s := estimated.seconds()
|
||||||
|
eta_s := f64_max(estimated_s - elapsed_s, 0.0)
|
||||||
|
|
||||||
|
d.SilentStreamingDownloader.on_chunk(request, chunk, already_received, expected)!
|
||||||
|
print('\rDownloading to `${d.path}` ${100.0 * ratio:6.2f}%, ${f64(already_received) / (1024 * 1024):7.3f}/${f64(expected) / (1024 * 1024):-7.3f}MB, ${speed:6.0f}KB/s, elapsed: ${elapsed_s:6.0f}s, eta: ${eta_s:6.0f}s')
|
||||||
|
flush_stdout()
|
||||||
|
}
|
||||||
|
|
||||||
|
// on_finish is called once at the end of the download.
|
||||||
|
pub fn (mut d TerminalStreamingDownloader) on_finish(request &Request, response &Response) ! {
|
||||||
|
d.SilentStreamingDownloader.on_finish(request, response)!
|
||||||
|
println('')
|
||||||
|
flush_stdout()
|
||||||
|
}
|
@ -8,7 +8,8 @@ import net.urllib
|
|||||||
const max_redirects = 16 // safari max - other browsers allow up to 20
|
const max_redirects = 16 // safari max - other browsers allow up to 20
|
||||||
|
|
||||||
const content_type_default = 'text/plain'
|
const content_type_default = 'text/plain'
|
||||||
const bufsize = 1536
|
|
||||||
|
const bufsize = 64 * 1024
|
||||||
|
|
||||||
// FetchConfig holds configuration data for the fetch function.
|
// FetchConfig holds configuration data for the fetch function.
|
||||||
pub struct FetchConfig {
|
pub struct FetchConfig {
|
||||||
@ -31,10 +32,14 @@ pub mut:
|
|||||||
in_memory_verification bool // if true, verify, cert, and cert_key are read from memory, not from a file
|
in_memory_verification bool // if true, verify, cert, and cert_key are read from memory, not from a file
|
||||||
allow_redirect bool = true // whether to allow redirect
|
allow_redirect bool = true // whether to allow redirect
|
||||||
max_retries int = 5 // maximum number of retries required when an underlying socket error occurs
|
max_retries int = 5 // maximum number of retries required when an underlying socket error occurs
|
||||||
// callbacks to allow custom reporting code to run, while the request is running
|
// callbacks to allow custom reporting code to run, while the request is running, and to implement streaming
|
||||||
on_redirect RequestRedirectFn = unsafe { nil }
|
on_redirect RequestRedirectFn = unsafe { nil }
|
||||||
on_progress RequestProgressFn = unsafe { nil }
|
on_progress RequestProgressFn = unsafe { nil }
|
||||||
|
on_progress_body RequestProgressBodyFn = unsafe { nil }
|
||||||
on_finish RequestFinishFn = unsafe { nil }
|
on_finish RequestFinishFn = unsafe { nil }
|
||||||
|
//
|
||||||
|
stop_copying_limit i64 = -1 // after this many bytes are received, stop copying to the response. Note that on_progress and on_progress_body callbacks, will continue to fire normally, until the full response is read, which allows you to implement streaming downloads, without keeping the whole big response in memory
|
||||||
|
stop_receiving_limit i64 = -1 // after this many bytes are received, break out of the loop that reads the response, effectively stopping the request early. No more on_progress callbacks will be fired. The on_finish callback will fire.
|
||||||
}
|
}
|
||||||
|
|
||||||
// new_request creates a new Request given the request `method`, `url_`, and
|
// new_request creates a new Request given the request `method`, `url_`, and
|
||||||
@ -153,10 +158,10 @@ pub fn delete(url string) !Response {
|
|||||||
return fetch(method: .delete, url: url)
|
return fetch(method: .delete, url: url)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: @[noinline] attribute is used for temporary fix the 'get_text()' intermittent segfault / nil value when compiling with GCC 13.2.x and -prod option ( Issue #20506 )
|
// prepare prepares a new request for fetching, but does not call its .do() method.
|
||||||
// fetch sends an HTTP request to the `url` with the given method and configuration.
|
// It is useful, if you want to reuse request objects, for several requests in a row,
|
||||||
@[noinline]
|
// modifying the request each time, then calling .do() to get the new response.
|
||||||
pub fn fetch(config FetchConfig) !Response {
|
pub fn prepare(config FetchConfig) !Request {
|
||||||
if config.url == '' {
|
if config.url == '' {
|
||||||
return error('http.fetch: empty url')
|
return error('http.fetch: empty url')
|
||||||
}
|
}
|
||||||
@ -179,11 +184,21 @@ pub fn fetch(config FetchConfig) !Response {
|
|||||||
allow_redirect: config.allow_redirect
|
allow_redirect: config.allow_redirect
|
||||||
max_retries: config.max_retries
|
max_retries: config.max_retries
|
||||||
on_progress: config.on_progress
|
on_progress: config.on_progress
|
||||||
|
on_progress_body: config.on_progress_body
|
||||||
on_redirect: config.on_redirect
|
on_redirect: config.on_redirect
|
||||||
on_finish: config.on_finish
|
on_finish: config.on_finish
|
||||||
|
stop_copying_limit: config.stop_copying_limit
|
||||||
|
stop_receiving_limit: config.stop_receiving_limit
|
||||||
}
|
}
|
||||||
res := req.do()!
|
return req
|
||||||
return res
|
}
|
||||||
|
|
||||||
|
// TODO: @[noinline] attribute is used for temporary fix the 'get_text()' intermittent segfault / nil value when compiling with GCC 13.2.x and -prod option ( Issue #20506 )
|
||||||
|
// fetch sends an HTTP request to the `url` with the given method and configuration.
|
||||||
|
@[noinline]
|
||||||
|
pub fn fetch(config FetchConfig) !Response {
|
||||||
|
req := prepare(config)!
|
||||||
|
return req.do()!
|
||||||
}
|
}
|
||||||
|
|
||||||
// get_text sends an HTTP GET request to the given `url` and returns the text content of the response.
|
// get_text sends an HTTP GET request to the given `url` and returns the text content of the response.
|
||||||
|
@ -14,6 +14,8 @@ pub type RequestRedirectFn = fn (request &Request, nredirects int, new_url strin
|
|||||||
|
|
||||||
pub type RequestProgressFn = fn (request &Request, chunk []u8, read_so_far u64) !
|
pub type RequestProgressFn = fn (request &Request, chunk []u8, read_so_far u64) !
|
||||||
|
|
||||||
|
pub type RequestProgressBodyFn = fn (request &Request, chunk []u8, body_read_so_far u64, body_expected_size u64, status_code int) !
|
||||||
|
|
||||||
pub type RequestFinishFn = fn (request &Request, final_size u64) !
|
pub type RequestFinishFn = fn (request &Request, final_size u64) !
|
||||||
|
|
||||||
// Request holds information about an HTTP request (either received by
|
// Request holds information about an HTTP request (either received by
|
||||||
@ -43,10 +45,14 @@ pub mut:
|
|||||||
in_memory_verification bool // if true, verify, cert, and cert_key are read from memory, not from a file
|
in_memory_verification bool // if true, verify, cert, and cert_key are read from memory, not from a file
|
||||||
allow_redirect bool = true // whether to allow redirect
|
allow_redirect bool = true // whether to allow redirect
|
||||||
max_retries int = 5 // maximum number of retries required when an underlying socket error occurs
|
max_retries int = 5 // maximum number of retries required when an underlying socket error occurs
|
||||||
// callbacks to allow custom reporting code to run, while the request is running
|
// callbacks to allow custom reporting code to run, while the request is running, and to implement streaming
|
||||||
on_redirect RequestRedirectFn = unsafe { nil }
|
on_redirect RequestRedirectFn = unsafe { nil }
|
||||||
on_progress RequestProgressFn = unsafe { nil }
|
on_progress RequestProgressFn = unsafe { nil }
|
||||||
|
on_progress_body RequestProgressBodyFn = unsafe { nil }
|
||||||
on_finish RequestFinishFn = unsafe { nil }
|
on_finish RequestFinishFn = unsafe { nil }
|
||||||
|
//
|
||||||
|
stop_copying_limit i64 = -1 // after this many bytes are received, stop copying to the response. Note that on_progress and on_progress_body callbacks, will continue to fire normally, until the full response is read, which allows you to implement streaming downloads, without keeping the whole big response in memory
|
||||||
|
stop_receiving_limit i64 = -1 // after this many bytes are received, break out of the loop that reads the response, effectively stopping the request early. No more on_progress callbacks will be fired. The on_finish callback will fire.
|
||||||
}
|
}
|
||||||
|
|
||||||
fn (mut req Request) free() {
|
fn (mut req Request) free() {
|
||||||
@ -176,40 +182,75 @@ fn (req &Request) method_and_url_to_response(method Method, url urllib.URL) !Res
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn (req &Request) build_request_headers(method Method, host_name string, path string) string {
|
fn (req &Request) build_request_headers(method Method, host_name string, path string) string {
|
||||||
ua := req.user_agent
|
mut sb := strings.new_builder(4096)
|
||||||
mut uheaders := []string{}
|
version := if req.version == .unknown { Version.v1_1 } else { req.version }
|
||||||
|
sb.write_string(method.str())
|
||||||
|
sb.write_string(' ')
|
||||||
|
sb.write_string(path)
|
||||||
|
sb.write_string(' ')
|
||||||
|
sb.write_string(version.str())
|
||||||
|
sb.write_string('\r\n')
|
||||||
if !req.header.contains(.host) {
|
if !req.header.contains(.host) {
|
||||||
uheaders << 'Host: ${host_name}\r\n'
|
sb.write_string('Host: ')
|
||||||
|
sb.write_string(host_name)
|
||||||
|
sb.write_string('\r\n')
|
||||||
}
|
}
|
||||||
if !req.header.contains(.user_agent) {
|
if !req.header.contains(.user_agent) {
|
||||||
uheaders << 'User-Agent: ${ua}\r\n'
|
ua := req.user_agent
|
||||||
|
sb.write_string('User-Agent: ')
|
||||||
|
sb.write_string(ua)
|
||||||
|
sb.write_string('\r\n')
|
||||||
}
|
}
|
||||||
if req.data.len > 0 && !req.header.contains(.content_length) {
|
if req.data.len > 0 && !req.header.contains(.content_length) {
|
||||||
uheaders << 'Content-Length: ${req.data.len}\r\n'
|
sb.write_string('Content-Length: ')
|
||||||
|
sb.write_string(req.data.len.str())
|
||||||
|
sb.write_string('\r\n')
|
||||||
}
|
}
|
||||||
|
chkey := CommonHeader.cookie.str()
|
||||||
for key in req.header.keys() {
|
for key in req.header.keys() {
|
||||||
if key == CommonHeader.cookie.str() {
|
if key == chkey {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
val := req.header.custom_values(key).join('; ')
|
val := req.header.custom_values(key).join('; ')
|
||||||
uheaders << '${key}: ${val}\r\n'
|
sb.write_string(key)
|
||||||
|
sb.write_string(': ')
|
||||||
|
sb.write_string(val)
|
||||||
|
sb.write_string('\r\n')
|
||||||
}
|
}
|
||||||
uheaders << req.build_request_cookies_header()
|
sb.write_string(req.build_request_cookies_header())
|
||||||
version := if req.version == .unknown { Version.v1_1 } else { req.version }
|
sb.write_string('Connection: close\r\n')
|
||||||
return '${method} ${path} ${version}\r\n' + uheaders.join('') + 'Connection: close\r\n\r\n' +
|
sb.write_string('\r\n')
|
||||||
req.data
|
sb.write_string(req.data)
|
||||||
|
return sb.str()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn (req &Request) build_request_cookies_header() string {
|
fn (req &Request) build_request_cookies_header() string {
|
||||||
if req.cookies.keys().len < 1 {
|
if req.cookies.len < 1 {
|
||||||
return ''
|
return ''
|
||||||
}
|
}
|
||||||
mut cookie := []string{}
|
mut sb_cookie := strings.new_builder(1024)
|
||||||
|
hvcookies := req.header.values(.cookie)
|
||||||
|
total_cookies := req.cookies.len + hvcookies.len
|
||||||
|
sb_cookie.write_string('Cookie: ')
|
||||||
|
mut idx := 0
|
||||||
for key, val in req.cookies {
|
for key, val in req.cookies {
|
||||||
cookie << '${key}=${val}'
|
sb_cookie.write_string(key)
|
||||||
|
sb_cookie.write_string('=')
|
||||||
|
sb_cookie.write_string(val)
|
||||||
|
if idx < total_cookies - 1 {
|
||||||
|
sb_cookie.write_string('; ')
|
||||||
}
|
}
|
||||||
cookie << req.header.values(.cookie)
|
idx++
|
||||||
return 'Cookie: ' + cookie.join('; ') + '\r\n'
|
}
|
||||||
|
for c in hvcookies {
|
||||||
|
sb_cookie.write_string(c)
|
||||||
|
if idx < total_cookies - 1 {
|
||||||
|
sb_cookie.write_string('; ')
|
||||||
|
}
|
||||||
|
idx++
|
||||||
|
}
|
||||||
|
sb_cookie.write_string('\r\n')
|
||||||
|
return sb_cookie.str()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn (req &Request) http_do(host string, method Method, path string) !Response {
|
fn (req &Request) http_do(host string, method Method, path string) !Response {
|
||||||
@ -221,13 +262,17 @@ fn (req &Request) http_do(host string, method Method, path string) !Response {
|
|||||||
// TODO: this really needs to be exposed somehow
|
// TODO: this really needs to be exposed somehow
|
||||||
client.write(s.bytes())!
|
client.write(s.bytes())!
|
||||||
$if trace_http_request ? {
|
$if trace_http_request ? {
|
||||||
eprintln('> ${s}')
|
eprint('> ')
|
||||||
|
eprint(s)
|
||||||
|
eprintln('')
|
||||||
}
|
}
|
||||||
mut bytes := req.read_all_from_client_connection(client)!
|
mut bytes := req.read_all_from_client_connection(client)!
|
||||||
client.close()!
|
client.close()!
|
||||||
response_text := bytes.bytestr()
|
response_text := bytes.bytestr()
|
||||||
$if trace_http_response ? {
|
$if trace_http_response ? {
|
||||||
eprintln('< ${response_text}')
|
eprint('< ')
|
||||||
|
eprint(response_text)
|
||||||
|
eprintln('')
|
||||||
}
|
}
|
||||||
if req.on_finish != unsafe { nil } {
|
if req.on_finish != unsafe { nil } {
|
||||||
req.on_finish(req, u64(response_text.len))!
|
req.on_finish(req, u64(response_text.len))!
|
||||||
@ -235,24 +280,81 @@ fn (req &Request) http_do(host string, method Method, path string) !Response {
|
|||||||
return parse_response(response_text)
|
return parse_response(response_text)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn (req &Request) read_all_from_client_connection(r &net.TcpConn) ![]u8 {
|
// abstract over reading the whole content from TCP or SSL connections:
|
||||||
mut read := i64(0)
|
type FnReceiveChunk = fn (con voidptr, buf &u8, bufsize int) !int
|
||||||
mut b := []u8{len: 32768}
|
|
||||||
|
fn (req &Request) receive_all_data_from_cb_in_builder(mut content strings.Builder, con voidptr, receive_chunk_cb FnReceiveChunk) ! {
|
||||||
|
mut buff := [bufsize]u8{}
|
||||||
|
bp := unsafe { &buff[0] }
|
||||||
|
mut readcounter := 0
|
||||||
|
mut body_pos := u64(0)
|
||||||
|
mut old_len := u64(0)
|
||||||
|
mut new_len := u64(0)
|
||||||
|
mut expected_size := u64(0)
|
||||||
|
mut status_code := -1
|
||||||
for {
|
for {
|
||||||
old_read := read
|
readcounter++
|
||||||
new_read := r.read(mut b[read..]) or { break }
|
len := receive_chunk_cb(con, bp, bufsize) or { break }
|
||||||
if new_read <= 0 {
|
$if debug_http ? {
|
||||||
|
eprintln('ssl_do, read ${readcounter:4d} | len: ${len}')
|
||||||
|
eprintln('-'.repeat(20))
|
||||||
|
eprintln(unsafe { tos(bp, len) })
|
||||||
|
eprintln('-'.repeat(20))
|
||||||
|
}
|
||||||
|
if len <= 0 {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
read += new_read
|
new_len = old_len + u64(len)
|
||||||
|
// Note: `schunk` and `bchunk` are used as convenient stack located views to the currently filled part of `buff`:
|
||||||
|
schunk := unsafe { bp.vstring_literal_with_len(len) }
|
||||||
|
mut bchunk := unsafe { bp.vbytes(len) }
|
||||||
|
if readcounter == 1 {
|
||||||
|
http_line := schunk.all_before('\r\n')
|
||||||
|
status_code = http_line.all_after(' ').all_before(' ').int()
|
||||||
|
}
|
||||||
if req.on_progress != unsafe { nil } {
|
if req.on_progress != unsafe { nil } {
|
||||||
req.on_progress(req, b[old_read..read], u64(read))!
|
req.on_progress(req, bchunk, u64(new_len))!
|
||||||
}
|
}
|
||||||
for b.len <= read {
|
if body_pos == 0 {
|
||||||
unsafe { b.grow_len(4096) }
|
bidx := schunk.index('\r\n\r\n') or { -1 }
|
||||||
|
if bidx > 0 {
|
||||||
|
body_buffer_offset := bidx + 4
|
||||||
|
bchunk = unsafe { (&u8(bchunk.data) + body_buffer_offset).vbytes(len - body_buffer_offset) }
|
||||||
|
body_pos = u64(old_len) + u64(body_buffer_offset)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return b[..read]
|
body_so_far := u64(new_len) - body_pos
|
||||||
|
if req.on_progress_body != unsafe { nil } {
|
||||||
|
if expected_size == 0 {
|
||||||
|
lidx := schunk.index('Content-Length: ') or { -1 }
|
||||||
|
if lidx > 0 {
|
||||||
|
esize := schunk[lidx..].all_before('\r\n').all_after(': ').u64()
|
||||||
|
if esize > 0 {
|
||||||
|
expected_size = esize
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
req.on_progress_body(req, bchunk, body_so_far, expected_size, status_code)!
|
||||||
|
}
|
||||||
|
if !(req.stop_copying_limit > 0 && new_len > req.stop_copying_limit) {
|
||||||
|
unsafe { content.write_ptr(bp, len) }
|
||||||
|
}
|
||||||
|
if req.stop_receiving_limit > 0 && new_len > req.stop_receiving_limit {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
old_len = new_len
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_from_tcp_connection_cb(con voidptr, buf &u8, bufsize int) !int {
|
||||||
|
mut r := unsafe { &net.TcpConn(con) }
|
||||||
|
return r.read_ptr(buf, bufsize)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn (req &Request) read_all_from_client_connection(r &net.TcpConn) ![]u8 {
|
||||||
|
mut content := strings.new_builder(4096)
|
||||||
|
req.receive_all_data_from_cb_in_builder(mut content, voidptr(r), read_from_tcp_connection_cb)!
|
||||||
|
return content
|
||||||
}
|
}
|
||||||
|
|
||||||
// referer returns 'Referer' header value of the given request
|
// referer returns 'Referer' header value of the given request
|
||||||
|
@ -152,7 +152,7 @@ fn test_server_custom_handler() {
|
|||||||
on_progress: fn (req &http.Request, chunk []u8, read_so_far u64) ! {
|
on_progress: fn (req &http.Request, chunk []u8, read_so_far u64) ! {
|
||||||
mut progress_calls := unsafe { &ProgressCalls(req.user_ptr) }
|
mut progress_calls := unsafe { &ProgressCalls(req.user_ptr) }
|
||||||
eprintln('>>>>>>>> on_progress, req.url: ${req.url} | got chunk.len: ${chunk.len:5}, read_so_far: ${read_so_far:8}, chunk: ${chunk#[0..30].bytestr()}')
|
eprintln('>>>>>>>> on_progress, req.url: ${req.url} | got chunk.len: ${chunk.len:5}, read_so_far: ${read_so_far:8}, chunk: ${chunk#[0..30].bytestr()}')
|
||||||
progress_calls.chunks << chunk
|
progress_calls.chunks << chunk.clone()
|
||||||
progress_calls.reads << read_so_far
|
progress_calls.reads << read_so_far
|
||||||
}
|
}
|
||||||
on_finish: fn (req &http.Request, final_size u64) ! {
|
on_finish: fn (req &http.Request, final_size u64) ! {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user