何时何地检查通道是否不会获得更多数据？

我正在尝试解决练习：Web Crawler。

在本练习中，您将使用 Go 的并发特性来并行化网络爬虫。修改 Crawl 函数以并行获取 URL，且不重复获取同一个 URL。

我应该在什么时候检查所有 URL 是否都已被抓取？（或者说，我怎么知道队列中不会再有新的数据？）

```go
package main

import (
    "fmt"
)

type Result struct {
    Url string
    Depth int
}

type Stor struct {
    Queue chan Result
    Visited map[string]int
}

func NewStor() *Stor {
    return &Stor{
        Queue: make(chan Result,1000),
        Visited: map[string]int{},
    }
}

type Fetcher interface {
    // Fetch returns the body of URL and
    // a slice of URLs found on that page.
    Fetch(url string) (body string, urls []string, err error)
}

// Crawl uses fetcher to recursively crawl
// pages starting with url, to a maximum of depth.
func Crawl(res Result, fetcher Fetcher, stor *Stor) {
    defer func() {
        /*
        if len(stor.Queue) == 0 {
            close(stor.Queue)
        }
        */
        // this is wrong, it makes the channel closes too early
    }()
    if res.Depth <= 0 {
        return
    }
    // TODO: Don't fetch the same URL twice.
    url := res.Url
    stor.Visited[url]++
    if stor.Visited[url] > 1 {
        fmt.Println("skip:",stor.Visited[url],url)
        return
    }
    body, urls, err := fetcher.Fetch(url)
    if err != nil {
        fmt.Println(err)
        return
    }
    fmt.Printf("found: %s %q\n", url, body)
    for _, u := range urls {
        stor.Queue <- Result{u,res.Depth-1}
    }
    return
}

func main() {
    stor := NewStor()
    Crawl(Result{"http://golang.org/", 4}, fetcher, stor)
    for res := range stor.Queue {
        // TODO: Fetch URLs in parallel.
        go Crawl(res,fetcher,stor)
    }
}

// fakeFetcher is Fetcher that returns canned results.
type fakeFetcher map[string]*fakeResult

type fakeResult struct {
    body string
    urls []string
}

func (f fakeFetcher) Fetch(url string) (string, []string, error) {
    if res, ok := f[url]; ok {
        return res.body, res.urls, nil
    }
    return "", nil, fmt.Errorf("not found: %s", url)
}
```

程序的输出是死锁，因为 stor.Queue 通道从未被关闭。

查看完整描述

何时何地检查通道是否不会获得更多数据？

何时何地检查通道是否不会获得更多数据？

2 回答

添加回答

热搜

最近搜索清空

何时何地检查通道是否不会获得更多数据？

何时何地检查通道是否不会获得更多数据？

2 回答

添加回答