跳至主要内容

go spider example hello golang crawler

package main

import (
"github.com/PuerkitoBio/goquery"
"github.com/hu17889/go_spider/core/common/page"
"github.com/hu17889/go_spider/core/pipeline"
"github.com/hu17889/go_spider/core/spider"
)

// MyPageProcesser is the page processor handed to the spider in main.
// It has no fields and therefore carries no state between pages.
type MyPageProcesser struct{}

// NewMyPageProcesser returns a pointer to a freshly allocated, zero-valued
// MyPageProcesser.
func NewMyPageProcesser() *MyPageProcesser {
	return new(MyPageProcesser)
}

// Process parses the HTML DOM of a fetched page and prints the text of every
// element matching the selector below, one element per line.
// Package goquery (http://godoc.org/github.com/PuerkitoBio/goquery) is used to parse html.
//
// The receiver is left unnamed because the method uses no processor state,
// matching Finish.
func (*MyPageProcesser) Process(p *page.Page) {
	// Skip pages whose download failed, as the upstream go_spider example
	// does: report the error message and do not touch the HTML parser.
	if !p.IsSucc() {
		println(p.Errormsg())
		return
	}

	doc := p.GetHtmlParser()
	// The attribute selector requires the class attribute to be exactly
	// "flex-middle"; it is intentionally kept as written.
	doc.Find("td div[class='flex-middle']").Each(func(_ int, s *goquery.Selection) {
		println(s.Text())
	})
}
// Finish is intentionally a no-op: this processor has nothing to clean up.
func (*MyPageProcesser) Finish() {}
func main() {
spider.NewSpider(NewMyPageProcesser(), "TaskName").
AddUrl("http://101.200.54.63/", "html").    // start url, html is the responce type ("html" or "json")
AddPipeline(pipeline.NewPipelineConsole()). // print result on screen
SetThreadnum(3).                            // crawl request by three Coroutines
Run()
}

评论

此博客中的热门博文

android hide actionbar

public class MainActivity extends Activity { ActionBar actionBar; //声明ActionBar @Override protected void onCreate( Bundle savedInstanceState) { super .onCreate(savedInstanceState); setContentView( R .layout.activity_main); actionBar = getSupportActionBar(); //得 到ActionBar actionBar.hide(); //隐藏ActionBar } }

go url encoding

func QueryUnescape: `func QueryUnescape(s string) (string, error)`. QueryUnescape does the inverse transformation of QueryEscape, converting %AB into the byte 0xAB and '+' into ' ' (space). It returns an error if any % is not followed by two hexadecimal digits.