2 changed files with 155 additions and 0 deletions
@ -0,0 +1,78 @@ |
|||
package skysnow |
|||
|
|||
import ( |
|||
"bytes" |
|||
"errors" |
|||
"fmt" |
|||
"git.realxlfd.cc/RealXLFD/golib/net/utils/urlbuilder" |
|||
"github.com/PuerkitoBio/goquery" |
|||
"github.com/parnurzeal/gorequest" |
|||
"strings" |
|||
) |
|||
|
|||
// SubResult is a single hit from a search: the title and link taken from one
// result anchor.
type SubResult struct {
	// Title is the anchor's `title` attribute.
	Title string
	// Link is the anchor's `href` attribute, stored exactly as it appears in
	// the page.
	Link string
}

// SubResults is an ordered list of search hits.
type SubResults []SubResult
|||
|
|||
func (c *Client) FindSubs(keyword string) (SubResults, []error) { |
|||
// 构建咨询字符串
|
|||
target, err := urlbuilder.New(BaseURL.String()).AddPath( |
|||
`/torrents_list_ajax.php`, |
|||
).AddQueries( |
|||
fmt.Sprintf( |
|||
"cat=16&mod=torrents&search=%s", keyword, |
|||
), |
|||
).Get() |
|||
if err != nil { |
|||
panic(err) |
|||
} |
|||
// 发送网络请求
|
|||
|
|||
req := gorequest.New().Get(target.String()).Set(`Cookie`, c.Cookies).Set( |
|||
`User-Agent`, AgentChrome, |
|||
) |
|||
if Proxy != nil { |
|||
req.Proxy(Proxy.String()) |
|||
} |
|||
resp, body, errs := req.End() |
|||
if errs != nil { |
|||
return nil, errs |
|||
} |
|||
if resp.StatusCode != 200 { |
|||
return nil, []error{errors.New(`server error`)} |
|||
} |
|||
// 解析数据
|
|||
htmlBody := fmt.Sprintf(`<html><body>%s</body></html>`, body) |
|||
var results SubResults |
|||
doc, _ := goquery.NewDocumentFromReader(bytes.NewReader([]byte(htmlBody))) |
|||
doc.Find(`.embedded.torrent_title > a:first-child`).Each( |
|||
func(i int, s *goquery.Selection) { |
|||
title, _ := s.Attr(`title`) |
|||
link, _ := s.Attr(`href`) |
|||
results = append( |
|||
results, SubResult{ |
|||
Title: title, |
|||
Link: link, |
|||
}, |
|||
) |
|||
}, |
|||
) |
|||
return results, nil |
|||
} |
|||
|
|||
func (s SubResults) Filter(studio string) SubResults { |
|||
var results SubResults |
|||
for _, sub := range s { |
|||
if strings.Contains(sub.Title, studio) { |
|||
results = append(results, sub) |
|||
} |
|||
} |
|||
return results |
|||
} |
|||
|
|||
func (s SubResult) Download() error { |
|||
// TODO
|
|||
return nil |
|||
} |
@ -0,0 +1,77 @@ |
|||
package crawler |
|||
|
|||
import ( |
|||
"fmt" |
|||
"github.com/tebeka/selenium" |
|||
"github.com/tebeka/selenium/chrome" |
|||
) |
|||
|
|||
// State describes whether a Crawler's driver service is running.
type State int

const (
	// Stopped is the zero State: the service is not running.
	Stopped State = iota
	// Runing means the service has been started.
	//
	// Deprecated: misspelled; use Running. Kept so existing callers compile.
	Runing
	// Running is the correctly spelled name for Runing; both have the same
	// value.
	Running = Runing
)
|||
type Crawler struct { |
|||
Service *selenium.Service |
|||
count int |
|||
headless bool |
|||
port int |
|||
state State |
|||
} |
|||
|
|||
func Init(chromeDriverPath string, Port int, headless bool) (*Crawler, error) { |
|||
opts := []selenium.ServiceOption{ |
|||
selenium.ChromeDriver(chromeDriverPath), // 指定 ChromeDriver 路径
|
|||
selenium.Output(nil), // 输出日志到 os.Stderr
|
|||
} |
|||
service, err := selenium.NewChromeDriverService( |
|||
chromeDriverPath, |
|||
Port, opts..., |
|||
) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
crawler := &Crawler{ |
|||
Service: service, |
|||
count: 0, |
|||
headless: headless, |
|||
port: Port, |
|||
state: Runing, |
|||
} |
|||
return crawler, nil |
|||
} |
|||
|
|||
func (c *Crawler) Stop() { |
|||
if c.state != Runing { |
|||
return |
|||
} |
|||
c.Service.Stop() |
|||
c.state = Stopped |
|||
} |
|||
func (c *Crawler) CreateInstance(url string) ( |
|||
driver selenium.WebDriver, err error, |
|||
) { |
|||
caps := selenium.Capabilities{"browserName": "chrome"} |
|||
chromeCaps := chrome.Capabilities{} |
|||
if c.headless { |
|||
chromeCaps.Args = []string{ |
|||
"--headless", |
|||
"--no-sandbox", |
|||
"--disable-gpu", |
|||
} |
|||
} |
|||
caps.AddChrome(chromeCaps) |
|||
driver, err = selenium.NewRemote( |
|||
caps, |
|||
fmt.Sprintf("http://localhost:%d/wd/hub", c.port), |
|||
) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
err = driver.Get(url) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
return driver, nil |
|||
} |
Loading…
Reference in new issue