5 changed files with 523 additions and 1 deletion
@ -0,0 +1,76 @@ |
|||
package ym |
|||
|
|||
import ( |
|||
"encoding/json" |
|||
"errors" |
|||
"fmt" |
|||
"github.com/parnurzeal/gorequest" |
|||
"net/http" |
|||
"net/url" |
|||
) |
|||
|
|||
// Package-level settings read by CommonVerify on every call.
var (
	// Token is sent as the "token" form field. CommonVerify returns an error
	// without contacting the server while it is empty.
	Token = ""

	// BaseURL is the endpoint CommonVerify posts to.
	BaseURL = &url.URL{
		Scheme: "http",
		Host:   "api.jfbym.com",
		Path:   "/api/YmServer/customApi",
	}

	// Proxy is the proxy CommonVerify routes its request through. Set it to
	// nil to skip the proxy.
	Proxy = &url.URL{
		Scheme: "http",
		Host:   "127.0.0.1:7890",
	}
)
|||
|
|||
// RespCommonVerify is the JSON envelope decoded from the response body in
// CommonVerify.
type RespCommonVerify struct {
	// Msg is the server's message; CommonVerify includes it in the error it
	// returns when Code is not 10000.
	Msg string `json:"msg"`
	// Code is the top-level status; CommonVerify treats 10000 as success.
	Code int `json:"code"`
	// Data is the nested payload of the response.
	Data struct {
		Code int `json:"code"`
		// Data is the value CommonVerify returns as its result.
		Data string  `json:"data"`
		Time float64 `json:"time"`
		// Externel keeps the spelling of the JSON key it is mapped to.
		Externel   int    `json:"externel"`
		UniqueCode string `json:"unique_code"`
		FilePath   string `json:"file_path"`
	} `json:"data"`
}
|||
|
|||
func CommonVerify(imgBase64 string, method int) (result string, errs []error) { |
|||
if Token == `` { |
|||
return ``, []error{errors.New(`token is empty`)} |
|||
} |
|||
content := fmt.Sprintf( |
|||
`image=%s&type=%d&token=%s`, imgBase64, |
|||
method, Token, |
|||
) |
|||
target := BaseURL.String() |
|||
req := gorequest.New().Post(target).Send(content).Set( |
|||
`Content-Type`, `application/x-www-form-urlencoded`, |
|||
) |
|||
if Proxy != nil { |
|||
req.Proxy(Proxy.String()) |
|||
} |
|||
var resp *http.Response |
|||
var body string |
|||
resp, body, errs = req.End() |
|||
if errs != nil { |
|||
return result, errs |
|||
} |
|||
if resp.StatusCode != 200 { |
|||
return result, []error{ |
|||
fmt.Errorf( |
|||
`server error: status code %d`, resp.StatusCode, |
|||
), |
|||
} |
|||
} |
|||
v := &RespCommonVerify{} |
|||
err := json.Unmarshal([]byte(body), v) |
|||
if err != nil { |
|||
return result, []error{errors.New(`server can not parse image data`)} |
|||
} |
|||
if v.Code != 10000 { |
|||
return result, []error{fmt.Errorf(`server error: %s`, v.Msg)} |
|||
} |
|||
result = v.Data.Data |
|||
return result, nil |
|||
} |
@ -0,0 +1,416 @@ |
|||
package crawler |
|||
|
|||
import ( |
|||
"errors" |
|||
"fmt" |
|||
"git.realxlfd.cc/RealXLFD/golib/net/apis/ym" |
|||
"git.realxlfd.cc/RealXLFD/golib/net/utils/cookie" |
|||
"github.com/tebeka/selenium" |
|||
"math" |
|||
"net/url" |
|||
"regexp" |
|||
"strconv" |
|||
"strings" |
|||
"time" |
|||
) |
|||
|
|||
type Mode struct { |
|||
driver selenium.WebDriver |
|||
executor []func() error |
|||
Ctx map[string]string |
|||
Selected []selenium.WebElement |
|||
} |
|||
|
|||
func (m *Mode) Exec() error { |
|||
for _, exec := range m.executor { |
|||
if err := exec(); err != nil { |
|||
return err |
|||
} |
|||
} |
|||
return nil |
|||
} |
|||
|
|||
// re captures the text between each pair of curly braces. The group is
// non-greedy, so a single line can carry several {...} arguments.
var re = regexp.MustCompile(`\{(.*?)\}`)
|||
|
|||
// scriptPre is a JavaScript template. Its single %s placeholder receives a CSS
// selector; the script draws the selected image onto a new canvas and returns
// the canvas contents as a data URL.
const scriptPre = `var canvas = document.createElement('canvas');
var img = document.querySelector("%s")
var ctx = canvas.getContext('2d');
canvas.width = img.width;
canvas.height = img.height;
ctx.drawImage(img, 0, 0);
return canvas.toDataURL();`
|||
|
|||
func Parse(process string, drive selenium.WebDriver) (*Mode, error) { |
|||
var err error |
|||
lines := strings.Split(process, "\r\n") |
|||
var actions []Action |
|||
for _, line := range lines { |
|||
if line == "" { |
|||
continue |
|||
} |
|||
var action Action |
|||
action, err = parseOperation(line) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
actions = append(actions, action) |
|||
} |
|||
return toMode(actions, drive), nil |
|||
} |
|||
|
|||
func toMode(actions []Action, driver selenium.WebDriver) *Mode { |
|||
var mode = &Mode{ |
|||
driver: driver, |
|||
Ctx: make(map[string]string), |
|||
} |
|||
var executor []func() error |
|||
for _, action := range actions { |
|||
switch action.Act { |
|||
case TO: |
|||
if action.Dst == "" { |
|||
executor = append( |
|||
executor, func() error { |
|||
if window, ok := mode.Ctx[action.Save]; ok { |
|||
return driver.SwitchWindow(window) |
|||
} |
|||
return fmt.Errorf( |
|||
`not found the window: %s`, action.Save, |
|||
) |
|||
}, |
|||
) |
|||
break |
|||
} |
|||
executor = append( |
|||
executor, func() error { |
|||
return driver.Get(action.Dst) |
|||
}, |
|||
) |
|||
case SELECT: |
|||
executor = append( |
|||
executor, func() error { |
|||
var err2 error |
|||
mode.Selected, err2 = driver.FindElements( |
|||
selenium.ByCSSSelector, action.Dst, |
|||
) |
|||
return err2 |
|||
}, |
|||
) |
|||
case GET: |
|||
executor = append( |
|||
executor, func() error { |
|||
var err error |
|||
if mode.Selected == nil { |
|||
return errors.New("no selected element") |
|||
} |
|||
if strings.ToLower(action.Dst) == "text" { |
|||
mode.Ctx[action.Save], err = mode.Selected[0].Text() |
|||
return err |
|||
} |
|||
mode.Ctx[action.Save], err = mode.Selected[0].GetAttribute( |
|||
action.Dst, |
|||
) |
|||
return err |
|||
}, |
|||
) |
|||
case CLICK: |
|||
executor = append( |
|||
executor, func() error { |
|||
var err error |
|||
if mode.Selected == nil { |
|||
return errors.New("no selected element") |
|||
} |
|||
for _, e := range mode.Selected { |
|||
err = e.Click() |
|||
if err != nil { |
|||
return err |
|||
} |
|||
} |
|||
return nil |
|||
}, |
|||
) |
|||
case FILL: |
|||
executor = append( |
|||
executor, func() error { |
|||
var err error |
|||
if mode.Selected == nil { |
|||
return errors.New("no selected element") |
|||
} |
|||
for _, e := range mode.Selected { |
|||
if action.Save != "" { |
|||
action.Dst = mode.Ctx[action.Save] |
|||
} |
|||
err = e.SendKeys(action.Dst) |
|||
if err != nil { |
|||
return err |
|||
} |
|||
} |
|||
return nil |
|||
}, |
|||
) |
|||
case COOKIE: |
|||
executor = append( |
|||
executor, func() error { |
|||
var err2 error |
|||
var cookies []selenium.Cookie |
|||
cookies, err2 = driver.GetCookies() |
|||
if err2 != nil { |
|||
return err2 |
|||
} |
|||
var rawCookie strings.Builder |
|||
for _, c := range cookies { |
|||
rawCookie.WriteString(c.Name) |
|||
rawCookie.WriteString("=") |
|||
rawCookie.WriteString(c.Value) |
|||
rawCookie.WriteString("; ") |
|||
} |
|||
mode.Ctx[action.Dst] = rawCookie.String() |
|||
return nil |
|||
}, |
|||
) |
|||
case WAIT: |
|||
executor = append( |
|||
executor, func() error { |
|||
start := time.Now() |
|||
s, err := strconv.Atoi(action.Save) |
|||
if err != nil { |
|||
return err |
|||
} |
|||
timeout := time.Duration(s) * time.Second |
|||
for { |
|||
if time.Since(start) > timeout { |
|||
return nil |
|||
} |
|||
time.Sleep( |
|||
200 * time. |
|||
Millisecond, |
|||
) // 简短暂停再次尝试,以避免过于频繁的查询
|
|||
if action.Dst == "" { |
|||
continue |
|||
} |
|||
if _, err = driver.FindElement( |
|||
selenium.ByCSSSelector, |
|||
action.Dst, |
|||
); err == nil { |
|||
break |
|||
} |
|||
} |
|||
return nil |
|||
}, |
|||
) |
|||
case HTML: |
|||
executor = append( |
|||
executor, func() error { |
|||
var err2 error |
|||
mode.Ctx[action.Save], err2 = driver.PageSource() |
|||
return err2 |
|||
}, |
|||
) |
|||
case SETCOOKIE: |
|||
executor = append( |
|||
executor, func() error { |
|||
var err error |
|||
err = driver.DeleteAllCookies() |
|||
if err != nil { |
|||
return err |
|||
} |
|||
cookies := cookie.New(action.Dst) |
|||
domain, err := driver.CurrentURL() |
|||
if err != nil { |
|||
return err |
|||
} |
|||
u, err := url.Parse(domain) |
|||
if err != nil { |
|||
panic(err) |
|||
} |
|||
for k, v := range cookies { |
|||
if err = driver.AddCookie( |
|||
&selenium.Cookie{ |
|||
Name: k, Value: v, |
|||
Domain: u.Host, Expiry: math.MaxUint32, |
|||
Path: u.Path, Secure: false, |
|||
}, |
|||
); err != nil { |
|||
return err |
|||
} |
|||
} |
|||
err = driver.Refresh() |
|||
return err |
|||
}, |
|||
) |
|||
case VERIFY: |
|||
executor = append( |
|||
executor, func() error { |
|||
script := fmt.Sprintf(scriptPre, action.Dst) |
|||
data, err := driver.ExecuteScript(script, nil) |
|||
if err != nil { |
|||
return err |
|||
} |
|||
if imgBase64, ok := data.(string); ok || imgBase64 == "" { |
|||
// 默认为6位或一下数字字母模式
|
|||
result, errs := ym.CommonVerify(imgBase64, ModeNL8) |
|||
if errs != nil { |
|||
return errs[0] |
|||
} |
|||
mode.Ctx[action.Save] = result |
|||
return nil |
|||
} |
|||
return errors.New("can not parse image data") |
|||
}, |
|||
) |
|||
case WINDOW: |
|||
executor = append( |
|||
executor, func() error { |
|||
var err error |
|||
mode.Ctx[action.Dst], err = driver.CurrentWindowHandle() |
|||
return err |
|||
}, |
|||
) |
|||
default: |
|||
panic("unhandled default case") |
|||
} |
|||
} |
|||
mode.executor = executor |
|||
return mode |
|||
} |
|||
|
|||
func parseOperation(line string) (actions Action, err error) { |
|||
parts := strings.Split(line, " ") |
|||
if len(parts) == 0 { |
|||
return actions, errors.New("empty line") |
|||
} |
|||
mark := strings.ToUpper(parts[0]) |
|||
if mark == "SETCOOKIE" { |
|||
matches := re.FindAllStringSubmatch(line, -1) |
|||
if len(matches) != 1 || matches[0][1] == "" { |
|||
return Action{}, fmt.Errorf("invalid argument: %s", parts[1]) |
|||
} |
|||
return Action{Act: SETCOOKIE, Dst: matches[0][1]}, nil |
|||
} |
|||
if mark == "SELECT" { |
|||
matches := re.FindAllStringSubmatch(line, -1) |
|||
cssSelector := strings.Builder{} |
|||
for _, match := range matches { |
|||
cssSelector.WriteString(match[1]) |
|||
} |
|||
return Action{Act: SELECT, Dst: cssSelector.String()}, nil |
|||
} |
|||
if mark == "FILL" { |
|||
matches := re.FindAllStringSubmatch(line, -1) |
|||
if len(matches) == 0 { |
|||
after := strings.ReplaceAll( |
|||
line, fmt.Sprintf( |
|||
`%s `, |
|||
parts[0], |
|||
), "", |
|||
) |
|||
if after != "" && after[0] == '.' { |
|||
return Action{Act: FILL, Dst: "", Save: after[1:]}, nil |
|||
} |
|||
return Action{}, fmt.Errorf("invalid argument: %s", line) |
|||
} |
|||
if len(matches) != 1 || matches[0][1] == "" { |
|||
return Action{}, fmt.Errorf("invalid argument: %s", line) |
|||
} |
|||
return Action{Act: FILL, Dst: matches[0][1]}, nil |
|||
} |
|||
if mark == "WAIT" { |
|||
matches := re.FindAllStringSubmatch(line, -1) |
|||
if len(matches) != 2 || matches[1][1] == "" { |
|||
return Action{}, fmt.Errorf("invalid argument: %s", parts[1]) |
|||
} |
|||
return Action{Act: WAIT, Dst: matches[0][1], Save: matches[1][1]}, nil |
|||
} |
|||
if mark == "VERIFY" { |
|||
matches := re.FindAllStringSubmatch(line, -1) |
|||
if len(matches) != 1 || matches[0][1] == "" { |
|||
return Action{}, fmt.Errorf("invalid argument: %s", parts[1]) |
|||
} |
|||
replace := fmt.Sprintf(`%s %s `, parts[0], matches[0][0]) |
|||
after := strings.ReplaceAll(line, replace, "") |
|||
afters := strings.Split(after, " ") |
|||
if len(afters) != 2 || afters[0] != "->" { |
|||
return Action{}, fmt.Errorf("invalid argument: %s", after) |
|||
} |
|||
return Action{Act: VERIFY, Dst: matches[0][1], Save: afters[1]}, nil |
|||
} |
|||
switch len(parts) { |
|||
case 1: |
|||
switch mark { |
|||
case "CLICK": |
|||
return Action{Act: CLICK}, nil |
|||
default: |
|||
return actions, fmt.Errorf("unknown operation: %s", line) |
|||
} |
|||
case 2: |
|||
switch mark { |
|||
case "TO": |
|||
if strings.HasPrefix(parts[1], ".") { |
|||
return Action{ |
|||
Act: TO, Dst: "", Save: strings.TrimPrefix(parts[1], "."), |
|||
}, nil |
|||
} |
|||
return Action{Act: TO, Dst: parts[1]}, nil |
|||
case "HTML": |
|||
return Action{Act: GET, Save: parts[1]}, nil |
|||
default: |
|||
return actions, fmt.Errorf("unknown operation: %s", line) |
|||
} |
|||
case 3: |
|||
switch mark { |
|||
case "WINDOW": |
|||
if parts[1] == "->" { |
|||
return Action{Act: WINDOW, Dst: parts[2]}, nil |
|||
} |
|||
return actions, fmt.Errorf("unknown operation: %s", line) |
|||
case "COOKIE": |
|||
if parts[1] == "->" { |
|||
return Action{Act: COOKIE, Dst: parts[2]}, nil |
|||
} |
|||
fallthrough |
|||
default: |
|||
return actions, fmt.Errorf("unknown operation: %s", line) |
|||
} |
|||
case 4: |
|||
switch mark { |
|||
case "GET": |
|||
if parts[2] == "->" { |
|||
return Action{Act: GET, Dst: parts[1], Save: parts[3]}, nil |
|||
} |
|||
fallthrough |
|||
default: |
|||
return actions, fmt.Errorf("unknown operation: %s", line) |
|||
} |
|||
} |
|||
return actions, fmt.Errorf("can not parse line: %s", line) |
|||
} |
|||
|
|||
const ( |
|||
TO Operation = iota |
|||
SELECT |
|||
GET |
|||
CLICK |
|||
FILL |
|||
COOKIE |
|||
WAIT |
|||
HTML |
|||
SETCOOKIE |
|||
VERIFY |
|||
WINDOW |
|||
) |
|||
// Type codes for ym.CommonVerify's method argument. The VERIFY step passes
// ModeNL8.

// ModeNLP6 is the type code 10104.
const ModeNLP6 = 10104

// ModeNL8 is the type code 10111.
const ModeNL8 = 10111
|||
|
|||
// VerifyMode is an integer type for verification modes.
// NOTE(review): nothing in the visible source uses it; the ModeNLP6 and
// ModeNL8 constants are untyped — confirm whether they were meant to be typed.
type VerifyMode int
|||
type Action struct { |
|||
Act Operation |
|||
Dst string |
|||
Save string |
|||
} |
|||
|
|||
// Operation identifies the kind of step an Action describes.
type Operation int
Loading…
Reference in new issue