172 lines
3.8 KiB
Go
172 lines
3.8 KiB
Go
package main
|
|
|
|
import (
|
|
"strings"
|
|
"github.com/PuerkitoBio/goquery"
|
|
lua "github.com/yuin/gopher-lua"
|
|
)
|
|
|
|
type HtmlParser struct {
|
|
Doc *goquery.Document
|
|
}
|
|
|
|
// luaHtmlParserTypeName is the name used both for the Lua type metatable and
// for the global through which scripts reach the type (see
// registerHtmlParserType).
const luaHtmlParserTypeName = "html"
|
|
var luaHtmlParserMethods = map[string]lua.LGFunction{
|
|
"select": select_html_node,
|
|
"remove": remove_html_node,
|
|
"get": get_document,
|
|
"get_attribute": get_html_node_attribute,
|
|
"rewrite_nodes": rewrite_html_nodes,
|
|
}
|
|
|
|
func registerHtmlParserType(L *lua.LState) {
|
|
logger.Debug("Registering html type")
|
|
mt := L.NewTypeMetatable(luaHtmlParserTypeName)
|
|
L.SetGlobal(luaHtmlParserTypeName, mt)
|
|
L.SetField(mt, "new", L.NewFunction(newHtmlParser))
|
|
L.SetField(mt, "__index", L.SetFuncs(L.NewTable(), luaHtmlParserMethods))
|
|
}
|
|
|
|
func checkHtmlParser(L *lua.LState) *HtmlParser {
|
|
ud := L.CheckUserData(1)
|
|
if v, ok := ud.Value.(*HtmlParser); ok {
|
|
return v
|
|
}
|
|
L.ArgError(1, "html_parser expected")
|
|
return nil
|
|
}
|
|
|
|
func newHtmlParser(L *lua.LState) int {
|
|
source := L.CheckString(1)
|
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(source))
|
|
if err != nil {
|
|
logger.Error(err)
|
|
return 0
|
|
}
|
|
|
|
// Return doc as userdata
|
|
ud := L.NewUserData()
|
|
ud.Value = doc
|
|
L.SetMetatable(ud, L.GetTypeMetatable(luaHtmlParserTypeName))
|
|
L.Push(ud)
|
|
|
|
return 1;
|
|
}
|
|
|
|
func select_html_node(L *lua.LState) int {
|
|
ud := L.CheckUserData(1)
|
|
selector := L.CheckString(2)
|
|
doc, ok := ud.Value.(*goquery.Document)
|
|
if !ok {
|
|
logger.Error("Expected html_parser userdata")
|
|
return 0
|
|
}
|
|
|
|
var result []string
|
|
doc.Find(selector).Each(func(_ int, s *goquery.Selection) {
|
|
pHtml, err := s.Html()
|
|
if err != nil {
|
|
logger.Error(err)
|
|
return
|
|
}
|
|
result = append(result, pHtml)
|
|
})
|
|
|
|
L.Push(lua.LString(strings.Join(result, "\n")))
|
|
return 1
|
|
}
|
|
|
|
func remove_html_node(L *lua.LState) int {
|
|
ud := L.CheckUserData(1)
|
|
selector := L.CheckString(2)
|
|
doc, ok := ud.Value.(*goquery.Document)
|
|
if !ok {
|
|
logger.Error("Expected html_parser userdata")
|
|
return 0
|
|
}
|
|
|
|
doc.Find(selector).Each(func(_ int, s *goquery.Selection) {
|
|
s.Remove()
|
|
})
|
|
|
|
return 0
|
|
}
|
|
|
|
func get_html_node_attribute(L *lua.LState) int {
|
|
// Get the node, for example <img src="local/image.jpg"> -> <img src="http://example.com/image.jpg">
|
|
// Get the attribute, src
|
|
ud := L.CheckUserData(1)
|
|
node := L.CheckString(2)
|
|
attribute := L.CheckString(3)
|
|
|
|
doc, ok := ud.Value.(*goquery.Document)
|
|
if !ok {
|
|
logger.Error("Expected html_parser userdata")
|
|
return 0
|
|
}
|
|
|
|
result := []string{}
|
|
doc.Find(node).Each(func(_ int, s *goquery.Selection) {
|
|
pHtml, ok := s.Attr(attribute)
|
|
if !ok {
|
|
logger.Error("Could not find attribute: ", attribute, " in node: ", node)
|
|
return
|
|
}
|
|
result = append(result, pHtml)
|
|
})
|
|
|
|
table := L.NewTable()
|
|
for i := range result {
|
|
table.Append(lua.LString(result[i]))
|
|
}
|
|
L.Push(table)
|
|
return 1
|
|
}
|
|
|
|
// article:rewrite("img", "src", {"http://example.com/image.jpg", "http://example.com/image2.jpg"})
|
|
func rewrite_html_nodes(L *lua.LState) int {
|
|
// Get the node, for example <img src="local/image.jpg"> -> <img src="http://example.com/image.jpg">
|
|
// Get the attribute, src
|
|
ud := L.CheckUserData(1)
|
|
node := L.CheckString(2)
|
|
attribute := L.CheckString(3)
|
|
rewriteArr := L.CheckTable(4)
|
|
|
|
doc, ok := ud.Value.(*goquery.Document)
|
|
if !ok {
|
|
logger.Error("Expected html_parser userdata")
|
|
}
|
|
|
|
rewriteStrings := []string{}
|
|
for i := 1; i <= rewriteArr.Len(); i++ {
|
|
rewriteStrings = append(rewriteStrings, rewriteArr.RawGetInt(i).String())
|
|
}
|
|
i := 0
|
|
doc.Find(node).Each(func(_ int, s *goquery.Selection) {
|
|
rewrite := rewriteStrings[i]
|
|
s.SetAttr(attribute, rewrite)
|
|
logger.Debug("Rewrote: ", attribute, " to: ", rewrite)
|
|
i++
|
|
})
|
|
return 0
|
|
}
|
|
|
|
func get_document(L *lua.LState) int {
|
|
ud := L.CheckUserData(1)
|
|
doc, ok := ud.Value.(*goquery.Document)
|
|
if !ok {
|
|
logger.Error("Expected html_parser userdata")
|
|
return 0
|
|
}
|
|
|
|
html, err := doc.Html()
|
|
if err != nil {
|
|
logger.Error(err)
|
|
return 0
|
|
}
|
|
|
|
L.Push(lua.LString(html))
|
|
return 1
|
|
}
|
|
|