package main import ( "strings" "github.com/PuerkitoBio/goquery" lua "github.com/yuin/gopher-lua" ) type HtmlParser struct { Doc *goquery.Document } const luaHtmlParserTypeName = "html" var luaHtmlParserMethods = map[string]lua.LGFunction{ "select": select_html_node, "remove": remove_html_node, "get": get_document, "get_attribute": get_html_node_attribute, "rewrite_nodes": rewrite_html_nodes, } func registerHtmlParserType(L *lua.LState) { logger.Debug("Registering html type") mt := L.NewTypeMetatable(luaHtmlParserTypeName) L.SetGlobal(luaHtmlParserTypeName, mt) L.SetField(mt, "new", L.NewFunction(newHtmlParser)) L.SetField(mt, "__index", L.SetFuncs(L.NewTable(), luaHtmlParserMethods)) } func checkHtmlParser(L *lua.LState) *HtmlParser { ud := L.CheckUserData(1) if v, ok := ud.Value.(*HtmlParser); ok { return v } L.ArgError(1, "html_parser expected") return nil } func newHtmlParser(L *lua.LState) int { source := L.CheckString(1) doc, err := goquery.NewDocumentFromReader(strings.NewReader(source)) if err != nil { logger.Error(err) return 0 } // Return doc as userdata ud := L.NewUserData() ud.Value = doc L.SetMetatable(ud, L.GetTypeMetatable(luaHtmlParserTypeName)) L.Push(ud) return 1; } func select_html_node(L *lua.LState) int { ud := L.CheckUserData(1) selector := L.CheckString(2) doc, ok := ud.Value.(*goquery.Document) if !ok { logger.Error("Expected html_parser userdata") return 0 } var result []string doc.Find(selector).Each(func(_ int, s *goquery.Selection) { pHtml, err := s.Html() if err != nil { logger.Error(err) return } result = append(result, pHtml) }) L.Push(lua.LString(strings.Join(result, "\n"))) return 1 } func remove_html_node(L *lua.LState) int { ud := L.CheckUserData(1) selector := L.CheckString(2) doc, ok := ud.Value.(*goquery.Document) if !ok { logger.Error("Expected html_parser userdata") return 0 } doc.Find(selector).Each(func(_ int, s *goquery.Selection) { s.Remove() }) return 0 } func get_html_node_attribute(L *lua.LState) int { // Get the node, for example -> // Get the attribute, src ud := L.CheckUserData(1) node := L.CheckString(2) attribute := L.CheckString(3) doc, ok := ud.Value.(*goquery.Document) if !ok { logger.Error("Expected html_parser userdata") return 0 } result := []string{} doc.Find(node).Each(func(_ int, s *goquery.Selection) { pHtml, ok := s.Attr(attribute) if !ok { logger.Error("Could not find attribute: ", attribute, " in node: ", node) return } result = append(result, pHtml) }) table := L.NewTable() for i := range result { table.Append(lua.LString(result[i])) } L.Push(table) return 1 } // article:rewrite("img", "src", {"http://example.com/image.jpg", "http://example.com/image2.jpg"}) func rewrite_html_nodes(L *lua.LState) int { // Get the node, for example -> // Get the attribute, src ud := L.CheckUserData(1) node := L.CheckString(2) attribute := L.CheckString(3) rewriteArr := L.CheckTable(4) doc, ok := ud.Value.(*goquery.Document) if !ok { logger.Error("Expected html_parser userdata") } rewriteStrings := []string{} for i := 1; i <= rewriteArr.Len(); i++ { rewriteStrings = append(rewriteStrings, rewriteArr.RawGetInt(i).String()) } i := 0 doc.Find(node).Each(func(_ int, s *goquery.Selection) { rewrite := rewriteStrings[i] s.SetAttr(attribute, rewrite) logger.Debug("Rewrote: ", attribute, " to: ", rewrite) i++ }) return 0 } func get_document(L *lua.LState) int { ud := L.CheckUserData(1) doc, ok := ud.Value.(*goquery.Document) if !ok { logger.Error("Expected html_parser userdata") return 0 } html, err := doc.Html() if err != nil { logger.Error(err) return 0 } L.Push(lua.LString(html)) return 1 }