Download videos by hashtag; get JSON data without downloading videos; add limit option

This commit is contained in:
alexpin
2020-02-25 00:56:19 +02:00
parent 1b3f985f42
commit f724f0f2a2
13 changed files with 308 additions and 165 deletions

View File

@@ -1,101 +1,102 @@
package client
import (
"context"
"github.com/chromedp/chromedp"
"io/ioutil"
"log"
"os"
"time"
"context"
"github.com/chromedp/chromedp"
"io/ioutil"
"log"
"os"
"time"
config "../models/config"
utils "../utils"
config "../models/config"
utils "../utils"
)
// executeClientAction sets up a chromedp exec allocator and browser context,
// hands url and jsAction to runScrapeWithInfo under a timeout, and returns the
// string that call produces.
//
// Errors from creating the temp directory are passed to utils.CheckErr.
//
// NOTE(review): every statement below appears twice; this looks like the
// pre- and post-commit copies of the same line from a diff view - confirm
// before treating this text as compilable source.
func executeClientAction(url string, jsAction string) string {
// Throwaway directory used as the browser's user-data dir; removed on return.
dir, err := ioutil.TempDir("", "chromedp-example")
utils.CheckErr(err)
defer os.RemoveAll(dir)
dir, err := ioutil.TempDir("", "chromedp-example")
utils.CheckErr(err)
defer os.RemoveAll(dir)
// Start from chromedp's default allocator options, disable the GPU, point
// the browser at the temp dir, and set the "headless" flag to the negation
// of config.Config.Debug.
opts := append(chromedp.DefaultExecAllocatorOptions[:],
chromedp.DisableGPU,
chromedp.UserDataDir(dir),
chromedp.Flag("headless", !config.Config.Debug),
)
opts := append(chromedp.DefaultExecAllocatorOptions[:],
chromedp.DisableGPU,
chromedp.UserDataDir(dir),
chromedp.Flag("headless", !config.Config.Debug),
)
allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), opts...)
defer cancel()
allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), opts...)
defer cancel()
// Browser context on top of the allocator; chromedp log output goes to log.Printf.
ctx, cancel := chromedp.NewContext(
allocCtx,
chromedp.WithLogf(log.Printf),
)
defer cancel()
ctx, cancel := chromedp.NewContext(
allocCtx,
chromedp.WithLogf(log.Printf),
)
defer cancel()
// Bound the whole scrape: config.Config.Deadline is interpreted as seconds.
ctx, cancel = context.WithTimeout(ctx, time.Duration(config.Config.Deadline)*time.Second)
defer cancel()
ctx, cancel = context.WithTimeout(ctx, time.Duration(config.Config.Deadline)*time.Second)
defer cancel()
var jsOutput string
jsOutput = runScrapeWithInfo(ctx, jsAction, url)
var jsOutput string
jsOutput = runScrapeWithInfo(ctx, jsAction, url)
return jsOutput
return jsOutput
}
// runScrapeQuiet navigates to url, evaluates the script text returned by
// utils.ReadFileAsString("scraper.js") followed by jsAction, waits for the
// `video_urls` element to become visible, and returns that element's inner
// HTML. Any error from chromedp.Run is passed to utils.CheckErr.
//
// It is not called from any code visible in this view; executeClientAction
// uses runScrapeWithInfo instead.
//
// NOTE(review): the body appears twice; this looks like the pre- and
// post-commit copies from a diff view - confirm.
func runScrapeQuiet(ctx context.Context, jsAction string, url string) string {
var jsOutput string
err := chromedp.Run(ctx,
// Navigate to the target page
chromedp.Navigate(url),
// Execute url grabber script, then the caller-supplied action
chromedp.EvaluateAsDevTools(utils.ReadFileAsString("scraper.js"), &jsOutput),
chromedp.EvaluateAsDevTools(jsAction, &jsOutput),
// Wait until custom js finishes
chromedp.WaitVisible(`video_urls`),
// Grab url links from our element
chromedp.InnerHTML(`video_urls`, &jsOutput),
)
utils.CheckErr(err)
return jsOutput
var jsOutput string
err := chromedp.Run(ctx,
// Navigate to the target page
chromedp.Navigate(url),
// Execute url grabber script, then the caller-supplied action
chromedp.EvaluateAsDevTools(utils.ReadFileAsString("scraper.js"), &jsOutput),
chromedp.EvaluateAsDevTools(jsAction, &jsOutput),
// Wait until custom js finishes
chromedp.WaitVisible(`video_urls`),
// Grab url links from our element
chromedp.InnerHTML(`video_urls`, &jsOutput),
)
utils.CheckErr(err)
return jsOutput
}
// runScrapeWithInfo navigates to url, evaluates the script text returned by
// utils.ReadFileAsString("scraper.js") followed by jsAction, and then polls
// the page every 50ms, logging a progress line each time, until the page
// reports that it has finished. It finally waits for the `video_urls` element
// to become visible and returns that element's inner HTML.
//
// Every error from chromedp.Run is passed to utils.CheckErr.
//
// NOTE(review): old and new copies of each statement are interleaved below;
// this looks like a diff view without +/- markers - confirm. The second copy
// of the first Run call differs by adding chromedp.WaitReady("video"), and
// the second copy of the progress message uses different wording.
func runScrapeWithInfo(ctx context.Context, jsAction string, url string) string {
var jsOutput string
err := chromedp.Run(ctx,
// Navigate to the target page
chromedp.Navigate(url),
// Execute url grabber script, then the caller-supplied action
chromedp.EvaluateAsDevTools(utils.ReadFileAsString("scraper.js"), &jsOutput),
chromedp.EvaluateAsDevTools(jsAction, &jsOutput),
)
utils.CheckErr(err)
var jsOutput string
err := chromedp.Run(ctx,
// Navigate to the target page
chromedp.Navigate(url),
// Wait for a "video" node to be ready before running any script
chromedp.WaitReady("video"),
// Execute url grabber script, then the caller-supplied action
chromedp.EvaluateAsDevTools(utils.ReadFileAsString("scraper.js"), &jsOutput),
chromedp.EvaluateAsDevTools(jsAction, &jsOutput),
)
utils.CheckErr(err)
// Progress loop: read currentState.preloadCount for the log line, then
// currentState.finished to decide whether to stop.
// (currentState is presumably defined by scraper.js - verify.)
for {
err = chromedp.Run(ctx, chromedp.EvaluateAsDevTools("currentState.preloadCount.toString()", &jsOutput))
utils.CheckErr(err)
if jsOutput != "0" {
utils.Logf("\rPreloading... Currently loaded %s items.", jsOutput)
} else {
utils.Logf("\rPreloading...")
}
for {
err = chromedp.Run(ctx, chromedp.EvaluateAsDevTools("currentState.preloadCount.toString()", &jsOutput))
utils.CheckErr(err)
if jsOutput != "0" {
// NOTE(review): "founded" in the message below should probably read "found".
utils.Logf("\rPreloading... %s items have been founded.", jsOutput)
} else {
utils.Logf("\rPreloading...")
}
// Stop polling once the page reports the string "true".
err = chromedp.Run(ctx, chromedp.EvaluateAsDevTools("currentState.finished.toString()", &jsOutput))
utils.CheckErr(err)
if jsOutput == "true" {
break
}
err = chromedp.Run(ctx, chromedp.EvaluateAsDevTools("currentState.finished.toString()", &jsOutput))
utils.CheckErr(err)
if jsOutput == "true" {
break
}
// Pause between polls.
time.Sleep(50 * time.Millisecond)
}
time.Sleep(50 * time.Millisecond)
}
utils.Log("\nRetrieving items...")
err = chromedp.Run(ctx,
// Wait until custom js finishes
chromedp.WaitVisible(`video_urls`),
// Grab url links from our element
chromedp.InnerHTML(`video_urls`, &jsOutput),
)
utils.CheckErr(err)
utils.Log("\nRetrieving items...")
err = chromedp.Run(ctx,
// Wait until custom js finishes
chromedp.WaitVisible(`video_urls`),
// Grab url links from our element
chromedp.InnerHTML(`video_urls`, &jsOutput),
)
utils.CheckErr(err)
return jsOutput
return jsOutput
}

View File

@@ -0,0 +1,19 @@
package client
import (
models "../models"
config "../models/config"
"fmt"
)
// GetHashtagUploads - Get uploads marked with the hashtag at hashtagURL.
//
// It runs the page action "bootstrapIteratingVideos(<limit>)", with the limit
// taken from config.Config.Limit, through executeClientAction and converts
// the resulting string with models.ParseUploads.
func GetHashtagUploads(hashtagURL string) []models.Upload {
	return models.ParseUploads(
		executeClientAction(
			hashtagURL,
			fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit),
		),
	)
}
func GetHashtagUploadsJson(hashtagURL string) string {
jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)
return executeClientAction(hashtagURL, jsMethod)
}

View File

@@ -1,11 +1,19 @@
package client
import (
models "../models"
models "../models"
config "../models/config"
"fmt"
)
// GetMusicUploads - Get all uploads by given music
//
// The scrape output for url is obtained through executeClientAction and
// converted with models.ParseUploads.
//
// NOTE(review): two versions of the body are interleaved here, apparently the
// pre- and post-commit lines of a diff view - confirm. The first calls
// "bootstrapIteratingVideos()" with no argument; the second passes
// config.Config.Limit as the argument.
func GetMusicUploads(url string) []models.Upload {
actionOutput := executeClientAction(url, "bootstrapIteratingVideos()")
return models.ParseUploads(actionOutput)
// Build the page action with the configured limit as its argument.
jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)
actionOutput := executeClientAction(url, jsMethod)
return models.ParseUploads(actionOutput)
}
func GetMusicUploadsJson(url string) string {
jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)
return executeClientAction(url, jsMethod)
}

View File

@@ -2,10 +2,18 @@ package client
import (
models "../models"
config "../models/config"
"fmt"
)
// GetUserUploads - Get all uploads by user
//
// username is appended to `https://www.tiktok.com/@` to form the page URL;
// the scrape output from executeClientAction is converted with
// models.ParseUploads.
//
// NOTE(review): the first executeClientAction line below (no-argument
// "bootstrapIteratingVideos()") appears to be the pre-commit version kept by
// a diff view, followed by its replacement that passes config.Config.Limit -
// confirm.
func GetUserUploads(username string) []models.Upload {
actionOutput := executeClientAction(`https://www.tiktok.com/@`+username, "bootstrapIteratingVideos()")
// Build the page action with the configured limit as its argument.
jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)
actionOutput := executeClientAction(`https://www.tiktok.com/@`+username, jsMethod)
return models.ParseUploads(actionOutput)
}
// GetUserUploadsJson - Get the raw scrape output for a user's page, without
// parsing it into models.Upload values.
//
// username is appended to `https://www.tiktok.com/@` to form the page URL.
// The returned string is whatever executeClientAction produces for the action
// "bootstrapIteratingVideos(<limit>)", with the limit taken from
// config.Config.Limit (presumably JSON, going by the name).
func GetUserUploadsJson(username string) string {
	profileURL := `https://www.tiktok.com/@` + username
	script := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)
	return executeClientAction(profileURL, script)
}