TTDL-3 Add `deadline` flag

This commit is contained in:
Pijus Kamandulis 2020-01-30 18:59:34 +02:00
parent 2af96e899e
commit 673bbe1340
6 changed files with 68 additions and 147 deletions

View File

@ -20,6 +20,7 @@ Clone this repository and run `go build` to build the executable.
* `-output some_directory` - Output path (default "./downloads") * `-output some_directory` - Output path (default "./downloads")
* `-metadata` - Write video metadata to a .json file * `-metadata` - Write video metadata to a .json file
* `-batch-file` - File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored. * `-batch-file` - File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored.
* `-deadline` - Sets the timeout for scraper logic in seconds (used as a workaround for context deadline exceeded error) (default 1500)
## Acknowledgments ## Acknowledgments
This software uses the **chromedp** for web scraping, it can be found here: https://github.com/chromedp/chromedp \ This software uses the **chromedp** for web scraping, it can be found here: https://github.com/chromedp/chromedp \

View File

@ -0,0 +1,58 @@
package client
import (
"context"
"github.com/chromedp/chromedp"
"io/ioutil"
"log"
"os"
"time"
models "../models"
utils "../utils"
)
// executeClientAction - Opens url in a chromedp-driven browser, evaluates
// scraper.js followed by jsAction, and returns the inner HTML of the
// `video_urls` element.
//
// Any error from the browser run is fatal: it is logged and the process
// exits. The browser work itself lives in runClientAction so that its
// deferred cleanup has already completed by the time log.Fatal is called;
// log.Fatal exits the process without running deferred functions.
func executeClientAction(url string, jsAction string) string {
	jsOutput, err := runClientAction(url, jsAction)
	if err != nil {
		log.Fatal(err)
	}
	return jsOutput
}

// runClientAction - Performs the browser session for executeClientAction and
// reports failures to the caller instead of exiting, so the temporary user
// data directory is removed and all contexts are cancelled on every path.
func runClientAction(url string, jsAction string) (string, error) {
	// Throwaway user data directory for this browser session.
	dir, err := ioutil.TempDir("", "chromedp-example")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(dir)

	// Run headless unless debug mode is enabled.
	opts := append(chromedp.DefaultExecAllocatorOptions[:],
		chromedp.DisableGPU,
		chromedp.UserDataDir(dir),
		chromedp.Flag("headless", !models.Config.Debug),
	)

	allocCtx, cancelAlloc := chromedp.NewExecAllocator(context.Background(), opts...)
	defer cancelAlloc()

	browserCtx, cancelBrowser := chromedp.NewContext(
		allocCtx,
		chromedp.WithLogf(log.Printf),
	)
	defer cancelBrowser()

	// Bound the whole run by the user-configurable deadline (in seconds).
	runCtx, cancelRun := context.WithTimeout(browserCtx, time.Duration(models.Config.Deadline)*time.Second)
	defer cancelRun()

	var jsOutput string
	err = chromedp.Run(runCtx,
		// Navigate to the requested page
		chromedp.Navigate(url),
		// Load the scraper script, then start the requested action
		chromedp.EvaluateAsDevTools(utils.ReadFileAsString("scraper.js"), &jsOutput),
		chromedp.EvaluateAsDevTools(jsAction, &jsOutput),
		// Wait until custom js finishes
		chromedp.WaitVisible(`video_urls`),
		// Grab the result from our element
		chromedp.InnerHTML(`video_urls`, &jsOutput),
	)
	if err != nil {
		return "", err
	}
	return jsOutput, nil
}

View File

@ -1,58 +1,11 @@
package client package client
import ( import (
"context"
"github.com/chromedp/chromedp"
"io/ioutil"
"log"
"os"
"time"
models "../models" models "../models"
utils "../utils"
) )
// GetMusicUploads - Get all uploads by given music // GetMusicUploads - Get all uploads by given music
func GetMusicUploads(url string) []models.Upload { func GetMusicUploads(url string) []models.Upload {
dir, err := ioutil.TempDir("", "chromedp-example") actionOutput := executeClientAction(url, "bootstrapIteratingVideos()")
if err != nil { return models.ParseUploads(actionOutput)
panic(err)
}
defer os.RemoveAll(dir)
opts := append(chromedp.DefaultExecAllocatorOptions[:],
chromedp.DisableGPU,
chromedp.UserDataDir(dir),
chromedp.Flag("headless", !models.Config.Debug),
)
allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), opts...)
defer cancel()
ctx, cancel := chromedp.NewContext(
allocCtx,
chromedp.WithLogf(log.Printf),
)
defer cancel()
ctx, cancel = context.WithTimeout(ctx, 1500*time.Second)
defer cancel()
var jsOutput string
err = chromedp.Run(ctx,
// Navigate to user's page
chromedp.Navigate(url),
// Execute url grabber script
chromedp.EvaluateAsDevTools(utils.ReadFileAsString("scraper.js"), &jsOutput),
chromedp.EvaluateAsDevTools("bootstrapIteratingVideos()", &jsOutput),
// Wait until custom js finishes
chromedp.WaitVisible(`video_urls`),
// Grab url links from our element
chromedp.InnerHTML(`video_urls`, &jsOutput),
)
if err != nil {
log.Fatal(err)
}
return models.ParseUploads(jsOutput)
} }

View File

@ -1,58 +1,11 @@
package client package client
import ( import (
"context"
"github.com/chromedp/chromedp"
"io/ioutil"
"log"
"os"
"time"
models "../models" models "../models"
utils "../utils"
) )
// GetUserUploads - Get all uploads by user // GetUserUploads - Get all uploads by user
func GetUserUploads(username string) []models.Upload { func GetUserUploads(username string) []models.Upload {
dir, err := ioutil.TempDir("", "chromedp-example") actionOutput := executeClientAction(`https://www.tiktok.com/@`+username, "bootstrapIteratingVideos()")
if err != nil { return models.ParseUploads(actionOutput)
panic(err)
}
defer os.RemoveAll(dir)
opts := append(chromedp.DefaultExecAllocatorOptions[:],
chromedp.DisableGPU,
chromedp.UserDataDir(dir),
chromedp.Flag("headless", !models.Config.Debug),
)
allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), opts...)
defer cancel()
ctx, cancel := chromedp.NewContext(
allocCtx,
chromedp.WithLogf(log.Printf),
)
defer cancel()
ctx, cancel = context.WithTimeout(ctx, 1500*time.Second)
defer cancel()
var jsOutput string
err = chromedp.Run(ctx,
// Navigate to user's page
chromedp.Navigate(`https://www.tiktok.com/@`+username),
// Execute url grabber script
chromedp.EvaluateAsDevTools(utils.ReadFileAsString("scraper.js"), &jsOutput),
chromedp.EvaluateAsDevTools("bootstrapIteratingVideos()", &jsOutput),
// Wait until custom js finishes
chromedp.WaitVisible(`video_urls`),
// Grab url links from our element
chromedp.InnerHTML(`video_urls`, &jsOutput),
)
if err != nil {
log.Fatal(err)
}
return models.ParseUploads(jsOutput)
} }

View File

@ -1,58 +1,11 @@
package client package client
import ( import (
"context"
"github.com/chromedp/chromedp"
"io/ioutil"
"log"
"os"
"time"
models "../models" models "../models"
utils "../utils"
) )
// GetVideoDetails - returns details of video // GetVideoDetails - returns details of video
func GetVideoDetails(videoURL string) models.Upload { func GetVideoDetails(videoURL string) models.Upload {
dir, err := ioutil.TempDir("", "chromedp-example") actionOutput := executeClientAction(videoURL, "bootstrapGetCurrentVideo()")
if err != nil { return models.ParseUpload(actionOutput)
panic(err)
}
defer os.RemoveAll(dir)
opts := append(chromedp.DefaultExecAllocatorOptions[:],
chromedp.DisableGPU,
chromedp.UserDataDir(dir),
chromedp.Flag("headless", !models.Config.Debug),
)
allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), opts...)
defer cancel()
ctx, cancel := chromedp.NewContext(
allocCtx,
chromedp.WithLogf(log.Printf),
)
defer cancel()
ctx, cancel = context.WithTimeout(ctx, 1500*time.Second)
defer cancel()
var jsOutput string
err = chromedp.Run(ctx,
// Navigate to user's page
chromedp.Navigate(videoURL),
// Execute url grabber script
chromedp.EvaluateAsDevTools(utils.ReadFileAsString("scraper.js"), &jsOutput),
chromedp.EvaluateAsDevTools("bootstrapGetCurrentVideo()", &jsOutput),
// Wait until custom js finishes
chromedp.WaitVisible(`video_urls`),
// Grab url links from our element
chromedp.InnerHTML(`video_urls`, &jsOutput),
)
if err != nil {
log.Fatal(err)
}
return models.ParseUpload(jsOutput)
} }

View File

@ -15,6 +15,7 @@ var Config struct {
BatchFilePath string BatchFilePath string
Debug bool Debug bool
MetaData bool MetaData bool
Deadline int
} }
// GetConfig - Returns Config object // GetConfig - Returns Config object
@ -23,6 +24,7 @@ func GetConfig() {
batchFilePath := flag.String("batch-file", "", "File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored.") batchFilePath := flag.String("batch-file", "", "File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored.")
debug := flag.Bool("debug", false, "Enables debug mode") debug := flag.Bool("debug", false, "Enables debug mode")
metadata := flag.Bool("metadata", false, "Write video metadata to a .json file") metadata := flag.Bool("metadata", false, "Write video metadata to a .json file")
deadline := flag.Int("deadline", 1500, "Sets the timout for scraper logic in seconds (used as a workaround for 'context deadline exceeded' error)")
flag.Parse() flag.Parse()
args := flag.Args() args := flag.Args()
@ -41,6 +43,7 @@ func GetConfig() {
Config.BatchFilePath = *batchFilePath Config.BatchFilePath = *batchFilePath
Config.Debug = *debug Config.Debug = *debug
Config.MetaData = *metadata Config.MetaData = *metadata
Config.Deadline = *deadline
} }
// GetUsername - Get's username from passed URL param // GetUsername - Get's username from passed URL param