Download videos by hashtag; get json data without video downloading; limit option

This commit is contained in:
alexpin 2020-02-25 00:56:19 +02:00
parent 1b3f985f42
commit f724f0f2a2
13 changed files with 308 additions and 165 deletions

View File

@ -22,6 +22,8 @@ Clone this repository and run `go build` to build the executable.
* `-batch-file` - File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored.
* `-deadline` - Sets the timeout for scraper logic in seconds (used as a workaround for context deadline exceeded error) (default 1500)
* `-quiet` - Suppress output
* `-json` - Prints all data scraped from TikTok as JSON, without downloading any videos
* `-limit` - Sets the maximum number of videos to download (default 0, meaning no limit)
## Acknowledgments
This software uses the **chromedp** for web scraping, it can be found here: https://github.com/chromedp/chromedp \

View File

@ -1,101 +1,102 @@
package client
import (
"context"
"github.com/chromedp/chromedp"
"io/ioutil"
"log"
"os"
"time"
"context"
"github.com/chromedp/chromedp"
"io/ioutil"
"log"
"os"
"time"
config "../models/config"
utils "../utils"
config "../models/config"
utils "../utils"
)
// executeClientAction - Starts a chromedp browser session, runs the given
// scraper action against url and returns the scraper output.
//
// A temporary user-data directory is created for the browser and removed when
// the function returns. The browser runs headless unless config.Config.Debug
// is set, and the session context times out after config.Config.Deadline
// seconds. jsAction is the JavaScript expression passed on to
// runScrapeWithInfo. Errors are handed to utils.CheckErr.
func executeClientAction(url string, jsAction string) string {
dir, err := ioutil.TempDir("", "chromedp-example")
utils.CheckErr(err)
defer os.RemoveAll(dir)
dir, err := ioutil.TempDir("", "chromedp-example")
utils.CheckErr(err)
defer os.RemoveAll(dir)
opts := append(chromedp.DefaultExecAllocatorOptions[:],
chromedp.DisableGPU,
chromedp.UserDataDir(dir),
chromedp.Flag("headless", !config.Config.Debug),
)
opts := append(chromedp.DefaultExecAllocatorOptions[:],
chromedp.DisableGPU,
chromedp.UserDataDir(dir),
chromedp.Flag("headless", !config.Config.Debug),
)
allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), opts...)
defer cancel()
allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), opts...)
defer cancel()
ctx, cancel := chromedp.NewContext(
allocCtx,
chromedp.WithLogf(log.Printf),
)
defer cancel()
ctx, cancel := chromedp.NewContext(
allocCtx,
chromedp.WithLogf(log.Printf),
)
defer cancel()
ctx, cancel = context.WithTimeout(ctx, time.Duration(config.Config.Deadline)*time.Second)
defer cancel()
ctx, cancel = context.WithTimeout(ctx, time.Duration(config.Config.Deadline)*time.Second)
defer cancel()
var jsOutput string
jsOutput = runScrapeWithInfo(ctx, jsAction, url)
var jsOutput string
jsOutput = runScrapeWithInfo(ctx, jsAction, url)
return jsOutput
return jsOutput
}
// runScrapeQuiet - Navigates to url, evaluates scraper.js and then jsAction,
// waits for the `video_urls` element to become visible and returns its inner
// HTML. Any chromedp error is handed to utils.CheckErr.
func runScrapeQuiet(ctx context.Context, jsAction string, url string) string {
var jsOutput string
err := chromedp.Run(ctx,
// Navigate to user's page
chromedp.Navigate(url),
// Execute url grabber script
chromedp.EvaluateAsDevTools(utils.ReadFileAsString("scraper.js"), &jsOutput),
chromedp.EvaluateAsDevTools(jsAction, &jsOutput),
// Wait until custom js finishes
chromedp.WaitVisible(`video_urls`),
// Grab url links from our element
chromedp.InnerHTML(`video_urls`, &jsOutput),
)
utils.CheckErr(err)
return jsOutput
var jsOutput string
err := chromedp.Run(ctx,
// Navigate to user's page
chromedp.Navigate(url),
// Execute url grabber script
chromedp.EvaluateAsDevTools(utils.ReadFileAsString("scraper.js"), &jsOutput),
chromedp.EvaluateAsDevTools(jsAction, &jsOutput),
// Wait until custom js finishes
chromedp.WaitVisible(`video_urls`),
// Grab url links from our element
chromedp.InnerHTML(`video_urls`, &jsOutput),
)
utils.CheckErr(err)
return jsOutput
}
// runScrapeWithInfo - Navigates to url, evaluates scraper.js and then
// jsAction, and logs progress while the page script is running.
//
// It polls the page's currentState.preloadCount and currentState.finished
// values, sleeping 50ms between rounds and logging the preload count, until
// finished reads "true". It then waits for the `video_urls` element and
// returns its inner HTML. Any chromedp error is handed to utils.CheckErr.
func runScrapeWithInfo(ctx context.Context, jsAction string, url string) string {
var jsOutput string
err := chromedp.Run(ctx,
// Navigate to user's page
chromedp.Navigate(url),
// Execute url grabber script
chromedp.EvaluateAsDevTools(utils.ReadFileAsString("scraper.js"), &jsOutput),
chromedp.EvaluateAsDevTools(jsAction, &jsOutput),
)
utils.CheckErr(err)
var jsOutput string
err := chromedp.Run(ctx,
// Navigate to user's page
chromedp.Navigate(url),
// Execute url grabber script
chromedp.WaitReady("video"),
chromedp.EvaluateAsDevTools(utils.ReadFileAsString("scraper.js"), &jsOutput),
chromedp.EvaluateAsDevTools(jsAction, &jsOutput),
)
utils.CheckErr(err)
for {
err = chromedp.Run(ctx, chromedp.EvaluateAsDevTools("currentState.preloadCount.toString()", &jsOutput))
utils.CheckErr(err)
if jsOutput != "0" {
utils.Logf("\rPreloading... Currently loaded %s items.", jsOutput)
} else {
utils.Logf("\rPreloading...")
}
for {
err = chromedp.Run(ctx, chromedp.EvaluateAsDevTools("currentState.preloadCount.toString()", &jsOutput))
utils.CheckErr(err)
if jsOutput != "0" {
utils.Logf("\rPreloading... %s items have been founded.", jsOutput)
} else {
utils.Logf("\rPreloading...")
}
err = chromedp.Run(ctx, chromedp.EvaluateAsDevTools("currentState.finished.toString()", &jsOutput))
utils.CheckErr(err)
if jsOutput == "true" {
break
}
err = chromedp.Run(ctx, chromedp.EvaluateAsDevTools("currentState.finished.toString()", &jsOutput))
utils.CheckErr(err)
if jsOutput == "true" {
break
}
time.Sleep(50 * time.Millisecond)
}
time.Sleep(50 * time.Millisecond)
}
utils.Log("\nRetrieving items...")
err = chromedp.Run(ctx,
// Wait until custom js finishes
chromedp.WaitVisible(`video_urls`),
// Grab url links from our element
chromedp.InnerHTML(`video_urls`, &jsOutput),
)
utils.CheckErr(err)
utils.Log("\nRetrieving items...")
err = chromedp.Run(ctx,
// Wait until custom js finishes
chromedp.WaitVisible(`video_urls`),
// Grab url links from our element
chromedp.InnerHTML(`video_urls`, &jsOutput),
)
utils.CheckErr(err)
return jsOutput
return jsOutput
}

View File

@ -0,0 +1,19 @@
package client
import (
models "../models"
config "../models/config"
"fmt"
)
// GetHashtagUploads - Scrapes the hashtag page at hashtagURL and returns the
// parsed uploads. config.Config.Limit is passed to the page script as the
// video limit.
func GetHashtagUploads(hashtagURL string) []models.Upload {
	scraped := executeClientAction(
		hashtagURL,
		fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit),
	)
	return models.ParseUploads(scraped)
}
func GetHashtagUploadsJson(hashtagURL string) string {
jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)
return executeClientAction(hashtagURL, jsMethod)
}

View File

@ -1,11 +1,19 @@
package client
import (
models "../models"
models "../models"
config "../models/config"
"fmt"
)
// GetMusicUploads - Runs the scraper against the given music url and returns
// the scraper output parsed by models.ParseUploads.
func GetMusicUploads(url string) []models.Upload {
actionOutput := executeClientAction(url, "bootstrapIteratingVideos()")
return models.ParseUploads(actionOutput)
jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)
actionOutput := executeClientAction(url, jsMethod)
return models.ParseUploads(actionOutput)
}
func GetMusicUploadsJson(url string) string {
jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)
return executeClientAction(url, jsMethod)
}

View File

@ -2,10 +2,18 @@ package client
import (
models "../models"
config "../models/config"
"fmt"
)
// GetUserUploads - Runs the scraper against the profile page
// https://www.tiktok.com/@<username> and returns the scraper output parsed by
// models.ParseUploads.
func GetUserUploads(username string) []models.Upload {
actionOutput := executeClientAction(`https://www.tiktok.com/@`+username, "bootstrapIteratingVideos()")
jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)
actionOutput := executeClientAction(`https://www.tiktok.com/@`+username, jsMethod)
return models.ParseUploads(actionOutput)
}
func GetUserUploadsJson(username string) string {
jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)
return executeClientAction(`https://www.tiktok.com/@`+username, jsMethod)
}

View File

@ -1,48 +1,57 @@
package config
import (
"flag"
"fmt"
"os"
"flag"
"fmt"
"os"
)
// Config - Runtime configuration, filled in from command-line flags by
// GetConfig.
var Config struct {
URL string
OutputPath string
BatchFilePath string
Debug bool
MetaData bool
Quiet bool
Deadline int
URL string
OutputPath string
BatchFilePath string
Debug bool
MetaData bool
Quiet bool
Deadline int
// Limit is the video count limit taken from the -limit flag (default 0).
Limit int
// JSONOnly is set by the -json flag: only JSON data is requested from the
// scraper, without video downloading.
JSONOnly bool
}
// GetConfig - Parses the command-line flags and stores the result in the
// package-level Config; it does not return a value.
//
// When no positional argument is given and -batch-file is empty, it prints the
// usage text and exits with status 2. Otherwise the last positional argument
// becomes Config.URL (empty when there is none). Passing -json also forces
// Config.Quiet to true.
func GetConfig() {
outputPath := flag.String("output", "./downloads", "Output path")
batchFilePath := flag.String("batch-file", "", "File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored.")
debug := flag.Bool("debug", false, "Enables debug mode")
metadata := flag.Bool("metadata", false, "Write video metadata to a .json file")
quiet := flag.Bool("quiet", false, "Supress output")
deadline := flag.Int("deadline", 1500, "Sets the timout for scraper logic in seconds (used as a workaround for 'context deadline exceeded' error)")
flag.Parse()
outputPath := flag.String("output", "./downloads", "Output path")
batchFilePath := flag.String("batch-file", "", "File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored.")
debug := flag.Bool("debug", false, "Enables debug mode")
metadata := flag.Bool("metadata", false, "Write video metadata to a .json file")
quiet := flag.Bool("quiet", false, "Supress output")
deadline := flag.Int("deadline", 1500, "Sets the timout for scraper logic in seconds (used as a workaround for 'context deadline exceeded' error)")
limit := flag.Int("limit", 0, "Sets the videos count limit (useful when there too many videos from the user or by hashtag)")
jsonOnly := flag.Bool("json", false, "Just get JSON data from scraper (without video downloading)")
flag.Parse()
args := flag.Args()
if len(args) < 1 && *batchFilePath == "" {
fmt.Println("Usage: tiktok-dl [OPTIONS] TIKTOK_USERNAME|TIKTOK_URL")
fmt.Println(" or: tiktok-dl [OPTIONS] -batch-file path/to/users.txt")
os.Exit(2)
}
args := flag.Args()
if len(args) < 1 && *batchFilePath == "" {
fmt.Println("Usage: tiktok-dl [OPTIONS] TIKTOK_USERNAME|TIKTOK_URL")
fmt.Println(" or: tiktok-dl [OPTIONS] -batch-file path/to/users.txt")
os.Exit(2)
}
if len(args) > 0 {
Config.URL = flag.Args()[len(args)-1]
} else {
Config.URL = ""
}
Config.OutputPath = *outputPath
Config.BatchFilePath = *batchFilePath
Config.Debug = *debug
Config.MetaData = *metadata
Config.Quiet = *quiet
Config.Deadline = *deadline
if len(args) > 0 {
Config.URL = flag.Args()[len(args)-1]
} else {
Config.URL = ""
}
Config.OutputPath = *outputPath
Config.BatchFilePath = *batchFilePath
Config.Debug = *debug
Config.MetaData = *metadata
Config.Quiet = *quiet
if *jsonOnly {
Config.Quiet = true
}
Config.Deadline = *deadline
Config.Limit = *limit
Config.JSONOnly = *jsonOnly;
}

View File

@ -1,7 +1,7 @@
optStrings = {
selectors: {
feedLoading: 'div.tiktok-loading.feed-loading',
modalArrowLeft: 'div.video-card-modal > div > img.arrow-right',
modalArrowRight: 'div.video-card-modal > div > img.arrow-right',
modalClose: '.video-card-modal > div > div.close',
modalPlayer: 'div > div > main > div.video-card-modal > div > div.video-card-big > div.video-card-container > div > div > video',
modalShareInput: '.copy-link-container > input',
@ -30,6 +30,7 @@ optStrings = {
currentState = {
preloadCount: 0,
finished: false,
limit: 100
};
createVidUrlElement = function(outputObj) {
@ -37,7 +38,7 @@ createVidUrlElement = function(outputObj) {
urlSetElement.innerText = JSON.stringify(outputObj);
document.getElementsByTagName(optStrings.tags.resultParentTag)[0].appendChild(urlSetElement);
currentState.finished = true;
}
};
buldVidUrlArray = function(finishCallback) {
var feedItem = document.getElementsByClassName(optStrings.classes.feedVideoItem)[0];
@ -46,8 +47,14 @@ buldVidUrlArray = function(finishCallback) {
var videoArray = [];
var intervalID = window.setInterval(x => {
videoArray.push(getCurrentModalVideo());
var arrowRight = document.querySelectorAll(optStrings.selectors.modalArrowLeft)[0];
if(currentState.limit > 0) {
if (videoArray.length >= currentState.limit) {
window.clearInterval(intervalID);
document.querySelector(optStrings.selectors.modalClose).click();
finishCallback(videoArray);
}
}
var arrowRight = document.querySelectorAll(optStrings.selectors.modalArrowRight)[0];
if (arrowRight.classList.contains(optStrings.classes.modalCloseDisabled)) {
window.clearInterval(intervalID);
document.querySelector(optStrings.selectors.modalClose).click();
@ -78,7 +85,7 @@ getCurrentModalVideo = function() {
link: soundHref,
},
};
}
};
getCurrentVideo = function() {
var player = document.querySelector(optStrings.selectors.videoPlayer);
@ -100,13 +107,19 @@ getCurrentVideo = function() {
link: soundHref,
},
};
}
};
scrollWhileNew = function(finishCallback) {
var state = { count: 0 };
var intervalID = window.setInterval(x => {
var oldCount = state.count;
state.count = document.getElementsByClassName(optStrings.classes.feedVideoItem).length;
if(currentState.limit > 0) {
if (currentState.preloadCount >= currentState.limit || state.count >= currentState.limit) {
finishCallback(createVidUrlElement);
window.clearInterval(intervalID);
}
}
if (oldCount !== state.count) {
currentState.preloadCount = state.count;
window.scrollTo(0, document.body.scrollHeight);
@ -121,7 +134,8 @@ scrollWhileNew = function(finishCallback) {
}, 1000);
};
// Entry point for iterating over the videos on the page: stores the requested
// limit in currentState.limit, starts scrollWhileNew with buldVidUrlArray as
// its callback, and returns its own name as a marker string.
bootstrapIteratingVideos = function() {
bootstrapIteratingVideos = function(limit) {
currentState.limit = limit;
scrollWhileNew(buldVidUrlArray);
return 'bootstrapIteratingVideos';
};
@ -130,7 +144,7 @@ bootstrapGetCurrentVideo = function() {
var video = getCurrentVideo();
createVidUrlElement(video);
return 'bootstrapGetCurrentVideo';
}
};
init = () => {
const newProto = navigator.__proto__;

16
utils/getHashtag.go Normal file
View File

@ -0,0 +1,16 @@
package utils
import (
res "../resources"
"fmt"
"strings"
)
// GetHashtagFromURL - Extracts the hashtag name from a URL containing a
// "/tag/<name>" path segment.
//
// The returned name is the text that follows the first "/tag/" up to, but not
// including, the next '/', '?' or '#'. This keeps query strings, fragments and
// trailing path parts out of the result, which callers use as a directory
// name. Panics with res.ErrorCouldNotRecogniseURL when str contains no "/tag/".
func GetHashtagFromURL(str string) string {
	const marker = "/tag/"

	start := strings.Index(str, marker)
	if start < 0 {
		panic(fmt.Sprintf(res.ErrorCouldNotRecogniseURL, str))
	}

	tag := str[start+len(marker):]
	// Cut off anything that is not part of the tag itself, e.g. "?lang=en".
	if end := strings.IndexAny(tag, "/?#"); end >= 0 {
		tag = tag[:end]
	}
	return tag
}

View File

@ -0,0 +1,36 @@
package workflows
import (
client "../client"
config "../models/config"
utils "../utils"
"fmt"
"strings"
)
// CanUseDownloadHashtag - Reports whether the hashtag workflow applies to url,
// i.e. whether url contains the "/tag/" path segment.
func CanUseDownloadHashtag(url string) bool {
	return strings.Contains(url, "/tag/")
}
// DownloadHashtag - Scrapes the hashtag page at url and downloads every upload
// found into "<OutputPath>/<hashtag>", logging a progress line after each one.
func DownloadHashtag(url string) {
	items := client.GetHashtagUploads(url)
	total := len(items)

	tag := utils.GetHashtagFromURL(url)
	targetDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, tag)
	utils.InitOutputDirectory(targetDir)

	for i := 0; i < total; i++ {
		downloadVideo(items[i], targetDir)
		utils.Logf("\r[%d/%d] Downloaded", i+1, total)
	}
	utils.Log()
}
// GetHashtagJson - Scrapes the hashtag page at url and prints the unparsed
// scraper output to stdout, without downloading any video.
func GetHashtagJson(url string) {
	// Print the raw scraper output, as GetMusicJson and GetUserVideosJson do.
	// Formatting the parsed []models.Upload with %s would emit a Go value dump
	// instead of the JSON the -json flag promises.
	uploads := client.GetHashtagUploadsJson(url)
	fmt.Printf("%s", uploads)
}

View File

@ -1,31 +1,36 @@
package workflows
import (
client "../client"
config "../models/config"
utils "../utils"
"fmt"
"regexp"
client "../client"
config "../models/config"
utils "../utils"
"fmt"
"regexp"
)
// CanUseDownloadMusic - Reports whether the music workflow applies to url,
// i.e. whether url matches the regular expression `.com\/music\/.+`.
// The error returned by regexp.MatchString is discarded.
func CanUseDownloadMusic(url string) bool {
match, _ := regexp.MatchString(".com\\/music\\/.+", url)
return match
match, _ := regexp.MatchString(".com\\/music\\/.+", url)
return match
}
// DownloadMusic - Scrapes the music page at url and downloads every upload
// found. Each video goes into "<OutputPath>/<username>", where username is
// derived from the upload's Uploader field; a progress line is logged after
// each download.
func DownloadMusic(url string) {
uploads := client.GetMusicUploads(url)
uploadCount := len(uploads)
uploads := client.GetMusicUploads(url)
uploadCount := len(uploads)
for index, upload := range uploads {
username := utils.GetUsernameFromString(upload.Uploader)
downloadDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, username)
for index, upload := range uploads {
username := utils.GetUsernameFromString(upload.Uploader)
downloadDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, username)
utils.InitOutputDirectory(downloadDir)
downloadVideo(upload, downloadDir)
utils.Logf("\r[%d/%d] Downloaded", index+1, uploadCount)
}
utils.Log()
utils.InitOutputDirectory(downloadDir)
downloadVideo(upload, downloadDir)
utils.Logf("\r[%d/%d] Downloaded", index+1, uploadCount)
}
utils.Log()
}
// GetMusicJson - Scrapes the music page at url and prints the unparsed scraper
// output to stdout, without downloading any video.
func GetMusicJson(url string) {
	fmt.Print(client.GetMusicUploadsJson(url))
}

View File

@ -28,3 +28,8 @@ func DownloadUser(username string) {
}
utils.Log()
}
// GetUserVideosJson - Scrapes the profile of username and prints the unparsed
// scraper output to stdout, without downloading any video.
func GetUserVideosJson(username string) {
	fmt.Print(client.GetUserUploadsJson(username))
}

View File

@ -1,44 +1,44 @@
package workflows
import (
client "../client"
models "../models"
config "../models/config"
utils "../utils"
"fmt"
"regexp"
client "../client"
models "../models"
config "../models/config"
utils "../utils"
"fmt"
"regexp"
)
// CanUseDownloadSingleVideo - Reports whether the single-video workflow
// applies to url, i.e. whether url matches the regular expression
// `\/@.+\/video\/[0-9]+`. The error returned by regexp.MatchString is
// discarded.
func CanUseDownloadSingleVideo(url string) bool {
match, _ := regexp.MatchString("\\/@.+\\/video\\/[0-9]+", url)
return match
match, _ := regexp.MatchString("\\/@.+\\/video\\/[0-9]+", url)
return match
}
// DownloadSingleVideo - Fetches the details of the video at url and downloads
// it into "<OutputPath>/<username>", where username is derived from url.
func DownloadSingleVideo(url string) {
username := utils.GetUsernameFromString(url)
upload := client.GetVideoDetails(url)
downloadDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, username)
username := utils.GetUsernameFromString(url)
upload := client.GetVideoDetails(url)
downloadDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, username)
utils.InitOutputDirectory(downloadDir)
downloadVideo(upload, downloadDir)
utils.Log("[1/1] Downloaded\n")
utils.InitOutputDirectory(downloadDir)
downloadVideo(upload, downloadDir)
utils.Log("[1/1] Downloaded\n")
}
// downloadVideo - Downloads one upload to "<downloadDir>/<uploadID>.mp4".
// Nothing is done when that file already exists. When config.Config.MetaData
// is set, the upload's metadata is also written to
// "<downloadDir>/<uploadID>.json".
func downloadVideo(upload models.Upload, downloadDir string) {
uploadID := upload.GetUploadID()
downloadPath := fmt.Sprintf("%s/%s.mp4", downloadDir, uploadID)
uploadID := upload.GetUploadID()
downloadPath := fmt.Sprintf("%s/%s.mp4", downloadDir, uploadID)
if utils.CheckIfExists(downloadPath) {
return
}
if utils.CheckIfExists(downloadPath) {
return
}
utils.DownloadFile(downloadPath, upload.URL)
utils.DownloadFile(downloadPath, upload.URL)
if config.Config.MetaData {
metadataPath := fmt.Sprintf("%s/%s.json", downloadDir, uploadID)
upload.WriteToFile(metadataPath)
}
}
if config.Config.MetaData {
metadataPath := fmt.Sprintf("%s/%s.json", downloadDir, uploadID)
upload.WriteToFile(metadataPath)
}
}

View File

@ -1,6 +1,7 @@
package workflows
import (
config "../models/config"
res "../resources"
utils "../utils"
)
@ -10,7 +11,11 @@ func StartWorkflowByParameter(url string) {
// Music
if CanUseDownloadMusic(url) {
DownloadMusic(url)
if config.Config.JSONOnly {
GetMusicJson(url)
} else {
DownloadMusic(url)
}
return
}
@ -22,7 +27,22 @@ func StartWorkflowByParameter(url string) {
// Tiktok user
if CanUseDownloadUser(url) {
DownloadUser(utils.GetUsernameFromString(url))
if config.Config.JSONOnly {
GetUserVideosJson(utils.GetUsernameFromString(url))
} else {
DownloadUser(utils.GetUsernameFromString(url))
}
return
}
// Tiktok hashtag
if CanUseDownloadHashtag(url) {
if config.Config.JSONOnly {
GetHashtagJson(url)
} else {
DownloadHashtag(url)
}
return
}