Merge pull request #6 from intracomof/master

Download videos by hashtag; limit option; get just json data
This commit is contained in:
Pijus Kamandulis 2020-02-25 21:33:57 +02:00 committed by GitHub
commit 70c605a696
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 183 additions and 9 deletions

View File

@ -22,6 +22,8 @@ Clone this repository and run `go build` to build the executable.
* `-batch-file` - File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored. * `-batch-file` - File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored.
* `-deadline` - Sets the timout for scraper logic in seconds (used as a workaround for context deadline exceeded error) (default 1500) * `-deadline` - Sets the timout for scraper logic in seconds (used as a workaround for context deadline exceeded error) (default 1500)
* `-quiet` - Supress output * `-quiet` - Supress output
* `-json` - Prints the data scraped from TikTok as JSON instead of downloading videos (implies `-quiet`)
* `-limit` - Sets the maximum number of videos to download (default 0, meaning no limit)
## Acknowledgments ## Acknowledgments
This software uses the **chromedp** for web scraping, it can be found here: https://github.com/chromedp/chromedp \ This software uses the **chromedp** for web scraping, it can be found here: https://github.com/chromedp/chromedp \

View File

@ -84,7 +84,7 @@ func runScrapeWithInfo(ctx context.Context, jsAction string, url string) (string
} }
if jsOutput != "0" { if jsOutput != "0" {
utils.Logf("\rPreloading... Currently loaded %s items.", jsOutput) utils.Logf("\rPreloading... %s items have been founded.", jsOutput)
} else { } else {
utils.Logf("\rPreloading...") utils.Logf("\rPreloading...")
} }

View File

@ -0,0 +1,26 @@
package client
import (
models "../models"
config "../models/config"
"fmt"
)
// GetHashtagUploads - Get all uploads marked with given hashtag.
//
// The configured video limit (config.Config.Limit) is passed to the
// bootstrapIteratingVideos script that is executed for hashtagURL, and the
// script output is parsed into uploads. Any error from executing the script
// is returned unchanged together with a nil slice.
func GetHashtagUploads(hashtagURL string) ([]models.Upload, error) {
	script := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)

	rawOutput, scrapeErr := executeClientAction(hashtagURL, script)
	if scrapeErr != nil {
		return nil, scrapeErr
	}

	parsed := models.ParseUploads(rawOutput)
	return parsed, nil
}
// GetHashtagUploadsJson - Get the unparsed scraper output for the uploads
// marked with given hashtag.
//
// Runs the same bootstrapIteratingVideos script as GetHashtagUploads (with
// config.Config.Limit as its argument) but returns the script output as-is.
// On failure an empty string and the error are returned.
func GetHashtagUploadsJson(hashtagURL string) (string, error) {
	script := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)

	rawOutput, scrapeErr := executeClientAction(hashtagURL, script)
	if scrapeErr != nil {
		return "", scrapeErr
	}

	return rawOutput, nil
}

View File

@ -2,13 +2,25 @@ package client
import ( import (
models "../models" models "../models"
config "../models/config"
"fmt"
) )
// GetMusicUploads - Get all uploads by given music // GetMusicUploads - Get all uploads by given music
func GetMusicUploads(url string) ([]models.Upload, error) { func GetMusicUploads(url string) ([]models.Upload, error) {
actionOutput, err := executeClientAction(url, "bootstrapIteratingVideos()") jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)
actionOutput, err := executeClientAction(url, jsMethod)
if err != nil { if err != nil {
return nil, err return nil, err
} }
return models.ParseUploads(actionOutput), nil return models.ParseUploads(actionOutput), nil
} }
// GetMusicUploadsJson - Get the unparsed scraper output for the uploads by
// given music.
//
// Executes the bootstrapIteratingVideos script (with config.Config.Limit as
// its argument) for url and returns the script output as-is. On failure an
// empty string and the error are returned.
func GetMusicUploadsJson(url string) (string, error) {
	script := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)

	rawOutput, scrapeErr := executeClientAction(url, script)
	if scrapeErr != nil {
		return "", scrapeErr
	}

	return rawOutput, nil
}

View File

@ -2,13 +2,25 @@ package client
import ( import (
models "../models" models "../models"
config "../models/config"
"fmt"
) )
// GetUserUploads - Get all uploads by user // GetUserUploads - Get all uploads by user
func GetUserUploads(username string) ([]models.Upload, error) { func GetUserUploads(username string) ([]models.Upload, error) {
actionOutput, err := executeClientAction(`https://www.tiktok.com/@`+username, "bootstrapIteratingVideos()") jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)
actionOutput, err := executeClientAction(`https://www.tiktok.com/@`+username, jsMethod)
if err != nil { if err != nil {
return nil, err return nil, err
} }
return models.ParseUploads(actionOutput), nil return models.ParseUploads(actionOutput), nil
} }
// GetUserUploadsJson - Get the unparsed scraper output for the uploads by
// user.
//
// Executes the bootstrapIteratingVideos script (with config.Config.Limit as
// its argument) on the profile page of username and returns the script output
// as-is. On failure an empty string and the error are returned.
func GetUserUploadsJson(username string) (string, error) {
	profileURL := `https://www.tiktok.com/@` + username
	script := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)

	rawOutput, scrapeErr := executeClientAction(profileURL, script)
	if scrapeErr != nil {
		return "", scrapeErr
	}

	return rawOutput, nil
}

View File

@ -15,6 +15,8 @@ var Config struct {
MetaData bool MetaData bool
Quiet bool Quiet bool
Deadline int Deadline int
Limit int
JSONOnly bool
} }
// GetConfig - Returns Config object // GetConfig - Returns Config object
@ -25,6 +27,8 @@ func GetConfig() {
metadata := flag.Bool("metadata", false, "Write video metadata to a .json file") metadata := flag.Bool("metadata", false, "Write video metadata to a .json file")
quiet := flag.Bool("quiet", false, "Supress output") quiet := flag.Bool("quiet", false, "Supress output")
deadline := flag.Int("deadline", 1500, "Sets the timout for scraper logic in seconds (used as a workaround for 'context deadline exceeded' error)") deadline := flag.Int("deadline", 1500, "Sets the timout for scraper logic in seconds (used as a workaround for 'context deadline exceeded' error)")
limit := flag.Int("limit", 0, "Sets the videos count limit (useful when there too many videos from the user or by hashtag)")
jsonOnly := flag.Bool("json", false, "Just get JSON data from scraper (without video downloading)")
flag.Parse() flag.Parse()
args := flag.Args() args := flag.Args()
@ -44,5 +48,10 @@ func GetConfig() {
Config.Debug = *debug Config.Debug = *debug
Config.MetaData = *metadata Config.MetaData = *metadata
Config.Quiet = *quiet Config.Quiet = *quiet
if *jsonOnly {
Config.Quiet = true
}
Config.Deadline = *deadline Config.Deadline = *deadline
Config.Limit = *limit
Config.JSONOnly = *jsonOnly
} }

View File

@ -1,7 +1,7 @@
optStrings = { optStrings = {
selectors: { selectors: {
feedLoading: 'div.tiktok-loading.feed-loading', feedLoading: 'div.tiktok-loading.feed-loading',
modalArrowLeft: 'div.video-card-modal > div > img.arrow-right', modalArrowRight: 'div.video-card-modal > div > img.arrow-right',
modalClose: '.video-card-modal > div > div.close', modalClose: '.video-card-modal > div > div.close',
modalPlayer: 'div > div > main > div.video-card-modal > div > div.video-card-big > div.video-card-container > div > div > video', modalPlayer: 'div > div > main > div.video-card-modal > div > div.video-card-big > div.video-card-container > div > div > video',
modalShareInput: '.copy-link-container > input', modalShareInput: '.copy-link-container > input',
@ -36,6 +36,7 @@ optStrings = {
currentState = { currentState = {
preloadCount: 0, preloadCount: 0,
finished: false, finished: false,
limit: 0
}; };
checkForErrors = function() { checkForErrors = function() {
@ -65,8 +66,14 @@ buldVidUrlArray = function(finishCallback) {
var videoArray = []; var videoArray = [];
var intervalID = window.setInterval(x => { var intervalID = window.setInterval(x => {
videoArray.push(getCurrentModalVideo()); videoArray.push(getCurrentModalVideo());
if(currentState.limit > 0) {
var arrowRight = document.querySelectorAll(optStrings.selectors.modalArrowLeft)[0]; if (videoArray.length >= currentState.limit) {
window.clearInterval(intervalID);
document.querySelector(optStrings.selectors.modalClose).click();
finishCallback(videoArray);
}
}
var arrowRight = document.querySelectorAll(optStrings.selectors.modalArrowRight)[0];
if (arrowRight.classList.contains(optStrings.classes.modalCloseDisabled)) { if (arrowRight.classList.contains(optStrings.classes.modalCloseDisabled)) {
window.clearInterval(intervalID); window.clearInterval(intervalID);
document.querySelector(optStrings.selectors.modalClose).click(); document.querySelector(optStrings.selectors.modalClose).click();
@ -127,6 +134,12 @@ scrollWhileNew = function(finishCallback) {
var intervalID = window.setInterval(x => { var intervalID = window.setInterval(x => {
var oldCount = state.count; var oldCount = state.count;
state.count = document.getElementsByClassName(optStrings.classes.feedVideoItem).length; state.count = document.getElementsByClassName(optStrings.classes.feedVideoItem).length;
if(currentState.limit > 0) {
if (currentState.preloadCount >= currentState.limit || state.count >= currentState.limit) {
finishCallback(createVidUrlElement);
window.clearInterval(intervalID);
}
}
if(checkForErrors()) { if(checkForErrors()) {
window.clearInterval(intervalID); window.clearInterval(intervalID);
return; return;
@ -145,7 +158,8 @@ scrollWhileNew = function(finishCallback) {
}, 1000); }, 1000);
}; };
bootstrapIteratingVideos = function() { bootstrapIteratingVideos = function(limit) {
currentState.limit = limit;
scrollWhileNew(buldVidUrlArray); scrollWhileNew(buldVidUrlArray);
return 'bootstrapIteratingVideos'; return 'bootstrapIteratingVideos';
}; };

16
utils/getHashtag.go Normal file
View File

@ -0,0 +1,16 @@
package utils
import (
res "../resources"
"fmt"
"strings"
)
// GetHashtagFromURL - Gets the tag name from the passed url.
//
// The tag is the path segment that directly follows the first "/tag/" in str.
// Everything after that segment (a trailing "/", a "?query" or a "#fragment")
// is dropped, so that a url such as ".../tag/dance?lang=en" yields "dance"
// rather than "dance?lang=en".
//
// Panics with the res.ErrorCouldNotRecogniseURL message when str does not
// contain "/tag/".
func GetHashtagFromURL(str string) string {
	const marker = "/tag/"

	start := strings.Index(str, marker)
	if start < 0 {
		panic(fmt.Sprintf(res.ErrorCouldNotRecogniseURL, str))
	}

	tag := str[start+len(marker):]
	// Cut the tag at the first character that cannot belong to the tag segment.
	if end := strings.IndexAny(tag, "/?#"); end >= 0 {
		tag = tag[:end]
	}
	return tag
}

View File

@ -0,0 +1,45 @@
package workflows
import (
client "../client"
config "../models/config"
res "../resources"
utils "../utils"
"fmt"
"strings"
)
// CanUseDownloadHashtag - Tests if this workflow can be used for parameter.
// Reports true when url contains the "/tag/" path marker.
func CanUseDownloadHashtag(url string) bool {
	return strings.Contains(url, "/tag/")
}
// DownloadHashtag - Download videos marked with given hashtag.
//
// The uploads are fetched through the client first; if that fails the error
// is logged and nothing is downloaded. Otherwise every upload is downloaded
// into "<output path>/<hashtag>", logging the progress after each video.
func DownloadHashtag(url string) {
	uploads, err := client.GetHashtagUploads(url)
	if err != nil {
		utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
		return
	}

	// Target directory is named after the hashtag taken from the url.
	downloadDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, utils.GetHashtagFromURL(url))
	utils.InitOutputDirectory(downloadDir)

	total := len(uploads)
	for i := range uploads {
		downloadVideo(uploads[i], downloadDir)
		// "\r" rewrites the same console line with the updated counter.
		utils.Logf("\r[%d/%d] Downloaded", i+1, total)
	}

	// NOTE(review): presumably terminates the progress line - confirm in utils.Log.
	utils.Log()
}
// GetHashtagJson - Prints the unparsed scraper output for the given hashtag
// url to stdout, without downloading any videos.
//
// If the scraper fails, the error is logged and nothing is printed.
func GetHashtagJson(url string) {
	// The Json client variant returns the scraper output as a string.
	// client.GetHashtagUploads would return parsed []models.Upload values, and
	// printing those with %s emits Go's struct formatting instead of JSON.
	uploads, err := client.GetHashtagUploadsJson(url)
	if err != nil {
		utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
		return
	}

	fmt.Printf("%s", uploads)
}

View File

@ -34,3 +34,12 @@ func DownloadMusic(url string) {
} }
utils.Log() utils.Log()
} }
// GetMusicJson - Prints the unparsed scraper output for the given music url
// to stdout, without downloading any videos.
//
// If the scraper fails, the error is logged and nothing is printed.
func GetMusicJson(url string) {
	rawJSON, err := client.GetMusicUploadsJson(url)
	if err != nil {
		utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
		return
	}

	// The client returns a plain string, so it is written out verbatim.
	fmt.Print(rawJSON)
}

View File

@ -35,3 +35,12 @@ func DownloadUser(username string) {
} }
utils.Log() utils.Log()
} }
// GetUserVideosJson - Prints the unparsed scraper output for the uploads of
// the given user to stdout, without downloading any videos.
//
// If the scraper fails, the error is logged and nothing is printed.
func GetUserVideosJson(username string) {
	rawJSON, err := client.GetUserUploadsJson(username)
	if err != nil {
		utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
		return
	}

	// The client returns a plain string, so it is written out verbatim.
	fmt.Print(rawJSON)
}

View File

@ -1,6 +1,7 @@
package workflows package workflows
import ( import (
config "../models/config"
res "../resources" res "../resources"
utils "../utils" utils "../utils"
) )
@ -10,7 +11,11 @@ func StartWorkflowByParameter(url string) {
// Music // Music
if CanUseDownloadMusic(url) { if CanUseDownloadMusic(url) {
DownloadMusic(url) if config.Config.JSONOnly {
GetMusicJson(url)
} else {
DownloadMusic(url)
}
return return
} }
@ -22,7 +27,22 @@ func StartWorkflowByParameter(url string) {
// Tiktok user // Tiktok user
if CanUseDownloadUser(url) { if CanUseDownloadUser(url) {
DownloadUser(utils.GetUsernameFromString(url)) if config.Config.JSONOnly {
GetUserVideosJson(utils.GetUsernameFromString(url))
} else {
DownloadUser(utils.GetUsernameFromString(url))
}
return
}
// Tiktok hashtag
if CanUseDownloadHashtag(url) {
if config.Config.JSONOnly {
GetHashtagJson(url)
} else {
DownloadHashtag(url)
}
return return
} }