9 Commits
1.6 ... 1.8

Author SHA1 Message Date
Pijus Kamandulis
9a65746fd4 Update go.yml 2020-02-25 21:44:43 +02:00
Pijus Kamandulis
70c605a696 Merge pull request #6 from intracomof/master
Download videos by hashtag; limit option; get just json data
2020-02-25 21:33:57 +02:00
alexpin
208bffb846 error handling 2020-02-25 21:16:57 +02:00
alexpin
7b9b7688a1 formatter 2020-02-25 21:03:06 +02:00
intracomof
e77c904f89 Merge branch 'master' into master 2020-02-25 21:01:43 +02:00
alexpin
68612282ee default limit value updated; WaitReady(video) removed 2020-02-25 20:55:56 +02:00
Pijus Kamandulis
7a691ad32d TTDL-5 Added better error handling 2020-02-25 20:12:01 +02:00
alexpin
b6bb470064 formatter 2020-02-25 01:01:10 +02:00
alexpin
f724f0f2a2 Download videos by hashtag; get json data without video downloading; limit option 2020-02-25 00:56:19 +02:00
17 changed files with 295 additions and 49 deletions

View File

@@ -1,5 +1,5 @@
name: tiktok-dl_CI
on: [push]
on: [push, pull_request]
jobs:
build:
strategy:

View File

@@ -22,6 +22,8 @@ Clone this repository and run `go build` to build the executable.
* `-batch-file` - File containing URLs/Usernames to download, one value per line. Lines starting with '#' are considered comments and are ignored.
* `-deadline` - Sets the timeout for scraper logic in seconds (used as a workaround for context deadline exceeded error) (default 1500)
* `-quiet` - Suppress output
* `-json` - Outputs all data scraped from TikTok as JSON, without downloading videos
* `-limit` - Sets the maximum number of videos to download (default 0, meaning no limit)
## Acknowledgments
This software uses **chromedp** for web scraping; it can be found here: https://github.com/chromedp/chromedp \

View File

@@ -2,10 +2,12 @@ package client
import (
"context"
"errors"
"github.com/chromedp/chromedp"
"io/ioutil"
"log"
"os"
"strings"
"time"
config "../models/config"
@@ -13,9 +15,11 @@ import (
)
// executeClientAction - Opens url with chromedp, runs jsAction and returns the scraper output
func executeClientAction(url string, jsAction string) string {
func executeClientAction(url string, jsAction string) (string, error) {
dir, err := ioutil.TempDir("", "chromedp-example")
utils.CheckErr(err)
if err != nil {
return "", err
}
defer os.RemoveAll(dir)
opts := append(chromedp.DefaultExecAllocatorOptions[:],
@@ -36,15 +40,16 @@ func executeClientAction(url string, jsAction string) string {
ctx, cancel = context.WithTimeout(ctx, time.Duration(config.Config.Deadline)*time.Second)
defer cancel()
var jsOutput string
jsOutput = runScrapeWithInfo(ctx, jsAction, url)
return jsOutput
jsOutput, err := runScrapeWithInfo(ctx, jsAction, url)
if strings.HasPrefix(jsOutput, "\"ERR:") {
err = errors.New(jsOutput)
}
return jsOutput, err
}
func runScrapeQuiet(ctx context.Context, jsAction string, url string) string {
func runScrapeQuiet(ctx context.Context, jsAction string, url string) (string, error) {
var jsOutput string
err := chromedp.Run(ctx,
if err := chromedp.Run(ctx,
// Navigate to user's page
chromedp.Navigate(url),
// Execute url grabber script
@@ -54,33 +59,40 @@ func runScrapeQuiet(ctx context.Context, jsAction string, url string) string {
chromedp.WaitVisible(`video_urls`),
// Grab url links from our element
chromedp.InnerHTML(`video_urls`, &jsOutput),
)
utils.CheckErr(err)
return jsOutput
); err != nil {
return "", err
}
return jsOutput, nil
}
func runScrapeWithInfo(ctx context.Context, jsAction string, url string) string {
func runScrapeWithInfo(ctx context.Context, jsAction string, url string) (string, error) {
var jsOutput string
err := chromedp.Run(ctx,
if err := chromedp.Run(ctx,
// Navigate to user's page
chromedp.Navigate(url),
// Execute url grabber script
chromedp.EvaluateAsDevTools(utils.ReadFileAsString("scraper.js"), &jsOutput),
chromedp.EvaluateAsDevTools(jsAction, &jsOutput),
)
utils.CheckErr(err)
); err != nil {
return "", err
}
for {
err = chromedp.Run(ctx, chromedp.EvaluateAsDevTools("currentState.preloadCount.toString()", &jsOutput))
utils.CheckErr(err)
if err := chromedp.Run(ctx, chromedp.EvaluateAsDevTools("currentState.preloadCount.toString()", &jsOutput)); err != nil {
return "", err
}
if jsOutput != "0" {
utils.Logf("\rPreloading... Currently loaded %s items.", jsOutput)
utils.Logf("\rPreloading... %s items have been founded.", jsOutput)
} else {
utils.Logf("\rPreloading...")
}
err = chromedp.Run(ctx, chromedp.EvaluateAsDevTools("currentState.finished.toString()", &jsOutput))
utils.CheckErr(err)
if err := chromedp.Run(ctx, chromedp.EvaluateAsDevTools("currentState.finished.toString()", &jsOutput)); err != nil {
return "", err
}
if jsOutput == "true" {
break
}
@@ -89,13 +101,14 @@ func runScrapeWithInfo(ctx context.Context, jsAction string, url string) string
}
utils.Log("\nRetrieving items...")
err = chromedp.Run(ctx,
if err := chromedp.Run(ctx,
// Wait until custom js finishes
chromedp.WaitVisible(`video_urls`),
// Grab url links from our element
chromedp.InnerHTML(`video_urls`, &jsOutput),
)
utils.CheckErr(err)
); err != nil {
return "", err
}
return jsOutput
return jsOutput, nil
}

View File

@@ -0,0 +1,26 @@
package client
import (
models "../models"
config "../models/config"
"fmt"
)
// GetHashtagUploads - Get all uploads marked with given hashtag.
// The configured video limit is handed to the scraper script; if the client
// action fails, the error is returned together with a nil slice.
func GetHashtagUploads(hashtagURL string) ([]models.Upload, error) {
	scraperCall := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)

	rawUploads, err := executeClientAction(hashtagURL, scraperCall)
	if err == nil {
		return models.ParseUploads(rawUploads), nil
	}
	return nil, err
}
// GetHashtagUploadsJson - Get the unparsed scraper output for given hashtag.
// On failure an empty string is returned together with the error.
func GetHashtagUploadsJson(hashtagURL string) (string, error) {
	scraperCall := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)

	rawOutput, err := executeClientAction(hashtagURL, scraperCall)
	if err != nil {
		// Never hand partial output to the caller alongside an error.
		rawOutput = ""
	}
	return rawOutput, err
}

View File

@@ -2,10 +2,25 @@ package client
import (
models "../models"
config "../models/config"
"fmt"
)
// GetMusicUploads - Get all uploads by given music
func GetMusicUploads(url string) []models.Upload {
actionOutput := executeClientAction(url, "bootstrapIteratingVideos()")
return models.ParseUploads(actionOutput)
func GetMusicUploads(url string) ([]models.Upload, error) {
jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)
actionOutput, err := executeClientAction(url, jsMethod)
if err != nil {
return nil, err
}
return models.ParseUploads(actionOutput), nil
}
// GetMusicUploadsJson - Get the unparsed scraper output for given music.
// On failure an empty string is returned together with the error.
func GetMusicUploadsJson(url string) (string, error) {
	scraperCall := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)

	rawOutput, err := executeClientAction(url, scraperCall)
	if err != nil {
		// Never hand partial output to the caller alongside an error.
		rawOutput = ""
	}
	return rawOutput, err
}

View File

@@ -2,10 +2,25 @@ package client
import (
models "../models"
config "../models/config"
"fmt"
)
// GetUserUploads - Get all uploads by user
func GetUserUploads(username string) []models.Upload {
actionOutput := executeClientAction(`https://www.tiktok.com/@`+username, "bootstrapIteratingVideos()")
return models.ParseUploads(actionOutput)
func GetUserUploads(username string) ([]models.Upload, error) {
jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)
actionOutput, err := executeClientAction(`https://www.tiktok.com/@`+username, jsMethod)
if err != nil {
return nil, err
}
return models.ParseUploads(actionOutput), nil
}
// GetUserUploadsJson - Get the unparsed scraper output for given user.
// On failure an empty string is returned together with the error.
func GetUserUploadsJson(username string) (string, error) {
	profileURL := `https://www.tiktok.com/@` + username
	scraperCall := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)

	rawOutput, err := executeClientAction(profileURL, scraperCall)
	if err != nil {
		// Never hand partial output to the caller alongside an error.
		rawOutput = ""
	}
	return rawOutput, err
}

View File

@@ -5,7 +5,10 @@ import (
)
// GetVideoDetails - returns details of video
func GetVideoDetails(videoURL string) models.Upload {
actionOutput := executeClientAction(videoURL, "bootstrapGetCurrentVideo()")
return models.ParseUpload(actionOutput)
func GetVideoDetails(videoURL string) (models.Upload, error) {
actionOutput, err := executeClientAction(videoURL, "bootstrapGetCurrentVideo()")
if err != nil {
return models.Upload{}, err
}
return models.ParseUpload(actionOutput), nil
}

View File

@@ -15,6 +15,8 @@ var Config struct {
MetaData bool
Quiet bool
Deadline int
Limit int
JSONOnly bool
}
// GetConfig - Returns Config object
@@ -25,6 +27,8 @@ func GetConfig() {
metadata := flag.Bool("metadata", false, "Write video metadata to a .json file")
quiet := flag.Bool("quiet", false, "Supress output")
deadline := flag.Int("deadline", 1500, "Sets the timout for scraper logic in seconds (used as a workaround for 'context deadline exceeded' error)")
limit := flag.Int("limit", 0, "Sets the videos count limit (useful when there too many videos from the user or by hashtag)")
jsonOnly := flag.Bool("json", false, "Just get JSON data from scraper (without video downloading)")
flag.Parse()
args := flag.Args()
@@ -44,5 +48,10 @@ func GetConfig() {
Config.Debug = *debug
Config.MetaData = *metadata
Config.Quiet = *quiet
if *jsonOnly {
Config.Quiet = true
}
Config.Deadline = *deadline
Config.Limit = *limit
Config.JSONOnly = *jsonOnly
}

View File

@@ -6,5 +6,8 @@ var ErrorCouldNotSerializeJSON = "Could not serialize json for video: %s\n"
// ErrorCouldNotRecogniseURL -
var ErrorCouldNotRecogniseURL = "Could not recognise URL format of string %s"
// ErrorCouldNotGetUserUploads -
var ErrorCouldNotGetUserUploads = "Failed to get user uploads: %s\n"
// ErrorPathNotFound -
var ErrorPathNotFound = "File path %s not found."

View File

@@ -1,7 +1,7 @@
optStrings = {
selectors: {
feedLoading: 'div.tiktok-loading.feed-loading',
modalArrowLeft: 'div.video-card-modal > div > img.arrow-right',
modalArrowRight: 'div.video-card-modal > div > img.arrow-right',
modalClose: '.video-card-modal > div > div.close',
modalPlayer: 'div > div > main > div.video-card-modal > div > div.video-card-big > div.video-card-container > div > div > video',
modalShareInput: '.copy-link-container > input',
@@ -17,6 +17,7 @@ optStrings = {
classes: {
feedVideoItem: 'video-feed-item-wrapper',
modalCloseDisabled: 'disabled',
titleMessage: 'title',
},
tags: {
resultTag: 'video_urls',
@@ -25,11 +26,30 @@ optStrings = {
attributes: {
src: "src",
},
tiktokMessages: [
"Couldn't find this account",
"No videos yet",
"Video currently unavailable",
],
};
currentState = {
preloadCount: 0,
finished: false,
limit: 0
};
// checkForErrors - Looks for a known TikTok error message among the elements
// carrying the title class. When one is found, it is published through
// createVidUrlElement with an "ERR: " prefix and true is returned; otherwise
// false is returned and nothing is published.
checkForErrors = function() {
    var titles = document.getElementsByClassName(optStrings.classes.titleMessage);
    if (titles && titles.length) {
        // find() yields undefined when no title matches a known message, so
        // the element has to be checked before reading its textContent.
        var matched = Array.from(titles).find(x => optStrings.tiktokMessages.includes(x.textContent));
        if (matched) {
            createVidUrlElement("ERR: " + matched.textContent);
            return true;
        }
    }
    return false;
};
createVidUrlElement = function(outputObj) {
@@ -37,7 +57,7 @@ createVidUrlElement = function(outputObj) {
urlSetElement.innerText = JSON.stringify(outputObj);
document.getElementsByTagName(optStrings.tags.resultParentTag)[0].appendChild(urlSetElement);
currentState.finished = true;
}
};
buldVidUrlArray = function(finishCallback) {
var feedItem = document.getElementsByClassName(optStrings.classes.feedVideoItem)[0];
@@ -46,8 +66,14 @@ buldVidUrlArray = function(finishCallback) {
var videoArray = [];
var intervalID = window.setInterval(x => {
videoArray.push(getCurrentModalVideo());
var arrowRight = document.querySelectorAll(optStrings.selectors.modalArrowLeft)[0];
if(currentState.limit > 0) {
if (videoArray.length >= currentState.limit) {
window.clearInterval(intervalID);
document.querySelector(optStrings.selectors.modalClose).click();
finishCallback(videoArray);
}
}
var arrowRight = document.querySelectorAll(optStrings.selectors.modalArrowRight)[0];
if (arrowRight.classList.contains(optStrings.classes.modalCloseDisabled)) {
window.clearInterval(intervalID);
document.querySelector(optStrings.selectors.modalClose).click();
@@ -78,9 +104,10 @@ getCurrentModalVideo = function() {
link: soundHref,
},
};
}
};
getCurrentVideo = function() {
if(checkForErrors()) return;
var player = document.querySelector(optStrings.selectors.videoPlayer);
var vidUrl = player.getAttribute(optStrings.attributes.src);
var shareLink = document.querySelector(optStrings.selectors.videoShareInput).value;
@@ -100,13 +127,23 @@ getCurrentVideo = function() {
link: soundHref,
},
};
}
};
scrollWhileNew = function(finishCallback) {
var state = { count: 0 };
var intervalID = window.setInterval(x => {
var oldCount = state.count;
state.count = document.getElementsByClassName(optStrings.classes.feedVideoItem).length;
if(currentState.limit > 0) {
if (currentState.preloadCount >= currentState.limit || state.count >= currentState.limit) {
finishCallback(createVidUrlElement);
window.clearInterval(intervalID);
}
}
if(checkForErrors()) {
window.clearInterval(intervalID);
return;
}
if (oldCount !== state.count) {
currentState.preloadCount = state.count;
window.scrollTo(0, document.body.scrollHeight);
@@ -121,7 +158,8 @@ scrollWhileNew = function(finishCallback) {
}, 1000);
};
bootstrapIteratingVideos = function() {
bootstrapIteratingVideos = function(limit) {
currentState.limit = limit;
scrollWhileNew(buldVidUrlArray);
return 'bootstrapIteratingVideos';
};
@@ -130,7 +168,7 @@ bootstrapGetCurrentVideo = function() {
var video = getCurrentVideo();
createVidUrlElement(video);
return 'bootstrapGetCurrentVideo';
}
};
init = () => {
const newProto = navigator.__proto__;

16
utils/getHashtag.go Normal file
View File

@@ -0,0 +1,16 @@
package utils
import (
res "../resources"
"fmt"
"strings"
)
// GetHashtagFromURL - Gets tag name from passed url.
// The tag is the text following the first "/tag/" marker, cut at the next
// '/', '?' or '#' so that a trailing slash, query string or fragment does not
// end up in the returned name. Panics with ErrorCouldNotRecogniseURL when the
// string contains no "/tag/" marker.
func GetHashtagFromURL(str string) string {
	const marker = "/tag/"

	start := strings.Index(str, marker)
	if start < 0 {
		panic(fmt.Sprintf(res.ErrorCouldNotRecogniseURL, str))
	}

	tag := str[start+len(marker):]
	if end := strings.IndexAny(tag, "/?#"); end >= 0 {
		tag = tag[:end]
	}
	return tag
}

View File

@@ -3,6 +3,7 @@ package utils
import (
config "../models/config"
"fmt"
"os"
)
// Log - Write to std out
@@ -23,3 +24,8 @@ func Logf(format string, a ...interface{}) {
func LogFatal(format string, a ...interface{}) {
panic(fmt.Sprintf(format, a...))
}
// LogErr - Formats the message and writes it to stderr unconditionally
func LogErr(format string, a ...interface{}) {
	message := fmt.Sprintf(format, a...)
	_, _ = os.Stderr.WriteString(message)
}

View File

@@ -0,0 +1,45 @@
package workflows
import (
client "../client"
config "../models/config"
res "../resources"
utils "../utils"
"fmt"
"strings"
)
// CanUseDownloadHashtag - Reports whether the parameter contains the "/tag/" path segment
func CanUseDownloadHashtag(url string) bool {
	return strings.Contains(url, "/tag/")
}
// DownloadHashtag - Download videos marked with given hashtag.
// Uploads are fetched first; on failure the error is written via LogErr and
// nothing is downloaded. Videos are stored under "<OutputPath>/<hashtag>" and
// progress is logged after each one.
func DownloadHashtag(url string) {
	videos, err := client.GetHashtagUploads(url)
	if err != nil {
		utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
		return
	}

	targetDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, utils.GetHashtagFromURL(url))
	utils.InitOutputDirectory(targetDir)

	total := len(videos)
	for i := range videos {
		downloadVideo(videos[i], targetDir)
		utils.Logf("\r[%d/%d] Downloaded", i+1, total)
	}
	utils.Log()
}
// GetHashtagJson - Prints the scraped data for given hashtag to stdout.
// Uses the unparsed scraper output, like GetMusicJson and GetUserVideosJson,
// rather than formatting the parsed upload structs with %s. On failure the
// error is written via LogErr and nothing is printed.
func GetHashtagJson(url string) {
	uploads, err := client.GetHashtagUploadsJson(url)
	if err != nil {
		utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
		return
	}
	fmt.Printf("%s", uploads)
}

View File

@@ -3,6 +3,7 @@ package workflows
import (
client "../client"
config "../models/config"
res "../resources"
utils "../utils"
"fmt"
"regexp"
@@ -16,7 +17,11 @@ func CanUseDownloadMusic(url string) bool {
// DownloadMusic - Download all videos by given music
func DownloadMusic(url string) {
uploads := client.GetMusicUploads(url)
uploads, err := client.GetMusicUploads(url)
if err != nil {
utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
return
}
uploadCount := len(uploads)
for index, upload := range uploads {
@@ -29,3 +34,12 @@ func DownloadMusic(url string) {
}
utils.Log()
}
// GetMusicJson - Prints the unparsed scraper output for given music to stdout.
// On failure the error is written via LogErr and nothing is printed.
func GetMusicJson(url string) {
	rawOutput, err := client.GetMusicUploadsJson(url)
	switch {
	case err != nil:
		utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
	default:
		fmt.Printf("%s", rawOutput)
	}
}

View File

@@ -3,20 +3,27 @@ package workflows
import (
client "../client"
config "../models/config"
res "../resources"
utils "../utils"
"fmt"
"regexp"
"strings"
)
// CanUseDownloadUser - Test's if this workflow can be used for parameter
func CanUseDownloadUser(url string) bool {
match := strings.Contains(url, "/")
return !match
isURL := strings.Contains(url, "/")
match, _ := regexp.MatchString(".+com\\/@[^\\/]+", url)
return !isURL || match
}
// DownloadUser - Download all user's videos
func DownloadUser(username string) {
uploads := client.GetUserUploads(username)
uploads, err := client.GetUserUploads(username)
if err != nil {
utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
return
}
uploadCount := len(uploads)
downloadDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, username)
@@ -28,3 +35,12 @@ func DownloadUser(username string) {
}
utils.Log()
}
// GetUserVideosJson - Prints the unparsed scraper output for given user to stdout.
// On failure the error is written via LogErr and nothing is printed.
func GetUserVideosJson(username string) {
	rawOutput, err := client.GetUserUploadsJson(username)
	switch {
	case err != nil:
		utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
	default:
		fmt.Printf("%s", rawOutput)
	}
}

View File

@@ -4,6 +4,7 @@ import (
client "../client"
models "../models"
config "../models/config"
res "../resources"
utils "../utils"
"fmt"
"regexp"
@@ -18,7 +19,11 @@ func CanUseDownloadSingleVideo(url string) bool {
// DownloadSingleVideo - Downloads single video
func DownloadSingleVideo(url string) {
username := utils.GetUsernameFromString(url)
upload := client.GetVideoDetails(url)
upload, err := client.GetVideoDetails(url)
if err != nil {
utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
return
}
downloadDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, username)
utils.InitOutputDirectory(downloadDir)

View File

@@ -1,6 +1,7 @@
package workflows
import (
config "../models/config"
res "../resources"
utils "../utils"
)
@@ -10,7 +11,11 @@ func StartWorkflowByParameter(url string) {
// Music
if CanUseDownloadMusic(url) {
DownloadMusic(url)
if config.Config.JSONOnly {
GetMusicJson(url)
} else {
DownloadMusic(url)
}
return
}
@@ -22,7 +27,22 @@ func StartWorkflowByParameter(url string) {
// Tiktok user
if CanUseDownloadUser(url) {
DownloadUser(utils.GetUsernameFromString(url))
if config.Config.JSONOnly {
GetUserVideosJson(utils.GetUsernameFromString(url))
} else {
DownloadUser(utils.GetUsernameFromString(url))
}
return
}
// Tiktok hashtag
if CanUseDownloadHashtag(url) {
if config.Config.JSONOnly {
GetHashtagJson(url)
} else {
DownloadHashtag(url)
}
return
}