TTDL-7 Added flag; Code clean up

This commit is contained in:
Pijus Kamandulis 2020-03-22 02:10:24 +02:00
parent 9a65746fd4
commit f9d35e3bf2
19 changed files with 188 additions and 79 deletions

View File

@ -16,14 +16,15 @@ You can download items listed in a text file by running `./tiktok-dl [OPTIONS] -
Clone this repository and run `go build` to build the executable.
## Available options
* `-debug` - enables debug mode
* `-output some_directory` - Output path (default "./downloads")
* `-metadata` - Write video metadata to a .json file
* `-archive` - Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.
* `-batch-file` - File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored.
* `-deadline` - Sets the timout for scraper logic in seconds (used as a workaround for context deadline exceeded error) (default 1500)
* `-quiet` - Supress output
* `-debug` - enables debug mode
* `-json` - Returns whole data, that was scraped from TikTok, in json
* `-limit` - Sets the max count of video that will be downloaded (default infinity)
* `-metadata` - Write video metadata to a .json file
* `-output some_directory` - Output path (default "./downloads")
* `-quiet` - Supress output
## Acknowledgments
This software uses the **chromedp** for web scraping, it can be found here: https://github.com/chromedp/chromedp \

View File

@ -3,15 +3,16 @@ package client
import (
"context"
"errors"
"github.com/chromedp/chromedp"
"io/ioutil"
"log"
"os"
"strings"
"time"
"github.com/chromedp/chromedp"
config "../models/config"
utils "../utils"
log "../utils/log"
)
// GetMusicUploads - Get all uploads by given music
@ -33,7 +34,7 @@ func executeClientAction(url string, jsAction string) (string, error) {
ctx, cancel := chromedp.NewContext(
allocCtx,
chromedp.WithLogf(log.Printf),
chromedp.WithLogf(log.Logf),
)
defer cancel()
@ -84,9 +85,9 @@ func runScrapeWithInfo(ctx context.Context, jsAction string, url string) (string
}
if jsOutput != "0" {
utils.Logf("\rPreloading... %s items have been founded.", jsOutput)
log.Logf("\rPreloading... %s items have been found.", jsOutput)
} else {
utils.Logf("\rPreloading...")
log.Logf("\rPreloading...")
}
if err := chromedp.Run(ctx, chromedp.EvaluateAsDevTools("currentState.finished.toString()", &jsOutput)); err != nil {
@ -100,7 +101,7 @@ func runScrapeWithInfo(ctx context.Context, jsAction string, url string) (string
time.Sleep(50 * time.Millisecond)
}
utils.Log("\nRetrieving items...")
log.Log("\nRetrieving items...")
if err := chromedp.Run(ctx,
// Wait until custom js finishes
chromedp.WaitVisible(`video_urls`),

View File

@ -1,22 +1,24 @@
package client
import (
"fmt"
models "../models"
config "../models/config"
"fmt"
)
// GetUserUploads - Get all uploads marked with given hashtag
// GetHashtagUploads - Get all uploads marked with given hashtag
func GetHashtagUploads(hashtagURL string) ([]models.Upload, error) {
jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)
actionOutput, err := executeClientAction(hashtagURL, jsMethod)
actionOutput, err := GetHashtagUploadsJSON(hashtagURL)
if err != nil {
return nil, err
}
return models.ParseUploads(actionOutput), nil
}
func GetHashtagUploadsJson(hashtagURL string) (string, error) {
// GetHashtagUploadsJSON - Get hashtag uploads scrape
func GetHashtagUploadsJSON(hashtagURL string) (string, error) {
jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)
actionOutput, err := executeClientAction(hashtagURL, jsMethod)
if err != nil {

View File

@ -1,22 +1,23 @@
package client
import (
"fmt"
models "../models"
config "../models/config"
"fmt"
)
// GetMusicUploads - Get all uploads by given music
func GetMusicUploads(url string) ([]models.Upload, error) {
jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)
actionOutput, err := executeClientAction(url, jsMethod)
actionOutput, err := GetMusicUploadsJSON(url)
if err != nil {
return nil, err
}
return models.ParseUploads(actionOutput), nil
}
func GetMusicUploadsJson(url string) (string, error) {
// GetMusicUploadsJSON - Get music uploads scrape
func GetMusicUploadsJSON(url string) (string, error) {
jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)
actionOutput, err := executeClientAction(url, jsMethod)
if err != nil {

View File

@ -1,22 +1,23 @@
package client
import (
"fmt"
models "../models"
config "../models/config"
"fmt"
)
// GetUserUploads - Get all uploads by user
func GetUserUploads(username string) ([]models.Upload, error) {
jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)
actionOutput, err := executeClientAction(`https://www.tiktok.com/@`+username, jsMethod)
actionOutput, err := GetUserUploadsJSON(username)
if err != nil {
return nil, err
}
return models.ParseUploads(actionOutput), nil
}
func GetUserUploadsJson(username string) (string, error) {
// GetUserUploadsJSON - Get user uploads scrape
func GetUserUploadsJSON(username string) (string, error) {
jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)
actionOutput, err := executeClientAction(`https://www.tiktok.com/@`+username, jsMethod)
if err != nil {

View File

@ -8,27 +8,29 @@ import (
// Config - Runtime configuration
var Config struct {
URL string
OutputPath string
BatchFilePath string
Debug bool
MetaData bool
Quiet bool
Deadline int
Limit int
JSONOnly bool
URL string
OutputPath string
BatchFilePath string
ArchiveFilePath string
Debug bool
MetaData bool
Quiet bool
JSONOnly bool
Deadline int
Limit int
}
// GetConfig - Returns Config object
func GetConfig() {
outputPath := flag.String("output", "./downloads", "Output path")
batchFilePath := flag.String("batch-file", "", "File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored.")
archive := flag.String("archive", "", "Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.")
debug := flag.Bool("debug", false, "Enables debug mode")
metadata := flag.Bool("metadata", false, "Write video metadata to a .json file")
quiet := flag.Bool("quiet", false, "Supress output")
jsonOnly := flag.Bool("json", false, "Just get JSON data from scraper (without video downloading)")
deadline := flag.Int("deadline", 1500, "Sets the timout for scraper logic in seconds (used as a workaround for 'context deadline exceeded' error)")
limit := flag.Int("limit", 0, "Sets the videos count limit (useful when there too many videos from the user or by hashtag)")
jsonOnly := flag.Bool("json", false, "Just get JSON data from scraper (without video downloading)")
flag.Parse()
args := flag.Args()
@ -45,13 +47,14 @@ func GetConfig() {
}
Config.OutputPath = *outputPath
Config.BatchFilePath = *batchFilePath
Config.ArchiveFilePath = *archive
Config.Debug = *debug
Config.MetaData = *metadata
Config.Quiet = *quiet
if *jsonOnly {
Config.Quiet = true
}
Config.JSONOnly = *jsonOnly
Config.Deadline = *deadline
Config.Limit = *limit
Config.JSONOnly = *jsonOnly
}

View File

@ -1,11 +1,13 @@
package models
import (
res "../resources"
utils "../utils"
"encoding/json"
"os"
"strings"
res "../resources"
checkErr "../utils/checkErr"
log "../utils/log"
)
// Upload - Upload object
@ -47,16 +49,16 @@ func (u Upload) GetUploadID() string {
func (u Upload) WriteToFile(outputPath string) {
bytes, err := json.Marshal(u)
if err != nil {
utils.Logf(res.ErrorCouldNotSerializeJSON, u.GetUploadID())
log.Logf(res.ErrorCouldNotSerializeJSON, u.GetUploadID())
panic(err)
}
// Create the file
out, err := os.Create(outputPath)
utils.CheckErr(err)
checkErr.CheckErr(err)
defer out.Close()
// Write to file
_, err = out.Write(bytes)
utils.CheckErr(err)
checkErr.CheckErr(err)
}

53
utils/archive.go Normal file
View File

@ -0,0 +1,53 @@
package utils
import (
models "../models"
config "../models/config"
log "./log"
)
// IsItemInArchive - Checks if the item is already archived
func IsItemInArchive(upload models.Upload) bool {
if len(RemoveArchivedItems([]models.Upload{upload})) == 0 {
return true
}
return false
}
// RemoveArchivedItems - Returns items slice without archived items
func RemoveArchivedItems(uploads []models.Upload) []models.Upload {
archiveFilePath := config.Config.ArchiveFilePath
if archiveFilePath == "" || !CheckIfExists(archiveFilePath) {
return uploads
}
removeArchivedItemsDelegate := func(archivedItem string) {
for i, upload := range uploads {
if upload.GetUploadID() == archivedItem {
uploads = append(uploads[:i], uploads[i+1:]...)
}
}
}
lenBeforeRemoval := len(uploads)
ReadFileLineByLine(archiveFilePath, removeArchivedItemsDelegate)
removedCount := lenBeforeRemoval - len(uploads)
if removedCount > 0 {
log.Logf("%d items, found in archive. Skipping...\n", removedCount)
}
return uploads
}
// AddItemToArchive - Adds item to archived list
func AddItemToArchive(uploadID string) {
archiveFilePath := config.Config.ArchiveFilePath
if archiveFilePath == "" {
return
}
AppendToFile(uploadID, archiveFilePath)
}

View File

@ -4,21 +4,23 @@ import (
"io"
"net/http"
"os"
checkErr "./checkErr"
)
// DownloadFile - Downloads content from `url` and stores it in `outputPath`
func DownloadFile(outputPath string, url string) {
// Get the data
resp, err := http.Get(url)
CheckErr(err)
checkErr.CheckErr(err)
defer resp.Body.Close()
// Create the file
out, err := os.Create(outputPath)
CheckErr(err)
checkErr.CheckErr(err)
defer out.Close()
// Write the body to file
_, err = io.Copy(out, resp.Body)
CheckErr(err)
checkErr.CheckErr(err)
}

View File

@ -4,6 +4,8 @@ import (
"bufio"
"io/ioutil"
"os"
checkErr "./checkErr"
)
type delegateString func(string)
@ -37,7 +39,7 @@ func ReadFileToString(path string) string {
// ReadFileLineByLine - Reads file line by line and calls delegate
func ReadFileLineByLine(path string, delegate delegateString) {
file, err := os.Open(path)
CheckErr(err)
checkErr.CheckErr(err)
defer file.Close()
scanner := bufio.NewScanner(file)
@ -49,3 +51,14 @@ func ReadFileLineByLine(path string, delegate delegateString) {
panic(err)
}
}
// AppendToFile - Appends line to file
func AppendToFile(str string, filePath string) {
f, err := os.OpenFile(filePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
checkErr.CheckErr(err)
defer f.Close()
if _, err := f.WriteString(str + "\n"); err != nil {
checkErr.CheckErr(err)
}
}

View File

@ -1,9 +1,10 @@
package utils
import (
config "../models/config"
"fmt"
"os"
config "../../models/config"
)
// Log - Write to std out

View File

@ -2,11 +2,13 @@ package utils
import (
"io/ioutil"
checkErr "./checkErr"
)
// ReadFileAsString - Returns contents of given file
func ReadFileAsString(fileName string) string {
content, err := ioutil.ReadFile(fileName)
CheckErr(err)
checkErr.CheckErr(err)
return string(content)
}

View File

@ -3,6 +3,7 @@ package workflows
import (
res "../resources"
utils "../utils"
log "../utils/log"
)
// CanUseDownloadBatchFile - Check's if DownloadBatchFile can be used
@ -13,7 +14,7 @@ func CanUseDownloadBatchFile(batchFilePath string) bool {
// DownloadBatchFile - Download items from batch file
func DownloadBatchFile(batchFilePath string) {
if !utils.CheckIfExists(batchFilePath) {
utils.LogFatal(res.ErrorPathNotFound, batchFilePath)
log.LogFatal(res.ErrorPathNotFound, batchFilePath)
}
utils.ReadFileLineByLine(batchFilePath, downloadItem)

View File

@ -1,12 +1,14 @@
package workflows
import (
"fmt"
"strings"
client "../client"
config "../models/config"
res "../resources"
utils "../utils"
"fmt"
"strings"
log "../utils/log"
)
// CanUseDownloadHashtag - Test's if this workflow can be used for parameter
@ -19,10 +21,13 @@ func CanUseDownloadHashtag(url string) bool {
func DownloadHashtag(url string) {
uploads, err := client.GetHashtagUploads(url)
if err != nil {
utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
log.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
return
}
uploads = utils.RemoveArchivedItems(uploads)
uploadCount := len(uploads)
hashtag := utils.GetHashtagFromURL(url)
downloadDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, hashtag)
@ -30,15 +35,16 @@ func DownloadHashtag(url string) {
for index, upload := range uploads {
downloadVideo(upload, downloadDir)
utils.Logf("\r[%d/%d] Downloaded", index+1, uploadCount)
log.Logf("\r[%d/%d] Downloaded", index+1, uploadCount)
}
utils.Log()
log.Log()
}
func GetHashtagJson(url string) {
uploads, err := client.GetHashtagUploads(url)
// GetHashtagJSON - Prints scraped info from hashtag
func GetHashtagJSON(url string) {
uploads, err := client.GetHashtagUploadsJSON(url)
if err != nil {
utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
log.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
return
}
fmt.Printf("%s", uploads)

View File

@ -1,12 +1,14 @@
package workflows
import (
"fmt"
"regexp"
client "../client"
config "../models/config"
res "../resources"
utils "../utils"
"fmt"
"regexp"
log "../utils/log"
)
// CanUseDownloadMusic - Check's if DownloadMusic can be used for parameter
@ -19,9 +21,11 @@ func CanUseDownloadMusic(url string) bool {
func DownloadMusic(url string) {
uploads, err := client.GetMusicUploads(url)
if err != nil {
utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
log.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
return
}
uploads = utils.RemoveArchivedItems(uploads)
uploadCount := len(uploads)
for index, upload := range uploads {
@ -30,15 +34,16 @@ func DownloadMusic(url string) {
utils.InitOutputDirectory(downloadDir)
downloadVideo(upload, downloadDir)
utils.Logf("\r[%d/%d] Downloaded", index+1, uploadCount)
log.Logf("\r[%d/%d] Downloaded", index+1, uploadCount)
}
utils.Log()
log.Log()
}
func GetMusicJson(url string) {
uploads, err := client.GetMusicUploadsJson(url)
// GetMusicJSON - Prints scraped info from music
func GetMusicJSON(url string) {
uploads, err := client.GetMusicUploadsJSON(url)
if err != nil {
utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
log.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
return
}
fmt.Printf("%s", uploads)

View File

@ -1,13 +1,15 @@
package workflows
import (
"fmt"
"regexp"
"strings"
client "../client"
config "../models/config"
res "../resources"
utils "../utils"
"fmt"
"regexp"
"strings"
log "../utils/log"
)
// CanUseDownloadUser - Test's if this workflow can be used for parameter
@ -21,25 +23,29 @@ func CanUseDownloadUser(url string) bool {
func DownloadUser(username string) {
uploads, err := client.GetUserUploads(username)
if err != nil {
utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
log.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
return
}
uploads = utils.RemoveArchivedItems(uploads)
uploadCount := len(uploads)
downloadDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, username)
utils.InitOutputDirectory(downloadDir)
for index, upload := range uploads {
downloadVideo(upload, downloadDir)
utils.Logf("\r[%d/%d] Downloaded", index+1, uploadCount)
log.Logf("\r[%d/%d] Downloaded", index+1, uploadCount)
}
utils.Log()
log.Log()
}
func GetUserVideosJson(username string) {
uploads, err := client.GetUserUploadsJson(username)
// GetUserVideosJSON - Prints scraped info from user
func GetUserVideosJSON(username string) {
uploads, err := client.GetUserUploadsJSON(username)
if err != nil {
utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
log.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
return
}
fmt.Printf("%s", uploads)

View File

@ -1,13 +1,15 @@
package workflows
import (
"fmt"
"regexp"
client "../client"
models "../models"
config "../models/config"
res "../resources"
utils "../utils"
"fmt"
"regexp"
log "../utils/log"
)
// CanUseDownloadSingleVideo - Check's if DownloadSingleVideo can be used for parameter
@ -21,14 +23,18 @@ func DownloadSingleVideo(url string) {
username := utils.GetUsernameFromString(url)
upload, err := client.GetVideoDetails(url)
if err != nil {
utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
log.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
return
}
if utils.IsItemInArchive(upload) {
return
}
downloadDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, username)
utils.InitOutputDirectory(downloadDir)
downloadVideo(upload, downloadDir)
utils.Log("[1/1] Downloaded\n")
log.Log("[1/1] Downloaded\n")
}
// DownloadVideo - Downloads one video
@ -46,4 +52,6 @@ func downloadVideo(upload models.Upload, downloadDir string) {
metadataPath := fmt.Sprintf("%s/%s.json", downloadDir, uploadID)
upload.WriteToFile(metadataPath)
}
utils.AddItemToArchive(upload.GetUploadID())
}

View File

@ -4,6 +4,7 @@ import (
config "../models/config"
res "../resources"
utils "../utils"
log "../utils/log"
)
// StartWorkflowByParameter - Start needed workflow by given parameter
@ -12,7 +13,7 @@ func StartWorkflowByParameter(url string) {
// Music
if CanUseDownloadMusic(url) {
if config.Config.JSONOnly {
GetMusicJson(url)
GetMusicJSON(url)
} else {
DownloadMusic(url)
}
@ -28,7 +29,7 @@ func StartWorkflowByParameter(url string) {
// Tiktok user
if CanUseDownloadUser(url) {
if config.Config.JSONOnly {
GetUserVideosJson(utils.GetUsernameFromString(url))
GetUserVideosJSON(utils.GetUsernameFromString(url))
} else {
DownloadUser(utils.GetUsernameFromString(url))
}
@ -39,12 +40,12 @@ func StartWorkflowByParameter(url string) {
// Tiktok hashtag
if CanUseDownloadHashtag(url) {
if config.Config.JSONOnly {
GetHashtagJson(url)
GetHashtagJSON(url)
} else {
DownloadHashtag(url)
}
return
}
utils.LogFatal(res.ErrorCouldNotRecogniseURL, url)
log.LogFatal(res.ErrorCouldNotRecogniseURL, url)
}