TTDL-7 Added flag; Code clean up

This commit is contained in:
Pijus Kamandulis 2020-03-22 02:10:24 +02:00
parent 9a65746fd4
commit f9d35e3bf2
19 changed files with 188 additions and 79 deletions

View File

@ -16,14 +16,15 @@ You can download items listed in a text file by running `./tiktok-dl [OPTIONS] -
Clone this repository and run `go build` to build the executable.
## Available options
* `-debug` - enables debug mode
* `-output some_directory` - Output path (default "./downloads")
* `-metadata` - Write video metadata to a .json file
* `-archive` - Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.
* `-batch-file` - File containing URLs/Usernames to download, one value per line. Lines starting with '#' are treated as comments and ignored.
* `-deadline` - Sets the timeout for scraper logic in seconds (used as a workaround for context deadline exceeded error) (default 1500)
* `-quiet` - Suppress output
* `-debug` - enables debug mode
* `-json` - Returns all data scraped from TikTok as JSON
* `-limit` - Sets the maximum number of videos that will be downloaded (default infinity)
* `-metadata` - Write video metadata to a .json file
* `-output some_directory` - Output path (default "./downloads")
* `-quiet` - Suppress output
## Acknowledgments
This software uses **chromedp** for web scraping; it can be found here: https://github.com/chromedp/chromedp \

View File

@ -3,15 +3,16 @@ package client
import (
"context"
"errors"
"github.com/chromedp/chromedp"
"io/ioutil"
"log"
"os"
"strings"
"time"
"github.com/chromedp/chromedp"
config "../models/config"
utils "../utils"
log "../utils/log"
)
// GetMusicUploads - Get all uploads by given music
@ -33,7 +34,7 @@ func executeClientAction(url string, jsAction string) (string, error) {
ctx, cancel := chromedp.NewContext(
allocCtx,
chromedp.WithLogf(log.Printf),
chromedp.WithLogf(log.Logf),
)
defer cancel()
@ -84,9 +85,9 @@ func runScrapeWithInfo(ctx context.Context, jsAction string, url string) (string
}
if jsOutput != "0" {
utils.Logf("\rPreloading... %s items have been founded.", jsOutput)
log.Logf("\rPreloading... %s items have been found.", jsOutput)
} else {
utils.Logf("\rPreloading...")
log.Logf("\rPreloading...")
}
if err := chromedp.Run(ctx, chromedp.EvaluateAsDevTools("currentState.finished.toString()", &jsOutput)); err != nil {
@ -100,7 +101,7 @@ func runScrapeWithInfo(ctx context.Context, jsAction string, url string) (string
time.Sleep(50 * time.Millisecond)
}
utils.Log("\nRetrieving items...")
log.Log("\nRetrieving items...")
if err := chromedp.Run(ctx,
// Wait until custom js finishes
chromedp.WaitVisible(`video_urls`),

View File

@ -1,22 +1,24 @@
package client
import (
"fmt"
models "../models"
config "../models/config"
"fmt"
)
// GetUserUploads - Get all uploads marked with given hashtag
// GetHashtagUploads - Get all uploads marked with given hashtag
func GetHashtagUploads(hashtagURL string) ([]models.Upload, error) {
jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)
actionOutput, err := executeClientAction(hashtagURL, jsMethod)
actionOutput, err := GetHashtagUploadsJSON(hashtagURL)
if err != nil {
return nil, err
}
return models.ParseUploads(actionOutput), nil
}
func GetHashtagUploadsJson(hashtagURL string) (string, error) {
// GetHashtagUploadsJSON - Get hashtag uploads scrape
func GetHashtagUploadsJSON(hashtagURL string) (string, error) {
jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)
actionOutput, err := executeClientAction(hashtagURL, jsMethod)
if err != nil {

View File

@ -1,22 +1,23 @@
package client
import (
"fmt"
models "../models"
config "../models/config"
"fmt"
)
// GetMusicUploads - Get all uploads by given music
func GetMusicUploads(url string) ([]models.Upload, error) {
jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)
actionOutput, err := executeClientAction(url, jsMethod)
actionOutput, err := GetMusicUploadsJSON(url)
if err != nil {
return nil, err
}
return models.ParseUploads(actionOutput), nil
}
func GetMusicUploadsJson(url string) (string, error) {
// GetMusicUploadsJSON - Get music uploads scrape
func GetMusicUploadsJSON(url string) (string, error) {
jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)
actionOutput, err := executeClientAction(url, jsMethod)
if err != nil {

View File

@ -1,22 +1,23 @@
package client
import (
"fmt"
models "../models"
config "../models/config"
"fmt"
)
// GetUserUploads - Get all uploads by user
func GetUserUploads(username string) ([]models.Upload, error) {
jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)
actionOutput, err := executeClientAction(`https://www.tiktok.com/@`+username, jsMethod)
actionOutput, err := GetUserUploadsJSON(username)
if err != nil {
return nil, err
}
return models.ParseUploads(actionOutput), nil
}
func GetUserUploadsJson(username string) (string, error) {
// GetUserUploadsJSON - Get user uploads scrape
func GetUserUploadsJSON(username string) (string, error) {
jsMethod := fmt.Sprintf("bootstrapIteratingVideos(%d)", config.Config.Limit)
actionOutput, err := executeClientAction(`https://www.tiktok.com/@`+username, jsMethod)
if err != nil {

View File

@ -11,24 +11,26 @@ var Config struct {
URL string
OutputPath string
BatchFilePath string
ArchiveFilePath string
Debug bool
MetaData bool
Quiet bool
JSONOnly bool
Deadline int
Limit int
JSONOnly bool
}
// GetConfig - Parses the command-line flags and stores them in the package-level Config
func GetConfig() {
outputPath := flag.String("output", "./downloads", "Output path")
batchFilePath := flag.String("batch-file", "", "File containing URLs/Usernames to download, one value per line. Lines starting with '#', are considered as comments and ignored.")
archive := flag.String("archive", "", "Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.")
debug := flag.Bool("debug", false, "Enables debug mode")
metadata := flag.Bool("metadata", false, "Write video metadata to a .json file")
quiet := flag.Bool("quiet", false, "Supress output")
jsonOnly := flag.Bool("json", false, "Just get JSON data from scraper (without video downloading)")
deadline := flag.Int("deadline", 1500, "Sets the timout for scraper logic in seconds (used as a workaround for 'context deadline exceeded' error)")
limit := flag.Int("limit", 0, "Sets the videos count limit (useful when there too many videos from the user or by hashtag)")
jsonOnly := flag.Bool("json", false, "Just get JSON data from scraper (without video downloading)")
flag.Parse()
args := flag.Args()
@ -45,13 +47,14 @@ func GetConfig() {
}
Config.OutputPath = *outputPath
Config.BatchFilePath = *batchFilePath
Config.ArchiveFilePath = *archive
Config.Debug = *debug
Config.MetaData = *metadata
Config.Quiet = *quiet
if *jsonOnly {
Config.Quiet = true
}
Config.JSONOnly = *jsonOnly
Config.Deadline = *deadline
Config.Limit = *limit
Config.JSONOnly = *jsonOnly
}

View File

@ -1,11 +1,13 @@
package models
import (
res "../resources"
utils "../utils"
"encoding/json"
"os"
"strings"
res "../resources"
checkErr "../utils/checkErr"
log "../utils/log"
)
// Upload - Upload object
@ -47,16 +49,16 @@ func (u Upload) GetUploadID() string {
func (u Upload) WriteToFile(outputPath string) {
bytes, err := json.Marshal(u)
if err != nil {
utils.Logf(res.ErrorCouldNotSerializeJSON, u.GetUploadID())
log.Logf(res.ErrorCouldNotSerializeJSON, u.GetUploadID())
panic(err)
}
// Create the file
out, err := os.Create(outputPath)
utils.CheckErr(err)
checkErr.CheckErr(err)
defer out.Close()
// Write to file
_, err = out.Write(bytes)
utils.CheckErr(err)
checkErr.CheckErr(err)
}

53
utils/archive.go Normal file
View File

@ -0,0 +1,53 @@
package utils
import (
models "../models"
config "../models/config"
log "./log"
)
// IsItemInArchive - Reports whether the given upload is already archived.
// The upload is passed through RemoveArchivedItems as a one-element slice;
// an empty result means it was filtered out as archived.
func IsItemInArchive(upload models.Upload) bool {
	remaining := RemoveArchivedItems([]models.Upload{upload})
	return len(remaining) == 0
}
// RemoveArchivedItems - Returns the uploads whose IDs are not listed in the archive file.
//
// The archive path is read from config.Config.ArchiveFilePath. When no archive
// is configured, or the file does not exist, uploads is returned unchanged.
// Otherwise each line of the archive file is treated as one archived upload ID
// and the uploads that are not archived are returned in their original order.
// The caller's slice is never modified; when at least one item is dropped a
// new slice is returned and the number of skipped items is logged.
func RemoveArchivedItems(uploads []models.Upload) []models.Upload {
	archiveFilePath := config.Config.ArchiveFilePath

	if archiveFilePath == "" || !CheckIfExists(archiveFilePath) {
		return uploads
	}

	// Collect the archived IDs first and filter in a single pass afterwards.
	// Deleting from the slice while ranging over it can slice out of bounds
	// once the slice has shrunk (e.g. two uploads sharing an archived ID), and
	// it rescans every upload for every archive line.
	archivedIDs := make(map[string]struct{})
	ReadFileLineByLine(archiveFilePath, func(archivedItem string) {
		archivedIDs[archivedItem] = struct{}{}
	})

	remaining := make([]models.Upload, 0, len(uploads))
	for _, upload := range uploads {
		if _, archived := archivedIDs[upload.GetUploadID()]; !archived {
			remaining = append(remaining, upload)
		}
	}

	removedCount := len(uploads) - len(remaining)
	if removedCount == 0 {
		// Nothing was archived: hand back the original slice untouched.
		return uploads
	}

	log.Logf("%d items, found in archive. Skipping...\n", removedCount)

	return remaining
}
// AddItemToArchive - Records uploadID in the archive file.
// Does nothing when config.Config.ArchiveFilePath is empty; otherwise the ID
// is appended to that file via AppendToFile.
func AddItemToArchive(uploadID string) {
	if path := config.Config.ArchiveFilePath; path != "" {
		AppendToFile(uploadID, path)
	}
}

View File

@ -4,21 +4,23 @@ import (
"io"
"net/http"
"os"
checkErr "./checkErr"
)
// DownloadFile - Downloads content from `url` and stores it in `outputPath`
func DownloadFile(outputPath string, url string) {
// Get the data
resp, err := http.Get(url)
CheckErr(err)
checkErr.CheckErr(err)
defer resp.Body.Close()
// Create the file
out, err := os.Create(outputPath)
CheckErr(err)
checkErr.CheckErr(err)
defer out.Close()
// Write the body to file
_, err = io.Copy(out, resp.Body)
CheckErr(err)
checkErr.CheckErr(err)
}

View File

@ -4,6 +4,8 @@ import (
"bufio"
"io/ioutil"
"os"
checkErr "./checkErr"
)
type delegateString func(string)
@ -37,7 +39,7 @@ func ReadFileToString(path string) string {
// ReadFileLineByLine - Reads file line by line and calls delegate
func ReadFileLineByLine(path string, delegate delegateString) {
file, err := os.Open(path)
CheckErr(err)
checkErr.CheckErr(err)
defer file.Close()
scanner := bufio.NewScanner(file)
@ -49,3 +51,14 @@ func ReadFileLineByLine(path string, delegate delegateString) {
panic(err)
}
}
// AppendToFile - Appends str followed by a newline to the file at filePath.
// The file is opened write-only in append mode and is created with mode 0644
// if it does not exist. Open and write errors are passed to checkErr.CheckErr.
func AppendToFile(str string, filePath string) {
	const openFlags = os.O_WRONLY | os.O_CREATE | os.O_APPEND

	file, openErr := os.OpenFile(filePath, openFlags, 0644)
	checkErr.CheckErr(openErr)
	defer file.Close()

	line := str + "\n"
	_, writeErr := file.WriteString(line)
	if writeErr != nil {
		checkErr.CheckErr(writeErr)
	}
}

View File

@ -1,9 +1,10 @@
package utils
import (
config "../models/config"
"fmt"
"os"
config "../../models/config"
)
// Log - Write to std out

View File

@ -2,11 +2,13 @@ package utils
import (
"io/ioutil"
checkErr "./checkErr"
)
// ReadFileAsString - Returns contents of given file
func ReadFileAsString(fileName string) string {
content, err := ioutil.ReadFile(fileName)
CheckErr(err)
checkErr.CheckErr(err)
return string(content)
}

View File

@ -3,6 +3,7 @@ package workflows
import (
res "../resources"
utils "../utils"
log "../utils/log"
)
// CanUseDownloadBatchFile - Checks whether DownloadBatchFile can be used
@ -13,7 +14,7 @@ func CanUseDownloadBatchFile(batchFilePath string) bool {
// DownloadBatchFile - Download items from batch file
func DownloadBatchFile(batchFilePath string) {
if !utils.CheckIfExists(batchFilePath) {
utils.LogFatal(res.ErrorPathNotFound, batchFilePath)
log.LogFatal(res.ErrorPathNotFound, batchFilePath)
}
utils.ReadFileLineByLine(batchFilePath, downloadItem)

View File

@ -1,12 +1,14 @@
package workflows
import (
"fmt"
"strings"
client "../client"
config "../models/config"
res "../resources"
utils "../utils"
"fmt"
"strings"
log "../utils/log"
)
// CanUseDownloadHashtag - Tests whether this workflow can be used for the given parameter
@ -19,10 +21,13 @@ func CanUseDownloadHashtag(url string) bool {
func DownloadHashtag(url string) {
uploads, err := client.GetHashtagUploads(url)
if err != nil {
utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
log.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
return
}
uploads = utils.RemoveArchivedItems(uploads)
uploadCount := len(uploads)
hashtag := utils.GetHashtagFromURL(url)
downloadDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, hashtag)
@ -30,15 +35,16 @@ func DownloadHashtag(url string) {
for index, upload := range uploads {
downloadVideo(upload, downloadDir)
utils.Logf("\r[%d/%d] Downloaded", index+1, uploadCount)
log.Logf("\r[%d/%d] Downloaded", index+1, uploadCount)
}
utils.Log()
log.Log()
}
func GetHashtagJson(url string) {
uploads, err := client.GetHashtagUploads(url)
// GetHashtagJSON - Prints scraped info from hashtag
func GetHashtagJSON(url string) {
uploads, err := client.GetHashtagUploadsJSON(url)
if err != nil {
utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
log.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
return
}
fmt.Printf("%s", uploads)

View File

@ -1,12 +1,14 @@
package workflows
import (
"fmt"
"regexp"
client "../client"
config "../models/config"
res "../resources"
utils "../utils"
"fmt"
"regexp"
log "../utils/log"
)
// CanUseDownloadMusic - Checks whether DownloadMusic can be used for the given parameter
@ -19,9 +21,11 @@ func CanUseDownloadMusic(url string) bool {
func DownloadMusic(url string) {
uploads, err := client.GetMusicUploads(url)
if err != nil {
utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
log.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
return
}
uploads = utils.RemoveArchivedItems(uploads)
uploadCount := len(uploads)
for index, upload := range uploads {
@ -30,15 +34,16 @@ func DownloadMusic(url string) {
utils.InitOutputDirectory(downloadDir)
downloadVideo(upload, downloadDir)
utils.Logf("\r[%d/%d] Downloaded", index+1, uploadCount)
log.Logf("\r[%d/%d] Downloaded", index+1, uploadCount)
}
utils.Log()
log.Log()
}
func GetMusicJson(url string) {
uploads, err := client.GetMusicUploadsJson(url)
// GetMusicJSON - Prints scraped info from music
func GetMusicJSON(url string) {
uploads, err := client.GetMusicUploadsJSON(url)
if err != nil {
utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
log.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
return
}
fmt.Printf("%s", uploads)

View File

@ -1,13 +1,15 @@
package workflows
import (
"fmt"
"regexp"
"strings"
client "../client"
config "../models/config"
res "../resources"
utils "../utils"
"fmt"
"regexp"
"strings"
log "../utils/log"
)
// CanUseDownloadUser - Tests whether this workflow can be used for the given parameter
@ -21,25 +23,29 @@ func CanUseDownloadUser(url string) bool {
func DownloadUser(username string) {
uploads, err := client.GetUserUploads(username)
if err != nil {
utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
log.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
return
}
uploads = utils.RemoveArchivedItems(uploads)
uploadCount := len(uploads)
downloadDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, username)
utils.InitOutputDirectory(downloadDir)
for index, upload := range uploads {
downloadVideo(upload, downloadDir)
utils.Logf("\r[%d/%d] Downloaded", index+1, uploadCount)
log.Logf("\r[%d/%d] Downloaded", index+1, uploadCount)
}
utils.Log()
log.Log()
}
func GetUserVideosJson(username string) {
uploads, err := client.GetUserUploadsJson(username)
// GetUserVideosJSON - Prints scraped info from user
func GetUserVideosJSON(username string) {
uploads, err := client.GetUserUploadsJSON(username)
if err != nil {
utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
log.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
return
}
fmt.Printf("%s", uploads)

View File

@ -1,13 +1,15 @@
package workflows
import (
"fmt"
"regexp"
client "../client"
models "../models"
config "../models/config"
res "../resources"
utils "../utils"
"fmt"
"regexp"
log "../utils/log"
)
// CanUseDownloadSingleVideo - Checks whether DownloadSingleVideo can be used for the given parameter
@ -21,14 +23,18 @@ func DownloadSingleVideo(url string) {
username := utils.GetUsernameFromString(url)
upload, err := client.GetVideoDetails(url)
if err != nil {
utils.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
log.LogErr(res.ErrorCouldNotGetUserUploads, err.Error())
return
}
if utils.IsItemInArchive(upload) {
return
}
downloadDir := fmt.Sprintf("%s/%s", config.Config.OutputPath, username)
utils.InitOutputDirectory(downloadDir)
downloadVideo(upload, downloadDir)
utils.Log("[1/1] Downloaded\n")
log.Log("[1/1] Downloaded\n")
}
// downloadVideo - Downloads one video
@ -46,4 +52,6 @@ func downloadVideo(upload models.Upload, downloadDir string) {
metadataPath := fmt.Sprintf("%s/%s.json", downloadDir, uploadID)
upload.WriteToFile(metadataPath)
}
utils.AddItemToArchive(upload.GetUploadID())
}

View File

@ -4,6 +4,7 @@ import (
config "../models/config"
res "../resources"
utils "../utils"
log "../utils/log"
)
// StartWorkflowByParameter - Start needed workflow by given parameter
@ -12,7 +13,7 @@ func StartWorkflowByParameter(url string) {
// Music
if CanUseDownloadMusic(url) {
if config.Config.JSONOnly {
GetMusicJson(url)
GetMusicJSON(url)
} else {
DownloadMusic(url)
}
@ -28,7 +29,7 @@ func StartWorkflowByParameter(url string) {
// Tiktok user
if CanUseDownloadUser(url) {
if config.Config.JSONOnly {
GetUserVideosJson(utils.GetUsernameFromString(url))
GetUserVideosJSON(utils.GetUsernameFromString(url))
} else {
DownloadUser(utils.GetUsernameFromString(url))
}
@ -39,12 +40,12 @@ func StartWorkflowByParameter(url string) {
// Tiktok hashtag
if CanUseDownloadHashtag(url) {
if config.Config.JSONOnly {
GetHashtagJson(url)
GetHashtagJSON(url)
} else {
DownloadHashtag(url)
}
return
}
utils.LogFatal(res.ErrorCouldNotRecogniseURL, url)
log.LogFatal(res.ErrorCouldNotRecogniseURL, url)
}