Skip to content

Commit

Permalink
feat: support www.threads.net
Browse files Browse the repository at this point in the history
  • Loading branch information
omegaatt36 committed Aug 4, 2024
1 parent a7fc780 commit b43a4aa
Show file tree
Hide file tree
Showing 5 changed files with 240 additions and 0 deletions.
31 changes: 31 additions & 0 deletions .github/workflows/stream_threads.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
name: threads

on:
push:
paths:
- "extractors/threads/*.go"
- ".github/workflows/stream_threads.yml"
pull_request:
paths:
- "extractors/threads/*.go"
- ".github/workflows/stream_threads.yml"
schedule:
# run ci weekly
- cron: "0 0 * * 0"

jobs:
test:
runs-on: ${{ matrix.os }}
strategy:
matrix:
go: ["1.22"]
os: [ubuntu-latest]
name: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version: ${{ matrix.go }}

- name: Test
run: go test -timeout 5m -race -coverpkg=./... -coverprofile=coverage.txt github.com/iawia002/lux/extractors/threads
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,7 @@ $ lux -j "https://www.bilibili.com/video/av20203945"
| 秒拍 | <https://www.miaopai.com> || | | | | [![miaopai](https://github.com/iawia002/lux/actions/workflows/stream_miaopai.yml/badge.svg)](https://github.com/iawia002/lux/actions/workflows/stream_miaopai.yml) |
| 微博 | <https://weibo.com> || | | | | [![weibo](https://github.com/iawia002/lux/actions/workflows/stream_weibo.yml/badge.svg)](https://github.com/iawia002/lux/actions/workflows/stream_weibo.yml) |
| Instagram | <https://www.instagram.com> ||| | | | [![instagram](https://github.com/iawia002/lux/actions/workflows/stream_instagram.yml/badge.svg)](https://github.com/iawia002/lux/actions/workflows/stream_instagram.yml) |
| Threads | <https://www.threads.net> ||| | | | [![threads](https://github.com/iawia002/lux/actions/workflows/stream_threads.yml/badge.svg)](https://github.com/iawia002/lux/actions/workflows/stream_threads.yml) |
| Twitter | <https://twitter.com> || | | | | [![twitter](https://github.com/iawia002/lux/actions/workflows/stream_twitter.yml/badge.svg)](https://github.com/iawia002/lux/actions/workflows/stream_twitter.yml) |
| 腾讯视频 | <https://v.qq.com> || | | | | [![qq](https://github.com/iawia002/lux/actions/workflows/stream_qq.yml/badge.svg)](https://github.com/iawia002/lux/actions/workflows/stream_qq.yml) |
| 网易云音乐 | <https://music.163.com> || | | | | [![netease](https://github.com/iawia002/lux/actions/workflows/stream_netease.yml/badge.svg)](https://github.com/iawia002/lux/actions/workflows/stream_netease.yml) |
Expand Down
1 change: 1 addition & 0 deletions app/register.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
_ "github.com/iawia002/lux/extractors/rumble"
_ "github.com/iawia002/lux/extractors/streamtape"
_ "github.com/iawia002/lux/extractors/tangdou"
_ "github.com/iawia002/lux/extractors/threads"
_ "github.com/iawia002/lux/extractors/tiktok"
_ "github.com/iawia002/lux/extractors/tumblr"
_ "github.com/iawia002/lux/extractors/twitter"
Expand Down
151 changes: 151 additions & 0 deletions extractors/threads/threads.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
package threads

import (
"fmt"
"net"
"net/http"
netURL "net/url"
"strings"
"time"

"github.com/gocolly/colly/v2"
"github.com/pkg/errors"

"github.com/iawia002/lux/extractors"
"github.com/iawia002/lux/request"
"github.com/iawia002/lux/utils"
)

func init() {
extractors.Register("threads", New())
}

// extractor is the Threads extractor; its Extract method does the work.
type extractor struct {
// client is the HTTP client handed to the colly collector that fetches the
// post's embed page.
client *http.Client
}

// New returns a Threads extractor.
//
// The extractor owns an HTTP client with a 10-second overall request timeout
// and 5-second limits on both connection dialing and the TLS handshake.
func New() extractors.Extractor {
	dialer := &net.Dialer{Timeout: 5 * time.Second}

	return &extractor{
		client: &http.Client{
			Timeout: 10 * time.Second,
			Transport: &http.Transport{
				// DialContext replaces the deprecated Dial field and lets the
				// transport cancel dials that are no longer needed.
				DialContext:         dialer.DialContext,
				TLSHandshakeTimeout: 5 * time.Second,
			},
		},
	}
}

// media is one image or video found on a post's embed page.
type media struct {
// URL is the value of the element's src attribute.
URL string
// Type records whether the entry is an image or a video.
Type extractors.DataType
}

// Extract is the main function to extract the data.
//
// It expects a post URL whose path looks like /@user/post/<code>, loads the
// post's "embed" page and collects the src of every image and video found in
// the single-media and multi-media containers. All media are returned as the
// parts of a single "default" stream.
//
// An error is returned when the URL cannot be parsed or does not have the
// expected path shape, when the embed page cannot be fetched, when the page
// contains no media, or when the extension or size of a media URL cannot be
// determined.
func (e *extractor) Extract(url string, option extractors.Options) ([]*extractors.Data, error) {
	URL, err := netURL.Parse(url)
	if err != nil {
		return nil, errors.WithStack(err)
	}

	// "/@user/post/<code>" splits into ["", "@user", "post", "<code>"], so at
	// least four elements are required before index 3 may be read.
	paths := strings.Split(URL.Path, "/")
	if len(paths) < 4 || paths[1] == "" || paths[3] == "" {
		return nil, errors.New("invalid URL format")
	}

	poster := paths[1]
	shortCode := paths[3]
	title := fmt.Sprintf("Threads %s - %s", poster, shortCode)

	var medias []media

	// collectMedia records the image and/or video source found inside one
	// media container. It is shared by both container selectors below.
	collectMedia := func(el *colly.HTMLElement) {
		if src := el.ChildAttr("img", "src"); src != "" {
			medias = append(medias, media{
				URL:  src,
				Type: extractors.DataTypeImage,
			})
		}
		if src := el.ChildAttr("video > source", "src"); src != "" {
			medias = append(medias, media{
				URL:  src,
				Type: extractors.DataTypeVideo,
			})
		}
	}

	collector := colly.NewCollector()
	collector.SetClient(e.client)

	// case single image or video
	collector.OnHTML("div.SingleInnerMediaContainer", collectMedia)

	// case multiple image or video
	collector.OnHTML("div.MediaScrollImageContainer", collectMedia)

	// title with caption
	// collector.OnHTML("span.BodyTextContainer", func(e *colly.HTMLElement) {
	// 	title = e.Text
	// })

	if err := collector.Visit(URL.JoinPath("embed").String()); err != nil {
		return nil, fmt.Errorf("failed to send HTTP request to the Threads: %w", errors.WithStack(err))
	}

	// Nothing matched either selector: report it instead of returning an
	// empty stream that would "succeed" without downloading anything.
	if len(medias) == 0 {
		return nil, errors.New("no media found in the Threads post")
	}

	var totalSize int64
	parts := make([]*extractors.Part, 0, len(medias))
	// The post is reported as a video as soon as one of its media is a video;
	// otherwise it stays an image post.
	dataType := extractors.DataTypeImage

	for _, m := range medias {
		if m.Type == extractors.DataTypeVideo {
			dataType = extractors.DataTypeVideo
		}

		_, ext, err := utils.GetNameAndExt(m.URL)
		if err != nil {
			return nil, errors.WithStack(err)
		}
		// The original post URL is passed as the second argument, as before.
		fileSize, err := request.Size(m.URL, url)
		if err != nil {
			return nil, errors.WithStack(err)
		}

		parts = append(parts, &extractors.Part{
			URL:  m.URL,
			Size: fileSize,
			Ext:  ext,
		})
		totalSize += fileSize
	}

	streams := map[string]*extractors.Stream{
		"default": {
			Parts: parts,
			Size:  totalSize,
		},
	}

	return []*extractors.Data{
		{
			Site:    "Threads www.threads.net",
			Title:   title,
			Type:    dataType,
			Streams: streams,
			URL:     url,
		},
	}, nil
}
56 changes: 56 additions & 0 deletions extractors/threads/threads_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package threads_test

import (
"testing"

"github.com/iawia002/lux/extractors"
"github.com/iawia002/lux/extractors/threads"
"github.com/iawia002/lux/test"
)

// TestDownload runs the Threads extractor against a set of post URLs and
// checks the first returned entry against the expected title and size.
func TestDownload(t *testing.T) {
	type testCase struct {
		name string
		args test.Args
	}

	cases := []testCase{
		{
			name: "video test",
			args: test.Args{
				URL:   "https://www.threads.net/@rowancheung/post/C9xPmHcpfiN",
				Title: `Threads @rowancheung - C9xPmHcpfiN`,
				Size:  5740684,
			},
		},
		{
			name: "video shared test",
			args: test.Args{
				URL:   "https://www.threads.net/@zuck/post/C9xRqbNPbx2",
				Title: `Threads @zuck - C9xRqbNPbx2`,
				Size:  5740684,
			},
		},
		{
			name: "image test",
			args: test.Args{
				URL:   "https://www.threads.net/@zuck/post/C-BoS7lM8sH",
				Title: `Threads @zuck - C-BoS7lM8sH`,
				Size:  159331,
			},
		},
		{
			name: "hybrid album test",
			args: test.Args{
				URL:   "https://www.threads.net/@meta/post/C95Z1DrPNhi",
				Title: `Threads @meta - C95Z1DrPNhi`,
				Size:  1131229,
			},
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			extractor := threads.New()
			result, err := extractor.Extract(tc.args.URL, extractors.Options{})
			test.CheckError(t, err)
			test.Check(t, tc.args, result[0])
		})
	}
}

0 comments on commit b43a4aa

Please sign in to comment.