-
Notifications
You must be signed in to change notification settings - Fork 3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
a7fc780
commit b43a4aa
Showing
5 changed files
with
240 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# CI workflow for the Threads extractor: runs the package's Go tests whenever
# the extractor sources or this workflow file change, and once a week.
name: threads

on:
  push:
    paths:
      - "extractors/threads/*.go"
      - ".github/workflows/stream_threads.yml"
  pull_request:
    paths:
      - "extractors/threads/*.go"
      - ".github/workflows/stream_threads.yml"
  schedule:
    # run ci weekly
    - cron: "0 0 * * 0"

jobs:
  test:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        go: ["1.22"]
        os: [ubuntu-latest]
    name: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-go@v5
        with:
          go-version: ${{ matrix.go }}

      # Only the threads extractor package is tested; coverage is written to coverage.txt.
      - name: Test
        run: go test -timeout 5m -race -coverpkg=./... -coverprofile=coverage.txt github.com/iawia002/lux/extractors/threads
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
package threads | ||
|
||
import ( | ||
"fmt" | ||
"net" | ||
"net/http" | ||
netURL "net/url" | ||
"strings" | ||
"time" | ||
|
||
"github.com/gocolly/colly/v2" | ||
"github.com/pkg/errors" | ||
|
||
"github.com/iawia002/lux/extractors" | ||
"github.com/iawia002/lux/request" | ||
"github.com/iawia002/lux/utils" | ||
) | ||
|
||
func init() { | ||
extractors.Register("threads", New()) | ||
} | ||
|
||
// extractor is the Threads implementation returned by New as an
// extractors.Extractor.
type extractor struct {
	// client is the HTTP client installed on the colly collector that
	// fetches the post's embed page.
	client *http.Client
}
|
||
// New returns a instagram extractor. | ||
func New() extractors.Extractor { | ||
return &extractor{ | ||
client: &http.Client{ | ||
Timeout: 10 * time.Second, | ||
Transport: &http.Transport{ | ||
Dial: (&net.Dialer{ | ||
Timeout: 5 * time.Second, | ||
}).Dial, | ||
TLSHandshakeTimeout: 5 * time.Second, | ||
}, | ||
}, | ||
} | ||
} | ||
|
||
type media struct { | ||
URL string | ||
Type extractors.DataType | ||
} | ||
|
||
// Extract is the main function to extract the data. | ||
func (e *extractor) Extract(url string, option extractors.Options) ([]*extractors.Data, error) { | ||
URL, err := netURL.Parse(url) | ||
if err != nil { | ||
return nil, errors.WithStack(err) | ||
} | ||
|
||
paths := strings.Split(URL.Path, "/") | ||
if len(paths) < 3 { | ||
return nil, errors.New("invalid URL format") | ||
} | ||
|
||
poster := paths[1] | ||
shortCode := paths[3] | ||
|
||
medias := make([]media, 0) | ||
|
||
title := fmt.Sprintf("Threads %s - %s", poster, shortCode) | ||
|
||
collector := colly.NewCollector() | ||
collector.SetClient(e.client) | ||
|
||
// case single image or video | ||
collector.OnHTML("div.SingleInnerMediaContainer", func(e *colly.HTMLElement) { | ||
if src := e.ChildAttr("img", "src"); src != "" { | ||
medias = append(medias, media{ | ||
URL: src, | ||
Type: extractors.DataTypeImage, | ||
}) | ||
} | ||
if src := e.ChildAttr("video > source", "src"); src != "" { | ||
medias = append(medias, media{ | ||
URL: src, | ||
Type: extractors.DataTypeVideo, | ||
}) | ||
} | ||
}) | ||
|
||
// case multiple image or video | ||
collector.OnHTML("div.MediaScrollImageContainer", func(e *colly.HTMLElement) { | ||
if src := e.ChildAttr("img", "src"); src != "" { | ||
medias = append(medias, media{ | ||
URL: src, | ||
Type: extractors.DataTypeImage, | ||
}) | ||
} | ||
if src := e.ChildAttr("video > source", "src"); src != "" { | ||
medias = append(medias, media{ | ||
URL: src, | ||
Type: extractors.DataTypeVideo, | ||
}) | ||
} | ||
}) | ||
|
||
// title with caption | ||
// collector.OnHTML("span.BodyTextContainer", func(e *colly.HTMLElement) { | ||
// title = e.Text | ||
// }) | ||
|
||
if err := collector.Visit(URL.JoinPath("embed").String()); err != nil { | ||
return nil, fmt.Errorf("failed to send HTTP request to the Threads: %w", errors.WithStack(err)) | ||
} | ||
|
||
var totalSize int64 | ||
var parts []*extractors.Part | ||
|
||
for _, m := range medias { | ||
_, ext, err := utils.GetNameAndExt(m.URL) | ||
if err != nil { | ||
return nil, errors.WithStack(err) | ||
} | ||
fileSize, err := request.Size(m.URL, url) | ||
if err != nil { | ||
return nil, errors.WithStack(err) | ||
} | ||
|
||
part := &extractors.Part{ | ||
URL: m.URL, | ||
Size: fileSize, | ||
Ext: ext, | ||
} | ||
parts = append(parts, part) | ||
} | ||
|
||
for _, part := range parts { | ||
totalSize += part.Size | ||
} | ||
|
||
streams := map[string]*extractors.Stream{ | ||
"default": { | ||
Parts: parts, | ||
Size: totalSize, | ||
}, | ||
} | ||
|
||
return []*extractors.Data{ | ||
{ | ||
Site: "Threads www.threads.net", | ||
Title: title, | ||
Type: extractors.DataTypeImage, | ||
Streams: streams, | ||
URL: url, | ||
}, | ||
}, nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
package threads_test | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/iawia002/lux/extractors" | ||
"github.com/iawia002/lux/extractors/threads" | ||
"github.com/iawia002/lux/test" | ||
) | ||
|
||
func TestDownload(t *testing.T) { | ||
tests := []struct { | ||
name string | ||
args test.Args | ||
}{ | ||
{ | ||
name: "video test", | ||
args: test.Args{ | ||
URL: "https://www.threads.net/@rowancheung/post/C9xPmHcpfiN", | ||
Title: `Threads @rowancheung - C9xPmHcpfiN`, | ||
Size: 5740684, | ||
}, | ||
}, | ||
{ | ||
name: "video shared test", | ||
args: test.Args{ | ||
URL: "https://www.threads.net/@zuck/post/C9xRqbNPbx2", | ||
Title: `Threads @zuck - C9xRqbNPbx2`, | ||
Size: 5740684, | ||
}, | ||
}, | ||
{ | ||
name: "image test", | ||
args: test.Args{ | ||
URL: "https://www.threads.net/@zuck/post/C-BoS7lM8sH", | ||
Title: `Threads @zuck - C-BoS7lM8sH`, | ||
Size: 159331, | ||
}, | ||
}, | ||
{ | ||
name: "hybrid album test", | ||
args: test.Args{ | ||
URL: "https://www.threads.net/@meta/post/C95Z1DrPNhi", | ||
Title: `Threads @meta - C95Z1DrPNhi`, | ||
Size: 1131229, | ||
}, | ||
}, | ||
} | ||
for _, tt := range tests { | ||
t.Run(tt.name, func(t *testing.T) { | ||
data, err := threads.New().Extract(tt.args.URL, extractors.Options{}) | ||
test.CheckError(t, err) | ||
test.Check(t, tt.args, data[0]) | ||
}) | ||
} | ||
} |