-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
use SocialMediaPosting for video urls
RIP graphql
- Loading branch information
Showing
15 changed files
with
311 additions
and
50 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
66 changes: 66 additions & 0 deletions
66
app/src/data-extraction/directly-in-browser/social-media-posting/carousel-video-index.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
import { observeCarouselIndex } from "../../../carousel-index-observer"; | ||
import { queryMediaElement } from "../media-and-src/query-media-element"; | ||
import { waitForElementExistence } from "../../../../lib/await-element"; | ||
import { Lazy } from "fp-ts/es6/function"; | ||
|
||
// the object `SocialMediaPosting` contains videos and images of a carousel, | ||
// but they are in separate arrays. | ||
// how do we get the correct video at a given carousel index? | ||
// it appears that the order of the videos is the same as their order in the carousel. | ||
|
||
// example: | ||
// carousel: [ image1, image2, video1, image3, video2, video3, image4, video4 ] | ||
// SocialMediaPosting.video: [ video1, video2 , video3, video4 ] | ||
// SocialMediaPosting.image: [ image1, image2 , image3, image4 ] | ||
|
||
// so if we are at index 4 in the carousel which is item `video2`, | ||
// how do we know that it is the second video? | ||
// the only idea i have so far is to keep track of how many videos we have | ||
// scrolled past in the carousel. | ||
// this module provides an observer for the video index. | ||
// be cautious! it may not be very robust! | ||
// always doublecheck your downloads! | ||
|
||
|
||
/**
 * Starts tracking which video of a carousel post is currently shown and
 * returns a getter for that video index.
 *
 * The index counts videos only, so it can be used to address
 * `SocialMediaPosting.video`, where videos are stored apart from images.
 * It is derived by counting the videos that have been scrolled past, so it
 * relies on the carousel being stepped through item by item.
 *
 * @param postElement - Post element that is searched for the carousel `ul`.
 * @returns A getter for the current video index. It yields `0` until the
 * carousel has been observed, and `-1` while only images have been seen.
 */
export function makeVideoIndexObserver(postElement: HTMLElement): Lazy<number> {
    let videoIndex = 0;

    const startObserving = async (): Promise<void> => {
        const carouselElement = await waitForElementExistence(100, 5, postElement, "ul");
        // NOTE(review): it is not visible here whether the helper rejects or
        // resolves with nothing when no `ul` shows up — both are handled.
        if (!carouselElement) return;

        let videoIndexInitialized = false;
        let isCurrentlyVideo = false;
        let previousIndex = 0;

        observeCarouselIndex(
            carouselElement,
            ({ child, index }) => {
                const mediaElement = queryMediaElement(child);
                if (!mediaElement) return;

                const isVideoElement = mediaElement.matches("video");

                if (!videoIndexInitialized) {
                    // first observed item: a video is video #0, an image
                    // means no video has been reached yet.
                    videoIndex = isVideoElement ? 0 : -1;
                    videoIndexInitialized = true;
                }
                else if (index > previousIndex) {
                    // moved forward: count the item we arrived at.
                    if (isVideoElement) videoIndex += 1;
                }
                else if (index < previousIndex) {
                    // moved backward: un-count the item we just left.
                    if (isCurrentlyVideo) videoIndex -= 1;
                }

                isCurrentlyVideo = isVideoElement;
                previousIndex = index;
            }
        );
    };

    // fire-and-forget, but never leave the rejection unhandled:
    // the getter simply keeps returning the initial index in that case.
    void startObserving().catch((error: unknown) => {
        console.debug("could not start observing the carousel video index", error);
    });

    return () => videoIndex;
}
40 changes: 40 additions & 0 deletions
40
app/src/data-extraction/directly-in-browser/social-media-posting/find-in-dom.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
import { Option, none, some } from "fp-ts/es6/Option"; | ||
import { SocialMediaPosting } from "./types"; | ||
|
||
// my previous methods of obtaining video urls are now broken, | ||
// but Instagram has kindly provided us with the exact | ||
// data we need in the DOM. | ||
// i believe this works only on post pages and not on the mainfeed, | ||
// but i haven't checked yet. | ||
|
||
/**
 * Tells whether a parsed JSON-LD value is an object tagged as
 * `SocialMediaPosting`. Safe for `null` and primitives.
 */
function isSocialMediaPostingItem(item: unknown): boolean {
    if (typeof item !== "object" || item === null) return false;
    return (item as Record<string, unknown>)["@type"] === "SocialMediaPosting";
}

/**
 * Looks for a `SocialMediaPosting` item in the JSON-LD scripts of the
 * current document.
 *
 * Every `script[type="application/ld+json"]` element is inspected in
 * document order. A script that does not contain valid JSON, or that holds
 * no posting, is skipped so that it cannot hide a posting in a later script.
 * The parsed content may be a single object or an array of objects.
 *
 * @returns `some(posting)` for the first item whose `@type` is
 * `"SocialMediaPosting"`, otherwise `none`. Only the `@type` tag is checked;
 * the rest of the shape is not validated.
 */
export function findSocialMediaPostingInDom(): Option<SocialMediaPosting> {
    const scripts = document.querySelectorAll('script[type="application/ld+json"]');

    for (const script of Array.from(scripts)) {
        let parsed: unknown;
        try {
            parsed = JSON.parse(script.innerHTML);
        }
        catch {
            // malformed JSON in this script: try the next one
            continue;
        }

        // the parsed result is sometimes an array instead of a single
        // object, so always work on an array to keep one code path.
        const candidates: unknown[] = Array.isArray(parsed) ? parsed : [parsed];

        const postingItem = candidates.find(isSocialMediaPostingItem);
        if (postingItem) {
            // TODO: validation of type
            return some(postingItem as SocialMediaPosting);
        }
    }

    return none;
}
109 changes: 109 additions & 0 deletions
109
app/src/data-extraction/directly-in-browser/social-media-posting/media-provider.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
import { Option, fromNullable, isNone, none } from "fp-ts/es6/Option"; | ||
import { PostType } from "../../from-fetch-response/types"; | ||
import { SingleMediaInfo } from "../../media-types"; | ||
import { findTypeOfPost } from "../post-type"; | ||
import { tryGetImageSrc } from "../../hybrid/try-get-image-src"; | ||
import { findUsernameInPost } from "../post-username"; | ||
import { getCurrentPageType, isSinglePostType } from "../../../insta-navigation-observer"; | ||
import { makeVideoIndexObserver } from "./carousel-video-index"; | ||
import { SocialMediaPosting } from "./types"; | ||
import { findSocialMediaPostingInDom } from "./find-in-dom"; | ||
|
||
// make a function that lazily extracts media from this post. | ||
// if it's an image, it will query the image source. | ||
// if it's a video, it will use `SocialMediaPosting` taken | ||
// from a certain script element in the DOM. | ||
// for carousels, it is also necessary to keep track of the | ||
// current video index, because videos and images are stored | ||
// in separate arrays. | ||
|
||
// granted, it is not very pretty! | ||
// i have basically copy pasted this code from another place | ||
// when i was still doing fetches. | ||
// for fetches, it was necessary to cache as many values as possible. | ||
// i should definitely rewrite this function and split it | ||
// into multiple cases (image, single video, carousel, ...). | ||
|
||
/**
 * Creates a function that lazily extracts the media currently shown in a post.
 *
 * - If the current item is an image, its source is read via `tryGetImageSrc`.
 * - Otherwise the item is treated as a video and its url is taken from the
 *   `SocialMediaPosting` found in the DOM. This is only attempted when the
 *   current page is a single-post page.
 * - For carousels, the video index observed since creation selects the
 *   entry of `SocialMediaPosting.video`.
 *
 * The post type and the posting are cached once they have been found.
 *
 * @param postElement - Element of the post to extract media from.
 * @returns An async function resolving to the media info, or to `undefined`
 * (after logging the reason) when the media could not be determined.
 */
export function makeSocialMediaPostingExtractor(postElement: HTMLElement){

    // the video index is only needed for videos, but it has to be tracked
    // from the start so that it is ready when the extractor is called.
    const getVideoIndex = makeVideoIndexObserver(postElement);

    // cached values
    let socialMediaPosting: Option<SocialMediaPosting> = none;
    let currentPostType: Option<PostType> = none;

    return async (): Promise<SingleMediaInfo | undefined> => {

        // --- post type ---
        if (isNone(currentPostType)) {
            currentPostType = fromNullable(findTypeOfPost(postElement));
        }
        if (isNone(currentPostType)) {
            console.error("could not find type of post", postElement);
            return;
        }
        const postType = currentPostType.value;

        // --- image: the source can be read directly ---
        const imageSrcData = tryGetImageSrc(postType, postElement);
        if (imageSrcData) {
            const username = findUsernameInPost(postElement);
            return {
                username: username as string,
                ...imageSrcData
            };
        }

        // --- video outside of a post page ---
        if (!isSinglePostType(getCurrentPageType())) {
            console.warn("please open the page of this post in a new tab. downloading videos directly from the mainfeed is currently not supported.");
            return;
        }

        // --- video on a post page: social media posting ---
        if (isNone(socialMediaPosting)) {
            socialMediaPosting = findSocialMediaPostingInDom();
        }
        if (isNone(socialMediaPosting)) {
            console.error("could not find social media posting in DOM");
            return;
        }
        const mediaPost = socialMediaPosting.value;

        // the posting is parsed JSON that was cast without validation,
        // so its fields are checked before they are dereferenced.
        const videoItems = mediaPost.video;
        if (!Array.isArray(videoItems)) {
            console.error("social media posting does not contain a list of videos", mediaPost);
            return;
        }

        // same username lookup as the image case when the posting has no author
        const username = mediaPost.author?.identifier?.value
            ?? (findUsernameInPost(postElement) as string);

        const videoIndex = getVideoIndex();
        if (videoIndex < 0 || videoIndex >= videoItems.length){
            console.warn("video index is out of bounds. somethings wrong!");
            return;
        }

        const src = videoItems[videoIndex]?.contentUrl;
        if (typeof src !== "string") {
            console.error("video item does not contain a content url", videoItems[videoIndex]);
            return;
        }

        return {
            type: "video",
            username,
            src
        };
    };
}
42 changes: 42 additions & 0 deletions
42
app/src/data-extraction/directly-in-browser/social-media-posting/types.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
/**
 * A JSON-LD item whose `@type` is `"SocialMediaPosting"`.
 *
 * Images and videos of the post are listed in two separate arrays.
 */
export type SocialMediaPosting = {
    "@type": "SocialMediaPosting";
    articleBody: string;
    author: Person;
    /** Identifies the post by its shortcode. */
    identifier: {
        propertyID: "Post Shortcode";
        value: string;
    };
    image: ImageObject[];
    video: VideoObject[];
};
|
||
/** The `author` entry of a `SocialMediaPosting`. */
type Person = {
    "@type": "Person";
    /** More like a display name. */
    name: string;
    /** Seems to be the real username, but could also be an alias. */
    alternateName: string;
    /** `value` is presumably the best choice for folder names. */
    identifier: {
        propertyID: "Username";
        value: string;
    };
    /** Possibly the profile picture. */
    image: string;
    url: string;
};
|
||
/** One entry of `SocialMediaPosting.image`. */
export type ImageObject = {
    width: string;
    height: string;
    representativeOfPage: boolean;
    url: string;
};
|
||
/** One entry of `SocialMediaPosting.video`; `contentUrl` is the video source. */
export type VideoObject = {
    width: string;
    height: string;
    thumbnailUrl: string;
    contentUrl: string;
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2 changes: 1 addition & 1 deletion
2
dist/assets/background.ts.850526c9.js → dist/assets/background.ts.5bde19ab.js
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.