Web production deployment #4356

Merged 36 commits on Aug 30, 2024

Commits
- d4aee94 exports/{userId}/{date}/{uuid}.zip (sywhb, Aug 26, 2024)
- a75acc5 add export job and get api (sywhb, Aug 26, 2024)
- 444c78f use async job to handle exporter (sywhb, Aug 27, 2024)
- b2d3633 Remove javascript event handlers from elements (jacksonh, Aug 27, 2024)
- 48b3f73 wait for write stream to finish (sywhb, Aug 27, 2024)
- 578cf13 Add missing this (jacksonh, Aug 27, 2024)
- 0e523d8 upload readable content before exporting to cache the content (sywhb, Aug 27, 2024)
- 6129606 Remove typo (jacksonh, Aug 27, 2024)
- f77ded3 save export tasks in db and check db before starting export (sywhb, Aug 27, 2024)
- 8294392 fix table permission (sywhb, Aug 27, 2024)
- 5e2323a remove tts cache (sywhb, Aug 28, 2024)
- 8ecf2c1 Use identified_only with posthog (jacksonh, Aug 29, 2024)
- 4be0f8f Merge pull request #4346 from omnivore-app/fix/web-posthog-identified… (jacksonh, Aug 29, 2024)
- d0026f0 Bump webpack from 5.76.0 to 5.94.0 in /pkg/extension (dependabot[bot], Aug 29, 2024)
- 62cfa23 Merge pull request #4347 from omnivore-app/dependabot/npm_and_yarn/pk… (jacksonh, Aug 29, 2024)
- 21ef69b Bump micromatch from 4.0.2 to 4.0.8 in /pkg/extension (dependabot[bot], Aug 29, 2024)
- 6b57ab7 Update readability test for new _cleanElement name (jacksonh, Aug 29, 2024)
- ad19903 Remove JS event handler from expected output of test, these are strip… (jacksonh, Aug 29, 2024)
- 47a6bd4 Merge pull request #4348 from omnivore-app/dependabot/npm_and_yarn/pk… (jacksonh, Aug 29, 2024)
- 3b9dd90 remove comments (sywhb, Aug 29, 2024)
- 32f4b68 Merge pull request #4327 from omnivore-app/feature/exporter (sywhb, Aug 29, 2024)
- e1b809f allow omnivore_admin to delete filters (sywhb, Aug 29, 2024)
- b22af65 add archived status to user account (sywhb, Aug 29, 2024)
- 18a58ac redirect archived user to /export page when login (sywhb, Aug 29, 2024)
- f1c4f3b remove debugging logs (sywhb, Aug 29, 2024)
- 5b7d1bb remove set lock_timeout (sywhb, Aug 29, 2024)
- 43fb698 send page_update event if mark as unread (sywhb, Aug 29, 2024)
- 2ff987d fix github action (sywhb, Aug 29, 2024)
- 70d6e9d reduce logs (sywhb, Aug 29, 2024)
- be7d172 Merge pull request #4352 from omnivore-app/reduce-logs (sywhb, Aug 29, 2024)
- cbdd40f Allow deleted articles to be read and restored from the article page (jacksonh, Aug 30, 2024)
- 73753e7 Merge pull request #4333 from omnivore-app/fix/readability-clean-even… (jacksonh, Aug 30, 2024)
- 49f9fb4 Merge pull request #4337 from omnivore-app/fix/remove-tts-cache (sywhb, Aug 30, 2024)
- f89e44d redirect archived user to /account-archived page when login (sywhb, Aug 30, 2024)
- f3e3d57 Merge pull request #4350 from omnivore-app/feature/archive-account (sywhb, Aug 30, 2024)
- 88fe867 Merge pull request #4353 from omnivore-app/fix/web-restore-articles (jacksonh, Aug 30, 2024)
Files changed
2 changes: 2 additions & 0 deletions .github/workflows/lint-migrations.yml
@@ -16,7 +16,9 @@ jobs:
         run: |
           modified_migrations=$(git diff --diff-filter=d --name-only main 'packages/db/migrations/*.do.*.sql')
           echo "$modified_migrations"
+          echo "text<<EOF" >> $GITHUB_OUTPUT
           echo "file_names=$modified_migrations" >> $GITHUB_OUTPUT
+          echo "EOF" >> $GITHUB_OUTPUT
         id: modified-migrations
       - uses: sbdchd/squawk-action@v1
         with:
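Context for this change: `$GITHUB_OUTPUT` only supports multiline values through the heredoc form (`name<<EOF`, the value, then `EOF`); a plain `name=value` assignment breaks when the value contains newlines, which the list of modified migration files can. The two added lines wrap the existing assignment in that delimiter syntax so the step output survives multi-file changes.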
2 changes: 2 additions & 0 deletions packages/api/package.json
@@ -51,6 +51,7 @@
     "alfaaz": "^1.1.0",
     "apollo-datasource": "^3.3.1",
     "apollo-server-express": "^3.6.3",
+    "archiver": "^7.0.1",
     "axios": "^0.27.2",
     "bcryptjs": "^2.4.3",
     "bullmq": "^5.1.1",
@@ -123,6 +124,7 @@
     "@istanbuljs/nyc-config-typescript": "^1.0.2",
     "@types/addressparser": "^1.0.1",
     "@types/analytics-node": "^3.1.7",
+    "@types/archiver": "^6.0.2",
     "@types/bcryptjs": "^2.4.2",
     "@types/chai": "^4.2.18",
     "@types/chai-as-promised": "^7.1.5",
37 changes: 37 additions & 0 deletions packages/api/src/entity/export.ts
@@ -0,0 +1,37 @@
import {
  Column,
  CreateDateColumn,
  Entity,
  PrimaryGeneratedColumn,
  UpdateDateColumn,
} from 'typeorm'

@Entity()
export class Export {
  @PrimaryGeneratedColumn('uuid')
  id!: string

  @Column('uuid')
  userId!: string

  @Column('text', { nullable: true })
  taskId?: string

  @Column('text')
  state!: string

  @Column('int', { default: 0 })
  totalItems!: number

  @Column('int', { default: 0 })
  processedItems!: number

  @Column('text', { nullable: true })
  signedUrl?: string

  @CreateDateColumn({ type: 'timestamptz' })
  createdAt!: Date

  @UpdateDateColumn({ type: 'timestamptz' })
  updatedAt!: Date
}
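The export job below reads and updates these rows through `findExportById` and `saveExport` from `services/export`, which is outside the lines shown here. A minimal sketch of what that service plausibly looks like, assuming a TypeORM 0.3-style `DataSource` exported as `appDataSource` (the module path is an assumption):

```ts
import { DeepPartial } from 'typeorm'
import { appDataSource } from '../data_source' // assumed module path
import { Export } from '../entity/export'

export const findExportById = async (
  id: string,
  userId: string
): Promise<Export | null> => {
  // scope the lookup to the owning user so one user cannot
  // poll another user's export task
  return appDataSource.getRepository(Export).findOneBy({ id, userId })
}

export const saveExport = async (
  userId: string,
  data: DeepPartial<Export>
): Promise<Export> => {
  // save() inserts when the id is new and updates otherwise,
  // so the job can reuse it for every state transition
  return appDataSource.getRepository(Export).save({ ...data, userId })
}
```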
1 change: 1 addition & 0 deletions packages/api/src/entity/user.ts
@@ -23,6 +23,7 @@ export enum StatusType {
   Active = 'ACTIVE',
   Pending = 'PENDING',
   Deleted = 'DELETED',
+  Archived = 'ARCHIVED',
 }

 @Entity()
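Commits b22af65, 18a58ac, and f89e44d use this new status to push archived accounts to a dedicated page at login. The web-side check itself is not in the lines shown here; a minimal sketch under the assumption of an Express-style middleware (the `SessionUser` shape and the middleware name are hypothetical):

```ts
import { NextFunction, Request, Response } from 'express'
import { StatusType } from '../entity/user'

// assumed shape of whatever the session layer attaches to the request
interface SessionUser {
  status: StatusType
}

// hypothetical middleware: send archived accounts to the static
// /account-archived page instead of into the app
export const redirectArchivedUser = (
  req: Request & { user?: SessionUser },
  res: Response,
  next: NextFunction
) => {
  if (req.user?.status === StatusType.Archived) {
    return res.redirect('/account-archived')
  }
  next()
}
```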
6 changes: 3 additions & 3 deletions packages/api/src/jobs/email/inbound_emails.ts
@@ -23,10 +23,10 @@ import { enqueueSendEmail } from '../../utils/createTask'
 import { generateSlug, isUrl } from '../../utils/helpers'
 import { logger } from '../../utils/logger'
 import {
-  parseEmailAddress,
-  isProbablyArticle,
-  getTitleFromEmailSubject,
   generateUniqueUrl,
+  getTitleFromEmailSubject,
+  isProbablyArticle,
+  parseEmailAddress,
 } from '../../utils/parser'
 import {
   generateUploadFilePathName,
287 changes: 287 additions & 0 deletions packages/api/src/jobs/export.ts
@@ -0,0 +1,287 @@
import archiver, { Archiver } from 'archiver'
import { v4 as uuidv4 } from 'uuid'
import { LibraryItem, LibraryItemState } from '../entity/library_item'
import { TaskState } from '../generated/graphql'
import { findExportById, saveExport } from '../services/export'
import { findHighlightsByLibraryItemId } from '../services/highlights'
import {
  findLibraryItemById,
  searchLibraryItems,
} from '../services/library_item'
import { sendExportJobEmail } from '../services/send_emails'
import { findActiveUser } from '../services/user'
import { logger } from '../utils/logger'
import { highlightToMarkdown } from '../utils/parser'
import { contentFilePath, createGCSFile } from '../utils/uploads'

export interface ExportJobData {
  userId: string
  exportId: string
}

export const EXPORT_JOB_NAME = 'export'

const itemStateMapping = (state: LibraryItemState) => {
  switch (state) {
    case LibraryItemState.Archived:
      return 'Archived'
    case LibraryItemState.ContentNotFetched:
    case LibraryItemState.Succeeded:
      return 'Active'
    default:
      return 'Unknown'
  }
}

const uploadContent = async (
  userId: string,
  libraryItem: LibraryItem,
  archive: Archiver
) => {
  const filePath = contentFilePath({
    userId,
    libraryItemId: libraryItem.id,
    format: 'readable',
    savedAt: libraryItem.savedAt,
    updatedAt: libraryItem.updatedAt,
  })

  const file = createGCSFile(filePath)

  // check if file is already uploaded
  const [exists] = await file.exists()
  if (!exists) {
    logger.info(`File not found: ${filePath}`)

    // upload the content to GCS
    const item = await findLibraryItemById(libraryItem.id, userId, {
      select: ['readableContent'],
    })
    if (!item?.readableContent) {
      logger.error('Item not found', {
        userId,
        libraryItemId: libraryItem.id,
      })
      return
    }

    await file.save(item.readableContent, {
      contentType: 'text/html',
      private: true,
    })
  }

  // append the existing file to the archive
  archive.append(file.createReadStream(), {
    name: `content/${libraryItem.slug}.html`,
  })
}

const uploadToBucket = async (
  userId: string,
  items: Array<LibraryItem>,
  cursor: number,
  size: number,
  archive: Archiver
): Promise<number> => {
  // Add a metadata JSON file for this batch to the root of the zip
  const metadata = items.map((item) => ({
    id: item.id,
    slug: item.slug,
    title: item.title,
    description: item.description,
    author: item.author,
    url: item.originalUrl,
    state: itemStateMapping(item.state),
    readingProgress: item.readingProgressBottomPercent,
    thumbnail: item.thumbnail,
    labels: item.labelNames,
    savedAt: item.savedAt,
    updatedAt: item.updatedAt,
    publishedAt: item.publishedAt,
  }))

  const endCursor = cursor + size
  archive.append(JSON.stringify(metadata, null, 2), {
    name: `metadata_${cursor}_to_${endCursor}.json`,
  })

  // Loop through the items and add files to /content and /highlights directories
  for (const item of items) {
    // Add content files to /content
    await uploadContent(userId, item, archive)

    if (item.highlightAnnotations?.length) {
      const highlights = await findHighlightsByLibraryItemId(item.id, userId)
      const markdown = highlights.map(highlightToMarkdown).join('\n\n')

      // Add highlight files to /highlights
      archive.append(markdown, {
        name: `highlights/${item.slug}.md`,
      })
    }
  }

  return endCursor
}

export const exportJob = async (jobData: ExportJobData) => {
  const { userId, exportId } = jobData

  try {
    const user = await findActiveUser(userId)
    if (!user) {
      logger.error('user not found', {
        userId,
      })
      return
    }

    const exportTask = await findExportById(exportId, userId)
    if (!exportTask) {
      logger.error('export task not found', {
        userId,
        exportId,
      })
      return
    }

    await saveExport(userId, {
      id: exportId,
      state: TaskState.Running,
    })

    const emailJob = await sendExportJobEmail(userId, 'started')
    if (!emailJob) {
      logger.error('Failed to send export job email', {
        userId,
      })
      return
    }

    logger.info('exporting all items...', {
      userId,
    })

    // export data as a zip file:
    // exports/{userId}/{date}/{uuid}.zip
    // - metadata.json
    // - /content
    //   - {slug}.html
    // - /highlights
    //   - {slug}.md
    const dateStr = new Date().toISOString()
    const fileUuid = uuidv4()
    const fullPath = `exports/${userId}/${dateStr}/${fileUuid}.zip`

    const file = createGCSFile(fullPath)

    // Create a write stream
    const writeStream = file.createWriteStream({
      metadata: {
        contentType: 'application/zip',
      },
    })

    // Handle any errors in the streams
    writeStream.on('error', (err) => {
      logger.error('Error writing to GCS:', err)
    })

    writeStream.on('finish', () => {
      logger.info('File successfully written to GCS')
    })

    // Initialize archiver for zipping files
    const archive = archiver('zip', {
      zlib: { level: 9 }, // Compression level
    })

    // Handle any archiver errors
    archive.on('error', (err) => {
      throw err
    })

    // Pipe the archiver output to the write stream
    archive.pipe(writeStream)

    try {
      // fetch data from the database in batches
      const batchSize = 20
      let cursor = 0
      let hasNext = false
      do {
        const items = await searchLibraryItems(
          {
            from: cursor,
            size: batchSize,
            query: 'in:all',
            includeContent: false,
            includeDeleted: false,
            includePending: false,
          },
          userId
        )

        const size = items.length
        hasNext = size === batchSize

        // add this batch of items to the zip archive
        if (size > 0) {
          cursor = await uploadToBucket(userId, items, cursor, size, archive)
        }
      } while (hasNext)
    } finally {
      // Finalize the archive
      await archive.finalize()
    }

    // Ensure that the writeStream has finished
    await new Promise((resolve, reject) => {
      writeStream.on('finish', resolve)
      writeStream.on('error', reject)
    })

    logger.info('export completed', {
      userId,
    })

    // generate a temporary signed url for the zip file
    const [signedUrl] = await file.getSignedUrl({
      action: 'read',
      expires: Date.now() + 86400 * 1000, // 24 hours
    })

    logger.info('signed url for export:', {
      userId,
      signedUrl,
    })

    await saveExport(userId, {
      id: exportId,
      state: TaskState.Succeeded,
    })

    const job = await sendExportJobEmail(userId, 'completed', signedUrl)
    if (!job) {
      logger.error('failed to send export completed email', {
        userId,
        signedUrl,
      })
    }
  } catch (error) {
    logger.error('export failed', error)

    await saveExport(userId, {
      id: exportId,
      state: TaskState.Failed,
    })

    const job = await sendExportJobEmail(userId, 'failed')
    if (!job) {
      logger.error('failed to send export failed email', {
        userId,
      })
    }
  }
}
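`exportJob` and `EXPORT_JOB_NAME` are exported for the queue processor, whose wiring sits outside this diff. A minimal sketch of how the job could be enqueued and consumed with BullMQ (already a dependency in package.json above); the queue name and Redis connection here are assumptions:

```ts
import { Queue, Worker } from 'bullmq'
import { EXPORT_JOB_NAME, ExportJobData, exportJob } from './jobs/export'

// assumed Redis connection; the real deployment config is not in this diff
const connection = { host: 'localhost', port: 6379 }

// producer side: create the export row first, then enqueue by job name
const queue = new Queue('omnivore-jobs', { connection }) // queue name is assumed

export const enqueueExportJob = async (userId: string, exportId: string) => {
  const data: ExportJobData = { userId, exportId }
  return queue.add(EXPORT_JOB_NAME, data)
}

// consumer side: dispatch on job name so one worker can serve many job types
const worker = new Worker(
  'omnivore-jobs',
  async (job) => {
    if (job.name === EXPORT_JOB_NAME) {
      await exportJob(job.data as ExportJobData)
    }
  },
  { connection }
)
```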
3 changes: 1 addition & 2 deletions packages/api/src/jobs/rss/refreshAllFeeds.ts
@@ -40,12 +40,11 @@ export const refreshAllFeeds = async (db: DataSource): Promise<boolean> => {
       FROM
         omnivore.subscriptions s
       INNER JOIN
-        omnivore.user u ON u.id = s.user_id
+        omnivore.user u ON u.id = s.user_id AND u.status = $4
       WHERE
         s.type = $1
         AND s.status = $2
         AND (s.scheduled_at <= NOW() OR s.scheduled_at IS NULL)
-        AND u.status = $4
       GROUP BY
         url
     `,
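Note on the SQL change: for an INNER JOIN, a predicate filters the same rows whether it sits in the ON clause or in WHERE (this equivalence does not hold for outer joins), so folding `u.status = $4` into the join condition does not change the result set; it just keeps the user-table predicate next to the join it belongs to.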
1 change: 0 additions & 1 deletion packages/api/src/jobs/trigger_rule.ts
@@ -289,7 +289,6 @@ export const triggerRule = async (jobData: TriggerRuleJobData) => {
   // get rules by calling api
   const rules = await findEnabledRules(userId, ruleEventType)
   if (rules.length === 0) {
-    console.log('No rules found')
     return false
   }