Skip to content

Commit

Permalink
Merge pull request conversationai#111 from conversationai/yiqing-viz
Browse files Browse the repository at this point in the history
changing bfs to dfs
  • Loading branch information
vegetable68 authored Aug 1, 2018
2 parents c9fdef9 + 2f3fea6 commit 089d425
Show file tree
Hide file tree
Showing 12 changed files with 467 additions and 102 deletions.
58 changes: 43 additions & 15 deletions crowdsourcing/wpconvlib/src/conversation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ limitations under the License.
*/
export interface Comment {
id: string;
comment_type: 'MODIFICATION'|'ADDITION'|'CREATION'|'RESTORATION'|'DELETION';
type: 'MODIFICATION'|'ADDITION'|'CREATION'|'RESTORATION'|'DELETION';
content: string;
cleaned_content: string;
parent_id: string|null;
Expand All @@ -38,7 +38,32 @@ export interface Comment {
// the current snapshot.
latestVersion?: string | null; // id of the latest version of this comment if isPresent
// is false, otherwise id of self.
dfs_index?: number // index according to Depth First Search of conv.
dfs_index?: number; // index according to Depth First Search of conv.
// The fields below are toxicity scores. They exist only in the English
// dataset, so these fields are optional.
// TODO(yiqingh, ldixon) : decide on which scores to use, delete non-public
// ones from the spanner table.
RockV6_1_SEVERE_TOXICITY?: number | null,
RockV6_1_SEXUAL_ORIENTATION?: number | null,
RockV6_1_SEXUALLY_EXPLICIT?: number | null,
RockV6_1_TOXICITY?: number | null,
//TODO(yiqingh): change TOXICITY_IDENTITY_HATE to IDENTITY_ATTACK
RockV6_1_TOXICITY_IDENTITY_HATE?: number | null,
RockV6_1_TOXICITY_INSULT?: number | null,
RockV6_1_TOXICITY_OBSCENE?: number | null,
RockV6_1_TOXICITY_THREAT?: number | null,
Smirnoff_2_ATTACK_ON_AUTHOR?: number | null,
Smirnoff_2_ATTACK_ON_COMMENTER?: number | null,
Smirnoff_2_INCOHERENT?: number | null,
Smirnoff_2_INFLAMMATORY?: number | null,
Smirnoff_2_LIKELY_TO_REJECT?: number | null,
Smirnoff_2_OBSCENE?: number | null,
Smirnoff_2_OFF_TOPIC?: number | null,
Smirnoff_2_SPAM?: number | null,
Smirnoff_2_UNSUBSTANTIAL?: number | null,
// The following fields are used in displaying comments in viz app.
isCollapsed?: boolean,
rootComment?: Comment,
}

export interface Conversation { [id: string]: Comment }
Expand Down Expand Up @@ -169,15 +194,18 @@ export function htmlForComment(
/**
 * Walks a comment and all of its descendants depth first (pre-order),
 * calling `f` exactly once per comment reached.
 *
 * A comment is visited before its children, and siblings are visited in the
 * order in which they appear in `children`. The `children` arrays are only
 * read, never reordered, so repeated walks over the same tree visit the
 * comments in the same order.
 *
 * @param rootComment Comment at which the walk starts; it is visited first.
 * @param f Callback invoked with each visited comment.
 */
export function walkDfsComments(
    rootComment: Comment, f: (c: Comment) => void): void {
  const stack: Comment[] = [rootComment];
  let nextComment: Comment|undefined = stack.pop();
  while (nextComment) {
    f(nextComment);
    const children = nextComment.children;
    if (children) {
      // Push from last to first so the first child ends up on top of the
      // stack and is visited next, without reversing the array in place.
      for (let i = children.length - 1; i >= 0; i--) {
        stack.push(children[i]);
      }
    }
    nextComment = stack.pop();
  }
}

Expand Down Expand Up @@ -207,7 +235,7 @@ export function makeParent(comment: Comment, parent: Comment) {
parent.children = [];
}
parent.children.push(comment);
parent.children.sort(compareCommentOrderSmallestFirst);
parent.children.sort(compareCommentOrder);
comment.parent_id = parent.id;
comment.isRoot = false;
}
Expand Down Expand Up @@ -263,13 +291,13 @@ export function structureConversaton(conversation: Conversation): Comment|null {
// If the action is deletion, the content must have been deleted.
conversation[i].isPresent = true;
conversation[i].latestVersion = i;
if (comment.comment_type === 'DELETION') {
if (comment.type === 'DELETION') {
conversation[i].isPresent = false;
conversation[i].latestVersion = null;
}
if (comment.parent_id !== null && comment.parent_id !== ''
&& conversation[comment.parent_id]) {
if (comment.comment_type !== 'RESTORATION') {
if (comment.type !== 'RESTORATION') {
conversation[comment.parent_id].isPresent = false;
} else {
conversation[i].isPresent = false;
Expand All @@ -278,15 +306,15 @@ export function structureConversaton(conversation: Conversation): Comment|null {
}
// When a modification happens, the current comment will
// be replaced by the new version.
if (comment.comment_type === 'MODIFICATION') {
if (comment.type === 'MODIFICATION') {
conversation[comment.parent_id].latestVersion = i;
}
}
}

for (const i of ids) {
const comment = conversation[i];
if (comment.comment_type === "RESTORATION" || comment.comment_type === "DELETION") {
if (comment.type === "RESTORATION" || comment.type === "DELETION") {
continue;
}
comment.isFinal = false;
Expand Down
46 changes: 23 additions & 23 deletions crowdsourcing/wpconvlib/src/testdata/example_conversations.ts

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions view_conversations/conv-viewer-webapi/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@ Before you can deploy, you need to:
1. Copy the `server_config.template.json` file to `build/config/server_config.json`.
2. In the `build/config/server_config.json` file, set these values:

* `bigQueryProjectId` : The Google Cloud Project ID that contains the BigQuery database.
* `bigQueryDataSetId` : The name of the dataset in the cloud project.
* `bigQueryTable` : The name of the table that contains the conversations.
* `cloudProjectId` : The Google Cloud Project ID that contains the Spanner instance.
* `spannerInstanceId` : The ID of the Spanner instance in the cloud project.
* `spannerDatabaseName` : The name of the database in the Spanner instance.
* `spannerTableName` : The name of the table that contains the conversations.

TODO(ldixon): in future we'll move to using OAuth and project credentials.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
"isProduction": false,
"staticPath": "build/static",

"cloudProjectId": "wikidetox-viz",
"spannerInstanceId": "wikiconv",
"spannerDatabaseName": "convdata",
"spannerTableName": "zh_20180601_conv"
"cloudProjectId": "yourCloudProject",
"spannerInstanceId": "yourSpannerInstance",
"spannerDatabaseName": "yourSpannerDatabase",
"spannerTableName": "yourSpannerTable"
}
8 changes: 6 additions & 2 deletions view_conversations/conv-viewer-webapi/src/db_types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ class TimestampFieldHandler extends SpannerFieldHandler<Date> {

// Field handlers keyed by field name.
interface HandlerSet { [fieldName:string] : SpannerFieldHandler<Date | number | string | string[]>; };
// One parsed row: field values keyed by field name.
interface ParsedOutput { [fieldName:string] : string | string[] | Date | number | null; };
// Regular-expression source (an alternation) that identifies score fields.
// parseOutputRows compiles it with `new RegExp` and tests it, unanchored,
// against each field name, so a match anywhere in the name counts.
const scoreSubstrings = 'RockV6_1|Smirnoff_2';
// Handler name shared by all score fields: a single FloatFieldHandler is
// registered under this name, and every field whose name matches
// scoreSubstrings is parsed with that handler.
const scoreType = 'score';

const handlers : Array<SpannerFieldHandler<Date | number | string | string[]>> = [
new StringFieldHandler('id'),
Expand All @@ -146,6 +148,7 @@ const handlers : Array<SpannerFieldHandler<Date | number | string | string[]>> =
new StringFieldHandler('type'),
new IntFieldHandler('user_id'),
new StringFieldHandler('user_text'),
new FloatFieldHandler(scoreType),
];

function addHandler(inputHandlers : HandlerSet, handler : SpannerFieldHandler<Date | number | string | string[]>)
Expand All @@ -160,11 +163,12 @@ export function parseOutputRows<T>(rows: spanner.ResultRow[]) : T[] {
for (const row of rows) {
const ret: { [fieldName:string] : string | string[] | Date | number | null } = {};
for (const field of row) {
if(!(field.name in handlerSet)) {
const testname = new RegExp(scoreSubstrings).test(field.name) ? scoreType : field.name;
if(!(testname in handlerSet)) {
console.error(`Field ${field.name} does not have a handler and so cannot be interpreted.`);
break;
}
ret[field.name] = handlerSet[field.name].fromSpannerResultField(field.value);
ret[field.name] = handlerSet[testname].fromSpannerResultField(field.value);
}
output.push(ret)
}
Expand Down
80 changes: 60 additions & 20 deletions view_conversations/conv-viewer-webapi/src/routes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,22 +33,26 @@ const SEARCH_OP_TYPE =
// escaped quotes.
const SQL_SAFE_STRING =
new runtime_types.RuntimeStringType<string>('SearchBy', /^[^"]+$/);
// Suffixes appended to the configured Spanner table name to build the names
// of the indexes that the queries below force via @{FORCE_INDEX=...}.
const conversationIdIndex = '_by_conversation_id';
const toxicityIndex = '_by_toxicity';

// TODO(ldixon): consider using passport auth
// for google cloud project.
export function setup(
app: express.Express, conf: config.Config,
spannerDatabase: spanner.Database) {
let table = `\`${conf.spannerTableName}\``;
const table = `\`${conf.spannerTableName}\``;

app.get('/api/conversation-id/:conv_id', async (req, res) => {
try {
let conv_id: runtime_types.ConversationId =
const conv_id: runtime_types.ConversationId =
runtime_types.ConversationId.assert(req.params.conv_id);
const index = conf.spannerTableName + conversationIdIndex;

// TODO remove outer try wrapper unless it gets used.
// Force Spanner to use a particular index to speed up the query.
const sqlQuery = `SELECT *
FROM ${table}
FROM ${table}@{FORCE_INDEX=${index}}
WHERE conversation_id="${conv_id}"
LIMIT 100`;
// Query options list:
Expand All @@ -69,16 +73,52 @@ export function setup(
}
});

// GET /api/toxicity/:score
// Responds with up to 20 rows whose RockV6_1_TOXICITY is at most `score` and
// whose type is not "DELETION", highest score first, as pretty-printed JSON.
// Any failure, including a score rejected by assertNumber, is logged and
// answered with INTERNAL_SERVER_ERROR and a JSON `{error}` body.
app.get('/api/toxicity/:score', async (req, res) => {
  try {
    const score: number = runtime_types.assertNumber(req.params.score);
    const forcedIndex = conf.spannerTableName + toxicityIndex;

    // TODO remove outer try wrapper unless it gets used.
    // Query options list:
    // https://cloud.google.com/spanner/docs/getting-started/nodejs/#query_data_using_sql
    const query: spanner.Query = {
      sql: `SELECT *
FROM ${table}@{FORCE_INDEX=${forcedIndex}}
WHERE RockV6_1_TOXICITY <= ${score} and type != "DELETION"
ORDER BY RockV6_1_TOXICITY DESC
LIMIT 20`
    };

    // The first element of the result holds the rows.
    const results = await spannerDatabase.run(query);
    const rows = results[0];
    res.status(httpcodes.OK).send(
        JSON.stringify(
            db_types.parseOutputRows<db_types.OutputRow>(rows), null, 2));
  } catch (e) {
    console.error(`*** Failed: `, e);
    res.status(httpcodes.INTERNAL_SERVER_ERROR).send(
        JSON.stringify({error: e.message}));
  }
});



app.get('/api/comment-id/:comment_id', async (req, res) => {
try {
let comment_id: runtime_types.CommentId =
const comment_id: runtime_types.CommentId =
runtime_types.CommentId.assert(req.params.comment_id);
const index = conf.spannerTableName + conversationIdIndex;


// TODO remove outer try wrapper unless it gets used.
const sqlQuery = `SELECT *
FROM ${table}
WHERE id="${comment_id}"
LIMIT 100`;
// id field is unique.
const sqlQuery = `
SELECT conv_r.*
FROM ${table} conv_l
JOIN ${table}@{FORCE_INDEX=${index}} conv_r
ON conv_r.conversation_id = conv_l.conversation_id
WHERE conv_l.id = "${comment_id}" and conv_r.timestamp <= conv_l.timestamp`;
// Query options list:
// https://cloud.google.com/spanner/docs/getting-started/nodejs/#query_data_using_sql
const query: spanner.Query = {
Expand All @@ -87,7 +127,7 @@ export function setup(

await spannerDatabase.run(query).then(results => {
const rows = results[0];
res.status(httpcodes.OK).send(db_types.parseOutputRows<db_types.OutputRow[]>(rows));
res.status(httpcodes.OK).send(db_types.parseOutputRows<db_types.OutputRow>(rows));
});
} catch (e) {
console.error(`*** Failed: `, e);
Expand All @@ -99,7 +139,7 @@ export function setup(

app.get('/api/revision-id/:rev_id', async (req, res) => {
try {
let rev_id: runtime_types.RevisionId =
const rev_id: runtime_types.RevisionId =
runtime_types.RevisionId.assert(req.params.rev_id);

// TODO remove outer try wrapper unless it gets used.
Expand All @@ -115,7 +155,7 @@ export function setup(

await spannerDatabase.run(query).then(results => {
const rows = results[0];
res.status(httpcodes.OK).send(db_types.parseOutputRows<db_types.OutputRow[]>(rows));
res.status(httpcodes.OK).send(db_types.parseOutputRows<db_types.OutputRow>(rows));
});
} catch (e) {
console.error(`*** Failed: `, e);
Expand All @@ -127,7 +167,7 @@ export function setup(

app.get('/api/page-id/:page_id', async (req, res) => {
try {
let page_id: runtime_types.PageId =
const page_id: runtime_types.PageId =
runtime_types.PageId.assert(req.params.page_id);

// TODO remove outer try wrapper unless it gets used.
Expand All @@ -144,7 +184,7 @@ export function setup(

await spannerDatabase.run(query).then(results => {
const rows = results[0];
res.status(httpcodes.OK).send(db_types.parseOutputRows<db_types.OutputRow[]>(rows));
res.status(httpcodes.OK).send(db_types.parseOutputRows<db_types.OutputRow>(rows));
});
} catch (e) {
console.error(`*** Failed: `, e);
Expand All @@ -156,13 +196,13 @@ export function setup(

app.get('/api/page-title/:page_title', async (req, res) => {
try {
let page_title: runtime_types.PageTitleSearch =
const page_title: runtime_types.PageTitleSearch =
runtime_types.PageTitleSearch.assert(req.params.page_title);

// TODO remove outer try wrapper unless it gets used.
const sqlQuery = `SELECT *
FROM ${table}
WHERE page_title LIKE "${page_title}"
WHERE page_title = "${page_title}"
LIMIT 100`;
// Query options list:
// https://cloud.google.com/spanner/docs/getting-started/nodejs/#query_data_using_sql
Expand All @@ -173,7 +213,7 @@ export function setup(

await spannerDatabase.run(query).then(results => {
const rows = results[0];
res.status(httpcodes.OK).send(db_types.parseOutputRows<db_types.OutputRow[]>(rows));
res.status(httpcodes.OK).send(db_types.parseOutputRows<db_types.OutputRow>(rows));
});
} catch (e) {
console.error(`*** Failed: `, e);
Expand All @@ -185,19 +225,19 @@ export function setup(

app.get('/api/search/:search_by/:search_op/:search_for', async (req, res) => {
if (!SEARCH_BY_TYPE.isValid(req.params.search_by)) {
let errorMsg = `Error: Invalid searchBy string: ${req.params.search_by}`
const errorMsg = `Error: Invalid searchBy string: ${req.params.search_by}`
console.error(errorMsg);
res.status(httpcodes.BAD_REQUEST).send(JSON.stringify({error: errorMsg}));
return;
}
if (!SEARCH_OP_TYPE.isValid(req.params.search_op)) {
let errorMsg = `Error: Invalid searchOp string: ${req.params.search_op}`
const errorMsg = `Error: Invalid searchOp string: ${req.params.search_op}`
console.error(errorMsg);
res.status(httpcodes.BAD_REQUEST).send(JSON.stringify({error: errorMsg}));
return;
}
if (!SQL_SAFE_STRING.isValid(req.params.search_for)) {
let errorMsg = `Error: Invalid searchFor string: ${req.params.search_for}`
const errorMsg = `Error: Invalid searchFor string: ${req.params.search_for}`
console.error(errorMsg);
res.status(httpcodes.BAD_REQUEST).send(JSON.stringify({error: errorMsg}));
return;
Expand All @@ -218,7 +258,7 @@ export function setup(

await spannerDatabase.run(query).then(results => {
const rows = results[0];
res.status(httpcodes.OK).send(db_types.parseOutputRows<db_types.OutputRow[]>(rows));
res.status(httpcodes.OK).send(db_types.parseOutputRows<db_types.OutputRow>(rows));
});
} catch (e) {
console.error(`*** Failed: `, e);
Expand Down
7 changes: 7 additions & 0 deletions view_conversations/conv-viewer-webapi/src/runtime_types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,10 @@ export let PageId = new RuntimeStringType<PageId>('PageId', /^(\d+)$/);
// Runtime type for page-title search strings. The pattern accepts any
// non-empty string that contains no double-quote character.
// NOTE(review): backslashes are allowed by this pattern — confirm that is
// safe wherever the value is interpolated into a double-quoted SQL string.
export let PageTitleSearch =
new RuntimeStringType<PageTitleSearch>('PageTitleSearch', /^([^"]+)$/);

/**
 * Checks that `score` is a number and returns it as a `number`.
 *
 * The toxicity route passes an Express route parameter here, which is a
 * string at runtime, so the input is converted with `Number()` before the
 * check and the converted value is what gets returned.
 *
 * @param score Value to validate: a number, or the string form of one.
 * @returns The numeric value of `score`.
 * @throws Error if `score` does not convert to a number (the result is NaN).
 */
export function assertNumber(score: number|string): number {
  const value = Number(score);
  if (Number.isNaN(value)) {
    throw new Error(`Wanted number but got: NaN.`);
  }
  return value;
}

Loading

0 comments on commit 089d425

Please sign in to comment.