Skip to content

Commit

Permalink
fix(interactive): Update example movie graph and fix some bug (#3304)
Browse files Browse the repository at this point in the history
Update the example movie graph's schema and raw data for interactive. 
Add some demo cypher queries.

---------

Co-authored-by: BingqingLyu <[email protected]>
  • Loading branch information
zhanglei1949 and BingqingLyu authored Oct 23, 2023
1 parent 43f5176 commit 7a7bd4e
Show file tree
Hide file tree
Showing 19 changed files with 214 additions and 77 deletions.
1 change: 1 addition & 0 deletions flex/engines/hqps_db/core/operator/limit.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class LimitOp {
cur_offset += 1;
}
new_offsets.push_back(cur_offset);
cur_ind += 1;
}

std::vector<size_t> selected_indices;
Expand Down
2 changes: 1 addition & 1 deletion flex/engines/hqps_db/core/operator/sink.h
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ class SinkOp {
}
}

// sink collection of pod, expect for LabelKey type
// sink collection of pod type
template <size_t Ind, size_t act_tag_id, typename T,
typename std::enable_if<
(!gs::is_vector<T>::value) && (!gs::is_tuple<T>::value) &&
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,8 @@ class SingleLabelEdgeSetBuilder {
vec_.push_back(std::get<1>(tuple));
}

// Appends an already-assembled edge element tuple, unchanged, to the buffer
// that Build() later moves into the resulting edge set.
void Insert(const ele_tuple_t& tuple) {
  vec_.push_back(tuple);
}

result_t Build() {
return result_t(std::move(vec_), std::move(label_triplet_), prop_names_,
direction_);
Expand Down
1 change: 1 addition & 0 deletions flex/interactive/examples/movies/0_get_user.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
MATCH (u:User)
RETURN u.id, u.name
LIMIT 5;
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
MATCH (u:User)-[r:REVIEW]->(movie:Movie)
WHERE r.rating > $rateThresh
RETURN COUNT(u);
5 changes: 5 additions & 0 deletions flex/interactive/examples/movies/2_user_colfilter.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
MATCH (u1:User)-[r1:REVIEW]->(movie:Movie)<-[r2:REVIEW]-(u2:User)
WHERE u1.id > u2.id
  AND r1.rating > $rateThresh
  AND r2.rating > $rateThresh
RETURN u1.name, u2.name, movie.title
LIMIT 10;
6 changes: 6 additions & 0 deletions flex/interactive/examples/movies/3_high_rate_ratio.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
MATCH (u:User)-[:REVIEW]->(m:Movie)
WITH u, COUNT(m) AS cnt1
MATCH (u)-[r:REVIEW]->(likeM:Movie)
WHERE r.rating > $rateThresh
WITH u, cnt1, COUNT(likeM) AS cnt2
RETURN u.name, cnt2 / cnt1;
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
MATCH (u:User)-[:REVIEW]->(m:Movie)<-[:ACTED_IN]-(actor:Person),
      (u)-[:FOLLOWS]->(actor)
WITH DISTINCT u, COUNT(m) AS cnt1
MATCH (u:User)-[r:REVIEW]->(likeM:Movie)<-[:ACTED_IN]-(actor:Person)
MATCH (u:User)-[:FOLLOWS]->(actor)
WHERE r.rating > $rateThresh
WITH DISTINCT u, cnt1, COUNT(likeM) AS cnt2
RETURN u.name, cnt2 / cnt1;
3 changes: 3 additions & 0 deletions flex/interactive/examples/movies/5_recommend_rule.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
MATCH (u:User)-[:FOLLOWS]->(a:Person)-[:ACTED_IN]->(m:Movie)
WHERE NOT (u:User)-[:REVIEW]->(m)
RETURN u.name, m.title
LIMIT 5;
11 changes: 8 additions & 3 deletions flex/interactive/examples/movies/FOLLOWS.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
start|end
170|169
168|169
167|168
169|71
171|71
172|71
173|71
174|71
169|16
172|16
174|16
3 changes: 0 additions & 3 deletions flex/interactive/examples/movies/Person.csv
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,3 @@ id|born|name
165|1963|Lori Petty
166|1943|Penny Marshall
167|2003|Paul Blythe
168|2003|Angela Scope
169|2003|Jessica Thompson
170|2003|James Thompson
73 changes: 73 additions & 0 deletions flex/interactive/examples/movies/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
## Step 1
(1) 用户探索
```cypher
MATCH (u:User)
RETURN u.id, u.name;
```


## Step 2
(2) 探索用户好评的电影。我们为评分设定一个阈值,评分高于该阈值即视为好评;通过多次尝试,找到一个较为合适的好评阈值。

```cypher
// Count the reviews whose rating exceeds $rateThresh.
// The edge label is REVIEW, as declared in graph.yaml.
MATCH (u: User) -[r:REVIEW]->(movie: Movie)
WHERE r.rating > $rateThresh
RETURN COUNT(u);
```

high_rate_number(0) 其实就是用户所有的评价数量
high_rate_number(0.5) 好评threshold的占比
high_rate_number(0.7)


## Step 3
(3) 探索两名用户同时给一个电影打好评
```cypher
// Pairs of users who both rated the same movie above $rateThresh.
// u1.id > u2.id keeps each unordered pair once and excludes self-pairs.
MATCH (u1: User) -[r1:REVIEW]->(movie: Movie)<-[r2: REVIEW]- (u2: User)
WHERE u1.id > u2.id
  AND r1.rating > $rateThresh
  AND r2.rating > $rateThresh
RETURN u1.name, u2.name, movie.title;
```

但可能不是很有代表性,而且我们的数据集里并没有维护这两名用户直接的关系。
我们继续探索可能的关联。

## Step 4
(4)探索是否用户会比较倾向于给自己关注的演员参演的电影打好评

我们先计算用户给一部电影打高分的可能性有多大
```cypher
// Per user: cnt1 = number of reviewed movies, cnt2 = number of those rated
// above $rateThresh; the quotient is the user's high-rating ratio.
MATCH (u: User) -[:REVIEW]->(m: Movie)
WITH u, COUNT(m) as cnt1
MATCH (u) -[r:REVIEW]->(likeM: Movie)
WHERE r.rating > $rateThresh
WITH u, cnt1, COUNT(likeM) as cnt2
RETURN u.name, cnt2 / cnt1;
```


在这个电影是该用户关注的演员参演的情况下,我们再次计算这个可能性
```cypher
// Same ratio as above, restricted to movies in which an actor followed by
// the user appears. FOLLOWS is directed User -> Person in the schema.
MATCH (u: User) -[:REVIEW]->(m: Movie)<-[:ACTED_IN]-(actor: Person),
      (u) -[:FOLLOWS]->(actor)
WITH DISTINCT u, COUNT(m) as cnt1
MATCH (u) -[r:REVIEW]->(likeM: Movie)<-[:ACTED_IN]-(actor: Person),
      (u) -[:FOLLOWS]->(actor)
WHERE r.rating > $rateThresh
WITH DISTINCT u, cnt1, COUNT(likeM) as cnt2
RETURN u.name, cnt2 / cnt1;
```


## Step 5
(5)那么我们搜索这样的推荐规则:
给用户u推荐电影m,如果
(1)u关注演员a
(2)a参演了电影m
(3)u还没有看过电影m (还未做评价)
```cypher
// Recommend movie m to user u when u follows an actor of m and u has not
// reviewed m yet.
MATCH (u: User)-[:FOLLOWS]->(a: Person)-[:ACTED_IN]->(m: Movie)
WHERE NOT (u : User)-[:REVIEW]->(m)
RETURN u.name, m.title;
```
28 changes: 18 additions & 10 deletions flex/interactive/examples/movies/REVIEWED.csv
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
start|end|review|rating
169|105|yes|95
169|87|bravo|65
170|87|good|100
168|87|bad|62
169|97|cool|85
169|95|poor|45
169|111|bad|68
170|111|bad|65
169|37|bravo|92
start|end|rating|summary

169|105|95|An amazing journey
171|105|100|A perfect movie
173|105|90|Unforgettable
169|87|65|Silly, but fun
170|87|100|The coolest football movie ever
168|87|62|Pretty funny at times
169|97|85|Dark, but compelling
169|95|45|Slapstick redeemed only by the Robin Williams and Gene Hackman's stellar performances
169|111|68|A solid romp
170|111|65|Fun, but a little far fetched
172|111|60|Just too popcorn
169|37|92|You had me at Jerry
174|37|90|Gonna watch again
170|116|60|The borderline
172|116|61|A little confused
174|116|68|Good, but can do better
8 changes: 8 additions & 0 deletions flex/interactive/examples/movies/User.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
id|born|name
168|2003|Angela Scope
169|2003|Jessica Thompson
170|2003|James Thompson
171|2005|Lei Li
172|2006|Mei Wang
173|2006|Thomas Christ
174|2007|Edward Zhang
17 changes: 15 additions & 2 deletions flex/interactive/examples/movies/graph.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,19 @@ schema:
primitive_type: DT_STRING
primary_keys:
- id
# User vertex type, keyed by `id`. In this schema it is the source endpoint of
# the REVIEW (User -> Movie) and FOLLOWS (User -> Person) edge types.
- type_name: User
properties:
- property_name: id
property_type:
primitive_type: DT_SIGNED_INT64
- property_name: born
property_type:
primitive_type: DT_SIGNED_INT32
- property_name: name
property_type:
primitive_type: DT_STRING
primary_keys:
- id
edge_types:
- type_name: ACTED_IN
vertex_type_pair_relations:
Expand All @@ -43,7 +56,7 @@ schema:
relation: MANY_TO_MANY
- type_name: REVIEW
vertex_type_pair_relations:
- source_vertex: Person
- source_vertex: User
destination_vertex: Movie
relation: MANY_TO_MANY
properties:
Expand All @@ -52,7 +65,7 @@ schema:
primitive_type: DT_SIGNED_INT32
- type_name: FOLLOWS
vertex_type_pair_relations:
- source_vertex: Person
- source_vertex: User
destination_vertex: Person
relation: MANY_TO_MANY
- type_name: WROTE
Expand Down
55 changes: 5 additions & 50 deletions flex/interactive/examples/movies/import.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ vertex_mappings:
- type_name: Person # must align with the schema
inputs:
- Person.csv
- type_name: User
inputs:
- User.csv
- type_name: Movie
inputs:
- Movie.csv
Expand All @@ -28,70 +31,30 @@ edge_mappings:
edge: ACTED_IN
source_vertex: Person
destination_vertex: Movie
source_vertex_mappings:
- column:
index: 0
name: id
destination_vertex_mappings:
- column:
index: 1
name: id
inputs:
- ACTED_IN.csv
- type_triplet:
edge: DIRECTED
source_vertex: Person
destination_vertex: Movie
source_vertex_mappings:
- column:
index: 0
name: id
destination_vertex_mappings:
- column:
index: 1
name: id
inputs:
- DIRECTED.csv
- type_triplet:
edge: FOLLOWS
source_vertex: Person
source_vertex: User
destination_vertex: Person
source_vertex_mappings:
- column:
index: 0
name: id
destination_vertex_mappings:
- column:
index: 1
name: id
inputs:
- FOLLOWS.csv
- type_triplet:
edge: PRODUCED
source_vertex: Person
destination_vertex: Movie
source_vertex_mappings:
- column:
index: 0
name: id
destination_vertex_mappings:
- column:
index: 1
name: id
inputs:
- PRODUCED.csv
- type_triplet:
edge: REVIEW
source_vertex: Person
source_vertex: User
destination_vertex: Movie
source_vertex_mappings:
- column:
index: 0
name: id
destination_vertex_mappings:
- column:
index: 1
name: id
column_mappings:
- column:
index: 3
Expand All @@ -103,13 +66,5 @@ edge_mappings:
edge: WROTE
source_vertex: Person
destination_vertex: Movie
source_vertex_mappings:
- column:
index: 0
name: id
destination_vertex_mappings:
- column:
index: 1
name: id
inputs:
- WROTE.csv
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,14 @@ public static QueryContext get_movie_query3_test() {
Arrays.asList(
"Record<{personName: \"Aaron Sorkin\"}>",
"Record<{personName: \"Al Pacino\"}>",
"Record<{personName: \"Angela Scope\"}>",
"Record<{personName: \"Annabella Sciorra\"}>",
"Record<{personName: \"Anthony Edwards\"}>",
"Record<{personName: \"Audrey Tautou\"}>",
"Record<{personName: \"Ben Miles\"}>",
"Record<{personName: \"Bill Paxton\"}>",
"Record<{personName: \"Bill Pullman\"}>",
"Record<{personName: \"Billy Crystal\"}>");
"Record<{personName: \"Billy Crystal\"}>",
"Record<{personName: \"Bonnie Hunt\"}>");
return new QueryContext(query, expected);
}

Expand Down
Loading

0 comments on commit 7a7bd4e

Please sign in to comment.