From b08400c7a793d3960b991ade338e676ceb09ab80 Mon Sep 17 00:00:00 2001
From: Shawn <shmutyala@gmail.com>
Date: Fri, 18 Nov 2022 10:13:31 -0500
Subject: [PATCH] Update MANIFEST.in

---
 MANIFEST.in | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/MANIFEST.in b/MANIFEST.in
index 9561fb1..0a814ab 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1 +1,45 @@
 include README.rst
+010747
+
+from loanElig.loanEngineering.features.loanEligibility import spark_app
+from pyspark.sql import functions as F
+spark = spark_app()  # object returned by the project helper; used below through spark.read
+
+
+def table_identifier(col):  # assumes col is an RDD of indexable records with a string at [1] - TODO confirm
+    col = col.map(lambda x: x[1]).flatMap(lambda x: x.split(' '))  # take field [1], split it on single spaces
+    col_cnt = col.map(lambda x : (x,1)).reduceByKey(lambda a,b : a+b)  # pair each token with 1, sum per token
+    return col_cnt
+
+
+
+def replace_special_chars(col):  # assumes col is an RDD of strings - TODO confirm
+    list1 =['select','where','group','by', 'order']  # substrings deleted from every element, in this order
+    for i in list1:
+        col = col.map(lambda x, i=i: x.replace(i, ""))  # i=i pins this keyword; a bare closure evaluated later would see only the last one
+    return col
+
+
+df = spark.read.option("multiline","true").json('sample.json')  # load sample.json with the reader's "multiline" option set
+
+# df = df.select('state', F.regexp_replace(F.col("state"), "[_\"\'():;,.!?\\-]", " ").alias("table_list"))
+# Word_count = df.groupBy('table_list').count()
+# Word_count.orderBy(F.col('count').desc()).show(10)
+list1 =['select','where','group','by', 'order','*','in',"[",'(',']',')']  # substrings that repl() deletes from each token
+df = df.withColumn('cnt',F.regexp_replace(F.split(F.col('state'), 'from'),",",""))\
+        .withColumn('cnt2',F.explode(F.split(F.col('cnt'), " ")))  # NOTE(review): 'cnt' applies regexp_replace to the result of F.split (an array column) - confirm this resolves; an element selection may be missing
+
+# replace_pat = lambda x: x.replace(i,"") for i in list1
+def repl(col):  # despite the name, col is one string value here: this function is wrapped as a UDF below
+    for i in list1:  # list1 is the module-level list of substrings to delete
+        col = col.replace(i,"")  # plain substring removal, not whole-word: 'in' is also cut out of e.g. 'main'
+    return col
+
+rep1 = F.udf(repl)  # wrap repl so it can be applied to a DataFrame column
+
+df = (df.withColumn('cnt2', rep1(F.col('cnt2')))  # clean each token in place
+    .filter(F.col('cnt2').isNotNull() & (F.col('cnt2') != ""))  # & not |: with | a non-null "" still passed the filter
+    .groupBy('cnt2')
+    .count())
+
+df.show()  # print the per-token counts