In [ ]:
import json
In [ ]:
# Boundary data for the Indian states. Later cells match each feature's
# "NAME_1" property against the state names in the CSV.
filename = "india_states.geojson" # This file provides all the geojson coordinates

# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's default encoding.
with open(filename, 'r', encoding="utf-8") as f:
    datastore = json.load(f)
In [ ]:
import pandas as pd
In [ ]:
input_file = "infant_mortality.csv"

# Rows are keyed by the "states" column. The final row of the file holds the
# all-India statistics, so it is sliced off positionally.
df = pd.read_csv(input_file, index_col="states").iloc[:-1]
In [ ]:
# Report every state name in the CSV index that no GeoJSON feature carries
# as its "NAME_1" property.
for state in df.index:
    if not any(feat["properties"]["NAME_1"] == state for feat in datastore["features"]):
        print("no match found for", state)

Some state names in the CSV are spelled differently from the names in the GeoJSON, so I have to map them manually before I can plot. Very hacky solution below.

In [ ]:
# Show the state names exactly as the GeoJSON spells them.
geojson_names = [feature["properties"]["NAME_1"] for feature in datastore["features"]]
for name in geojson_names:
    print(name)
In [ ]:
# CSV spelling -> spelling used by the GeoJSON "NAME_1" property.
state_name_fixes = {
    "Jammu & Kashmir": "Jammu and Kashmir",
    "Odisha": "Orissa",
    "Uttarakhand": "Uttaranchal",
    "A& N Islands": "Andaman and Nicobar",
    "D & N Haveli": "Dadra and Nagar Haveli",
    "Daman & Diu": "Daman and Diu",
}

# Translate through the mapping instead of assigning with df.loc[label, col]:
# a .loc assignment on a label that is not in the index silently appends a
# new, otherwise-NaN row, so one mistyped key would corrupt the frame.
# Names without an entry in the mapping keep their CSV spelling.
df["new_states"] = [state_name_fixes.get(name, name) for name in df.index]
In [ ]:
# Dropping telangana from the map. will update later.
# Rebinding with errors="ignore" (rather than inplace=True) lets this cell be
# re-run without a KeyError once the row has already been removed.
df = df.drop(index="Telangana", errors="ignore")
In [ ]:
# Same check as before, now on the corrected names: anything printed here
# still has no GeoJSON feature with that "NAME_1".
known_names = {feat["properties"]["NAME_1"] for feat in datastore["features"]}
for state in df["new_states"]:
    if state not in known_names:
        print("no match found for", state)
In [ ]:
# From here on the rows are keyed by the GeoJSON-compatible state names.
df = df.set_index("new_states")
In [ ]:
# Copy each state's statistics onto the GeoJSON feature with the same name.
stat_columns = ["total", "urban", "rural"]
for feature in datastore["features"]:
    props = feature["properties"]
    state = props["NAME_1"]
    if state not in df.index:
        continue  # no row for this region; leave the feature untouched
    for column in stat_columns:
        # float() turns the pandas/numpy scalar into a plain Python float
        props[column] = float(df.loc[state, column])

Below I create a smaller file.

In [ ]:
# Build a slimmed-down FeatureCollection: each feature keeps only its
# geometry, its state name and the three statistics; every other property
# of the source GeoJSON is dropped.
stat_keys = ("total", "urban", "rural")

features = []
for e in datastore["features"]:
    source_props = e["properties"]
    slim_props = {"Name": source_props["NAME_1"]}
    # A region without a matching CSV row never received the statistics.
    # .get() exports it with null values instead of aborting the whole
    # export with a KeyError.
    for k in stat_keys:
        slim_props[k] = source_props.get(k)
    features.append({"type": "Feature", "geometry": e["geometry"], "properties": slim_props})

new_datastore = {"type": "FeatureCollection", "features": features}
In [ ]:
# str.strip(".csv") does not remove a suffix: it strips any of the characters
# '.', 'c', 's', 'v' from BOTH ends (e.g. "stats.csv" -> "tat"). Cut off the
# extension explicitly so the name is right for any input file.
csv_suffix = ".csv"
base_name = input_file[:-len(csv_suffix)] if input_file.endswith(csv_suffix) else input_file
output_file = "kepler_" + base_name + ".geojson"
In [ ]:
# Serialise the trimmed FeatureCollection and write it to disk.
with open(output_file, mode='w') as geojson_out:
    geojson_out.write(json.dumps(new_datastore))

Quantile or Quantize?

In [ ]:
# Plain Python list of the "total" column, used below to inspect its distribution.
a = df.loc[:, "total"].to_numpy().tolist()
In [ ]:
# Ascending order, so the plot below shows the values from lowest to highest.
a = sorted(a)
In [ ]:
import matplotlib.pyplot as plt
%matplotlib inline
In [ ]:
# Plot the values of `a` against their position in the list to see how they
# are spread out. Labels and a title let the figure be read on its own.
plt.scatter(range(len(a)), a)
plt.axis('scaled')  # same data-units-per-pixel on both axes
plt.xlabel("Position in the list")
plt.ylabel('Value of the "total" column')
plt.title('Distribution of the "total" column')
plt.show()

Going with Quantile because of the horizontal line at 25. Read this if confused.