'float' object has no attribute 'split' (Python)

Question

I am new to Python. I have this Python code to split one column of a CSV file into several columns:

import pandas as pd
import re

# Load the input file; the code below reads its "traffic" column.
df = pd.read_csv("test.csv")

# Patterns that pull one field out of a string shaped like
# "{'name': ..., 'medium': ..., 'source': ...}".
# Each pattern uses a lookbehind for the quoted key and a lookahead for the
# delimiter that ends the value: ',' for name and medium, '}' for source.
name_regex = r"(?<=(\'name\': ))[^,]*(?=,)"
medium_regex = r"(?<=(\'medium\': ))[^,]*(?=,)"
source_regex = r"(?<=(\'source\': ))[^,]*(?=})"

def get_name(x):
    """Extract the 'name' value from every line of a traffic string.

    Args:
        x: Text holding one dict-like record per line, for example
            "{'name': '(test1)', 'medium': '(no1)', 'source': '(yes)'}".

    Returns:
        A list with one extracted name per line, single quotes removed.

    Raises:
        AttributeError: If ``x`` is not a string (this is the
            "'float' object has no attribute 'split'" error reported in the
            question), or if a line has no match for ``name_regex``.
    """
    # The separator must be the escaped newline "\n"; a raw line break inside
    # the quotes leaves the string literal unterminated.
    return [re.search(name_regex, line).group().replace("'", "") for line in x.split("\n")]
def get_medium(x):
    """Extract the 'medium' value from every line of a traffic string.

    Args:
        x: Text holding one dict-like record per line, for example
            "{'name': '(test1)', 'medium': '(no1)', 'source': '(yes)'}".

    Returns:
        A list with one extracted medium per line, single quotes removed.

    Raises:
        AttributeError: If ``x`` is not a string (this is the
            "'float' object has no attribute 'split'" error reported in the
            question), or if a line has no match for ``medium_regex``.
    """
    # The separator must be the escaped newline "\n"; a raw line break inside
    # the quotes leaves the string literal unterminated.
    return [re.search(medium_regex, line).group().replace("'", "") for line in x.split("\n")]
def get_source(x):
    """Extract the 'source' value from every line of a traffic string.

    Args:
        x: Text holding one dict-like record per line, for example
            "{'name': '(test1)', 'medium': '(no1)', 'source': '(yes)'}".

    Returns:
        A list with one extracted source per line, single quotes removed.

    Raises:
        AttributeError: If ``x`` is not a string (this is the
            "'float' object has no attribute 'split'" error reported in the
            question), or if a line has no match for ``source_regex``.
    """
    # The separator must be the escaped newline "\n"; a raw line break inside
    # the quotes leaves the string literal unterminated.
    return [re.search(source_regex, line).group().replace("'", "") for line in x.split("\n")]

# Output column -> extractor that fills it; dict order fixes the column order.
extractors = {
    "traffic_name": get_name,
    "traffic_medium": get_medium,
    "traffic_source": get_source,
}
for column, extract in extractors.items():
    df[column] = df["traffic"].apply(extract)
del df["traffic"]

# Every new column holds one list per row; explode gives each list element
# its own row, keeping the three columns aligned.
final_df = df.explode(list(extractors))

print(final_df)
final_df.to_csv("traffic.csv")

This is my CSV data:

,traffic_source
0,"{'name': '(test1)', 'medium': '(no1)', 'source': '(yes)'}"
1,
2,"{'name': '(test1)', 'medium': 'no2', 'source': 'yes1'}"
3,
4,"{'name': '(test2)', 'medium': 'no3', 'source': 'yes'}"

When I run the Python code, I get the error below. Can you please help? Thank you.

~\AppData\Local\Temp\ipykernel_15228\2852501840.py in get_name(x)
     12 
     13 def get_name(x):
---> 14     return [re.search(name_regex, line).group().replace("'", "") for line in x.split("\n")]
     15 def get_medium(x):
     16     return [re.search(medium_regex, line).group().replace("'", "") for line in x.split("\n")]

AttributeError: 'float' object has no attribute 'split'

Soviut · Accepted Answer

The first column in your CSV file is a number (being treated as a float). Numbers cannot be split.

Additionally, your CSV data looks like it's storing JSON in the second column. Rather than writing regular expressions, just decode the string using the json library.

N_Z · Answer

This error happens because pandas reads the empty cells of the traffic column (the rows with index 1 and 3, which have no value) as NaN, and NaN is a float. You can remove those non-string rows using:

# Keep only the rows whose 'traffic' value is a str; every other row is dropped.
df=df[df['traffic'].apply(lambda x: isinstance(x, str))]

Whole solution:

import pandas as pd
from io import StringIO
import re

# In-memory copy of the sample CSV data, so the example runs without a file on disk.
# The data column is headed "traffic", the name the code below looks up.
CSV=StringIO("""
,traffic
0,"{'name': '(test1)', 'medium': '(no1)', 'source': '(yes)'}"
1,
2,"{'name': '(test1)', 'medium': 'no2', 'source': 'yes1'}"
3,
4,"{'name': '(test2)', 'medium': 'no3', 'source': 'yes'}"
""")


df = pd.read_csv(CSV, sep=",")


# Patterns that pull one field out of a "{'name': ..., 'medium': ..., 'source': ...}"
# string: a lookbehind for the quoted key, then everything up to the delimiter
# that ends the value (',' for name and medium, '}' for source).
name_regex = r"(?<=(\'name\': ))[^,]*(?=,)"
medium_regex = r"(?<=(\'medium\': ))[^,]*(?=,)"
source_regex = r"(?<=(\'source\': ))[^,]*(?=})"

def get_name(x):
    """Return the 'name' field of each newline-separated record in ``x``.

    Args:
        x: A string with one "{'name': ..., 'medium': ..., 'source': ...}"
            record per line. Non-string values must be filtered out before
            this function is applied.

    Returns:
        A list of names, one per line, with the single quotes stripped.

    Raises:
        AttributeError: If ``x`` is not a string or a line does not match
            ``name_regex``.
    """
    # "\n" must stay an escape sequence: a literal line break inside the
    # quotes is a syntax error.
    return [re.search(name_regex, line).group().replace("'", "") for line in x.split("\n")]
def get_medium(x):
    """Return the 'medium' field of each newline-separated record in ``x``.

    Args:
        x: A string with one "{'name': ..., 'medium': ..., 'source': ...}"
            record per line. Non-string values must be filtered out before
            this function is applied.

    Returns:
        A list of mediums, one per line, with the single quotes stripped.

    Raises:
        AttributeError: If ``x`` is not a string or a line does not match
            ``medium_regex``.
    """
    # "\n" must stay an escape sequence: a literal line break inside the
    # quotes is a syntax error.
    return [re.search(medium_regex, line).group().replace("'", "") for line in x.split("\n")]
def get_source(x):
    """Return the 'source' field of each newline-separated record in ``x``.

    Args:
        x: A string with one "{'name': ..., 'medium': ..., 'source': ...}"
            record per line. Non-string values must be filtered out before
            this function is applied.

    Returns:
        A list of sources, one per line, with the single quotes stripped.

    Raises:
        AttributeError: If ``x`` is not a string or a line does not match
            ``source_regex``.
    """
    # "\n" must stay an escape sequence: a literal line break inside the
    # quotes is a syntax error.
    return [re.search(source_regex, line).group().replace("'", "") for line in x.split("\n")]


# Drop every row whose 'traffic' value is not a string, so the extractors
# below are only ever called on text.
is_text = df['traffic'].apply(lambda x: isinstance(x, str))
df = df[is_text]

# Output column -> extractor that fills it; dict order fixes the column order.
extractors = {
    "traffic_name": get_name,
    "traffic_medium": get_medium,
    "traffic_source": get_source,
}
for column, extract in extractors.items():
    df[column] = df["traffic"].apply(extract)
del df["traffic"]

# Every new column holds one list per row; explode gives each list element
# its own row, keeping the three columns aligned.
final_df = df.explode(list(extractors))

print(final_df)
final_df.to_csv("traffic.csv")

'float' object has no attribute 'split' (Python)

Tags:

python

pandas

dataframe

Phh

2 Answers

Soviut

N_Z

Recent Activity

Donate For Us

'float' object has no attribute 'split' (Python)

Tags:

python

pandas

dataframe

Phh

2 Answers

Soviut

N_Z

Related questions

Recent Activity

Donate For Us