Python programming newbie here. I was trying to create a simple histogram with Altair using the following code. The dataset is large, around 1,000 rows.
# Histogram of column `r`: bin it on the x-axis and plot the number of
# rows falling into each bin on the y-axis.
count_chart = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        x=alt.X('r:Q', bin=True),   # quantitative field, binned
        y=alt.Y('count():Q'),       # row count per bin
    )
    .properties(title='Histogram plot of r counts')
)
# A bare expression on the last line makes the notebook render the chart.
count_chart
But I received the following error message:
---------------------------------------------------------------------------
`ImportError Traceback (most recent call last)
File ~\AppData\Local\miniconda3\Lib\site-packages\IPython\core\formatters.py:977, in MimeBundleFormatter.__call__(self, obj, include, exclude)
974 method = get_real_method(obj, self.print_method)
976 if method is not None:
--> 977 return method(include=include, exclude=exclude)
978 return None
979 else:
File ~\AppData\Local\miniconda3\Lib\site-packages\altair\vegalite\v5\api.py:2576, in TopLevelMixin._repr_mimebundle_(self, include, exclude)
2574 return {}
2575 else:
-> 2576 return renderers.get()(dct)
File ~\AppData\Local\miniconda3\Lib\site-packages\altair\utils\display.py:227, in HTMLRenderer.__call__(self, spec, **metadata)
224 kwargs.update(metadata)
225 # To get proper return value type, would need to write complex
226 # overload signatures for spec_to_mimebundle based on `format`
--> 227 return spec_to_mimebundle( # type: ignore[return-value]
228 spec, format="html", output_div=self.output_div, **kwargs
229 )
File ~\AppData\Local\miniconda3\Lib\site-packages\altair\utils\mimebundle.py:70, in spec_to_mimebundle(spec, format, mode, vega_version, vegaembed_version, vegalite_version, embed_options, engine, **kwargs)
68 internal_mode: Literal["vega-lite", "vega"] = mode
69 if using_vegafusion():
---> 70 spec = compile_with_vegafusion(spec)
71 internal_mode = "vega"
73 # Default to the embed options set by alt.renderers.set_embed_options
File ~\AppData\Local\miniconda3\Lib\site-packages\altair\utils\_vegafusion_data.py:226, in compile_with_vegafusion(vegalite_spec)
224 # Pre-evaluate transforms in vega spec with vegafusion
225 row_limit = data_transformers.options.get("max_rows", None)
--> 226 transformed_vega_spec, warnings = vf.runtime.pre_transform_spec(
227 vega_spec,
228 vf.get_local_tz(),
229 inline_datasets=inline_tables,
230 row_limit=row_limit,
231 )
233 # Check from row limit warning and convert to MaxRowsError
234 handle_row_limit_exceeded(row_limit, warnings)
File ~\AppData\Local\miniconda3\Lib\site-packages\vegafusion\runtime.py:364, in VegaFusionRuntime.pre_transform_spec(self, spec, local_tz, default_input_tz, row_limit, preserve_interactivity, inline_datasets, keep_signals, keep_datasets, data_encoding_threshold, data_encoding_format)
362 else:
363 local_tz = local_tz or get_local_tz()
--> 364 imported_inline_dataset = self._import_or_register_inline_datasets(inline_datasets)
366 # Parse input keep signals and datasets
367 keep_signals = parse_variables(keep_signals)
File ~\AppData\Local\miniconda3\Lib\site-packages\vegafusion\runtime.py:217, in VegaFusionRuntime._import_or_register_inline_datasets(self, inline_datasets)
214 except ValueError:
215 pass
--> 217 imported_inline_datasets[name] = PandasDatasource(value)
218 elif hasattr(value, "__dataframe__"):
219 # Let polars convert to pyarrow since it has broader support than the raw dataframe interchange
220 # protocol, and "This operation is mostly zero copy."
221 try:
File ~\AppData\Local\miniconda3\Lib\site-packages\vegafusion\datasource\pandas_datasource.py:13, in PandasDatasource.__init__(self, df, sample_size, batch_size)
11 def __init__(self, df: "pd.DataFrame", sample_size: int = 1000, batch_size: int = 8096):
12 import pandas as pd
---> 13 import pyarrow as pa
15 fields = []
16 casts = {}
File ~\AppData\Local\miniconda3\Lib\site-packages\pyarrow\__init__.py:65
63 _gc_enabled = _gc.isenabled()
64 _gc.disable()
---> 65 import pyarrow.lib as _lib
66 if _gc_enabled:
67 _gc.enable()
ImportError: DLL load failed while importing lib: The specified procedure could not be found.`
Before getting this error message, I got a different error about the size of the dataset, so I added this line before the histogram code: `alt.data_transformers.enable("vegafusion")`, as this is supposed to be helpful for large datasets. After that, I received the error message above.
I also tested a smaller dataset with the same code for the histogram, and it worked.
Thanks for any suggestions you can provide.
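Running `import pyarrow` on its own will probably give the same error: the traceback fails inside pyarrow's own `__init__.py` (at `import pyarrow.lib as _lib`), which points to a broken pyarrow installation rather than a problem with the Altair code.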